Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
code
pfabric
Commits
b65ced0b
Commit
b65ced0b
authored
Feb 16, 2018
by
Constantin Pohl
Browse files
Load and ask model
parent
9e9a0f42
Changes
6
Expand all
Hide whitespace changes
Inline
Side-by-side
src/DEBS2018/data/1000rowspublic.csv
deleted
100644 → 0
View file @
9e9a0f42
This diff is collapsed.
Click to expand it.
src/DEBS2018/data/1000rowspublic_fixed.csv
0 → 100644
View file @
b65ced0b
This diff is collapsed.
Click to expand it.
src/DEBS2018/data/vessel24hpublic.csv
deleted
100644 → 0
View file @
9e9a0f42
This diff is collapsed.
Click to expand it.
src/DEBS2018/data/vessel24hpublic_fixed.csv
0 → 100644
View file @
b65ced0b
This diff is collapsed.
Click to expand it.
src/DEBS2018/training/ask_tf.py
0 → 100644
View file @
b65ced0b
##
#
# This file loads the previously trained model and allows to predict the arrival port of a single tuple.
#
##
import
tensorflow
as
tf
import
os
import
sys
import
numpy
as
np
# Suppress TensorFlow startup warnings regarding missing AVX support.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Attribute names, one per CSV column (order must match the input tuples).
CSV_HEADER = [
    'SHIP_ID', 'SHIPTYPE', 'SPEED', 'LON', 'LAT', 'COURSE', 'HEADING',
    'TIMESTAMP', 'DEPARTURE_PORT_NAME', 'REPORTED_DRAUGHT',
]

# Candidate arrival ports; the classifier's class index maps into this list.
PORTS = [
    'ALEXANDRIA', 'AUGUSTA', 'BARCELONA', 'CARTAGENA', 'CEUTA',
    'DAMIETTA', 'DILISKELESI', 'FOS SUR MER', 'GEMLIK', 'GENOVA',
    'GIBRALTAR', 'HAIFA', 'ISKENDERUN', 'LIVORNO', 'MARSAXLOKK',
    'MONACO', 'NEMRUT', 'PALMA DE MALLORCA', 'PIRAEUS', 'PORT SAID',
    'TARRAGONA', 'TUZLA', 'VALENCIA', 'VALLETTA', 'YALOVA',
]

# Per-column defaults for decode_csv, which also fix the parse types:
# [''] is string, [0] is int, [0.0] is float.
CSV_DATA_DEFAULTS = [
    [''], [0], [0.0], [0.0], [0.0], [0], [0], [''], [0], [''],
]
# Specify features (must be the same columns the model was trained with).
def build_model_columns():
    """Return the list of feature columns the trained model expects.

    Each entry is a plain numeric column; the key string maps the
    attribute in the feature dict to its feature column.
    """
    numeric = tf.feature_column.numeric_column
    # Same set and order as in the training script: LON, LAT, COURSE, SHIPTYPE.
    return [
        numeric('LON'),
        numeric('LAT'),
        numeric('COURSE'),
        numeric('SHIPTYPE'),
    ]
# The classifier needs an input function for prediction.
# tp_string is simply the tuple encoded as a CSV string.
def ask_fun(tp_string):
    """Input function for Estimator.predict.

    Parses one CSV-encoded tuple string into a batched (size 1) dict of
    feature tensors containing only the attributes the model was
    trained on, and returns the iterator's next-element op.
    """
    # Split the string into per-column tensors, keyed by attribute name.
    parsed = tf.decode_csv(tp_string, record_defaults=CSV_DATA_DEFAULTS)
    features = dict(zip(CSV_HEADER, parsed))
    # Remove attributes the model does not use.
    for unused in ('REPORTED_DRAUGHT', 'DEPARTURE_PORT_NAME', 'TIMESTAMP',
                   'HEADING', 'SPEED', 'SHIP_ID'):
        features.pop(unused)
    # Wrap the features in a single-tensor dataset with batch size 1 and
    # hand the estimator the one-shot iterator's next element.
    dataset = tf.data.Dataset.from_tensors(features).batch(1)
    return dataset.make_one_shot_iterator().get_next()
# Main entry point.
def main(unused_argv):
    """Load the previously trained classifier and predict the arrival
    port of a single hard-coded test tuple, printing the result.

    Args:
        unused_argv: leftover command-line arguments from tf.app.run
            (ignored).
    """
    # Directory where the trained model checkpoints can be found.
    model_dir = "./model/"
    # Rebuild the feature columns and restore the linear classifier;
    # n_classes=25 matches len(PORTS).
    column_model = build_model_columns()
    model = tf.estimator.LinearClassifier(model_dir=model_dir,
                                          n_classes=25,
                                          feature_columns=column_model)
    # Test string for the model (feel free to change).
    tp_string = "0x03a0f55ff3f188e443af584638a7f9970744cd4b,80,13.4,1.163337,40.41387,228,227,12-05-15 22:24,2,63"
    # Predict using the single-tuple input function.
    prediction = model.predict(input_fn=lambda: ask_fun(tp_string))
    # PORTS[22] is the "real" solution of the tuple above; do not forget
    # to change it accordingly when changing tp_string.
    # Fix: the original used enumerate() but never read the index, so
    # iterate the prediction results directly.
    for each in prediction:
        print("Prediction: " + PORTS[np.argmax(each["logits"])]
              + ", Expectation: " + PORTS[22])
if __name__ == '__main__':
    # Hand control to tf.app.run, which invokes main; pass only the
    # program name so no extra CLI arguments reach the flag parser.
    tf.app.run(main=main, argv=[sys.argv[0]])
src/DEBS2018/training/train_tf.py
View file @
b65ced0b
...
...
@@ -15,8 +15,8 @@
import
tensorflow
as
tf
import
os
import
csv
import
sys
import
numpy
as
np
#suppress warnings regarding AVX
os
.
environ
[
'TF_CPP_MIN_LOG_LEVEL'
]
=
'2'
...
...
@@ -40,11 +40,11 @@ def build_model_columns():
#numeric column because of float value in certain range
#COURSE is the key, necessary to map attribute to feature column
course
=
tf
.
feature_column
.
numeric_column
(
'COURSE'
)
shiptype
=
tf
.
feature_column
.
numeric_column
(
'SHIPTYPE'
)
#see above
shiptype
=
tf
.
feature_column
.
numeric_column
(
'SHIPTYPE'
)
lon
=
tf
.
feature_column
.
numeric_column
(
'LON'
)
lat
=
tf
.
feature_column
.
numeric_column
(
'LAT'
)
#combine to single variable
base_columns
=
[
lon
,
lat
,
course
,
shiptype
]
return
base_columns
...
...
@@ -58,39 +58,47 @@ def build_estimator(model_dir):
#return a linear classifier model
return
tf
.
estimator
.
LinearClassifier
(
model_dir
=
model_dir
,
n_classes
=
25
,
feature_columns
=
column_model
,
config
=
run_config
)
#get the data, data_file = path to file, num_epochs =
, batch_size =
#get the data, data_file = path to file, num_epochs =
amount of reruns, batch_size = divide data into batches
def
input_fn
(
data_file
,
num_epochs
,
batch_size
):
#check if file exists
assert
tf
.
gfile
.
Exists
(
data_file
),
(
'% not found.'
%
data_file
)
#parse string into tensors, value = line in file
def
parse_csv
(
value
):
#print directory path
print
(
"Parsing file %s"
%
data_file
)
#convert data into tensors
columns
=
tf
.
decode_csv
(
value
,
record_defaults
=
CSV_DATA_DEFAULTS
)
#create a dictionary, mapping attribute names (e.g. course) to tensors
features
=
dict
(
zip
(
CSV_HEADER
,
columns
))
#use arrival (port) as label
label
=
features
.
pop
(
'ARRIVAL_PORT_CALC'
)
#pop unnecessary columns
features
.
pop
(
'ARRIVAL_CALC'
)
features
.
pop
(
'REPORTED_DRAUGHT'
)
features
.
pop
(
'DEPARTURE_PORT_NAME'
)
features
.
pop
(
'TIMESTAMP'
)
features
.
pop
(
'HEADING'
)
features
.
pop
(
'SPEED'
)
features
.
pop
(
'SHIP_ID'
)
return
features
,
label
#reading raw file
dataset
=
tf
.
data
.
TextLineDataset
(
data_file
)
#shuffle for each epoch run
#HINT: does not improve accuracy; since there are timestamps shuffling maybe not useful
#dataset = dataset.shuffle(buffer_size=32000)
#map and parse input strings into features
dataset
=
dataset
.
map
(
parse_csv
,
num_parallel_calls
=
5
)
#learning finetuning
dataset
=
dataset
.
repeat
(
num_epochs
)
dataset
=
dataset
.
batch
(
batch_size
)
#iterator provides access to features & labels one at a time
iterator
=
dataset
.
make_one_shot_iterator
()
features
,
labels
=
iterator
.
get_next
()
...
...
@@ -108,12 +116,12 @@ def main(unused_argv):
model_dir
=
"./model/"
train_data
=
"./train_data/training_data.csv"
test_data
=
"./train_data/testing_data.csv"
#first, build an estimator (currently a linear classifier)
model
=
build_estimator
(
model_dir
)
#train the classifier with training data
model
.
train
(
input_fn
=
lambda
:
input_fn
(
train_data
,
num_epochs
,
batch_size
))
#evaluate classifier with test data
results
=
model
.
evaluate
(
input_fn
=
lambda
:
input_fn
(
test_data
,
1
,
batch_size
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment