Commit b65ced0b authored by Constantin Pohl

Load and ask model

parent 9e9a0f42
##
#
# This file loads the previously trained model and makes it possible to predict the arrival port of a single tuple.
#
##
import tensorflow as tf
import os
import sys
import numpy as np
#suppress warnings regarding AVX
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
#attribute names
CSV_HEADER = ['SHIP_ID','SHIPTYPE','SPEED','LON','LAT','COURSE','HEADING','TIMESTAMP','DEPARTURE_PORT_NAME',
'REPORTED_DRAUGHT']
#port list for prediction
PORTS = ['ALEXANDRIA','AUGUSTA','BARCELONA','CARTAGENA','CEUTA','DAMIETTA','DILISKELESI',
'FOS SUR MER','GEMLIK','GENOVA','GIBRALTAR','HAIFA','ISKENDERUN','LIVORNO',
'MARSAXLOKK','MONACO','NEMRUT','PALMA DE MALLORCA','PIRAEUS','PORT SAID',
'TARRAGONA','TUZLA','VALENCIA','VALLETTA','YALOVA']
#default initialization: [''] is string, [0] is int, [0.0] is float
CSV_DATA_DEFAULTS = [[''],[0],[0.0],[0.0],[0.0],[0],[0],[''],[0],['']]
#specify features (same as in the trained model)
def build_model_columns():
    #numeric column because of a float value in a certain range
    #COURSE is the key, necessary to map the attribute to a feature column
    course = tf.feature_column.numeric_column('COURSE')
    #see above
    shiptype = tf.feature_column.numeric_column('SHIPTYPE')
    lon = tf.feature_column.numeric_column('LON')
    lat = tf.feature_column.numeric_column('LAT')
    #combine into a single variable
    base_columns = [lon, lat, course, shiptype]
    return base_columns
#the classifier needs an input function for prediction
#tp_string is simply the tuple as a string
def ask_fun(tp_string):
    #split the string into feature columns
    columns = tf.decode_csv(tp_string, record_defaults=CSV_DATA_DEFAULTS)
    features = dict(zip(CSV_HEADER, columns))
    #remove unnecessary values from the tuple
    features.pop('REPORTED_DRAUGHT')
    features.pop('DEPARTURE_PORT_NAME')
    features.pop('TIMESTAMP')
    features.pop('HEADING')
    features.pop('SPEED')
    features.pop('SHIP_ID')
    #create a dataset from the features and return it
    dataset = tf.data.Dataset.from_tensors(features)
    dataset = dataset.batch(1)
    iterator = dataset.make_one_shot_iterator()
    feat = iterator.get_next()
    return feat
#main
def main(unused_argv):
    #directory where the model can be found
    model_dir = "./model/"
    #load the model
    column_model = build_model_columns()
    model = tf.estimator.LinearClassifier(model_dir=model_dir, n_classes=25, feature_columns=column_model)
    #test string for the model (feel free to change it)
    tp_string = "0x03a0f55ff3f188e443af584638a7f9970744cd4b,80,13.4,1.163337,40.41387,228,227,12-05-15 22:24,2,63"
    #predict
    prediction = model.predict(input_fn=lambda: ask_fun(tp_string))
    #print the result: PORTS[22] is the "real" arrival port of the tuple above, do not forget to change it accordingly
    for each in prediction:
        print("Prediction: " + PORTS[np.argmax(each["logits"])] + ", Expectation: " + PORTS[22])

if __name__ == '__main__':
    tf.app.run(main=main, argv=[sys.argv[0]])
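As a side note, the per-tuple prediction dict returned by the estimator already contains a "class_ids" entry, so the argmax over the logits can also be read off directly. The following is only a minimal, hypothetical sketch (the helper name ask_with_class_ids is not part of the commit) that reuses build_model_columns and ask_fun from above and assumes the trained model in ./model/:

#illustrative sketch only (assumption: the trained model from above exists in ./model/)
#LinearClassifier.predict yields one dict per example that also contains "class_ids" and "probabilities"
def ask_with_class_ids(tp_string):
    model = tf.estimator.LinearClassifier(model_dir="./model/", n_classes=25,
                                          feature_columns=build_model_columns())
    for pred in model.predict(input_fn=lambda: ask_fun(tp_string)):
        port_idx = int(pred["class_ids"][0])
        print("Prediction: %s (probability %.2f)" % (PORTS[port_idx], pred["probabilities"][port_idx]))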
@@ -15,8 +15,8 @@
import tensorflow as tf
import os
import csv
import sys
import numpy as np
#suppress warnings regarding AVX
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
@@ -40,11 +40,11 @@ def build_model_columns():
    #numeric column because of a float value in a certain range
    #COURSE is the key, necessary to map the attribute to a feature column
    course = tf.feature_column.numeric_column('COURSE')
    #see above
    shiptype = tf.feature_column.numeric_column('SHIPTYPE')
    lon = tf.feature_column.numeric_column('LON')
    lat = tf.feature_column.numeric_column('LAT')
    #combine into a single variable
    base_columns = [lon, lat, course, shiptype]
    return base_columns
@@ -58,39 +58,47 @@ def build_estimator(model_dir):
    #return a linear classifier model
    return tf.estimator.LinearClassifier(model_dir=model_dir, n_classes=25, feature_columns=column_model, config=run_config)
#get the data, data_file = path to file, num_epochs = amount of reruns, batch_size = divide data into batches
def input_fn(data_file, num_epochs, batch_size):
    #check if the file exists
    assert tf.gfile.Exists(data_file), ('%s not found.' % data_file)
    #parse a string into tensors, value = line in the file
    def parse_csv(value):
        #print which file is being parsed
        print("Parsing file %s" % data_file)
        #convert the data into tensors
        columns = tf.decode_csv(value, record_defaults=CSV_DATA_DEFAULTS)
        #create a dictionary, mapping attribute names (e.g. course) to tensors
        features = dict(zip(CSV_HEADER, columns))
        #use the arrival port as the label
        label = features.pop('ARRIVAL_PORT_CALC')
        #pop unnecessary columns
        features.pop('ARRIVAL_CALC')
        features.pop('REPORTED_DRAUGHT')
        features.pop('DEPARTURE_PORT_NAME')
        features.pop('TIMESTAMP')
        features.pop('HEADING')
        features.pop('SPEED')
        features.pop('SHIP_ID')
        return features, label
    #read the raw file
    dataset = tf.data.TextLineDataset(data_file)
    #shuffle for each epoch run
    #HINT: does not improve accuracy; since there are timestamps, shuffling may not be useful
    #dataset = dataset.shuffle(buffer_size=32000)
    #map and parse the input strings into features
    dataset = dataset.map(parse_csv, num_parallel_calls=5)
    #learning fine-tuning: repeat for the given number of epochs and split into batches
    dataset = dataset.repeat(num_epochs)
    dataset = dataset.batch(batch_size)
    #the iterator provides access to features & labels one batch at a time
    iterator = dataset.make_one_shot_iterator()
    features, labels = iterator.get_next()
@@ -108,12 +116,12 @@ def main(unused_argv):
    model_dir = "./model/"
    train_data = "./train_data/training_data.csv"
    test_data = "./train_data/testing_data.csv"
    #first, build an estimator (currently a linear classifier)
    model = build_estimator(model_dir)
    #train the classifier with training data
    model.train(input_fn=lambda: input_fn(train_data, num_epochs, batch_size))
    #evaluate classifier with test data
    results = model.evaluate(input_fn=lambda: input_fn(test_data, 1, batch_size))
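For quick debugging, the input pipeline can also be inspected outside of the estimator. The snippet below is only a sketch under the assumption that input_fn returns the (features, labels) pair built above and that a CSV with the training schema exists at the given path; inspect_one_batch is a hypothetical helper, not part of the commit:

#illustrative sketch only: pull a single batch from input_fn and print it (TF 1.x session style)
def inspect_one_batch(data_file="./train_data/training_data.csv"):
    features, labels = input_fn(data_file, num_epochs=1, batch_size=4)
    with tf.Session() as sess:
        feature_values, label_values = sess.run([features, labels])
        print("Labels:", label_values)
        for name, values in feature_values.items():
            print(name, values)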