Commit 8f5d2414 authored by Constantin Pohl's avatar Constantin Pohl
Browse files

restructured features (~41% acc)

parent b65ced0b
......@@ -8,6 +8,7 @@ import tensorflow as tf
import os
import sys
import numpy as np
from features_tf import build_model_columns, trim_features
#suppress warnings regarding AVX
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
......@@ -25,34 +26,13 @@ PORTS = ['ALEXANDRIA','AUGUSTA','BARCELONA','CARTAGENA','CEUTA','DAMIETTA','DILI
#default initialization: [''] is string, [0] is int, [0.0] is float
CSV_DATA_DEFAULTS = [[''],[0],[0.0],[0.0],[0.0],[0],[0],[''],[0],['']]
#specify features (same as in the trained model)
def build_model_columns():
    """Return the list of feature columns used by the model.

    Every feature is declared as a numeric column. The string handed to
    numeric_column is the key that maps the parsed attribute of the same
    name onto its feature column.

    Returns:
        A list of numeric feature columns in the order
        LON, LAT, COURSE, SHIPTYPE.
    """
    #keys listed in the order the columns are returned
    feature_keys = ('LON', 'LAT', 'COURSE', 'SHIPTYPE')
    return [tf.feature_column.numeric_column(key) for key in feature_keys]
#classifier needs function for prediction
#tp_string is simply the tuple as a string
def ask_fun(tp_string):
#split string into feature columns
columns = tf.decode_csv(tp_string, record_defaults=CSV_DATA_DEFAULTS)
features = dict(zip(CSV_HEADER, columns))
#remove unneccessary values from tuple
features.pop('REPORTED_DRAUGHT')
features.pop('DEPARTURE_PORT_NAME')
features.pop('TIMESTAMP')
features.pop('HEADING')
features.pop('SPEED')
features.pop('SHIP_ID')
features = trim_features(features)
#create a dataset from features, return
dataset = tf.data.Dataset.from_tensors(features)
......
import tensorflow as tf
#declare features
def build_model_columns():
    """Declare the feature columns shared by training and prediction.

    LON, LAT, COURSE and SHIPTYPE are declared as numeric columns, and
    SHIP_ID as a categorical column hashed into 1000 buckets.

    Returns:
        A list of feature columns in the order
        LON, LAT, COURSE, SHIPTYPE, SHIP_ID.
    """
    numeric_keys = ('LON', 'LAT', 'COURSE', 'SHIPTYPE')
    columns = [tf.feature_column.numeric_column(key) for key in numeric_keys]
    #NOTE(review): depending on the estimator type, a categorical column may
    #have to be wrapped (indicator/embedding column) - confirm against the
    #estimator that consumes these columns
    columns.append(
        tf.feature_column.categorical_column_with_hash_bucket("SHIP_ID", 1000))
    return columns
#remove unnecessary values from tuple
def trim_features(feat):
    """Drop the entries that are not used as model features.

    The mapping is modified in place and the same object is returned.

    Args:
        feat: mapping from column name to value; it must contain every
            key that is removed here.

    Returns:
        The same mapping, without the removed keys.

    Raises:
        KeyError: if one of the keys to remove is missing from feat.
    """
    unused_keys = (
        'REPORTED_DRAUGHT',
        'DEPARTURE_PORT_NAME',
        'TIMESTAMP',
        'HEADING',
        'SPEED',
    )
    for key in unused_keys:
        #pop without a default on purpose: a missing key raises KeyError
        feat.pop(key)
    return feat
\ No newline at end of file
......@@ -17,6 +17,8 @@ import tensorflow as tf
import os
import sys
import numpy as np
from features_tf import build_model_columns
from features_tf import trim_features
#suppress warnings regarding AVX
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
......@@ -35,20 +37,6 @@ CSV_DATA_DEFAULTS = [[''],[0],[0.0],[0.0],[0.0],[0],[0],[''],[0],[''],[''],[0]]
# 'NEMRUT':16, 'PALMA DE MALLORCA':17, 'PIRAEUS':18, 'PORT SAID':19, 'TARRAGONA':20,
# 'TUZLA':21, 'VALENCIA':22, 'VALLETTA':23, 'YALOVA':24}
#construct features in tensorflow-format
def build_model_columns():
    """Construct the feature columns in tensorflow format.

    Each feature is a numeric column whose key (for example 'COURSE')
    maps the attribute of the same name onto the feature column.

    Returns:
        A list of numeric feature columns in the order
        LON, LAT, COURSE, SHIPTYPE.
    """
    #keys listed in the order the columns are returned
    column_keys = ('LON', 'LAT', 'COURSE', 'SHIPTYPE')
    return [tf.feature_column.numeric_column(name) for name in column_keys]
#construct estimator for the model (with directory specification)
def build_estimator(model_dir):
#get features
......@@ -77,12 +65,8 @@ def input_fn(data_file, num_epochs, batch_size):
label = features.pop('ARRIVAL_PORT_CALC')
#pop unneccessary columns
features.pop('ARRIVAL_CALC')
features.pop('REPORTED_DRAUGHT')
features.pop('DEPARTURE_PORT_NAME')
features.pop('TIMESTAMP')
features.pop('HEADING')
features.pop('SPEED')
features.pop('SHIP_ID')
features = trim_features(features)
return features, label
#reading raw file
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment