In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import random
import math
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.framework import ops

 from ._conv import register_converters as _register_converters


In [2]:
# Load the matched tracks from disk.
# Each track consists of 8 hits with x, y, z coordinates.
# NOTE(review): read_pickle can execute arbitrary code while unpickling;
# only load files that come from a trusted source.

testset = pd.read_pickle('matched_8hittracks.pkl')

In [3]:
# Sanity-check the loaded data by printing a couple of arbitrary particles.

# Convert to a float32 NumPy array in a single step.
tset = np.array(testset).astype('float32')
print(tset.shape)

# Particle 1: print one (x, y, z) triple per hit, i.e. columns 0-2, 3-5, ..., 21-23.
for first_col in range(0, 24, 3):
    print(tset[1, first_col:first_col + 3])

# Particle 0: print the whole flat row.
print(tset[0, :])

(46896, 24)
[-20.411108 -9.417887 4.7599998]
[-27.813803 -6.944843 4.7599998]
[-66.736946 22.9032 4.3599997]
[-74.0961 35.649506 4.04 ]
[78.324196 26.359665 -3.7200012]
[69.040436 14.306461 -4.04 ]
[26.880571 -9.817033 -4.84 ]
[ 19.68401 -11.173258 -5. ]
[ -2.2485821 23.380732 -6.04 -6.489999 28.598572
 -5.6400003 -21.724771 67.052704 -3.2400002 -22.225971
 79.267685 -2.6000004 82.22602 3.0700002 7.24
 70.390724 0.19000006 7.5599995 28.802656 3.9014618
 6.04 21.421392 6.978845 5.64 ]


In [4]:
### Reshape original array into the shape (particlenumber, timesteps, input = coordinates)###

def reshapor(arr_orig):
    """Reshape flat per-track rows into (track, timestep, coordinate) form.

    Every row of the input holds consecutive coordinate triples, so columns
    ``3*t .. 3*t+2`` of row ``i`` end up in ``result[i, t, :]``.

    Parameters
    ----------
    arr_orig : array-like of shape (number_examples, n_columns)
        Flat data, three columns per timestep. If ``n_columns`` is not a
        multiple of 3 the trailing ``n_columns % 3`` columns are dropped.

    Returns
    -------
    numpy.ndarray
        New float64 array of shape ``(number_examples, n_columns // 3, 3)``.
        It never shares memory with the input.
    """
    flat = np.asarray(arr_orig)
    number_examples = int(flat.shape[0])
    timesteps = flat.shape[1] // 3

    # A single vectorised reshape replaces the per-element Python loops.
    # Slicing first discards any incomplete trailing triple, and astype()
    # always returns a fresh float64 copy.
    usable = flat[:, :3 * timesteps]
    return usable.reshape(number_examples, timesteps, 3).astype(np.float64)

In [5]:
### create the training set and the test set###

def create_random_sets(dataset, train_to_total_ratio):
    """Randomly split a flat data set into a training part and a test part.

    Parameters
    ----------
    dataset : array-like of shape (num_examples, n_columns)
        Flat data, one example per row.
    train_to_total_ratio : float
        Fraction of the rows that goes into the training set. The training
        set size is ``int(num_examples * train_to_total_ratio)``.

    Returns
    -------
    (train_set, test_set) : tuple of numpy.ndarray
        Both parts after being passed through ``reshapor``. Rows keep their
        original relative order inside each part.

    Notes
    -----
    The split is drawn with ``random.sample``; seed the ``random`` module
    beforehand to obtain a reproducible split.
    """
    # Use the argument itself: reading a notebook-level array here would
    # silently ignore whatever the caller passed in.
    data = np.asarray(dataset)
    num_examples = data.shape[0]
    # np.int was only an alias of the builtin int and no longer exists in
    # current NumPy releases.
    train_set_size = int(num_examples * train_to_total_ratio)
    random_indices = random.sample(range(num_examples), train_set_size)

    # A boolean mask gives O(1) membership per row (instead of scanning the
    # index list for every row) and preserves the original row order.
    in_train = np.zeros(num_examples, dtype=bool)
    in_train[np.asarray(random_indices, dtype=np.intp)] = True

    train_set = reshapor(data[in_train])
    test_set = reshapor(data[~in_train])

    return train_set, test_set
 

In [6]:
# Seed the split so the same tracks land in the test set on every run.
# Without this, a model that is saved and later reloaded in a fresh kernel
# would be evaluated on a "test" set that mostly overlaps its training data.
random.seed(42)
train_set, test_set = create_random_sets(tset, 0.99)
print(test_set.shape, train_set.shape, reshapor(tset).shape)
print(test_set[0,:,:])

(469, 8, 3) (46427, 8, 3) (46896, 8, 3)
[[ 18.9492569 -12.94710732 -6.44000053]
 [ 21.15423965 -19.98032379 -6.60000038]
 [ 27.98397064 -65.37555695 -7.88000011]
 [ 27.91954994 -77.96816254 -8.03999996]
 [-48.85122299 66.21347809 -21.55999947]
 [-38.2697525 59.18515396 -21.56000137]
 [ -7.20999908 28.59857178 -21.96000099]
 [ -3.13550663 23.01335526 -22.12000084]]


In [7]:
### Build the input and target arrays from a data set of shape (num_examples, 8 timesteps, 3 coordinates) ###

def target_and_input(data_set):
    """Build sliding-window inputs and next-hit targets from 8-hit tracks.

    For each of the four windows ``w = 0..3`` the input row is the
    concatenation of hits ``w, w+1, w+2, w+3`` (4 hits x 3 values = 12
    values) and the target row is hit ``w + 4``.

    Parameters
    ----------
    data_set : numpy.ndarray
        Indexed as ``data_set[example, timestep, :]``; timesteps 0..7 are read.

    Returns
    -------
    (inputt, target) : tuple of numpy.ndarray
        ``inputt`` has shape ``(num_examples, 4, 12)`` and ``target`` has
        shape ``(num_examples, 4, 3)``; both are float64.
    """
    n_tracks = data_set.shape[0]
    inputt = np.zeros((n_tracks, 4, 12))
    target = np.zeros((n_tracks, 4, 3))

    for window in range(4):
        # Lay the four hits of this window side by side, three columns each.
        for offset in range(4):
            first_col = 3 * offset
            inputt[:, window, first_col:first_col + 3] = data_set[:, window + offset, :]
        # The hit right after the window is what should be predicted.
        target[:, window, :] = data_set[:, window + 4, :]

    return inputt, target
 

In [8]:
# Build the windowed inputs/targets for both splits and show the first training example.
inputt_train, target_train = target_and_input(train_set)
inputt_test, target_test = target_and_input(test_set)
print(inputt_train[0])
print(target_train[0])

[[ -2.24858212 23.38073158 -6.03999996 -6.48999882 28.59857178
 -5.64000034 -21.7247715 67.05270386 -3.24000025 -22.22597122
 79.26768494 -2.60000038]
 [ -6.48999882 28.59857178 -5.64000034 -21.7247715 67.05270386
 -3.24000025 -22.22597122 79.26768494 -2.60000038 82.22602081
 3.07000017 7.23999977]
 [-21.7247715 67.05270386 -3.24000025 -22.22597122 79.26768494
 -2.60000038 82.22602081 3.07000017 7.23999977 70.39072418
 0.19000006 7.55999947]
 [-22.22597122 79.26768494 -2.60000038 82.22602081 3.07000017
 7.23999977 70.39072418 0.19000006 7.55999947 28.80265617
 3.90146184 6.03999996]]
[[82.22602081 3.07000017 7.23999977]
 [70.39072418 0.19000006 7.55999947]
 [28.80265617 3.90146184 6.03999996]
 [21.42139244 6.97884512 5.63999987]]


In [9]:
###create random mini_batches###


def unison_shuffled_copies(a, b):
    """Return copies of two 3-D arrays shuffled with one shared permutation.

    Both arrays must have the same length along axis 0 (checked with an
    ``assert``). Row ``k`` of the first result and row ``k`` of the second
    result always come from the same original index, so paired examples
    stay paired. The permutation is drawn from NumPy's global random state.
    """
    n_rows = a.shape[0]
    assert n_rows == b.shape[0]

    order = np.random.permutation(n_rows)
    shuffled_a = a[order, :, :]
    shuffled_b = b[order, :, :]
    return shuffled_a, shuffled_b

def random_mini_batches(inputt, target, minibatch_size = 500):
    """Shuffle paired examples and cut them into mini-batches.

    Parameters
    ----------
    inputt, target : numpy.ndarray
        3-D arrays with the same length along axis 0; ``inputt[k]`` and
        ``target[k]`` belong together.
    minibatch_size : int, optional
        Number of examples per batch (default 500). Must be at least 1.

    Returns
    -------
    list of (input_batch, target_batch) tuples
        All full batches in order, followed by one smaller remainder batch
        when the number of examples is not a multiple of ``minibatch_size``.
        No empty batch is ever produced, because the mean loss over an empty
        batch is undefined.

    Raises
    ------
    ValueError
        If ``minibatch_size`` is smaller than 1.
    """
    if minibatch_size < 1:
        raise ValueError("minibatch_size must be at least 1, got {!r}".format(minibatch_size))

    num_examples = inputt.shape[0]

    # Shuffle particles; inputs and targets are permuted together.
    _i, _t = unison_shuffled_copies(inputt, target)
    print(_t.shape)

    # Stepping through the start offsets yields every full batch and then a
    # final shorter one only if examples are left over.
    minibatches = []
    for start in range(0, num_examples, minibatch_size):
        stop = start + minibatch_size
        minibatches.append((_i[start:stop, :, :], _t[start:stop, :, :]))

    return minibatches
 

In [10]:
minibatches = random_mini_batches(inputt_train, target_train)

# The final element is the leftover batch. Index it from the end instead of
# recomputing its position from a hard-coded batch size of 500, which would
# silently pick the wrong batch if the batch size were ever changed.
testinputt, testtarget = minibatches[-1]

print(len(minibatches))

# Next-step prediction setup: hits 0..6 are the input sequence and the same
# track shifted by one hit (hits 1..7) is the target sequence.
minibatches = random_mini_batches(train_set[:,:-1,:], train_set[:,1:,:])
train, target = minibatches[0]
test_input, test_target = test_set[:,:-1,:], test_set[:,1:,:]
print(train[0,:,:], target[0,:,:])

(46427, 4, 3)
93
(46427, 7, 3)
[[-14.14815044 18.45176888 20.84000015]
 [-17.72400856 22.47253418 20.76000023]
 [-25.43392563 66.05883789 17.79999924]
 [-18.5602417 80.10436249 16.52000046]
 [ 65.26576996 50.039608 10.35999966]
 [ 61.54515457 34.18211365 9.63999939]
 [ 27.2204895 8.77087116 10.35999966]] [[-17.72400856 22.47253418 20.76000023]
 [-25.43392563 66.05883789 17.79999924]
 [-18.5602417 80.10436249 16.52000046]
 [ 65.26576996 50.039608 10.35999966]
 [ 61.54515457 34.18211365 9.63999939]
 [ 27.2204895 8.77087116 10.35999966]
 [ 20.99278641 8.01359081 10.43999958]]


In [11]:
class RNNPlacePrediction():
    """Sequence-to-sequence RNN built on the TensorFlow 1.x graph API.

    The model unrolls a recurrent cell over an input of shape
    ``(batch, time_steps, ninputs)`` and projects every step to
    ``num_output`` values. It is trained with a mean-squared-error cost.

    Typical use: construct, call ``set_cost_and_functions()``, then ``fit()``.
    """

    def __init__(self, time_steps, future_steps, ninputs, ncells, num_output, cell_type="basic_rnn"):
        """Create the placeholders, the recurrent cell and the session.

        Parameters
        ----------
        time_steps : int
            Length of every input sequence.
        future_steps : int
            Stored on the instance; not used by any method of this class.
        ninputs : int
            Number of features per time step.
        ncells : int
            Number of units in the recurrent cell.
        num_output : int
            Number of values produced per time step.
        cell_type : {"basic_rnn", "lstm", "GRU"}
            Which recurrent cell to build.

        Raises
        ------
        ValueError
            If ``cell_type`` is not one of the supported names.
        """
        # Validate first so that a typo fails immediately and does not leave
        # half-built placeholders behind in the default graph. A real
        # exception is used because assert statements vanish under -O.
        if cell_type not in ("basic_rnn", "lstm", "GRU"):
            raise ValueError(
                "Wrong rnn cell type: {!r} (expected 'basic_rnn', 'lstm' or 'GRU')".format(cell_type)
            )

        self.nsteps = time_steps
        self.future_steps = future_steps
        self.ninputs = ninputs
        self.ncells = ncells
        self.num_output = num_output

        #### The input is of shape (nbatches, time_steps, ninputs)
        #### ninputs is the dimensionality (number of features) of the time series
        self.X = tf.placeholder(dtype=tf.float32, shape=(None, time_steps, ninputs))
        #### The target must match the network output, i.e. num_output values
        #### per step (not ninputs, which only coincides when both are equal)
        self.Y = tf.placeholder(dtype=tf.float32, shape=(None, time_steps, num_output))

        if cell_type == "basic_rnn":
            self.cell = tf.contrib.rnn.BasicRNNCell(num_units=ncells, activation=tf.nn.relu)
        elif cell_type == "lstm":
            self.cell = tf.contrib.rnn.BasicLSTMCell(num_units=ncells, activation=tf.nn.relu)
        else:  # "GRU" -- the only remaining valid name
            self.cell = tf.contrib.rnn.GRUCell(num_units=ncells, activation=tf.nn.relu)

        #### Project the cell output of every step down to num_output values
        self.RNNCell = tf.contrib.rnn.OutputProjectionWrapper(self.cell, output_size=num_output)

        self.sess = tf.Session()

    def set_cost_and_functions(self, LR=0.001):
        """Build the unrolled network, cost, optimizer and saver, then initialise variables.

        Must be called before ``fit``, ``predict``, ``save`` or ``load``,
        since those rely on the attributes created here.

        Parameters
        ----------
        LR : float
            Learning rate handed to the Adam optimizer.
        """
        #### Unroll the RNN cell over the time axis of X
        self.output, self.state = tf.nn.dynamic_rnn(self.RNNCell, self.X, dtype=tf.float32)
        #### The cost function is the mean squared error
        self.cost = tf.reduce_mean(tf.losses.mean_squared_error(self.Y, self.output))

        #### The rest proceeds as usual
        self.train = tf.train.AdamOptimizer(LR).minimize(self.cost)
        #### Variable initializer
        self.init = tf.global_variables_initializer()
        self.saver = tf.train.Saver()
        self.sess.run(self.init)

    def fit(self, minibatches, epochs, print_step):
        """Train on the given mini-batches.

        Parameters
        ----------
        minibatches : sequence of (input_batch, target_batch)
            Batches fed to the ``X`` and ``Y`` placeholders.
        epochs : int
            Maximum number of passes over ``minibatches``.
        print_step : int
            An epoch/cost summary is printed every ``print_step`` epochs (in
            addition to the bare loss value printed after every epoch).

        Notes
        -----
        The per-epoch loss (sum of the batch costs) is appended to
        ``self.loss_list``. After epoch 100, training stops early once the
        loss has improved by less than 0.8 compared with 100 epochs before.
        """
        self.loss_list = []
        for iep in range(epochs):
            loss = 0
            for train, target in minibatches:
                feed = {self.X: train, self.Y: target}
                #### One optimisation step on this batch
                #### (the x is the time series, the y is shifted by 1 time step)
                self.sess.run(self.train, feed_dict=feed)
                #### Cost on the same batch after the update
                loss += self.sess.run(self.cost, feed_dict=feed)

            self.loss_list.append(loss)

            print(loss)

            # Early stopping. loss_list is a list, so it has to be indexed
            # (calling it raises TypeError). The improvement is "older loss
            # minus current loss"; with the operands the other way round a
            # steadily falling loss would always satisfy the condition.
            if iep > 100 and self.loss_list[iep - 100] - self.loss_list[iep] < 0.8:
                break

            if iep % print_step == 0:
                print("Epoch number ", iep)
                print("Cost: ", loss)

    def save(self, filename="./rnn_model/rnn_basic"):
        """Write a checkpoint of the session variables to ``filename``.

        The parent directory is created if it is missing, because the saver
        does not create it on its own.
        """
        import os

        directory = os.path.dirname(filename)
        if directory:
            os.makedirs(directory, exist_ok=True)
        self.saver.save(self.sess, filename)

    def load(self, filename="./rnn_model/rnn_basic"):
        """Restore the session variables from the checkpoint at ``filename``."""
        self.saver.restore(self.sess, filename)

    def predict(self, x):
        """Run the network on ``x`` and return the output for every time step."""
        return self.sess.run(self.output, feed_dict={self.X: x})
 
 

In [12]:
# Start from an empty default graph so that re-running the cells below does
# not pile up a second copy of the network's nodes.
tf.reset_default_graph()

In [13]:
# Model hyperparameters.
timesteps = 7  # input sequence length: the training sequences are train_set[:, :-1, :]
future_steps = 1  # passed to RNNPlacePrediction, which stores it but does not otherwise use it
ninputs = 3  # features per time step (the three coordinates of a hit)
ncells = 30  # number of units in the recurrent cell
num_output = 3  # values produced per time step by the output projection

In [14]:
# Build the model with an LSTM cell, using the hyperparameters defined above.
rnn = RNNPlacePrediction(
    time_steps=timesteps,
    future_steps=future_steps,
    ninputs=ninputs,
    ncells=ncells,
    num_output=num_output,
    cell_type="lstm",
)

Instructions for updating:
Use the retry module or similar alternatives.


In [15]:
# Build the unrolled network, cost, optimizer and saver (default learning rate 0.001)
# and initialise the variables; required before fit/predict/save/load.
rnn.set_cost_and_functions()

In [16]:
# Train for at most 5000 epochs; an epoch/cost summary is printed every 500 epochs
# and the summed loss after every epoch.
# NOTE(review): the recorded output below ends in a KeyboardInterrupt, so this run
# was stopped by hand before completing.
rnn.fit(minibatches, epochs=5000, print_step=500)

108164.484375
Epoch number 0
Cost: 108164.484375
57559.07028198242
23417.070388793945
14891.905502319336
12499.229606628418
11122.449005126953
10109.593124389648
9254.187873840332
8533.7841796875
7911.04532623291
7397.553016662598
6970.429878234863
6615.027526855469
6295.746952056885
5980.429901123047
5706.560279846191
5445.698154449463
5201.659439086914
4964.256591796875
4748.1755447387695
4547.136264801025
4359.255859375
4191.035224914551
4038.443386077881
3900.314914703369
3775.984146118164
3661.9904403686523
3555.3094215393066
3457.7933769226074
3364.8507347106934
3273.5197563171387
3184.8794593811035
3099.1916484832764
3017.266839981079
2937.607749938965
2861.451858520508
2788.084276199341
2718.1153564453125
2651.056453704834
2588.111207962036
2528.6895790100098
2472.8057384490967
2419.3036403656006
2368.177661895752
2320.108238220215
2275.061798095703
2232.7341709136963
2192.880346298218
2155.6921787261963
2120.9273471832275
2088.330379486084
2057.153877258301
2027.961145401001


KeyboardInterrupt: 

In [None]:
# Write a checkpoint of the current weights to the default path "./rnn_model/rnn_basic".
rnn.save()

In [None]:
###rnn.load()###

In [None]:
###test_input.shape###

In [None]:
#test_pred = rnn.predict(test_input)

In [None]:
#print(test_pred[5,:,:]-test_target[5,:,:])

In [None]:
#rnn.sess.run(rnn.cost, feed_dict={rnn.X:test_input, rnn.Y:test_target})