In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import random
import math
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.framework import ops

  from ._conv import register_converters as _register_converters


In [2]:
# Load the matched tracks from a pickle file into `testset`.
# Per the original note each track has 8 hits with x, y, z coordinates
# (presumably 24 flattened columns per row -- TODO confirm against the file).
# NOTE(review): unpickling can execute arbitrary code; only load this file
# when it comes from a trusted source.

testset = pd.read_pickle('matched_8hittracks.pkl')

In [3]:
# Convert the loaded tracks into a float32 NumPy array (one row per track).
tset = np.array(testset).astype('float32')

# To inspect a single track, look at e.g. tset.shape, tset[0, :] or the
# per-hit slices tset[1, 3*i:(3*i+3)] for i in range(8).

In [4]:
### Reshape original array into the shape (particlenumber, timesteps, input = coordinates)###

def reshapor(arr_orig):
    """Reshape flat per-track coordinates into (particle, timestep, coordinate).

    Args:
        arr_orig: 2-D array of shape (number_examples, n_columns). Each row is
            read as consecutive triples; trailing columns that do not
            complete a triple are ignored.

    Returns:
        A new float64 array of shape (number_examples, n_columns // 3, 3).
        It never shares memory with the input.
    """
    number_examples = int(arr_orig.shape[0])
    timesteps = int(arr_orig.shape[1]) // 3

    # A single slice + reshape replaces the per-element Python loops. The
    # values are copied into a fresh zero-initialised buffer so the result
    # keeps the float64 dtype the loop version produced.
    arr = np.zeros((number_examples, timesteps, 3))
    arr[...] = np.asarray(arr_orig)[:, :3 * timesteps].reshape(number_examples, timesteps, 3)

    return arr

In [5]:
### create the training set and the test set###

def create_random_sets(dataset, train_to_total_ratio):
    """Shuffle the examples and split them into a training and a test set.

    Args:
        dataset: 2-D array of shape (num_examples, n_columns), one example
            per row.
        train_to_total_ratio: Fraction of the examples, between 0 and 1, that
            goes into the training set. The resulting size is truncated
            towards zero.

    Returns:
        Tuple (train_set, test_set); both parts are passed through
        `reshapor` before being returned.

    Raises:
        ValueError: If train_to_total_ratio lies outside [0, 1].
    """
    if not 0 <= train_to_total_ratio <= 1:
        raise ValueError(
            "train_to_total_ratio must be between 0 and 1, got {}".format(train_to_total_ratio))

    # Shuffle the rows with NumPy's global random state.
    num_examples = dataset.shape[0]
    shuffled = dataset[np.random.permutation(num_examples), :]

    # Built-in int() instead of np.int: the alias was removed in NumPy 1.24
    # and raises AttributeError there.
    train_set_size = int(num_examples * train_to_total_ratio)

    # The first train_set_size shuffled rows form the training set and the
    # remaining rows the test set; slicing replaces the row-by-row copy loop.
    train_set = reshapor(shuffled[:train_set_size, :])
    test_set = reshapor(shuffled[train_set_size:, :])

    return train_set, test_set
        

In [6]:
# Shuffle the tracks and keep 99% of them for training, the rest for testing.
# The shuffle draws from NumPy's global random state, so the split changes
# between runs unless a seed is set beforehand.
train_set, test_set = create_random_sets(dataset=tset, train_to_total_ratio=0.99)
# Inspect with e.g. test_set.shape, train_set.shape or test_set[0, :, :].

In [7]:
### create target array of shape (num_examples, 4 timesteps, 3 = n_inputs), inputt array of shape (num_examples, 4 timesteps, 12 = n_inputs)###

def target_and_input(data_set):
    """Build sliding-window inputs and their per-step targets.

    For every example and every step s in 0..3 the input row is the
    concatenation of the four timesteps s..s+3 of `data_set` (3 values each,
    12 in total) and the target is timestep 4+s.

    Args:
        data_set: Array indexed as (example, timestep, coordinate); timesteps
            0..7 are read.

    Returns:
        Tuple (inputt, target) of float64 arrays with shapes
        (num_examples, 4, 12) and (num_examples, 4, 3).
    """
    window = 4      # timesteps per input window, also the number of steps
    n_coords = 3    # values per timestep
    n_tracks = data_set.shape[0]

    inputt = np.zeros((n_tracks, window, window * n_coords))
    target = np.zeros((n_tracks, window, n_coords))

    for step in range(window):
        target[:, step, :] = data_set[:, window + step, :]
        for offset in range(window):
            first = n_coords * offset
            inputt[:, step, first:first + n_coords] = data_set[:, step + offset, :]

    return inputt, target
    

In [8]:
# Build the windowed input/target arrays for the training and the test set.
# NOTE(review): the visible training cells feed shifted slices of train_set
# to the network instead, so these arrays appear to be unused below -- confirm.
inputt_train, target_train = target_and_input(train_set)
inputt_test, target_test = target_and_input(test_set)
#print(inputt_train[0,:,:])
#print(target_train[0,:,:])

In [9]:
###create random mini_batches###


def unison_shuffled_copies(a, b):
    """Return copies of two 3-D arrays shuffled with one shared permutation.

    Both arrays must have the same length along the first axis; row k of the
    first result and row k of the second come from the same original index.
    """
    n_rows = a.shape[0]
    assert n_rows == b.shape[0]

    order = np.random.permutation(n_rows)
    shuffled_a = a[order, :, :]
    shuffled_b = b[order, :, :]
    return shuffled_a, shuffled_b

def random_mini_batches(inputt, target, minibatch_size = 500):
    """Shuffle inputs and targets together and cut them into mini-batches.

    Args:
        inputt: 3-D array of inputs, one example per first-axis index.
        target: 3-D array of targets with the same first-axis length.
        minibatch_size: Number of examples per batch; must be positive.

    Returns:
        List of (input_batch, target_batch) tuples. Every batch holds
        minibatch_size examples except possibly the last one, which holds the
        remainder. No empty batch is produced when the number of examples is
        an exact multiple of minibatch_size.

    Raises:
        ValueError: If minibatch_size is not positive.
    """
    if minibatch_size <= 0:
        raise ValueError("minibatch_size must be positive, got {}".format(minibatch_size))

    num_examples = inputt.shape[0]

    # Shuffle both arrays with the same permutation so pairs stay aligned.
    _i, _t = unison_shuffled_copies(inputt, target)

    # Stepping through the start offsets yields all full batches followed by
    # the remainder, and nothing extra when there is no remainder.
    minibatches = []
    for start in range(0, num_examples, minibatch_size):
        stop = start + minibatch_size
        minibatches.append((_i[start:stop, :, :], _t[start:stop, :, :]))

    return minibatches
        

In [10]:
# Build the training mini-batches and the test arrays for next-step prediction:
# the input drops the last timestep and the target drops the first one, so the
# target sequence is the input sequence shifted by one timestep.


minibatches = random_mini_batches(train_set[:,:-1,:], train_set[:,1:,:])
#_train, _target = minibatches[0]
test_input, test_target = test_set[:,:-1,:], test_set[:,1:,:]
#print(train[0,:,:], target[0,:,:])

In [11]:
#minibatches = random_mini_batches(inputt_train, target_train)


#_inputt, _target = minibatches[int(inputt_train.shape[0]/500)]

#print(len(minibatches))



In [67]:
class RNNPlacePrediction():
    """Stacked recurrent network mapping an input sequence to an output sequence.

    Built with the TensorFlow 1.x graph API: placeholders, `tf.contrib.rnn`
    cells and a `tf.Session` owned by the instance. Typical use is to
    construct the object, call `set_cost_and_functions()` once, then
    `fit()`, `predict()`, `save()` / `load()`, and finally `close()`.
    """

    def __init__(self, time_steps, future_steps, ninputs, ncells, num_output, cell_type="basic_rnn"):
        """Create the placeholders, the stacked cell and the session.

        Args:
            time_steps: Length of every input/target sequence.
            future_steps: Stored on the instance; not used by this class.
            ninputs: Number of features per timestep of the input.
            ncells: Units per recurrent layer, either a single int (one
                layer) or a list of ints (one entry per layer).
            num_output: Number of values produced per timestep.
            cell_type: One of "basic_rnn", "lstm" or "GRU".

        Raises:
            ValueError: If cell_type is unknown or ncells is neither an int
                nor a list of ints.
        """
        self.nsteps = time_steps
        self.future_steps = future_steps
        self.ninputs = ninputs
        self.ncells = ncells
        self.num_output = num_output

        # Name of the requested cell type. Kept under `_` because code
        # outside the class reads it as `rnn._`.
        self._ = cell_type

        #### The input is of shape (num_examples, time_steps, ninputs)
        #### ninputs is the dimensionality (number of features) of the time series (here coordinates)
        self.X = tf.placeholder(dtype=tf.float32, shape=(None, time_steps, ninputs))
        #### The target must match the network output, i.e. num_output values
        #### per timestep (it was previously shaped with ninputs, which only
        #### worked when ninputs == num_output).
        self.Y = tf.placeholder(dtype=tf.float32, shape=(None, time_steps, num_output))

        if cell_type == "basic_rnn":
            self.cell_type = tf.contrib.rnn.BasicRNNCell
        elif cell_type == "lstm":
            self.cell_type = tf.contrib.rnn.BasicLSTMCell
        elif cell_type == "GRU":
            self.cell_type = tf.contrib.rnn.GRUCell
        else:
            raise ValueError("Wrong rnn cell type: {}".format(cell_type))

        # Normalise ncells to a list of ints, one entry per layer.
        if type(self.ncells) is int:
            self.ncells = [self.ncells]

        if type(self.ncells) is not list:
            raise ValueError("Wrong type of Input for ncells")

        if not all(type(units) is int for units in self.ncells):
            raise ValueError("Wrong type of Input for ncells")

        # One recurrent layer per entry of ncells, all with ReLU activation.
        self.cell = tf.contrib.rnn.MultiRNNCell(
            [self.cell_type(num_units=units, activation=tf.nn.relu) for units in self.ncells])

        #### The wrapped cell emits num_output values per timestep
        self.RNNCell = tf.contrib.rnn.OutputProjectionWrapper(self.cell, output_size=num_output)

        self.sess = tf.Session()

    def set_cost_and_functions(self, LR=0.001):
        """Unroll the network, define loss and optimizer, initialise variables.

        Must be called once before `fit`, `predict`, `save` or `load`.

        Args:
            LR: Learning rate of the Adam optimizer.
        """
        #### Unroll the RNN cell over the time axis of X
        self.output, self.state = tf.nn.dynamic_rnn(self.RNNCell, self.X, dtype=tf.float32)
        #### Cost: mean squared error between the target and the predicted sequence
        self.cost = tf.reduce_mean(tf.losses.mean_squared_error(self.Y, self.output))

        #### Training op, variable initializer and checkpoint saver
        self.train = tf.train.AdamOptimizer(LR).minimize(self.cost)
        self.init = tf.global_variables_initializer()
        self.saver = tf.train.Saver()
        self.sess.run(self.init)

    def fit(self, minibatches, epochs, print_step, stop_window=100, stop_tolerance=0.5):
        """Train on the given mini-batches.

        The loss of every batch is evaluated right after its training step
        and summed per epoch; the sums are kept in `self.loss_list`.

        Args:
            minibatches: Sequence of (input_batch, target_batch) pairs.
            epochs: Maximum number of passes over `minibatches`.
            print_step: An "Epoch number"/"Cost" summary is printed every
                print_step epochs (the epoch loss itself is printed every epoch).
            stop_window: Early stopping compares the current epoch loss with
                the one stop_window epochs earlier.
            stop_tolerance: Training stops once that difference is smaller
                than stop_tolerance.
        """
        self.loss_list = []
        for iep in range(epochs):
            loss = 0

            for batch_x, batch_y in minibatches:
                # An empty batch carries no training signal; skip it.
                if len(batch_x) == 0:
                    continue

                #### X is the time series, Y is the same series shifted by 1 time step
                feed = {self.X: batch_x, self.Y: batch_y}
                self.sess.run(self.train, feed_dict=feed)
                loss += self.sess.run(self.cost, feed_dict=feed)

            self.loss_list.append(loss)

            print(loss)

            # Early stopping: the loss barely changed over the last stop_window epochs.
            if iep > stop_window:
                difference = abs(self.loss_list[iep] - self.loss_list[iep - stop_window])
                if difference < stop_tolerance:
                    print("Early stopping at epoch ", iep, ", difference: ", difference)
                    break

            if iep % print_step == 0:
                print("Epoch number ", iep)
                print("Cost: ", loss)

    def save(self, filename="./rnn_model/rnn_basic"):
        """Write the session's variables to the checkpoint prefix `filename`."""
        self.saver.save(self.sess, filename)

    def load(self, filename="./rnn_model/rnn_basic"):
        """Restore the session's variables from the checkpoint prefix `filename`."""
        self.saver.restore(self.sess, filename)

    def predict(self, x):
        """Return the network output for the input batch `x`."""
        return self.sess.run(self.output, feed_dict={self.X: x})

    def close(self):
        """Close the TensorFlow session held by this instance."""
        self.sess.close()
    
    

In [83]:
# Hyper-parameters passed to RNNPlacePrediction.

# Length of every input/target sequence.
# NOTE(review): presumably 7 because the 8-hit tracks are shifted by one hit
# to form the input/target pairs -- confirm.
timesteps = 7
# Stored by the model but not used by it.
future_steps = 1

# Number of features per timestep of the input (the coordinates of a hit).
ninputs = 3

# Units per recurrent layer: a single int or a list with one int per layer.
ncells = [30, 30, 30]

# Number of values the network outputs per timestep.
num_output = 3

In [84]:
# Clear the default graph (presumably so that re-running this cell starts
# from a clean graph), then build the model with LSTM cells.
tf.reset_default_graph()
rnn = RNNPlacePrediction(time_steps=timesteps, future_steps=future_steps, ninputs=ninputs, 
                        ncells=ncells, num_output=num_output, cell_type="lstm")

In [85]:
# Unroll the network, define the loss and the optimizer, and initialise the
# variables (default learning rate).
rnn.set_cost_and_functions()

In [86]:
# Train for 10 epochs. The summed batch loss is printed after every epoch;
# with print_step=500 the extra "Epoch number"/"Cost" lines appear only for
# epoch 0.
rnn.fit(minibatches, epochs=10, print_step=500)

106114.12908935547
Epoch number  0
Cost:  106114.12908935547
37096.763900756836
15050.049377441406
10696.98137664795
8839.393844604492
7496.866744995117
6476.7746925354
5627.203907012939
4952.669666290283
4406.38809967041


In [87]:
# The checkpoint prefix encodes the cell type, the number of layers and the
# units per layer.
folder = "./trained_models/rnn_model_{}_{}l_{}c/rnn_basic".format(rnn._, len(rnn.ncells), ncells)
rnn.save(folder)

In [None]:
###rnn.load(folder)###

In [None]:
###test_input.shape###

In [None]:
#Here I predict based on my test set

#test_pred = rnn.predict(test_input)

In [None]:
#Here I subtract the target from the prediction for one arbitrarily chosen particle to get an idea of the prediction error

#print(test_pred[5,:,:]-test_target[5,:,:])

In [None]:
#Here I evaluate my model on the test set based on mean_squared_error

#rnn.sess.run(rnn.cost, feed_dict={rnn.X:test_input, rnn.Y:test_target})

1
