# rnn_bachelor_thesis/keras_model_classifier.py
import pandas as pd
import numpy as np
import matplotlib as mpl
import random
import math
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.framework import ops
from sklearn import preprocessing
import pickle as pkl
from pathlib import Path
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import GRU
from keras.layers import Dropout
#from keras.layers.convolutional import Conv1D
#from keras.layers.convolutional import MaxPooling1D
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.callbacks import History
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras.models import load_model

#import seaborn as sns

### Reshape the flat array (num_examples, timesteps*num_inputs) into shape (num_examples, timesteps, num_inputs = coordinates) ###

def reshapor(arr_orig, num_inputs=3):
    """Reshape a flat (num_examples, timesteps*num_inputs) array into
    shape (num_examples, timesteps, num_inputs)."""
    timesteps = int(arr_orig.shape[1] / num_inputs)
    number_examples = int(arr_orig.shape[0])
    arr = np.zeros((number_examples, timesteps, num_inputs))

    for i in range(number_examples):
        for t in range(timesteps):
            arr[i, t, :] = arr_orig[i, num_inputs*t:num_inputs*t + num_inputs]

    return arr

def reshapor_inv(array_shaped, num_inputs=3):
    """Inverse of reshapor: flatten (num_examples, timesteps, num_inputs)
    back into (num_examples, timesteps*num_inputs)."""
    timesteps = int(array_shaped.shape[1])
    num_examples = int(array_shaped.shape[0])
    arr = np.zeros((num_examples, timesteps*num_inputs))

    for i in range(num_examples):
        for t in range(timesteps):
            arr[i, num_inputs*t:num_inputs*t + num_inputs] = array_shaped[i, t, :]

    return arr
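
# Hedged aside: for this row-major layout, reshapor/reshapor_inv are equivalent
# to a plain numpy reshape. Self-contained sanity check on toy data only:
_flat = np.arange(2 * 21, dtype=np.float32).reshape(2, 21)  # 2 tracks, 7 hits x 3 coords
assert np.array_equal(reshapor(_flat), _flat.reshape(2, 7, 3))
assert np.array_equal(reshapor_inv(_flat.reshape(2, 7, 3)), _flat)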

# Legacy configuration (kept for reference; not referenced by the Keras model below)
timesteps = 7
future_steps = 1

ninputs = 3

# ncells as int or list of int
ncells = [150, 150, 150]
activation = "leaky_relu"
cell_type = "lstm"

num_output = 3

scalor = pd.read_pickle('scalor.pkl')

tset_matched = pd.read_pickle('matched_and_unmatched_8hittracks.pkl')


tset_matched = np.array(tset_matched)
tset_matched = tset_matched.astype('float32')
truth = tset_matched[:, -1]  # last column holds the matched/unmatched truth label
#tset_matched = scaler(reshapor(tset_matched[:,:-1]), scalerfunc = func, scalor= scalor)


#tset_matched = reshapor_inv(tset_matched)


def tracks_to_particle(tset_matched, truth):
    """Group track candidates by particle: consecutive rows that agree in
    their first 12 coordinates (the first 4 hits) belong to the same particle."""
    start = 0
    start_points = [0]

    if len(tset_matched.shape) == 3:
        # accept 3d input as well: flatten it; reshapor below re-expands each slice
        tset_matched = reshapor_inv(tset_matched)

    # a new particle starts wherever any of the first 12 coordinates changes
    # between consecutive rows
    for track in range(tset_matched.shape[0] - 1):

        for coord in range(12):

            if tset_matched[track, coord] != tset_matched[track + 1, coord]:
                start = track + 1

        if start != start_points[-1]:
            start_points.append(start)

    # append the end index as a sentinel so the final particle is included
    start_points.append(tset_matched.shape[0])

    num_part = len(start_points) - 1

    particle_tracks = []
    track_truth = []

    # keep the flat 2d layout here; reshapor converts each slice to 3d
    for particle in range(num_part):
        particle_tracks.append(reshapor(tset_matched[start_points[particle]:start_points[particle + 1]]))
        track_truth.append(truth[start_points[particle]:start_points[particle + 1]])

    return particle_tracks, track_truth
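
# Self-contained toy check of the grouping (hypothetical 3-row dataset): two
# candidates sharing their first 12 coordinates, then one new particle.
_toy = np.zeros((3, 25), dtype=np.float32)  # rows 0 and 1 agree everywhere
_toy[2, :12] = 1.0                          # row 2 opens a second particle
_tracks, _truths = tracks_to_particle(_toy, _toy[:, -1])
assert len(_tracks) == 2 and _tracks[0].shape[0] == 2 and _tracks[1].shape[0] == 1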

particle_tracks, track_truth = tracks_to_particle(tset_matched=tset_matched, truth=truth)

num_particles = len(track_truth)

# Classifier input: the first 4 hits (x, y, z) of each particle's first track candidate.
particle_start_array = np.zeros((num_particles, 4, 3))

for particle in range(num_particles):
    particle_start_array[particle, :, :] += particle_tracks[particle][0][:4, :]

print(particle_start_array[11, :, :])  # spot check one example

# Target label per particle: 1 if at least one of its track candidates is a true match.
track_exist_truth = np.zeros((num_particles))

for particle in range(num_particles):
    correct = False
    num_tracks = len(track_truth[particle])

    for track in range(num_tracks):
        if track_truth[particle][track] == 1:
            correct = True

    if correct:
        track_exist_truth[particle] += 1
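
# Hedged aside: the loop above matches this vectorized one-liner (sanity check
# only; the loop result above stays authoritative):
assert np.array_equal(track_exist_truth,
                      np.array([float(np.any(np.asarray(t) == 1)) for t in track_truth]))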

# Shuffle a 3d array a and a 1d array b in unison along the first axis.

def unison_shuffled_copies2(a, b):
    assert a.shape[0] == b.shape[0]
    p = np.random.permutation(a.shape[0])
    return a[p, :, :], b[p]
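
# Minimal usage sketch on toy data: the shared permutation keeps rows of a
# aligned with the entries of b (independent of the real dataset):
_a = np.arange(12, dtype=np.float32).reshape(4, 1, 3)
_b = np.arange(4, dtype=np.float32)
_sa, _sb = unison_shuffled_copies2(_a, _b)
assert all(np.array_equal(_sa[i], _a[int(_sb[i])]) for i in range(4))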

def create_random_sets2(particle_start_array=particle_start_array, track_exist_truth=track_exist_truth, train_to_total_ratio=0.9):
    #shuffle the dataset
    num_examples = particle_start_array.shape[0]
    particle_start_array, track_exist_truth = unison_shuffled_copies2(particle_start_array, track_exist_truth)

    #evaluate size of training and test set and initialize them
    #(the builtin int replaces np.int, which is deprecated in NumPy >= 1.20)
    train_set_size = int(num_examples * train_to_total_ratio)
    test_set_size = num_examples - train_set_size

    train_part_start = np.zeros((train_set_size, particle_start_array.shape[1], particle_start_array.shape[2]))
    train_track_e_tr = np.zeros((train_set_size))
    test_part_start = np.zeros((test_set_size, particle_start_array.shape[1], particle_start_array.shape[2]))
    test_track_e_tr = np.zeros((test_set_size))

    #fill train and test sets
    for i in range(num_examples):
        if train_set_size > i:
            train_part_start[i, :, :] += particle_start_array[i, :, :]
            train_track_e_tr[i] += track_exist_truth[i]
        else:
            test_part_start[i - train_set_size, :, :] += particle_start_array[i, :, :]
            test_track_e_tr[i - train_set_size] += track_exist_truth[i]

    return train_part_start, train_track_e_tr, test_part_start, test_track_e_tr

X_train, Y_train, X_test, Y_test = create_random_sets2()
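
# Hedged alternative: an equivalent shuffled split via scikit-learn, left
# commented out so the handwritten split above stays authoritative:
# from sklearn.model_selection import train_test_split
# X_train, X_test, Y_train, Y_test = train_test_split(
#     particle_start_array, track_exist_truth, train_size=0.9, shuffle=True)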

# `max_review_length` looks like a leftover from the Keras IMDB example this
# script was adapted from (note the unused imdb import); it is not used below.
max_review_length = 4
filepath = "./keras_model_classifier.h5"

callbacks = [
    # stop when val_loss has not improved for 30 epochs; checkpoint the best model
    EarlyStopping(monitor='val_loss', patience=30, min_delta=0),
    ModelCheckpoint(filepath, monitor='val_loss', save_best_only=True),
    History()  # redundant: Keras attaches a History callback to every fit() call
]

# create the model
model = Sequential()
#model.add(Dense(12, input_shape=(4,3)))
model.add(LSTM(40, return_sequences=True, input_shape=(4, 3), activation='relu'))
model.add(Dropout(0.5))
model.add(LSTM(40, return_sequences=True, activation='relu'))
model.add(Dropout(0.5))
model.add(LSTM(40, return_sequences=True, activation='relu'))
model.add(Dropout(0.5))
model.add(LSTM(4, activation='relu'))  # final LSTM returns only the last state
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))  # sigmoid output: P(track exists)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=500, batch_size=50, callbacks=callbacks, verbose=2)
# reload the best checkpoint written by ModelCheckpoint before evaluating
model = load_model(filepath)
# Final evaluation of the model
scores = model.evaluate(X_test, Y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1] * 100))

model.save(filepath)  # redundant re-save of the already checkpointed model; harmless
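
# Optional sketch: inspect a few per-particle probabilities from the sigmoid
# output (read here, by assumption, as P(a matching track exists)):
probs = model.predict(X_test[:5])
print(probs.ravel())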

#keras.models.load_model(filepath)