diff --git a/1_to_1_lstm_30_multilayer.ipynb b/1_to_1_lstm_30_multilayer.ipynb deleted file mode 100644 index 20e1bd2..0000000 --- a/1_to_1_lstm_30_multilayer.ipynb +++ /dev/null @@ -1,551 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\users\\sascha\\anaconda3\\envs\\rnn-tf-ker\\lib\\site-packages\\h5py\\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", - " from ._conv import register_converters as _register_converters\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib as mpl\n", - "import random\n", - "import math\n", - "import matplotlib.pyplot as plt\n", - "import tensorflow as tf\n", - "from tensorflow.python.framework import ops" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "#import data as array\n", - "# 8 hits with x,y,z\n", - "\n", - "testset = pd.read_pickle('matched_8hittracks.pkl')" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "#Check testset with arbitrary particle\n", - "\n", - "tset = np.array(testset)\n", - "tset = tset.astype('float32')\n", - "#print(tset.shape)\n", - "#for i in range(8):\n", - " #print(tset[1,3*i:(3*i+3)])\n", - "#print(tset[0,:])" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "### Reshape original array into the shape (particle number, timesteps, input = coordinates)###\n", - "\n", - "def reshapor(arr_orig):\n", - " timesteps = int(arr_orig.shape[1]/3)\n", - " number_examples = int(arr_orig.shape[0])\n", - " arr = np.zeros((number_examples, timesteps, 3))\n", - " \n", - " for i in range(number_examples):\n", - " for t in range(timesteps):\n", - " arr[i,t,0:3] = arr_orig[i,3*t:3*t+3]\n", - " \n", - " return arr" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "### create the training set and the test set###\n", - "\n", - "def create_random_sets(dataset, train_to_total_ratio):\n", - " #shuffle the dataset\n", - " num_examples = dataset.shape[0]\n", - " p = np.random.permutation(num_examples)\n", - " dataset = dataset[p,:]\n", - " \n", - " #evaluate size of training and test set and initialize them\n", - " train_set_size = np.int(num_examples*train_to_total_ratio)\n", - " test_set_size = num_examples - train_set_size\n", - " \n", - " train_set = np.zeros((train_set_size, dataset.shape[1]))\n", - " test_set = np.zeros((test_set_size, dataset.shape[1]))\n", - " \n", - "\n", - " #fill train and test sets\n", - " for i in range(num_examples):\n", - " if train_set_size > i:\n", - " train_set[i,:] += dataset[i,:]\n", - " else:\n", - " test_set[i - train_set_size,:] += dataset[i,:]\n", - " \n", - " \n", - " train_set = reshapor(train_set)\n", - " test_set = reshapor(test_set)\n", - " \n", - " return train_set, test_set\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "train_set, test_set = create_random_sets(tset, 0.99)\n", - "#print(test_set.shape, train_set.shape, reshapor(tset).shape)\n", - "#print(test_set[0,:,:])" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - 
"metadata": {}, - "outputs": [], - "source": [ - "### create target array of shape (num_examples, 4 timesteps, 3 = n_inputs), inputt array of shape (num_examples, 4 timesteps, 12 = n_inputs)###\n", - "\n", - "def target_and_input(data_set):\n", - " \n", - " num_ex = data_set.shape[0]\n", - " inputt = np.zeros((num_ex, 4, 12))\n", - " target = np.zeros((num_ex, 4, 3))\n", - " \n", - " \n", - " for i in range(4):\n", - " target[:,i,:] = data_set[:,4+i,:]\n", - " for f in range(4):\n", - " inputt[:,i,3*f:3*f+3] = data_set[:,i+f,:]\n", - " \n", - " \n", - " \n", - " \n", - " return inputt, target\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "inputt_train, target_train = target_and_input(train_set)\n", - "inputt_test, target_test = target_and_input(test_set)\n", - "#print(inputt_train[0,:,:])\n", - "#print(target_train[0,:,:])" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "###create random mini_batches###\n", - "\n", - "\n", - "def unison_shuffled_copies(a, b):\n", - " assert a.shape[0] == b.shape[0]\n", - " p = np.random.permutation(a.shape[0])\n", - " return a[p,:,:], b[p,:,:]\n", - "\n", - "def random_mini_batches(inputt, target, minibatch_size = 500):\n", - " \n", - " num_examples = inputt.shape[0]\n", - " \n", - " \n", - " #Number of complete batches\n", - " \n", - " number_of_batches = int(num_examples/minibatch_size)\n", - " minibatches = []\n", - " \n", - " #shuffle particles\n", - " _i, _t = unison_shuffled_copies(inputt, target)\n", - " #print(_t.shape)\n", - " \n", - " \n", - " for i in range(number_of_batches):\n", - " \n", - " minibatch_train = _i[minibatch_size*i:minibatch_size*(i+1), :, :]\n", - " \n", - " minibatch_true = _t[minibatch_size*i:minibatch_size*(i+1), :, :]\n", - " \n", - " minibatches.append((minibatch_train, minibatch_true))\n", - " \n", - " \n", - " minibatches.append((_i[number_of_batches*minibatch_size:, :, :], _t[number_of_batches*minibatch_size:, :, :]))\n", - " \n", - " \n", - " return minibatches\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "#Create random minibatches of train and test set with input and target array\n", - "\n", - "\n", - "minibatches = random_mini_batches(train_set[:,:-1,:], train_set[:,1:,:])\n", - "#_train, _target = minibatches[0]\n", - "test_input, test_target = test_set[:,:-1,:], test_set[:,1:,:]\n", - "#print(train[0,:,:], target[0,:,:])" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "#minibatches = random_mini_batches(inputt_train, target_train)\n", - "\n", - "\n", - "#_inputt, _target = minibatches[int(inputt_train.shape[0]/500)]\n", - "\n", - "#print(len(minibatches))\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "class RNNPlacePrediction():\n", - " \n", - " \n", - " def __init__(self, time_steps, future_steps, ninputs, ncells, num_output, cell_type=\"basic_rnn\",\n", - " n_layers=1):\n", - " \n", - " self.nsteps = time_steps\n", - " self.future_steps = 
future_steps\n", - " self.ninputs = ninputs\n", - " self.ncells = ncells\n", - " self.num_output = num_output\n", - " self.n_layers = n_layers\n", - " \n", - " self._ = cell_type #keep the cell type name (used later for the save folder)\n", - " \n", - " #### The input is of shape (num_examples, time_steps, ninputs)\n", - " #### ninputs is the dimensionality (number of features) of the time series (here coordinates)\n", - " self.X = tf.placeholder(dtype=tf.float32, shape=(None, time_steps, ninputs))\n", - " self.Y = tf.placeholder(dtype=tf.float32, shape=(None, time_steps, ninputs))\n", - " \n", - " \n", - " if cell_type==\"basic_rnn\":\n", - " self.cell_type = tf.contrib.rnn.BasicRNNCell\n", - " \n", - " elif cell_type==\"lstm\":\n", - " self.cell_type = tf.contrib.rnn.BasicLSTMCell\n", - " \n", - " elif cell_type==\"GRU\":\n", - " self.cell_type = tf.contrib.rnn.GRUCell\n", - " \n", - " else: # JONAS\n", - " raise ValueError(\"Wrong rnn cell type: {}\".format(cell_type))\n", - " \n", - " \n", - " assert(len(self.ncells) == self.n_layers), \"Length of the ncells list and the number of layers do not match\"\n", - " self.cell = tf.contrib.rnn.MultiRNNCell([self.cell_type(num_units=self.ncells[layer], activation=tf.nn.relu)\n", - " for layer in range(len(self.ncells))])\n", - " \n", - " \n", - " #### I now define the output\n", - " self.RNNCell = tf.contrib.rnn.OutputProjectionWrapper(self.cell, output_size= num_output)\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " self.sess = tf.Session()\n", - " \n", - " def set_cost_and_functions(self, LR=0.001):\n", - " #### I define here the function that unrolls the RNN cell\n", - " self.output, self.state = tf.nn.dynamic_rnn(self.RNNCell, self.X, dtype=tf.float32)\n", - " #### I define the cost function as the mean_squared_error (distance of predicted point to target)\n", - " self.cost = tf.reduce_mean(tf.losses.mean_squared_error(self.Y, self.output)) \n", - " \n", - " #### the rest proceeds as usual\n", - " self.train = tf.train.AdamOptimizer(LR).minimize(self.cost)\n", - " #### Variable initializer\n", - " self.init = tf.global_variables_initializer()\n", - " self.saver = tf.train.Saver()\n", - " self.sess.run(self.init)\n", - " \n", - " \n", - " \n", - " def fit(self, minibatches, epochs, print_step):\n", - " self.loss_list = []\n", - " for iep in range(epochs):\n", - " loss = 0\n", - " \n", - " #Here I iterate through the batches\n", - " for batch in range(len(minibatches)):\n", - " #### Here I train the RNN cell\n", - " #### The X is the time series, the Y is shifted by 1 time step\n", - " train, target = minibatches[batch]\n", - " self.sess.run(self.train, feed_dict={self.X:train, self.Y:target})\n", - " \n", - " \n", - " loss += self.sess.run(self.cost, feed_dict={self.X:train, self.Y:target})\n", - " \n", - " self.loss_list.append(loss)\n", - " \n", - " print(loss)\n", - " \n", - " \n", - " #early stopping\n", - " if iep > 100 and abs(self.loss_list[iep]-self.loss_list[iep-100]) < 0.5:\n", - " print(\"Early stopping at epoch \", iep, \", difference: \", self.loss_list[iep]-self.loss_list[iep-100])\n", - " break\n", - " \n", - " if iep%print_step==0:\n", - " print(\"Epoch number \",iep)\n", - " print(\"Cost: \",loss)\n", - " \n", - " \n", - " \n", - " def save(self, filename=\"./rnn_model/rnn_basic\"):\n", - " self.saver.save(self.sess, filename)\n", - " \n", - " \n", - " def load(self, filename=\"./rnn_model/rnn_basic\"):\n", - " self.saver.restore(self.sess, filename)\n", - " \n", - " \n", - " def predict(self, x):\n", - " return self.sess.run(self.output, 
feed_dict={self.X:x})\n", - " \n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "timesteps = 7\n", - "future_steps = 1\n", - "\n", - "ninputs = 3\n", - "\n", - "ncells = [30, 20, 5]\n", - "n_layers = 3\n", - "num_output = 3" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:From c:\\users\\sascha\\anaconda3\\envs\\rnn-tf-ker\\lib\\site-packages\\tensorflow\\contrib\\learn\\python\\learn\\datasets\\base.py:198: retry (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Use the retry module or similar alternatives.\n" - ] - } - ], - "source": [ - "tf.reset_default_graph()\n", - "rnn = RNNPlacePrediction(time_steps=timesteps, future_steps=future_steps, ninputs=ninputs, \n", - " ncells=ncells, num_output=num_output, cell_type=\"lstm\", n_layers=n_layers)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "rnn.set_cost_and_functions()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "rnn.fit(minibatches, epochs=10, print_step=500)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "folder = \"./trained_models/rnn_model_\" + str(rnn._) + \"_\" + str(n_layers) + \"l_\" + str(ncells) + \"c/rnn_basic\"\n", - "rnn.save(folder)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "###rnn.load(folder)###" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "###test_input.shape###" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Here I predict based on my test set\n", - "\n", - "#test_pred = rnn.predict(test_input)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Here I subtract a prediction (random particle) from the target to get an idea of the predictions\n", - "\n", - "#print(test_pred[5,:,:]-test_target[5,:,:])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Here I evaluate my model on the test set based on mean_squared_error\n", - "\n", - "#rnn.sess.run(rnn.cost, feed_dict={rnn.X:test_input, rnn.Y:test_target})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - 
"nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}