diff --git a/1_to_1_multi_compact.ipynb b/1_to_1_multi_compact.ipynb new file mode 100644 index 0000000..151a3d6 --- /dev/null +++ b/1_to_1_multi_compact.ipynb @@ -0,0 +1,145 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\users\\sa_li\\anaconda3\\envs\\rnn-tf-ker\\lib\\site-packages\\h5py\\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", + " from ._conv import register_converters as _register_converters\n" + ] + } + ], + "source": [ + "#Here i do all the preprocessing of my data and define my functions and the RNNPlacePrediction class\n", + "\n", + "exec(open(\"requiremements.py\").read())" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "timesteps = 7\n", + "future_steps = 1\n", + "ninputs = 3\n", + "num_output = 3\n", + "\n", + "#ncells as int or list of int\n", + "ncells = [50, 40, 30, 20, 10]\n", + "\n", + "cell_type = \"lstm\"\n", + "activation = \"leaky_relu\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "rnn = RNNPlacePrediction(time_steps=timesteps, future_steps=future_steps, ninputs=ninputs, \n", + " ncells=ncells, num_output=num_output, cell_type=cell_type, activation=activation)\n", + "rnn.set_cost_and_functions()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch number 0\n", + "Cost: 373770.60410824226 e-6\n", + "Patience: 0 / 200\n", + "Last checkpoint at: Epoch 0 \n", + "\n", + "\n", + "\n", + "Model saved in at: ./rnn_model_lstm_leaky_relu_[50,40,30,20,10]c/rnn_basic\n", + "Model saved at: ./rnn_model_lstm_leaky_relu_[50,40,30,20,10]c/rnn_basic\n", + "Remaining data saved as: rnn_model_lstm_leaky_relu_[50,40,30,20,10]c.pkl\n" + ] + } + ], + "source": [ + "rnn.fit(minibatches, epochs = 5, print_step=5)\n", + "full_save(rnn)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "#plot_loss_list(loss_list = rnn.loss_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "#folder = get_rnn_folder(ncells = ncells, cell_type = \"lstm\", activation = \"leaky_relu\")\n", + "#rnn, data = full_load(folder)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 0.24988278 -16.92240567 -11.00905584]\n", + " [ -0.95585176 -19.82122722 -9.62447234]\n", + " [ 2.90237107 -13.03493918 -11.99622082]\n", + " [ -2.20826846 4.92884641 12.53874474]\n", + " [-20.9477203 13.2462497 -1.09616262]\n", + " [-31.69245226 0.34849761 4.28013375]\n", + " [ 0.24281463 5.55824599 3.57549133]]\n", + "Loss on test set: 0.17207867\n" + ] + } + ], + "source": [ + "test_pred, test_loss = rnn_test(rnn = rnn)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": 
"ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/1_to_1_multi_layer.ipynb b/1_to_1_multi_layer.ipynb index cd2c117..cffcf09 100644 --- a/1_to_1_multi_layer.ipynb +++ b/1_to_1_multi_layer.ipynb @@ -2,9 +2,18 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\users\\sa_li\\anaconda3\\envs\\rnn-tf-ker\\lib\\site-packages\\h5py\\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", + " from ._conv import register_converters as _register_converters\n" + ] + } + ], "source": [ "import pandas as pd\n", "import numpy as np\n", @@ -24,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -44,7 +53,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -60,7 +69,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -91,7 +100,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -124,7 +133,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -136,7 +145,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -203,7 +212,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -270,7 +279,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -284,7 +293,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -320,7 +329,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -341,7 +350,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -386,7 +395,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -401,7 +410,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -437,7 +446,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -618,11 +627,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 66, "metadata": {}, "outputs": [], "source": [ - "def full_save(rnn):\n", + "#saves the rnn model and all its parameters including the scaler used\n", + "#optional also saves the minibatches used to train and the test set\n", + "\n", + "def full_save(rnn, train= True, test= True):\n", " folder = \"./rnn_model_\" + str(rnn._)+ \"_\" + rnn.__ + \"_\" + str(rnn.ncells).replace(\" \",\"\") + \"c/rnn_basic\"\n", " rnn.save(folder)\n", " pkl_name = folder[2:-10] + \".pkl\"\n", @@ -637,12 +649,27 @@ " \"activation\": rnn.__, #Activation\n", " \"loss_list\": rnn.loss_list,\n", " \"scalor\": rnn.scalor}\n", + " \n", + " if 
train == True:\n", + "        pkl_dic[\"minibatches\"] = minibatches\n", + "    \n", + "    if test == True:\n", + "        pkl_dic[\"test_input\"] = test_input\n", + "        pkl_dic[\"test_target\"] = test_target\n", + "    \n", "    pkl.dump( pkl_dic, open(pkl_name , \"wb\" ) )\n", + "    \n", + "    print(\"Model saved at: \", folder)\n", + "    print(\"Remaining data saved as: {}\".format(pkl_name))\n", "\n", "\n", "\n", - "def full_load(folder):\n", - "    #Directory of okl file\n", + "#loads the rnn model with all its parameters including the scaler used\n", + "#Checks if the pkl data also contains the training or test sets and returns them accordingly\n", + "def full_load(folder): \n", + "    #restores the state of the rnn with all information and returns the train and test set used\n", + "    \n", + "    #Directory of pkl file\n", "    pkl_name = folder[2:-10] + \".pkl\"\n", "    \n", "    #Check if pkl file exists\n", @@ -659,7 +686,20 @@ "    num_output = pkl_dic[\"num_output\"]\n", "    cell_type = pkl_dic[\"cell_type\"]\n", "    activation = pkl_dic[\"activation\"]\n", - "\n", + "    \n", + "    #Check if test or training set in dictionary\n", + "    batch = False\n", + "    test = False\n", + "    if \"minibatches\" in pkl_dic:\n", + "        batch = True\n", + "        minibatches = pkl_dic[\"minibatches\"]\n", + "    if \"test_input\" in pkl_dic:\n", + "        test = True\n", + "        test_input = pkl_dic[\"test_input\"]\n", + "        test_target = pkl_dic[\"test_target\"]\n", + "    \n", + "    #loads and initializes a new model with the exact same properties\n", + "    \n", "    tf.reset_default_graph()\n", "    rnn = RNNPlacePrediction(time_steps=timesteps, future_steps=future_steps, ninputs=ninputs, \n", "                             ncells=ncells, num_output=num_output, cell_type=cell_type, activation=activation, scalor=scalor)\n", "\n", "    rnn.set_cost_and_functions()\n", "    \n", "    rnn.load(folder)\n", "    \n", "    rnn.loss_list = pkl_dic[\"loss_list\"]\n", "    \n", - "    return rnn\n", - "\n", + "    print(\"Model successfully loaded\")\n", + "    \n", + "    if batch and test:\n", + "        data = [minibatches, test_input, test_target]\n", + "        print(\"Minibatches (=training data), test_input and test_target loaded in data\")\n", + "        return rnn, data\n", + "    \n", + "    elif batch:\n", + "        data = [minibatches]\n", + "        print(\"Minibatches (=training data) loaded in data\")\n", + "        return rnn, data\n", + "    \n", + "    elif test:\n", + "        data = [test_input, test_target]\n", + "        print(\"test_input and test_target loaded in data\")\n", + "        return rnn, data\n", + "    \n", + "    else:\n", + "        data = []\n", + "        print(\"Only model restored, no training or test data found in {}\".format(pkl_name))\n", + "        print(\"Returned data is empty!\")\n", + "        return rnn, data\n", + "    \n", + "    \n", + "#returns the folder name used by full_save and full_load for a given architecture\n", "def get_rnn_folder(ncells, cell_type, activation):\n", "    folder = \"./rnn_model_\" + cell_type + \"_\" + activation + \"_\" + str(ncells).replace(\" \",\"\") + \"c/rnn_basic\"\n", "    return folder" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ @@ -696,7 +759,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 50, "metadata": {}, "outputs": [], "source": [ @@ -707,7 +770,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 51, "metadata": {}, "outputs": [], "source": [ @@ -723,11 +786,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 43, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch number 5\n", + "Cost: 
138837.22811303238 e-6\n", + "Patience: 0 / 200\n", + "Last checkpoint at: Epoch 5 \n", + "\n", + "\n", + "\n", + "Model saved in at: ./rnn_model_lstm_leaky_relu_[50,40,30,20,10]c/rnn_basic\n", + "Model saved at ./rnn_model_lstm_leaky_relu_[50,40,30,20,10]c/rnn_basic\n", + "Remaining data saved as rnn_model_lstm_leaky_relu_[50,40,30,20,10]c.pkl\n" + ] + } + ], "source": [ "rnn.fit(minibatches, epochs = 5, print_step=5)\n", "full_save(rnn)" @@ -735,11 +815,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 44, "metadata": { "scrolled": false }, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEKCAYAAADjDHn2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xd8lfX5//HXlR3ChrBHEkSmDAl7uapYLWhFReuoojir1Wqrdvy+tbZWbR1VVKgTd8FRat2iUgUCQRmyJIQVhoSwZwhcvz9y1CMGEiCHOznn/Xw8zoNzr3OuHCDvc3/u+75uc3dEREQOJi7oAkREpOpTWIiISLkUFiIiUi6FhYiIlEthISIi5VJYiIhIuRQWIiJSLoWFiIiUS2EhIiLlSgi6gMrSsGFDz8jICLoMEZFqZebMmevdPb289aImLDIyMsjNzQ26DBGRasXMlldkPQ1DiYhIuRQWIiJSLoWFiIiUS2EhIiLlUliIiEi5FBYiIlIuhYWIiJQr5sNi1569/PXthazcsCPoUkREqqyYD4ui7cU8P205v3l1Dvv26X7kIiJlifmwaF43ld+e0YEpS4p4IadCFzKKiMScmA8LgBE9WzKwbUPufnshK4o0HCUisj+FBWBm3HNOF+LNuHXCbA1HiYjsR2ER0qxuKr8/syM5SzcwbuqyoMsREalSFBZhzs1uwQnt0rnnnUUsW7896HJERKoMhUUYM+Punx5HQryGo0REwiks9tO0Tir/7yedmLFsI09PWRZ0OSIiVYLCogznHN+ck9o34r53F5JfuC3ockREAqewKMM3w1FJ8XHcOmEOezUcJSIxTmFxAI1rp/B/Qzsxc/lGnv5sadDliIgESmFxEGd3b84pHRpz37uLyFun4SgRiV0Ki4MwM/7y086kJsVz64TZGo4SkZgV0bAwsyFmtsjM8szstjKWX21mc81slpl9amYdQ/MzzGxnaP4sM3s8knUeTKNaKfxxaCe+WLGJJ/6XH1QZIiKBilhYmFk8MBo4HegIXPBNGIR50d2Pc/duwL3A/WHLlrh7t9Dj6kjVWRFDuzbjtE6N+fv7X5G3bmuQpYiIBCKSexa9gDx3z3f3YuBlYFj4Cu6+JWwyDaiS4zxmxl1nHUdaUjy/Gj+Hkr37gi5JROSoimRYNAdWhk0XhOZ9j5ldZ2ZLKN2zuCFsUaaZfWFmn5jZwLLewMxGmVmumeUWFhZWZu0/kF4rmTuHdWb2yk2M1XCUiMSYSIaFlTHvB3sO7j7a3dsAvwF+F5q9Bmjl7t2Bm4EXzax2GduOdfdsd89OT0+vxNLLdmaXpvz4uCY8+P5iFq3VcJSIxI5IhkUB0DJsugWw+iDrvwycBeDuu929KPR8JrAEODZCdVaYmXHnsM7UTEnglvGz2aPhKBGJEZEMixlAWzPLNLMkYAQwMXwFM2sbNnkGsDg0Pz10gBwzywLaAlVi7KdhzWT+NKwzc1dtZswnS4IuR0TkqEiI1Au7e4mZXQ+8C8QDT7n7PDO7E8h194nA9WZ2CrAH2AhcGtp8EHCnmZUAe4Gr3X1DpGo9VGd0acrbXzbloQ8Xc0rHxrRv8oMRMhGRqGLuVfIEpEOWnZ3tubm5R+39Nmwv5tQHPqFx7RTeuK4/ifG6vlFEqh8zm+nu2eWtp99wh6l+WhJ3nXUc81Zv4dGPNBwlItFNYXEEhnRuwtCuzXh40mLmrd4cdDkiIhGjsDhCfxzaibo1krhl/ByKS3R2lIhEJ4XFEaqXlsRfzu7MgjVbGP1RXtDliIhEhMKiEpzaqQlnd2/O6I/y+HKVhqNEJPooLCrJ//tJR+qnJXHL+NkajhKRqKOwqCR1ayRx90+PY+HarTw8aXHQ5YiIVCqFRSU6uUNjzjm+BY9+vIQ5BZuCLkdEpNIoLCrZH37SkYY1S4ejdpfsDbocEZFKobCoZHVSE/nrOV346uttPPSBhqNEJDooLCLgxHaNOC+7BY9/soRZKzUcJSLVn8IiQn53Zkca107hlvGz2bVHw1EiUr0pLCKkdkrpcFTeum088MFXQZcjInJEFBYRNPjYdC7o1ZJ/Ts7n8xUbgy5HROSwKSwi7I4fd6BpnVQNR4lItaawiLBaKYncc04X8gu38/f3FgVdjojIYVFYHAUD2jbkwt6teOLTpcxcXmVu+CciUmEKi6Pkjh93oFmdVG4ZP4edxRqOEpHqRWFxlNRMTuC+4V1Yun47972r4SgRqV4UFkdRv2MacnGf1jw9ZSnTl2o4SkSqD4XFUXbb6e1pUS+VWyfMZkdxSdDliIhUiMLiKEtLTuDec7qyvGgH976j4SgRqR4iGhZmNsTMFplZnpndVsbyq81srpnNMrNPzaxj2LLbQ9stMrPTIlnn0da3TQN+3i+DZ6YsY1p+UdDliIiUK2JhYWbxwGjgdKAjcEF4GIS86O7HuXs34F7g/tC2HYERQCdgCPBo6PWixq+HtKN1gxrcOmE223drOEpEqrZI7ln0AvLcPd/di4GXgWHhK7j7lrDJNMBDz4cBL7v7bndfCuSFXi9q1EhK4L7hXSnYuJN73lkYdDkiIgcVybBoDqwMmy4IzfseM7vOzJZQumdxw6FsW931yqzPZf0yGTd1OVPy1gddjojIAUUyLKyMef6DGe6j3b0N8Bvgd4eyrZmNMrNcM8stLCw8omKDcutp7chsmMavX53DNg1HiUgVFcmwKABahk23AFYfZP2XgbMOZVt3H+vu2e6enZ6efoTlBiM1KZ77hndh1aad3P3WgqDLEREpUyTDYgbQ1swyzSyJ0gPWE8NXMLO2YZNnAN/ch3QiMMLMks0sE2gLTI9grYHKzqjPFQMyeSFnBS9NXxF0OSIiP5AQqRd29xIzux54F4gHnnL3eWZ2J5Dr7hOB
683sFGAPsBG4NLTtPDP7FzAfKAGuc/eobqj0q1PbsXjdNm5/bS6bduzhmhPaBF2SiMi3zP0HhwKqpezsbM/NzQ26jCNSXLKPX42fzX9mr+aqwVncNqQ9ZmUdvhERqRxmNtPds8tbL2J7FnLokhLiePD8btRJTWDMJ/ls3rGHP599HPFxCgwRCZbCooqJjzP+NKwz9Wok8fCkPLbs2sMD53cjOSGqrkkUkWpGYVEFmRm/OrUddVITueu/C9i6K5fHL+pBWrL+ukQkGGokWIVdMTCLe4d34bO89Vz0ZA6bdhQHXZKIxCiFRRV3XnZLHv1ZD+at2sL5Y6bx9ZZdQZckIjFIYVENDOnchKcv60nBxh0Mf3wKy4u2B12SiMQYhUU10f+YhrxwZR+27iph+ONTWbh2S/kbiYhUEoVFNdKtZV3GX9WXeDPOe3wqM5fr1qwicnQoLKqZto1rMf7qvtRPS+KiJ6bzyVfVs4GiiFQvCotqqGX9Goy/uh+ZDdO44tkZvDnnYP0ZRUSOnMKimkqvlcxLo/rQrWVdfvHSF2pAKCIRpbCoxuqkJjLu8t4MPjad21+by2MfLwm6JBGJUgqLai41KZ6xF2cztGsz7nlnIXe/tYBoaQ4pIlWH+kdEge8aECYyZnI+m3bs4S8/VQNCEak8CosoERdn3DmsE3VrJH7bgPDBEWpAKCKVQ8NQUeSbBoS/O6MDb3+5liuezWW77ustIpVAYRGFrhiYxX2hBoQ/e0INCEXkyCksotS52S157KIezF+9hfPGTFUDQhE5IgqLKHZapyY8c1lPVm3cyTmPqQGhiBw+hUWU63dMQ168sg/bd5c2IFywRg0IReTQKSxiQNeWdflXqAHh+WPUgFBEDp3CIka0bVyLCdf0pUHNZH72RA4fL1oXdEkiUo0oLGJIi3o1+NdVfclqWJMrx+Xyn9lqQCgiFRPRsDCzIWa2yMzyzOy2MpbfbGbzzWyOmX1oZq3Dlu01s1mhx8RI1hlLwhsQ3vDyF7yYowaEIlK+iIWFmcUDo4HTgY7ABWbWcb/VvgCy3b0LMAG4N2zZTnfvFnoMjVSdseibBoQnHJvOHa/P5dGP89RPSkQOKpJ7Fr2APHfPd/di4GVgWPgK7v6Ru+8ITU4DWkSwHgmTmhTP2EuyGdatGfe+s4i7316owBCRA4pkb6jmwMqw6QKg90HWHwm8HTadYma5QAnwV3d/o/JLjG2J8XE8cF43aqckMnZyPpt2FPOXs48jIV6HskTk+yIZFmW1PC3zq6uZXQRkA4PDZrdy99VmlgVMMrO57r5kv+1GAaMAWrVqVTlVx5gfNCDcWcJDF6gBoYh8XyS/QhYALcOmWwA/OP3GzE4BfgsMdffd38x399WhP/OBj4Hu+2/r7mPdPdvds9PT0yu3+hjyTQPC35/ZkXfmreXSp6aTt25r0GWJSBUSybCYAbQ1s0wzSwJGAN87q8nMugNjKA2KdWHz65lZcuh5Q6A/MD+CtQowckAmfz+3K3MLNnPqA5O56ZVZLFuvFiEiEsFhKHcvMbPrgXeBeOApd59nZncCue4+EbgPqAmMNzOAFaEznzoAY8xsH6WB9ld3V1gcBef0aMGJ7Rsx5pMlPDt1GRNnr+ac45vzi5Pa0rJ+jaDLE5GAWLScAZOdne25ublBlxFV1m3dxWMfL+GFnBXs2+ec17Ml1594DM3qpgZdmohUEjOb6e7Z5a6nsJDyrNm8k9Ef5fHKjJUYxoW9W3HtCW1oVDsl6NJE5AgpLKTSFWzcwSOT8hg/s4CEOOOSvq25anAbGtZMDro0ETlMCguJmOVF23now8W88cUqUhLjubRfBqMGZlEvLSno0kTkECksJOLy1m3jHx8u5j9zVpOWlMDlAzIZOSCTOqmJQZcmIhWksJCjZtHarTzw/le8M28ttVMSGDUoi5/3z6RmciSv+RSRylDRsKjQdRZm9lxF5klsatekFo9f3IM3fzGAXpn1+dt7XzHwnkmM+WQJO4pLgi5PRCpBRS/K6xQ+Eeoo26Pyy5HqrHPzOjxxaU/euK4/x7Woy91vL2TQvR/z5KdL2bVnb9DlicgROGhYmNntZrYV6GJmW0KPrcA64N9HpUKpdrq1rMu4y3sx4eq+HNu4Jn96cz6D7/uI56YuY3eJQkOkOqrQMQszu9vdbz8K9Rw2HbOouqYuKeL+9xcxY9lGmtdN5fqTjmF4jxYkqrutSOAq9ZgF8KaZpYVe+CIzuz/8rnYiB9O3TQP+dVVfxl3ei4a1krn9tbmc/PdPmDCzgJK9+4IuT0QqoKJh8Riww8y6Ar8GlgPjIlaVRB0zY9Cx6bxxbT+evDSbWikJ3DJ+Nqc+MJl/z1rF3n3RcVaeSLSqaFiUeOl41TDgIXd/CKgVubIkWpkZJ3dozJu/GMDjFx1PYnwcN748i9Mfmszbc9ewT6EhUiVVNCy2mtntwMXAf0NnQ+nKKzlsZsaQzk15+8aBPHxBd/buc6554XPOfPhT3p//tW7xKlLFVDQszgd2A5e7+1pKb5l6X8SqkpgRF2f8pGsz3rtpMPef15XtxSVcOS6Xs0Z/xpQl64MuT0RCKnwFt5k1BnqGJqeH36yoKtDZUNFhz959vPZ5Af/4MI/Vm3fy2x93YOSATEL3OxGRSlbZV3CfB0wHzgXOA3LMbPiRlSjyQ4nxcZzfsxUf3DyYIZ2acNd/F3DH61+yR2dNiQSqos17fgv0/GZvwszSgQ+ACZEqTGJbalI8oy88nvveW8RjHy9h5YYdjP7Z8WpSKBKQih6ziNtv2KnoELYVOSxxccZvhrTn3uFdyFlaxE8f/YzlRbonuEgQKvoL/x0ze9fMfm5mPwf+C7wVubJEvnNedkueG9mb9duKOfvRKeQu2xB0SSIxp7zeUMeYWX93vxUYA3QBugJTgbFHoT4RAPpkNeD1a/tRJzWRC/+ZwxtfrAq6JJGYUt6exYPAVgB3f83db3b3myjdq3gw0sWJhMtKr8nr1/aje6u6/PKVWdz/3iJdjyFylJQXFhnuPmf/me6eC2REpCKRg6hbI4nnRvbm3B4t+MekPH7x0hdqfy5yFJR3NlTKQZalVmYhIhWVlBDHvcO7kJVek3veWciqTTsZe3E26bWSgy5NJGqVt2cxw8yu3H+mmY0EZpb34mY2xMwWmVmemd1WxvKbzWy+mc0xsw/DO9ma2aVmtjj0uLQiP4zEDjPjmhPa8NjPjmfBmi2cNfozFq3dGnRZIlHroFdwh67afh0o5rtwyAaSgLNDrT8OtG088BXwI6AAmAFc4O7zw9Y5Echx9x1mdg1wgrufb2b1gdzQe3novXu4+8YDvZ+u4I5dcwo2MfLZXHYW7+WRC7tzQrtGQZckUm1UyhXc7v61u/cD/ggsCz3+6O59DxYUIb2APHfPd/di4GVKu9aGv/5H7r4jNDkNaBF6fhrwvrtvCAXE+8CQ8n4YiU1dWtTl39f1p2X9Glz+zAzGTV0WdEkiUadC11mEfqk/HHpMquBrNwdWhk0XhOYdyEjg7UPZ1sxGmVmumeUWFhZWsCyJRs3qpjLh6r6
c2K4Rf/j3PP5v4jzdI0OkEkXyKuyyOr+V+b/XzC6idMjpm062FdrW3ce6e7a7Z6enpx92oRId0pITGHtJNiMHZPLMlGVc8ewMtu0uCboskagQybAoAFqGTbcAVu+/kpmdQmnvqaHuvvtQthXZX3yc8fszO3LXWZ2ZvHg9wx+bwqpNO4MuS6Tai2RYzADamlmmmSUBI4CJ4SuYWXdKrwwful/vqXeBU82snpnVA04NzROpkIv6tOaZy3qyatNOhj3yGbNWbgq6JJFqLWJh4e4lwPWU/pJfAPzL3eeZ2Z1mNjS02n1ATWC8mc0ys4mhbTcAf6I0cGYAd4bmiVTYwLbpvHZNP1KT4jh/zFT+O2dN0CWJVFsVvvlRVadTZ+VAirbtZtRzM5m5fCO3ntaOa09oo5spiYRU6s2PRKqzBjWTeeGK3gzr1oz73l3ELePnsLtELUJEDkVFb34kUq2lJMbz4PndyGpYkwc++IqVG3Yw5uIe1EtLCro0kWpBexYSM8yMG09py0MjujGrYBNnP/oZSwq3BV2WSLWgsJCYM6xbc166sjdbd5Vw9ujPmLJkfdAliVR5CguJST1a1+eN6/rTuHYKlzw5nVdmrAi6JJEqTWEhMatl/Rq8em0/+rZpwG9encvdby1gn1qEiJRJYSExrXZKIk//vCcX9WnFmMn5XPPCTHYUq0WIyP4UFhLzEuLj+NOwzvzhzI68P/9rzhszla+37Aq6LJEqRWEhQumZUpcPyOSJS7NZWridYY98xperNgddlkiVobAQCXNS+8ZMuKYfcQbnPj6V9+d/HXRJIlWCwkJkPx2a1uaN6/pzbOOajHoul4c/XMyevfuCLkskUAoLkTI0qp3Cy6P68pMuzfj7+19x1mgNS0lsU1iIHEBqUjz/uKA7j190POu27mbY6M+4552F7NqjvlISexQWIuUY0rkpH9w0mHOOb85jHy/hxw/9j+lL1TFfYovCQqQC6tRI5N7hXXl+ZG+K9+7jvDFT+f0bX7J1156gSxM5KhQWIodgQNuGvHfTIC7vn8nzOcs57YHJfLRwXfkbilRzCguRQ1QjKYE//KQjr17Tj7TkBC57ZgY3vTKLDduLgy5NJGIUFiKH6fhW9XjzhgHccHJb/jN7NT+6/xP+M3s10XL3SZFwCguRI5CcEM/NPzqWN28YQIt6qfzipS+4ctxM1m5WuxCJLgoLkUrQvkltXru2P787owOf5hXyo/s/4cWcFepiK1FDYSFSSeLjjCsGZvHuLwfRuXkd7nh9Lhc+MY1l67cHXZrIEVNYiFSy1g3SePHK3vz1p8cxb9UWTntwMmMnL6FELUOkGotoWJjZEDNbZGZ5ZnZbGcsHmdnnZlZiZsP3W7bXzGaFHhMjWadIZTMzRvRqxfs3D2Zg23T+8tZCzn50CvNXbwm6NJHDErGwMLN4YDRwOtARuMDMOu632grg58CLZbzETnfvFnoMjVSdIpHUpE4K/7ykB49c2J3Vm3Yy9JFP+ft7i9hdopYhUr1Ecs+iF5Dn7vnuXgy8DAwLX8Hdl7n7HED75xK1zIwzuzTjg5sHM7RbMx6elMcZ//iUmcvVMkSqj0iGRXNgZdh0QWheRaWYWa6ZTTOzsyq3NJGjr15aEvef141nLuvJzuK9DH98Kv83cR7bd+s2rlL1RTIsrIx5h3IeYSt3zwYuBB40szY/eAOzUaFAyS0sLDzcOkWOqhPaNeLdmwZxSZ/WPDt1Gac+MJnJX+nfr1RtkQyLAqBl2HQLYHVFN3b31aE/84GPge5lrDPW3bPdPTs9Pf3IqhU5imomJ/DHYZ0Zf1VfkhPjuOSp6dwyfjabdqhliFRNkQyLGUBbM8s0syRgBFChs5rMrJ6ZJYeeNwT6A/MjVqlIQLIz6vPWDQO5/sRjeP2LVZxy/2Tenrsm6LJEfiBiYeHuJcD1wLvAAuBf7j7PzO40s6EAZtbTzAqAc4ExZjYvtHkHINfMZgMfAX91d4WFRKWUxHhuOa0dE6/vT5M6yVzzwudc9Vwu67aoZYhUHRYtTc+ys7M9Nzc36DJEjkjJ3n088elSHnj/K5IT4vjdGR05N7sFZmUdAhQ5cmY2M3R8+KB0BbdIFZIQH8fVg9vw9o0Dad+0Nr9+dQ4XPzmdFUU7gi5NYpzCQqQKykqvyctX9uGuszoza+UmTntwMn97dxGFW3cHXZrEKA1DiVRxqzft5K7/zuftL9eSGB/HOcc354qBWbRJrxl0aRIFKjoMpbAQqSbyC7fxxKdLmTCzgD1793FKh8ZcNSiL7Iz6QZcm1ZjCQiRKrd+2m3FTljFu2nI27djD8a3qMmpQFj/q2IT4OB0Il0OjsBCJcjuKS5gws4An/reUFRt2kNkwjZEDMhneowUpifFBlyfVhMJCJEbs3ee88+Vaxk5ewuyCzTRIS+KSvhlc3Lc19dOSgi5PqjiFhUiMcXemL93A2Mn5fLhwHSmJcZzboyVXDMykdYO0oMuTKqqiYZFwNIoRkcgzM3pnNaB3VgMWf72Vf/4vn1dmrOT5nOUM6dSEUYOy6N6qXtBlSjWlPQuRKLZuyy6enrKM56ctZ+uuEnpl1GfUoCxOat+IOB0MFzQMJSJhtu0u4ZUZK3nq06Ws2rSTNulpjBqUxbBuzXUwPMYpLETkB/bs3cdbc9cw5pN85q/ZQsOayVzWP4OLeremTo3EoMuTACgsROSA3J0pS4oYMzmfyV8VUiMpnvN7tmTkgExa1KsRdHlyFCksRKRCFqzZwj8n5zNx9moc+PFxTblqUBadm9cJujQ5ChQWInJIVm/aydOfLeWl6SvZtruEfm0aMGpQFoOPTVeL9CimsBCRw7Jl1x5eylnBU58t5estu2nfpBZXDsziJ12bkZSgRtXRRmEhIkekuGQfE2ev5p+T81n09Vaa1E7hsv4ZjOjZSgfDo4jCQkQqhbvzyVeFjJ2cz5QlRaQkxnF29+Zc3CeDjs1qB12eHCFdwS0ilcLMOKFdI05o14h5qzfz3NTlvP7FKl6avpJeGfW5uG9rhnRuQmK8hqiimfYsROSQbdpRzPjcAp6btpwVG3bQqFYyF/RqxYW9W9G4dkrQ5ckh0DCUiETcvn2lQ1TPTl3Gx4sKSYgzhnRuwqX9MshuXU9nUVUDGoYSkYiLizNObN+IE9s3Ytn67Tw/bTn/yl3Jm3PW0L5JLS7tl8Gwbs2okaRfNdWd9ixEpFLtKC7h37NWM27qchas2UKtlATOy27JxX1ak9FQrdKrmoruWUT0iJSZDTGzRWaWZ2a3lbF8kJl9bmYlZjZ8v2WXmtni0OPSSNYpIpWnRlICF/RqxVs3DGD81X05oV0jnp2yjBP+9jE/f3o6kxZ+zb590fElNZZEbM/CzOKBr4AfAQXADOACd58ftk4GUBu4BZjo7hNC8+sDuUA24MBMoIe7bzzQ+2nPQqTqWrdlFy9OX8GLOStYt3U3rerX4OI+rTk3uwV1a+hufkGqCnsWvYA8d89392LgZWBY+Aruvszd5wD79tv2NOB9d98QCoj3gSERrFVEIqhR7RR+ecqxfHbbSTxyYX
ea1E7hz28toPdfPuQ3E+bw5arNQZco5YjkUafmwMqw6QKg9xFs27yS6hKRgCTGx3Fml2ac2aUZC9ZsYdzU5bzxxSpeyV1Jj9b1uKRva07v3FRtRaqgSP6NlHXOXEXHvCq0rZmNMrNcM8stLCw8pOJEJFgdmtbm7p8ex7Q7Tub3Z3akaNtubnx5Fv3+Oon731vE2s27gi5RwkQyLAqAlmHTLYDVlbmtu49192x3z05PTz/sQkUkOHVSExk5IJNJvzqBZy7rSdcWdXj4ozz63zOJa1+YybT8IqLlrM3qLJLDUDOAtmaWCawCRgAXVnDbd4G/mNk3d5c/Fbi98ksUkaoiLu67tiIrinbwfM5yXpmxkrfmrqVd41pc0q81Z3VrTlqyrtkIQkSvszCzHwMPAvHAU+7+ZzO7E8h194lm1hN4HagH7ALWunun0LaXA3eEXurP7v70wd5LZ0OJRJ+dxXv5z+zVPDt1GfNWb6FWcgLDs1twUZ/WtEmvGXR5UUHtPkQkarg7n6/YxLipy3hr7hr27HXaNqrJSR0acVK7RvRoXY8ENTI8LAoLEYlKhVt38+9Zq/ho0TqmL93Anr1O7ZQEBrdrxEnt0xl8bCPqp+najYpSWIhI1Nu6aw+fLl7PpIXr+GhRIeu37SbOoFvLupzcoTEntmtEh6a11NDwIBQWIhJT9u1z5q7aHAqOdcwpKL3Qr2mdFE5sXzpc1f+YhqQmxQdcadWisBCRmLZuyy4+XlTIpIXr+N/iQrYX7yUpIY6+WQ04uUMjTmzXiJb1awRdZuAUFiIiIbtL9jJj6UYmLVzHpIVfs6xoB4AOkqOwEBE5oPzCbd8OV+Xkb6BkX+weJFdYiIhUQKwfJFdYiIgcolg8SK6wEBE5QuUdJD/h2Ea0rJ9arfc6FBYiIpXoQAfJm9VJoXdWA3pn1qd3VgMyGtSoVuGhsBARiaD8wm18mreenPwNTMsvomh7MQCNayfTO7MBvbPq0zuzAW3S06p0eCgsRESOEndnSeE2puVvIGfpBnLyi1i3dTcADWsmh/Y6SsOjbaOaxMVVnfCoaFio16+IyBEyM45pVItjGtXioj6tcXeWFe1u9kQ7AAAG9UlEQVQgJ7+IaflF5CzdwH/nrgGgfloSvTK+C4/2TWpVqfA4EIWFiEglMzMyG6aR2TCNEb1a4e6s3LCTaUuLyMnfQM7SIt6ZtxYovflTz4z69AmFR8dmtYmvguGhsBARiTAzo1WDGrRqUIPzsktvArpq087v7Xl8sOBrAGqlJNAzo/63B8w7N6tdJa4s1zELEZEqYO3mXeQsLQod9ygiv3A7AGlJ8fQIhUefrAZ0aVGHxEoMDx3gFhGpxtZt3cX0paVnWuXkb2Dxum0ApCbG06N1vW/3PLq2rENywuFfJKiwEBGJIkXbdjN9aenZVtPyi1i4disAyQlxnNKxMaMvPP6wXldnQ4mIRJEGNZM5/bimnH5cUwA2bi9m+rIN5ORvICUx8sc0FBYiItVQvbQkTuvUhNM6NTkq7xf8IXYREanyFBYiIlKuiIaFmQ0xs0Vmlmdmt5WxPNnMXgktzzGzjND8DDPbaWazQo/HI1mniIgcXMSOWZhZPDAa+BFQAMwws4nuPj9stZHARnc/xsxGAPcA54eWLXH3bpGqT0REKi6Sexa9gDx3z3f3YuBlYNh+6wwDng09nwCcbFW5PaOISIyKZFg0B1aGTReE5pW5jruXAJuBBqFlmWb2hZl9YmYDI1iniIiUI5Knzpa1h7D/FYAHWmcN0Mrdi8ysB/CGmXVy9y3f29hsFDAKoFWrVpVQsoiIlCWSexYFQMuw6RbA6gOtY2YJQB1gg7vvdvciAHefCSwBjt3/Ddx9rLtnu3t2enp6BH4EERGByO5ZzADamlkmsAoYAVy43zoTgUuBqcBwYJK7u5mlUxoae80sC2gL5B/szWbOnLnezJYfQb0NgfVHsH000Wfxffo8vk+fx3ei4bNoXZGVIhYW7l5iZtcD7wLxwFPuPs/M7gRy3X0i8CTwnJnlARsoDRSAQcCdZlYC7AWudvcN5bzfEe1amFluRfqjxAJ9Ft+nz+P79Hl8J5Y+i4i2+3D3t4C39pv3h7Dnu4Bzy9juVeDVSNYmIiIVpyu4RUSkXAqL74wNuoAqRJ/F9+nz+D59Ht+Jmc8iau5nISIikaM9CxERKVfMh0V5zQ5jiZm1NLOPzGyBmc0zsxuDriloZhYf6iTwZtC1BM3M6prZBDNbGPo30jfomoJkZjeF/p98aWYvmVlK0DVFUkyHRVizw9OBjsAFZtYx2KoCVQL8yt07AH2A62L88wC4EVgQdBFVxEPAO+7eHuhKDH8uZtYcuAHIdvfOlF4eMOLgW1VvMR0WVKzZYcxw9zXu/nno+VZKfxns388rZphZC+AM4ImgawmamdWm9PqnJwHcvdjdNwVbVeASgNRQ94ka/LBDRVSJ9bCoSLPDmBS6t0h3ICfYSgL1IPBrYF/QhVQBWUAh8HRoWO4JM0sLuqiguPsq4G/ACkp72W129/eCrSqyYj0sKtLsMOaYWU1KL4r85f7NG2OFmZ0JrAv1JpPSb9HHA4+5e3dgOxCzx/jMrB6loxCZQDMgzcwuCraqyIr1sKhIs8OYYmaJlAbFC+7+WtD1BKg/MNTMllE6PHmSmT0fbEmBKgAK3P2bPc0JlIZHrDoFWOruhe6+B3gN6BdwTREV62HxbbNDM0ui9ADVxIBrCkzoxlNPAgvc/f6g6wmSu9/u7i3cPYPSfxeT3D2qvzkejLuvBVaaWbvQrJOB+QfZJNqtAPqYWY3Q/5uTifID/hHtDVXVHajZYcBlBak/cDEw18xmhebdEerxJfIL4IXQF6t84LKA6wmMu+eY2QTgc0rPIvyCKL+aW1dwi4hIuWJ9GEpERCpAYSEiIuVSWIiISLkUFiIiUi6FhYiIlEthIXIIzGyvmc0Ke1TaVcxmlmFmX1bW64lUppi+zkLkMOx0925BFyFytGnPQqQSmNkyM7vHzKaHHseE5rc2sw/NbE7oz1ah+Y3N7HUzmx16fNMqIt7M/hm6T8J7ZpYa2A8lEkZhIXJoUvcbhjo/bNkWd+8FPEJpx1pCz8e5exfgBeAfofn/AD5x966U9lj6pnNAW2C0u3cCNgHnRPjnEakQXcEtcgjMbJu71yxj/jLgJHfPDzVjXOvuDcxsPdDU3feE5q9x94ZmVgi0cPfdYa+RAbzv7m1D078BEt39rsj/ZCIHpz0LkcrjB3h+oHXKsjvs+V50XFGqCIWFSOU5P+zPqaHnU/judps/Az4NPf8QuAa+vc937aNVpMjh0LcWkUOTGtaRF0rvSf3N6bPJZpZD6ZewC0LzbgCeMrNbKb3T3DedWm8ExprZSEr3IK6h9I5rIlWSjlmIVILQMYtsd18fdC0ikaBhKBERKZf2LEREpFzasxARkXIpLEREpFwKCxERKZfCQkREyqWwEBGRciksRESkXP8fPBeXVsOIBqsAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "#Plot the loss\n", "def plot_loss_list(loss_list= rnn.loss_list):\n", @@ -753,21 +844,63 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 74, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model saved at: ./rnn_model_lstm_leaky_relu_[50,40,30,20,10]c/rnn_basic\n", + "Remaining data saved as: rnn_model_lstm_leaky_relu_[50,40,30,20,10]c.pkl\n" + ] + } + ], "source": [ "full_save(rnn)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Restoring parameters from ./rnn_model_lstm_leaky_relu_[50,40,30,20,10]c/rnn_basic\n", + "Model succesfully loaded\n", + "Minibatches (=training data) and test_input and test_target in data loaded\n" + ] + } + ], + "source": [ + "folder = get_rnn_folder(ncells = ncells, cell_type = \"lstm\", activation = \"leaky_relu\")\n", + "rnn, data = full_load(folder)" + ] + }, + { + "cell_type": "code", + "execution_count": 76, "metadata": {}, "outputs": [], "source": [ - "#folder = get_rnn_folder(ncells = [50, 40, 30, 20, 10], cell_type = \"lstm\", activation = \"leaky_relu\")\n", - "#rnn = full_load(folder)" + "def rnn_test(rnn, test_input= test_input, test_target= test_target):\n", + " \n", + " #Here I predict based on my test set\n", + " test_pred = rnn.predict(test_input)\n", + " \n", + " #Here i subtract a prediction (random particle) from the target to get an idea of the predictions\n", + " #scaler_inv(test_input, scalerfunc = func)[0,:,:]\n", + " diff = scaler_inv(test_pred, scalerfunc = func)-scaler_inv(test_target, scalerfunc = func )\n", + " print(diff[0,:,:])\n", + " \n", + " #Here I evaluate my model on the test set based on mean_squared_error\n", + " loss = rnn.sess.run(rnn.cost, feed_dict={rnn.X:test_input, rnn.Y:test_target})\n", + " print(\"Loss on test set:\", loss)\n", + " \n", + " return test_pred, loss" ] }, { @@ -775,8 +908,30 @@ "execution_count": null, "metadata": {}, "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[-14.1891082 5.49341339 -18.52827875]\n", + " [-13.87708179 14.74682446 -17.92266896]\n", + " [-27.59613792 41.24122176 -17.39950265]\n", + " [ 45.07231116 35.11842858 -14.05784146]\n", + " [ 51.55853583 26.47433645 -18.04338036]\n", + " [ 47.99403133 3.63808725 -18.35658836]\n", + " [ 20.2514592 0.82678895 -19.31475041]]\n", + "Loss on test set: 0.38916114\n" + ] + } + ], "source": [ - "###test_input.shape###" + "test_pred, test_loss = rnn_test()" ] }, { @@ -784,38 +939,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "#Here I predict based on my test set\n", - "\n", - "test_pred = rnn.predict(test_input)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Here i subtract a prediction (random particle) from the target to get an idea of the predictions\n", - "\n", - "#scaler_inv(test_input, scalerfunc = func)[0,:,:]\n", - "\n", - "\n", - "diff = scaler_inv(test_pred, scalerfunc = func)-scaler_inv(test_target, scalerfunc = func )\n", - "\n", - "print(diff[0,:,:])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Here I evaluate my 
model on the test set based on mean_squared_error\n", - "\n", - "print(\"Loss on test set:\", rnn.sess.run(rnn.cost, feed_dict={rnn.X:test_input, rnn.Y:test_target}))" - ] + "source": [] }, { "cell_type": "code", diff --git a/Requiremements.ipynb b/Requiremements.ipynb new file mode 100644 index 0000000..9b3135e --- /dev/null +++ b/Requiremements.ipynb @@ -0,0 +1,730 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\users\\sa_li\\anaconda3\\envs\\rnn-tf-ker\\lib\\site-packages\\h5py\\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", + "  from ._conv import register_converters as _register_converters\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib as mpl\n", + "import random\n", + "import math\n", + "import matplotlib.pyplot as plt\n", + "import tensorflow as tf\n", + "from tensorflow.python.framework import ops\n", + "from sklearn import preprocessing\n", + "import pickle as pkl\n", + "from pathlib import Path" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "### Reshape original array into the shape (particle number, timesteps, input = coordinates)###\n", + "\n", + "def reshapor(arr_orig):\n", + "    timesteps = int(arr_orig.shape[1]/3)\n", + "    number_examples = int(arr_orig.shape[0])\n", + "    arr = np.zeros((number_examples, timesteps, 3))\n", + "    \n", + "    for i in range(number_examples):\n", + "        for t in range(timesteps):\n", + "            arr[i,t,0:3] = arr_orig[i,3*t:3*t+3]\n", + "    \n", + "    return arr\n", + "\n", + "def reshapor_inv(array_shaped):\n", + "    timesteps = int(array_shaped.shape[1])\n", + "    num_examples = int(array_shaped.shape[0])\n", + "    arr = np.zeros((num_examples, timesteps*3))\n", + "    \n", + "    for i in range(num_examples):\n", + "        for t in range(timesteps):\n", + "            arr[i,3*t:3*t+3] = array_shaped[i,t,:]\n", + "    \n", + "    return arr" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "### create the training set and the test set###\n", + "\n", + "def create_random_sets(dataset, train_to_total_ratio):\n", + "    #shuffle the dataset\n", + "    num_examples = dataset.shape[0]\n", + "    p = np.random.permutation(num_examples)\n", + "    dataset = dataset[p,:]\n", + "    \n", + "    #evaluate size of training and test set and initialize them\n", + "    train_set_size = np.int(num_examples*train_to_total_ratio)\n", + "    test_set_size = num_examples - train_set_size\n", + "    \n", + "    train_set = np.zeros((train_set_size, dataset.shape[1]))\n", + "    test_set = np.zeros((test_set_size, dataset.shape[1]))\n", + "    \n", + "\n", + "    #fill train and test sets\n", + "    for i in range(num_examples):\n", + "        if train_set_size > i:\n", + "            train_set[i,:] += dataset[i,:]\n", + "        else:\n", + "            test_set[i - train_set_size,:] += dataset[i,:]\n", + "    \n", + "    return train_set, test_set" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "testset = pd.read_pickle('matched_8hittracks.pkl')\n", + "tset = np.array(testset)\n", + "tset = tset.astype('float32')\n", + "train_set, test_set = create_random_sets(tset, 0.99)" + ] + },
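Review note: `reshapor`/`reshapor_inv` above are a plain layout change between `(N, 3*T)` and `(N, T, 3)`; since a C-order NumPy reshape walks the flat array in exactly the x,y,z-per-timestep order the loops use, the double loops can be replaced. A minimal sketch (the `_fast` names are hypothetical, not part of this PR):

```python
import numpy as np

# Vectorized equivalents of reshapor / reshapor_inv: a C-order reshape
# maps element [i, 3*t + k] to [i, t, k], matching the loops above.
def reshapor_fast(arr_orig):
    n, flat = arr_orig.shape
    return arr_orig.reshape(n, flat // 3, 3)

def reshapor_inv_fast(array_shaped):
    n, timesteps, ninputs = array_shaped.shape
    return array_shaped.reshape(n, timesteps * ninputs)

# Round-trip sanity check on random data
x = np.random.rand(5, 24)
assert np.allclose(reshapor_inv_fast(reshapor_fast(x)), x)
```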
+ { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "#Normalize the data advanced version with scikit learn\n", + "\n", + "#set the transformation based on the training set\n", + "def set_min_max_scaler(arr, feature_range= (-1,1)):\n", + "    min_max_scalor = preprocessing.MinMaxScaler(feature_range=feature_range)\n", + "    if len(arr.shape) == 3:\n", + "        arr = reshapor(min_max_scalor.fit_transform(reshapor_inv(arr)))  \n", + "    else:\n", + "        arr = min_max_scalor.fit_transform(arr)\n", + "    return min_max_scalor\n", + "\n", + "min_max_scalor = set_min_max_scaler(train_set)\n", + "\n", + "\n", + "#transform data\n", + "def min_max_scaler(arr, min_max_scalor= min_max_scalor):\n", + "    \n", + "    if len(arr.shape) == 3:\n", + "        if arr.shape[1] == 8:\n", + "            arr = reshapor(min_max_scalor.transform(reshapor_inv(arr)))\n", + "        else: \n", + "            arr_ = np.zeros((arr.shape[0],24))\n", + "            arr = reshapor_inv(arr)\n", + "            arr_[:,:arr.shape[1]] += arr\n", + "            arr = min_max_scalor.transform(arr_)[:,:arr.shape[1]]\n", + "            arr = reshapor(arr)\n", + "    \n", + "    else:\n", + "        if arr.shape[1] == 24:\n", + "            arr = min_max_scalor.transform(arr)\n", + "        else:\n", + "            arr_ = np.zeros((arr.shape[0],24))\n", + "            arr_[:,:arr.shape[1]] += arr\n", + "            arr = min_max_scalor.transform(arr_)[:,:arr.shape[1]]\n", + "    \n", + "    return arr\n", + "    \n", + "#inverse transformation\n", + "def min_max_scaler_inv(arr, min_max_scalor= min_max_scalor):\n", + "    \n", + "    if len(arr.shape) == 3:\n", + "        if arr.shape[1] == 8:\n", + "            arr = reshapor(min_max_scalor.inverse_transform(reshapor_inv(arr)))\n", + "        else: \n", + "            arr_ = np.zeros((arr.shape[0],24))\n", + "            arr = reshapor_inv(arr)\n", + "            arr_[:,:arr.shape[1]] += arr\n", + "            arr = min_max_scalor.inverse_transform(arr_)[:,:arr.shape[1]]\n", + "            arr = reshapor(arr)\n", + "    \n", + "    else:\n", + "        if arr.shape[1] == 24:\n", + "            arr = min_max_scalor.inverse_transform(arr)\n", + "        else:\n", + "            arr_ = np.zeros((arr.shape[0],24))\n", + "            arr_[:,:arr.shape[1]] += arr\n", + "            arr = min_max_scalor.inverse_transform(arr_)[:,:arr.shape[1]]\n", + "    \n", + "    return arr" + ] + },
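Review note: the scaler is fitted on full 8-hit tracks (8 timesteps × 3 coordinates = 24 features); shorter sequences are zero-padded to 24 columns, transformed, and cropped back. A minimal standalone sketch of why the pad-then-crop trick is valid (names here are illustrative only):

```python
import numpy as np
from sklearn import preprocessing

# Fit on full tracks: 8 timesteps x 3 coordinates = 24 features.
full = np.random.rand(100, 24)
scalor = preprocessing.MinMaxScaler(feature_range=(-1, 1)).fit(full)

# A 7-step track has only 21 columns; pad with zeros to 24 so the
# per-column scaler parameters line up, then crop the result back.
short = np.random.rand(10, 21)
padded = np.zeros((short.shape[0], 24))
padded[:, :21] = short
scaled = scalor.transform(padded)[:, :21]  # padded columns are discarded
assert scaled.shape == (10, 21)
```

This works because MinMaxScaler (and StandardScaler) transform each column independently, so the padded columns never influence the real ones.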
+ { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "#Normalize the data advanced version with scikit learn - Standard scaler\n", + "\n", + "#set the transformation based on the training set\n", + "def set_std_scaler(arr):\n", + "    std_scalor = preprocessing.StandardScaler()\n", + "    if len(arr.shape) == 3:\n", + "        std_scalor.fit(reshapor_inv(arr))  \n", + "    else:\n", + "        std_scalor.fit(arr)\n", + "    return std_scalor\n", + "\n", + "std_scalor = set_std_scaler(train_set)\n", + "\n", + "#transform data\n", + "def std_scaler(arr, std_scalor= std_scalor):\n", + "    \n", + "    if len(arr.shape) == 3:\n", + "        if arr.shape[1] == 8:\n", + "            arr = reshapor(std_scalor.transform(reshapor_inv(arr)))\n", + "        else: \n", + "            arr_ = np.zeros((arr.shape[0],24))\n", + "            arr = reshapor_inv(arr)\n", + "            arr_[:,:arr.shape[1]] += arr\n", + "            arr = std_scalor.transform(arr_)[:,:arr.shape[1]]\n", + "            arr = reshapor(arr)\n", + "    \n", + "    else:\n", + "        if arr.shape[1] == 24:\n", + "            arr = std_scalor.transform(arr)\n", + "        else:\n", + "            arr_ = np.zeros((arr.shape[0],24))\n", + "            arr_[:,:arr.shape[1]] += arr\n", + "            arr = std_scalor.transform(arr_)[:,:arr.shape[1]]\n", + "    \n", + "    return arr\n", + "    \n", + "#inverse transformation\n", + "def std_scaler_inv(arr, std_scalor= std_scalor):\n", + "    \n", + "    if len(arr.shape) == 3:\n", + "        if arr.shape[1] == 8:\n", + "            arr = reshapor(std_scalor.inverse_transform(reshapor_inv(arr)))\n", + "        else: \n", + "            arr_ = np.zeros((arr.shape[0],24))\n", + "            arr = reshapor_inv(arr)\n", + "            arr_[:,:arr.shape[1]] += arr\n", + "            arr = std_scalor.inverse_transform(arr_)[:,:arr.shape[1]]\n", + "            arr = reshapor(arr)\n", + "    \n", + "    else:\n", + "        if arr.shape[1] == 24:\n", + "            arr = std_scalor.inverse_transform(arr)\n", + "        else:\n", + "            arr_ = np.zeros((arr.shape[0],24))\n", + "            arr_[:,:arr.shape[1]] += arr\n", + "            arr = std_scalor.inverse_transform(arr_)[:,:arr.shape[1]]\n", + "    \n", + "    return arr\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "train_set = reshapor(train_set)\n", + "test_set = reshapor(test_set)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "#Scale data either with MinMax scaler or with Standard scaler\n", + "#Returns the scaled array\n", + "\n", + "def scaler(arr, std_scalor= std_scalor, min_max_scalor= min_max_scalor, scalerfunc= \"std\"):\n", + "    \n", + "    if scalerfunc == \"std\":\n", + "        arr = std_scaler(arr, std_scalor= std_scalor)\n", + "        return arr\n", + "    \n", + "    elif scalerfunc == \"minmax\":\n", + "        arr = min_max_scaler(arr, min_max_scalor= min_max_scalor)\n", + "        return arr\n", + "    \n", + "    else:\n", + "        raise ValueError(\"Unknown scaler chosen: {}\".format(scalerfunc))\n", + "\n", + "def scaler_inv(arr, std_scalor= std_scalor, min_max_scalor= min_max_scalor, scalerfunc= \"std\"):\n", + "\n", + "    if scalerfunc == \"std\":\n", + "        arr = std_scaler_inv(arr, std_scalor= std_scalor)\n", + "        return arr\n", + "    \n", + "    elif scalerfunc == \"minmax\":\n", + "        arr = min_max_scaler_inv(arr, min_max_scalor= min_max_scalor)\n", + "        return arr\n", + "    \n", + "    else:\n", + "        raise ValueError(\"Unknown scaler chosen: {}\".format(scalerfunc))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "#scale the data\n", + "\n", + "func = \"minmax\"\n", + "\n", + "train_set = scaler(train_set, scalerfunc = func)\n", + "test_set = scaler(test_set, scalerfunc = func)\n", + "\n", + "if func == \"minmax\":\n", + "    scalor = min_max_scalor\n", + "elif func == \"std\":\n", + "    scalor = std_scalor\n", + "\n", + "#print(train_set[0,:,:])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "###create random mini_batches###\n", + "\n", + "\n", + "def unison_shuffled_copies(a, b):\n", + "    assert a.shape[0] == b.shape[0]\n", + "    p = np.random.permutation(a.shape[0])\n", + "    return a[p,:,:], b[p,:,:]\n", + "\n", + "def random_mini_batches(inputt, target, minibatch_size = 500):\n", + "    \n", + "    num_examples = inputt.shape[0]\n", + "    \n", + "    \n", + "    #Number of complete batches\n", + "    \n", + "    number_of_batches = int(num_examples/minibatch_size)\n", + "    minibatches = []\n", + "    \n", + "    #shuffle particles\n", + "    _i, _t = unison_shuffled_copies(inputt, target)\n", + "    #print(_t.shape)\n", + "    \n", + "    \n", + "    for i in range(number_of_batches):\n", + "        \n", + "        minibatch_train = _i[minibatch_size*i:minibatch_size*(i+1), :, :]\n", + "        \n", + "        minibatch_true = _t[minibatch_size*i:minibatch_size*(i+1), :, :]\n", + "        \n", + "        minibatches.append((minibatch_train, minibatch_true))\n", + "        \n", + "    \n", + "    minibatches.append((_i[number_of_batches*minibatch_size:, :, :], _t[number_of_batches*minibatch_size:, :, :]))\n", + "    \n", + "    \n", + "    return minibatches\n", + "    " + ] + },
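Review note: `random_mini_batches` unconditionally appends the remainder slice as a final batch, so when the sample count divides evenly by `minibatch_size` the last batch is empty and `fit()` would run a step on zero examples. A minimal sketch of a guard-free variant (hypothetical `_safe` name, not part of this PR):

```python
import numpy as np

def random_mini_batches_safe(inputt, target, minibatch_size=500):
    # Same contract as random_mini_batches above, but range() based
    # slicing never produces an empty trailing batch.
    num_examples = inputt.shape[0]
    p = np.random.permutation(num_examples)
    _i, _t = inputt[p], target[p]
    return [(_i[k:k + minibatch_size], _t[k:k + minibatch_size])
            for k in range(0, num_examples, minibatch_size)]
```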
"execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "#Create random minibatches of train and test set with input and target array\n", + "\n", + "\n", + "minibatches = random_mini_batches(train_set[:,:-1,:], train_set[:,1:,:], minibatch_size = 1000)\n", + "#_train, _target = minibatches[0]\n", + "test_input, test_target = test_set[:,:-1,:], test_set[:,1:,:]\n", + "#print(train[0,:,:], target[0,:,:])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "class RNNPlacePrediction():\n", + " \n", + " \n", + " def __init__(self, time_steps, future_steps, ninputs, ncells, num_output, cell_type=\"basic_rnn\", activation=\"relu\", scalor= scalor):\n", + " \n", + " self.nsteps = time_steps\n", + " self.future_steps = future_steps\n", + " self.ninputs = ninputs\n", + " self.ncells = ncells\n", + " self.num_output = num_output\n", + " self._ = cell_type #later used to create folder name\n", + " self.__ = activation #later used to create folder name\n", + " self.loss_list = []\n", + " self.scalor = scalor\n", + " \n", + " #### The input is of shape (num_examples, time_steps, ninputs)\n", + " #### ninputs is the dimentionality (number of features) of the time series (here coordinates)\n", + " self.X = tf.placeholder(dtype=tf.float32, shape=(None, self.nsteps, ninputs))\n", + " self.Y = tf.placeholder(dtype=tf.float32, shape=(None, self.nsteps, ninputs))\n", + "\n", + " \n", + " #Check if activation function valid and set activation\n", + " if self.__==\"relu\":\n", + " self.activation = tf.nn.relu\n", + " \n", + " elif self.__==\"tanh\":\n", + " self.activation = tf.nn.tanh\n", + " \n", + " elif self.__==\"leaky_relu\":\n", + " self.activation = tf.nn.leaky_relu\n", + " \n", + " elif self.__==\"elu\":\n", + " self.activation = tf.nn.elu\n", + " \n", + " else:\n", + " raise ValueError(\"Wrong rnn avtivation function: {}\".format(self.__))\n", + " \n", + " \n", + " \n", + " #Check if cell type valid and set cell_type\n", + " if self._==\"basic_rnn\":\n", + " self.cell_type = tf.contrib.rnn.BasicRNNCell\n", + " \n", + " elif self._==\"lstm\":\n", + " self.cell_type = tf.contrib.rnn.BasicLSTMCell\n", + " \n", + " elif self._==\"GRU\":\n", + " self.cell_type = tf.contrib.rnn.GRUCell\n", + " \n", + " else:\n", + " raise ValueError(\"Wrong rnn cell type: {}\".format(self._))\n", + " \n", + " \n", + " #Check Input of ncells \n", + " if (type(self.ncells) == int):\n", + " self.ncells = [self.ncells]\n", + " \n", + " if (type(self.ncells) != list):\n", + " raise ValueError(\"Wrong type of Input for ncells\")\n", + " \n", + " for _ in range(len(self.ncells)):\n", + " if type(self.ncells[_]) != int:\n", + " raise ValueError(\"Wrong type of Input for ncells\")\n", + " \n", + " self.activationlist = []\n", + " for _ in range(len(self.ncells)-1):\n", + " self.activationlist.append(self.activation)\n", + " self.activationlist.append(tf.nn.tanh)\n", + " \n", + " self.cell = tf.contrib.rnn.MultiRNNCell([self.cell_type(num_units=self.ncells[layer], activation=self.activationlist[layer])\n", + " for layer in range(len(self.ncells))])\n", + " \n", + " \n", + " #### I now define the output\n", + " self.RNNCell = tf.contrib.rnn.OutputProjectionWrapper(self.cell, output_size= num_output)\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " self.sess = tf.Session()\n", + " \n", + " def set_cost_and_functions(self, LR=0.001):\n", + " #### I define here the function that unrolls the RNN cell\n", + " self.output, self.state = 
+ { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "#saves the rnn model and all its parameters including the scaler used\n", + "#optionally also saves the minibatches used to train and the test set\n", + "\n", + "def full_save(rnn, train= True, test= True):\n", + "    folder = \"./rnn_model_\" + str(rnn._)+ \"_\" + rnn.__ + \"_\" + str(rnn.ncells).replace(\" \",\"\") + \"c/rnn_basic\"\n", + "    rnn.save(folder)\n", + "    pkl_name = folder[2:-10] + \".pkl\"\n", + "    \n", + "    \n", + "    pkl_dic = {\"ncells\": rnn.ncells,\n", + "              \"ninputs\": rnn.ninputs,\n", + "              \"future_steps\": rnn.future_steps,\n", + "              \"nsteps\": rnn.nsteps,\n", + "              \"num_output\": rnn.num_output,\n", + "              \"cell_type\": rnn._, #cell_type\n", + "              \"activation\": rnn.__, #Activation\n", + "              \"loss_list\": rnn.loss_list,\n", + "              \"scalor\": rnn.scalor}\n", + "    \n", + "    if train == True:\n", + "        pkl_dic[\"minibatches\"] = minibatches\n", + "    \n", + "    if test == True:\n", + "        pkl_dic[\"test_input\"] = test_input\n", + "        pkl_dic[\"test_target\"] = test_target\n", + "    \n", + "    pkl.dump( pkl_dic, open(pkl_name , \"wb\" ) )\n", + "    \n", + "    print(\"Model saved at: \", folder)\n", + "    print(\"Remaining data saved as: {}\".format(pkl_name))\n", + "\n", + "\n", + "\n", + "#loads the rnn model with all its parameters including the scaler used\n", + "#Checks if the pkl data also contains the training or test sets and returns them accordingly\n", + "def full_load(folder): \n", + "    #restores the state of the rnn with all information and returns the train and test set used\n", + "    \n", + "    #Directory of pkl file\n", + "    pkl_name = folder[2:-10] + \".pkl\"\n", + "    \n", + "    #Check if pkl file exists\n", + "    my_file = Path(pkl_name)\n", + "    if my_file.is_file() == False:\n", + "        raise ValueError(\"There is no .pkl file with the name: {}\".format(pkl_name))\n", + "    \n", + "    pkl_dic = pkl.load( open(pkl_name , \"rb\" ) )\n", + "    ncells = pkl_dic[\"ncells\"]\n", + "    ninputs = pkl_dic[\"ninputs\"]\n", + "    scalor = pkl_dic[\"scalor\"]\n", + "    future_steps = pkl_dic[\"future_steps\"]\n", + "    timesteps = pkl_dic[\"nsteps\"] \n", + "    num_output = pkl_dic[\"num_output\"]\n", + "    cell_type = pkl_dic[\"cell_type\"]\n", + "    activation = pkl_dic[\"activation\"]\n", + "    \n", + "    #Check if test or training set in dictionary\n", + "    batch = False\n", + "    test = False\n", + "    if \"minibatches\" in pkl_dic:\n", + "        batch = True\n", + "        minibatches = pkl_dic[\"minibatches\"]\n", + "    if \"test_input\" in pkl_dic:\n", + "        test = True\n", + "        test_input = pkl_dic[\"test_input\"]\n", + "        test_target = pkl_dic[\"test_target\"]\n", + "    \n", + "    #loads and initializes a new model with the exact same properties\n", + "    \n", + "    tf.reset_default_graph()\n", + "    rnn = RNNPlacePrediction(time_steps=timesteps, future_steps=future_steps, ninputs=ninputs, \n", + "                             ncells=ncells, num_output=num_output, cell_type=cell_type, activation=activation, scalor=scalor)\n", + "\n", + "    rnn.set_cost_and_functions()\n", + "    \n", + "    rnn.load(folder)\n", + "    \n", + "    rnn.loss_list = pkl_dic[\"loss_list\"]\n", + "    \n", + "    print(\"Model successfully loaded\")\n", + "    \n", + "    if batch and test:\n", + "        data = [minibatches, test_input, test_target]\n", + "        print(\"Minibatches (=training data), test_input and test_target loaded in data\")\n", + "        return rnn, data\n", + "    \n", + "    elif batch:\n", + "        data = [minibatches]\n", + "        print(\"Minibatches (=training data) loaded in data\")\n", + "        return rnn, data\n", + "    \n", + "    elif test:\n", + "        data = [test_input, test_target]\n", + "        print(\"test_input and test_target loaded in data\")\n", + "        return rnn, data\n", + "    \n", + "    else:\n", + "        data = []\n", + "        print(\"Only model restored, no training or test data found in {}\".format(pkl_name))\n", + "        print(\"Returned data is empty!\")\n", + "        return rnn, data\n", + "    \n", + "    \n", + "#returns the folder name used by full_save and full_load for a given architecture\n", + "def get_rnn_folder(ncells, cell_type, activation):\n", + "    folder = \"./rnn_model_\" + cell_type + \"_\" + activation + \"_\" + str(ncells).replace(\" \",\"\") + \"c/rnn_basic\"\n", + "    return folder" + ] + },
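Review note: the `folder[2:-10]` slice in `full_save`/`full_load` is not arbitrary: it drops the leading `./` (2 characters) and the trailing `/rnn_basic` (10 characters), leaving the architecture-specific directory name for the pickle. A short sketch of the path math, reproducing the name seen in the saved-output logs:

```python
# folder[2:-10] strips "./" at the front and "/rnn_basic" at the back.
folder = "./rnn_model_lstm_leaky_relu_[50,40,30,20,10]c/rnn_basic"
assert len("/rnn_basic") == 10
pkl_name = folder[2:-10] + ".pkl"
print(pkl_name)  # rnn_model_lstm_leaky_relu_[50,40,30,20,10]c.pkl
```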
at: \", folder)\n", + " \n", + " \n", + " \n", + " def predict(self, x):\n", + " return self.sess.run(self.output, feed_dict={self.X:x})\n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "#saves the rnn model and all its parameters including the scaler used\n", + "#optional also saves the minibatches used to train and the test set\n", + "\n", + "def full_save(rnn, train= True, test= True):\n", + " folder = \"./rnn_model_\" + str(rnn._)+ \"_\" + rnn.__ + \"_\" + str(rnn.ncells).replace(\" \",\"\") + \"c/rnn_basic\"\n", + " rnn.save(folder)\n", + " pkl_name = folder[2:-10] + \".pkl\"\n", + " \n", + " \n", + " pkl_dic = {\"ncells\": rnn.ncells,\n", + " \"ninputs\": rnn.ninputs,\n", + " \"future_steps\": rnn.future_steps,\n", + " \"nsteps\": rnn.nsteps,\n", + " \"num_output\": rnn.num_output,\n", + " \"cell_type\": rnn._, #cell_type\n", + " \"activation\": rnn.__, #Activation\n", + " \"loss_list\": rnn.loss_list,\n", + " \"scalor\": rnn.scalor}\n", + " \n", + " if train == True:\n", + " pkl_dic[\"minibatches\"] = minibatches\n", + " \n", + " if test == True:\n", + " pkl_dic[\"test_input\"] = test_input\n", + " pkl_dic[\"test_target\"] = test_target\n", + " \n", + " pkl.dump( pkl_dic, open(pkl_name , \"wb\" ) )\n", + " \n", + " print(\"Model saved at: \", folder)\n", + " print(\"Remaining data saved as: {}\".format(pkl_name))\n", + "\n", + "\n", + "\n", + "#loads the rnn model with all its parameters including the scaler used\n", + "#Checks if the pkl data also contains the training or test sets an return them accordingly\n", + "def full_load(folder): \n", + " #returns state of rnn with all information and returns the train and test set used\n", + " \n", + " #Directory of pkl file\n", + " pkl_name = folder[2:-10] + \".pkl\"\n", + " \n", + " #Check if pkl file exists\n", + " my_file = Path(pkl_name)\n", + " if my_file.is_file() == False:\n", + " raise ValueError(\"There is no .pkl file with the name: {}\".format(pkl_name))\n", + " \n", + " pkl_dic = pkl.load( open(pkl_name , \"rb\" ) )\n", + " ncells = pkl_dic[\"ncells\"]\n", + " ninputs = pkl_dic[\"ninputs\"]\n", + " scalor = pkl_dic[\"scalor\"]\n", + " future_steps = pkl_dic[\"future_steps\"]\n", + " timesteps = pkl_dic[\"nsteps\"] \n", + " num_output = pkl_dic[\"num_output\"]\n", + " cell_type = pkl_dic[\"cell_type\"]\n", + " activation = pkl_dic[\"activation\"]\n", + " \n", + " #Check if test or trainng set in dictionary\n", + " batch = False\n", + " test = False\n", + " if \"minibatches\" in pkl_dic:\n", + " batch = True\n", + " minibatches = pkl_dic[\"minibatches\"]\n", + " if \"test_input\" in pkl_dic:\n", + " test = True\n", + " test_input = [\"test_input\"]\n", + " test_target = [\"test_target\"]\n", + " \n", + " #loads and initializes a new model with the exact same properties\n", + " \n", + " tf.reset_default_graph()\n", + " rnn = RNNPlacePrediction(time_steps=timesteps, future_steps=future_steps, ninputs=ninputs, \n", + " ncells=ncells, num_output=num_output, cell_type=cell_type, activation=activation, scalor=scalor)\n", + "\n", + " rnn.set_cost_and_functions()\n", + " \n", + " rnn.load(folder)\n", + " \n", + " rnn.loss_list = pkl_dic[\"loss_list\"]\n", + " \n", + " print(\"Model succesfully loaded\")\n", + " \n", + " if batch and test:\n", + " data = [minibatches, test_input, test_target]\n", + " print(\"Minibatches (=training data) and test_input and test_target in data loaded\")\n", + " return rnn, data\n", + " \n", + " elif batch:\n", + " data = 
[minibatches]\n", + " print(\"Minibatches (=training data) loaded in data\")\n", + " return rnn, data\n", + " \n", + " elif test:\n", + " data = [test_input, test_target]\n", + " print(\"test_input and test_target loaded in data\")\n", + " return rnn, data\n", + " \n", + " else:\n", + " data = []\n", + " print(\"Only Model restored, no trainig or test data found in {}\".format(pkl_name))\n", + " print(\"Returned data is empty!\")\n", + " return rnn, data\n", + " \n", + " \n", + "#returns the folder name used by full_save and full_load for a given architecture\n", + "def get_rnn_folder(ncells, cell_type, activation):\n", + " folder = \"./rnn_model_\" + cell_type + \"_\" + activation + \"_\" + str(ncells).replace(\" \",\"\") + \"c/rnn_basic\"\n", + " return folder" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "#Plot the loss\n", + "def plot_loss_list(loss_list):\n", + " plt.plot(loss_list)\n", + " plt.xlabel(\"Epoch\")\n", + " plt.ylabel(\"Cost\")\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "def rnn_test(rnn, test_input= test_input, test_target= test_target):\n", + " \n", + " #Here I predict based on my test set\n", + " test_pred = rnn.predict(test_input)\n", + " \n", + " #Here i subtract a prediction (random particle) from the target to get an idea of the predictions\n", + " #scaler_inv(test_input, scalerfunc = func)[0,:,:]\n", + " diff = scaler_inv(test_pred, scalerfunc = func)-scaler_inv(test_target, scalerfunc = func )\n", + " print(diff[0,:,:])\n", + " \n", + " #Here I evaluate my model on the test set based on mean_squared_error\n", + " loss = rnn.sess.run(rnn.cost, feed_dict={rnn.X:test_input, rnn.Y:test_target})\n", + " print(\"Loss on test set:\", loss)\n", + " \n", + " return test_pred, loss" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/requiremements.py b/requiremements.py new file mode 100644 index 0000000..b7bc218 --- /dev/null +++ b/requiremements.py @@ -0,0 +1,645 @@ + +# coding: utf-8 + +# In[1]: + + +import pandas as pd +import numpy as np +import matplotlib as mpl +import random +import math +import numpy as np +import matplotlib.pyplot as plt +import tensorflow as tf +from tensorflow.python.framework import ops +from sklearn import preprocessing +import pickle as pkl +from pathlib import Path + + +# In[2]: + + +### Reshape original array into the shape (particlenumber, timesteps, input = coordinates)### + +def reshapor(arr_orig): + timesteps = int(arr_orig.shape[1]/3) + number_examples = int(arr_orig.shape[0]) + arr = np.zeros((number_examples, timesteps, 3)) + + for i in range(number_examples): + for t in range(timesteps): + arr[i,t,0:3] = arr_orig[i,3*t:3*t+3] + + return arr + +def reshapor_inv(array_shaped): + timesteps = int(array_shaped.shape[1]) + num_examples = int(array_shaped.shape[0]) + arr = np.zeros((num_examples, timesteps*3)) + + for i in range(num_examples): + for t in range(timesteps): + arr[i,3*t:3*t+3] = 
+ + +# In[3]: + + +### create the training set and the test set### + +def create_random_sets(dataset, train_to_total_ratio): +    #shuffle the dataset +    num_examples = dataset.shape[0] +    p = np.random.permutation(num_examples) +    dataset = dataset[p,:] +     +    #evaluate size of training and test set and initialize them +    train_set_size = np.int(num_examples*train_to_total_ratio) +    test_set_size = num_examples - train_set_size +     +    train_set = np.zeros((train_set_size, dataset.shape[1])) +    test_set = np.zeros((test_set_size, dataset.shape[1])) +     + +    #fill train and test sets +    for i in range(num_examples): +        if train_set_size > i: +            train_set[i,:] += dataset[i,:] +        else: +            test_set[i - train_set_size,:] += dataset[i,:] +     +    return train_set, test_set + + +# In[4]: + + +testset = pd.read_pickle('matched_8hittracks.pkl') +tset = np.array(testset) +tset = tset.astype('float32') +train_set, test_set = create_random_sets(tset, 0.99) + + +# In[5]: + + +#Normalize the data advanced version with scikit learn + +#set the transformation based on the training set +def set_min_max_scaler(arr, feature_range= (-1,1)): +    min_max_scalor = preprocessing.MinMaxScaler(feature_range=feature_range) +    if len(arr.shape) == 3: +        arr = reshapor(min_max_scalor.fit_transform(reshapor_inv(arr))) +    else: +        arr = min_max_scalor.fit_transform(arr) +    return min_max_scalor + +min_max_scalor = set_min_max_scaler(train_set) + + +#transform data +def min_max_scaler(arr, min_max_scalor= min_max_scalor): + +    if len(arr.shape) == 3: +        if arr.shape[1] == 8: +            arr = reshapor(min_max_scalor.transform(reshapor_inv(arr))) +        else: +            arr_ = np.zeros((arr.shape[0],24)) +            arr = reshapor_inv(arr) +            arr_[:,:arr.shape[1]] += arr +            arr = min_max_scalor.transform(arr_)[:,:arr.shape[1]] +            arr = reshapor(arr) + +    else: +        if arr.shape[1] == 24: +            arr = min_max_scalor.transform(arr) +        else: +            arr_ = np.zeros((arr.shape[0],24)) +            arr_[:,:arr.shape[1]] += arr +            arr = min_max_scalor.transform(arr_)[:,:arr.shape[1]] + +    return arr + +#inverse transformation +def min_max_scaler_inv(arr, min_max_scalor= min_max_scalor): + +    if len(arr.shape) == 3: +        if arr.shape[1] == 8: +            arr = reshapor(min_max_scalor.inverse_transform(reshapor_inv(arr))) +        else: +            arr_ = np.zeros((arr.shape[0],24)) +            arr = reshapor_inv(arr) +            arr_[:,:arr.shape[1]] += arr +            arr = min_max_scalor.inverse_transform(arr_)[:,:arr.shape[1]] +            arr = reshapor(arr) + +    else: +        if arr.shape[1] == 24: +            arr = min_max_scalor.inverse_transform(arr) +        else: +            arr_ = np.zeros((arr.shape[0],24)) +            arr_[:,:arr.shape[1]] += arr +            arr = min_max_scalor.inverse_transform(arr_)[:,:arr.shape[1]] + +    return arr
+ + +# In[6]: + + +#Normalize the data advanced version with scikit learn - Standard scaler + +#set the transformation based on the training set +def set_std_scaler(arr): +    std_scalor = preprocessing.StandardScaler() +    if len(arr.shape) == 3: +        std_scalor.fit(reshapor_inv(arr)) +    else: +        std_scalor.fit(arr) +    return std_scalor + +std_scalor = set_std_scaler(train_set) + +#transform data +def std_scaler(arr, std_scalor= std_scalor): + +    if len(arr.shape) == 3: +        if arr.shape[1] == 8: +            arr = reshapor(std_scalor.transform(reshapor_inv(arr))) +        else: +            arr_ = np.zeros((arr.shape[0],24)) +            arr = reshapor_inv(arr) +            arr_[:,:arr.shape[1]] += arr +            arr = std_scalor.transform(arr_)[:,:arr.shape[1]] +            arr = reshapor(arr) + +    else: +        if arr.shape[1] == 24: +            arr = std_scalor.transform(arr) +        else: +            arr_ = np.zeros((arr.shape[0],24)) +            arr_[:,:arr.shape[1]] += arr +            arr = std_scalor.transform(arr_)[:,:arr.shape[1]] + +    return arr + +#inverse transformation +def std_scaler_inv(arr, std_scalor= std_scalor): + +    if len(arr.shape) == 3: +        if arr.shape[1] == 8: +            arr = reshapor(std_scalor.inverse_transform(reshapor_inv(arr))) +        else: +            arr_ = np.zeros((arr.shape[0],24)) +            arr = reshapor_inv(arr) +            arr_[:,:arr.shape[1]] += arr +            arr = std_scalor.inverse_transform(arr_)[:,:arr.shape[1]] +            arr = reshapor(arr) + +    else: +        if arr.shape[1] == 24: +            arr = std_scalor.inverse_transform(arr) +        else: +            arr_ = np.zeros((arr.shape[0],24)) +            arr_[:,:arr.shape[1]] += arr +            arr = std_scalor.inverse_transform(arr_)[:,:arr.shape[1]] + +    return arr + + +# In[7]: + + +train_set = reshapor(train_set) +test_set = reshapor(test_set) + + +# In[8]: + + +#Scale data either with MinMax scaler or with Standard scaler +#Returns the scaled array + +def scaler(arr, std_scalor= std_scalor, min_max_scalor= min_max_scalor, scalerfunc= "std"): + +    if scalerfunc == "std": +        arr = std_scaler(arr, std_scalor= std_scalor) +        return arr + +    elif scalerfunc == "minmax": +        arr = min_max_scaler(arr, min_max_scalor= min_max_scalor) +        return arr + +    else: +        raise ValueError("Unknown scaler chosen: {}".format(scalerfunc)) + +def scaler_inv(arr, std_scalor= std_scalor, min_max_scalor= min_max_scalor, scalerfunc= "std"): + +    if scalerfunc == "std": +        arr = std_scaler_inv(arr, std_scalor= std_scalor) +        return arr + +    elif scalerfunc == "minmax": +        arr = min_max_scaler_inv(arr, min_max_scalor= min_max_scalor) +        return arr + +    else: +        raise ValueError("Unknown scaler chosen: {}".format(scalerfunc)) + + +# In[9]: + + +#scale the data + +func = "minmax" + +train_set = scaler(train_set, scalerfunc = func) +test_set = scaler(test_set, scalerfunc = func) + +if func == "minmax": +    scalor = min_max_scalor +elif func == "std": +    scalor = std_scalor + +#print(train_set[0,:,:]) + + +# In[10]: + + +###create random mini_batches### + + +def unison_shuffled_copies(a, b): +    assert a.shape[0] == b.shape[0] +    p = np.random.permutation(a.shape[0]) +    return a[p,:,:], b[p,:,:] + +def random_mini_batches(inputt, target, minibatch_size = 500): + +    num_examples = inputt.shape[0] + + +    #Number of complete batches + +    number_of_batches = int(num_examples/minibatch_size) +    minibatches = [] + +    #shuffle particles +    _i, _t = unison_shuffled_copies(inputt, target) +    #print(_t.shape) + + +    for i in range(number_of_batches): + +        minibatch_train = _i[minibatch_size*i:minibatch_size*(i+1), :, :] + +        minibatch_true = _t[minibatch_size*i:minibatch_size*(i+1), :, :] + +        minibatches.append((minibatch_train, minibatch_true)) + + +    minibatches.append((_i[number_of_batches*minibatch_size:, :, :], _t[number_of_batches*minibatch_size:, :, :])) + + +    return minibatches + + + +# In[11]: + + +#Create random minibatches of train and test set with input and target array + + +minibatches = random_mini_batches(train_set[:,:-1,:], train_set[:,1:,:], minibatch_size = 1000) +#_train, _target = minibatches[0] +test_input, test_target = test_set[:,:-1,:], test_set[:,1:,:] +#print(train[0,:,:], target[0,:,:])
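Review note: `unison_shuffled_copies` must keep input and target rows aligned, since each target track is the same particle shifted by one timestep; it does, because one permutation `p` indexes both arrays. A tiny check against the helper above (the toy arrays are illustrative only):

```python
import numpy as np

# Pairs must stay matched through the shuffle: the same permutation p
# is applied to both arrays, so row i of sa still corresponds to row i
# of sb after shuffling.
a = np.arange(24).reshape(4, 2, 3)
b = a + 100                          # targets mirror the inputs
sa, sb = unison_shuffled_copies(a, b)
assert np.array_equal(sb, sa + 100)  # alignment survives the shuffle
```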
+
+
+# In[12]:
+
+
+class RNNPlacePrediction():
+
+
+    def __init__(self, time_steps, future_steps, ninputs, ncells, num_output, cell_type="basic_rnn", activation="relu", scalor= scalor):
+
+        self.nsteps = time_steps
+        self.future_steps = future_steps
+        self.ninputs = ninputs
+        self.ncells = ncells
+        self.num_output = num_output
+        self._ = cell_type #later used to create the folder name
+        self.__ = activation #later used to create the folder name
+        self.loss_list = []
+        self.scalor = scalor
+
+        #### The input is of shape (num_examples, time_steps, ninputs)
+        #### ninputs is the dimensionality (number of features) of the time series (here coordinates)
+        self.X = tf.placeholder(dtype=tf.float32, shape=(None, self.nsteps, ninputs))
+        self.Y = tf.placeholder(dtype=tf.float32, shape=(None, self.nsteps, ninputs))
+
+        #Check if the activation function is valid and set activation
+        if self.__=="relu":
+            self.activation = tf.nn.relu
+
+        elif self.__=="tanh":
+            self.activation = tf.nn.tanh
+
+        elif self.__=="leaky_relu":
+            self.activation = tf.nn.leaky_relu
+
+        elif self.__=="elu":
+            self.activation = tf.nn.elu
+
+        else:
+            raise ValueError("Unknown RNN activation function: {}".format(self.__))
+
+
+        #Check if the cell type is valid and set cell_type
+        if self._=="basic_rnn":
+            self.cell_type = tf.contrib.rnn.BasicRNNCell
+
+        elif self._=="lstm":
+            self.cell_type = tf.contrib.rnn.BasicLSTMCell
+
+        elif self._=="GRU":
+            self.cell_type = tf.contrib.rnn.GRUCell
+
+        else:
+            raise ValueError("Unknown RNN cell type: {}".format(self._))
+
+
+        #Check the input for ncells: an int or a list of ints
+        if isinstance(self.ncells, int):
+            self.ncells = [self.ncells]
+
+        if not isinstance(self.ncells, list):
+            raise ValueError("ncells must be an int or a list of ints")
+
+        for n in self.ncells:
+            if not isinstance(n, int):
+                raise ValueError("ncells must be an int or a list of ints")
+
+        #Use the chosen activation on all layers except the last one, which uses tanh
+        self.activationlist = []
+        for _ in range(len(self.ncells)-1):
+            self.activationlist.append(self.activation)
+        self.activationlist.append(tf.nn.tanh)
+
+        self.cell = tf.contrib.rnn.MultiRNNCell([self.cell_type(num_units=self.ncells[layer], activation=self.activationlist[layer])
+                                                 for layer in range(len(self.ncells))])
+
+
+        #### I now define the output: a linear projection of the last layer to num_output values per time step
+        self.RNNCell = tf.contrib.rnn.OutputProjectionWrapper(self.cell, output_size= num_output)
+
+        self.sess = tf.Session()
+
+
+    def set_cost_and_functions(self, LR=0.001):
+        #### I define here the function that unrolls the RNN cell
+        self.output, self.state = tf.nn.dynamic_rnn(self.RNNCell, self.X, dtype=tf.float32)
+        #### I define the cost function as the mean_squared_error (distance of the predicted point to the target)
+        self.cost = tf.reduce_mean(tf.losses.mean_squared_error(self.Y, self.output))
+
+        #### the rest proceeds as usual
+        self.train = tf.train.AdamOptimizer(LR).minimize(self.cost)
+        #### Variable initializer
+        self.init = tf.global_variables_initializer()
+        self.saver = tf.train.Saver()
+        self.sess.run(self.init)
+
+
+    def save(self, rnn_folder="./rnn_model/rnn_basic"):
+        self.saver.save(self.sess, rnn_folder)
+
+
+    def load(self, filename="./rnn_model/rnn_basic"):
+        self.saver.restore(self.sess, filename)
+
+
+    def fit(self, minibatches, epochs, print_step, checkpoint = 5, patience = 200):
+        patience_cnt = 0
+        start = len(self.loss_list)
+        epoche_save = start
+
+        folder = "./rnn_model_" + str(self._) + "_" + self.__ + "_" + str(self.ncells).replace(" ","") + "c" + "_checkpoint/rnn_basic"
+
+        for iep in range(start, start + epochs):
+            loss = 0
+
+            batches = len(minibatches)
+            #Here I iterate over the batches
+            for batch in range(batches):
+                #### Here I train the RNN cell
+                #### The X is the time series, the Y is shifted by 1 time step
+                train, target = minibatches[batch]
+                self.sess.run(self.train, feed_dict={self.X:train, self.Y:target})
+
+                loss += self.sess.run(self.cost, feed_dict={self.X:train, self.Y:target})
+
+            #Average the loss over the number of batches
+            loss /= batches
+            self.loss_list.append(loss)
+
+            #print(loss)
+
+            #Here I create a checkpoint if the performance improved
+            if iep > 1 and iep%checkpoint == 0 and self.loss_list[iep] < self.loss_list[epoche_save]:
+                #print("Checkpoint created at epoch: ", iep)
+                self.save(folder)
+                epoche_save = iep
+
+            #early stopping with patience
+            if iep > 1 and abs(self.loss_list[iep]-self.loss_list[iep-1]) < 1.5e-7:
+                patience_cnt += 1
+                #print("Patience now at: ", patience_cnt, " of ", patience)
+
+                if patience_cnt >= patience:
+                    print("\n", "Early stopping at epoch ", iep, ", difference: ", abs(self.loss_list[iep]-self.loss_list[iep-1]))
+                    print("Cost: ", loss)
+                    break
+
+            #Note that the loss here is multiplied by 10**6 for easier reading
+            if iep%print_step==0:
+                print("Epoch number ", iep)
+                print("Cost: ", loss*10**6, "e-6")
+                print("Patience: ", patience_cnt, "/", patience)
+                print("Last checkpoint at: Epoch ", epoche_save, "\n")
+
+        #Set the model back to the last checkpoint if its performance was better
+        if self.loss_list[epoche_save] < self.loss_list[iep]:
+            self.load(folder)
+            print("\n")
+            print("State of last checkpoint at epoch ", epoche_save, " restored")
+            print("Performance at last checkpoint is ", (self.loss_list[iep] - self.loss_list[epoche_save])/self.loss_list[iep]*100, "% better")
+
+        folder = "./rnn_model_" + str(self._) + "_" + self.__ + "_" + str(self.ncells).replace(" ","") + "c/rnn_basic"
+        self.save(folder)
+        print("\n")
+        print("Model saved at: ", folder)
+
+
+    def predict(self, x):
+        return self.sess.run(self.output, feed_dict={self.X:x})
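+
+
+# In[ ]:
+
+
+#Illustrative sketch (added for clarity, not part of the original pipeline):
+#for e.g. ncells = [50, 40, 30, 20, 10] with cell_type = "lstm" and
+#activation = "leaky_relu", the constructor above builds this stack:
+#
+#    layer 0: BasicLSTMCell(50, leaky_relu)
+#    layer 1: BasicLSTMCell(40, leaky_relu)
+#    layer 2: BasicLSTMCell(30, leaky_relu)
+#    layer 3: BasicLSTMCell(20, leaky_relu)
+#    layer 4: BasicLSTMCell(10, tanh)      #the last layer always uses tanh
+#    output : linear projection to num_output = 3 coordinates per time step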
+
+
+# In[13]:
+
+
+#saves the rnn model and all its parameters including the scaler used
+#optionally also saves the minibatches used for training and the test set
+
+def full_save(rnn, train= True, test= True):
+    folder = "./rnn_model_" + str(rnn._) + "_" + rnn.__ + "_" + str(rnn.ncells).replace(" ","") + "c/rnn_basic"
+    rnn.save(folder)
+    pkl_name = folder[2:-10] + ".pkl"
+
+    pkl_dic = {"ncells": rnn.ncells,
+               "ninputs": rnn.ninputs,
+               "future_steps": rnn.future_steps,
+               "nsteps": rnn.nsteps,
+               "num_output": rnn.num_output,
+               "cell_type": rnn._, #cell_type
+               "activation": rnn.__, #activation
+               "loss_list": rnn.loss_list,
+               "scalor": rnn.scalor}
+
+    if train:
+        pkl_dic["minibatches"] = minibatches
+
+    if test:
+        pkl_dic["test_input"] = test_input
+        pkl_dic["test_target"] = test_target
+
+    with open(pkl_name, "wb") as file:
+        pkl.dump(pkl_dic, file)
+
+    print("Model saved at: ", folder)
+    print("Remaining data saved as: {}".format(pkl_name))
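+
+
+# In[ ]:
+
+
+#Worked example of the naming convention (sketch added for clarity):
+#folder[2:-10] strips the leading "./" and the trailing "/rnn_basic",
+#so the pickle file ends up next to the checkpoint folder.
+_folder = "./rnn_model_lstm_leaky_relu_[50,40,30,20,10]c/rnn_basic"
+assert _folder[2:-10] + ".pkl" == "rnn_model_lstm_leaky_relu_[50,40,30,20,10]c.pkl"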
["test_input"] + test_target = ["test_target"] + + #loads and initializes a new model with the exact same properties + + tf.reset_default_graph() + rnn = RNNPlacePrediction(time_steps=timesteps, future_steps=future_steps, ninputs=ninputs, + ncells=ncells, num_output=num_output, cell_type=cell_type, activation=activation, scalor=scalor) + + rnn.set_cost_and_functions() + + rnn.load(folder) + + rnn.loss_list = pkl_dic["loss_list"] + + print("Model succesfully loaded") + + if batch and test: + data = [minibatches, test_input, test_target] + print("Minibatches (=training data) and test_input and test_target in data loaded") + return rnn, data + + elif batch: + data = [minibatches] + print("Minibatches (=training data) loaded in data") + return rnn, data + + elif test: + data = [test_input, test_target] + print("test_input and test_target loaded in data") + return rnn, data + + else: + data = [] + print("Only Model restored, no trainig or test data found in {}".format(pkl_name)) + print("Returned data is empty!") + return rnn, data + + +#returns the folder name used by full_save and full_load for a given architecture +def get_rnn_folder(ncells, cell_type, activation): + folder = "./rnn_model_" + cell_type + "_" + activation + "_" + str(ncells).replace(" ","") + "c/rnn_basic" + return folder + + +# In[15]: + + +#Plot the loss +def plot_loss_list(loss_list): + plt.plot(loss_list) + plt.xlabel("Epoch") + plt.ylabel("Cost") + plt.show() + + +# In[17]: + + +def rnn_test(rnn, test_input= test_input, test_target= test_target): + + #Here I predict based on my test set + test_pred = rnn.predict(test_input) + + #Here i subtract a prediction (random particle) from the target to get an idea of the predictions + #scaler_inv(test_input, scalerfunc = func)[0,:,:] + diff = scaler_inv(test_pred, scalerfunc = func)-scaler_inv(test_target, scalerfunc = func ) + print(diff[0,:,:]) + + #Here I evaluate my model on the test set based on mean_squared_error + loss = rnn.sess.run(rnn.cost, feed_dict={rnn.X:test_input, rnn.Y:test_target}) + print("Loss on test set:", loss) + + return test_pred, loss +