# Script form of the DNN_selection notebook: load a pickled dictionary of
# branches, normalise the vertex chi2 variables, run a previously trained
# DNN checkpoint over all events and compare the Ds_ConsD_M spectrum before
# and after the network selection.  "# %%" markers keep the original cell
# boundaries; the bracketed numbers are the recorded execution counts.

# %% [1] Imports and shared initialisers
import math
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

trunc_normal = tf.truncated_normal_initializer(stddev=1)
normal = tf.random_normal_initializer(stddev=1)

# Kept AFTER the two initialisers, as in the original cell, so that any
# same-named object exported by these star imports still wins.
# NOTE(review): l_flv, mag_status, extract_array and DNN are not defined in
# this file; presumably they come from these modules -- confirm.
from architectures.data_processing import *
from architectures.utils.toolbox import *
from architectures.DNN import *

# %% [2] Dataset selection
l_index = 1
mag_index = 1

# %% [3] Load the pickled branch dictionary
lepton = l_flv[l_index]
magnet = mag_status[mag_index]
input_file = ('/disk/lhcb_data/davide/Rphipi/NN_for_selection/'
              + lepton + lepton + '/'
              + 'data_for_NN_' + lepton + lepton + '_Mag' + magnet + '.pickle')

# SECURITY NOTE: pickle.load executes arbitrary code embedded in the file;
# only open files produced by a trusted step of this analysis.
with open(input_file, 'rb') as f:
    data_dict = pickle.load(f, encoding='latin1')

# %% [4] Divide the chi2 variables by a number of degrees of freedom
for particle in ("Ds", "phi"):
    endvertex_ndof = data_dict[particle + "_ENDVERTEX_NDOF"]
    ownpv_ndof = data_dict[particle + "_OWNPV_NDOF"]

    data_dict[particle + "_ENDVERTEX_CHI2"] = (
        data_dict[particle + "_ENDVERTEX_CHI2"] / endvertex_ndof)
    data_dict[particle + "_OWNPV_CHI2"] = (
        data_dict[particle + "_OWNPV_CHI2"] / ownpv_ndof)
    # NOTE(review): IPCHI2_OWNPV is divided by the END-VERTEX ndof, exactly
    # as in the original cell.  This looks like it may have been meant to be
    # the OWNPV ndof (or no division at all) -- confirm before changing,
    # since an existing checkpoint was presumably trained on this definition.
    data_dict[particle + "_IPCHI2_OWNPV"] = (
        data_dict[particle + "_IPCHI2_OWNPV"] / endvertex_ndof)

    # The NDOF branches are not used again after the normalisation.
    del data_dict[particle + "_ENDVERTEX_NDOF"]
    del data_dict[particle + "_OWNPV_NDOF"]

# %% [5] Branches fed to the network (commented entries are excluded)
branches_needed = [
    "Ds_ENDVERTEX_CHI2",
    #"Ds_ENDVERTEX_NDOF",
    "Ds_OWNPV_CHI2",
    #"Ds_OWNPV_NDOF",
    "Ds_IPCHI2_OWNPV",
    "Ds_IP_OWNPV",
    "Ds_DIRA_OWNPV",
    #l_flv[l_index]+"_plus_MC15TuneV1_ProbNN"+l_flv[l_index],
    #"Ds_Hlt1TrackMVADecision_TOS",
    #"Ds_Hlt2RareCharmD2Pi"+l_flv[l_index].capitalize()+l_flv[l_index].capitalize()+"OSDecision_TOS",
    #"Ds_Hlt2Phys_TOS",
    "phi_ENDVERTEX_CHI2",
    #"phi_ENDVERTEX_NDOF",
    "phi_OWNPV_CHI2",
    #"phi_OWNPV_NDOF",
    "phi_IPCHI2_OWNPV",
    "phi_IP_OWNPV",
    "phi_DIRA_OWNPV",
    #"Ds_ConsD_M",
]

# %% [6] Dataset dimensions
# m   : number of entries in the Ds_ConsD_M branch (rows of the input array)
# dim : number of input features (one per selected branch)
m = data_dict["Ds_ConsD_M"].shape[0]
dim = len(branches_needed)

# %% [7] Build the input array
data = extract_array(data_dict, branches_needed, dim, m)

# %% [8] Sanity check -- the recorded notebook output was (106404, 10)
data.shape

# %% [9] Task and model directory
task = 'TEST'

PATH = l_flv[l_index] + '_Mag' + mag_status[mag_index] + '_test_4'

# %% [10] Restore the hyper-parameters saved next to the model
# Computed before PATH is (possibly) overwritten by the stored value below.
hyper_file = PATH + '/hyper_parameters.pkl'

if task == 'TEST' and os.path.exists(hyper_file):
    # SECURITY NOTE: same pickle caveat as for the input data above.
    with open(hyper_file, 'rb') as f:
        hyper_dict = pickle.load(f)

    # hyper_dict["m"] is not restored (that assignment was already commented
    # out in the original cell); m stays the value computed from the data.
    test_size = hyper_dict["test_size"]
    val_size = hyper_dict["val_size"]
    LEARNING_RATE = hyper_dict["LEARNING_RATE"]
    BETA1 = hyper_dict["BETA1"]
    BATCH_SIZE = hyper_dict["BATCH_SIZE"]
    EPOCHS = hyper_dict["EPOCHS"]
    VAL_PERIOD = hyper_dict["VAL_PERIOD"]
    SEED = hyper_dict["SEED"]
    sizes = hyper_dict["sizes"]
    LAMBD = hyper_dict["LAMBD"]
    PATH = hyper_dict["PATH"]
elif task == 'TEST':
    # Without this file none of the names above exist and bkg() fails with
    # "NameError: name 'sizes' is not defined" (the error recorded in the
    # original notebook run).  Say so up front instead of failing later.
    print('No hyper-parameters file at ' + hyper_file
          + ': bkg() cannot build the network')


# %% [11] Inference helper
def bkg(data, *, batch_size_output=5000, drop_remainder=True):
    """Run the restored network over ``data`` in consecutive batches.

    The network is rebuilt from the module-level hyper-parameters (``dim``,
    ``sizes``, ``LEARNING_RATE``, ``BETA1``, ``LAMBD``, ``BATCH_SIZE``,
    ``EPOCHS``, ``VAL_PERIOD``, ``PATH``, ``SEED``) and its variables are
    restored from ``PATH + '/CNN_model.ckpt'``.

    Args:
        data: Sliceable collection of input rows (the notebook passes the
            array returned by ``extract_array``).
        batch_size_output: Number of rows handed to ``nn.predict`` per call.
        drop_remainder: When True (default, matches the original output
            length) rows beyond the last full batch are not evaluated.
            Pass False to also evaluate the final, shorter batch.
            NOTE(review): assumes ``DNN.predict`` accepts a batch of any
            length -- confirm before relying on ``drop_remainder=False``.

    Returns:
        The per-batch results of ``nn.predict`` joined with
        ``np.concatenate``; row ``k`` of the result corresponds to row ``k``
        of ``data``.

    Raises:
        ValueError: If ``batch_size_output`` is not positive or no batch can
            be formed from ``data``.
        NameError: If the hyper-parameters were never loaded.
    """
    if batch_size_output <= 0:
        raise ValueError('batch_size_output must be a positive integer')

    n_rows = len(data)
    if drop_remainder:
        n_rows -= n_rows % batch_size_output
    if n_rows == 0:
        raise ValueError('not enough rows in data to form a single batch')

    tf.reset_default_graph()
    nn = DNN(dim, sizes,
             lr=LEARNING_RATE, beta1=BETA1, lambd=LAMBD,
             batch_size=BATCH_SIZE, epochs=EPOCHS,
             save_sample=VAL_PERIOD, path=PATH, seed=SEED)

    # Only the global variables that are NOT trainable are initialised here;
    # the saver restores the checkpointed values afterwards.
    trainable_vars = set(tf.trainable_variables())
    other_vars = list(set(tf.global_variables()) - trainable_vars)
    init_op = tf.variables_initializer(other_vars)

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
    session_config = tf.ConfigProto(gpu_options=gpu_options)

    with tf.Session(config=session_config) as sess:
        sess.run(init_op)
        print('\n Selecting signal events with model...')
        saver.restore(sess, PATH + '/CNN_model.ckpt')
        print('Model restored.')

        nn.set_session(sess)

        # Step through the rows one batch at a time.  The original code
        # sliced data[i:i+batch_size_output] with i the batch INDEX, so the
        # batches overlapped and only the first few thousand rows were ever
        # evaluated; the offset has to advance by a whole batch.
        batch_outputs = [
            nn.predict(data[start:start + batch_size_output])
            for start in range(0, n_rows, batch_size_output)
        ]
        output = np.concatenate(batch_outputs)

    return output


# %% [12] Run the selection
if __name__ == '__main__':
    if os.path.exists(PATH + '/checkpoint'):
        output = bkg(data)
    else:
        print('No checkpoint')

# %% Inspect the raw network output
output

# %% Number of events passing the selection
# An event is kept when the arg-max over axis 1 is a non-zero index
# (presumably the signal class -- confirm against the training labels).
# The builtin bool replaces np.bool, which newer NumPy releases removed.
selected_mask = np.argmax(output, axis=1).astype(bool)
selected_mask.sum()

# %% Mass entries for the evaluated events
# Only the first output.shape[0] entries have a prediction.
masses = data_dict["Ds_ConsD_M"][0:output.shape[0]]
a = masses[selected_mask]
b = list(masses)

# %% Keep the first element of every mass entry
NN_selected = np.array([entry[0] for entry in a])
full = np.array([entry[0] for entry in b])

# %% Drop negative masses from the reference spectrum
# ~(full < 0) rather than (full >= 0) so NaN entries are kept, exactly as
# np.delete(full, np.where(full < 0)) did.
full = full[~(full < 0)]

# %% Mass spectrum before and after the network selection
plt.hist(full, alpha=0.4, bins=100, range=(0, 3000))
plt.hist(NN_selected, alpha=0.4, bins=100, range=(0, 3000))
fig = plt.gcf()
fig.set_size_inches(16, 10)

# %% Scratch cell kept from the original notebook
np.random.randint(14)