{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "import matplotlib.pyplot as plt\n",
    "import os\n",
    "import pickle\n",
    "import math\n",
    "\n",
    "trunc_normal= tf.truncated_normal_initializer(stddev=1)\n",
    "normal = tf.random_normal_initializer(stddev=1)\n",
    "\n",
    "from architectures.data_processing import *\n",
    "from architectures.utils.toolbox import *\n",
    "from architectures.DNN import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "l_index=1\n",
    "mag_index=1"
   ]
  },
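  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`l_flv` and `mag_status` are lookup lists imported from the `architectures` package rather than defined in this notebook; `l_index` and `mag_index` select the lepton flavour and magnet polarity used to build the file paths below. Purely as an illustration (an assumption, not taken from this repository), one could imagine `l_flv = ['e', 'mu']` and `mag_status = ['Down', 'Up']`."
   ]
  },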
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('/disk/lhcb_data/davide/Rphipi/NN_for_selection/'+l_flv[l_index]+l_flv[l_index]+'/'+'data_for_NN_'+l_flv[l_index]+l_flv[l_index]+'_Mag'+mag_status[mag_index]+'.pickle', 'rb') as f:\n",
    "    data_dict=pickle.load(f, encoding='latin1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "data_dict[\"Ds_ENDVERTEX_CHI2\"]=data_dict[\"Ds_ENDVERTEX_CHI2\"]/data_dict[\"Ds_ENDVERTEX_NDOF\"]\n",
    "data_dict[\"Ds_OWNPV_CHI2\"]=data_dict[\"Ds_OWNPV_CHI2\"]/data_dict[\"Ds_OWNPV_NDOF\"]\n",
    "data_dict[\"Ds_IPCHI2_OWNPV\"]=data_dict[\"Ds_IPCHI2_OWNPV\"]/data_dict[\"Ds_ENDVERTEX_NDOF\"]\n",
    "\n",
    "del data_dict[\"Ds_ENDVERTEX_NDOF\"]\n",
    "del data_dict[\"Ds_OWNPV_NDOF\"]\n",
    "\n",
    "data_dict[\"phi_ENDVERTEX_CHI2\"]=data_dict[\"phi_ENDVERTEX_CHI2\"]/data_dict[\"phi_ENDVERTEX_NDOF\"]\n",
    "data_dict[\"phi_OWNPV_CHI2\"]=data_dict[\"phi_OWNPV_CHI2\"]/data_dict[\"phi_OWNPV_NDOF\"]\n",
    "data_dict[\"phi_IPCHI2_OWNPV\"]=data_dict[\"phi_IPCHI2_OWNPV\"]/data_dict[\"phi_ENDVERTEX_NDOF\"]\n",
    "\n",
    "del data_dict[\"phi_ENDVERTEX_NDOF\"]\n",
    "del data_dict[\"phi_OWNPV_NDOF\"]\n"
   ]
  },
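  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The cell above converts each vertex-fit $\\chi^2$ into a reduced $\\chi^2$ ($\\chi^2/\\mathrm{NDOF}$) before the NDOF branches are dropped. Note that, as written, the impact-parameter $\\chi^2$ branches (`*_IPCHI2_OWNPV`) are divided by the decay-vertex NDOF (`*_ENDVERTEX_NDOF`)."
   ]
  },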
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "branches_needed = [\n",
    "                    \"Ds_ENDVERTEX_CHI2\",\n",
    "                    #\"Ds_ENDVERTEX_NDOF\",\n",
    "                    \"Ds_OWNPV_CHI2\",\n",
    "                    #\"Ds_OWNPV_NDOF\",\n",
    "                    \"Ds_IPCHI2_OWNPV\",\n",
    "                    \"Ds_IP_OWNPV\",\n",
    "                    \"Ds_DIRA_OWNPV\",\n",
    "                    #l_flv[l_index]+\"_plus_MC15TuneV1_ProbNN\"+l_flv[l_index],\n",
    "                    #\"Ds_Hlt1TrackMVADecision_TOS\",\n",
    "                    #\"Ds_Hlt2RareCharmD2Pi\"+l_flv[l_index].capitalize()+l_flv[l_index].capitalize()+\"OSDecision_TOS\",\n",
    "                    #\"Ds_Hlt2Phys_TOS\",\n",
    "                    \"phi_ENDVERTEX_CHI2\",\n",
    "                    #\"phi_ENDVERTEX_NDOF\",\n",
    "                    \"phi_OWNPV_CHI2\",\n",
    "                    #\"phi_OWNPV_NDOF\",\n",
    "                    \"phi_IPCHI2_OWNPV\",\n",
    "                    \"phi_IP_OWNPV\",\n",
    "                    \"phi_DIRA_OWNPV\",\n",
    "                    #\"Ds_ConsD_M\",\n",
    "                  ] "
   ]
  },
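  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The commented-out branches (PID and trigger decisions, plus `Ds_ConsD_M` itself) are kept for reference but excluded from the input features, leaving ten active branches; this matches the feature dimension reported by `data.shape` below."
   ]
  },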
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Number of input features\n",
    "m=data_dict[\"Ds_ConsD_M\"].shape[0]\n",
    "dim=len(branches_needed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = extract_array(data_dict, branches_needed, dim, m)"
   ]
  },
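  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`extract_array` comes from `architectures.data_processing` and is not shown here. A minimal sketch of its assumed behaviour, stacking the selected branches into an `(m, dim)` feature matrix, could look like the following (an illustration, not the package's actual implementation):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothetical equivalent of extract_array (assumption, for illustration only):\n",
    "# one row per event, one column per branch.\n",
    "def extract_array_sketch(data_dict, branches, dim, m):\n",
    "    X = np.zeros((m, dim))\n",
    "    for j, branch in enumerate(branches):\n",
    "        X[:, j] = np.asarray(data_dict[branch]).reshape(m)\n",
    "    return X"
   ]
  },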
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(106404, 10)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "task='TEST'\n",
    "\n",
    "PATH=l_flv[l_index]+'_Mag'+mag_status[mag_index]+'_test_4'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "if task == 'TEST' and os.path.exists(PATH+'/hyper_parameters.pkl'):\n",
    "    with open(PATH+'/hyper_parameters.pkl', 'rb') as f:  \n",
    "        hyper_dict = pickle.load(f)\n",
    "        #for key, item in hyper_dict.items():\n",
    "        #    print(key+':'+str(item))\n",
    "            \n",
    "    #m=hyper_dict[\"m\"]\n",
    "    test_size=hyper_dict[\"test_size\"]\n",
    "    val_size=hyper_dict[\"val_size\"]\n",
    "    LEARNING_RATE=hyper_dict[\"LEARNING_RATE\"]\n",
    "    BETA1=hyper_dict[\"BETA1\"]\n",
    "    BATCH_SIZE=hyper_dict[\"BATCH_SIZE\"]\n",
    "    EPOCHS=hyper_dict[\"EPOCHS\"]\n",
    "    VAL_PERIOD=hyper_dict[\"VAL_PERIOD\"]\n",
    "    SEED=hyper_dict[\"SEED\"]\n",
    "    sizes=hyper_dict[\"sizes\"]\n",
    "    LAMBD=hyper_dict[\"LAMBD\"]\n",
    "    PATH=hyper_dict[\"PATH\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def bkg(data):\n",
    "    \n",
    "    batch_size_output=5000\n",
    "    n_batches_output = m//batch_size_output\n",
    "\n",
    "    tf.reset_default_graph()\n",
    "    nn = DNN(dim, sizes,\n",
    "              lr=LEARNING_RATE, beta1=BETA1, lambd=LAMBD,\n",
    "              batch_size=BATCH_SIZE, epochs=EPOCHS,\n",
    "              save_sample=VAL_PERIOD, path=PATH, seed=SEED)\n",
    "    \n",
    "    vars_to_train= tf.trainable_variables()\n",
    "    vars_all = tf.global_variables()\n",
    "    vars_to_init = list(set(vars_all)-set(vars_to_train))\n",
    "    init_op = tf.variables_initializer(vars_to_init)\n",
    "    \n",
    "    # Add ops to save and restore all the variables.\n",
    "    saver = tf.train.Saver()\n",
    "    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)\n",
    "    \n",
    "    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:\n",
    "        \n",
    "        sess.run(init_op)\n",
    "        print('\\n Selecting signal events with model...')\n",
    "        saver.restore(sess,PATH+'/CNN_model.ckpt')\n",
    "        print('Model restored.')\n",
    "        \n",
    "        nn.set_session(sess)\n",
    "        output_dict={}\n",
    "            \n",
    "        for i in range(n_batches_output):\n",
    "            small_dataset = data[i:i+batch_size_output]\n",
    "            output_dict[i] = nn.predict(small_dataset)\n",
    "        output=np.concatenate([output_dict[i] for i in range(len(output_dict))])\n",
    "        return output\n"
   ]
  },
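  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A quick sanity check of the batching used in `bkg()`: contiguous, non-overlapping slices, with any events beyond the last full batch dropped (`106404 % 5000 = 1404` events for the shape above). This toy check does not touch the model:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Toy check of the slicing pattern used in bkg()\n",
    "toy = np.arange(12).reshape(12, 1)\n",
    "batch = 5\n",
    "n_batches = len(toy)//batch  # the trailing len(toy) % batch rows are dropped\n",
    "chunks = [toy[i*batch:(i+1)*batch] for i in range(n_batches)]\n",
    "np.concatenate(chunks).shape  # (10, 1): two full batches of five"
   ]
  },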
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'sizes' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-12-e6881bf43157>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      3\u001b[0m             \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'No checkpoint'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m             \u001b[0moutput\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbkg\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m<ipython-input-11-b7f16e1c1d0b>\u001b[0m in \u001b[0;36mbkg\u001b[0;34m(data)\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      6\u001b[0m     \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreset_default_graph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m     nn = DNN(dim, sizes,\n\u001b[0m\u001b[1;32m      8\u001b[0m               \u001b[0mlr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mLEARNING_RATE\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbeta1\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mBETA1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlambd\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mLAMBD\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      9\u001b[0m               \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mBATCH_SIZE\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepochs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mEPOCHS\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNameError\u001b[0m: name 'sizes' is not defined"
     ]
    }
   ],
   "source": [
    "if __name__=='__main__':\n",
    "        if not os.path.exists(PATH+'/checkpoint'):\n",
    "            print('No checkpoint')\n",
    "        else:\n",
    "            output=bkg(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.argmax(output, axis=1).astype(np.bool).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = data_dict[\"Ds_ConsD_M\"][0:output.shape[0]][np.argmax(output, axis=1).astype(np.bool)]\n",
    "b = [data_dict[\"Ds_ConsD_M\"][0:output.shape[0]][i] for i in range(output.shape[0])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "NN_selected=np.array([a[i][0] for i in range(len(a))])\n",
    "full = np.array([b[i][0] for i in range(len(b))])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "full=np.delete(full,np.where(full<0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.hist(full,alpha=0.4,bins=100, range=(0,3000));\n",
    "plt.hist(NN_selected,alpha=0.4,bins=100, range=(0,3000));\n",
    "fig=plt.gcf();\n",
    "fig.set_size_inches(16,10)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}