import os import pickle import numpy as np import sys from tools.data_processing import * mother_ID=['Dplus','Ds','both'] l_flv = ['e','mu'] data_type = ['MC','data'] mag_status =['Up','Down'] tree_name = 'Ds_OfflineTree/DecayTree' cmp_lt = lambda x,y: x < y cmp_gt = lambda x,y: x > y cmp_eq = lambda x,y: x == y cmp_lte = lambda x,y: x <= y cmp_gte = lambda x,y: x >= y cmp_neq = lambda x,y: x != y log_or = lambda x,y: np.logical_or(x,y) log_and = lambda x,y: np.logical_and(x,y) log_and_not = lambda x,y: np.logical_and(x,np.invert(y)) def namestr(obj, namespace): return [name for name in namespace if namespace[name] is obj] def filter_tuple_cond(namespace, tuple_full, l_index, cat_name, comparator, threshold, wantEff=False, include_costhetal=True): if "Ds" in namestr(tuple_full, namespace)[0]: mother_index=1 elif "Dplus" in namestr(tuple_full, namespace)[0]: mother_index=0 elif "data" in namestr(tuple_full, namespace)[0]: mother_index=1 if "MC" in namestr(tuple_full, namespace)[0]: data_index=0 else: data_index=1 indices=np.where(comparator(tuple_full[cat_name],threshold))[0] tuple_filtered={} branches=return_branches(data_index=data_index, mother_index=mother_index, l_index=l_index) if include_costhetal: for label in branches: tuple_filtered[label]=tuple_full[label][indices] else: for label in branches: if label!='cos_thetal': tuple_filtered[label]=tuple_full[label][indices] bf_cut=np.float32(len(tuple_full[cat_name])) af_cut=np.float32(len(tuple_filtered[cat_name])) eff = af_cut/bf_cut*100 if wantEff: print('************************') print(' \n Applying mass cut on '+cat_name+' on '+ namestr(tuple_full, namespace)[0] +'\n') if data_index==0: print('{0:.4g}% of signal passed this cut'.format(eff)) print('************************') return tuple_filtered, eff else: return tuple_filtered def filter_tuple_mass_cut(namespace, tuple_full, l_index, cat_name, lower_bound=None, upper_bound=None, cut_kind=None, equal=False, wantEff=False, include_costhetal=True): if "Ds" in namestr(tuple_full, namespace)[0]: mother_index=1 elif "Dplus" in namestr(tuple_full, namespace)[0]: mother_index=0 elif "data" in namestr(tuple_full, namespace)[0]: mother_index=1 if "MC" in namestr(tuple_full, namespace)[0]: data_index=0 else: data_index=1 if lower_bound == None or upper_bound==None: if lower_bound == None or upper_bound!=None: threshold=upper_bound if equal: comparator = cmp_lte else: comparator = cmp_lt elif upper_bound==None and lower_bound != None: threshold=lower_bound if equal: comparator = cmp_gte else: comparator = cmp_gt indices=np.where(comparator(tuple_full[cat_name],threshold))[0] elif upper_bound!=None and lower_bound != None: assert cut_kind!=None, '\n Specify cut_kind, either inside or outside bounds' if cut_kind == 'inside': if equal: indices_1 = np.where(tuple_full[cat_name]<=upper_bound)[0] indices_2 = np.where(tuple_full[cat_name]>=lower_bound)[0] else: indices_1 = np.where(tuple_full[cat_name]<upper_bound)[0] indices_2 = np.where(tuple_full[cat_name]>lower_bound)[0] indices = np.intersect1d(indices_1,indices_2) elif cut_kind == 'outside': if equal: indices_1 = np.where(tuple_full[cat_name]>=0)[0] indices_2 = np.where(tuple_full[cat_name]<=lower_bound)[0] indices_3 = np.where(tuple_full[cat_name]>=upper_bound)[0] indices_temp = np.intersect1d(indices_1,indices_2) else: indices_1 = np.where(tuple_full[cat_name]>0)[0] indices_2 = np.where(tuple_full[cat_name]<lower_bound)[0] indices_3 = np.where(tuple_full[cat_name]>upper_bound)[0] indices_temp = np.intersect1d(indices_1,indices_2) indices = np.concatenate([indices_temp,indices_3]) tuple_filtered={} branches=return_branches(data_index=data_index, mother_index=mother_index, l_index=l_index) if include_costhetal: for label in branches: tuple_filtered[label]=tuple_full[label][indices] else: for label in branches: if label!='cos_thetal': tuple_filtered[label]=tuple_full[label][indices] bf_cut=np.float32(len(tuple_full[cat_name])) af_cut=np.float32(len(tuple_filtered[cat_name])) eff = af_cut/bf_cut*100 if wantEff: print('************************') print(' \n Applying mass cut on '+cat_name+' on '+ namestr(tuple_full, namespace)[0] +'\n') if data_index==0: print('{0:.4g}% of signal passed this cut'.format(eff)) print('************************') return tuple_filtered, eff else: return tuple_filtered def filter_tuple_L0TrigCat(namespace, tuple_full, l_index, trig_cat, wantEff=False, include_costhetal=True): if "Ds" in namestr(tuple_full, namespace)[0]: mother_index=1 elif "Dplus" in namestr(tuple_full, namespace)[0]: mother_index=0 elif "data" in namestr(tuple_full, namespace)[0]: mother_index=1 if "MC" in namestr(tuple_full, namespace)[0]: data_index=0 else: data_index=1 if l_index ==0: cat_name_TOS = mother_ID[mother_index]+'_L0ElectronDecision_TOS' else: cat_name_TOS = mother_ID[mother_index]+'_L0MuonDecision_TOS' cat_name_TIS = mother_ID[mother_index]+'_L0Global_TIS' indices_TOS=tuple_full[cat_name_TOS] indices_TIS=tuple_full[cat_name_TIS] if trig_cat == 0: #Exclusive L0_Lepton_TOS indices=log_and_not(indices_TOS,indices_TIS) if trig_cat == 1: #Exclusive L0_Global_TIS indices=log_and_not(indices_TIS,indices_TOS) if trig_cat == 2: #L0_Lepton_TOS and L0_Global_TIS indices=log_and(indices_TIS,indices_TOS) if trig_cat == 3: #Either L0_Lepton_Tos or L0_Global_TIS indices=log_or(indices_TIS,indices_TOS) if trig_cat == 4: #TOS and TOSandTIS indices_and=log_and(indices_TIS,indices_TOS) indices=log_or(indices_TOS,indices_and) if trig_cat == 5: #TIS and TOSandTIS indices_and=log_and(indices_TIS,indices_TOS) indices=log_or(indices_TIS,indices_and) tuple_filtered={} branches=return_branches(data_index=data_index, mother_index=mother_index, l_index=l_index) if include_costhetal: for label in branches: tuple_filtered[label]=tuple_full[label][indices] else: for label in branches: if label!='cos_thetal': tuple_filtered[label]=tuple_full[label][indices] bf_cut=np.float32(len(tuple_full[cat_name_TOS])) af_cut=np.float32(len(tuple_filtered[cat_name_TOS])) eff = af_cut/bf_cut*100 if wantEff: print('************************') print(' \n Applying L0TrigCat '+str(trig_cat)+' cut on '+ namestr(tuple_full, namespace)[0] +'\n') if data_index==0: print('{0:.4g}% of signal passed this cut'.format(eff)) print('************************') return tuple_filtered, eff else: return tuple_filtered # def apply_BDT_selection(namespace, data_to_select, cut_threshold, BDTs_PATH): # if "Ds" in namestr(data_to_select, namespace)[0]: # mother_index=1 # elif "Dplus" in namestr(data_to_select, namespace)[0]: # mother_index=0 # elif "data" in namestr(data_to_select, namespace)[0]: # mother_index=1 # if "MC" in namestr(data_to_select, namespace)[0]: # data_index=0 # else: # data_index=1 # branches = data_to_select.keys() # n_evts = data_to_select[data_to_select.keys()[0]].shape[0] # with open(BDTs_PATH+'variables_used.pickle') as f: # branches_BDT_Ds = pickle.load(f) # branches_BDT_Dplus=[] # for label in branches_BDT_Ds: # if 'Ds' in label: # branches_BDT_Dplus.append(label.replace("Ds", "Dplus")) # else: # branches_BDT_Dplus.append(label) # if mother_index == 0: branches_BDT = branches_BDT_Dplus # elif mother_index == 1: branches_BDT = branches_BDT_Ds # data_BDT_input={} # for label in branches_BDT: # data_BDT_input[label]=data_to_select[label] # fold = 0 # while os.path.exists(BDTs_PATH+'/XG_'+str(fold)): # fold+=1 # k = np.random.randint(fold) # MODEL_PATH=BDTs_PATH+'/XG_'+str(k) # loaded_model = pickle.load(open(MODEL_PATH+"/XG_"+str(k)+".pickle.dat", "rb")) # data_BDT_input_array=extract_array_for_BDT(data_BDT_input, branches_BDT, n_evts) # output_XG=loaded_model.predict_proba(data_BDT_input_array) # bdt_selection_data= np.where(output_XG[:,1]>cut_threshold) # data_BDT_output={} # for label in branches: # data_BDT_output[label] = data_to_select[label][bdt_selection_data] # n_before= data_BDT_input[data_BDT_input.keys()[0]].shape[0] # n_after= data_BDT_output[data_BDT_output.keys()[0]].shape[0] # eff = np.float32(n_after)/np.float32(n_before) # return data_BDT_output, eff def get_BDT_weights(namespace, data_to_select, BDTs_PATH): if "Ds" in namestr(data_to_select, namespace)[0]: mother_index=1 elif "Dplus" in namestr(data_to_select, namespace)[0]: mother_index=0 elif "data" in namestr(data_to_select, namespace)[0]: mother_index=1 if "MC" in namestr(data_to_select, namespace)[0]: data_index=0 else: data_index=1 branches = data_to_select.keys() n_evts = data_to_select[data_to_select.keys()[0]].shape[0] with open(BDTs_PATH+'variables_used.pickle') as f: branches_BDT_Ds = pickle.load(f) branches_BDT_Dplus=[] for label in branches_BDT_Ds: if 'Ds' in label: branches_BDT_Dplus.append(label.replace("Ds", "Dplus")) else: branches_BDT_Dplus.append(label) if mother_index == 0: branches_BDT = branches_BDT_Dplus elif mother_index == 1: branches_BDT = branches_BDT_Ds data_BDT_input={} for label in branches_BDT: data_BDT_input[label]=data_to_select[label] fold = 0 while os.path.exists(BDTs_PATH+'/XG_'+str(fold)): fold+=1 k = np.random.randint(fold) MODEL_PATH=BDTs_PATH+'/XG_'+str(k) loaded_model = pickle.load(open(MODEL_PATH+"/XG_"+str(k)+".pickle.dat", "rb")) data_BDT_input_array=extract_array_for_BDT(data_BDT_input, branches_BDT, n_evts) output_XG=loaded_model.predict_proba(data_BDT_input_array) data_to_select['BDT_selection']=output_XG[:,1] return data_to_select