Rphipi_new/tools/preselector.py at master

Fork: 0
davide / Rphipi_new
Find file
Newer
Older
Rphipi_new / tools / preselector.py
Davide Lancierini on 28 May 2019 11 KB first commit
Raw Blame History
import os
import pickle
import numpy as np
import sys
from tools.data_processing import *

mother_ID=['Dplus','Ds','both']
l_flv = ['e','mu']
data_type = ['MC','data']
mag_status =['Up','Down'] 
tree_name = 'Ds_OfflineTree/DecayTree'

cmp_lt = lambda x,y: x < y
cmp_gt = lambda x,y: x > y
cmp_eq = lambda x,y: x == y
cmp_lte = lambda x,y: x <= y
cmp_gte = lambda x,y: x >= y
cmp_neq = lambda x,y: x != y

log_or = lambda x,y: np.logical_or(x,y)
log_and = lambda x,y: np.logical_and(x,y)
log_and_not = lambda x,y: np.logical_and(x,np.invert(y))


def namestr(obj, namespace):
    return [name for name in namespace if namespace[name] is obj]


def filter_tuple_cond(namespace, tuple_full, l_index, 
                      cat_name, comparator, threshold, wantEff=False,
                      include_costhetal=True):

    if "Ds" in namestr(tuple_full, namespace)[0]:
        
        mother_index=1

    elif "Dplus" in namestr(tuple_full, namespace)[0]:
        
        mother_index=0

    elif "data" in namestr(tuple_full, namespace)[0]:
        
        mother_index=1
    
    if "MC" in namestr(tuple_full, namespace)[0]:
        data_index=0
    else:
        data_index=1
    
    indices=np.where(comparator(tuple_full[cat_name],threshold))[0]
    
    tuple_filtered={}

    branches=return_branches(data_index=data_index, mother_index=mother_index, l_index=l_index)
    if include_costhetal: 
        for label in branches:
            tuple_filtered[label]=tuple_full[label][indices]

    else:
        for label in branches:
            if label!='cos_thetal':
                tuple_filtered[label]=tuple_full[label][indices]

    bf_cut=np.float32(len(tuple_full[cat_name]))
    af_cut=np.float32(len(tuple_filtered[cat_name]))

    eff = af_cut/bf_cut*100

    if wantEff:
        print('************************')
        print(' \n Applying mass cut on '+cat_name+' on '+ namestr(tuple_full, namespace)[0] +'\n')
        if data_index==0: print('{0:.4g}% of signal passed this cut'.format(eff))
        print('************************')
        return tuple_filtered, eff
    else:
        return tuple_filtered



def filter_tuple_mass_cut(namespace, tuple_full, l_index, 
                          cat_name, lower_bound=None, upper_bound=None,
                          cut_kind=None, equal=False, wantEff=False,
                          include_costhetal=True):

    if "Ds" in namestr(tuple_full, namespace)[0]:
        
        mother_index=1

    elif "Dplus" in namestr(tuple_full, namespace)[0]:
        
        mother_index=0

    elif "data" in namestr(tuple_full, namespace)[0]:
        
        mother_index=1
    
    if "MC" in namestr(tuple_full, namespace)[0]:
        data_index=0
    else:
        data_index=1

    if lower_bound == None or upper_bound==None:

        if lower_bound == None or upper_bound!=None:
            threshold=upper_bound
            if equal: comparator = cmp_lte
            else: comparator = cmp_lt

        elif upper_bound==None and lower_bound != None:
            threshold=lower_bound
            if equal: comparator = cmp_gte
            else: comparator = cmp_gt

        indices=np.where(comparator(tuple_full[cat_name],threshold))[0]

    elif upper_bound!=None and lower_bound != None:
        
        assert cut_kind!=None, '\n Specify cut_kind, either inside or outside bounds'

        if cut_kind == 'inside':
            if equal:
                indices_1 = np.where(tuple_full[cat_name]<=upper_bound)[0]
                indices_2 = np.where(tuple_full[cat_name]>=lower_bound)[0]
            else:
                indices_1 = np.where(tuple_full[cat_name]<upper_bound)[0]
                indices_2 = np.where(tuple_full[cat_name]>lower_bound)[0]

            indices = np.intersect1d(indices_1,indices_2)

        elif cut_kind == 'outside':
            if equal:
                indices_1 = np.where(tuple_full[cat_name]>=0)[0]
                indices_2 = np.where(tuple_full[cat_name]<=lower_bound)[0]
                indices_3 = np.where(tuple_full[cat_name]>=upper_bound)[0]

                indices_temp = np.intersect1d(indices_1,indices_2)
            else:
                indices_1 = np.where(tuple_full[cat_name]>0)[0]
                indices_2 = np.where(tuple_full[cat_name]<lower_bound)[0]
                indices_3 = np.where(tuple_full[cat_name]>upper_bound)[0]

                indices_temp = np.intersect1d(indices_1,indices_2)

            indices =  np.concatenate([indices_temp,indices_3])
    
    tuple_filtered={}
    
    branches=return_branches(data_index=data_index, mother_index=mother_index, l_index=l_index)
    if include_costhetal: 
        for label in branches:
            tuple_filtered[label]=tuple_full[label][indices]

    else:
        for label in branches:
            if label!='cos_thetal':
                tuple_filtered[label]=tuple_full[label][indices]

    bf_cut=np.float32(len(tuple_full[cat_name]))
    af_cut=np.float32(len(tuple_filtered[cat_name]))

    eff = af_cut/bf_cut*100

    if wantEff:
        print('************************')
        print(' \n Applying mass cut on '+cat_name+' on '+ namestr(tuple_full, namespace)[0] +'\n')
        if data_index==0: print('{0:.4g}% of signal passed this cut'.format(eff))
        print('************************')
        return tuple_filtered, eff
    else:
        return tuple_filtered
    
def filter_tuple_L0TrigCat(namespace, tuple_full, l_index, 
                           trig_cat, wantEff=False,
                           include_costhetal=True):
    
    if "Ds" in namestr(tuple_full, namespace)[0]:
        
        mother_index=1

    elif "Dplus" in namestr(tuple_full, namespace)[0]:
        
        mother_index=0

    elif "data" in namestr(tuple_full, namespace)[0]:
        
        mother_index=1
    
    if "MC" in namestr(tuple_full, namespace)[0]:
        data_index=0
    else:
        data_index=1

    if l_index ==0:
        cat_name_TOS = mother_ID[mother_index]+'_L0ElectronDecision_TOS'
    else:
        cat_name_TOS = mother_ID[mother_index]+'_L0MuonDecision_TOS'

    cat_name_TIS = mother_ID[mother_index]+'_L0Global_TIS'

    indices_TOS=tuple_full[cat_name_TOS]
    indices_TIS=tuple_full[cat_name_TIS]


    if trig_cat == 0:
        #Exclusive L0_Lepton_TOS 
        indices=log_and_not(indices_TOS,indices_TIS)

    if trig_cat == 1:
        #Exclusive L0_Global_TIS
        indices=log_and_not(indices_TIS,indices_TOS)

    if trig_cat == 2:
        #L0_Lepton_TOS and L0_Global_TIS
        indices=log_and(indices_TIS,indices_TOS)

    if trig_cat == 3:
        #Either L0_Lepton_Tos or L0_Global_TIS
        indices=log_or(indices_TIS,indices_TOS)

    if trig_cat == 4:
        #TOS and TOSandTIS
        indices_and=log_and(indices_TIS,indices_TOS)
        indices=log_or(indices_TOS,indices_and)

    if trig_cat == 5:
        #TIS and TOSandTIS
        indices_and=log_and(indices_TIS,indices_TOS)
        indices=log_or(indices_TIS,indices_and)
    
    tuple_filtered={}
    
    branches=return_branches(data_index=data_index, mother_index=mother_index, l_index=l_index)
    if include_costhetal: 
        for label in branches:
            tuple_filtered[label]=tuple_full[label][indices]

    else:
        for label in branches:
            if label!='cos_thetal':
                tuple_filtered[label]=tuple_full[label][indices]
    
    bf_cut=np.float32(len(tuple_full[cat_name_TOS]))
    af_cut=np.float32(len(tuple_filtered[cat_name_TOS]))

    eff = af_cut/bf_cut*100

    if wantEff:
        print('************************')
        print(' \n Applying L0TrigCat '+str(trig_cat)+' cut on '+ namestr(tuple_full, namespace)[0] +'\n')
        if data_index==0: print('{0:.4g}% of signal passed this cut'.format(eff))
        print('************************')
        return tuple_filtered, eff
    else:
        return tuple_filtered


# def apply_BDT_selection(namespace, data_to_select, cut_threshold, BDTs_PATH):

#     if "Ds" in namestr(data_to_select, namespace)[0]:

#         mother_index=1

#     elif "Dplus" in namestr(data_to_select, namespace)[0]:

#         mother_index=0

#     elif "data" in namestr(data_to_select, namespace)[0]:

#         mother_index=1

#     if "MC" in namestr(data_to_select, namespace)[0]:
#         data_index=0
#     else:
#         data_index=1


#     branches = data_to_select.keys()
#     n_evts = data_to_select[data_to_select.keys()[0]].shape[0]

#     with open(BDTs_PATH+'variables_used.pickle') as f:

#         branches_BDT_Ds = pickle.load(f)

#     branches_BDT_Dplus=[]

#     for label in branches_BDT_Ds:
#         if 'Ds' in label:
#             branches_BDT_Dplus.append(label.replace("Ds", "Dplus"))
#         else:
#             branches_BDT_Dplus.append(label)

#     if mother_index == 0: branches_BDT = branches_BDT_Dplus
#     elif mother_index == 1: branches_BDT = branches_BDT_Ds
    
#     data_BDT_input={}
#     for label in branches_BDT:

#         data_BDT_input[label]=data_to_select[label]

#     fold = 0

#     while os.path.exists(BDTs_PATH+'/XG_'+str(fold)):
#         fold+=1
      
#     k = np.random.randint(fold)

#     MODEL_PATH=BDTs_PATH+'/XG_'+str(k)
#     loaded_model = pickle.load(open(MODEL_PATH+"/XG_"+str(k)+".pickle.dat", "rb"))

#     data_BDT_input_array=extract_array_for_BDT(data_BDT_input, branches_BDT, n_evts)

#     output_XG=loaded_model.predict_proba(data_BDT_input_array)
#     bdt_selection_data= np.where(output_XG[:,1]>cut_threshold)

#     data_BDT_output={}


#     for label in branches:   
#         data_BDT_output[label] = data_to_select[label][bdt_selection_data]

#     n_before= data_BDT_input[data_BDT_input.keys()[0]].shape[0]
#     n_after= data_BDT_output[data_BDT_output.keys()[0]].shape[0]
#     eff = np.float32(n_after)/np.float32(n_before)

#     return data_BDT_output, eff


def get_BDT_weights(namespace, data_to_select, BDTs_PATH):

    if "Ds" in namestr(data_to_select, namespace)[0]:

        mother_index=1

    elif "Dplus" in namestr(data_to_select, namespace)[0]:

        mother_index=0

    elif "data" in namestr(data_to_select, namespace)[0]:

        mother_index=1

    if "MC" in namestr(data_to_select, namespace)[0]:
        data_index=0
    else:
        data_index=1


    branches = data_to_select.keys()
    n_evts = data_to_select[data_to_select.keys()[0]].shape[0]

    with open(BDTs_PATH+'variables_used.pickle') as f:

        branches_BDT_Ds = pickle.load(f)

    branches_BDT_Dplus=[]

    for label in branches_BDT_Ds:
        if 'Ds' in label:
            branches_BDT_Dplus.append(label.replace("Ds", "Dplus"))
        else:
            branches_BDT_Dplus.append(label)

    if mother_index == 0: branches_BDT = branches_BDT_Dplus
    elif mother_index == 1: branches_BDT = branches_BDT_Ds
    
    data_BDT_input={}
    for label in branches_BDT:

        data_BDT_input[label]=data_to_select[label]

    fold = 0

    while os.path.exists(BDTs_PATH+'/XG_'+str(fold)):
        fold+=1
      
    k = np.random.randint(fold)

    MODEL_PATH=BDTs_PATH+'/XG_'+str(k)
    loaded_model = pickle.load(open(MODEL_PATH+"/XG_"+str(k)+".pickle.dat", "rb"))

    data_BDT_input_array=extract_array_for_BDT(data_BDT_input, branches_BDT, n_evts)

    output_XG=loaded_model.predict_proba(data_BDT_input_array)

    data_to_select['BDT_selection']=output_XG[:,1]


    return data_to_select