Newer
Older
R_phipi / tools / data_processing.py
@Davide Lancierini Davide Lancierini on 15 Nov 2018 25 KB big changes
import os
import pickle
import numpy as np
import sys

# Lookup tables indexed by the integer ``*_index`` arguments used below.

# Prefixes prepended to the mother-particle branch names.
mother_ID = ['Ds', 'Dplus', 'both']
# Prefixes prepended to the meson branch names.
meson_ID = ['pi', 'X']
# Lepton flavour tags; used both in branch names and in the pickle paths.
l_flv = ['e', 'mu']
# Sample kind; index 0 selects the 'MC' pickles, index 1 the 'data' pickle.
data_type = ['MC', 'data']
# Not referenced by the functions visible in this part of the file.
mag_status = ['Up', 'Down']
tree_name = 'Ds_OfflineTree/DecayTree'

def sel_eff(tp, threshold):
    """Return the fraction of entries of ``tp`` strictly above ``threshold``.

    The numerator counts every element of ``tp`` that exceeds ``threshold``;
    the denominator is the length of the first axis of ``tp``.

    Parameters
    ----------
    tp : array-like
        Values to test (anything ``np.asarray`` accepts).
    threshold : scalar
        Cut value; the comparison is strict (``>``).

    Returns
    -------
    float
        ``n_pass / tp.shape[0]`` as a built-in Python float.

    Raises
    ------
    ZeroDivisionError
        If ``tp`` has zero length along its first axis.
    """
    tp = np.asarray(tp)
    # The removed ``np.float`` alias was just the built-in ``float``; the
    # explicit casts also keep true division under Python 2.
    n_pass = float(np.count_nonzero(tp > threshold))
    return n_pass / float(tp.shape[0])

def load_datasets(l_index):
    """Load the three BDT-training pickles for one lepton flavour.

    ``l_index`` selects the flavour tag from ``l_flv``; the tag is doubled
    (e.g. ``'ee'``) to form the directory and file-name suffix.

    Files are read in this order: Ds MC, Dplus MC, then data.

    Returns
    -------
    tuple
        ``(MC_Dplus_sig_dict, MC_Ds_sig_dict, data_bkg_dict)`` -- note that
        the Dplus dictionary comes first in the returned tuple.

    NOTE: ``pickle.load`` can execute arbitrary code; only point this at
    trusted files.
    """
    flavour_pair = l_flv[l_index] + l_flv[l_index]
    directory = '/disk/lhcb_data/davide/Rphipi/BDT_training/' + flavour_pair + '/'

    # Only Python 3's pickle.load accepts ``encoding``; latin1 is presumably
    # needed because the pickles were written under Python 2 -- TODO confirm.
    if sys.version_info[0] > 2:
        load_kwargs = {'encoding': 'latin1'}
    else:
        load_kwargs = {}

    def read_pickle(data_index, mother_tag):
        # mother_tag is 'Ds_', 'Dplus_' or '' (the data file has no mother tag).
        path = (directory + data_type[data_index] + '_for_BDT_training_'
                + mother_tag + flavour_pair + '.pickle')
        with open(path, 'rb') as f:
            return pickle.load(f, **load_kwargs)

    MC_Ds_sig_dict = read_pickle(0, 'Ds_')
    MC_Dplus_sig_dict = read_pickle(0, 'Dplus_')
    data_bkg_dict = read_pickle(1, '')

    return MC_Dplus_sig_dict, MC_Ds_sig_dict, data_bkg_dict


def extract_array(event_dict, branches_needed, features, examples):
    """Pack selected branches of ``event_dict`` into a 2-D float array.

    Column ``c`` of the result holds the first ``examples`` entries of
    ``event_dict[branches_needed[c]]``. Columns beyond
    ``len(branches_needed)`` (when ``features`` is larger) stay zero.

    Parameters
    ----------
    event_dict : mapping
        Maps branch name to a sequence/array with one entry per event.
        Entries may be scalars or single-element arrays (shape ``(N,)`` or
        ``(N, 1)``).
    branches_needed : sequence of str
        Branch names, in output column order.
    features : int
        Number of columns to allocate.
    examples : int
        Number of rows (events) to copy.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(examples, features)``; this also holds for
        ``examples == 0`` (previously a 1-D empty array was returned).

    Raises
    ------
    IndexError
        If a branch has fewer than ``examples`` entries, or there are more
        branches than ``features``.
    ValueError
        If a branch holds more than one value per event.
    """
    extracted = np.zeros(shape=(examples, features))

    for col, key in enumerate(branches_needed):
        values = np.asarray(event_dict[key])
        if values.shape[0] < examples:
            raise IndexError(
                "branch %r has %d entries, %d requested"
                % (key, values.shape[0], examples)
            )
        # Flatten (N, 1) branches explicitly instead of relying on the
        # deprecated implicit conversion of 1-element arrays to scalars.
        extracted[:, col] = values[:examples].reshape(examples)

    return extracted

def add_labels(ndarray, signal=True):
    """Append a constant label column to a 2-D array.

    Parameters
    ----------
    ndarray : numpy.ndarray
        2-D array of shape ``(n_rows, n_cols)``.
    signal : bool, optional
        Truthy -> the new column is all ones; falsy -> all zeros.
        Any value is interpreted by truthiness, so a value that is neither
        ``True`` nor ``False`` no longer ends in ``UnboundLocalError``.

    Returns
    -------
    numpy.ndarray
        New array of shape ``(n_rows, n_cols + 1)`` whose last column is the
        label; the input is not modified.
    """
    label_value = 1.0 if signal else 0.0
    label_column = np.full((ndarray.shape[0], 1), label_value)
    return np.concatenate((ndarray, label_column), axis=1)

def to_one_hot(labels):
    """Convert a label array into a two-column one-hot array.

    Row ``r`` of the result is ``[1, 0]`` when ``labels[r] == 0`` and
    ``[0, 1]`` for any other value.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(labels.shape[0], 2)``.
    """
    n_rows = labels.shape[0]
    one_hot = np.zeros(shape=(n_rows, 2))

    for row in range(n_rows):
        # Column 0 marks label 0; everything else lands in column 1.
        hot_column = 0 if labels[row] == 0 else 1
        one_hot[row, hot_column] = 1

    return one_hot

def k_subsets(i, k, X, Y_labels):
    """Split ``X`` / ``Y_labels`` into ``k`` consecutive folds for cross-validation.

    Fold ``i`` becomes the test set and the remaining folds, concatenated in
    index order, the training set. The last column of ``X`` is removed from
    the returned train/test feature arrays (the original comment identifies
    it as the Ds_mass branch); the per-fold dictionaries keep all columns.

    Labels are only split row-wise. Previously they were also column-sliced
    with X's feature count, which truncated label columns whenever X had
    fewer feature columns than the labels and failed on 1-D label vectors.

    Each fold has ``X.shape[0] // k`` rows; trailing rows that do not fill a
    complete fold are not assigned to any fold.

    Parameters
    ----------
    i : int
        Index of the test fold, ``0 <= i < k``.
    k : int
        Number of folds.
    X : numpy.ndarray
        2-D feature array.
    Y_labels : numpy.ndarray
        Labels with one row per row of ``X``.

    Returns
    -------
    tuple
        ``(X_train, Y_train, X_test, Y_test, X_dict, Y_dict)`` where the two
        dictionaries map fold index to the un-stripped fold.

    Raises
    ------
    KeyError
        If ``i`` is not a valid fold index.
    ValueError
        If ``k == 1`` (no folds are left to build a training set from).
    """
    train_size, dim = X.shape
    k_batch_size = train_size // k

    X_dict = {}
    Y_dict = {}
    for b in range(k):
        fold_rows = slice(k_batch_size * b, k_batch_size * (b + 1))
        X_dict[b] = X[fold_rows]
        Y_dict[b] = Y_labels[fold_rows]

    X_test = X_dict[i][:, 0:dim - 1]
    Y_test = Y_dict[i]

    train_folds = [j for j in range(k) if j != i]

    X_train = np.concatenate([X_dict[j][:, 0:dim - 1] for j in train_folds], axis=0)
    Y_train = np.concatenate([Y_dict[j] for j in train_folds], axis=0)

    return X_train, Y_train, X_test, Y_test, X_dict, Y_dict


def return_branches(data_index=None, mother_index=None, l_index=None, meson_index=None):
    """Return the ordered list of branch names for one sample configuration.

    The indices select entries of the module-level tables ``mother_ID``,
    ``l_flv`` and ``meson_ID``; ``data_index`` follows ``data_type``
    (0 -> 'MC', 1 -> 'data').

    * ``data_index == 0`` and ``l_index == 1``: MC-truth branches included,
      lepton momenta listed as plus PX/PY/PZ, minus PX/PY/PZ, then both PT.
    * ``data_index == 0`` and ``l_index == 0``: MC-truth branches included,
      each lepton's PT directly follows its PX/PY/PZ.
    * ``data_index == 1``: no MC-truth branches, PT follows PX/PY/PZ for
      each lepton (for either ``l_index``).

    NOTE: for any other combination ``branches_needed`` is never bound and
    the final ``return`` raises ``UnboundLocalError``.
    """

    def prefixed(prefix, suffixes):
        return [prefix + suffix for suffix in suffixes]

    def build(with_mc_truth, pt_follows_momentum):
        mother = mother_ID[mother_index]
        meson = meson_ID[meson_index]
        lepton = l_flv[l_index]
        plus = lepton + "_plus"
        minus = lepton + "_minus"
        flavour_cap = lepton.capitalize()

        branches = []

        # ---- mother (D) candidate ----
        if with_mc_truth:
            branches += prefixed(mother, ['_MC_MOTHER_ID', '_BKGCAT', '_TRUEID'])
        branches += prefixed(mother, [
            # geometric variables, pT and FD
            "_ENDVERTEX_CHI2",
            "_ENDVERTEX_NDOF",
            "_IPCHI2_OWNPV",
            "_OWNPV_CHI2",
            "_OWNPV_NDOF",
            "_IP_OWNPV",
            "_DIRA_OWNPV",
            "_PX",
            "_PY",
            "_PZ",
            "_PT",
            "_FD_OWNPV",
            "_FDCHI2_OWNPV",
            # reconstructed mass
            "_ConsD_M",
            # trigger variables
            "_Hlt1TrackMVADecision_TOS",
            "_Hlt2RareCharmD2Pi" + flavour_cap + flavour_cap + "OSDecision_TOS",
            "_Hlt2Phys_TOS",
        ])

        # ---- phi ----
        if with_mc_truth:
            branches += ['phi_MC_MOTHER_ID', 'phi_BKGCAT', 'phi_TRUEID']
        branches += [
            "phi_ENDVERTEX_CHI2",
            "phi_ENDVERTEX_NDOF",
            "phi_IPCHI2_OWNPV",
            "phi_PT",
            "phi_M",
        ]

        # ---- meson (pion) ----
        if with_mc_truth:
            branches += prefixed(meson, ['_MC_MOTHER_ID', '_TRUEID'])
        branches += prefixed(meson, ["_PX", "_PY", "_PZ", '_PT'])
        # The PID branch always uses the literal 'pi' prefix, whatever
        # meson_index selects.
        branches.append("pi_MC15TuneV1_ProbNNpi")

        # ---- leptons ----
        if with_mc_truth:
            branches += [
                plus + '_MC_MOTHER_ID',
                plus + '_TRUEID',
                minus + '_MC_MOTHER_ID',
                minus + '_TRUEID',
            ]
        branches += [plus + "_OWNPV_CHI2", minus + "_OWNPV_CHI2"]

        plus_momentum = prefixed(plus, ["_PX", "_PY", "_PZ"])
        minus_momentum = prefixed(minus, ["_PX", "_PY", "_PZ"])
        if pt_follows_momentum:
            branches += plus_momentum + [plus + "_PT"]
            branches += minus_momentum + [minus + "_PT"]
        else:
            branches += plus_momentum + minus_momentum
            branches += [plus + "_PT", minus + "_PT"]

        # lepton PID variables
        branches += [
            plus + "_MC15TuneV1_ProbNN" + lepton,
            minus + "_MC15TuneV1_ProbNN" + lepton,
        ]
        return branches

    if data_index == 0:
        if l_index == 1:
            branches_needed = build(with_mc_truth=True, pt_follows_momentum=False)
        if l_index == 0:
            branches_needed = build(with_mc_truth=True, pt_follows_momentum=True)
    if data_index == 1:
        branches_needed = build(with_mc_truth=False, pt_follows_momentum=True)
    return branches_needed

def return_branches_BDT(mother_index=None, l_index=None, meson_index=None):
    """Return the ordered list of branch names used as BDT inputs.

    The indices select entries of the module-level tables ``mother_ID``,
    ``l_flv`` and ``meson_ID``. Both lists end with the mother's
    ``_ConsD_M`` branch.

    * ``l_index == 1``: five mother branches plus ``phi_IPCHI2_OWNPV``.
    * ``l_index == 0``: seven mother branches (adds ``_OWNPV_CHI2`` and
      ``_FD_OWNPV``) and no phi branch.

    NOTE: for any other ``l_index`` ``branches_needed`` is never bound and
    the final ``return`` raises ``UnboundLocalError``.
    """

    def assemble(mother_suffixes, phi_branches):
        mother = mother_ID[mother_index]
        meson = meson_ID[meson_index]
        lepton = l_flv[l_index]
        return (
            [mother + suffix for suffix in mother_suffixes]
            + phi_branches
            + [
                meson + '_PT',
                lepton + "_plus_PT",
                lepton + "_minus_PT",
                # reconstructed mass of the mother, always last
                mother + "_ConsD_M",
            ]
        )

    if l_index == 1:
        branches_needed = assemble(
            [
                "_ENDVERTEX_CHI2",
                "_IPCHI2_OWNPV",
                "_DIRA_OWNPV",
                "_PT",
                "_FDCHI2_OWNPV",
            ],
            ["phi_IPCHI2_OWNPV"],
        )
    if l_index == 0:
        branches_needed = assemble(
            [
                "_ENDVERTEX_CHI2",
                "_IPCHI2_OWNPV",
                "_OWNPV_CHI2",
                "_DIRA_OWNPV",
                "_PT",
                "_FD_OWNPV",
                "_FDCHI2_OWNPV",
            ],
            [],
        )
    return branches_needed

def norm_chi2(MC_Dplus_sig_dict, MC_Ds_sig_dict, data_dict):
    """Divide the chi2 branches by an NDOF branch, in place, in all three dicts.

    For each dictionary (Ds MC, Dplus MC, data -- the data dictionary uses
    the ``Ds`` prefix) the following replacements are made:

    * ``_ENDVERTEX_CHI2`` and ``_IPCHI2_OWNPV`` are divided by ``_ENDVERTEX_NDOF``
    * ``_FDCHI2_OWNPV`` and ``_OWNPV_CHI2`` are divided by ``_OWNPV_NDOF``

    and both NDOF entries are then deleted from the dictionary.

    Returns
    -------
    tuple
        The same three (mutated) dictionaries, in argument order.
    """
    samples = (
        (MC_Ds_sig_dict, "Ds"),
        (MC_Dplus_sig_dict, "Dplus"),
        (data_dict, "Ds"),
    )
    # (branch to normalise, NDOF branch it is divided by)
    ratios = (
        ("_ENDVERTEX_CHI2", "_ENDVERTEX_NDOF"),
        ("_IPCHI2_OWNPV", "_ENDVERTEX_NDOF"),
        ("_FDCHI2_OWNPV", "_OWNPV_NDOF"),
        ("_OWNPV_CHI2", "_OWNPV_NDOF"),
    )

    for sample, prefix in samples:
        for chi2_suffix, ndof_suffix in ratios:
            chi2_key = prefix + chi2_suffix
            sample[chi2_key] = sample[chi2_key] / sample[prefix + ndof_suffix]

        # The NDOF branches are only dropped once every ratio is computed.
        del sample[prefix + "_OWNPV_NDOF"]
        del sample[prefix + "_ENDVERTEX_NDOF"]

    return MC_Dplus_sig_dict, MC_Ds_sig_dict, data_dict

def mass_cut_for_fit(lower_cut, upper_cut, mother_index_fit=None, l_index=None,
                     branches_needed=None, data_dict=None, 
                     MC_Dplus_dict=None, MC_Ds_dict=None):
    """Apply a mass window to the data and return flat mass arrays.

    Events of ``data_dict`` whose ``"Ds_ConsD_M"`` value lies strictly
    between ``lower_cut`` and ``upper_cut`` are kept; every branch listed in
    ``branches_needed`` is replaced in ``data_dict`` (in place) by its
    selected rows. Branches not listed are left unfiltered. The MC
    dictionaries are not cut.

    ``mother_index_fit`` and ``l_index`` are accepted but not used.

    Returns
    -------
    tuple
        ``(data_mass, mc_Dplus_mass, mc_Ds_mass, data_dict)`` where the three
        mass arrays hold element ``[0]`` of every row of the respective
        ``*_ConsD_M`` branch.
    """

    # Row numbers of the data events inside the (exclusive) mass window.
    data_indices = [
        row
        for row, D_m in enumerate(data_dict["Ds_ConsD_M"])
        if lower_cut < D_m < upper_cut
    ]

    for label in branches_needed:
        data_dict[label] = data_dict[label][data_indices]

    def first_entries(branch):
        # Each row is indexable; keep only its first entry.
        return np.array([branch[row][0] for row in range(branch.shape[0])])

    mc_Dplus_mass = first_entries(MC_Dplus_dict["Dplus_ConsD_M"])
    mc_Ds_mass = first_entries(MC_Ds_dict["Ds_ConsD_M"])
    data_mass = first_entries(data_dict["Ds_ConsD_M"])

    return data_mass, mc_Dplus_mass, mc_Ds_mass, data_dict

def preprocess_for_XGBoost(MC_Dplus_dict, MC_Ds_dict, data_dict, branches_needed, mother_index_fit=None):
    """Extract the requested branches and standardise each sample column-wise.

    The last entry of ``branches_needed`` is removed with ``pop()`` before
    extraction. NOTE: this mutates the caller's list in place, so calling
    the function twice with the same list drops two entries.

    For the Dplus MC sample every branch name containing ``"Ds"`` has that
    substring replaced by ``"Dplus"``. Each sample is then centred on its
    own column means and divided by its own column standard deviations
    (a constant column therefore yields a division by zero).

    ``mother_index_fit`` is accepted but not used.

    Returns
    -------
    tuple
        ``(data, mc_Dplus, mc_Ds)`` standardised arrays, one row per event.
    """
    n_dplus = MC_Dplus_dict["Dplus_ConsD_M"].shape[0]
    n_ds = MC_Ds_dict["Ds_ConsD_M"].shape[0]
    n_data = data_dict["Ds_ConsD_M"].shape[0]
    print(n_data, n_dplus, n_ds)

    # Drop the trailing branch (in place, see docstring) before counting.
    branches_needed.pop()
    n_features = len(branches_needed)

    dplus_branches = [
        name.replace("Ds", "Dplus") if 'Ds' in name else name
        for name in branches_needed
    ]

    raw_data = extract_array(data_dict, branches_needed, n_features, n_data)
    raw_ds = extract_array(MC_Ds_dict, branches_needed, n_features, n_ds)
    raw_dplus = extract_array(MC_Dplus_dict, dplus_branches, n_features, n_dplus)

    def standardise(sample):
        # The spread is taken from the already-centred array.
        centred = sample - sample.mean(axis=0)
        return centred / centred.std(axis=0)

    mc_Ds_2 = standardise(raw_ds)
    mc_Dplus_2 = standardise(raw_dplus)
    data_2 = standardise(raw_data)

    return data_2, mc_Dplus_2, mc_Ds_2