Skip to content

Commit 414dd74

Browse files
committed
Extend jet analyzer to read feeddown info from processor
1 parent 0342386 commit 414dd74

3 files changed

Lines changed: 74 additions & 51 deletions

File tree

machine_learning_hep/analysis/analyzer_jets.py

Lines changed: 70 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,12 @@ def __init__(self, datap, case, typean, period):
6666
self.d_resultsallpdata = (
6767
self.cfg(f"data.results.{period}") if period is not None else self.cfg("data.resultsallp")
6868
)
69+
self.d_resultsallpfd = self.cfg(f"fd.results.{period}") if period is not None else self.cfg("fd.resultsallp")
6970

7071
# input directories (processor output)
7172
self.d_resultsallpmc_proc = self.d_resultsallpmc
7273
self.d_resultsallpdata_proc = self.d_resultsallpdata
74+
self.d_resultsallpfd_proc = self.d_resultsallpfd
7375
# use a different processor output
7476
if "data_proc" in datap["analysis"][typean]:
7577
self.d_resultsallpdata_proc = (
@@ -79,11 +81,16 @@ def __init__(self, datap, case, typean, period):
7981
self.d_resultsallpmc_proc = (
8082
self.cfg(f"mc_proc.results.{period}") if period is not None else self.cfg("mc_proc.resultsallp")
8183
)
84+
if "fd_proc" in datap["analysis"][typean]:
85+
self.d_resultsallpfd_proc = (
86+
self.cfg(f"fd_proc.results.{period}") if period is not None else self.cfg("fd_proc.resultsallp")
87+
)
8288

8389
# input files
8490
n_filemass_name = datap["files_names"]["histofilename"]
8591
self.n_filemass = os.path.join(self.d_resultsallpdata_proc, n_filemass_name)
8692
self.n_filemass_mc = os.path.join(self.d_resultsallpmc_proc, n_filemass_name)
93+
self.n_filemass_fd = os.path.join(self.d_resultsallpfd_proc, n_filemass_name)
8794
self.n_fileeff = datap["files_names"]["efffilename"]
8895
self.n_fileeff = os.path.join(self.d_resultsallpmc_proc, self.n_fileeff)
8996
self.n_fileresp = datap["files_names"]["respfilename"]
@@ -1050,57 +1057,73 @@ def _extract_signal(self, hist, var, mcordata, ipt):
10501057
# hres.Sumw2() # TODO: check if we should do this here
10511058
return hres
10521059

1053-
# region feeddown
1054-
# pylint: disable=too-many-statements
1055-
def estimate_feeddown(self):
1056-
self.logger.info("Estimating feeddown")
1057-
1058-
with TFile(self.cfg("fd_root")) as rfile:
1059-
powheg_xsection = rfile.Get("fHistXsection")
1060-
powheg_xsection_scale_factor = powheg_xsection.GetBinContent(1) / powheg_xsection.GetEntries()
1061-
self.logger.info("POWHEG luminosity (mb^{-1}): %g", 1.0 / powheg_xsection_scale_factor)
10621060

1063-
df = pd.read_parquet(self.cfg("fd_parquet"))
1064-
col_mapping = {"dr": "delta_r_jet", "zpar": "z"} # TODO: check mapping
1061+
def estimate_feeddown(self):
1062+
"""Estimate feeddown from legacy Run 2 trees or gen-only simulation"""
1063+
match self.cfg("fd_input", "tree"):
1064+
case "tree":
1065+
with TFile(self.cfg("fd_root")) as rfile:
1066+
powheg_xsection = rfile.Get("fHistXsection")
1067+
powheg_xsection_scale_factor = powheg_xsection.GetBinContent(1) / powheg_xsection.GetEntries()
1068+
self.logger.info("POWHEG luminosity (mb^{-1}): %g", 1.0 / powheg_xsection_scale_factor)
1069+
1070+
df = pd.read_parquet(self.cfg("fd_parquet"))
1071+
col_mapping = {"dr": "delta_r_jet", "zpar": "z"} # TODO: check mapping
1072+
1073+
# TODO: generalize to higher dimensions
1074+
h3_fd_gen_orig = {}
1075+
for var in self.observables["all"]:
1076+
bins_ptjet = np.asarray(self.cfg("bins_ptjet"), "d")
1077+
# TODO: generalize or derive from histogram?
1078+
bins_obs = {}
1079+
if binning := self.cfg(f"observables.{var}.bins_gen_var"):
1080+
bins_tmp = np.asarray(binning, "d")
1081+
elif binning := self.cfg(f"observables.{var}.bins_gen_fix"):
1082+
bins_tmp = bin_array(*binning)
1083+
elif binning := self.cfg(f"observables.{var}.bins_var"):
1084+
bins_tmp = np.asarray(binning, "d")
1085+
elif binning := self.cfg(f"observables.{var}.bins_fix"):
1086+
bins_tmp = bin_array(*binning)
1087+
else:
1088+
self.logger.error("no binning specified for %s, using defaults", var)
1089+
bins_tmp = bin_array(10, 0.0, 1.0)
1090+
bins_obs[var] = bins_tmp
1091+
1092+
colname = col_mapping.get(var, f"{var}_jet")
1093+
if f"{colname}" not in df:
1094+
if var is not None:
1095+
self.logger.error("No feeddown information for %s (%s), cannot estimate feeddown", var, colname)
1096+
# print(df.info(), flush=True)
1097+
continue
1098+
1099+
# TODO: derive histogram
1100+
h3_fd_gen_orig[var] = create_hist(
1101+
"h3_feeddown_gen",
1102+
f";p_{{T}}^{{jet}} (GeV/#it{{c}});p_{{T}}^{{HF}} (GeV/#it{{c}});{var}",
1103+
bins_ptjet,
1104+
self.bins_candpt,
1105+
bins_obs[var],
1106+
)
1107+
fill_hist_fast(h3_fd_gen_orig, df[["pt_jet", "pt_cand", f"{colname}"]])
1108+
self._save_hist(project_hist(h3_fd_gen_orig, [0, 2], {}), f"fd/h_ptjet-{var}_feeddown_gen_noeffscaling.png")
1109+
1110+
case "sim":
1111+
# TODO: recover cross section
1112+
h3_fd_gen_orig = {}
1113+
with TFile(self.n_filemass_fd) as rfile:
1114+
for var in self.observables["all"]:
1115+
self.logger.info("Running feeddown analysis for obs. %s", var)
1116+
label = f"-{var}" if var else ""
1117+
if fh := rfile.Get(f"h_mass-ptjet-pthf{label}"):
1118+
h3_fd_gen_orig[var] = project_hist(fh, list(range(1, get_dim(fh))), {})
1119+
ensure_sumw2(h3_fd_gen_orig[var])
1120+
1121+
case fd_input:
1122+
self.logger.critical("Invalid feeddown input %s", fd_input)
10651123

1066-
# TODO: generalize to higher dimensions
10671124
for var in self.observables["all"]:
1068-
bins_ptjet = np.asarray(self.cfg("bins_ptjet"), "d")
1069-
# TODO: generalize or derive from histogram?
1070-
bins_obs = {}
1071-
if binning := self.cfg(f"observables.{var}.bins_gen_var"):
1072-
bins_tmp = np.asarray(binning, "d")
1073-
elif binning := self.cfg(f"observables.{var}.bins_gen_fix"):
1074-
bins_tmp = bin_array(*binning)
1075-
elif binning := self.cfg(f"observables.{var}.bins_var"):
1076-
bins_tmp = np.asarray(binning, "d")
1077-
elif binning := self.cfg(f"observables.{var}.bins_fix"):
1078-
bins_tmp = bin_array(*binning)
1079-
else:
1080-
self.logger.error("no binning specified for %s, using defaults", var)
1081-
bins_tmp = bin_array(10, 0.0, 1.0)
1082-
bins_obs[var] = bins_tmp
1083-
1084-
colname = col_mapping.get(var, f"{var}_jet")
1085-
if f"{colname}" not in df:
1086-
if var is not None:
1087-
self.logger.error("No feeddown information for %s (%s), cannot estimate feeddown", var, colname)
1088-
# print(df.info(), flush=True)
1089-
continue
1090-
1091-
# TODO: derive histogram
1092-
h3_fd_gen_orig = create_hist(
1093-
"h3_feeddown_gen",
1094-
f";p_{{T}}^{{jet}} (GeV/#it{{c}});p_{{T}}^{{HF}} (GeV/#it{{c}});{var}",
1095-
bins_ptjet,
1096-
self.bins_candpt,
1097-
bins_obs[var],
1098-
)
1099-
fill_hist_fast(h3_fd_gen_orig, df[["pt_jet", "pt_cand", f"{colname}"]])
1100-
self._save_hist(project_hist(h3_fd_gen_orig, [0, 2], {}), f"fd/h_ptjet-{var}_feeddown_gen_noeffscaling.png")
1101-
11021125
# new method
1103-
h3_fd_gen = h3_fd_gen_orig.Clone()
1126+
h3_fd_gen = h3_fd_gen_orig[var].Clone()
11041127
ensure_sumw2(h3_fd_gen)
11051128
self._save_hist(project_hist(h3_fd_gen, [0, 2], {}), f"fd/h_ptjet-{var}_fdnew_gen.png")
11061129
# apply np efficiency

machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ D0Jet_pp:
308308
multi:
309309
data:
310310
nprocessesparallel: 80
311-
maxfiles: [1] #list of periods
311+
maxfiles: [-1] #list of periods
312312
chunksizeunp: [100] #list of periods
313313
chunksizeskim: [100] #list of periods
314314
fracmerge: [.1] #list of periods
@@ -325,7 +325,7 @@ D0Jet_pp:
325325
mcreweights: [../Analyses] #list of periods
326326
mc:
327327
nprocessesparallel: 80
328-
maxfiles: [1] #list of periods
328+
maxfiles: [-1] #list of periods
329329
chunksizeunp: [100] #list of periods
330330
chunksizeskim: [1000] #list of periods
331331
fracmerge: [1.] #list of periods
@@ -342,7 +342,7 @@ D0Jet_pp:
342342
mcreweights: [../Analyses] #list of periods
343343
fd:
344344
nprocessesparallel: 80
345-
maxfiles: [1] #list of periods
345+
maxfiles: [-1] #list of periods
346346
chunksizeunp: [100] #list of periods
347347
chunksizeskim: [1000] #list of periods
348348
fracmerge: [1.] #list of periods

machine_learning_hep/submission/processor.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ analysis:
6363
histomass: true # processer: process_histomass
6464
mc:
6565
histomass: true # processer: process_histomass
66-
efficiency: false # processer: process_efficiency
66+
efficiency: true # processer: process_efficiency
6767
fd:
6868
histomass: true
6969
steps:

0 commit comments

Comments
 (0)