Skip to content

Commit 3a2095a

Browse files
committed
cms-2016-collision-datasets: fix record IDs for new RECO files
Fixes the generation of record IDs for RECO configuration files, which were running into the record ID range already reserved for the collision data. Because adding the full provenance chain produces more RECO files, their record IDs now have to jump over that reserved interval. Also adds the NanoAOD data semantics documents from the latest run.
1 parent 784813d commit 3a2095a

21 files changed

+1762
-71
lines changed

cms-2016-collision-datasets/code/create_cms_2016_collision_datasets.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ def get_run_numbers(dataset_full_name):
239239

240240

241241
def get_dataset_config_file_name(dataset_full_name):
242-
dataset = dataset_full_name.split("/")[1]
242+
dataset = dataset_full_name.split("/")[1]
243243
run_period = dataset_full_name.split("/")[2].split("-", 1)[0]
244244
version = dataset_full_name.split("/")[2].split("-")[1]
245245
config_file = f"ReReco-{run_period}-{dataset}-{version}"
@@ -276,7 +276,6 @@ def create_selection_information(dataset, dataset_full_name):
276276
out += "<p><strong>Data taking / HLT</strong>"
277277
out += '<br/>The collision data were assigned to different RAW datasets using the following <a href="/record/30300">HLT configuration</a>.</p>'
278278
# data processing / NANO/PAT/RECO:
279-
run_period = re.search(r"(Run[0-9]+.)", dataset_full_name).groups()[0]
280279
aodformat = dataset_full_name.split("/")[3]
281280
step_dataset = dataset_full_name
282281
steps = []
@@ -291,8 +290,8 @@ def create_selection_information(dataset, dataset_full_name):
291290
{"process": "PAT"},
292291
{"process": "RECO"}
293292
]
294-
295-
out += f"<p><strong>Data processing </strong>"
293+
294+
out += "<p><strong>Data processing </strong>"
296295
out += (
297296
"<br/>This %s dataset was processed from the RAW dataset by the following steps: "
298297
% (aodformat)
@@ -305,7 +304,7 @@ def create_selection_information(dataset, dataset_full_name):
305304
generator_text = "Configuration file for " + steps[i]['process'] + " step " + afile
306305
release = get_release_for_processing(step_dataset)
307306
global_tag = get_global_tag_for_processing(step_dataset)
308-
307+
309308
out += "<br/><strong>Step %s </strong>" % steps[i]['process']
310309
out += "<br/>Release: %s" % release
311310
out += "<br/>Global tag: %s" % global_tag
@@ -359,14 +358,15 @@ def get_dataset_index_files(dataset_full_name):
359358
files.append((afile_uri, afile_size, afile_checksum))
360359
return files
361360

361+
362362
def get_dataset_semantics_doc(dataset_name, sample_file_path, recid):
363363
"""Produce the dataset semantics files and return their data-curation paths for the given dataset."""
364364
output_dir = f"outputs/docs/NanoAOD/{recid}"
365-
eos_dir=f"/eos/opendata/cms/dataset-semantics/NanoAOD/{recid}"
365+
eos_dir = f"/eos/opendata/cms/dataset-semantics/NanoAOD/{recid}"
366366
isExist = os.path.exists(output_dir)
367367
if not isExist:
368368
os.makedirs(output_dir)
369-
369+
370370
script = "inspectNanoFile.py"
371371

372372
html_doc_path = f"{output_dir}/{dataset_name}_doc.html"
@@ -381,6 +381,7 @@ def get_dataset_semantics_doc(dataset_name, sample_file_path, recid):
381381

382382
return {"url": html_eos_path, "json": json_eos_path}
383383

384+
384385
def get_doi(dataset_full_name):
385386
"Return DOI for the given dataset."
386387
return DOI_INFO.get(dataset_full_name, "")

cms-2016-collision-datasets/code/create_reco_config_file_records.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323

2424

2525
RECID_START = 30400
26+
RECID_MAX = 30500 # when this record ID number is reached, continue from the "next" number
27+
RECID_NEXT = 30566 # next free record ID number
2628
YEAR_CREATED = "2016"
2729
YEAR_PUBLISHED = "2024"
2830
COLLISION_ENERGY = "13Tev"
@@ -128,10 +130,10 @@ def main():
128130

129131
if not afile_python_filename.startswith("ReReco") and not afile_python_filename.startswith("recoskim"):
130132
continue
131-
133+
132134
if afile_python_filename in files_seen:
133135
continue
134-
136+
135137
files_seen.append(afile_python_filename)
136138

137139
# Create nice reco_*.py files for copying them over to EOSPUBLIC
@@ -209,6 +211,10 @@ def main():
209211
)
210212
recid += 1
211213

214+
# jump over the record ID range that was already preselected for collision data
215+
if recid == RECID_MAX:
216+
recid = RECID_NEXT
217+
212218
fdesc.write("}\n")
213219
fdesc.close()
214220

cms-2016-collision-datasets/outputs/docs/NanoAOD/30518/BTagMu_doc.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12870,8 +12870,8 @@
1287012870
"Proton_singleRP_thetaY",
1287112871
"Proton_singleRP_xi",
1287212872
"Proton_singleRP_decRPId",
12873-
"nProton_multiRP",
12874-
"nProton_singleRP"
12873+
"nProton_singleRP",
12874+
"nProton_multiRP"
1287512875
]
1287612876
},
1287712877
"Muon": {

cms-2016-collision-datasets/outputs/docs/NanoAOD/30524/HTMHT_doc.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12663,8 +12663,8 @@
1266312663
"Proton_singleRP_thetaY",
1266412664
"Proton_singleRP_xi",
1266512665
"Proton_singleRP_decRPId",
12666-
"nProton_singleRP",
12667-
"nProton_multiRP"
12666+
"nProton_multiRP",
12667+
"nProton_singleRP"
1266812668
]
1266912669
},
1267012670
"Muon": {

0 commit comments

Comments
 (0)