Skip to content

Commit 11477df

Browse files
committed
WIP3
1 parent 8f558fe commit 11477df

File tree

5 files changed

+857
-13
lines changed

5 files changed

+857
-13
lines changed

cms-2016-simulated-datasets/code/dataset_records.py

+36-10
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def get_dataset(dataset_full_name):
9191

9292
def get_dataset_version(dataset_full_name):
    """Return dataset version from dataset full name.

    The version is the text between the first ``-`` that follows the
    ``RunIISummer20UL16...`` campaign tag and the trailing ``/MINIAODSIM``
    or ``/NANOAODSIM`` data tier.  (This pattern replaces the earlier
    ``Summer12_DR53X-(.*)/AODSIM`` one.)

    Raises:
        ValueError: if ``dataset_full_name`` does not follow that layout.
    """
    match = re.search(
        r'^.*RunIISummer20UL16.*?-(.*)/(MINI|NANO)AODSIM$',
        dataset_full_name,
    )
    if match is None:
        # Fail with the offending name instead of an AttributeError on None.
        raise ValueError(
            f"Cannot extract dataset version from: {dataset_full_name}"
        )
    return match.group(1)
9595

9696

9797
def get_dataset_index_files(dataset_full_name, eos_dir):
@@ -288,13 +288,42 @@ def populate_mininanorelation_cache(dataset_full_names, mcm_dir):
288288
print("A corresponding NANOAODSIM was not found for dataset: " + dataset_full_name)
289289

290290

291+
def _write_semantics_doc(script, flag, doc_path, sample_file_path):
    """Run the inspection script once, asking it to write ``doc_path``."""
    cmd = ["python3", f"external-scripts/{script}", flag, doc_path, sample_file_path]
    try:
        # Argument list (no shell), so paths are passed through verbatim.
        result = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )
    except OSError as err:
        # Best effort, as before: report the problem and carry on.
        print(f"Could not run {script}: {err}")
        return
    if result.returncode != 0:
        print(f"{script} failed for {sample_file_path}: {result.stdout}")


def get_dataset_semantics_doc(dataset_name, sample_file_path, recid):
    """Produce the dataset semantics files and return their data-curation paths.

    Runs ``external-scripts/inspectNanoFile.py`` twice on ``sample_file_path``,
    once with ``--doc`` (HTML output) and once with ``--json``, writing below
    ``outputs/docs/<tier>/<recid>``.  A failing script run is reported with
    ``print`` and does not abort the call.

    Args:
        dataset_name: dataset name ending in ``/NANOAODSIM`` or ``/MINIAODSIM``.
        sample_file_path: path of the sample file handed to the script.
        recid: record id used as the last directory component.

    Returns:
        dict with keys ``"url"`` (HTML doc) and ``"json"`` (JSON doc), both
        paths under ``/eos/opendata/cms/dataset-semantics``.  This function
        only builds those EOS paths; it does not copy anything there.

    Raises:
        ValueError: if ``dataset_name`` has neither supported suffix.
    """
    if dataset_name.endswith('/NANOAODSIM'):
        tier = "NanoAODSIM"
    elif dataset_name.endswith('/MINIAODSIM'):
        tier = "MiniAODSIM"
    else:
        # Previously fell through and crashed on an unbound output_dir.
        raise ValueError(
            f"Unsupported dataset format for semantics docs: {dataset_name}"
        )

    output_dir = f"outputs/docs/{tier}/{recid}"
    eos_dir = f"/eos/opendata/cms/dataset-semantics/{tier}/{recid}"
    script = "inspectNanoFile.py"

    html_doc_path = f"{output_dir}/{dataset_name}_doc.html"
    json_doc_path = f"{output_dir}/{dataset_name}_doc.json"

    # dataset_name contains '/', so the doc files live in sub-directories of
    # output_dir; create the whole chain (this also creates output_dir).
    os.makedirs(os.path.dirname(html_doc_path), exist_ok=True)

    _write_semantics_doc(script, "--doc", html_doc_path, sample_file_path)
    _write_semantics_doc(script, "--json", json_doc_path, sample_file_path)

    return {
        "url": f"{eos_dir}/{dataset_name}_doc.html",
        "json": f"{eos_dir}/{dataset_name}_doc.json",
    }
316+
317+
291318
def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm_dir, conffiles_dir):
292319
"""Create record for the given dataset."""
293320

294321
rec = {}
295322

296323
dataset = get_dataset(dataset_full_name)
297324
dataset_format = get_dataset_format(dataset_full_name)
325+
dataset_version = get_dataset_version(dataset_full_name)
326+
298327
year_created = '2016'
299328
year_published = '2023' #
300329
run_period = ['Run2016G', 'Run2016H'] #
@@ -318,15 +347,12 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm
318347
rec['collision_information']['energy'] = collision_energy
319348
rec['collision_information']['type'] = collision_type
320349

321-
# FIXME cross section will be read in separately
322-
generator_parameters = get_generator_parameters_from_mcm(dataset_full_name, mcm_dir)
323-
# if generator_parameters:
324-
# rec['cross_section'] = {}
325-
# rec['cross_section']['value'] = generator_parameters.get('cross_section', None)
326-
# rec['cross_section']['filter_efficiency:'] = generator_parameters.get('filter_efficiency', None)
327-
# rec['cross_section']['filter_efficiency_error:'] = generator_parameters.get('filter_efficiency_error', None)
328-
# rec['cross_section']['match_efficiency:'] = generator_parameters.get('match_efficiency', None)
329-
# rec['cross_section']['match_efficiency error:'] = generator_parameters.get('match_efficiency_error', None)
350+
if dataset_format == "NANOAODSIM":
351+
dataset_path = f"/eos/opendata/cms//mc/{run_period}/{dataset}/NANOAODSIM/{dataset_version}"
352+
intermediate_dir = os.listdir(dataset_path)
353+
sample_file_path = f"{dataset_path}/{intermediate_dir[0]}"
354+
sample_file_with_path = f"{sample_file_path}/{os.listdir(sample_file_path)[0]}"
355+
rec["dataset_semantics_files"] = get_dataset_semantics_doc(dataset, sample_file_with_path, recid)
330356

331357
rec['date_created'] = [year_created]
332358
rec['date_published'] = year_published

cms-2016-simulated-datasets/code/eos_store.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ def get_dataset_volume_files(dataset, volume):
9090
"Return file list with information about name, size, location for the given dataset and volume."
9191
files = []
9292
dataset_location = get_dataset_location(dataset)
93-
output = subprocess.check_output('eos find --size --checksum ' + dataset_location + '/' + volume, shell=True)
93+
output = subprocess.check_output('eos oldfind --size --checksum ' + dataset_location + '/' + volume, shell=True)
9494
output = str(output.decode("utf-8"))
9595
for line in output.split('\n'):
9696
if line and line != 'file-indexes':
@@ -141,7 +141,7 @@ def create_index_files(dataset, volume, eos_dir):
141141
copy_index_file(dataset, volume, filename, eos_dir)
142142

143143

144-
def main(datasets = [], eos_dir = './inputs/eos-file-indexes'):
144+
def main(datasets = [], eos_dir = './inputs/eos-file-indexes/'):
145145
"Do the job."
146146

147147
if not os.path.exists(eos_dir):

cms-2016-simulated-datasets/code/interface.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
@click.option('--create-eos-indexes/--no-create-eos-indexes', default=False,
1616
show_default=True,
1717
help="Create EOS rich index files")
18-
@click.option('--eos-dir', default='./inputs/eos-file-indexes',
18+
@click.option('--eos-dir', default='./inputs/eos-file-indexes/',
1919
show_default=True,
2020
help='Output directory for the EOS file indexes')
2121
@click.option('--ignore-eos-store/--no-ignore-eos-store',

0 commit comments

Comments
 (0)