@@ -91,7 +91,7 @@ def get_dataset(dataset_full_name):
91
91
92
92
def get_dataset_version(dataset_full_name):
    """Return dataset version from dataset full name.

    The version is the text between the first ``-`` that follows the
    ``RunIISummer20UL16`` campaign tag and the trailing ``/MINIAODSIM`` or
    ``/NANOAODSIM`` data tier.

    Args:
        dataset_full_name: Full dataset name ending in ``/MINIAODSIM`` or
            ``/NANOAODSIM``.

    Returns:
        The version part of the name as a string.

    Raises:
        ValueError: If the name does not follow the expected pattern.
    """
    # The pattern must not contain whitespace: dataset names have none, so
    # any blank inside the expression would make every lookup fail.
    match = re.search(
        r'^.*RunIISummer20UL16.*?-(.*)/(MINI|NANO)AODSIM$',
        dataset_full_name,
    )
    if match is None:
        # Fail with a readable message instead of an AttributeError raised
        # by calling .groups() on None.
        raise ValueError(
            "Cannot extract dataset version from: " + repr(dataset_full_name)
        )
    return match.group(1)
95
95
96
96
97
97
def get_dataset_index_files (dataset_full_name , eos_dir ):
@@ -288,13 +288,42 @@ def populate_mininanorelation_cache(dataset_full_names, mcm_dir):
288
288
print ("A corresponding NANOAODSIM was not found for dataset: " + dataset_full_name )
289
289
290
290
291
def get_dataset_semantics_doc(dataset_name, sample_file_path, recid):
    """Produce the dataset semantics files and return their data-curation paths for the given dataset.

    Runs ``external-scripts/inspectNanoFile.py`` twice on the sample file,
    once with ``--doc`` (HTML) and once with ``--json``, writing the results
    below ``outputs/docs/<tier>/<recid>/``.

    Args:
        dataset_name: Dataset name ending in ``/NANOAODSIM`` or
            ``/MINIAODSIM``; the suffix selects the output sub-directory.
        sample_file_path: Path of the sample file handed to the script.
        recid: Record identifier used as the last directory component.

    Returns:
        A dict with keys ``"url"`` (HTML document) and ``"json"`` (JSON
        document) holding the corresponding paths below
        ``/eos/opendata/cms/dataset-semantics/``.

    Raises:
        ValueError: If ``dataset_name`` ends with neither supported suffix.
    """
    # Validate first, before touching the filesystem; an unsupported name
    # previously surfaced as an UnboundLocalError further down.
    if dataset_name.endswith('/NANOAODSIM'):
        tier_dir = "NanoAODSIM"
    elif dataset_name.endswith('/MINIAODSIM'):
        tier_dir = "MiniAODSIM"
    else:
        raise ValueError(
            "Unsupported dataset format (expected a name ending in "
            "/NANOAODSIM or /MINIAODSIM): " + repr(dataset_name)
        )

    output_dir = f"outputs/docs/{tier_dir}/{recid}"
    eos_dir = f"/eos/opendata/cms/dataset-semantics/{tier_dir}/{recid}"
    script = "inspectNanoFile.py"

    semantics_files = {}
    for key, flag, extension in (("url", "--doc", "html"), ("json", "--json", "json")):
        local_doc_path = f"{output_dir}/{dataset_name}_doc.{extension}"
        # dataset_name contains "/" so the file lives in a sub-directory of
        # output_dir; create the whole chain, tolerating existing directories.
        os.makedirs(os.path.dirname(local_doc_path), exist_ok=True)

        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed through untouched.
        cmd = ["python3", f"external-scripts/{script}", flag, local_doc_path, sample_file_path]
        try:
            result = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                universal_newlines=True,
                check=False,
            )
        except OSError as err:
            # Stay best-effort (as before), but do not hide the failure.
            print(f"Could not run {script} for dataset {dataset_name}: {err}")
        else:
            if result.returncode != 0:
                print(
                    f"{script} {flag} failed for dataset {dataset_name} "
                    f"(exit code {result.returncode}): {result.stdout.strip()}"
                )

        semantics_files[key] = f"{eos_dir}/{dataset_name}_doc.{extension}"

    return semantics_files
316
+
317
+
291
318
def create_record (dataset_full_name , doi_info , recid_info , eos_dir , das_dir , mcm_dir , conffiles_dir ):
292
319
"""Create record for the given dataset."""
293
320
294
321
rec = {}
295
322
296
323
dataset = get_dataset (dataset_full_name )
297
324
dataset_format = get_dataset_format (dataset_full_name )
325
+ dataset_version = get_dataset_version (dataset_full_name )
326
+
298
327
year_created = '2016'
299
328
year_published = '2023' #
300
329
run_period = ['Run2016G' , 'Run2016H' ] #
@@ -318,15 +347,12 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm
318
347
rec ['collision_information' ]['energy' ] = collision_energy
319
348
rec ['collision_information' ]['type' ] = collision_type
320
349
321
- # FIXME cross section will be read in separately
322
- generator_parameters = get_generator_parameters_from_mcm (dataset_full_name , mcm_dir )
323
- # if generator_parameters:
324
- # rec['cross_section'] = {}
325
- # rec['cross_section']['value'] = generator_parameters.get('cross_section', None)
326
- # rec['cross_section']['filter_efficiency:'] = generator_parameters.get('filter_efficiency', None)
327
- # rec['cross_section']['filter_efficiency_error:'] = generator_parameters.get('filter_efficiency_error', None)
328
- # rec['cross_section']['match_efficiency:'] = generator_parameters.get('match_efficiency', None)
329
- # rec['cross_section']['match_efficiency error:'] = generator_parameters.get('match_efficiency_error', None)
350
+ if dataset_format == "NANOAODSIM" :
351
+ dataset_path = f"/eos/opendata/cms//mc/{ run_period } /{ dataset } /NANOAODSIM/{ dataset_version } "
352
+ intermediate_dir = os .listdir (dataset_path )
353
+ sample_file_path = f"{ dataset_path } /{ intermediate_dir [0 ]} "
354
+ sample_file_with_path = f"{ sample_file_path } /{ os .listdir (sample_file_path )[0 ]} "
355
+ rec ["dataset_semantics_files" ] = get_dataset_semantics_doc (dataset , sample_file_with_path , recid )
330
356
331
357
rec ['date_created' ] = [year_created ]
332
358
rec ['date_published' ] = year_published
0 commit comments