@@ -91,7 +91,7 @@ def get_dataset(dataset_full_name):
9191
def get_dataset_version(dataset_full_name):
    """Return dataset version from dataset full name.

    The version is the text found between the first ``-`` that follows the
    ``RunIISummer20UL16`` campaign tag and the trailing ``/MINIAODSIM`` or
    ``/NANOAODSIM`` data tier, e.g.
    ``/X/RunIISummer20UL16NanoAODv9-106X_v17-v1/NANOAODSIM`` gives
    ``106X_v17-v1``.

    Raises:
        ValueError: if ``dataset_full_name`` does not follow that layout.
    """
    match = re.search(
        r'^.*RunIISummer20UL16.*?-(.*)/(MINI|NANO)AODSIM$',
        dataset_full_name,
    )
    if match is None:
        # Fail with the offending name instead of an opaque
        # "'NoneType' object has no attribute 'groups'".
        raise ValueError(
            "Cannot extract dataset version from dataset name: "
            + repr(dataset_full_name)
        )
    return match.group(1)
9595
9696
9797def get_dataset_index_files (dataset_full_name , eos_dir ):
@@ -288,13 +288,42 @@ def populate_mininanorelation_cache(dataset_full_names, mcm_dir):
288288 print ("A corresponding NANOAODSIM was not found for dataset: " + dataset_full_name )
289289
290290
def _run_inspect_script(option, doc_path, sample_file_path, script="inspectNanoFile.py"):
    """Run the external inspection script so that it writes ``doc_path``.

    The call is best effort: a script that cannot be started or that exits
    with a non-zero status is reported on stdout and does not raise.
    """
    # Argument list instead of a shell string, so paths containing spaces or
    # shell metacharacters are passed through verbatim.
    cmd = ["python3", f"external-scripts/{script}", option, doc_path, sample_file_path]
    try:
        completed = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )
    except OSError as err:
        print(f"Could not run {script} to produce {doc_path}: {err}")
        return
    if completed.returncode != 0:
        print(f"{script} failed to produce {doc_path}: {completed.stdout}")


def get_dataset_semantics_doc(dataset_name, sample_file_path, recid):
    """Produce the dataset semantics files and return their data-curation paths for the given dataset.

    Runs ``external-scripts/inspectNanoFile.py`` twice on ``sample_file_path``
    (once with ``--doc`` for HTML, once with ``--json``), writing the results
    below ``outputs/docs/<tier>/<recid>/``.

    Args:
        dataset_name: dataset name ending in ``/NANOAODSIM`` or
            ``/MINIAODSIM``; the suffix selects the output sub-directory.
        sample_file_path: path of the file handed to the inspection script.
        recid: record identifier used as the last directory component.

    Returns:
        dict with keys ``"url"`` (HTML document) and ``"json"`` (JSON
        document), each holding the path under
        ``/eos/opendata/cms/dataset-semantics/<tier>/<recid>/``.

    Raises:
        ValueError: if ``dataset_name`` ends with neither supported data tier.
    """
    # NOTE(review): the visible caller passes the result of get_dataset();
    # confirm that value still carries the "/NANOAODSIM" suffix.
    if dataset_name.endswith('/NANOAODSIM'):
        tier_dir = "NanoAODSIM"
    elif dataset_name.endswith('/MINIAODSIM'):
        tier_dir = "MiniAODSIM"
    else:
        # Previously this fell through and crashed with UnboundLocalError.
        raise ValueError(
            f"Unsupported data tier in dataset name: {dataset_name!r}"
        )

    output_dir = f"outputs/docs/{tier_dir}/{recid}"
    eos_dir = f"/eos/opendata/cms/dataset-semantics/{tier_dir}/{recid}"

    semantics_files = {}
    for key, option, extension in (("url", "--doc", "html"), ("json", "--json", "json")):
        local_doc_path = f"{output_dir}/{dataset_name}_doc.{extension}"
        # dataset_name contains "/" separators, so the document lives in a
        # sub-directory of output_dir that has to exist before the script runs.
        os.makedirs(os.path.dirname(local_doc_path), exist_ok=True)
        _run_inspect_script(option, local_doc_path, sample_file_path)
        semantics_files[key] = f"{eos_dir}/{dataset_name}_doc.{extension}"

    return semantics_files
316+
317+
291318def create_record (dataset_full_name , doi_info , recid_info , eos_dir , das_dir , mcm_dir , conffiles_dir ):
292319 """Create record for the given dataset."""
293320
294321 rec = {}
295322
296323 dataset = get_dataset (dataset_full_name )
297324 dataset_format = get_dataset_format (dataset_full_name )
325+ dataset_version = get_dataset_version (dataset_full_name )
326+
298327 year_created = '2016'
299328 year_published = '2023' #
300329 run_period = ['Run2016G' , 'Run2016H' ] #
@@ -318,15 +347,12 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm
318347 rec ['collision_information' ]['energy' ] = collision_energy
319348 rec ['collision_information' ]['type' ] = collision_type
320349
321- # FIXME cross section will be read in separately
322- generator_parameters = get_generator_parameters_from_mcm (dataset_full_name , mcm_dir )
323- # if generator_parameters:
324- # rec['cross_section'] = {}
325- # rec['cross_section']['value'] = generator_parameters.get('cross_section', None)
326- # rec['cross_section']['filter_efficiency:'] = generator_parameters.get('filter_efficiency', None)
327- # rec['cross_section']['filter_efficiency_error:'] = generator_parameters.get('filter_efficiency_error', None)
328- # rec['cross_section']['match_efficiency:'] = generator_parameters.get('match_efficiency', None)
329- # rec['cross_section']['match_efficiency error:'] = generator_parameters.get('match_efficiency_error', None)
350+ if dataset_format == "NANOAODSIM" :
351+ dataset_path = f"/eos/opendata/cms//mc/{ run_period } /{ dataset } /NANOAODSIM/{ dataset_version } "
352+ intermediate_dir = os .listdir (dataset_path )
353+ sample_file_path = f"{ dataset_path } /{ intermediate_dir [0 ]} "
354+ sample_file_with_path = f"{ sample_file_path } /{ os .listdir (sample_file_path )[0 ]} "
355+ rec ["dataset_semantics_files" ] = get_dataset_semantics_doc (dataset , sample_file_with_path , recid )
330356
331357 rec ['date_created' ] = [year_created ]
332358 rec ['date_published' ] = year_published
0 commit comments