@@ -166,6 +166,8 @@ def get_globaltag_from_conffile(afile, conf_dir):
             globaltag = m.groups(1)[0]
     return globaltag
 
+# TODO when we are doing MINI, then exclude the NANO step so that it does not appear
+# TODO move generator cards to the root and exclude LOG.txt when assembling list
 
 def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid_info):
     """Return DICT with all information about the generator steps."""
@@ -186,7 +188,7 @@ def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid_info):
         step = {}
         process = ''
         output_dataset = get_output_dataset_from_mcm(dataset, mcm_step_dir)
-        if output_dataset:
+        if output_dataset:
             step['output_dataset'] = output_dataset[0]
         release = get_cmssw_version_from_mcm(dataset, mcm_step_dir)
         if release:
@@ -208,7 +210,7 @@ def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid_info):
         generator_names = get_generator_name(dataset, mcm_step_dir)
         if generator_names:
             step['generators'] = generator_names
-
+
         m = re.search('-(.+?)-', step_dir)
         if m:
             step_name = m.group(1)
@@ -238,8 +240,8 @@ def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid_info):
 
         step['type'] = process
 
-        # Extend LHE steps
-        if step_name.endswith('LHEGEN'):
+        # Extend LHE steps
+        if step_name.endswith('LHEGEN'):
             step['type'] = "LHE GEN"
             for i, configuration_files in enumerate(step['configuration_files']):
                 if configuration_files['title'] == 'Generator parameters':
@@ -260,7 +262,7 @@ def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid_info):
         else:
             if 'generators' in step:
                 generators_present = True
-
+
     return info
 
 def populate_containerimages_cache():
@@ -276,13 +278,14 @@ def populate_mininanorelation_cache(dataset_full_names, mcm_dir):
         if dataset_full_name.endswith('MINIAODSIM'):
             nano_found = 0
             dataset_first_name = get_from_deep_json(get_mcm_dict(dataset_full_name, mcm_dir), 'dataset_name')
-            for x in os.listdir(mcm_dir + '/chain'):
-                if x.startswith('@' + dataset_first_name):
-                    dataset_name_for_nano = x.replace('@', '/')
-                    nano_found = 1
-                    MININANORELATION_CACHE[dataset_full_name] = dataset_name_for_nano
+            if dataset_first_name:
+                for x in os.listdir(mcm_dir + '/chain'):
+                    if x and x.startswith('@' + dataset_first_name):
+                        dataset_name_for_nano = x.replace('@', '/')
+                        nano_found = 1
+                        MININANORELATION_CACHE[dataset_full_name] = dataset_name_for_nano
             if nano_found == 0:
-                print("A corresponding NANOAODSIM was not found for dataset: " + dataset_full_name)
+                print("A corresponding NANOAODSIM was not found for dataset: " + dataset_full_name)
 
 
 def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm_dir, conffiles_dir):
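
Annotation (not part of the patch): the new "if dataset_first_name:" and "if x and ..." guards in the hunk above protect against get_from_deep_json returning None; the None return value is an assumption here. A minimal standalone sketch of the failure the guard prevents:

dataset_first_name = None  # assumed return value when 'dataset_name' is missing from the McM dict

try:
    prefix = '@' + dataset_first_name
except TypeError as exc:
    print("without the guard:", exc)  # can only concatenate str (not "NoneType") to str

if dataset_first_name:
    prefix = '@' + dataset_first_name  # with the guard, the chain lookup is simply skipped
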
@@ -293,7 +296,7 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm
     dataset = get_dataset(dataset_full_name)
     dataset_format = get_dataset_format(dataset_full_name)
     year_created = '2016'
-    year_published = '2023' #
+    year_published = '2023' #
     run_period = ['Run2016G', 'Run2016H'] #
 
     additional_title = 'Simulated dataset ' + dataset + ' in ' + dataset_format + ' format for ' + year_created + ' collision data'
@@ -338,7 +341,7 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm
     if doi:
         rec['doi'] = doi
 
-    rec['experiment'] = 'CMS'
+    rec['experiment'] = ['CMS', ]
 
     rec_files = get_dataset_index_files(dataset_full_name, eos_dir)
     if rec_files:
@@ -367,12 +370,13 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm
 
     pileup_dataset_name = ''
     pileup_dataset_name = get_pileup_from_mcm(dataset_name_for_nano, mcm_dir)
-
+
     pileup_dataset_recid = {
         '/MinBias_TuneZ2_7TeV-pythia6/Summer11Leg-START53_LV4-v1/GEN-SIM': 36, # 2011
         '/MinBias_TuneZ2star_8TeV-pythia6/Summer12-START50_V13-v3/GEN-SIM': 37, # 2012
         '/MinBias_TuneCUETP8M1_13TeV-pythia8/RunIISummer15GS-MCRUN2_71_V1-v2/GEN-SIM': 22314, # 2015
-        #'/MinBias_TuneCUETP8M1_13TeV-pythia8/RunIISummer15GS-magnetOffBS0T_MCRUN2_71_V1-v1/GEN-SIM': {recid}, # 2015 TODO
+        # TODO 2016 take from Kati's PR
+        #'/MinBias_TuneCUETP8M1_13TeV-pythia8/RunIISummer15GS-magnetOffBS0T_MCRUN2_71_V1-v1/GEN-SIM': {recid}, # 2015
         '/MinBias_TuneCP5_13TeV-pythia8/RunIIFall18GS-IdealGeometry_102X_upgrade2018_design_v9-v1/GEN-SIM': 12302 # 2018
     }.get(pileup_dataset_name, 0)
 
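
Annotation (not part of the patch): the block above maps known pile-up GEN-SIM dataset names to their open-data record ids, with .get() falling back to 0 for anything unknown. A standalone sketch of the pattern, with a hypothetical dataset name:

pileup_dataset_name = '/Some/Unknown/GEN-SIM'  # hypothetical name, not present in the map

pileup_dataset_recid = {
    '/MinBias_TuneZ2_7TeV-pythia6/Summer11Leg-START53_LV4-v1/GEN-SIM': 36, # 2011
}.get(pileup_dataset_name, 0)

print(pileup_dataset_recid)  # 0 -> the following hunk then skips the pile-up description and links
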
@@ -381,7 +385,7 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm
     rec['pileup'] = {}
     if pileup_dataset_recid:
         rec['pileup']['description'] = "<p>To make these simulated data comparable with the collision data, <a href=\"/docs/cms-guide-pileup-simulation\">pile-up events</a> are added to the simulated event in the DIGI2RAW step.</p>"
-        rec['pileup']['links'] = [
+        rec['pileup']['links'] = [
             {
                 "recid": str(pileup_dataset_recid),
                 "title": pileup_dataset_name
@@ -398,7 +402,7 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm
 
     if dataset_full_name.endswith('NANOAODSIM'):
         # Query from mcm dict fails for an example dataset because Mini is v1 in mcm and v2 in dataset list
-        # Get it from das instead
+        # Get it from das instead
         #dataset_name_for_mini = get_from_deep_json(get_mcm_dict(dataset_full_name, mcm_dir), 'input_dataset')
         dataset_name_for_mini = get_parent_dataset(dataset_full_name, das_dir)
         relations_description = 'The corresponding MINIAODSIM dataset:'
@@ -408,7 +412,7 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm
         relations_description = 'The corresponding NANOAODSIM dataset:'
         relations_recid = str(recid_info[dataset_name_for_nano])
         relations_type = 'isChildOf'
-
+
     rec['relations'] = [
         {
             'description': relations_description,
@@ -452,17 +456,18 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm
     rec['usage']['description'] = "You can access these data through the CMS Open Data container or the CMS Virtual Machine. See the instructions for setting up one of the two alternative environments and getting started in"
     rec['usage']['links'] = [
         {
-            "description": "Running CMS analysis code using Docker",
+            "description": "Running CMS analysis code using Docker",
             "url": "/docs/cms-guide-docker"
-        },
+        },
         {
-            "description": "How to install the CMS Virtual Machine",
+            "description": "How to install the CMS Virtual Machine",
             "url": "/docs/cms-virtual-machine-2016"
-        },
+        },
         {
-            "description": "Getting started with CMS open data",
+            "description": "Getting started with CMS open data",
             "url": "/docs/cms-getting-started-2016"
         }
+        # TODO Amend links taking them from Kati's pile-up PR
     ]
 
     rec['validation'] = {}
@@ -476,7 +481,7 @@ def create(dataset, doi_info, recid_info, eos_dir, das_dir, mcm_dir, conffiles_d
     if os.path.exists(filepath) and os.stat(filepath).st_size != 0:
         print("==> " + dataset + "\n==> Already exist. Skipping...\n")
         return
-
+
     Record = create_record(dataset, doi_info, recid_info, eos_dir, das_dir, mcm_dir, conffiles_dir)
 
     with open(filepath, 'w') as file:
@@ -500,8 +505,8 @@ def create_records(dataset_full_names, doi_file, recid_file, eos_dir, das_dir, m
         t = threading.Thread(target=create, args=(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm_dir, conffiles_dir, records_dir))
         t.start()
         while threading.activeCount() >= 20:
-            sleep(0.5) # run 20 parallel
-
+            sleep(0.5) # run 20 parallel
+
     #records.append(create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm_dir, conffiles_dir))
     #return records
 
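
Annotation (not part of the patch): a standalone sketch of the thread-throttling pattern used above, with a dummy task in place of create(); threading.activeCount() in the file is a deprecated alias of threading.active_count():

import threading
from time import sleep

def work(i):
    sleep(0.1)  # placeholder for the real create(...) call

for i in range(100):
    threading.Thread(target=work, args=(i,)).start()
    while threading.active_count() >= 20:  # same throttle as above, non-deprecated spelling
        sleep(0.5)  # wait until some of the ~20 workers finish
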
@@ -541,10 +546,10 @@ def get_step_generator_parameters(dataset, mcm_dir, recid, force_lhe=0):
     if mcdb_id > 1:
         print("Got mcdb > 1: " + str(mcdb_id))
         configuration_files['title'] = 'Generator parameters'
-        configuration_files['url'] = "/eos/opendata/cms/lhe_generators/2015-sim/mcdb/{mcdb_id}_header.txt".format(mcdb_id=mcdb_id)
-        return [configuration_files]
-    else:
-        dir = './lhe_generators/2016-sim/gridpacks/' + str(recid) + '/'
+        configuration_files['url'] = "/eos/opendata/cms/lhe_generators/2015-sim/mcdb/{mcdb_id}_header.txt".format(mcdb_id=mcdb_id)
+        return [configuration_files]
+    else:
+        dir = './lhe_generators/2016-sim/gridpacks/' + str(recid) + '/'
         files = []
         files = [f for f in os.listdir(dir) if os.path.isfile(os.path.join(dir, f))]
         confarr = []