Skip to content

Commit 8f558fe

Browse files
committed
WIP2
1 parent 38af7bf commit 8f558fe

File tree

4 files changed

+44
-35
lines changed

4 files changed

+44
-35
lines changed

cms-2016-simulated-datasets/code/config_store.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,8 @@ def get_conffile_ids_all_chain_steps(dataset, mcm_dir):
1919
step_dir = path + '/' + step
2020
mcm_config_ids = get_conffile_ids_from_mcm(dataset, step_dir)
2121

22-
for someid in mcm_config_ids:
22+
if mcm_config_ids:
23+
for someid in mcm_config_ids:
2324
ids[someid] = 1
2425

2526
return list(ids.keys())

cms-2016-simulated-datasets/code/dataset_records.py

Lines changed: 35 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -166,6 +166,8 @@ def get_globaltag_from_conffile(afile, conf_dir):
166166
globaltag = m.groups(1)[0]
167167
return globaltag
168168

169+
# TODO when we are doing MINI, then exclude the NANO step so that it does not appear
170+
# TODO move generator cards to the root and exclude LOG.txt when assembling list
169171

170172
def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid_info):
171173
"""Return DICT with all information about the generator steps."""
@@ -186,7 +188,7 @@ def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid_info):
186188
step = {}
187189
process = ''
188190
output_dataset = get_output_dataset_from_mcm(dataset, mcm_step_dir)
189-
if output_dataset:
191+
if output_dataset:
190192
step['output_dataset'] = output_dataset[0]
191193
release = get_cmssw_version_from_mcm(dataset, mcm_step_dir)
192194
if release:
@@ -208,7 +210,7 @@ def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid_info):
208210
generator_names = get_generator_name(dataset, mcm_step_dir)
209211
if generator_names:
210212
step['generators'] = generator_names
211-
213+
212214
m = re.search('-(.+?)-', step_dir)
213215
if m:
214216
step_name = m.group(1)
@@ -238,8 +240,8 @@ def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid_info):
238240

239241
step['type'] = process
240242

241-
# Extend LHE steps
242-
if step_name.endswith('LHEGEN'):
243+
# Extend LHE steps
244+
if step_name.endswith('LHEGEN'):
243245
step['type'] = "LHE GEN"
244246
for i, configuration_files in enumerate(step['configuration_files']):
245247
if configuration_files['title'] == 'Generator parameters':
@@ -260,7 +262,7 @@ def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid_info):
260262
else:
261263
if 'generators' in step:
262264
generators_present = True
263-
265+
264266
return info
265267

266268
def populate_containerimages_cache():
@@ -276,13 +278,14 @@ def populate_mininanorelation_cache(dataset_full_names, mcm_dir):
276278
if dataset_full_name.endswith('MINIAODSIM'):
277279
nano_found = 0
278280
dataset_first_name = get_from_deep_json(get_mcm_dict(dataset_full_name, mcm_dir), 'dataset_name')
279-
for x in os.listdir(mcm_dir + '/chain'):
280-
if x.startswith('@'+dataset_first_name):
281-
dataset_name_for_nano = x.replace('@', '/')
282-
nano_found = 1
283-
MININANORELATION_CACHE[dataset_full_name] = dataset_name_for_nano
281+
if dataset_first_name:
282+
for x in os.listdir(mcm_dir + '/chain'):
283+
if x and x.startswith('@'+dataset_first_name):
284+
dataset_name_for_nano = x.replace('@', '/')
285+
nano_found = 1
286+
MININANORELATION_CACHE[dataset_full_name] = dataset_name_for_nano
284287
if nano_found==0:
285-
print("A corresponding NANOAODSIM was not found for dataset: " + dataset_full_name)
288+
print("A corresponding NANOAODSIM was not found for dataset: " + dataset_full_name)
286289

287290

288291
def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm_dir, conffiles_dir):
@@ -293,7 +296,7 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm
293296
dataset = get_dataset(dataset_full_name)
294297
dataset_format = get_dataset_format(dataset_full_name)
295298
year_created = '2016'
296-
year_published = '2023' #
299+
year_published = '2023' #
297300
run_period = ['Run2016G', 'Run2016H'] #
298301

299302
additional_title = 'Simulated dataset ' + dataset + ' in ' + dataset_format + ' format for ' + year_created + ' collision data'
@@ -338,7 +341,7 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm
338341
if doi:
339342
rec['doi'] = doi
340343

341-
rec['experiment'] = 'CMS'
344+
rec['experiment'] = ['CMS', ]
342345

343346
rec_files = get_dataset_index_files(dataset_full_name, eos_dir)
344347
if rec_files:
@@ -367,12 +370,13 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm
367370

368371
pileup_dataset_name= ''
369372
pileup_dataset_name= get_pileup_from_mcm(dataset_name_for_nano, mcm_dir)
370-
373+
371374
pileup_dataset_recid = {
372375
'/MinBias_TuneZ2_7TeV-pythia6/Summer11Leg-START53_LV4-v1/GEN-SIM': 36, # 2011
373376
'/MinBias_TuneZ2star_8TeV-pythia6/Summer12-START50_V13-v3/GEN-SIM': 37, # 2012
374377
'/MinBias_TuneCUETP8M1_13TeV-pythia8/RunIISummer15GS-MCRUN2_71_V1-v2/GEN-SIM': 22314, # 2015
375-
#'/MinBias_TuneCUETP8M1_13TeV-pythia8/RunIISummer15GS-magnetOffBS0T_MCRUN2_71_V1-v1/GEN-SIM': {recid}, # 2015 TODO
378+
# TODO 2016 take from Kati's PR
379+
#'/MinBias_TuneCUETP8M1_13TeV-pythia8/RunIISummer15GS-magnetOffBS0T_MCRUN2_71_V1-v1/GEN-SIM': {recid}, # 2015
376380
'/MinBias_TuneCP5_13TeV-pythia8/RunIIFall18GS-IdealGeometry_102X_upgrade2018_design_v9-v1/GEN-SIM': 12302 # 2018
377381
}.get(pileup_dataset_name, 0)
378382

@@ -381,7 +385,7 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm
381385
rec['pileup'] = {}
382386
if pileup_dataset_recid:
383387
rec['pileup']['description'] = "<p>To make these simulated data comparable with the collision data, <a href=\"/docs/cms-guide-pileup-simulation\">pile-up events</a> are added to the simulated event in the DIGI2RAW step.</p>"
384-
rec['pileup']['links'] = [
388+
rec['pileup']['links'] = [
385389
{
386390
"recid": str(pileup_dataset_recid),
387391
"title": pileup_dataset_name
@@ -398,7 +402,7 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm
398402

399403
if dataset_full_name.endswith('NANOAODSIM'):
400404
# Query from mcm dict fails for an example dataset because Mini is v1 in mcm and v2 in dataset list
401-
# Get it from das instead
405+
# Get it from das instead
402406
#dataset_name_for_mini = get_from_deep_json(get_mcm_dict(dataset_full_name, mcm_dir), 'input_dataset')
403407
dataset_name_for_mini = get_parent_dataset(dataset_full_name, das_dir)
404408
relations_description = 'The corresponding MINIAODSIM dataset:'
@@ -408,7 +412,7 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm
408412
relations_description = 'The corresponding NANOAODSIM dataset:'
409413
relations_recid = str(recid_info[dataset_name_for_nano])
410414
relations_type = 'isChildOf'
411-
415+
412416
rec['relations'] = [
413417
{
414418
'description': relations_description,
@@ -452,17 +456,18 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm
452456
rec['usage']['description'] = "You can access these data through the CMS Open Data container or the CMS Virtual Machine. See the instructions for setting up one of the two alternative environments and getting started in"
453457
rec['usage']['links'] = [
454458
{
455-
"description": "Running CMS analysis code using Docker",
459+
"description": "Running CMS analysis code using Docker",
456460
"url": "/docs/cms-guide-docker"
457-
},
461+
},
458462
{
459-
"description": "How to install the CMS Virtual Machine",
463+
"description": "How to install the CMS Virtual Machine",
460464
"url": "/docs/cms-virtual-machine-2016"
461-
},
465+
},
462466
{
463-
"description": "Getting started with CMS open data",
467+
"description": "Getting started with CMS open data",
464468
"url": "/docs/cms-getting-started-2016"
465469
}
470+
# TODO Amend links taking them from Kati's pile-up PR
466471
]
467472

468473
rec['validation'] = {}
@@ -476,7 +481,7 @@ def create(dataset, doi_info, recid_info, eos_dir, das_dir, mcm_dir, conffiles_d
476481
if os.path.exists(filepath) and os.stat(filepath).st_size != 0:
477482
print("==> " + dataset + "\n==> Already exist. Skipping...\n")
478483
return
479-
484+
480485
Record= create_record(dataset, doi_info, recid_info, eos_dir, das_dir, mcm_dir, conffiles_dir)
481486

482487
with open(filepath, 'w') as file:
@@ -500,8 +505,8 @@ def create_records(dataset_full_names, doi_file, recid_file, eos_dir, das_dir, m
500505
t= threading.Thread(target=create, args=(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm_dir, conffiles_dir, records_dir))
501506
t.start()
502507
while threading.activeCount() >= 20 :
503-
sleep(0.5) # run 20 parallel
504-
508+
sleep(0.5) # run 20 parallel
509+
505510
#records.append(create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm_dir, conffiles_dir))
506511
#return records
507512

@@ -541,10 +546,10 @@ def get_step_generator_parameters(dataset, mcm_dir, recid, force_lhe=0):
541546
if mcdb_id > 1:
542547
print("Got mcdb > 1: " + str(mcdb_id))
543548
configuration_files['title'] = 'Generator parameters'
544-
configuration_files['url'] = "/eos/opendata/cms/lhe_generators/2015-sim/mcdb/{mcdb_id}_header.txt".format(mcdb_id=mcdb_id)
545-
return [configuration_files]
546-
else:
547-
dir='./lhe_generators/2016-sim/gridpacks/' + str(recid) + '/'
549+
configuration_files['url'] = "/eos/opendata/cms/lhe_generators/2015-sim/mcdb/{mcdb_id}_header.txt".format(mcdb_id=mcdb_id)
550+
return [configuration_files]
551+
else:
552+
dir='./lhe_generators/2016-sim/gridpacks/' + str(recid) + '/'
548553
files = []
549554
files = [f for f in os.listdir(dir) if os.path.isfile(os.path.join(dir, f))]
550555
confarr=[]

cms-2016-simulated-datasets/code/lhe_generators.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,10 @@ def log(recid, logtype, logmessage):
3131
def get_lhe(dataset, mcm_dir):
3232
"""Get LHE Parent or False"""
3333
path = mcm_dir + "/chain/" + dataset.replace("/", "@")
34-
step_dirs = os.listdir(path)
34+
try:
35+
step_dirs = os.listdir(path)
36+
except:
37+
return False
3538
for step in step_dirs:
3639
step_dir = path + "/" + step
3740
datatier = get_from_deep_json(get_mcm_dict(dataset, step_dir), "datatier")
@@ -78,12 +81,12 @@ def create_lhe_generator(
7881
return
7982

8083
# Find gridpack path
81-
path = re.search(r"cms.vstring\(['\"](/cvmfs.*?)['\"]", fragment)
84+
path = re.search(r"cms.vstring\(['\"\[]\s*(/cvmfs.*?)['\"]", fragment)
8285
if not path:
8386
log(
8487
recid,
8588
"ERROR",
86-
f"No 'cms.vstring(['\"]/cvmfs' found in fragment; skipping.",
89+
f"No 'cms.vstring(/cvmfs' found in fragment; skipping.",
8790
)
8891
return
8992

cms-2016-simulated-datasets/code/mcm_store.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -137,7 +137,7 @@ def get_mcm_dict(dataset, mcm_dir):
137137
except:
138138
return json.loads('{}')
139139
else:
140-
print('[ERROR] There is no McM JSON store dict for dataset ' + dataset,
140+
print(f'[ERROR] There is no McM JSON store dict for dataset {dataset} in directory {mcm_dir}',
141141
file=sys.stderr)
142142
return json.loads('{}')
143143

0 commit comments

Comments
 (0)