15
15
16
16
import json
17
17
import os
18
- import sys
19
18
import re
20
19
import subprocess
21
- from utils import get_dataset_name , \
22
- get_dataset_runperiod , \
23
- get_dataset_version , \
24
- get_dataset_format , \
25
- get_dataset_year
20
+ import sys
21
+
22
+ from utils import (get_dataset_format , get_dataset_name , get_dataset_runperiod ,
23
+ get_dataset_version , get_dataset_year )
26
24
27
# XRootD endpoint prefix; prepended to an EOS path to build a file's "uri".
XROOTD_URI_BASE = "root://eospublic.cern.ch/"

# Leading part of every dataset location built by get_dataset_location().
XROOTD_DIR_BASE = "/eos/opendata/"

# Path component / file-name token placed right after the experiment name.
MCDIR_BASE = "mc"

# Experiment name: used as-is in EOS paths and upper-cased in index file names.
EXPERIMENT = "cms"

# When true, copy_index_file() prints the copy command it has built.
# NOTE(review): the non-debug branch is not visible here -- presumably it
# executes the command; confirm before switching this off.
DEBUG = True
36
34
@@ -41,9 +39,13 @@ def check_datasets_in_eos_dir(datasets, eos_dir):
41
39
dataset_full_names = []
42
40
for dataset in datasets :
43
41
dataset_index_file_base = get_dataset_index_file_base (dataset )
44
- if subprocess .call ('ls ' + eos_dir + ' | grep -q ' + dataset_index_file_base , shell = True ):
45
- print ('[ERROR] Missing EOS information, ignoring dataset ' + dataset ,
46
- file = sys .stderr )
42
+ if subprocess .call (
43
+ "ls " + eos_dir + " | grep -q " + dataset_index_file_base , shell = True
44
+ ):
45
+ print (
46
+ "[ERROR] Missing EOS information, ignoring dataset " + dataset ,
47
+ file = sys .stderr ,
48
+ )
47
49
else :
48
50
dataset_full_names .append (dataset )
49
51
@@ -52,36 +54,51 @@ def check_datasets_in_eos_dir(datasets, eos_dir):
52
54
53
55
def get_dataset_index_file_base(dataset):
    """Return index file base for given dataset.

    The base name is the underscore-joined sequence of the upper-cased
    EXPERIMENT, MCDIR_BASE and the dataset's run period, name, format and
    version, in that order.
    """
    tokens = [
        EXPERIMENT.upper(),
        MCDIR_BASE,
        get_dataset_runperiod(dataset),
        get_dataset_name(dataset),
        get_dataset_format(dataset),
        get_dataset_version(dataset),
    ]
    return "_".join(tokens)
62
71
72
+
63
73
def get_dataset_location(dataset):
    """Return EOS location of the dataset.

    The location is XROOTD_DIR_BASE followed by the slash-separated
    EXPERIMENT, MCDIR_BASE and the dataset's run period, name, format and
    version, in that order.
    """
    components = [
        EXPERIMENT,
        MCDIR_BASE,
        get_dataset_runperiod(dataset),
        get_dataset_name(dataset),
        get_dataset_format(dataset),
        get_dataset_version(dataset),
    ]
    # XROOTD_DIR_BASE is concatenated directly: no separator is inserted
    # between it and the first component.
    return XROOTD_DIR_BASE + "/".join(components)
72
89
73
90
74
91
def get_dataset_volumes(dataset):
    """Return list of volumes for the given dataset.

    Runs ``eos ls -1`` on the dataset location and returns every non-empty
    entry of the listing except ``file-indexes``. An empty list is returned
    when the command exits with a non-zero status.
    """
    dataset_location = get_dataset_location(dataset)
    try:
        output = subprocess.check_output("eos ls -1 " + dataset_location, shell=True)
    except subprocess.CalledProcessError:
        # Listing failed (non-zero exit status): report no volumes.
        return []
    listing = output.decode("utf-8")
    # One entry per line; skip blank lines and the "file-indexes" entry.
    return [
        entry
        for entry in listing.split("\n")
        if entry and entry != "file-indexes"
    ]
87
104
@@ -90,45 +107,61 @@ def get_dataset_volume_files(dataset, volume):
90
107
"Return file list with information about name, size, location for the given dataset and volume."
91
108
files = []
92
109
dataset_location = get_dataset_location (dataset )
93
- output = subprocess .check_output ('eos oldfind --size --checksum ' + dataset_location + '/' + volume , shell = True )
110
+ output = subprocess .check_output (
111
+ "eos oldfind --size --checksum " + dataset_location + "/" + volume , shell = True
112
+ )
94
113
output = str (output .decode ("utf-8" ))
95
- for line in output .split (' \n ' ):
96
- if line and line != ' file-indexes' :
97
- match = re .match (r' ^path=(.*) size=(.*) checksum=(.*)$' , line )
114
+ for line in output .split (" \n " ):
115
+ if line and line != " file-indexes" :
116
+ match = re .match (r" ^path=(.*) size=(.*) checksum=(.*)$" , line )
98
117
if match :
99
118
path , size , checksum = match .groups ()
100
- files .append ({'filename' : os .path .basename (path ),
101
- 'size' : int (size ),
102
- 'checksum' : 'adler32:' + checksum ,
103
- 'uri' : XROOTD_URI_BASE + path })
119
+ files .append (
120
+ {
121
+ "filename" : os .path .basename (path ),
122
+ "size" : int (size ),
123
+ "checksum" : "adler32:" + checksum ,
124
+ "uri" : XROOTD_URI_BASE + path ,
125
+ }
126
+ )
104
127
return files
105
128
106
129
107
130
def create_index_file(filebase, files, eos_dir, style, volume_dir):
    """Create index file in the given style format (txt, json).

    The file is written to ``<eos_dir>/<volume_dir>/<filebase>.<style>``.

    * ``txt``: one line per entry of ``files``, holding its ``"uri"`` value.
    * ``json``: ``files`` dumped as JSON (2-space indent, sorted keys)
      followed by a newline.

    Returns the file name (without directory) on success. On any failure,
    including an unsupported ``style``, an error is printed and None is
    returned.
    """
    filename = filebase + "." + style
    path = f"{eos_dir}/{volume_dir}/{filename}"
    try:
        # Reject unknown styles before touching the filesystem so that no
        # empty file is left behind and reported as a success.
        if style not in ("txt", "json"):
            raise ValueError(f"unsupported index file style '{style}'")
        # The context manager closes the file even if a write fails.
        with open(path, "w") as fdesc:
            if style == "txt":
                fdesc.writelines(afile["uri"] + "\n" for afile in files)
            else:
                fdesc.write(json.dumps(files, indent=2, sort_keys=True))
                fdesc.write("\n")
    except Exception as exc:
        # Best effort: report the problem and signal failure with None.
        print("Error doing the file '", filename, "': ", exc)
        return None
    return filename
125
148
126
149
127
-
128
150
def copy_index_file (dataset , volume , filename , eos_dir , volume_dir ):
129
151
"Copy index file filename to its final destination on EOS."
130
152
dataset_location = get_dataset_location (dataset )
131
- cmd = 'eos cp ' + eos_dir + '/' + str (volume_dir ) + '/' + filename + ' ' + dataset_location + '/file-indexes/' + filename
153
+ cmd = (
154
+ "eos cp "
155
+ + eos_dir
156
+ + "/"
157
+ + str (volume_dir )
158
+ + "/"
159
+ + filename
160
+ + " "
161
+ + dataset_location
162
+ + "/file-indexes/"
163
+ + filename
164
+ )
132
165
if DEBUG :
133
166
print (cmd )
134
167
else :
@@ -138,34 +171,34 @@ def copy_index_file(dataset, volume, filename, eos_dir, volume_dir):
138
171
def create_index_files(dataset, volume, eos_dir, volume_dir):
    """Create index files for the given dataset and volume.

    Collects the volume's file list once, then writes it as a ``txt`` and a
    ``json`` index file via create_index_file(). Each file that was created
    successfully is handed to copy_index_file().
    """
    files = get_dataset_volume_files(dataset, volume)
    filebase = "_".join(
        (get_dataset_index_file_base(dataset), volume, "file_index")
    )

    for output_type in ("txt", "json"):
        filename = create_index_file(filebase, files, eos_dir, output_type, volume_dir)
        if not filename:
            # create_index_file() already reported the failure; nothing to copy.
            continue
        copy_index_file(dataset, volume, filename, eos_dir, volume_dir)
148
180
149
181
150
def main(datasets=None, eos_dir="./inputs/eos-file-indexes/"):
    """Create index files for every volume of every given dataset.

    Index files are written into numbered sub-directories of ``eos_dir``
    starting at ``0``. After 1000 volumes have been written into the
    current sub-directory, the next number is used and its directory is
    created.

    ``datasets`` defaults to no datasets, in which case only the first
    sub-directory is created.
    """
    # Avoid a shared mutable default argument.
    if datasets is None:
        datasets = []

    volume_dir = 0
    volume_counter = 0
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(f"{eos_dir}/{volume_dir}", exist_ok=True)

    for dataset in datasets:
        for volume in get_dataset_volumes(dataset):
            create_index_files(dataset, volume, eos_dir, volume_dir)
            volume_counter += 1
            if volume_counter > 999:
                # Current sub-directory is full: roll over to the next one
                # and make sure it exists before the next volume is written.
                volume_counter = 0
                volume_dir += 1
                os.makedirs(f"{eos_dir}/{volume_dir}", exist_ok=True)
169
201
170
- if __name__ == '__main__' :
202
+
203
+ if __name__ == "__main__" :
171
204
main ()
0 commit comments