@@ -161,7 +161,8 @@ def __call__(self, *data, level=INFO, file=None, end=None, flush=True):
161
161
if idx == 0 :
162
162
print (header , file = f , end = "" , flush = flush )
163
163
else :
164
- print (header_empty , file = f , end = "" , flush = flush )
164
+ print (header_empty , file = f ,
165
+ end = "" , flush = flush )
165
166
if idx == len (lines ) - 1 :
166
167
print (line , file = f , end = end , flush = flush )
167
168
else :
@@ -228,7 +229,8 @@ def log(*messages, level=INFO, file=None, end=None, flush=True):
228
229
class recorder :
229
230
def __init__ (self , captured_level = INFO ):
230
231
self .buffer = StringIO ()
231
- self .logger = context_logger (file = self .buffer , level = captured_level , can_overwrite = False )
232
+ self .logger = context_logger (
233
+ file = self .buffer , level = captured_level , can_overwrite = False )
232
234
233
235
def __enter__ (self ):
234
236
self .logger .__enter__ ()
@@ -256,7 +258,7 @@ def summarize_exception(e):
256
258
res = {}
257
259
res ["error" ] = error_msg (e , verbose = False )
258
260
res ["error_type" ] = res ["error" ].split ("(" )[0 ]
259
- res ["error_msg" ] = res ["error" ][len (res ["error_type" ]) + 2 : - 2 ]
261
+ res ["error_msg" ] = res ["error" ][len (res ["error_type" ]) + 2 : - 2 ]
260
262
res ["traceback" ] = error_msg (e , verbose = True )
261
263
return res
262
264
@@ -274,7 +276,8 @@ def __init__(self, f, inp, out, worker_id=None, cache_inp=None, build_inp=None,
274
276
log ("started worker-{}" .format ("?" if worker_id is None else worker_id ))
275
277
276
278
def run (self ):
277
- self .built_inp = None if self .built_inp is None else {k : v [0 ](* v [1 :]) for k , v in self .built_inp .items ()}
279
+ self .built_inp = None if self .built_inp is None else {
280
+ k : v [0 ](* v [1 :]) for k , v in self .built_inp .items ()}
278
281
while True :
279
282
task_id , kwargs = self .inp .get ()
280
283
try :
@@ -287,7 +290,8 @@ def run(self):
287
290
res = self .f (** _kwargs )
288
291
else :
289
292
res = self .f (* kwargs )
290
- self .out .put ({"worker_id" : self .worker_id , "task_id" : task_id , "task" : kwargs , "res" : res })
293
+ self .out .put ({"worker_id" : self .worker_id ,
294
+ "task_id" : task_id , "task" : kwargs , "res" : res })
291
295
except Exception as e :
292
296
self .out .put (
293
297
{
@@ -310,7 +314,8 @@ def __init__(self, f, num_workers=CPU_COUNT, cache_inp=None, build_inp=None, ign
310
314
self .ignore_error = ignore_error
311
315
self .f = f
312
316
for i in range (num_workers ):
313
- worker = Worker (f , self .inp , self .out , i , cache_inp , build_inp , verbose )
317
+ worker = Worker (f , self .inp , self .out , i ,
318
+ cache_inp , build_inp , verbose )
314
319
worker .start ()
315
320
self .workers .append (worker )
316
321
@@ -350,14 +355,16 @@ def add_task(self, inp):
350
355
def get_result (self ):
351
356
res = self .out .get ()
352
357
if "error" in res :
353
- err_msg = "worker-{} failed task-{} : {}" .format (res ["worker_id" ], res ["task_id" ], res ["error" ])
358
+ err_msg = "worker-{} failed task-{} : {}" .format (
359
+ res ["worker_id" ], res ["task_id" ], res ["error" ])
354
360
if not self .ignore_error :
355
361
self .terminate ()
356
362
assert False , err_msg
357
363
if self .verbose :
358
364
log (err_msg )
359
365
elif self .verbose :
360
- log ("worker-{} completed task-{}" .format (res ["worker_id" ], res ["task_id" ]))
366
+ log ("worker-{} completed task-{}" .format (
367
+ res ["worker_id" ], res ["task_id" ]))
361
368
return res
362
369
363
370
def terminate (self ):
@@ -378,7 +385,8 @@ def work(
378
385
res_only = True ,
379
386
verbose = False ,
380
387
):
381
- workers = Workers (f , num_workers , cache_inp , build_inp , ignore_error , verbose )
388
+ workers = Workers (f , num_workers , cache_inp ,
389
+ build_inp , ignore_error , verbose )
382
390
for d in workers .map (tasks , ordered ):
383
391
yield d .get ("res" , None ) if res_only else d
384
392
workers .terminate ()
@@ -401,7 +409,8 @@ def check(self, msg="", reset=True):
401
409
self .duration = (end_time - self .start ) * 1000
402
410
if reset :
403
411
self .start = end_time
404
- log ("{}took {:.3f} ms" .format ("" if msg == "" else f"{ msg } ==> " , self .duration ), level = self .level )
412
+ log ("{}took {:.3f} ms" .format ("" if msg == "" else f"{
413
+ msg } ==> " , self .duration ), level = self .level )
405
414
return self .duration
406
415
407
416
def __exit__ (self , * args ):
@@ -423,7 +432,9 @@ def iterate(data, first_n=None, sample_p=1.0, sample_seed=None, report_n=None):
423
432
yield d
424
433
if report_n is not None and counter % report_n == 0 :
425
434
current_time = time .time ()
426
- speed = report_n / (current_time - prev_time ) if current_time - prev_time != 0 else "inf"
435
+ speed = report_n / \
436
+ (current_time - prev_time ) if current_time - \
437
+ prev_time != 0 else "inf"
427
438
log ("{}/{} ==> {:.3f} items/s" .format (counter , total , speed ))
428
439
prev_time = current_time
429
440
@@ -599,7 +610,8 @@ def _build_table(rows, space=3, cell_space=1, filler=" ", max_column_width=None,
599
610
if len (row ) != num_col :
600
611
row += ["" ] * (num_col - len (row ))
601
612
row = [
602
- _build_table (item , cell_space , cell_space , filler ) if type_of (item , COLLECTION_TYPES ) else [str (item )]
613
+ _build_table (item , cell_space , cell_space , filler ) if type_of (
614
+ item , COLLECTION_TYPES ) else [str (item )]
603
615
for item in row
604
616
]
605
617
max_height = max (len (r ) for r in row )
@@ -645,9 +657,12 @@ def print_table(
645
657
):
646
658
if headers is not None :
647
659
rows = [headers ] + rows
648
- _res = _build_table (rows , space , cell_space , filler , max_column_width , min_column_widths )
649
- first_sep_line = print_line (text = name , width = len (_res [0 ]), char = sep , res = True )
650
- sep_line = print_line (width = max (len (first_sep_line ), len (_res [0 ])), char = sep , res = True )
660
+ _res = _build_table (rows , space , cell_space , filler ,
661
+ max_column_width , min_column_widths )
662
+ first_sep_line = print_line (
663
+ text = name , width = len (_res [0 ]), char = sep , res = True )
664
+ sep_line = print_line (width = max (len (first_sep_line ),
665
+ len (_res [0 ])), char = sep , res = True )
651
666
if headers is not None :
652
667
_res = [first_sep_line , _res [0 ], sep_line ] + _res [1 :] + [sep_line ]
653
668
if not res :
@@ -732,7 +747,8 @@ def print_cache(_tokens, _shift, _extra_indent):
732
747
if isinstance (d , list ):
733
748
print_cache (d , shift + 1 , 0 if idx == 0 else None )
734
749
else :
735
- _prints (data [d ], indent , width , level , shift + 1 , 0 if idx == 0 else None , sep , quote , kv_sep , compact )
750
+ _prints (data [d ], indent , width , level , shift + 1 ,
751
+ 0 if idx == 0 else None , sep , quote , kv_sep , compact )
736
752
if idx != len (cache ) - 1 :
737
753
log_raw ("{}\n " .format (sep ))
738
754
log_raw (marker_r )
@@ -752,7 +768,8 @@ def print_cache(_tokens, _shift, _extra_indent):
752
768
for idx , (k , v ) in enumerate (kv ):
753
769
str_k = put_quote (k )
754
770
if is_short_data (v ):
755
- log_raw ("{}{}{}{}" .format (shift_str + indent_str , str_k , kv_sep , put_quote (v )))
771
+ log_raw ("{}{}{}{}" .format (
772
+ shift_str + indent_str , str_k , kv_sep , put_quote (v )))
756
773
else :
757
774
log_raw ("{}{}{}" .format (shift_str + indent_str , str_k , kv_sep ))
758
775
# for non-compact
@@ -766,7 +783,8 @@ def print_cache(_tokens, _shift, _extra_indent):
766
783
else :
767
784
v_shift = shift + indent + len (str_k ) + kv_sep_len
768
785
v_indent = 0
769
- _prints (v , indent , width , level , v_shift , v_indent , sep , quote , kv_sep , compact )
786
+ _prints (v , indent , width , level , v_shift ,
787
+ v_indent , sep , quote , kv_sep , compact )
770
788
if idx != len (kv ) - 1 :
771
789
log_raw (sep + "\n " )
772
790
else :
@@ -782,10 +800,12 @@ def print_cache(_tokens, _shift, _extra_indent):
782
800
continue
783
801
elif idx != 0 or extra_indent is None :
784
802
log_raw ("\n {}" .format (shift_str ))
785
- log_raw ("{}{}{}{}" .format (quote , s , "\\ n" if idx != len (_data ) - 1 else "" , quote ))
803
+ log_raw ("{}{}{}{}" .format (quote , s , "\\ n" if idx !=
804
+ len (_data ) - 1 else "" , quote ))
786
805
else :
787
806
data = str (data )
788
- _prints (data , indent , width , level , shift , extra_indent , sep , quote , kv_sep , compact )
807
+ _prints (data , indent , width , level , shift ,
808
+ extra_indent , sep , quote , kv_sep , compact )
789
809
790
810
791
811
def prints (
@@ -804,11 +824,13 @@ def prints(
804
824
if res :
805
825
with recorder () as r :
806
826
for d in data :
807
- _prints (d , indent , width , level , shift , extra_indent , sep , quote , kv_sep , compact )
827
+ _prints (d , indent , width , level , shift ,
828
+ extra_indent , sep , quote , kv_sep , compact )
808
829
return r .flush ()
809
830
else :
810
831
for d in data :
811
- _prints (d , indent , width , level , shift , extra_indent , sep , quote , kv_sep , compact )
832
+ _prints (d , indent , width , level , shift ,
833
+ extra_indent , sep , quote , kv_sep , compact )
812
834
log ("" , level = level )
813
835
814
836
@@ -871,13 +893,16 @@ def debug(*data, mode=prints, char="-", level=DEBUG):
871
893
stack = inspect .stack ()
872
894
lineno = " [{}]" .format (stack [1 ].lineno )
873
895
filename = file_basename (stack [1 ][1 ]).split ("." )[0 ]
874
- function_name = ".{}" .format (stack [1 ][3 ]) if stack [1 ][3 ] != "<module>" else ""
896
+ function_name = ".{}" .format (
897
+ stack [1 ][3 ]) if stack [1 ][3 ] != "<module>" else ""
875
898
876
899
code_str = stack [1 ].code_context [0 ].strip ()
877
- arguments = code_str [code_str .index ("(" ) + 1 : - 1 ]
900
+ arguments = code_str [code_str .index ("(" ) + 1 : - 1 ]
878
901
arguments = [a .strip () for a in arguments .split ("," ) if "=" not in a ]
879
- assert len (data ) == len (arguments ), '{} ==> debug() can not take arguments with "," in it' .format (code_str )
880
- argument_str = "" if len (arguments ) > 1 else ": {}" .format (arguments [0 ])
902
+ assert len (data ) == len (
903
+ arguments ), '{} ==> debug() can not take arguments with "," in it' .format (code_str )
904
+ argument_str = "" if len (
905
+ arguments ) > 1 else ": {}" .format (arguments [0 ])
881
906
882
907
with enclose ("{}{}{}{}" .format (filename , function_name , lineno , argument_str ), char = char ):
883
908
if mode is None :
@@ -926,7 +951,8 @@ def try_f(*args, **kwargs):
926
951
927
952
def n_min_max_avg (data , key_f = None , first_n = None , sample_p = 1.0 , sample_seed = None ):
928
953
res_min , res_max , res_sum = float ("inf" ), - float ("inf" ), 0
929
- iterator = iterate (data , first_n = first_n , sample_p = sample_p , sample_seed = sample_seed )
954
+ iterator = iterate (data , first_n = first_n ,
955
+ sample_p = sample_p , sample_seed = sample_seed )
930
956
if key_f is not None :
931
957
iterator = map (key_f , iterator )
932
958
counter = 0
@@ -986,7 +1012,8 @@ def __init__(self, text="", width=None, max_width=80, char="=", end="\n\n", capt
986
1012
987
1013
def __enter__ (self ):
988
1014
if self .width is not None :
989
- top_line = print_line (text = self .text , width = self .width , char = self .char , res = True )
1015
+ top_line = print_line (
1016
+ text = self .text , width = self .width , char = self .char , res = True )
990
1017
self .top_line_size = len (top_line )
991
1018
log (top_line , level = self .level )
992
1019
else :
@@ -997,18 +1024,22 @@ def __exit__(self, *args):
997
1024
self .recorder .__exit__ (* args )
998
1025
content = self .recorder .flush ()
999
1026
# enclosed lines should be slightly longer than the longest content
1000
- content_width = 0 if not content else max (len (line ) for line in content .split ("\n " ))
1027
+ content_width = 0 if not content else max (
1028
+ len (line ) for line in content .split ("\n " ))
1001
1029
content_width = min (self .max_width , content_width + 3 )
1002
- top_line = print_line (text = self .text , width = content_width , char = self .char , res = True )
1030
+ top_line = print_line (
1031
+ text = self .text , width = content_width , char = self .char , res = True )
1003
1032
self .top_line_size = len (top_line )
1004
1033
log (top_line , level = self .level )
1005
1034
log (content , level = self .level , end = "" )
1006
- log (print_line (width = self .top_line_size , char = self .char , level = self .level , res = True ), end = self .end )
1035
+ log (print_line (width = self .top_line_size , char = self .char ,
1036
+ level = self .level , res = True ), end = self .end )
1007
1037
1008
1038
1009
1039
class enclose_timer :
1010
1040
def __init__ (self , text = "" , width = None , max_width = 80 , char = "=" , end = "\n \n " , captured_level = INFO , level = INFO ):
1011
- self ._enclose = enclose (text , width , max_width , char , "\n " , captured_level , level )
1041
+ self ._enclose = enclose (text , width , max_width ,
1042
+ char , "\n " , captured_level , level )
1012
1043
self .end = end
1013
1044
self .level = level
1014
1045
@@ -1019,15 +1050,17 @@ def __enter__(self):
1019
1050
def __exit__ (self , * args ):
1020
1051
time_end = time .time ()
1021
1052
self ._enclose .__exit__ (* args )
1022
- log ("took {:.3f} ms" .format ((time_end - self .time_start ) * 1000 ), end = self .end , level = self .level )
1053
+ log ("took {:.3f} ms" .format ((time_end - self .time_start )
1054
+ * 1000 ), end = self .end , level = self .level )
1023
1055
1024
1056
1025
1057
def env(key, default_value=None):
    """Return the value of the environment variable ``key``.

    Args:
        key: name of the environment variable to look up.
        default_value: value handed back when ``key`` is not set.

    Returns:
        The variable's value from ``os.environ``, or ``default_value``
        when the variable is unset.
    """
    return os.environ.get(key, default_value)
1027
1059
1028
1060
1029
1061
def load_env(dict_or_path):
    """Export key/value pairs into ``os.environ``.

    Args:
        dict_or_path: a mapping of variable names to values, a ``str``
            that is handed to ``load_yaml`` to obtain such a mapping
            (presumably a path to a YAML file; confirm against
            ``load_yaml``), or ``None``, in which case nothing happens.

    Both keys and values are converted with ``str()`` before being stored,
    because ``os.environ`` only accepts strings (YAML may produce integer
    keys or non-string values).
    """
    if isinstance(dict_or_path, str):
        mapping = load_yaml(dict_or_path)
    else:
        mapping = dict_or_path
    if mapping is None:
        return
    for name, value in mapping.items():
        os.environ[str(name)] = str(value)
@@ -1036,12 +1069,21 @@ def load_env(dict_or_path):
1036
1069
def get_args(*args, **kwargs):
    """Build an ``argparse`` parser from a compact spec and parse ``sys.argv``.

    Args:
        *args: positional argument specs. ``"name"`` declares a required
            string positional; ``"name?default"`` declares an optional
            positional that falls back to the string ``default``.
        **kwargs: ``--name`` options. The default value selects the parsing:
            * ``list``  -> one or more values (``nargs="+"``), each converted
              with the type of the first element (``str`` for an empty list);
            * ``bool``  -> a ``--name`` / ``--no-name`` flag pair;
            * ``None``  -> a single string value;
            * otherwise -> a single value converted with ``type(default)``.

    Returns:
        The ``argparse.Namespace`` produced by ``parser.parse_args()``.

    Raises:
        AssertionError: if a positional spec contains more than one ``"?"``.
    """
    parser = argparse.ArgumentParser()

    for spec in args:
        if "?" not in spec:
            parser.add_argument(spec, type=str)
            continue
        tokens = spec.split("?")
        # Keep the message on one logical f-string: a replacement field that
        # spans physical lines is a syntax error before Python 3.12.
        assert len(tokens) == 2, (
            f'position argument "{spec}" is in wrong format '
            '(should be "key?default_value")'
        )
        name, default = tokens
        parser.add_argument(name, nargs="?", default=default, type=str)

    for name, default in kwargs.items():
        flag = f"--{name}"
        if isinstance(default, list):
            # An empty default list has no element to infer the type from.
            item_type = type(default[0]) if default else str
            parser.add_argument(flag, nargs="+", default=default, type=item_type)
        elif isinstance(default, bool):
            # BooleanOptionalAction takes no value, so ``type=`` is meaningless
            # here (and rejected by newer Python versions).
            parser.add_argument(
                flag, default=default, action=argparse.BooleanOptionalAction
            )
        elif default is None:
            # type(None) cannot convert a command-line string; keep it as str.
            parser.add_argument(flag, default=None, type=str)
        else:
            parser.add_argument(flag, default=default, type=type(default))

    return parser.parse_args()
@@ -1071,16 +1113,19 @@ def run_dir():
1071
1113
1072
1114
def _is_file_and_exist(path):
    """Guard that an existing ``path`` is a file, not a directory.

    Despite the name, nothing is returned: the function is a pure check.
    A path that does not exist passes silently.

    Raises:
        AssertionError: if ``path`` exists but ``os.path.isfile`` is false.
    """
    if not os.path.exists(path):
        return
    assert os.path.isfile(path), (
        "{} ==> already exist but it's a directory".format(path)
    )
1075
1118
1076
1119
1077
1120
def _is_dir_and_exist(path):
    """Guard that an existing ``path`` is a directory, not a file.

    Despite the name, nothing is returned: the function is a pure check.
    A path that does not exist passes silently.

    Raises:
        AssertionError: if ``path`` exists but ``os.path.isdir`` is false.
    """
    if not os.path.exists(path):
        return
    assert os.path.isdir(path), (
        "{} ==> already exist but it's a file".format(path)
    )
1080
1124
1081
1125
1082
1126
def build_dirs (path_or_paths , overwrite = False ):
1083
- paths = path_or_paths if isinstance (path_or_paths , list ) else [path_or_paths ]
1127
+ paths = path_or_paths if isinstance (
1128
+ path_or_paths , list ) else [path_or_paths ]
1084
1129
for path in paths :
1085
1130
path = Path (os .path .abspath (path ))
1086
1131
_is_dir_and_exist (path )
@@ -1090,7 +1135,8 @@ def build_dirs(path_or_paths, overwrite=False):
1090
1135
1091
1136
1092
1137
def build_files (path_or_paths , overwrite = False ):
1093
- paths = path_or_paths if isinstance (path_or_paths , list ) else [path_or_paths ]
1138
+ paths = path_or_paths if isinstance (
1139
+ path_or_paths , list ) else [path_or_paths ]
1094
1140
for path in paths :
1095
1141
_is_file_and_exist (path )
1096
1142
if overwrite and os .path .exists (path ):
@@ -1100,7 +1146,8 @@ def build_files(path_or_paths, overwrite=False):
1100
1146
1101
1147
1102
1148
def build_dirs_for (path_or_paths , overwrite = False ):
1103
- paths = path_or_paths if isinstance (path_or_paths , list ) else [path_or_paths ]
1149
+ paths = path_or_paths if isinstance (
1150
+ path_or_paths , list ) else [path_or_paths ]
1104
1151
for path in paths :
1105
1152
path = Path (os .path .abspath (path ))
1106
1153
_is_file_and_exist (path )
@@ -1129,12 +1176,14 @@ def traverse(path, go_up=0, go_to=None, should_exist=False):
1129
1176
go_up = max (go_up , 0 )
1130
1177
for _ in range (go_up ):
1131
1178
n_res = res .parent
1132
- assert n_res != res , "{} (go up {} times) ==> already reach root and cannot go up further" .format (o_res , go_up )
1179
+ assert n_res != res , "{} (go up {} times) ==> already reach root and cannot go up further" .format (
1180
+ o_res , go_up )
1133
1181
res = n_res
1134
1182
res = str (res )
1135
1183
if go_to is not None :
1136
1184
res = jpath (res , go_to )
1137
- assert not should_exist or os .path .exists (res ), "{} ==> does not exist" .format (res )
1185
+ assert not should_exist or os .path .exists (
1186
+ res ), "{} ==> does not exist" .format (res )
1138
1187
return _np (res )
1139
1188
1140
1189
@@ -1154,7 +1203,8 @@ def this_dir(go_up=0, go_to=None, should_exist=False):
1154
1203
def unwrap_file(path):
    """Descend through single-entry directories until a non-directory is hit.

    While ``path`` is a directory it must contain exactly one entry; that
    entry becomes the new ``path`` (joined via ``jpath``). The first path
    that is not a directory is returned after passing through ``_np``
    (presumably a path normalizer; confirm against its definition).

    Args:
        path: file or directory to unwrap.

    Returns:
        ``_np(path)`` for the innermost non-directory path.

    Raises:
        AssertionError: if a directory on the way is empty or holds more
            than one entry.
    """
    # Iterative form of the original recursion: same traversal, no
    # dependence on the interpreter's recursion limit.
    while os.path.isdir(path):
        sub_paths = os.listdir(path)
        # An empty directory used to be reported as "more than one".
        assert sub_paths, "there are no files/dirs in {}".format(path)
        assert len(sub_paths) == 1, (
            "there are more than one files/dirs in {}".format(path)
        )
        path = jpath(path, sub_paths[0])
    return _np(path)
1160
1210
0 commit comments