@@ -161,7 +161,8 @@ def __call__(self, *data, level=INFO, file=None, end=None, flush=True):
161161 if idx == 0 :
162162 print (header , file = f , end = "" , flush = flush )
163163 else :
164- print (header_empty , file = f , end = "" , flush = flush )
164+ print (header_empty , file = f ,
165+ end = "" , flush = flush )
165166 if idx == len (lines ) - 1 :
166167 print (line , file = f , end = end , flush = flush )
167168 else :
@@ -228,7 +229,8 @@ def log(*messages, level=INFO, file=None, end=None, flush=True):
228229class recorder :
229230 def __init__ (self , captured_level = INFO ):
230231 self .buffer = StringIO ()
231- self .logger = context_logger (file = self .buffer , level = captured_level , can_overwrite = False )
232+ self .logger = context_logger (
233+ file = self .buffer , level = captured_level , can_overwrite = False )
232234
233235 def __enter__ (self ):
234236 self .logger .__enter__ ()
@@ -256,7 +258,7 @@ def summarize_exception(e):
256258 res = {}
257259 res ["error" ] = error_msg (e , verbose = False )
258260 res ["error_type" ] = res ["error" ].split ("(" )[0 ]
259- res ["error_msg" ] = res ["error" ][len (res ["error_type" ]) + 2 : - 2 ]
261+ res ["error_msg" ] = res ["error" ][len (res ["error_type" ]) + 2 : - 2 ]
260262 res ["traceback" ] = error_msg (e , verbose = True )
261263 return res
262264
@@ -274,7 +276,8 @@ def __init__(self, f, inp, out, worker_id=None, cache_inp=None, build_inp=None,
274276 log ("started worker-{}" .format ("?" if worker_id is None else worker_id ))
275277
276278 def run (self ):
277- self .built_inp = None if self .built_inp is None else {k : v [0 ](* v [1 :]) for k , v in self .built_inp .items ()}
279+ self .built_inp = None if self .built_inp is None else {
280+ k : v [0 ](* v [1 :]) for k , v in self .built_inp .items ()}
278281 while True :
279282 task_id , kwargs = self .inp .get ()
280283 try :
@@ -287,7 +290,8 @@ def run(self):
287290 res = self .f (** _kwargs )
288291 else :
289292 res = self .f (* kwargs )
290- self .out .put ({"worker_id" : self .worker_id , "task_id" : task_id , "task" : kwargs , "res" : res })
293+ self .out .put ({"worker_id" : self .worker_id ,
294+ "task_id" : task_id , "task" : kwargs , "res" : res })
291295 except Exception as e :
292296 self .out .put (
293297 {
@@ -310,7 +314,8 @@ def __init__(self, f, num_workers=CPU_COUNT, cache_inp=None, build_inp=None, ign
310314 self .ignore_error = ignore_error
311315 self .f = f
312316 for i in range (num_workers ):
313- worker = Worker (f , self .inp , self .out , i , cache_inp , build_inp , verbose )
317+ worker = Worker (f , self .inp , self .out , i ,
318+ cache_inp , build_inp , verbose )
314319 worker .start ()
315320 self .workers .append (worker )
316321
@@ -350,14 +355,16 @@ def add_task(self, inp):
350355 def get_result (self ):
351356 res = self .out .get ()
352357 if "error" in res :
353- err_msg = "worker-{} failed task-{} : {}" .format (res ["worker_id" ], res ["task_id" ], res ["error" ])
358+ err_msg = "worker-{} failed task-{} : {}" .format (
359+ res ["worker_id" ], res ["task_id" ], res ["error" ])
354360 if not self .ignore_error :
355361 self .terminate ()
356362 assert False , err_msg
357363 if self .verbose :
358364 log (err_msg )
359365 elif self .verbose :
360- log ("worker-{} completed task-{}" .format (res ["worker_id" ], res ["task_id" ]))
366+ log ("worker-{} completed task-{}" .format (
367+ res ["worker_id" ], res ["task_id" ]))
361368 return res
362369
363370 def terminate (self ):
@@ -378,7 +385,8 @@ def work(
378385 res_only = True ,
379386 verbose = False ,
380387):
381- workers = Workers (f , num_workers , cache_inp , build_inp , ignore_error , verbose )
388+ workers = Workers (f , num_workers , cache_inp ,
389+ build_inp , ignore_error , verbose )
382390 for d in workers .map (tasks , ordered ):
383391 yield d .get ("res" , None ) if res_only else d
384392 workers .terminate ()
@@ -401,7 +409,8 @@ def check(self, msg="", reset=True):
401409 self .duration = (end_time - self .start ) * 1000
402410 if reset :
403411 self .start = end_time
404- log ("{}took {:.3f} ms" .format ("" if msg == "" else f"{ msg } ==> " , self .duration ), level = self .level )
412+ log ("{}took {:.3f} ms" .format (
413+ "" if msg == "" else f"{ msg } ==> " , self .duration ), level = self .level )
405414 return self .duration
406415
407416 def __exit__ (self , * args ):
@@ -423,7 +432,9 @@ def iterate(data, first_n=None, sample_p=1.0, sample_seed=None, report_n=None):
423432 yield d
424433 if report_n is not None and counter % report_n == 0 :
425434 current_time = time .time ()
426- speed = report_n / (current_time - prev_time ) if current_time - prev_time != 0 else "inf"
435+ speed = report_n / \
436+ (current_time - prev_time ) if current_time - \
437+ prev_time != 0 else "inf"
427438 log ("{}/{} ==> {:.3f} items/s" .format (counter , total , speed ))
428439 prev_time = current_time
429440
@@ -599,7 +610,8 @@ def _build_table(rows, space=3, cell_space=1, filler=" ", max_column_width=None,
599610 if len (row ) != num_col :
600611 row += ["" ] * (num_col - len (row ))
601612 row = [
602- _build_table (item , cell_space , cell_space , filler ) if type_of (item , COLLECTION_TYPES ) else [str (item )]
613+ _build_table (item , cell_space , cell_space , filler ) if type_of (
614+ item , COLLECTION_TYPES ) else [str (item )]
603615 for item in row
604616 ]
605617 max_height = max (len (r ) for r in row )
@@ -645,9 +657,12 @@ def print_table(
645657):
646658 if headers is not None :
647659 rows = [headers ] + rows
648- _res = _build_table (rows , space , cell_space , filler , max_column_width , min_column_widths )
649- first_sep_line = print_line (text = name , width = len (_res [0 ]), char = sep , res = True )
650- sep_line = print_line (width = max (len (first_sep_line ), len (_res [0 ])), char = sep , res = True )
660+ _res = _build_table (rows , space , cell_space , filler ,
661+ max_column_width , min_column_widths )
662+ first_sep_line = print_line (
663+ text = name , width = len (_res [0 ]), char = sep , res = True )
664+ sep_line = print_line (width = max (len (first_sep_line ),
665+ len (_res [0 ])), char = sep , res = True )
651666 if headers is not None :
652667 _res = [first_sep_line , _res [0 ], sep_line ] + _res [1 :] + [sep_line ]
653668 if not res :
@@ -732,7 +747,8 @@ def print_cache(_tokens, _shift, _extra_indent):
732747 if isinstance (d , list ):
733748 print_cache (d , shift + 1 , 0 if idx == 0 else None )
734749 else :
735- _prints (data [d ], indent , width , level , shift + 1 , 0 if idx == 0 else None , sep , quote , kv_sep , compact )
750+ _prints (data [d ], indent , width , level , shift + 1 ,
751+ 0 if idx == 0 else None , sep , quote , kv_sep , compact )
736752 if idx != len (cache ) - 1 :
737753 log_raw ("{}\n " .format (sep ))
738754 log_raw (marker_r )
@@ -752,7 +768,8 @@ def print_cache(_tokens, _shift, _extra_indent):
752768 for idx , (k , v ) in enumerate (kv ):
753769 str_k = put_quote (k )
754770 if is_short_data (v ):
755- log_raw ("{}{}{}{}" .format (shift_str + indent_str , str_k , kv_sep , put_quote (v )))
771+ log_raw ("{}{}{}{}" .format (
772+ shift_str + indent_str , str_k , kv_sep , put_quote (v )))
756773 else :
757774 log_raw ("{}{}{}" .format (shift_str + indent_str , str_k , kv_sep ))
758775 # for non-compact
@@ -766,7 +783,8 @@ def print_cache(_tokens, _shift, _extra_indent):
766783 else :
767784 v_shift = shift + indent + len (str_k ) + kv_sep_len
768785 v_indent = 0
769- _prints (v , indent , width , level , v_shift , v_indent , sep , quote , kv_sep , compact )
786+ _prints (v , indent , width , level , v_shift ,
787+ v_indent , sep , quote , kv_sep , compact )
770788 if idx != len (kv ) - 1 :
771789 log_raw (sep + "\n " )
772790 else :
@@ -782,10 +800,12 @@ def print_cache(_tokens, _shift, _extra_indent):
782800 continue
783801 elif idx != 0 or extra_indent is None :
784802 log_raw ("\n {}" .format (shift_str ))
785- log_raw ("{}{}{}{}" .format (quote , s , "\\ n" if idx != len (_data ) - 1 else "" , quote ))
803+ log_raw ("{}{}{}{}" .format (quote , s , "\\ n" if idx !=
804+ len (_data ) - 1 else "" , quote ))
786805 else :
787806 data = str (data )
788- _prints (data , indent , width , level , shift , extra_indent , sep , quote , kv_sep , compact )
807+ _prints (data , indent , width , level , shift ,
808+ extra_indent , sep , quote , kv_sep , compact )
789809
790810
791811def prints (
@@ -804,11 +824,13 @@ def prints(
804824 if res :
805825 with recorder () as r :
806826 for d in data :
807- _prints (d , indent , width , level , shift , extra_indent , sep , quote , kv_sep , compact )
827+ _prints (d , indent , width , level , shift ,
828+ extra_indent , sep , quote , kv_sep , compact )
808829 return r .flush ()
809830 else :
810831 for d in data :
811- _prints (d , indent , width , level , shift , extra_indent , sep , quote , kv_sep , compact )
832+ _prints (d , indent , width , level , shift ,
833+ extra_indent , sep , quote , kv_sep , compact )
812834 log ("" , level = level )
813835
814836
@@ -871,13 +893,16 @@ def debug(*data, mode=prints, char="-", level=DEBUG):
871893 stack = inspect .stack ()
872894 lineno = " [{}]" .format (stack [1 ].lineno )
873895 filename = file_basename (stack [1 ][1 ]).split ("." )[0 ]
874- function_name = ".{}" .format (stack [1 ][3 ]) if stack [1 ][3 ] != "<module>" else ""
896+ function_name = ".{}" .format (
897+ stack [1 ][3 ]) if stack [1 ][3 ] != "<module>" else ""
875898
876899 code_str = stack [1 ].code_context [0 ].strip ()
877- arguments = code_str [code_str .index ("(" ) + 1 : - 1 ]
900+ arguments = code_str [code_str .index ("(" ) + 1 : - 1 ]
878901 arguments = [a .strip () for a in arguments .split ("," ) if "=" not in a ]
879- assert len (data ) == len (arguments ), '{} ==> debug() can not take arguments with "," in it' .format (code_str )
880- argument_str = "" if len (arguments ) > 1 else ": {}" .format (arguments [0 ])
902+ assert len (data ) == len (
903+ arguments ), '{} ==> debug() can not take arguments with "," in it' .format (code_str )
904+ argument_str = "" if len (
905+ arguments ) > 1 else ": {}" .format (arguments [0 ])
881906
882907 with enclose ("{}{}{}{}" .format (filename , function_name , lineno , argument_str ), char = char ):
883908 if mode is None :
@@ -926,7 +951,8 @@ def try_f(*args, **kwargs):
926951
927952def n_min_max_avg (data , key_f = None , first_n = None , sample_p = 1.0 , sample_seed = None ):
928953 res_min , res_max , res_sum = float ("inf" ), - float ("inf" ), 0
929- iterator = iterate (data , first_n = first_n , sample_p = sample_p , sample_seed = sample_seed )
954+ iterator = iterate (data , first_n = first_n ,
955+ sample_p = sample_p , sample_seed = sample_seed )
930956 if key_f is not None :
931957 iterator = map (key_f , iterator )
932958 counter = 0
@@ -986,7 +1012,8 @@ def __init__(self, text="", width=None, max_width=80, char="=", end="\n\n", capt
9861012
9871013 def __enter__ (self ):
9881014 if self .width is not None :
989- top_line = print_line (text = self .text , width = self .width , char = self .char , res = True )
1015+ top_line = print_line (
1016+ text = self .text , width = self .width , char = self .char , res = True )
9901017 self .top_line_size = len (top_line )
9911018 log (top_line , level = self .level )
9921019 else :
@@ -997,18 +1024,22 @@ def __exit__(self, *args):
9971024 self .recorder .__exit__ (* args )
9981025 content = self .recorder .flush ()
9991026 # enclosed lines should be slightly longer than the longest content
1000- content_width = 0 if not content else max (len (line ) for line in content .split ("\n " ))
1027+ content_width = 0 if not content else max (
1028+ len (line ) for line in content .split ("\n " ))
10011029 content_width = min (self .max_width , content_width + 3 )
1002- top_line = print_line (text = self .text , width = content_width , char = self .char , res = True )
1030+ top_line = print_line (
1031+ text = self .text , width = content_width , char = self .char , res = True )
10031032 self .top_line_size = len (top_line )
10041033 log (top_line , level = self .level )
10051034 log (content , level = self .level , end = "" )
1006- log (print_line (width = self .top_line_size , char = self .char , level = self .level , res = True ), end = self .end )
1035+ log (print_line (width = self .top_line_size , char = self .char ,
1036+ level = self .level , res = True ), end = self .end )
10071037
10081038
10091039class enclose_timer :
10101040 def __init__ (self , text = "" , width = None , max_width = 80 , char = "=" , end = "\n \n " , captured_level = INFO , level = INFO ):
1011- self ._enclose = enclose (text , width , max_width , char , "\n " , captured_level , level )
1041+ self ._enclose = enclose (text , width , max_width ,
1042+ char , "\n " , captured_level , level )
10121043 self .end = end
10131044 self .level = level
10141045
@@ -1019,15 +1050,17 @@ def __enter__(self):
10191050 def __exit__ (self , * args ):
10201051 time_end = time .time ()
10211052 self ._enclose .__exit__ (* args )
1022- log ("took {:.3f} ms" .format ((time_end - self .time_start ) * 1000 ), end = self .end , level = self .level )
1053+ log ("took {:.3f} ms" .format ((time_end - self .time_start )
1054+ * 1000 ), end = self .end , level = self .level )
10231055
10241056
10251057def env (key , default_value = None ):
10261058 return os .environ .get (key , default_value )
10271059
10281060
10291061def load_env (dict_or_path ):
1030- d = load_yaml (dict_or_path ) if isinstance (dict_or_path , str ) else dict_or_path
1062+ d = load_yaml (dict_or_path ) if isinstance (
1063+ dict_or_path , str ) else dict_or_path
10311064 if d is not None :
10321065 for k , v in d .items ():
10331066 os .environ [k ] = str (v )
@@ -1036,12 +1069,21 @@ def load_env(dict_or_path):
10361069def get_args (* args , ** kwargs ):
10371070 parser = argparse .ArgumentParser ()
10381071 for k in args :
1039- parser .add_argument (k , type = str )
1072+ if "?" not in k :
1073+ parser .add_argument (k , type = str )
1074+ else :
1075+ tokens = k .split ("?" )
1076+ assert len (tokens ) == 2 , (
1077+ f'position argument "{ k } " is in wrong format (should be "key?default_value")' )
1078+ parser .add_argument (tokens [0 ], nargs = "?" ,
1079+ default = tokens [1 ], type = str )
10401080 for k , v in kwargs .items ():
10411081 if isinstance (v , list ):
1042- parser .add_argument (f"--{ k } " , nargs = "+" , default = v , type = type (v [0 ]))
1082+ parser .add_argument (f"--{ k } " , nargs = "+" ,
1083+ default = v , type = type (v [0 ]))
10431084 elif isinstance (v , bool ):
1044- parser .add_argument (f"--{ k } " , default = v , type = type (v ), action = argparse .BooleanOptionalAction )
1085+ parser .add_argument (
1086+ f"--{ k } " , default = v , type = type (v ), action = argparse .BooleanOptionalAction )
10451087 else :
10461088 parser .add_argument (f"--{ k } " , default = v , type = type (v ))
10471089 return parser .parse_args ()
@@ -1071,16 +1113,19 @@ def run_dir():
10711113
10721114def _is_file_and_exist (path ):
10731115 if os .path .exists (path ):
1074- assert os .path .isfile (path ), "{} ==> already exist but it's a directory" .format (path )
1116+ assert os .path .isfile (
1117+ path ), "{} ==> already exist but it's a directory" .format (path )
10751118
10761119
10771120def _is_dir_and_exist (path ):
10781121 if os .path .exists (path ):
1079- assert os .path .isdir (path ), "{} ==> already exist but it's a file" .format (path )
1122+ assert os .path .isdir (
1123+ path ), "{} ==> already exist but it's a file" .format (path )
10801124
10811125
10821126def build_dirs (path_or_paths , overwrite = False ):
1083- paths = path_or_paths if isinstance (path_or_paths , list ) else [path_or_paths ]
1127+ paths = path_or_paths if isinstance (
1128+ path_or_paths , list ) else [path_or_paths ]
10841129 for path in paths :
10851130 path = Path (os .path .abspath (path ))
10861131 _is_dir_and_exist (path )
@@ -1090,7 +1135,8 @@ def build_dirs(path_or_paths, overwrite=False):
10901135
10911136
10921137def build_files (path_or_paths , overwrite = False ):
1093- paths = path_or_paths if isinstance (path_or_paths , list ) else [path_or_paths ]
1138+ paths = path_or_paths if isinstance (
1139+ path_or_paths , list ) else [path_or_paths ]
10941140 for path in paths :
10951141 _is_file_and_exist (path )
10961142 if overwrite and os .path .exists (path ):
@@ -1100,7 +1146,8 @@ def build_files(path_or_paths, overwrite=False):
11001146
11011147
11021148def build_dirs_for (path_or_paths , overwrite = False ):
1103- paths = path_or_paths if isinstance (path_or_paths , list ) else [path_or_paths ]
1149+ paths = path_or_paths if isinstance (
1150+ path_or_paths , list ) else [path_or_paths ]
11041151 for path in paths :
11051152 path = Path (os .path .abspath (path ))
11061153 _is_file_and_exist (path )
@@ -1129,12 +1176,14 @@ def traverse(path, go_up=0, go_to=None, should_exist=False):
11291176 go_up = max (go_up , 0 )
11301177 for _ in range (go_up ):
11311178 n_res = res .parent
1132- assert n_res != res , "{} (go up {} times) ==> already reach root and cannot go up further" .format (o_res , go_up )
1179+ assert n_res != res , "{} (go up {} times) ==> already reach root and cannot go up further" .format (
1180+ o_res , go_up )
11331181 res = n_res
11341182 res = str (res )
11351183 if go_to is not None :
11361184 res = jpath (res , go_to )
1137- assert not should_exist or os .path .exists (res ), "{} ==> does not exist" .format (res )
1185+ assert not should_exist or os .path .exists (
1186+ res ), "{} ==> does not exist" .format (res )
11381187 return _np (res )
11391188
11401189
@@ -1154,7 +1203,8 @@ def this_dir(go_up=0, go_to=None, should_exist=False):
11541203def unwrap_file (path ):
11551204 if os .path .isdir (path ):
11561205 sub_paths = os .listdir (path )
1157- assert len (sub_paths ) == 1 , "there are more than one files/dirs in {}" .format (path )
1206+ assert len (
1207+ sub_paths ) == 1 , "there are more than one files/dirs in {}" .format (path )
11581208 return unwrap_file (jpath (path , sub_paths [0 ]))
11591209 return _np (path )
11601210
0 commit comments