77import re
88import sys
99from encodings .aliases import aliases as ALIASES
10- from functools import reduce , wraps
10+ from functools import reduce , update_wrapper , wraps
1111from importlib import import_module
1212from inspect import currentframe
1313from itertools import chain , product
3939 "remove" , "reset" , "s2i" , "search" , "stopfunc" , "BytesIO" , "_input" , "_stripl" , "CodecMacro" ,
4040 "DARWIN" , "LANG" , "LINUX" , "MASKS" , "PY3" , "UNIX" , "WINDOWS" ]
# registry handle; initialised to None here
CODECS_REGISTRY = None
# search functions whose name is already resolvable by the native lookup (filled in by register(...))
CODECS_OVERWRITTEN = []
CODECS_CATEGORIES = ["native", "custom"]
# two-letter lowercase language code derived from the current locale; an empty string when the locale defines no
#  language code
LANG = getlocale()
LANG = (LANG[0] or "")[:2].lower() if LANG else LANG
4447MASKS = {
4548 'a' : printable ,
4649 'b' : "" .join (chr (i ) for i in range (256 )),
@@ -142,6 +145,20 @@ def __repr__(self):
142145 return "<codext.CodecMacro object for encoding %s at %#x>" % (self .name , id (self ))
143146
144147
# inspired by: https://stackoverflow.com/questions/10875442/possible-to-change-a-functions-repr-in-python
class Repr(object):
    """ Callable proxy around a function that gives it a custom representation.

    :param name: label displayed in the representation of the proxy
    :param func: function to be wrapped ; calls on the proxy are forwarded to it
    """
    def __init__(self, name, func):
        self.__name = name
        self.__func = func
        # copy the metadata of the wrapped function (__name__, __doc__, __dict__, ...) onto the proxy
        update_wrapper(self, func)

    def __call__(self, *args, **kwargs):
        """ Forward the call, unchanged, to the wrapped function. """
        wrapped = self.__func
        return wrapped(*args, **kwargs)

    def __repr__(self):
        """ Representation showing the given name and the address of the proxy object. """
        label, address = self.__name, id(self)
        return "<search-function %s at 0x%x>" % (label, address)
160+
161+
145162def __stdin_pipe ():
146163 """ Stdin pipe read function. """
147164 try :
@@ -173,6 +190,12 @@ def _stripl(s, st_lines, st_crlf):
173190 return s
174191
175192
def _with_repr(name):
    """ Decorator factory wrapping the decorated function into a Repr object labelled with the given name.

    :param name: label passed to Repr for the representation of the decorated function
    :return:     decorator taking a function and returning its Repr proxy
    """
    def _wrapper(func):
        wrapped = Repr(name, func)
        return wrapped
    return _wrapper
197+
198+
176199def add (ename , encode = None , decode = None , pattern = None , text = True , add_to_codecs = False , ** kwargs ):
177200 """ This adds a new codec to the codecs module setting its encode and/or decode functions, eventually dynamically
178201 naming the encoding with a pattern and with file handling.
@@ -195,6 +218,7 @@ def add(ename, encode=None, decode=None, pattern=None, text=True, add_to_codecs=
195218 raise ValueError ("At least one en/decoding function must be defined" )
196219 glob = currentframe ().f_back .f_globals
197220 # search function for the new encoding
221+ @_with_repr (ename )
198222 def getregentry (encoding ):
199223 if encoding != ename and not (pattern and re .match (pattern , encoding )):
200224 return
@@ -304,6 +328,7 @@ class StreamReader(Codec, codecs.StreamReader):
304328 getregentry .__aliases__ = list (map (lambda n : re .sub (r"[\s\-]" , "_" , n ), kwargs ['aliases' ]))
305329 getregentry .__pattern__ = pattern
306330 register (getregentry , add_to_codecs )
331+ return getregentry
307332
308333
309334def add_macro (mname , * encodings ):
@@ -500,7 +525,7 @@ def __get_value(token, position, case_changed=False):
500525 return __get_value (token_inv_case , position , True )
501526 return error_func (token , position )
502527 if isinstance (result , list ):
503- result = random . choice ( result )
528+ result = result [ 0 ]
504529 return result + lsep
505530
506531 # if a separator is defined, rely on it by splitting the input text
@@ -567,7 +592,7 @@ def __get_value(token, position, case_changed=False):
567592 kwargs ['repl_minlen_b' ] = max (1 , min (map (len , map (b , set (smapdict .values ()) - {'' }))))
568593 except :
569594 pass
570- add (ename , __generic_code (), __generic_code (True ), ** kwargs )
595+ return add (ename , __generic_code (), __generic_code (True ), ** kwargs )
571596codecs .add_map = add_map
572597
573598
@@ -651,17 +676,15 @@ def list_encodings(*categories):
651676 if (len (categories ) == 0 or "native" in categories ) and "native" not in exclude :
652677 for a in set (ALIASES .values ()):
653678 try :
654- __orig_lookup (a )
679+ ci = __orig_lookup (a )
655680 except LookupError :
656681 continue
657- enc .append (a )
658- for search_function in __codecs_registry :
682+ if lookup (a ) is ci :
683+ enc .append (ci .name )
684+ for search_function in CODECS_OVERWRITTEN + __codecs_registry :
659685 name = search_function .__name__ .replace ("_" , "-" )
660686 p = search_function .__pattern__
661- if p is None :
662- ci = search_function (name )
663- else :
664- ci = search_function (generate_string_from_regex (p ))
687+ ci = search_function (name ) if p is None else search_function (generate_string_from_regex (p ))
665688 c = "other" if ci is None else ci .parameters ['category' ]
666689 if (len (categories ) == 0 or c in categories ) and c not in exclude :
667690 enc .append (name )
@@ -834,8 +857,9 @@ def _handle_error(token, position, output=""):
834857__orig_register = _codecs .register
835858
836859
def __add(ename, encode=None, decode=None, pattern=None, text=True, **kwargs):
    # variant of add(...) exposed through the codecs module ; no docstring is defined here as it is copied from
    #  add(...) right after this definition
    # add_to_codecs is forced to True, whatever value the caller may have supplied
    kwargs['add_to_codecs'] = True
    return add(ename, encode, decode, pattern, text, **kwargs)
839863__add .__doc__ = add .__doc__
840864codecs .add = __add
841865
@@ -862,19 +886,19 @@ def encode(obj, encoding='utf-8', errors='strict'):
862886def lookup (encoding , macro = True ):
863887 """ Hooked lookup function for searching first for codecs in the local registry of this module. """
864888 # first, try to match the given encoding with codecs' search functions
865- for search_function in __codecs_registry :
889+ for search_function in CODECS_OVERWRITTEN + __codecs_registry :
866890 codecinfo = search_function (encoding )
867891 if codecinfo is not None :
868892 return codecinfo
869893 # then, if a codec name was given, generate an encoding name from its pattern and get the CodecInfo
870- for search_function in __codecs_registry :
894+ for search_function in CODECS_OVERWRITTEN + __codecs_registry :
871895 if search_function .__name__ .replace ("_" , "-" ) == encoding or \
872896 encoding in getattr (search_function , "__aliases__" , []):
873897 codecinfo = search_function (generate_string_from_regex (search_function .__pattern__ ))
874898 if codecinfo is not None :
875899 return codecinfo
900+ # finally, get a CodecInfo with the original lookup function and refine it with a dictionary of parameters
876901 try :
877- # finally, get a CodecInfo with the original lookup function and refine it with a dictionary of parameters
878902 ci = __orig_lookup (encoding )
879903 ci .parameters = {'category' : "native" , 'module' : "codecs" , 'name' : ALIASES .get (ci .name , ci .name )}
880904 return ci
@@ -898,14 +922,19 @@ def register(search_function, add_to_codecs=False):
898922 to remove the codec later
899923 """
900924 if search_function not in __codecs_registry :
901- __codecs_registry .append (search_function )
925+ try :
926+ __orig_lookup (search_function .__name__ )
927+ l = CODECS_OVERWRITTEN
928+ except LookupError :
929+ l = __codecs_registry
930+ l .append (search_function )
902931 if add_to_codecs :
903932 __orig_register (search_function )
904933
905934
def __register(search_function):
    """ Same as register(...), but with add_to_codecs always set to True.

    :param search_function: search function to be registered
    """
    register(search_function, add_to_codecs=True)
909938codecs .register = __register
910939
911940
0 commit comments