|
2846 | 2846 | 0x9F: "\u0178",
|
2847 | 2847 | }
|
2848 | 2848 |
|
2849 |
| -encodings = { |
2850 |
| - '437': 'cp437', |
2851 |
| - '850': 'cp850', |
2852 |
| - '852': 'cp852', |
2853 |
| - '855': 'cp855', |
2854 |
| - '857': 'cp857', |
2855 |
| - '860': 'cp860', |
2856 |
| - '861': 'cp861', |
2857 |
| - '862': 'cp862', |
2858 |
| - '863': 'cp863', |
2859 |
| - '865': 'cp865', |
2860 |
| - '866': 'cp866', |
2861 |
| - '869': 'cp869', |
2862 |
| - 'ansix341968': 'ascii', |
2863 |
| - 'ansix341986': 'ascii', |
2864 |
| - 'arabic': 'iso8859-6', |
2865 |
| - 'ascii': 'ascii', |
2866 |
| - 'asmo708': 'iso8859-6', |
2867 |
| - 'big5': 'big5', |
2868 |
| - 'big5hkscs': 'big5hkscs', |
2869 |
| - 'chinese': 'gbk', |
2870 |
| - 'cp037': 'cp037', |
2871 |
| - 'cp1026': 'cp1026', |
2872 |
| - 'cp154': 'ptcp154', |
2873 |
| - 'cp367': 'ascii', |
2874 |
| - 'cp424': 'cp424', |
2875 |
| - 'cp437': 'cp437', |
2876 |
| - 'cp500': 'cp500', |
2877 |
| - 'cp775': 'cp775', |
2878 |
| - 'cp819': 'windows-1252', |
2879 |
| - 'cp850': 'cp850', |
2880 |
| - 'cp852': 'cp852', |
2881 |
| - 'cp855': 'cp855', |
2882 |
| - 'cp857': 'cp857', |
2883 |
| - 'cp860': 'cp860', |
2884 |
| - 'cp861': 'cp861', |
2885 |
| - 'cp862': 'cp862', |
2886 |
| - 'cp863': 'cp863', |
2887 |
| - 'cp864': 'cp864', |
2888 |
| - 'cp865': 'cp865', |
2889 |
| - 'cp866': 'cp866', |
2890 |
| - 'cp869': 'cp869', |
2891 |
| - 'cp936': 'gbk', |
2892 |
| - 'cpgr': 'cp869', |
2893 |
| - 'cpis': 'cp861', |
2894 |
| - 'csascii': 'ascii', |
2895 |
| - 'csbig5': 'big5', |
2896 |
| - 'cseuckr': 'cp949', |
2897 |
| - 'cseucpkdfmtjapanese': 'euc_jp', |
2898 |
| - 'csgb2312': 'gbk', |
2899 |
| - 'cshproman8': 'hp-roman8', |
2900 |
| - 'csibm037': 'cp037', |
2901 |
| - 'csibm1026': 'cp1026', |
2902 |
| - 'csibm424': 'cp424', |
2903 |
| - 'csibm500': 'cp500', |
2904 |
| - 'csibm855': 'cp855', |
2905 |
| - 'csibm857': 'cp857', |
2906 |
| - 'csibm860': 'cp860', |
2907 |
| - 'csibm861': 'cp861', |
2908 |
| - 'csibm863': 'cp863', |
2909 |
| - 'csibm864': 'cp864', |
2910 |
| - 'csibm865': 'cp865', |
2911 |
| - 'csibm866': 'cp866', |
2912 |
| - 'csibm869': 'cp869', |
2913 |
| - 'csiso2022jp': 'iso2022_jp', |
2914 |
| - 'csiso2022jp2': 'iso2022_jp_2', |
2915 |
| - 'csiso2022kr': 'iso2022_kr', |
2916 |
| - 'csiso58gb231280': 'gbk', |
2917 |
| - 'csisolatin1': 'windows-1252', |
2918 |
| - 'csisolatin2': 'iso8859-2', |
2919 |
| - 'csisolatin3': 'iso8859-3', |
2920 |
| - 'csisolatin4': 'iso8859-4', |
2921 |
| - 'csisolatin5': 'windows-1254', |
2922 |
| - 'csisolatin6': 'iso8859-10', |
2923 |
| - 'csisolatinarabic': 'iso8859-6', |
2924 |
| - 'csisolatincyrillic': 'iso8859-5', |
2925 |
| - 'csisolatingreek': 'iso8859-7', |
2926 |
| - 'csisolatinhebrew': 'iso8859-8', |
2927 |
| - 'cskoi8r': 'koi8-r', |
2928 |
| - 'csksc56011987': 'cp949', |
2929 |
| - 'cspc775baltic': 'cp775', |
2930 |
| - 'cspc850multilingual': 'cp850', |
2931 |
| - 'cspc862latinhebrew': 'cp862', |
2932 |
| - 'cspc8codepage437': 'cp437', |
2933 |
| - 'cspcp852': 'cp852', |
2934 |
| - 'csptcp154': 'ptcp154', |
2935 |
| - 'csshiftjis': 'shift_jis', |
2936 |
| - 'csunicode11utf7': 'utf-7', |
2937 |
| - 'cyrillic': 'iso8859-5', |
2938 |
| - 'cyrillicasian': 'ptcp154', |
2939 |
| - 'ebcdiccpbe': 'cp500', |
2940 |
| - 'ebcdiccpca': 'cp037', |
2941 |
| - 'ebcdiccpch': 'cp500', |
2942 |
| - 'ebcdiccphe': 'cp424', |
2943 |
| - 'ebcdiccpnl': 'cp037', |
2944 |
| - 'ebcdiccpus': 'cp037', |
2945 |
| - 'ebcdiccpwt': 'cp037', |
2946 |
| - 'ecma114': 'iso8859-6', |
2947 |
| - 'ecma118': 'iso8859-7', |
2948 |
| - 'elot928': 'iso8859-7', |
2949 |
| - 'eucjp': 'euc_jp', |
2950 |
| - 'euckr': 'cp949', |
2951 |
| - 'extendedunixcodepackedformatforjapanese': 'euc_jp', |
2952 |
| - 'gb18030': 'gb18030', |
2953 |
| - 'gb2312': 'gbk', |
2954 |
| - 'gb231280': 'gbk', |
2955 |
| - 'gbk': 'gbk', |
2956 |
| - 'greek': 'iso8859-7', |
2957 |
| - 'greek8': 'iso8859-7', |
2958 |
| - 'hebrew': 'iso8859-8', |
2959 |
| - 'hproman8': 'hp-roman8', |
2960 |
| - 'hzgb2312': 'hz', |
2961 |
| - 'ibm037': 'cp037', |
2962 |
| - 'ibm1026': 'cp1026', |
2963 |
| - 'ibm367': 'ascii', |
2964 |
| - 'ibm424': 'cp424', |
2965 |
| - 'ibm437': 'cp437', |
2966 |
| - 'ibm500': 'cp500', |
2967 |
| - 'ibm775': 'cp775', |
2968 |
| - 'ibm819': 'windows-1252', |
2969 |
| - 'ibm850': 'cp850', |
2970 |
| - 'ibm852': 'cp852', |
2971 |
| - 'ibm855': 'cp855', |
2972 |
| - 'ibm857': 'cp857', |
2973 |
| - 'ibm860': 'cp860', |
2974 |
| - 'ibm861': 'cp861', |
2975 |
| - 'ibm862': 'cp862', |
2976 |
| - 'ibm863': 'cp863', |
2977 |
| - 'ibm864': 'cp864', |
2978 |
| - 'ibm865': 'cp865', |
2979 |
| - 'ibm866': 'cp866', |
2980 |
| - 'ibm869': 'cp869', |
2981 |
| - 'iso2022jp': 'iso2022_jp', |
2982 |
| - 'iso2022jp2': 'iso2022_jp_2', |
2983 |
| - 'iso2022kr': 'iso2022_kr', |
2984 |
| - 'iso646irv1991': 'ascii', |
2985 |
| - 'iso646us': 'ascii', |
2986 |
| - 'iso88591': 'windows-1252', |
2987 |
| - 'iso885910': 'iso8859-10', |
2988 |
| - 'iso8859101992': 'iso8859-10', |
2989 |
| - 'iso885911987': 'windows-1252', |
2990 |
| - 'iso885913': 'iso8859-13', |
2991 |
| - 'iso885914': 'iso8859-14', |
2992 |
| - 'iso8859141998': 'iso8859-14', |
2993 |
| - 'iso885915': 'iso8859-15', |
2994 |
| - 'iso885916': 'iso8859-16', |
2995 |
| - 'iso8859162001': 'iso8859-16', |
2996 |
| - 'iso88592': 'iso8859-2', |
2997 |
| - 'iso885921987': 'iso8859-2', |
2998 |
| - 'iso88593': 'iso8859-3', |
2999 |
| - 'iso885931988': 'iso8859-3', |
3000 |
| - 'iso88594': 'iso8859-4', |
3001 |
| - 'iso885941988': 'iso8859-4', |
3002 |
| - 'iso88595': 'iso8859-5', |
3003 |
| - 'iso885951988': 'iso8859-5', |
3004 |
| - 'iso88596': 'iso8859-6', |
3005 |
| - 'iso885961987': 'iso8859-6', |
3006 |
| - 'iso88597': 'iso8859-7', |
3007 |
| - 'iso885971987': 'iso8859-7', |
3008 |
| - 'iso88598': 'iso8859-8', |
3009 |
| - 'iso885981988': 'iso8859-8', |
3010 |
| - 'iso88599': 'windows-1254', |
3011 |
| - 'iso885991989': 'windows-1254', |
3012 |
| - 'isoceltic': 'iso8859-14', |
3013 |
| - 'isoir100': 'windows-1252', |
3014 |
| - 'isoir101': 'iso8859-2', |
3015 |
| - 'isoir109': 'iso8859-3', |
3016 |
| - 'isoir110': 'iso8859-4', |
3017 |
| - 'isoir126': 'iso8859-7', |
3018 |
| - 'isoir127': 'iso8859-6', |
3019 |
| - 'isoir138': 'iso8859-8', |
3020 |
| - 'isoir144': 'iso8859-5', |
3021 |
| - 'isoir148': 'windows-1254', |
3022 |
| - 'isoir149': 'cp949', |
3023 |
| - 'isoir157': 'iso8859-10', |
3024 |
| - 'isoir199': 'iso8859-14', |
3025 |
| - 'isoir226': 'iso8859-16', |
3026 |
| - 'isoir58': 'gbk', |
3027 |
| - 'isoir6': 'ascii', |
3028 |
| - 'koi8r': 'koi8-r', |
3029 |
| - 'koi8u': 'koi8-u', |
3030 |
| - 'korean': 'cp949', |
3031 |
| - 'ksc5601': 'cp949', |
3032 |
| - 'ksc56011987': 'cp949', |
3033 |
| - 'ksc56011989': 'cp949', |
3034 |
| - 'l1': 'windows-1252', |
3035 |
| - 'l10': 'iso8859-16', |
3036 |
| - 'l2': 'iso8859-2', |
3037 |
| - 'l3': 'iso8859-3', |
3038 |
| - 'l4': 'iso8859-4', |
3039 |
| - 'l5': 'windows-1254', |
3040 |
| - 'l6': 'iso8859-10', |
3041 |
| - 'l8': 'iso8859-14', |
3042 |
| - 'latin1': 'windows-1252', |
3043 |
| - 'latin10': 'iso8859-16', |
3044 |
| - 'latin2': 'iso8859-2', |
3045 |
| - 'latin3': 'iso8859-3', |
3046 |
| - 'latin4': 'iso8859-4', |
3047 |
| - 'latin5': 'windows-1254', |
3048 |
| - 'latin6': 'iso8859-10', |
3049 |
| - 'latin8': 'iso8859-14', |
3050 |
| - 'latin9': 'iso8859-15', |
3051 |
| - 'ms936': 'gbk', |
3052 |
| - 'mskanji': 'shift_jis', |
3053 |
| - 'pt154': 'ptcp154', |
3054 |
| - 'ptcp154': 'ptcp154', |
3055 |
| - 'r8': 'hp-roman8', |
3056 |
| - 'roman8': 'hp-roman8', |
3057 |
| - 'shiftjis': 'shift_jis', |
3058 |
| - 'tis620': 'cp874', |
3059 |
| - 'unicode11utf7': 'utf-7', |
3060 |
| - 'us': 'ascii', |
3061 |
| - 'usascii': 'ascii', |
3062 |
| - 'utf16': 'utf-16', |
3063 |
| - 'utf16be': 'utf-16-be', |
3064 |
| - 'utf16le': 'utf-16-le', |
3065 |
| - 'utf8': 'utf-8', |
3066 |
| - 'windows1250': 'cp1250', |
3067 |
| - 'windows1251': 'cp1251', |
3068 |
| - 'windows1252': 'cp1252', |
3069 |
| - 'windows1253': 'cp1253', |
3070 |
| - 'windows1254': 'cp1254', |
3071 |
| - 'windows1255': 'cp1255', |
3072 |
| - 'windows1256': 'cp1256', |
3073 |
| - 'windows1257': 'cp1257', |
3074 |
| - 'windows1258': 'cp1258', |
3075 |
| - 'windows936': 'gbk', |
3076 |
| - 'x-x-big5': 'big5'} |
3077 |
| - |
3078 | 2849 | tokenTypes = {
|
3079 | 2850 | "Doctype": 0,
|
3080 | 2851 | "Characters": 1,
|
|
0 commit comments