1
1
/*
2
- * Copyright (c) 2018, 2023 , Oracle and/or its affiliates. All rights reserved.
2
+ * Copyright (c) 2018, 2024 , Oracle and/or its affiliates. All rights reserved.
3
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
4
*
5
5
* The Universal Permissive License (UPL), Version 1.0
43
43
import static com .oracle .graal .python .runtime .exception .PythonErrorType .ValueError ;
44
44
import static com .oracle .graal .python .util .PythonUtils .TS_ENCODING ;
45
45
46
- import java .text .Normalizer ;
47
46
import java .util .List ;
48
47
49
48
import org .graalvm .shadowed .com .ibm .icu .lang .UCharacter ;
50
49
import org .graalvm .shadowed .com .ibm .icu .lang .UProperty ;
50
+ import org .graalvm .shadowed .com .ibm .icu .text .Normalizer2 ;
51
+ import org .graalvm .shadowed .com .ibm .icu .util .VersionInfo ;
51
52
52
53
import com .oracle .graal .python .annotations .ArgumentClinic ;
53
54
import com .oracle .graal .python .builtins .Builtin ;
64
65
import com .oracle .truffle .api .CompilerDirectives .TruffleBoundary ;
65
66
import com .oracle .truffle .api .dsl .Bind ;
66
67
import com .oracle .truffle .api .dsl .Cached ;
68
+ import com .oracle .truffle .api .dsl .Cached .Exclusive ;
67
69
import com .oracle .truffle .api .dsl .GenerateNodeFactory ;
70
+ import com .oracle .truffle .api .dsl .ImportStatic ;
68
71
import com .oracle .truffle .api .dsl .NodeFactory ;
69
72
import com .oracle .truffle .api .dsl .Specialization ;
70
73
import com .oracle .truffle .api .nodes .Node ;
@@ -78,90 +81,10 @@ protected List<? extends NodeFactory<? extends PythonBuiltinBaseNode>> getNodeFa
78
81
}
79
82
80
83
public static String getUnicodeVersion () {
81
-
82
- // Preliminary Unicode 11 data obtained from
83
- // <https://www.unicode.org/Public/11.0.0/ucd/DerivedAge-11.0.0d13.txt>.
84
- if (Character .getType ('\u0560' ) != Character .UNASSIGNED ) {
85
- return "11.0.0" ; // 11.0, June 2018.
86
- }
87
-
88
- if (Character .getType ('\u0860' ) != Character .UNASSIGNED ) {
89
- return "10.0.0" ; // 10.0, June 2017.
90
- }
91
-
92
- if (Character .getType ('\u08b6' ) != Character .UNASSIGNED ) {
93
- return "9.0.0" ; // 9.0, June 2016.
94
- }
95
-
96
- if (Character .getType ('\u08b3' ) != Character .UNASSIGNED ) {
97
- return "8.0.0" ; // 8.0, June 2015.
98
- }
99
-
100
- if (Character .getType ('\u037f' ) != Character .UNASSIGNED ) {
101
- return "7.0.0" ; // 7.0, June 2014.
102
- }
103
-
104
- if (Character .getType ('\u061c' ) != Character .UNASSIGNED ) {
105
- return "6.3.0" ; // 6.3, September 2013.
106
- }
107
-
108
- if (Character .getType ('\u20ba' ) != Character .UNASSIGNED ) {
109
- return "6.2.0" ; // 6.2, September 2012.
110
- }
111
-
112
- if (Character .getType ('\u058f' ) != Character .UNASSIGNED ) {
113
- return "6.1.0" ; // 6.1, January 2012.
114
- }
115
-
116
- if (Character .getType ('\u0526' ) != Character .UNASSIGNED ) {
117
- return "6.0.0" ; // 6.0, October 2010.
118
- }
119
-
120
- if (Character .getType ('\u0524' ) != Character .UNASSIGNED ) {
121
- return "5.2.0" ; // 5.2, October 2009.
122
- }
123
-
124
- if (Character .getType ('\u0370' ) != Character .UNASSIGNED ) {
125
- return "5.1.0" ; // 5.1, March 2008.
126
- }
127
-
128
- if (Character .getType ('\u0242' ) != Character .UNASSIGNED ) {
129
- return "5.0.0" ; // 5.0, July 2006.
130
- }
131
-
132
- if (Character .getType ('\u0237' ) != Character .UNASSIGNED ) {
133
- return "4.1.0" ; // 4.1, March 2005.
134
- }
135
-
136
- if (Character .getType ('\u0221' ) != Character .UNASSIGNED ) {
137
- return "4.0.0" ; // 4.0, April 2003.
138
- }
139
-
140
- if (Character .getType ('\u0220' ) != Character .UNASSIGNED ) {
141
- return "3.2.0" ; // 3.2, March 2002.
142
- }
143
-
144
- if (Character .getType ('\u03f4' ) != Character .UNASSIGNED ) {
145
- return "3.1.0" ; // 3.1, March 2001.
146
- }
147
-
148
- if (Character .getType ('\u01f6' ) != Character .UNASSIGNED ) {
149
- return "3.0.0" ; // 3.0, September 1999.
150
- }
151
-
152
- if (Character .getType ('\u20ac' ) != Character .UNASSIGNED ) {
153
- return "2.1.0" ; // 2.1, May 1998.
154
- }
155
-
156
- if (Character .getType ('\u0591' ) != Character .UNASSIGNED ) {
157
- return "2.0.0" ; // 2.0, July 1996.
158
- }
159
-
160
- if (Character .getType ('\u0000' ) != Character .UNASSIGNED ) {
161
- return "1.1.0" ; // 1.1, June 1993.
162
- }
163
-
164
- return "1.0.0" ; // 1.0
84
+ VersionInfo version = UCharacter .getUnicodeVersion ();
85
+ return Integer .toString (version .getMajor ()) + '.' +
86
+ version .getMinor () + '.' +
87
+ version .getMicro ();
165
88
}
166
89
167
90
/**
@@ -186,39 +109,44 @@ public void initialize(Python3Core core) {
186
109
addBuiltinConstant ("unidata_version" , getUnicodeVersion ());
187
110
}
188
111
112
+ static final int NORMALIZER_FORM_COUNT = 4 ;
113
+
114
+ @ TruffleBoundary
115
+ static Normalizer2 getNormalizer (TruffleString form ) {
116
+ return switch (form .toJavaStringUncached ()) {
117
+ case "NFC" -> Normalizer2 .getNFCInstance ();
118
+ case "NFKC" -> Normalizer2 .getNFKCInstance ();
119
+ case "NFD" -> Normalizer2 .getNFDInstance ();
120
+ case "NFKD" -> Normalizer2 .getNFKDInstance ();
121
+ default -> null ;
122
+ };
123
+ }
124
+
189
125
// unicodedata.normalize(form, unistr)
190
126
@ Builtin (name = "normalize" , minNumOfPositionalArgs = 2 , parameterNames = {"form" , "unistr" })
191
127
@ ArgumentClinic (name = "form" , conversion = ArgumentClinic .ClinicConversion .TString )
192
128
@ ArgumentClinic (name = "unistr" , conversion = ArgumentClinic .ClinicConversion .TString )
193
129
@ GenerateNodeFactory
130
+ @ ImportStatic (UnicodeDataModuleBuiltins .class )
194
131
public abstract static class NormalizeNode extends PythonBinaryClinicBuiltinNode {
195
- @ TruffleBoundary
196
- protected Normalizer .Form getForm (TruffleString form ) {
197
- try {
198
- return Normalizer .Form .valueOf (form .toJavaStringUncached ());
199
- } catch (IllegalArgumentException e ) {
200
- return null ;
201
- }
202
- }
203
-
204
- @ Specialization (guards = {"stringEquals(form, cachedForm, equalNode)" }, limit = "4" )
132
+ @ Specialization (guards = {"cachedNormalizer != null" , "stringEquals(form, cachedForm, equalNode)" }, limit = "NORMALIZER_FORM_COUNT" )
205
133
static TruffleString normalize (@ SuppressWarnings ("unused" ) TruffleString form , TruffleString unistr ,
206
- @ Bind ("this" ) Node inliningTarget ,
207
134
@ SuppressWarnings ("unused" ) @ Cached ("form" ) TruffleString cachedForm ,
208
- @ Cached ("getForm (cachedForm)" ) Normalizer . Form cachedNormForm ,
135
+ @ Cached ("getNormalizer (cachedForm)" ) Normalizer2 cachedNormalizer ,
209
136
@ SuppressWarnings ("unused" ) @ Cached TruffleString .EqualNode equalNode ,
210
137
@ Cached TruffleString .ToJavaStringNode toJavaStringNode ,
211
- @ Cached TruffleString .FromJavaStringNode fromJavaStringNode ,
212
- @ Cached PRaiseNode .Lazy raiseNode ) {
213
- if (cachedNormForm == null ) {
214
- throw raiseNode .get (inliningTarget ).raise (ValueError , ErrorMessages .INVALID_NORMALIZATION_FORM );
215
- }
216
- return fromJavaStringNode .execute (normalize (toJavaStringNode .execute (unistr ), cachedNormForm ), TS_ENCODING );
138
+ @ Exclusive @ Cached TruffleString .FromJavaStringNode fromJavaStringNode ) {
139
+ return fromJavaStringNode .execute (normalize (toJavaStringNode .execute (unistr ), cachedNormalizer ), TS_ENCODING );
140
+ }
141
+
142
+ @ Specialization (guards = "getNormalizer(form) == null" )
143
+ TruffleString invalidForm (@ SuppressWarnings ("unused" ) TruffleString form , @ SuppressWarnings ("unused" ) TruffleString unistr ) {
144
+ throw PRaiseNode .raiseUncached (this , ValueError , ErrorMessages .INVALID_NORMALIZATION_FORM );
217
145
}
218
146
219
147
@ TruffleBoundary
220
- private static String normalize (String str , Normalizer . Form normForm ) {
221
- return Normalizer .normalize (str , normForm );
148
+ private static String normalize (String str , Normalizer2 normalizer ) {
149
+ return normalizer .normalize (str );
222
150
}
223
151
224
152
@ Override
@@ -232,26 +160,20 @@ protected ArgumentClinicProvider getArgumentClinic() {
232
160
@ ArgumentClinic (name = "form" , conversion = ArgumentClinic .ClinicConversion .TString )
233
161
@ ArgumentClinic (name = "unistr" , conversion = ArgumentClinic .ClinicConversion .TString )
234
162
@ GenerateNodeFactory
163
+ @ ImportStatic (UnicodeDataModuleBuiltins .class )
235
164
public abstract static class IsNormalizedNode extends PythonBinaryClinicBuiltinNode {
236
- @ TruffleBoundary
237
- protected Normalizer .Form getForm (TruffleString form ) {
238
- try {
239
- return Normalizer .Form .valueOf (form .toJavaStringUncached ());
240
- } catch (IllegalArgumentException e ) {
241
- return null ;
242
- }
243
- }
244
-
245
- @ Specialization (guards = {"stringEquals(form, cachedForm, equalNode)" }, limit = "4" )
165
+ @ Specialization (guards = {"cachedNormalizer != null" , "stringEquals(form, cachedForm, equalNode)" }, limit = "NORMALIZER_FORM_COUNT" )
246
166
@ TruffleBoundary
247
167
boolean isNormalized (@ SuppressWarnings ("unused" ) TruffleString form , TruffleString unistr ,
248
168
@ SuppressWarnings ("unused" ) @ Cached ("form" ) TruffleString cachedForm ,
249
- @ Cached ("getForm (cachedForm)" ) Normalizer . Form cachedNormForm ,
169
+ @ Cached ("getNormalizer (cachedForm)" ) Normalizer2 cachedNormalizer ,
250
170
@ SuppressWarnings ("unused" ) @ Cached TruffleString .EqualNode equalNode ) {
251
- if (cachedNormForm == null ) {
252
- throw PRaiseNode .raiseUncached (this , ValueError , ErrorMessages .INVALID_NORMALIZATION_FORM );
253
- }
254
- return Normalizer .isNormalized (unistr .toJavaStringUncached (), cachedNormForm );
171
+ return cachedNormalizer .isNormalized (unistr .toJavaStringUncached ());
172
+ }
173
+
174
+ @ Specialization (guards = "getNormalizer(form) == null" )
175
+ TruffleString invalidForm (@ SuppressWarnings ("unused" ) TruffleString form , @ SuppressWarnings ("unused" ) TruffleString unistr ) {
176
+ throw PRaiseNode .raiseUncached (this , ValueError , ErrorMessages .INVALID_NORMALIZATION_FORM );
255
177
}
256
178
257
179
@ Override
0 commit comments