13
13
import sys
14
14
import numpy
15
15
import threading
16
+ import re
16
17
17
18
is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE
18
19
from numexpr import interpreter , expressions , use_vml
@@ -259,10 +260,17 @@ def __init__(self, astnode):
259
260
def __str__ (self ):
260
261
return 'Immediate(%d)' % (self .node .value ,)
261
262
262
-
263
+ _forbidden_re = re . compile ( '[\;[\:]|__' )
263
264
def stringToExpression (s , types , context ):
264
265
"""Given a string, convert it to a tree of ExpressionNode's.
265
266
"""
267
+ # sanitize the string for obvious attack vectors that NumExpr cannot
268
+ # parse into its homebrew AST. This is to protect the call to `eval` below.
269
+ # We forbid `;`, `:`, `[` and `__`.
270
+ # We would like to forbid `.` but it is both a reference and decimal point.
271
+ if _forbidden_re .search (s ) is not None :
272
+ raise ValueError (f'Expression { s } has forbidden control characters.' )
273
+
266
274
old_ctx = expressions ._context .get_current_context ()
267
275
try :
268
276
expressions ._context .set_new_context (context )
@@ -285,8 +293,10 @@ def stringToExpression(s, types, context):
285
293
t = types .get (name , default_type )
286
294
names [name ] = expressions .VariableNode (name , type_to_kind [t ])
287
295
names .update (expressions .functions )
296
+
288
297
# now build the expression
289
298
ex = eval (c , names )
299
+
290
300
if expressions .isConstant (ex ):
291
301
ex = expressions .ConstantNode (ex , expressions .getKind (ex ))
292
302
elif not isinstance (ex , expressions .ExpressionNode ):
@@ -611,9 +621,7 @@ def NumExpr(ex, signature=(), **kwargs):
611
621
612
622
Returns a `NumExpr` object containing the compiled function.
613
623
"""
614
- # NumExpr can be called either directly by the end-user, in which case
615
- # kwargs need to be sanitized by getContext, or by evaluate,
616
- # in which case kwargs are in already sanitized.
624
+
617
625
# In that case _frame_depth is wrong (it should be 2) but it doesn't matter
618
626
# since it will not be used (because truediv='auto' has already been
619
627
# translated to either True or False).
@@ -758,7 +766,7 @@ def getArguments(names, local_dict=None, global_dict=None, _frame_depth: int=2):
758
766
_names_cache = CacheDict (256 )
759
767
_numexpr_cache = CacheDict (256 )
760
768
_numexpr_last = {}
761
-
769
+ _numexpr_sanity = set ()
762
770
evaluate_lock = threading .Lock ()
763
771
764
772
# MAYBE: decorate this function to add attributes instead of having the
@@ -861,7 +869,7 @@ def evaluate(ex: str,
861
869
out : numpy .ndarray = None ,
862
870
order : str = 'K' ,
863
871
casting : str = 'safe' ,
864
- _frame_depth : int = 3 ,
872
+ _frame_depth : int = 3 ,
865
873
** kwargs ) -> numpy .ndarray :
866
874
"""
867
875
Evaluate a simple array expression element-wise using the virtual machine.
@@ -909,6 +917,8 @@ def evaluate(ex: str,
909
917
_frame_depth: int
910
918
The calling frame depth. Unless you are a NumExpr developer you should
911
919
not set this value.
920
+
921
+
912
922
"""
913
923
# We could avoid code duplication if we called validate and then re_evaluate
914
924
# here, but then we have difficulties with the `sys.getframe(2)` call in
@@ -921,10 +931,6 @@ def evaluate(ex: str,
921
931
else :
922
932
raise e
923
933
924
-
925
-
926
-
927
-
928
934
def re_evaluate (local_dict : Optional [Dict ] = None ,
929
935
_frame_depth : int = 2 ) -> numpy .ndarray :
930
936
"""
0 commit comments