-
Notifications
You must be signed in to change notification settings - Fork 28
Regex support in Python→Laurel translation #623
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
333a800
8736f54
b868c65
caa351a
e81c9e8
2376558
c5156e4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -48,7 +48,8 @@ datatype Error () { | |
| AssertionError (Assertion_msg : string), | ||
| UnimplementedError (Unimplement_msg : string), | ||
| UndefinedError (Undefined_msg : string), | ||
| IndexError (IndexError_msg : string) | ||
| IndexError (IndexError_msg : string), | ||
| RePatternError (Re_msg : string) | ||
| }; | ||
|
|
||
| // ///////////////////////////////////////////////////////////////////////////////////// | ||
|
|
@@ -93,12 +94,103 @@ datatype DictStrAny () { | |
| DictStrAny_cons (key: string, val: Any, tail: DictStrAny) | ||
| }; | ||
|
|
||
| // Forward declarations: needed so the inline functions after CoreOnlyDelimiter | ||
| // can reference these during DDM parsing. Filtered out at merge (precede the | ||
| // sentinel). The real definitions with concreteEval are supplied by ReFactory | ||
| // at verification time. | ||
| // Each of the next three maps a Python pattern string to a regex for the | ||
| // corresponding matching mode (fullmatch / match / search). | ||
| function re_fullmatch_str(pattern : string) : regex; | ||
| function re_match_str(pattern : string) : regex; | ||
| function re_search_str(pattern : string) : regex; | ||
| // Error value associated with a pattern; the re_* wrappers below test it with | ||
| // Error..isRePatternError before attempting a match. | ||
| function re_pattern_error(pattern : string) : Error; | ||
|
|
||
| type CoreOnlyDelimiter; | ||
|
|
||
| // ===================================================================== | ||
| // Core-only declarations (not expressed in Laurel) | ||
| // ===================================================================== | ||
|
|
||
| // ///////////////////////////////////////////////////////////////////////////////////// | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Have you tried putting all of this in the Laurel prelude instead of the Core one?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The initial few functions may have to stay in Core because they call the SMTLIB regex functions, but the functions that call them might be movable to Laurel. Let me look into that...
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Managed to move them to the Laurel prelude, but all asserts with regexes now go from pass -> unknown because those functions are no longer marked inline. Is there any existing way to do that in Laurel?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Not right now. I think I will change it so Laurel will just mark everything inline when using the Python front-end.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Got it - is that something you can do on Monday (tomorrow)? Otherwise I could sneak that into this PR |
||
| // Regex support | ||
| // | ||
| // Python signatures: | ||
| // re.compile(pattern: str) -> re.Pattern | ||
| // re.match(pattern: str | re.Pattern, string: str) -> re.Match | None | ||
| // re.search(pattern: str | re.Pattern, string: str) -> re.Match | None | ||
| // re.fullmatch(pattern: str | re.Pattern, string: str) -> re.Match | None | ||
| // | ||
| // Architecture: | ||
| // | ||
| // re.compile is a semantic no-op — it returns the pattern string unchanged. | ||
| // The mode-specific factory functions re_fullmatch_str, re_match_str, | ||
| // re_search_str each compile a pattern string to a regex with the correct | ||
| // MatchMode (via pythonRegexToCore), so anchors (^/$) are handled properly. | ||
| // Their concreteEval fires when the pattern is a string literal. | ||
| // | ||
| // The _bool helpers call the mode-specific factories, so there is a single | ||
| // source of truth for mode-specific compilation. | ||
| // | ||
| // On match, we return a from_ClassInstance wrapping a concrete re_Match | ||
| // with pos=0 and endpos=str.len(s), which is sound for the module-level | ||
| // API (no pos/endpos parameters). | ||
| // ///////////////////////////////////////////////////////////////////////////////////// | ||
|
|
||
| // The mode-specific factory functions are defined via ReFactory (with | ||
| // concreteEval for literal pattern expansion), not in this prelude, to avoid | ||
| // duplicate definitions; only their forward declarations appear in this file, | ||
| // before CoreOnlyDelimiter. | ||
|
|
||
| // Boolean form of re.fullmatch: true iff s is a member (str.in.re) of the | ||
| // regex that re_fullmatch_str produces for pattern. | ||
| inline function re_fullmatch_bool(pattern : string, s : string) : bool { | ||
| str.in.re(s, re_fullmatch_str(pattern)) | ||
| } | ||
| // Boolean form of re.match: true iff s is a member (str.in.re) of the | ||
| // regex that re_match_str produces for pattern. | ||
| inline function re_match_bool(pattern : string, s : string) : bool { | ||
| str.in.re(s, re_match_str(pattern)) | ||
| } | ||
| // Boolean form of re.search: true iff s is a member (str.in.re) of the | ||
| // regex that re_search_str produces for pattern. | ||
| inline function re_search_bool(pattern : string, s : string) : bool { | ||
| str.in.re(s, re_search_str(pattern)) | ||
| } | ||
|
|
||
| // Builds the Any value returned on a successful match: a from_ClassInstance | ||
| // tagged "re_Match" whose field list records the subject string s, | ||
| // pos = 0 and endpos = str.len(s). | ||
| inline function mk_re_Match(s : string) : Any { | ||
| from_ClassInstance("re_Match", | ||
| DictStrAny_cons("re_match_string", from_string(s), | ||
| DictStrAny_cons("re_match_pos", from_int(0), | ||
| DictStrAny_cons("re_match_endpos", from_int(str.len(s)), | ||
| DictStrAny_empty())))) | ||
| } | ||
|
|
||
| // re.compile is a no-op: returns the pattern string unchanged. | ||
| // Precondition: pattern must be a from_string value. The argument is returned | ||
| // as-is, so a "compiled" pattern is still the pattern string wrapped in Any. | ||
| inline function re_compile(pattern : Any) : Any | ||
| requires Any..isfrom_string(pattern); | ||
| { | ||
| pattern | ||
| } | ||
|
|
||
| // Model of re.fullmatch(pattern, s) over Any values. | ||
| // Precondition: both pattern and s are from_string values. | ||
| // Result, checked in this order: | ||
| //   1. exception(re_pattern_error(pattern)) if that error is a RePatternError; | ||
| //   2. mk_re_Match(s) if re_fullmatch_bool holds for the two strings; | ||
| //   3. from_none() otherwise. | ||
| inline function re_fullmatch(pattern : Any, s : Any) : Any | ||
| requires Any..isfrom_string(pattern) && Any..isfrom_string(s); | ||
| { | ||
| if Error..isRePatternError(re_pattern_error(Any..as_string!(pattern))) | ||
| then exception(re_pattern_error(Any..as_string!(pattern))) | ||
| else if re_fullmatch_bool(Any..as_string!(pattern), Any..as_string!(s)) | ||
| then mk_re_Match(Any..as_string!(s)) | ||
| else from_none() | ||
| } | ||
| // Model of re.match(pattern, s) over Any values. | ||
| // Precondition: both pattern and s are from_string values. | ||
| // Result, checked in this order: | ||
| //   1. exception(re_pattern_error(pattern)) if that error is a RePatternError; | ||
| //   2. mk_re_Match(s) if re_match_bool holds for the two strings; | ||
| //   3. from_none() otherwise. | ||
| inline function re_match(pattern : Any, s : Any) : Any | ||
| requires Any..isfrom_string(pattern) && Any..isfrom_string(s); | ||
| { | ||
| if Error..isRePatternError(re_pattern_error(Any..as_string!(pattern))) | ||
| then exception(re_pattern_error(Any..as_string!(pattern))) | ||
| else if re_match_bool(Any..as_string!(pattern), Any..as_string!(s)) | ||
| then mk_re_Match(Any..as_string!(s)) | ||
| else from_none() | ||
| } | ||
| // Model of re.search(pattern, s) over Any values. | ||
| // Precondition: both pattern and s are from_string values. | ||
| // Result, checked in this order: | ||
| //   1. exception(re_pattern_error(pattern)) if that error is a RePatternError; | ||
| //   2. mk_re_Match(s) if re_search_bool holds for the two strings; | ||
| //   3. from_none() otherwise. | ||
| inline function re_search(pattern : Any, s : Any) : Any | ||
| requires Any..isfrom_string(pattern) && Any..isfrom_string(s); | ||
| { | ||
| if Error..isRePatternError(re_pattern_error(Any..as_string!(pattern))) | ||
| then exception(re_pattern_error(Any..as_string!(pattern))) | ||
| else if re_search_bool(Any..as_string!(pattern), Any..as_string!(s)) | ||
| then mk_re_Match(Any..as_string!(s)) | ||
| else from_none() | ||
| } | ||
|
|
||
| // ///////////////////////////////////////////////////////////////////////////////////// | ||
| //Functions that we provide to Python user | ||
| //to write assertions/contracts about types of variables | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is incorrect. Use
.TCore "regex" instead of a .Userdefined "regex" type, so you don't need this change.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah that comment is LLM nonsense; I'll just follow the convention of the other builtin Laurel types that map to Core types by DDM ops for them.