-
Notifications
You must be signed in to change notification settings - Fork 39
Expand file tree
/
Copy pathEncoder.lean
More file actions
358 lines (304 loc) · 15.2 KB
/
Encoder.lean
File metadata and controls
358 lines (304 loc) · 15.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
/-
Copyright Strata Contributors
SPDX-License-Identifier: Apache-2.0 OR MIT
-/
module
public import Strata.DL.SMT.DDMTransform.Translate
public import Strata.DL.SMT.Factory
public import Strata.DL.SMT.Op
public import Strata.DL.SMT.Solver
public import Strata.DL.SMT.Term
public import Strata.DL.SMT.TermType
import Std.Data.HashMap
/-!
Based on Cedar's Term language.
(https://github.com/cedar-policy/cedar-spec/blob/main/cedar-lean/Cedar/SymCC/Encoder.lean)
This file defines the encoder, which translates a list of boolean Terms
into a list of SMT assertions. Term encoding is trusted.
## Architecture
The encoding pipeline has two layers:
1. **Solver layer** (`SolverM`): A stateful monad that wraps the solver process
and caches `Term → SMT-LIB string` and `TermType → SMT-LIB string`
conversions. All string formatting lives in the Solver layer.
2. **Encoder layer** (`EncoderM`): Sits on top of `SolverM` and manages
A-normal form decomposition purely in the `Term` domain:
- **Term → abbreviated Term cache** (`terms`): Maps each `Term` to its
abbreviated `Term.var` reference (e.g., a variable named `$__t.0`, `$__t.1`).
Large terms are broken into small `define-fun` definitions with short
names, and the Solver handles all `Term → String` conversion.
- **UF → abbreviated name cache** (`ufs`): Maps uninterpreted functions to
their abbreviated identifiers (e.g., `$__f.0`, `$__f.1`).
The Encoder works purely with `Term` values. The `SolverM` layer handles all
string conversion and caching when emitting commands.
We will use the following type representations for primitive types:
* `TermType.bool`: builtin SMT `Bool` type
* `TermType.int`: builtin SMT `Int` type
* `TermType.string`: builtin SMT `String` type
* `TermType.regex`: builtin SMT `RegLan` type
* `TermType.bitvec n`: builtin SMT `(_ BitVec n)` type
We will represent non-primitive types as SMT algebraic datypes:
* `TermType.option T`: a parameterized SMT algebraic datatype of the same name,
and with the constructors `(some (val T))` and `(none)`. For each constructor
argument, SMTLib introduces a corresponding (total) selector function. We
will translate `Term.some` nodes in the Term language as applications of the
`val` selector function.
Similarly to types and attributes, all uninterpreted functions, variables, and
Terms are mapped to their SMT encoding that conforms to the SMTLib syntax. We
keep track of these mappings to ensure that each Term construct is translated
to its SMT encoding exactly once. This translation invariant is necessary for
correctness in the case of UF names and variable
names; and it is neccessary for compactness in the case of terms. In
particular, the resulting SMT encoding will be in A-normal form (ANF): the body
of every s-expression in the encoding consists of atomic subterms (identifiers
or literals).
-/
namespace Strata.SMT
open Solver
public section
structure EncoderState where
/-- Maps a `Term` to its abbreviated `Term` (a `Term.var` with name like `$__t.0`).
This is a cache after converting terms to A-Normal Form. -/
terms : Std.HashMap Term Term
/-- Maps a `UF` to its abbreviated SMT identifier (e.g., `$__f.0`, `$__f.1`). -/
ufs : Std.HashMap UF String
def EncoderState.init : EncoderState where
terms := {}
ufs := {}
@[expose] abbrev EncoderM (α) := StateT EncoderState SolverM α
namespace Encoder
/-- SMT-LIB reserved keywords that should not be used as variable names.
Includes command names, logical connectives, sort names, and theory
function symbols that cvc5 disallows shadowing. -/
def smtReservedKeywords : List String :=
-- SMT-LIB reserved words from the DDM parser
let parserKeywords := _root_.Strata.reservedKeywords.map (·.2)
-- Additional keywords not in the parser list
parserKeywords ++
["true", "false", "Int", "Bool", "Real", "Array", "BitVec",
-- Core theory symbols
"abs", "and", "distinct", "/", "=", ">", ">=", "ite", "=>",
"div", "is_int", "<", "<=", "-", "mod", "*", "not", "or", "+",
"to_int", "to_real", "xor",
-- String theory symbols
"str.at", "str.++", "str.contains", "str.from_code", "str.from_int",
"str.in_re", "str.indexof", "str.is_digit", "str.<=", "str.len",
"str.<", "str.prefixof", "str.replace", "str.substr", "str.suffixof",
"str.to_code", "str.to_int", "str.to_re",
-- Regex theory symbols
"re.*", "re.+", "re.opt", "re.++", "re.union", "re.inter", "re.diff",
"re.comp", "re.loop", "re.^", "re.range", "re.none", "re.all",
"re.allchar",
-- Array theory symbols
"select", "store"]
/-- Generate a disambiguated name by appending @suffix -/
def disambiguateName (baseName : String) (suffix : Nat) : String :=
s!"{baseName}@{suffix}"
/-- Parse a list of digit characters as a natural number. -/
def digitsToNat (cs : List Char) : Nat :=
cs.foldl (fun n c => n * 10 + (c.toNat - '0'.toNat)) 0
/-- Break a potentially disambiguated name into its base name and next suffix.
If the name has an `@N` suffix, returns `(base, N + 1)`.
Otherwise returns `(name, 1)`. -/
def breakDisambiguatedName (name : String) : String × Nat :=
let cs := name.toList
let digitSuffix := cs.reverse.takeWhile Char.isDigit |>.reverse
let rest := cs.reverse.dropWhile Char.isDigit |>.reverse
match rest.reverse, digitSuffix with
| '@' :: _, _ :: _ => (String.ofList rest.dropLast, digitsToNat digitSuffix + 1)
| _, _ => (name, 1)
/-- Find a unique name by trying candidates with increasing suffixes.
The `isUsed` predicate checks if a candidate name is already taken. -/
def findUniqueName (baseName : String) (startSuffix : Nat) (isUsed : String → Bool) (limit : Nat := 1000) : String :=
let rec loop (candidate : String) (suffix : Nat) (remaining : Nat) : String :=
if h : remaining == 0 then candidate -- Fallback after limit attempts
else if isUsed candidate then
loop (disambiguateName baseName suffix) (suffix + 1) (remaining - 1)
else
candidate
termination_by remaining
decreasing_by
have : remaining ≠ 0 := by intro h'; simp [h'] at h
omega
loop (if startSuffix == 1 then baseName else disambiguateName baseName (startSuffix - 1)) startSuffix limit
/-- The `$__` prefix is reserved for internal use and cannot appear in user
identifiers (see `Strata.DL.Lambda.LState.EvalConfig.varPrefix`).
The `.` after `t`/`f` prevents collision with Lambda-generated names
like `$__t0` (variable `t`, index 0). -/
def termId (n : Nat) : String := s!"$__t.{n}"
def ufId (n : Nat) : String := s!"$__f.{n}"
def termNum : EncoderM Nat := do return (← get).terms.size
def ufNum : EncoderM Nat := do return (← get).ufs.size
def declareType (id : String) (mks : List String) : EncoderM String := do
let constrs := mks.map fun name => SMTConstructor.mk name []
declareDatatype id [] constrs
return id
/-- Emit a `define-fun` for a term in ANF. When not in a binder, emits a
`define-fun` with the given body `Term` and returns a `Term.var` reference
to the abbreviated name. When in a binder, just returns the body term. -/
def defineTerm (inBinder : Bool) (ty : TermType) (body : Term) : EncoderM Term := do
if inBinder
then return body
else do
let id := termId (← termNum)
Solver.defineFunTerm id [] ty body
return .var ⟨id, ty⟩
def defineTermBound := defineTerm True
def defineTermUnbound := defineTerm False
def defineSet (ty : TermType) (tEncs : List Term) : EncoderM Term := do
-- Build: (set.insert tN ... (set.insert t2 (set.insert t1 (as set.empty ty))))
let empty : Term := .app (.datatype_op .constructor "set.empty") [] ty
let result := tEncs.foldl (fun acc t => Term.app (.uf ⟨"set.insert", [⟨"x", t.typeOf⟩, ⟨"s", ty⟩], ty⟩) [t, acc] ty) empty
defineTermUnbound ty result
def defineRecord (ty : TermType) (tEncs : List Term) : EncoderM Term := do
defineTermUnbound ty (.app (.datatype_op .constructor ty.mkName) tEncs ty)
def encodeUF (uf : UF) : EncoderM String := do
if let (.some enc) := (← get).ufs.get? uf then return enc
-- Check for name clashes with already-encoded UFs and reserved keywords, disambiguate
let baseName := uf.id
let existingNames := (← get).ufs.toList.map (·.2) |>.toArray
let isUsed := fun candidate => existingNames.contains candidate || smtReservedKeywords.contains candidate
let id := findUniqueName baseName 1 isUsed (existingNames.size + smtReservedKeywords.length)
comment uf.id
let argTys := uf.args.map (fun vt => vt.ty)
Solver.declareFun id argTys uf.out
modifyGet λ state => (id, {state with ufs := state.ufs.insert uf id})
def defineApp (inBinder : Bool) (ty : TermType) (op : Op) (tEncs : List Term) : EncoderM Term := do
match op with
| .uf f =>
let ufName ← encodeUF f
let ufRef : UF := { id := ufName, args := f.args, out := f.out }
defineTerm inBinder ty (.app (.uf ufRef) tEncs ty)
| _ =>
defineTerm inBinder ty (.app op tEncs ty)
def extractTriggerGroup : Term -> List Term
| .app .triggers ts .trigger => ts
| e => [e]
def extractTriggers : Term -> List (List Term)
| .app .triggers ts .trigger => ts.map extractTriggerGroup
| e => [[e]]
/-- Every term in `extractTriggerGroup t` has `sizeOf ≤ sizeOf t`. -/
private theorem extractTriggerGroup_sizeOf (t ti : Term) (h : ti ∈ extractTriggerGroup t) :
sizeOf ti ≤ sizeOf t := by
unfold extractTriggerGroup at h
split at h
· have := List.sizeOf_lt_of_mem h; simp_all; omega
· simp_all
/-- Every term nested in `extractTriggers t` has `sizeOf ≤ sizeOf t`. -/
private theorem extractTriggers_sizeOf (t : Term) (ts : List Term) (ti : Term)
(hts : ts ∈ extractTriggers t) (hti : ti ∈ ts) :
sizeOf ti ≤ sizeOf t := by
unfold extractTriggers at hts
split at hts
· rw [List.mem_map] at hts
obtain ⟨t_elem, h_mem, h_eq⟩ := hts
subst h_eq
have h1 := extractTriggerGroup_sizeOf t_elem ti hti
have h2 := List.sizeOf_lt_of_mem h_mem
simp_all; omega
· simp_all
-- Helper function for quantifier generation
def defineQuantifierHelper (inBinder : Bool) (qk : QuantifierKind) (args : List TermVar) (trEncs: List (List Term)) (bodyEnc : Term) : EncoderM Term := do
let tr : Term := match trEncs with
| [] => .app .triggers [] .trigger -- empty trigger
| groups =>
-- Build trigger term from encoded trigger groups
let triggerTerms := groups.map fun group =>
.app .triggers group .trigger
.app .triggers triggerTerms .trigger
defineTerm inBinder .bool (.quant qk args tr bodyEnc)
def defineMultiAll (inBinder : Bool) (args : List TermVar) (trEncs: List (List Term)) (bodyEnc : Term) : EncoderM Term :=
defineQuantifierHelper inBinder .all args trEncs bodyEnc
def defineMultiExist (inBinder : Bool) (args : List TermVar) (trEncs: List (List Term)) (bodyEnc : Term) : EncoderM Term :=
defineQuantifierHelper inBinder .exist args trEncs bodyEnc
-- Convenience wrappers for single-variable quantifiers
def defineAll (inBinder : Bool) (x : String) (xty : TermType) (trEncs: List (List Term)) (bodyEnc : Term) : EncoderM Term :=
defineQuantifierHelper inBinder .all [⟨x, xty⟩] trEncs bodyEnc
def defineExist (inBinder : Bool) (x : String) (xty : TermType) (trEncs: List (List Term)) (bodyEnc : Term) : EncoderM Term :=
defineQuantifierHelper inBinder .exist [⟨x, xty⟩] trEncs bodyEnc
def mapM₁ {m : Type u → Type v} [Monad m] {α : Type w} {β : Type u}
(xs : List α) (f : {x : α // x ∈ xs} → m β) : m (List β) :=
xs.attach.mapM f
def encodeTerm (inBinder : Bool) (t : Term) : EncoderM Term := do
if let (.some enc) := (← get).terms.get? t then return enc
let ty := t.typeOf
let enc ←
match t with
| .var _ => return t
| .prim _ => return t
| .none _ => defineTerm inBinder ty t
| .some t₁ =>
let t₁Enc ← encodeTerm inBinder t₁
defineTerm inBinder ty (.some t₁Enc)
| .app .re_allchar [] .regex => return t
| .app .re_all [] .regex => return t
| .app .re_none [] .regex => return t
| .app .bvnego [inner] .bool =>
match inner.typeOf with
| .bitvec n =>
let innerEnc ← encodeTerm inBinder inner
let minVal : Term := .prim (.bitvec (BitVec.intMin n))
defineApp inBinder ty .eq [innerEnc, minVal]
| _ =>
return Term.bool false
| .app op ts _ => defineApp inBinder ty op (← mapM₁ ts (λ ⟨tᵢ, _⟩ => encodeTerm inBinder tᵢ))
| .quant qk qargs tr body =>
let trExprs := if Factory.isSimpleTrigger tr then [] else extractTriggers tr
let trEncs ← mapM₁ trExprs (fun ⟨ts, _⟩ => mapM₁ ts (fun ⟨ti, _⟩ => encodeTerm True ti))
let bodyEnc ← encodeTerm True body
match qk, qargs with
| .all, [⟨x, xty⟩] => defineAll inBinder x xty trEncs bodyEnc
| .all, _ => defineMultiAll inBinder qargs trEncs bodyEnc
| .exist, [⟨x, xty⟩] => defineExist inBinder x xty trEncs bodyEnc
| .exist, _ => defineMultiExist inBinder qargs trEncs bodyEnc
if inBinder
then pure enc
else modifyGet λ state => (enc, {state with terms := state.terms.insert t enc})
termination_by sizeOf t
decreasing_by
all_goals first
| term_by_mem
| -- Trigger case: ti ∈ ts, ts ∈ trExprs, trExprs from extractTriggers tr
-- Grab the membership hypotheses via ‹›, inline the let-binding
-- (trExprs is definitionally the if-then-else), split, and apply our lemma.
add_mem_size_lemmas
have hmem : _ ∈ (if Factory.isSimpleTrigger tr then ([] : List (List Term)) else extractTriggers tr) := ‹_ ∈ trExprs›
split at hmem
· simp at hmem
· have := extractTriggers_sizeOf tr _ _ hmem ‹_ ∈ _›
simp_all; omega
def encodeFunction (uf : UF) (body : Term) : EncoderM String := do
if let (.some enc) := (← get).ufs.get? uf then return enc
let id := ufId (← ufNum)
comment uf.id
let argPairs := uf.args.map (fun vt => (vt.id, vt.ty))
let bodyEnc ← encodeTerm true body
Solver.defineFunTerm id argPairs uf.out bodyEnc
modifyGet λ state => (id, {state with ufs := state.ufs.insert uf id})
/-- A utility for debugging. -/
def termToString (e : Term) : IO String := do
let b ← IO.mkRef { : IO.FS.Stream.Buffer }
let solver ← Solver.bufferWriter b
let _ ← ((Encoder.encodeTerm False e).run EncoderState.init).run solver
let contents ← b.get
if h: contents.data.IsValidUTF8
then pure (String.fromUTF8 contents.data h)
else pure "Converting SMT Term to bytes produced an invalid UTF-8 sequence."
/--
Once you've generated `Asserts` with one of the functions in Verifier.lean, you
can use this function to encode them as SMTLib assertions.
To actually solve these SMTLib assertions, you want to combine this `encode`
action with other `SolverM` actions, such as `Solver.check-sat` at a minimum.
Then you can run any `SolverM` action `act` with `act |>.run solver`, where
`solver` is a `Solver` instance you can construct using functions in
Solver.lean.
-/
def encode (ts : List Term) : SolverM Unit := do
Solver.setLogic "ALL"
Solver.declareDatatype "Option" ["X"]
[⟨"none", []⟩, ⟨"some", [("val", .constr "X" [])]⟩]
let (termEncs, _) ← ts.mapM (encodeTerm False) |>.run EncoderState.init
for t in termEncs do
Solver.assert t
end Encoder
end
namespace Strata.SMT