11package lisa .test .automation
22
3-
4-
5-
6-
7-
8-
3+ import lisa .automation .Tableau .solve
4+ import lisa .kernel .proof .SCProofChecker .checkSCProof
95import org .scalatest .funsuite .AnyFunSuite
106
11- class TableauTest extends AnyFunSuite {
12-
13- /*
14- test(s"Propositional Positive cases (${posi.size})") {
15- assert(posi.forall(_._3), posi.map((i, f, b, proof, judg) => s"$i $b" + (if !b then s" $f" else "")).mkString("\n"))
16- if posi.exists(tup => tup._5.nonEmpty & !tup._5.get.isValid) then
17- fail("A proof is wrong: " + posi.map(tup => if tup._5.nonEmpty & !tup._5.get.isValid then prettySCProof(tup._5.get) + "\n").mkString("\n"))
18- }
19-
20- test(s"First Order Quantifier Free Positive cases (${posqf.size})") {
21- assert(posqf.forall(_._3), posqf.map((i, f, b, proof, judg) => (s"$i $b" + (if !b then s" $f" else ""))).mkString("\n"))
22- if posqf.exists(tup => tup._5.nonEmpty & !tup._5.get.isValid) then
23- fail("A proof is wrong: " + posi.map(tup => if tup._5.nonEmpty & !tup._5.get.isValid then prettySCProof(tup._5.get) + "\n").mkString("\n"))
24- }
25-
26- test(s"First Order Easy Positive cases (${poseasy.size})") {
27- assert(poseasy.forall(_._3), poseasy.map((i, f, b, proof, judg) => (s"$i $b" + (if !b then s" $f" else ""))).mkString("\n"))
28- if poseasy.exists(tup => tup._5.nonEmpty & !tup._5.get.isValid) then
29- fail("A proof is wrong: " + posi.map(tup => if tup._5.nonEmpty & !tup._5.get.isValid then prettySCProof(tup._5.get) + "\n").mkString("\n"))
30- }
31-
32- test(s"First Order Hard Positive cases (${poshard.size})") {
33- assert(poshard.forall(_._3), poshard.map((i, f, b, proof, judg) => (s"$i $b" + (if !b then s" $f" else ""))).mkString("\n"))
34- if poshard.exists(tup => tup._5.nonEmpty & !tup._5.get.isValid) then
35- fail("A proof is wrong: " + posi.map(tup => if tup._5.nonEmpty & !tup._5.get.isValid then prettySCProof(tup._5.get) + "\n").mkString("\n"))
7+ class TableauTest extends AnyFunSuite with lisa.TestMain {
8+
9+ given lib : lisa.SetTheoryLibrary .type = lisa.SetTheoryLibrary
10+
11+ // --- Individual variables ---
12+ private val u = variable[Ind ]
13+ private val w = variable[Ind ]
14+ private val x = variable[Ind ]
15+ private val y = variable[Ind ]
16+ private val z = variable[Ind ]
17+ private val h2 = variable[Ind ]
18+
19+ // --- Propositional variables ---
20+ private val a = variable[Prop ]
21+ private val b = variable[Prop ]
22+ private val c = variable[Prop ]
23+ private val d = variable[Prop ]
24+
25+ // --- Function variables ---
26+ private val f = variable[Ind >>: Ind ]
27+ private val g = variable[Ind >>: Ind ]
28+ private val h = variable[Ind >>: Ind >>: Ind ]
29+
30+ // --- Predicate variables ---
31+ private val D = variable[Ind >>: Prop ]
32+ private val E = variable[Ind >>: Ind >>: Prop ]
33+ private val P = variable[Ind >>: Prop ]
34+ private val Q = variable[Ind >>: Prop ]
35+ private val R = variable[Ind >>: Ind >>: Prop ]
36+
37+ /**
38+ * Solve a formula and return (proofFound, proofValid).
39+ */
40+ private def solveAndCheck (formula : Expr [Prop ]): (Boolean , Boolean ) = {
41+ val res = Tableau .solve(() |- formula)
42+ res match
43+ case None => (false , false )
44+ case Some (proof) =>
45+ val judgement = checkSCProof(proof)
46+ (true , judgement.isValid)
3647 }
3748
38- }
39- object TableauTest {
40-
41- val u = variable[Ind]
42- val w = variable[Ind]
43- val x = variable[Ind]
44- val y = variable[Ind]
45- val z = variable[Ind]
46-
47- val a = variable[Prop]
48- val b = variable[Prop]
49- val c = variable[Prop]
50- val d = variable[Prop]
51- val e = variable[Prop]
49+ // ========================================================================
50+ // Propositional
51+ // ========================================================================
5252
53- val f = variable[Ind >>: Ind]
54- val g = variable[Ind >>: Ind]
55- val h = variable[Ind >>: Ind >>: Ind]
56-
57- val D = variable[Ind >>: Prop]
58- val E = variable[Ind >>: Ind >>: Prop]
59- val P, Q = variable[Ind >>: Prop]
60- val R = variable[Ind >>: Ind >>: Prop]
61-
62- var doprint: Boolean = false
63- def printif(s: Any) = if doprint then println(s) else ()
64-
65- val posi = List(
53+ private val propFormulas : List [Expr [Prop ]] = List (
6654 a <=> a,
6755 a \/ ! a,
6856 ((a ==> b) /\ (b ==> c)) ==> (a ==> c),
6957 (a <=> b) <=> ((a /\ b) \/ (! a /\ ! b)),
7058 ((a ==> c) /\ (b ==> c)) <=> ((a \/ b) ==> c),
7159 ((a ==> b) /\ (c ==> d)) ==> ((a \/ c) ==> (b \/ d))
72- ).zipWithIndex.map(f =>
73- val res = solve(() |- f._1)
74- (f._2, f._1, res.nonEmpty, res, res.map(checkSCProof))
7560 )
7661
77- // Quantifier Free
62+ test(" Propositional Positive cases" ) {
63+ val results = propFormulas.zipWithIndex.map { case (formula, i) =>
64+ val (found, valid) = solveAndCheck(formula)
65+ (i, formula, found, valid)
66+ }
67+ val failures = results.filterNot(_._3)
68+ assert(failures.isEmpty, failures.map(r => s " # ${r._1}: ${r._2}" ).mkString(" Not proved: " , " , " , " " ))
69+ val invalidProofs = results.filter(r => r._3 && ! r._4)
70+ assert(invalidProofs.isEmpty, invalidProofs.map(r => s " # ${r._1}" ).mkString(" Invalid proof: " , " , " , " " ))
71+ }
7872
79- val posqf = List(
80- posi.map(fo => fo._2.substitute(a := P(h(x)(h2)), b := P(x), c := R(x)(h(x)(h2)))),
81- posi.map(fo => fo._2.substitute(a := P(h(x)(h2)), b := P(h(x)(h2)), c := R(x)(h(x)(h2))))),
82- posi.map(fo => fo._2.substitute(a := R(y)(y), b := P(h(y)(h2)), c := R(h(x)(y))(h2))))
83- ).flatten.zipWithIndex.map(f =>
84- val res = solve(() |- f._1)
85- (f._2, f._1, res.nonEmpty, res, res.map(checkSCProof))
86- )
73+ // ========================================================================
74+ // First Order – Quantifier Free
75+ // ========================================================================
76+
77+ private val qfFormulas : List [Expr [Prop ]] = List (
78+ propFormulas.map(fo => fo.substitute(a := P (h(x)(h2)), b := P (x), c := R (x)(h(x)(h2)))),
79+ propFormulas.map(fo => fo.substitute(a := P (h(x)(h2)), b := P (h(x)(h2)), c := R (x)(h(x)(h2)))),
80+ propFormulas.map(fo => fo.substitute(a := R (y)(y), b := P (h(y)(h2)), c := R (h(x)(y))(h2)))
81+ ).flatten
82+
83+ test(" First Order Quantifier-Free Positive cases" ) {
84+ val results = qfFormulas.zipWithIndex.map { case (formula, i) =>
85+ val (found, valid) = solveAndCheck(formula)
86+ (i, formula, found, valid)
87+ }
88+ val failures = results.filterNot(_._3)
89+ assert(failures.isEmpty, failures.map(r => s " # ${r._1}: ${r._2}" ).mkString(" Not proved: " , " , " , " " ))
90+ val invalidProofs = results.filter(r => r._3 && ! r._4)
91+ assert(invalidProofs.isEmpty, invalidProofs.map(r => s " # ${r._1}" ).mkString(" Invalid proof: " , " , " , " " ))
92+ }
8793
88- // First Order Easy
94+ // ========================================================================
95+ // First Order – Easy (quantified substitutions)
96+ // ========================================================================
97+
98+ private val easyFormulas : List [Expr [Prop ]] = List (
99+ propFormulas.map(fo => fo.substitute(a := ∀ (x, P (x)), b := ∀ (x, P (y)), c := ∃ (x, P (x)))),
100+ propFormulas.map(fo => fo.substitute(a := ∀ (x, P (x) /\ Q (f(x))), b := ∀ (x, P (x) \/ R (y)(x)), c := ∃ (x, Q (x) ==> R (x)(y)))),
101+ propFormulas.map(fo => fo.substitute(a := ∃ (y, ∀ (x, P (x) /\ Q (f(y)))), b := ∀ (y, ∃ (x, P (x) \/ R (y)(x))), c := ∀ (y, ∃ (x, Q (x) ==> R (x)(y)))))
102+ ).flatten
103+
104+ test(" Regression test for proof reconstruction failure in delta" ) {
105+ val a = variable[Prop ]
106+ val b = ∀ (y, ∃ (x, P (x)))
107+ val formula = (a <=> b) ==> ((a /\ b) \/ (! a /\ ! b))
108+ val (found, valid) = solveAndCheck(formula)
109+ assert(found, s " Tableau should find a proof for $formula" )
110+ assert(valid, s " Proof found for $formula should be valid " )
111+ }
89112
90- val poseasy = List(
91- posi.map(fo => fo._2.substitute(a := forall(x, P(x)), b := forall(x, P(y)), c := exists(x, P(x)))),
92- posi.map(fo => fo._2.substitute(a := forall(x, P(x) /\ Q(f(x))), b := forall(x, P(x) \/ R(y)(x)), c := exists(x, Q(x) ==> R(x)(y)))),
93- posi.map(fo => fo._2.substitute(a := exists(y, forall(x, P(x) /\ Q(f(y)))), b := forall(y, exists(x, P(x) \/ R(y)(x))), c := forall(y, exists(x, Q(x) ==> R(x)(y)))))
94- ).flatten.zipWithIndex.map(f =>
95- val res = solve(() |- f._1)
96- (f._2, f._1, res.nonEmpty, res, res.map(checkSCProof))
97- )
113+ test(" First Order Easy Positive cases" ) {
114+ val results = easyFormulas.zipWithIndex.map { case (formula, i) =>
115+ val (found, valid) = solveAndCheck(formula)
116+ (i, formula, found, valid)
117+ }
118+ val failures = results.filterNot(_._3)
119+ assert(failures.isEmpty, failures.map(r => s " # ${r._1}: ${r._2}" ).mkString(" Not proved: " , " , " , " " ))
120+ val invalidProofs = results.filter(r => r._3 && ! r._4)
121+ assert(invalidProofs.isEmpty, invalidProofs.map(r => s " # ${r._1}: ${r._2}, proofFound: ${r._3}, proofValid: ${r._4}" ).mkString(" Invalid proof: \n " , " \n " , " " ))
122+ }
98123
99- // First Order Hard, from https://isabelle.in.tum.de/library/FOL/FOL-ex/Quantifiers_Cla.html
124+ // ========================================================================
125+ // First Order – Hard (from Isabelle FOL-ex/Quantifiers_Cla.html)
126+ // ========================================================================
100127
101- val a1 = forall (x, forall (y, forall (z, ((E(x)(y) /\ E(y)(z)) ==> E(x)(z)))))
102- val a2 = forall (x, forall (y, (E(x)(y) ==> E(f(x))(f(y)))))
103- val a3 = forall (x, E(f(g(x)))(g(f(x))))
104- val biga = forall (
128+ private val a1 = ∀ (x, ∀ (y, ∀ (z, ((E (x)(y) /\ E (y)(z)) ==> E (x)(z)))))
129+ private val a2 = ∀ (x, ∀ (y, (E (x)(y) ==> E (f(x))(f(y)))))
130+ private val a3 = ∀ (x, E (f(g(x)))(g(f(x))))
131+ private val biga = ∀ (
105132 x,
106- forall (
133+ ∀ (
107134 y,
108- forall (
135+ ∀ (
109136 z,
110137 ((E (x)(y) /\ E (y)(z)) ==> E (x)(z)) /\
111138 (E (x)(y) ==> E (f(x))(f(y))) /\
@@ -114,42 +141,73 @@ object TableauTest {
114141 )
115142 )
116143
117- val poshard = List(
118- forall (x, P(x) ==> Q(x)) ==> (forall (x, P(x)) ==> forall (x, Q(x))),
119- forall (x, forall (y, R(x)(y))) ==> forall (y, forall (x, R(x)(y))),
120- forall (x, forall (y, R(x)(y))) ==> forall (y, forall (x, R(y)(x))),
121- exists (x, exists (y, R(x)(y))) ==> exists (y, exists (x, R(x)(y))),
122- exists (x, exists (y, R(x)(y))) ==> exists (y, exists (x, R(y)(x))),
123- (forall (x, P(x)) \/ forall (x, Q(x))) ==> forall (x, P(x) \/ Q(x)),
124- forall (x, a ==> Q(x)) <=> (a ==> forall (x, Q(x))),
125- forall (x, P(x) ==> a) <=> (exists (x, P(x)) ==> a),
126- exists (x, P(x) \/ Q(x)) <=> (exists (x, P(x)) \/ exists (x, Q(x))),
127- exists (x, P(x) /\ Q(x)) ==> (exists (x, P(x)) /\ exists (x, Q(x))),
128- exists (y, forall (x, R(x)(y))) ==> forall (x, exists (y, R(x)(y))),
129- forall (x, Q(x)) ==> exists (x, Q(x)),
130- (forall (x, P(x) ==> Q(x)) /\ exists (x, P(x))) ==> exists (x, Q(x)),
131- ((a ==> exists (x, Q(x))) /\ a) ==> exists (x, Q(x)),
132- forall (x, P(x) ==> Q(f(x))) /\ forall (x, Q(x) ==> R(g(x))(x)) ==> (P(y) ==> R(g(f(y)))(f(y))),
133- forall (x, forall (y, P(x) ==> Q(y))) ==> (exists (x, P(x)) ==> forall (y, Q(y))),
134- (exists (x, P(x)) ==> forall (y, Q(y))) ==> forall (x, forall (y, P(x) ==> Q(y))),
135- forall (x, forall (y, P(x) ==> Q(y))) <=> (exists (x, P(x)) ==> forall (y, Q(y))),
136- exists (x, exists (y, P(x) /\ R(x)(y))) ==> (exists( x, P(x) /\ exists (y, R(x)(y) ))),
137- (exists( x, P(x) /\ exists (y, R(x)(y)))) ==> exists (x, exists (y, P(x) /\ R(x)(y))),
138- exists (x, exists (y, P(x) /\ R(x)(y))) <=> (exists( x, P(x) /\ exists (y, R(x)(y) ))),
139- exists (y, forall (x, P(x) ==> R(x)(y))) ==> (forall (x, P(x)) ==> exists (y, R(x)(y))),
140- forall (x, P(x)) ==> P(y),
141- !forall( x, D(x) /\ !D(f(x))),
142- !forall( x, (D(x) /\ !D(f(x))) \/ (D(x) /\ !D(x))),
143- forall (x, forall (y, (E(x)(y) ==> E(f(x))(f(y))) /\ E(f(g(x)))(g(f(x))))) ==> E(f(f(g(u))))(f(g(f(u)))),
144- !(forall (x, !(( E(f(x))(x)))) /\ forall (x, forall (y, !(E(x)(y)) /\ E(f(x))(g(x))))),
144+ private val hardFormulas : List [ Expr [ Prop ]] = List (
145+ ∀ (x, P (x) ==> Q (x)) ==> (∀ (x, P (x)) ==> ∀ (x, Q (x))),
146+ ∀ (x, ∀ (y, R (x)(y))) ==> ∀ (y, ∀ (x, R (x)(y))),
147+ ∀ (x, ∀ (y, R (x)(y))) ==> ∀ (y, ∀ (x, R (y)(x))),
148+ ∃ (x, ∃ (y, R (x)(y))) ==> ∃ (y, ∃ (x, R (x)(y))),
149+ ∃ (x, ∃ (y, R (x)(y))) ==> ∃ (y, ∃ (x, R (y)(x))),
150+ (∀ (x, P (x)) \/ ∀ (x, Q (x))) ==> ∀ (x, P (x) \/ Q (x)),
151+ ∀ (x, a ==> Q (x)) <=> (a ==> ∀ (x, Q (x))),
152+ ∀ (x, P (x) ==> a) <=> (∃ (x, P (x)) ==> a),
153+ ∃ (x, P (x) \/ Q (x)) <=> (∃ (x, P (x)) \/ ∃ (x, Q (x))),
154+ ∃ (x, P (x) /\ Q (x)) ==> (∃ (x, P (x)) /\ ∃ (x, Q (x))),
155+ ∃ (y, ∀ (x, R (x)(y))) ==> ∀ (x, ∃ (y, R (x)(y))),
156+ ∀ (x, Q (x)) ==> ∃ (x, Q (x)),
157+ (∀ (x, P (x) ==> Q (x)) /\ ∃ (x, P (x))) ==> ∃ (x, Q (x)),
158+ ((a ==> ∃ (x, Q (x))) /\ a) ==> ∃ (x, Q (x)),
159+ ∀ (x, P (x) ==> Q (f(x))) /\ ∀ (x, Q (x) ==> R (g(x))(x)) ==> (P (y) ==> R (g(f(y)))(f(y))),
160+ ∀ (x, ∀ (y, P (x) ==> Q (y))) ==> (∃ (x, P (x)) ==> ∀ (y, Q (y))),
161+ (∃ (x, P (x)) ==> ∀ (y, Q (y))) ==> ∀ (x, ∀ (y, P (x) ==> Q (y))),
162+ ∀ (x, ∀ (y, P (x) ==> Q (y))) <=> (∃ (x, P (x)) ==> ∀ (y, Q (y))),
163+ ∃ (x, ∃ (y, P (x) /\ R (x)(y))) ==> ∃ ( x, P (x) /\ ∃ (y, R (x)(y))),
164+ ∃ ( x, P (x) /\ ∃ (y, R (x)(y))) ==> ∃ (x, ∃ (y, P (x) /\ R (x)(y))),
165+ ∃ (x, ∃ (y, P (x) /\ R (x)(y))) <=> ∃ ( x, P (x) /\ ∃ (y, R (x)(y))),
166+ ∃ (y, ∀ (x, P (x) ==> R (x)(y))) ==> (∀ (x, P (x)) ==> ∃ (y, R (x)(y))),
167+ ∀ (x, P (x)) ==> P (y),
168+ ! ( ∀ ( x, D (x) /\ ! D (f(x) ))),
169+ ! ( ∀ ( x, (D (x) /\ ! D (f(x))) \/ (D (x) /\ ! D (x) ))),
170+ ∀ (x, ∀ (y, (E (x)(y) ==> E (f(x))(f(y))) /\ E (f(g(x)))(g(f(x))))) ==> E (f(f(g(u))))(f(g(f(u)))),
171+ ! (∀ (x, ! (E (f(x))(x))) /\ ∀ (x, ∀ (y, ! (E (x)(y)) /\ E (f(x))(g(x))))),
145172 a1 /\ a2 /\ a3 ==> E (f(f(g(u))))(f(g(f(u)))),
146173 a1 /\ a2 /\ a3 ==> E (f(g(f(u))))(g(f(f(u)))),
147174 biga ==> E (f(f(g(u))))(f(g(f(u)))),
148175 biga ==> E (f(g(f(u))))(g(f(f(u))))
149- ).zipWithIndex.map(f =>
150- val res = solve(() |- f._1)
151- (f._2, f._1, res.nonEmpty, res, res.map(checkSCProof))
152176 )
153- */
154177
155- }
178+ test(" First Order Hard Positive cases" ) {
179+ val results = hardFormulas.zipWithIndex.map { case (formula, i) =>
180+ val (found, valid) = solveAndCheck(formula)
181+ (i, formula, found, valid)
182+ }
183+ val failures = results.filterNot(_._3)
184+ assert(failures.isEmpty, failures.map(r => s " # ${r._1}: ${r._2}" ).mkString(" Not proved: " , " , " , " " ))
185+ val invalidProofs = results.filter(r => r._3 && ! r._4)
186+ assert(invalidProofs.isEmpty, invalidProofs.map(r => s " # ${r._1}" ).mkString(" Invalid proof: " , " , " , " " ))
187+ }
188+
189+ // ========================================================================
190+ // StackOverflow regression tests
191+ // ========================================================================
192+
193+ test(" triggerStackOverflow1 - Skolem dependency loop (abstracted set theory)" ) {
194+ // ∃(z, U(z) ∧ P(z)), ∀(z, U(z) ⟺ ∃(R, E(R)(w) ∧ E(z)(R))) ⊢ ∃(y, E(y)(w) ∧ ∃(z, E(z)(y) ∧ Q(z)))
195+ val tE = variable[Ind >>: Ind >>: Prop ]
196+ val tP = variable[Ind >>: Prop ]
197+ val tQ = variable[Ind >>: Prop ]
198+ val formula = ∃ (z, tP(z) /\ tQ(z)) /\ ∀ (z, tP(z) <=> ∃ (u, tE(u)(w) /\ tE(z)(u))) ==> ∃ (y, tE(y)(w) /\ ∃ (z, tE(z)(y) /\ tQ(z)))
199+ val (found, valid) = solveAndCheck(formula)
200+ assert(found, " Tableau should find a proof for triggerStackOverflow1" )
201+ assert(valid, " Proof for triggerStackOverflow1 should be valid" )
202+ }
203+
204+ test(" triggerStackOverflow2 - minimal ∀∃/∀∀¬ loop" ) {
205+ // ∀x.∃y.R(x,y) and ∀u.∀z.¬R(z,u) are contradictory
206+ val tR = variable[Ind >>: Ind >>: Prop ]
207+ val formula = (∀ (x, ∃ (y, tR(x)(y))) /\ ∀ (u, ∀ (z, ! tR(z)(u)))) ==> bot
208+ val (found, valid) = solveAndCheck(formula)
209+ assert(found, " Tableau should find a proof for triggerStackOverflow2" )
210+ assert(valid, " Proof for triggerStackOverflow2 should be valid" )
211+ }
212+
213+ }
0 commit comments