-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f14f30c
commit ea979d4
Showing
6 changed files
with
288 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,36 @@ | ||
# Formally Verified APPS | ||
|
||
## Conceptually 1 | ||
- llm-solve an apps subset in imp, at least in slow versions | ||
- Provided in benchmark: | ||
- write slow imp programs | ||
- write hoare triples | ||
- prove hoare triples | ||
- Task; | ||
- ask llm to write faster versions | ||
- proof of hoare agreement | ||
|
||
### value prop | ||
- challenging LLMs to write formally verified code. | ||
|
||
### TODO | ||
- [ ] some array story in Imp | ||
- [ ] strings in imp? | ||
|
||
## Conceptually 2 | ||
- find two implementations of the same problem | ||
- quantify over all inputs that they agree | ||
|
||
### Question | ||
- mathlib free or mathlib | ||
|
||
### Value prop | ||
challenging LLMs to write dependently typed _and_ formally verified code | ||
|
||
### TODO | ||
- [ ] trawl aoc of the ones we have lean4 solutions for some nontriviality: | ||
- meaning a way to beat the dumb way on O() | ||
|
||
## TODO | ||
- [ ] read dafny benchmark paper | ||
- [ ] read the APPS easies. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
import Imp.Expr.Eval | ||
import Imp.Stmt.BigStep | ||
import Imp.Stmt.Basic | ||
|
||
open Imp Stmt | ||
|
||
@[simp] | ||
def ValidHoareTriple (P : Env → Prop) (c : Stmt) (Q : Env → Prop) : Prop := | ||
forall (st st' : Env), P st -> c / st ↓ st' -> Q st' | ||
|
||
syntax "{{" term:90 "}} " term:91 " {{" term:92 "}}" : term | ||
macro_rules | ||
| `({{$P}}$c{{$Q}}) => `(ValidHoareTriple $P $c $Q) | ||
|
||
theorem hoare_skip : forall P, {{P}}(imp { skip; }){{P}} := by | ||
intro P | ||
simp [ValidHoareTriple] | ||
intros st st' h1 h2 | ||
cases h2; assumption | ||
|
||
theorem hoare_seq : forall P Q R c1 c2, {{P}}c1{{Q}} -> {{Q}}c2{{R}} -> {{P}}(imp { ~c1 ~c2 }){{R}} := by | ||
intro P Q R c1 c2 | ||
simp [ValidHoareTriple] | ||
intros h1 h2 st st' hP h3 | ||
cases h3 with | ||
| seq h3 h4 => | ||
apply h2 | ||
apply h1 | ||
apply hP | ||
apply h3 | ||
apply h4 | ||
|
||
def assertionSubstitution (P : Env → Prop) (x : String) (a : Expr) : Env → Prop := | ||
fun st => P (st.setOption x (Expr.eval st a)) | ||
|
||
syntax term " [ " ident " |-> " term "]" : term | ||
macro_rules | ||
| `($P [$x |-> $a]) => `(assertionSubstitution $P $x $a) | ||
|
||
theorem hoare_assign : forall P x a, {{P[x |-> a]}}(imp { ~x := a; }){{P}} := by | ||
intros P x a | ||
simp [ValidHoareTriple] | ||
intros st st' h1 h2 | ||
cases h2 with | ||
| assign h3 => | ||
simp [assertionSubstitution] at h1 | ||
simp [Expr.eval, Env.get] at h3 | ||
simp [Env.setOption] at h1 | ||
sorry |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,3 +8,5 @@ wheels/ | |
|
||
# venv | ||
.venv | ||
|
||
.ipynb_checkpoints/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,193 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "80fd1cee-6d47-4fcc-beca-c141459f6eec", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from datasets import load_dataset\n", | ||
"import pandas as pd" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 46, | ||
"id": "fcb649a0-1dd4-4d1a-9f56-b5c835326ff0", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"apps = load_dataset(\"codeparrot/apps\", difficulties=[\"introductory\"]) # difficulties flag not working for some reason" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 55, | ||
"id": "5bd853fd-57ef-494e-a200-7db128141ed2", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"df = pd.DataFrame(apps[\"train\"])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 56, | ||
"id": "9f4dd708-b75d-4aff-9126-e121fa80488d", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"introductory = df[df.difficulty == \"introductory\"]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 57, | ||
"id": "9cf0c21c-9f12-4567-aa86-347ca8bf5607", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"arrays = introductory[introductory.question.map(lambda s: \"array\" in s)]\n", | ||
"noarrays = introductory[introductory.question.map(lambda s: \"array\" not in s)]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 58, | ||
"id": "ba6775de-b6fe-45df-925a-bd5f064a6236", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"(786, 7)\n", | ||
"(1853, 7)\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"print(arrays.shape)\n", | ||
"print(noarrays.shape)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 59, | ||
"id": "18816b30-3fa7-4aba-81cf-2af8f831dacb", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"$n$ robots have escaped from your laboratory! You have to find them as soon as possible, because these robots are experimental, and their behavior is not tested yet, so they may be really dangerous!\n", | ||
"\n", | ||
"Fortunately, even though your robots have escaped, you still have some control over them. First of all, you know the location of each robot: the world you live in can be modeled as an infinite coordinate plane, and the $i$-th robot is currently located at the point having coordinates ($x_i$, $y_i$). Furthermore, you may send exactly one command to all of the robots. The command should contain two integer numbers $X$ and $Y$, and when each robot receives this command, it starts moving towards the point having coordinates ($X$, $Y$). The robot stops its movement in two cases: either it reaches ($X$, $Y$); or it cannot get any closer to ($X$, $Y$). \n", | ||
"\n", | ||
"Normally, all robots should be able to get from any point of the coordinate plane to any other point. Each robot usually can perform four actions to move. Let's denote the current coordinates of the robot as ($x_c$, $y_c$). Then the movement system allows it to move to any of the four adjacent points: the first action allows it to move from ($x_c$, $y_c$) to ($x_c - 1$, $y_c$); the second action allows it to move from ($x_c$, $y_c$) to ($x_c$, $y_c + 1$); the third action allows it to move from ($x_c$, $y_c$) to ($x_c + 1$, $y_c$); the fourth action allows it to move from ($x_c$, $y_c$) to ($x_c$, $y_c - 1$). \n", | ||
"\n", | ||
"Unfortunately, it seems that some movement systems of some robots are malfunctioning. For each robot you know which actions it can perform, and which it cannot perform.\n", | ||
"\n", | ||
"You want to send a command so all robots gather at the same point. To do so, you have to choose a pair of integer numbers $X$ and $Y$ so that each robot can reach the point ($X$, $Y$). Is it possible to find such a point?\n", | ||
"\n", | ||
"\n", | ||
"-----Input-----\n", | ||
"\n", | ||
"The first line contains one integer $q$ ($1 \\le q \\le 10^5$) — the number of queries.\n", | ||
"\n", | ||
"Then $q$ queries follow. Each query begins with one line containing one integer $n$ ($1 \\le n \\le 10^5$) — the number of robots in the query. Then $n$ lines follow, the $i$-th of these lines describes the $i$-th robot in the current query: it contains six integer numbers $x_i$, $y_i$, $f_{i, 1}$, $f_{i, 2}$, $f_{i, 3}$ and $f_{i, 4}$ ($-10^5 \\le x_i, y_i \\le 10^5$, $0 \\le f_{i, j} \\le 1$). The first two numbers describe the initial location of the $i$-th robot, and the following four numbers describe which actions the $i$-th robot can use to move ($f_{i, j} = 1$ if the $i$-th robot can use the $j$-th action, and $f_{i, j} = 0$ if it cannot use the $j$-th action).\n", | ||
"\n", | ||
"It is guaranteed that the total number of robots over all queries does not exceed $10^5$.\n", | ||
"\n", | ||
"\n", | ||
"-----Output-----\n", | ||
"\n", | ||
"You should answer each query independently, in the order these queries appear in the input.\n", | ||
"\n", | ||
"To answer a query, you should do one of the following: if it is impossible to find a point that is reachable by all $n$ robots, print one number $0$ on a separate line; if it is possible to find a point that is reachable by all $n$ robots, print three space-separated integers on the same line: $1$ $X$ $Y$, where $X$ and $Y$ are the coordinates of the point reachable by all $n$ robots. Both $X$ and $Y$ should not exceed $10^5$ by absolute value; it is guaranteed that if there exists at least one point reachable by all robots, then at least one of such points has both coordinates not exceeding $10^5$ by absolute value.\n", | ||
"\n", | ||
"\n", | ||
"-----Example-----\n", | ||
"Input\n", | ||
"4\n", | ||
"2\n", | ||
"-1 -2 0 0 0 0\n", | ||
"-1 -2 0 0 0 0\n", | ||
"3\n", | ||
"1 5 1 1 1 1\n", | ||
"2 5 0 1 0 1\n", | ||
"3 5 1 0 0 0\n", | ||
"2\n", | ||
"1337 1337 0 1 1 1\n", | ||
"1336 1337 1 1 0 1\n", | ||
"1\n", | ||
"3 5 1 1 1 1\n", | ||
"\n", | ||
"Output\n", | ||
"1 -1 -2\n", | ||
"1 2 5\n", | ||
"0\n", | ||
"1 -100000 -100000\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"print(list(noarrays.question)[0])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 62, | ||
"id": "d7b6516d-bf88-44aa-91b2-e454ae36cd9b", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"4107 4107\n", | ||
"Name: problem_id, dtype: int64\n", | ||
"4107 # Task\\n\\nGiven the birthdates of two people, ...\n", | ||
"Name: question, dtype: object\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"samp = noarrays.sample(1)\n", | ||
"print(samp.problem_id)\n", | ||
"print(samp.question)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "6ace71f5-5660-4968-a1c0-98f8f9afd3c1", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.4" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |