diff --git a/Gemfile b/Gemfile index 8475474..28d8692 100644 --- a/Gemfile +++ b/Gemfile @@ -7,4 +7,5 @@ gem "github-pages" gem "webrick" gem "jekyll-redirect-from" gem "jekyll-email-protect" - +gem "kramdown-math-katex" +gem "therubyracer" # JS engine to run katex diff --git a/Gemfile.lock b/Gemfile.lock index b97e14c..950c94c 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -13,6 +13,7 @@ GEM minitest (>= 5.1) securerandom (>= 0.3) tzinfo (~> 2.0, >= 2.0.5) + uri (>= 0.13.1) addressable (2.8.7) public_suffix (>= 2.0.2, < 7.0) base64 (0.2.0) @@ -43,7 +44,16 @@ GEM logger faraday-net_http (3.4.0) net-http (>= 0.5.0) + ffi (1.17.0-aarch64-linux-gnu) + ffi (1.17.0-aarch64-linux-musl) + ffi (1.17.0-arm-linux-gnu) + ffi (1.17.0-arm-linux-musl) + ffi (1.17.0-arm64-darwin) + ffi (1.17.0-x86-linux-gnu) + ffi (1.17.0-x86-linux-musl) + ffi (1.17.0-x86_64-darwin) ffi (1.17.0-x86_64-linux-gnu) + ffi (1.17.0-x86_64-linux-musl) forwardable-extended (2.6.0) gemoji (4.1.0) github-pages (232) @@ -221,10 +231,21 @@ GEM jekyll-include-cache jekyll-seo-tag (>= 2.0) rake (>= 12.3.1) + katex (0.10.0) + execjs (~> 2.8) kramdown (2.4.0) rexml + kramdown-math-katex (1.0.1) + katex (~> 0.4) + kramdown (~> 2.0) kramdown-parser-gfm (1.1.0) kramdown (~> 2.0) + libv8 (3.16.14.19.1) + libv8 (3.16.14.19.1-arm-linux) + libv8 (3.16.14.19.1-universal-darwin) + libv8 (3.16.14.19.1-x86-linux) + libv8 (3.16.14.19.1-x86_64-darwin) + libv8 (3.16.14.19.1-x86_64-linux) liquid (4.0.4) listen (3.9.0) rb-fsevent (~> 0.10, >= 0.10.3) @@ -238,6 +259,16 @@ GEM minitest (5.25.1) net-http (0.5.0) uri + nokogiri (1.16.7-aarch64-linux) + racc (~> 1.4) + nokogiri (1.16.7-arm-linux) + racc (~> 1.4) + nokogiri (1.16.7-arm64-darwin) + racc (~> 1.4) + nokogiri (1.16.7-x86-linux) + racc (~> 1.4) + nokogiri (1.16.7-x86_64-darwin) + racc (~> 1.4) nokogiri (1.16.7-x86_64-linux) racc (~> 1.4) octokit (4.25.1) @@ -251,6 +282,7 @@ GEM rb-fsevent (0.11.2) rb-inotify (0.11.1) ffi (~> 1.0) + ref (2.0.0) rexml (3.3.9) rouge (3.30.0) rubyzip (2.3.2) @@ -267,6 +299,9 @@ GEM simpleidn (0.2.3) terminal-table (1.8.0) unicode-display_width (~> 1.1, >= 1.1.1) + therubyracer (0.12.3) + libv8 (~> 3.16.14.15) + ref typhoeus (1.4.1) ethon (>= 0.9.0) tzinfo (2.0.6) @@ -276,14 +311,28 @@ GEM webrick (1.9.0) PLATFORMS + aarch64-linux + aarch64-linux-gnu + aarch64-linux-musl + arm-linux + arm-linux-gnu + arm-linux-musl + arm64-darwin + x86-linux + x86-linux-gnu + x86-linux-musl + x86_64-darwin x86_64-linux + x86_64-linux-musl DEPENDENCIES github-pages jekyll-email-protect jekyll-redirect-from just-the-docs (= 0.10.0) + kramdown-math-katex + therubyracer webrick BUNDLED WITH - 2.3.26 + 2.5.23 diff --git a/_config.yml b/_config.yml index c5aecb3..b2900c7 100644 --- a/_config.yml +++ b/_config.yml @@ -38,4 +38,8 @@ exclude: [ plugins: - jekyll-redirect-from - - jekyll-email-protect + - jekyll-email-protect + +markdown: kramdown +kramdown: + math_engine: katex diff --git a/_data/publications.yml b/_data/publications.yml index 793145e..fa20e2d 100644 --- a/_data/publications.yml +++ b/_data/publications.yml @@ -4,6 +4,25 @@ # # Make sure all paths start with "/", espacially the easy-summary one, # otherwise the _layout/paper.html WILL break (it looks into this DB for a matching entry) +- year: 2025 + publications: + - title: Relational Abstractions Based on Labeled Union-Find + authors: + - name: Dorian Lesbre + - name: Matthieu Lemerre + - name: Hichem Rami Ait-El-Hara + - name: François Bobot + venue: 46th ACM-SIGPLAN Symposium on Programming Language Design and Implementation + venue-acronym: PLDI + doi: 10.1145/3729298 + artifact: https://zenodo.org/records/15165896 + bibtex: /assets/publications/bibtex/2025-pldi-relational-abstractions-labeled-uf.bib + easy-summary: /papers/2025-pldi-relational-abstractions-labeled-uf.html + pdf-with-appendix: /assets/publications/pdfs/2025-pldi-relational-abstractions-labeled-uf-with-appendices.pdf + topics: + - relational abstract domain + - labeled union-find + - abstract interpretation - year: 2024 publications: - title: "A Dependent Nominal Physical Type System for Static Analysis of Memory in Low Level Code" @@ -15,12 +34,12 @@ venue-acronym: OOPSLA artifact: https://zenodo.org/records/13383433 artifact-evaluated-reusable: true - artifact-available: true + artifact-available: true easy-summary: /papers/2024-oopsla-typedc-dependent-nominal-physical-type-system.html pdf: /assets/publications/pdfs/2024-oopsla-full-with-appendices.pdf talk-slides: /assets/publications/slides/2024-oopsla-typedc-dependent-nominal-physical-typesystem.pdf bibtex: /assets/publications/bibtex/2024-oopsla-typedc-dependent-nominal-typesystem.bib -# talk-video: TODO + # talk-video: TODO topics: - abstract interpretation - memory safety @@ -39,9 +58,9 @@ artifact-custom-image-height: "70" easy-summary: /papers/2024-sas-trace-partitioning-as-an-optimization-problem.html pdf: /assets/publications/pdfs/2024-sas-tpop.pdf -# bibtex: TODO -# talk-slides: TODO -# talk-video: TODO + # bibtex: TODO + # talk-slides: TODO + # talk-video: TODO topics: - abstract interpretation - trace partitioning diff --git a/_includes/head_custom.html b/_includes/head_custom.html index ae41c03..fea8474 100644 --- a/_includes/head_custom.html +++ b/_includes/head_custom.html @@ -21,3 +21,7 @@ {% for css in page.css %} {% endfor %} +{% if page.katex %} + + +{% endif %} diff --git a/assets/publications/bibtex/2024-pldi-compiling-with-abstract-interpretation.bib b/assets/publications/bibtex/2024-pldi-compiling-with-abstract-interpretation.bib index d69655e..e214015 100644 --- a/assets/publications/bibtex/2024-pldi-compiling-with-abstract-interpretation.bib +++ b/assets/publications/bibtex/2024-pldi-compiling-with-abstract-interpretation.bib @@ -1,3 +1,4 @@ +% The main publication - which is most likely the one you'll want to cite @article{10.1145/3656392, author = {Lesbre, Dorian and Lemerre, Matthieu}, title = {Compiling with Abstract Interpretation}, @@ -16,6 +17,7 @@ @article{10.1145/3656392 keywords = {Compilers, Abstract Interpretation, Static Single Assignment, SSA}, } +% The extended version - includes appendices and proofs @techreport{hal-04535159, author = {Lesbre, Dorian and Lemerre, Matthieu}, title = {Compiling with Abstract Interpretation (with appendices)}, @@ -23,6 +25,7 @@ @techreport{hal-04535159 url = {https://hal.science/hal-04535159}, } +% The associated software artifact @misc{10.5281/zenodo.10895582, author = {Dorian Lesbre and Matthieu Lemerre}, title = {Compiling with Abstract Interpretation: Artifact}, diff --git a/assets/publications/bibtex/2025-pldi-relational-abstractions-labeled-uf.bib b/assets/publications/bibtex/2025-pldi-relational-abstractions-labeled-uf.bib new file mode 100644 index 0000000..a8ac2a1 --- /dev/null +++ b/assets/publications/bibtex/2025-pldi-relational-abstractions-labeled-uf.bib @@ -0,0 +1,41 @@ +% The main publication - which is most likely the one you'll want to cite +@article{10.1145/3729298, + author = {Lesbre, Dorian and Lemerre, Matthieu and Ait-El-Hara, Hichem Rami + and Bobot, François}, + title = {Relational Abstractions Based on Labeled Union-Find}, + month = 6, + year = 2025, + journal = {Proc. {ACM} Program. Lang.}, + issn = {2475-1421}, + number = {{PLDI}}, + volume = {9}, + articleno = {195}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + doi = {10.1145/3729298}, +} + +% The extended version - includes appendices and proofs +@techreport{hal-05029216, + author = {Lesbre, Dorian and Lemerre, Matthieu and Ait-El-Hara, Hichem Rami + and Bobot, François}, + title = {Relational Abstractions Based on Labeled Union-Find (with appendices) + }, + month = apr, + year = 2025, + publisher = {HAL}, + url = {https://hal.science/hal-05029216}, +} + +% The associated software artifact +@software{10.5281/zenodo.15165896, + author = {Lesbre, Dorian and Lemerre, Matthieu and Ait-El-Hara, Hichem Rami + and Bobot, François}, + title = {Artifact for paper "Relational Abstractions Based on Labeled + Union-Find" }, + month = apr, + year = 2025, + publisher = {Zenodo}, + doi = {10.5281/zenodo.15165896}, + url = {https://doi.org/10.5281/zenodo.15165896}, +} diff --git a/assets/publications/imgs/2025-pldi-labeled-uf.svg b/assets/publications/imgs/2025-pldi-labeled-uf.svg new file mode 100644 index 0000000..a559215 --- /dev/null +++ b/assets/publications/imgs/2025-pldi-labeled-uf.svg @@ -0,0 +1,221 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/publications/imgs/2025-pldi-spanning-tree.svg b/assets/publications/imgs/2025-pldi-spanning-tree.svg new file mode 100644 index 0000000..d5e6cbf --- /dev/null +++ b/assets/publications/imgs/2025-pldi-spanning-tree.svg @@ -0,0 +1,165 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/publications/pdfs/2025-pldi-relational-abstractions-labeled-uf-with-appendices.pdf b/assets/publications/pdfs/2025-pldi-relational-abstractions-labeled-uf-with-appendices.pdf new file mode 100644 index 0000000..fae0515 Binary files /dev/null and b/assets/publications/pdfs/2025-pldi-relational-abstractions-labeled-uf-with-appendices.pdf differ diff --git a/papers/2024-pldi-compiling-with-abstract-interpretation.md b/papers/2024-pldi-compiling-with-abstract-interpretation.md index 8a90afd..c2b85b3 100644 --- a/papers/2024-pldi-compiling-with-abstract-interpretation.md +++ b/papers/2024-pldi-compiling-with-abstract-interpretation.md @@ -70,7 +70,7 @@ Our paper shows the following novel results: variable immutability to store information on expressions. This is always more precise than direct numerical analysis, while just adding a constant overhead. - In particular, this domain car analyze compiled code with the same precision as source + In particular, this domain can analyze compiled code with the same precision as source (when compilation corresponds, e.g., to transformation into three-address code). The usual precision loss resulting from compiling large expressions into instruction sequences with multiple intermediate variables is recovered thanks to our SSA-based analysis. @@ -103,6 +103,6 @@ Our paper shows the following novel results: ## Further information - Read the **paper**: either [published version](/assets/publications/pdfs/2024-pldi-compiling-with-abstract-interpretation.pdf) or the [version with appendices](/assets/publications/pdfs/2024-pldi-compiling-with-abstract-interpretation-with-appendices.pdf) -- Presented at the [Programming Language Design and Implementation (PLDI) 2024 conference](https://pldi24.sigplan.org/). Watch the [**talk video**](https://www.youtube.com/watch?v=2Btkn9AvM8o) or look at the [**slides**](/assets/publications/slides/2024-pldi-compiling-with-abstract-interpretation.pdf) +- Presented at the [Programming Language Design and Implementation (PLDI) 2024 conference](https://pldi24.sigplan.org/). Watch the [**talk video**](https://www.youtube.com/watch?v=2Btkn9AvM8o) or look at the [**slides**](/assets/publications/slides/2024-pldi-compiling-with-abstract-interpretation.pdf). - Download the [**software artifact**](https://doi.org/10.5281/zenodo.10895582) from Zenodo to try out our example analyzer and explore the code. diff --git a/papers/2025-pldi-relational-abstractions-labeled-uf.md b/papers/2025-pldi-relational-abstractions-labeled-uf.md new file mode 100644 index 0000000..9c188fd --- /dev/null +++ b/papers/2025-pldi-relational-abstractions-labeled-uf.md @@ -0,0 +1,127 @@ +--- +layout: paper +parent: Publications +title: "PLDI'25" +categories: new publication +date: 2025-06-15 +nav_order: 2025-06-15 +katex: True +--- + +## Context + +In program analysis, various abstractions are used to store known facts about the +program being studied. Two popular choices are *non-relational abstractions*, +which store numeric information on variables (like intervals $$x \in [0:5]$$), or *relational +abstraction*, which store relations between variables (like $$3x + 2y \leqslant -5z$$). +The former is fast (complexity in $$\mathcal O(|\mathbb X|)$$ where $$|\mathbb X|$$ is the number of variables) +but imprecise, whereas the latter is very precise but cost-prohibitive +(polyhedra has $$\mathcal O(2^{|\mathbb X|})$$ complexity). + +In the middle of this spectrum lie *weakly-relational abstractions*. They only +store relations between pairs of variables. For example, octagons +store relations $$\pm x \pm y \leqslant c$$ for some constant $$c$$. These abstractions are faster than +full relational domains, but still expensive, in large part due to having to compute +a transitive closure to find all known relations, which costs $$\mathcal O(|\mathbb X|^3)$$. + +Thus, the central assumption of the paper can be expressed as **what if we do not need to compute +this expensive transitive closure?** More formally, we assume that the relation obtained on each path between +two variables is always the same. This allows eliminating the vast majority of relations, **we only need to +store a spanning tree** and can still recover any arbitrary relation in constant time. + + + +
+Fig. Graph of relations between variables. Each arrow represents a relation, labels have been omitted. +Left is the initial configuration, middle is the computed closure, and right is a minimal spanning tree +
+ +## Labeled union-find + +We can use an extension of the classical +[union-find](https://en.wikipedia.org/wiki/Disjoint-set_data_structure) data +structure to represent this spanning tree. The extension proceeds by adding +labels $$\mathbb L$$ (representing relations) to the parent edges, and is thus +called *labeled union-find*. In order to properly adapt the union-find +algorithms, these labels need an associative composition operation $$\mathbb +;$$, an inverse $$\cdot^{-1}$$ and a neutral element. That is to say, they must +have a [**group structure**](https://en.wikipedia.org/wiki/Group_(mathematics)). +This requirement also derives fairly naturally from our previous assumption +(same relation on each path). + + +
+Fig. Main operation of labeled union-find. Compared to classical union-find, +the "union" operation has been renamed "add_relation", and the get_relation operation +is new. +
+ +## The labeled union-find relational abstraction + +When using labeled union-find to represent abstract relations between variables, +the soundness of operations places strong requirements on the relations that +can be used. We show that these relations **must correspond to injective functions** +between equivalence classes. The following examples relations are suitable: + +- Constant offset: relations of the form $$y = x + b$$ for some constant $$b$$ +- Two Value per Equality (TVPE): $$ax + by + c = 0$$, with $$a,b,c \in \mathbb Z^3$$, $$\mathbb Q^3$$ or $$\mathbb R^3$$ +- Modular TVPE: $$y = ax + b\mathop{\texttt{mod}} 2^{64}$$ between 64-bit vectors with $$a$$ odd. +- Xor and rotation: $$y = (x \mathop{\mathtt{xor}} c) \mathop{\mathtt{rot}} n$$ between fixed-size bitvector, which can encode many shifts + +However, we cannot use relations like bounded difference $$y - x \in [a:b]$$, as +they are not injective. Doing so inevitably leads to precision loss. + +## Combining with other abstractions + +Labeled union-find groups variables into related class, which each point to the +same representative. This can often be used to simplify other abstractions, +especially whenever information on any element can be deduced from information +on the representative. + +For example, when combining the constant offset labeled union-find with the interval abstraction, we +only need to store intervals on representative elements, not on all variables, since these can be +recovered. If we know that $$y \xrightarrow{+2} x$$ and $$x \in [0:2]$$ then we do not need to store +$$y \in [2:4]$$. This reduces storage cost and propagation time, since all related variables are +updated at once any time new information is learned. + +Labeled union-find can also help relational abstraction similarly, shrinking their size and thus their +computation cost. Furthermore, it can be modified to detect any entailed equalities and notify other +abstractions of these facts. + +## Examples + +We have implemented labeled union-find domains both in [Codex](https://codex.top), +a non-relational analyzer, and in [Colibri2](https://colibri.frama-c.com/index.html), a constraint solver. + +In Codex, we found one of the main sources of improvements comes from relating +simultaneously incremented loop counters. For instance, consider the following C snippet: +```c +int i = 0; +int j = 4; +while(i < 10) { + i += 1; + j += 3; +} +``` +Without labeled union-find, Codex learns that at the end of the loop, $$\mathtt{i} = 10$$, +$$\mathtt{j} \in [4:+\infty]$$ and $$\mathtt{j} \equiv 1 \mathop{\mathtt{mod}} 3$$. However, +with labeled union find and the TVPE relation, $$\mathtt{j} = 3\mathtt{i} + 4$$ is inferred. +Thus, at the end of the loop, Codex knows $$\mathtt{j} = 34$$. + +In Colibri2, we were able to increase propagations when using constant difference +alongside the interval domain. For example, instance, if $$f(x) = 2a+x+3b$$ +for some $$a,b$$, and if we know that $$f(4) < 10$$, then we can learn that +$$f(9)^2 \leqslant 225$$ is unsatifsiable. This sort of reasoning seems easy, but +in practice, a decision procedure for non linear-arithmetic is difficult to implement and costly. +Using labeled union-find (to relate $$f(4)$$ and $$f(9)$$) enables solving some +easy cases such as these. + +## Going further + +- Read the [**paper**](/assets/publications/pdfs/2025-pldi-relational-abstractions-labeled-uf-with-appendices.pdf). +- You can also read the [**WIP workshop paper**](https://hal.science/cea-04996700v1). It is only 4 pages long and less technical. +- To be presented at the [Programming Language Design and Implementation (PLDI) 2025 conference](https://pldi25.sigplan.org/). +- Download the [**software artifact**](https://zenodo.org/records/15261356) from + Zenodo to explore the code and see the performance results.