sosy-lab
diff --git a/‎src/org/sosy_lab/java_smt/basicimpl/AbstractFormulaManager.java
Lines changed: 77 additions & 16 deletions b/‎src/org/sosy_lab/java_smt/basicimpl/AbstractFormulaManager.java
Lines changed: 77 additions & 16 deletions
diff --git a/‎src/org/sosy_lab/java_smt/basicimpl/Tokenizer.java
Lines changed: 245 additions & 0 deletions b/‎src/org/sosy_lab/java_smt/basicimpl/Tokenizer.java
Lines changed: 245 additions & 0 deletions
@@ -18,28 +18,17 @@
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Iterables;
+import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import org.checkerframework.checker.nullness.qual.Nullable;
 import org.sosy_lab.common.Appender;
-import org.sosy_lab.java_smt.api.ArrayFormulaManager;
-import org.sosy_lab.java_smt.api.BooleanFormula;
-import org.sosy_lab.java_smt.api.EnumerationFormulaManager;
-import org.sosy_lab.java_smt.api.FloatingPointFormulaManager;
-import org.sosy_lab.java_smt.api.Formula;
-import org.sosy_lab.java_smt.api.FormulaManager;
-import org.sosy_lab.java_smt.api.FormulaType;
+import org.sosy_lab.common.Appenders;
+import org.sosy_lab.java_smt.api.*;
 import org.sosy_lab.java_smt.api.FormulaType.ArrayFormulaType;
 import org.sosy_lab.java_smt.api.FormulaType.BitvectorType;
 import org.sosy_lab.java_smt.api.FormulaType.FloatingPointType;
-import org.sosy_lab.java_smt.api.FunctionDeclaration;
-import org.sosy_lab.java_smt.api.FunctionDeclarationKind;
-import org.sosy_lab.java_smt.api.IntegerFormulaManager;
-import org.sosy_lab.java_smt.api.RationalFormulaManager;
-import org.sosy_lab.java_smt.api.SLFormulaManager;
-import org.sosy_lab.java_smt.api.StringFormulaManager;
-import org.sosy_lab.java_smt.api.Tactic;
 import org.sosy_lab.java_smt.api.visitors.FormulaTransformationVisitor;
 import org.sosy_lab.java_smt.api.visitors.FormulaVisitor;
 import org.sosy_lab.java_smt.api.visitors.TraversalProcess;
@@ -261,11 +250,83 @@ public EnumerationFormulaManager getEnumerationFormulaManager() {
     return enumManager;
   }
 
-  public abstract Appender dumpFormula(TFormulaInfo t);
+  /**
+   * Parses a sanitized SMT-LIB2 script into the solver-specific formula representation.
+   *
+   * <p>{@link #parse(String)} calls this hook with the output of {@code sanitize}, so the script
+   * only contains declarations, definitions and assertions, one command per line.
+   *
+   * @param formulaStr the sanitized SMT-LIB2 script
+   * @return the solver-specific term, which {@link #parse(String)} wraps as a boolean formula
+   * @throws IllegalArgumentException presumably if the solver cannot parse the script
+   */
+  protected abstract TFormulaInfo parseImpl(String formulaStr) throws IllegalArgumentException;
+
+  /**
+   * Takes an SMT-LIB2 script and cleans it up.
+   *
+   * <p>We remove all comments and put each command on its own line. Declarations, definitions and
+   * asserts are kept and everything else is removed. For <code>(set-logic ..)</code> we make sure
+   * that it is the first command of the script before removing it, and for <code>(exit)</code> we
+   * make sure that it can only occur as the last command.
+   *
+   * @param formulaStr the SMT-LIB2 script to clean up
+   * @return the kept commands in their original order, each terminated by a newline
+   * @throws IllegalArgumentException if the script cannot be split into commands, if <code>
+   *     (set-logic ..)</code> is not the first or <code>(exit)</code> is not the last command, or
+   *     if the script contains a command that manipulates the assertion stack
+   */
+  private String sanitize(String formulaStr) {
+    List<String> tokens = Tokenizer.tokenize(formulaStr);
+    int lastIndex = tokens.size() - 1;
+
+    StringBuilder builder = new StringBuilder();
+    for (int pos = 0; pos <= lastIndex; pos++) {
+      String token = tokens.get(pos);
+
+      if (Tokenizer.isSetLogicToken(token)) {
+        // Skip the (set-logic ...) command, but only accept it at the beginning of the input
+        Preconditions.checkArgument(
+            pos == 0,
+            "SMTLIB command '(set-logic ...)' is only supported as the first command of the input.");
+
+      } else if (Tokenizer.isExitToken(token)) {
+        // Skip the (exit) command, but only accept it at the end of the input
+        Preconditions.checkArgument(
+            pos == lastIndex,
+            "SMTLIB command '(exit)' is only supported as the last command of the input.");
+
+      } else if (Tokenizer.isDeclarationToken(token)
+          || Tokenizer.isDefinitionToken(token)
+          || Tokenizer.isAssertToken(token)) {
+        // Keep only declarations, definitions and assertions
+        builder.append(token).append('\n');
+
+      } else if (Tokenizer.isForbiddenToken(token)) {
+        // Throw an exception if the script contains commands like (pop) or (reset) that change the
+        // state of the assertion stack.
+        // We could keep track of the state of the stack and only consider the formulas that remain
+        // on the stack at the end of the script. However, this does not seem worth it at the
+        // moment. If needed, this feature can still be added later.
+        String command;
+        if (Tokenizer.isPushToken(token)) {
+          command = "(push ...)";
+        } else if (Tokenizer.isPopToken(token)) {
+          command = "(pop ...)";
+        } else if (Tokenizer.isResetAssertionsToken(token)) {
+          // Must be checked before (reset), which shares the same prefix
+          command = "(reset-assertions)";
+        } else if (Tokenizer.isResetToken(token)) {
+          command = "(reset)";
+        } else {
+          // Should be unreachable: every forbidden token is one of the four commands above
+          throw new UnsupportedOperationException("Unknown forbidden SMTLIB command: " + token);
+        }
+        throw new IllegalArgumentException(
+            String.format("SMTLIB command '%s' is not supported when parsing formulas.", command));
+      }
+      // Every other command is silently removed
+    }
+    return builder.toString();
+  }
+
+  @Override
+  public BooleanFormula parse(String formulaStr) throws IllegalArgumentException {
+    return formulaCreator.encapsulateBoolean(parseImpl(sanitize(formulaStr)));
+  }
+
+  /**
+   * Produces the solver-specific SMT-LIB2 text for the given term.
+   *
+   * <p>The appender returned by {@link #dumpFormula(BooleanFormula)} calls this hook and passes
+   * the result through {@code sanitize} before writing it out.
+   *
+   * @param t the solver-specific term to dump
+   * @return the SMT-LIB2 text as produced by the solver, before it is sanitized
+   * @throws IOException presumably if the solver fails to produce the dump
+   */
+  protected abstract String dumpFormulaImpl(TFormulaInfo t) throws IOException;
 
+  /**
+   * Returns an appender that writes the sanitized SMT-LIB2 dump of the given formula.
+   *
+   * <p>Nothing is computed here: the term is extracted, dumped and sanitized each time the
+   * appender's {@code appendTo} method runs.
+   */
   @Override
   public Appender dumpFormula(BooleanFormula t) {
-    return dumpFormula(formulaCreator.extractInfo(t));
+    return new Appenders.AbstractAppender() {
+      @Override
+      public void appendTo(Appendable out) throws IOException {
+        TFormulaInfo term = formulaCreator.extractInfo(t);
+        String cleanedScript = sanitize(dumpFormulaImpl(term));
+        out.append(cleanedScript);
+      }
+    };
   }
 
   @Override
 
@@ -0,0 +1,245 @@
+/*
+ * This file is part of JavaSMT,
+ * an API wrapper for a collection of SMT solvers:
+ * https://github.com/sosy-lab/java-smt
+ *
+ * SPDX-FileCopyrightText: 2024 Dirk Beyer <https://www.sosy-lab.org>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.sosy_lab.java_smt.basicimpl;
+
+import com.google.common.collect.ImmutableList;
+import java.util.List;
+import java.util.Optional;
+import java.util.regex.Pattern;
+
+/**
+ * Helper class for splitting up an SMT-LIB2 script into a list of commands and for classifying
+ * those commands.
+ */
+public class Tokenizer {
+  /**
+   * Characters that may occur in an SMT-LIB2 simple symbol, written as the content of a regex
+   * character class. A command name only matches if it is not directly followed by one of these
+   * characters, so that for example <code>reset</code> does not match <code>(reset-assertions)
+   * </code>.
+   */
+  private static final String SYMBOL_CHARS = "A-Za-z0-9~!@$%^&*_\\-+=<>.?/";
+
+  private static final Pattern SET_LOGIC = commandPattern("set-logic");
+  private static final Pattern DECLARATION = commandPattern("declare-const", "declare-fun");
+  // The recursive variants are listed explicitly because command names are matched as whole
+  // symbols and not as prefixes.
+  private static final Pattern DEFINITION =
+      commandPattern("define-fun", "define-fun-rec", "define-funs-rec");
+  private static final Pattern ASSERT = commandPattern("assert");
+  private static final Pattern PUSH = commandPattern("push");
+  private static final Pattern POP = commandPattern("pop");
+  private static final Pattern RESET_ASSERTIONS = commandPattern("reset-assertions");
+  private static final Pattern RESET = commandPattern("reset");
+  private static final Pattern EXIT = commandPattern("exit");
+
+  /**
+   * Split up a sequence of lisp expressions.
+   *
+   * <p>This is used by {@link AbstractFormulaManager#parse(String)} as part of the preprocessing
+   * before the input is passed on to the solver. SMT-LIB2 scripts are sequences of commands that
+   * are just s-expressions. We split them up and then return the list. Comments are removed, and
+   * whitespace between the commands is dropped.
+   *
+   * <p>As an example <code>tokenize("(declare-const a Int)(assert (= a 0))")</code> will return
+   * the sequence <code>["(declare-const a Int)", "(assert (= a 0))"]</code>
+   *
+   * @param input the SMT-LIB2 script
+   * @return the top-level commands of the script in their original order
+   * @throws IllegalArgumentException if there is anything other than whitespace, comments and
+   *     parenthesized commands at the top level, if a quoted symbol contains a backslash, or if
+   *     the parentheses are not balanced
+   */
+  public static List<String> tokenize(String input) {
+    ImmutableList.Builder<String> builder = ImmutableList.builder();
+    boolean inComment = false;
+    boolean inString = false;
+    boolean inQuoted = false;
+
+    int level = 0; // nesting depth of the current expression, 0 means top-level
+
+    StringBuilder token = new StringBuilder();
+    for (int i = 0; i < input.length(); i++) {
+      char c = input.charAt(i);
+      if (inComment) {
+        if (c == '\n') {
+          // End of a comment
+          inComment = false;
+          if (level > 0) {
+            // If we're in an expression we need to replace the entire comment (+ the newline) with
+            // some whitespace. Otherwise symbols might get merged across line-wraps. This is not
+            // a problem at the top-level where all terms are surrounded by brackets.
+            token.append(c);
+          }
+        }
+
+      } else if (inString) {
+        token.append(c);
+        if (c == '"') {
+          // We have a double quote: Check that it's not followed by another and actually closes
+          // the string.
+          Optional<Character> next =
+              (i + 1 < input.length()) ? Optional.of(input.charAt(i + 1)) : Optional.empty();
+          if (next.filter(n -> n == '"').isPresent()) {
+            // Escaped quote: add the second quote to the token and skip one character ahead
+            token.append('"');
+            i++;
+          } else {
+            // Close the string
+            inString = false;
+          }
+        }
+
+      } else if (inQuoted) {
+        if (c == '\\') {
+          // The SMT-LIB2 standard does not allow backslash inside quoted symbols
+          throw new IllegalArgumentException(
+              "Backslash inside a quoted symbol at index " + i + " of the input");
+        }
+        if (c == '|') {
+          // Close the quotes
+          inQuoted = false;
+        }
+        token.append(c);
+
+      } else if (c == ';') {
+        // Start of a comment
+        inComment = true;
+
+      } else if (level == 0) {
+        // We're at the top-level: only whitespace and opening brackets are allowed here. This also
+        // rejects string literals and quoted symbols, which would otherwise get glued onto the
+        // next command.
+        if (c == '(') {
+          token.append(c);
+          level++;
+        } else if (!Character.isWhitespace(c)) {
+          throw new IllegalArgumentException(
+              "Unexpected character '"
+                  + c
+                  + "' at index "
+                  + i
+                  + ": all top-level expressions need parentheses around them");
+        }
+
+      } else {
+        // We're inside an s-expression
+        token.append(c);
+        if (c == '"') {
+          // Start of a string literal
+          inString = true;
+        } else if (c == '|') {
+          // Start of a quoted symbol
+          inQuoted = true;
+        } else if (c == '(') {
+          level++;
+        } else if (c == ')') {
+          level--;
+          if (level == 0) {
+            // The command is complete
+            builder.add(token.toString());
+            token.setLength(0);
+          }
+        }
+      }
+    }
+    if (level != 0) {
+      // Throw an exception if the brackets don't match
+      throw new IllegalArgumentException(
+          "Unbalanced parentheses: " + level + " expression(s) still open at the end of the input");
+    }
+    return builder.build();
+  }
+
+  /**
+   * Build a pattern that matches the start of a command with one of the given names: an opening
+   * bracket, optional whitespace and the name, which must not be continued by another symbol
+   * character.
+   */
+  private static Pattern commandPattern(String... names) {
+    return Pattern.compile(
+        "\\(\\s*(?:" + String.join("|", names) + ")(?![" + SYMBOL_CHARS + "])");
+  }
+
+  /** Check if the token starts with the command described by the pattern. */
+  private static boolean isCommand(Pattern command, String token) {
+    return command.matcher(token).lookingAt();
+  }
+
+  /**
+   * Check if the token is <code>(set-logic ..)</code>.
+   *
+   * <p>Use {@link #tokenize(String)} to turn an SMT-LIB2 script into a list of input tokens.
+   */
+  public static boolean isSetLogicToken(String token) {
+    return isCommand(SET_LOGIC, token);
+  }
+
+  /**
+   * Check if the token is a function or variable declaration.
+   *
+   * <p>Use {@link #tokenize(String)} to turn an SMT-LIB2 script into a list of input tokens.
+   */
+  public static boolean isDeclarationToken(String token) {
+    return isCommand(DECLARATION, token);
+  }
+
+  /**
+   * Check if the token is a function definition, including the recursive variants <code>
+   * (define-fun-rec ..)</code> and <code>(define-funs-rec ..)</code>.
+   *
+   * <p>Use {@link #tokenize(String)} to turn an SMT-LIB2 script into a list of input tokens.
+   */
+  public static boolean isDefinitionToken(String token) {
+    return isCommand(DEFINITION, token);
+  }
+
+  /**
+   * Check if the token is an <code>(assert ...)</code>.
+   *
+   * <p>Use {@link #tokenize(String)} to turn an SMT-LIB2 script into a list of input tokens.
+   */
+  public static boolean isAssertToken(String token) {
+    return isCommand(ASSERT, token);
+  }
+
+  /**
+   * Check if the token is a <code>(push ...)</code>.
+   *
+   * <p>Use {@link #tokenize(String)} to turn an SMT-LIB2 script into a list of input tokens.
+   */
+  public static boolean isPushToken(String token) {
+    return isCommand(PUSH, token);
+  }
+
+  /**
+   * Check if the token is a <code>(pop ...)</code>.
+   *
+   * <p>Use {@link #tokenize(String)} to turn an SMT-LIB2 script into a list of input tokens.
+   */
+  public static boolean isPopToken(String token) {
+    return isCommand(POP, token);
+  }
+
+  /**
+   * Check if the token is a <code>(reset-assertions)</code>.
+   *
+   * <p>Use {@link #tokenize(String)} to turn an SMT-LIB2 script into a list of input tokens.
+   */
+  public static boolean isResetAssertionsToken(String token) {
+    return isCommand(RESET_ASSERTIONS, token);
+  }
+
+  /**
+   * Check if the token is a <code>(reset)</code>. This does not match <code>(reset-assertions)
+   * </code>.
+   *
+   * <p>Use {@link #tokenize(String)} to turn an SMT-LIB2 script into a list of input tokens.
+   */
+  public static boolean isResetToken(String token) {
+    return isCommand(RESET, token);
+  }
+
+  /**
+   * Check if the token is <code>(exit)</code>.
+   *
+   * <p>Use {@link #tokenize(String)} to turn an SMT-LIB2 script into a list of input tokens.
+   */
+  public static boolean isExitToken(String token) {
+    return isCommand(EXIT, token);
+  }
+
+  /**
+   * Check if this is a forbidden token.
+   *
+   * <p>The list of forbidden tokens contains:
+   *
+   * <ul>
+   *   <li>push
+   *   <li>pop
+   *   <li>reset-assertions
+   *   <li>reset
+   * </ul>
+   *
+   * <p>Forbidden tokens manipulate the stack and are not supported while parsing SMT-LIB2 strings.
+   * When a forbidden token is found parsing should be aborted by throwing an {@link
+   * IllegalArgumentException} exception.
+   *
+   * <p>Use {@link #tokenize(String)} to turn an SMT-LIB2 script into a list of input tokens.
+   */
+  public static boolean isForbiddenToken(String token) {
+    return isPushToken(token)
+        || isPopToken(token)
+        || isResetAssertionsToken(token)
+        || isResetToken(token);
+  }
+}