Skip to content
2 changes: 2 additions & 0 deletions src/org/sosy_lab/java_smt/api/FunctionDeclarationKind.java
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,8 @@ public enum FunctionDeclarationKind {
STR_IN_RE,
STR_TO_INT,
INT_TO_STR,
STR_FROM_CODE,
STR_TO_CODE,
STR_LT,
Copy link
Member Author

@kfriedberger kfriedberger Dec 31, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added the mentioned operations already here.
And Junit tests assure that the operations are visited correctly.

STR_LE,
RE_PLUS,
Expand Down
35 changes: 27 additions & 8 deletions src/org/sosy_lab/java_smt/api/StringFormulaManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,23 @@
import org.sosy_lab.java_smt.api.NumeralFormula.IntegerFormula;

/**
* Manager for dealing with string formulas. Functions come from
* http://smtlib.cs.uiowa.edu/theories-UnicodeStrings.shtml.
* Manager for dealing with string formulas. Functions come from <a
* href="http://smtlib.cs.uiowa.edu/theories-UnicodeStrings.shtml">String theory in SMTLIB</a>.
*/
public interface StringFormulaManager {

/**
* Returns a {@link StringFormula} representing the given constant.
* Creates a {@link StringFormula} representing the given constant String.
*
* @param value the string value the returned <code>Formula</code> should represent
* @return a Formula representing the given value
* <p>This method accepts plain Java Strings with Unicode characters from the Basic Multilingual
* Plane (BMP) (codepoints in range [0x00000, 0x2FFFF]). JavaSMT handles escaping internally, as
* some solvers follow the SMTLIB standard and escape Unicode characters with curly braces.
*
* <p>Additionally, you can use SMTLIB escaping like "\\u{1234}" to represent Unicode characters
* directly.
*
* @param value the string value the returned {@link StringFormula} should represent
* @return a {@link StringFormula} representing the given value
*/
StringFormula makeString(String value);

Expand Down Expand Up @@ -71,9 +78,8 @@ public interface StringFormulaManager {
IntegerFormula indexOf(StringFormula str, StringFormula part, IntegerFormula startIndex);

/**
* Get a substring of length 1 from the given String.
*
* <p>The result is underspecified, if the index is out of bounds for the given String.
* Get a substring of length 1 from the given String if the given index is within bounds.
* Otherwise, returns an empty string.
*/
StringFormula charAt(StringFormula str, IntegerFormula index);

Expand Down Expand Up @@ -231,4 +237,17 @@ default RegexFormula concat(RegexFormula... parts) {
* It returns the empty string <code>""</code> for negative numbers.
*/
StringFormula toStringFormula(IntegerFormula number);

/**
* Returns an Integer formula representing the code point of the only character of the given
* String formula, if it represents a single character. Otherwise, returns -1.
*/
IntegerFormula toCodePoint(StringFormula str);

/**
* Returns a String formula representing the single character with the given code point, if it is
* a valid Unicode code point within the Basic Multilingual Plane (BMP) (codepoints in range
* [0x00000, 0x2FFFF]). Otherwise, returns the empty string.
*/
StringFormula fromCodePoint(IntegerFormula codePoint);
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@

package org.sosy_lab.java_smt.basicimpl;

import static com.google.common.base.Preconditions.checkArgument;
import static org.sosy_lab.java_smt.basicimpl.AbstractFormulaManager.checkVariableName;

import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.sosy_lab.java_smt.api.BooleanFormula;
import org.sosy_lab.java_smt.api.FormulaType;
import org.sosy_lab.java_smt.api.NumeralFormula;
Expand All @@ -27,6 +30,16 @@ public abstract class AbstractStringFormulaManager<TFormulaInfo, TType, TEnv, TF
extends AbstractBaseFormulaManager<TFormulaInfo, TType, TEnv, TFuncDecl>
implements StringFormulaManager {

private static final Pattern UNICODE_ESCAPE_PATTERN =
Pattern.compile(
// start with "\\u"
"\\\\u"
// either a plain Unicode letter like "\\u0061"
+ "((?<codePoint>[0-9a-fA-F]{4})"
+ "|"
// or curly brackets like "\\u{61}"
+ "(\\{(?<codePointInBrackets>[0-9a-fA-F]{1,5})}))");

protected AbstractStringFormulaManager(
FormulaCreator<TFormulaInfo, TType, TEnv, TFuncDecl> pCreator) {
super(pCreator);
Expand All @@ -40,11 +53,59 @@ protected RegexFormula wrapRegex(TFormulaInfo formulaInfo) {
return getFormulaCreator().encapsulateRegex(formulaInfo);
}

protected IntegerFormula wrapInteger(TFormulaInfo formulaInfo) {
return getFormulaCreator().encapsulate(FormulaType.IntegerType, formulaInfo);
}

@Override
public StringFormula makeString(String value) {
checkArgument(
areAllCodePointsInRange(value),
"String constant is out of supported Unicode range (Plane 0-2).");
return wrapString(makeStringImpl(value));
}

/** returns whether all Unicode characters in Planes 0-2. */
private static boolean areAllCodePointsInRange(String str) {
return str.codePoints().allMatch(AbstractStringFormulaManager::isCodePointInRange);
}

private static boolean isCodePointInRange(int codePoint) {
return 0x00000 <= codePoint && codePoint <= 0x2FFFF;
}

/** Replace Unicode letters in UTF16 representation with their escape sequences. */
protected static String escapeUnicodeForSmtlib(String input) {
StringBuilder sb = new StringBuilder();
for (int codePoint : input.codePoints().toArray()) {
if (0x20 <= codePoint && codePoint <= 0x7E) {
sb.appendCodePoint(codePoint); // normal printable chars
} else {
sb.append("\\u{").append(String.format("%05X", codePoint)).append("}");
}
}
return sb.toString();
}

/** Replace escaped Unicode letters in SMTLIB representation with their UTF16 pendant. */
public static String unescapeUnicodeForSmtlib(String input) {
Matcher matcher = UNICODE_ESCAPE_PATTERN.matcher(input);
StringBuilder sb = new StringBuilder();
while (matcher.find()) {
String hexCodePoint = matcher.group("codePoint");
if (hexCodePoint == null) {
hexCodePoint = matcher.group("codePointInBrackets");
}
int codePoint = Integer.parseInt(hexCodePoint, 16);
checkArgument(
isCodePointInRange(codePoint),
"SMTLIB does only specify Unicode letters from Planes 0-2");
matcher.appendReplacement(sb, Character.toString(codePoint));
}
matcher.appendTail(sb);
return sb.toString();
}

protected abstract TFormulaInfo makeStringImpl(String value);

@Override
Expand Down Expand Up @@ -141,10 +202,7 @@ public BooleanFormula contains(StringFormula str, StringFormula part) {

@Override
public IntegerFormula indexOf(StringFormula str, StringFormula part, IntegerFormula startIndex) {
return getFormulaCreator()
.encapsulate(
FormulaType.IntegerType,
indexOf(extractInfo(str), extractInfo(part), extractInfo(startIndex)));
return wrapInteger(indexOf(extractInfo(str), extractInfo(part), extractInfo(startIndex)));
}

protected abstract TFormulaInfo indexOf(
Expand Down Expand Up @@ -287,8 +345,7 @@ public RegexFormula times(RegexFormula regex, int repetitions) {

@Override
public IntegerFormula toIntegerFormula(StringFormula str) {
return getFormulaCreator()
.encapsulate(FormulaType.IntegerType, toIntegerFormula(extractInfo(str)));
return wrapInteger(toIntegerFormula(extractInfo(str)));
}

protected abstract TFormulaInfo toIntegerFormula(TFormulaInfo pParam);
Expand All @@ -299,4 +356,18 @@ public StringFormula toStringFormula(IntegerFormula number) {
}

protected abstract TFormulaInfo toStringFormula(TFormulaInfo pParam);

@Override
public IntegerFormula toCodePoint(StringFormula number) {
return wrapInteger(toCodePoint(extractInfo(number)));
}

protected abstract TFormulaInfo toCodePoint(TFormulaInfo pParam);

@Override
public StringFormula fromCodePoint(IntegerFormula number) {
return wrapString(fromCodePoint(extractInfo(number)));
}

protected abstract TFormulaInfo fromCodePoint(TFormulaInfo pParam);
}
Original file line number Diff line number Diff line change
Expand Up @@ -354,4 +354,22 @@ public StringFormula toStringFormula(IntegerFormula number) {
debugging.addFormulaTerm(result);
return result;
}

@Override
public IntegerFormula toCodePoint(StringFormula str) {
debugging.assertThreadLocal();
debugging.assertFormulaInContext(str);
IntegerFormula result = delegate.toCodePoint(str);
debugging.addFormulaTerm(result);
return result;
}

@Override
public StringFormula fromCodePoint(IntegerFormula codepoint) {
debugging.assertThreadLocal();
debugging.assertFormulaInContext(codepoint);
StringFormula result = delegate.fromCodePoint(codepoint);
debugging.addFormulaTerm(result);
return result;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -233,4 +233,16 @@ public StringFormula toStringFormula(IntegerFormula number) {
stats.stringOperations.getAndIncrement();
return delegate.toStringFormula(number);
}

@Override
public IntegerFormula toCodePoint(StringFormula str) {
stats.stringOperations.getAndIncrement();
return delegate.toCodePoint(str);
}

@Override
public StringFormula fromCodePoint(IntegerFormula codepoint) {
stats.stringOperations.getAndIncrement();
return delegate.fromCodePoint(codepoint);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -268,4 +268,18 @@ public StringFormula toStringFormula(IntegerFormula number) {
return delegate.toStringFormula(number);
}
}

@Override
public IntegerFormula toCodePoint(StringFormula str) {
synchronized (sync) {
return delegate.toCodePoint(str);
}
}

@Override
public StringFormula fromCodePoint(IntegerFormula codepoint) {
synchronized (sync) {
return delegate.fromCodePoint(codepoint);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static org.sosy_lab.java_smt.basicimpl.AbstractStringFormulaManager.unescapeUnicodeForSmtlib;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
Expand Down Expand Up @@ -511,6 +512,8 @@ private Expr normalize(Expr operator) {
.put(Kind.STRING_IN_REGEXP, FunctionDeclarationKind.STR_IN_RE)
.put(Kind.STRING_STOI, FunctionDeclarationKind.STR_TO_INT)
.put(Kind.STRING_ITOS, FunctionDeclarationKind.INT_TO_STR)
.put(Kind.STRING_TO_CODE, FunctionDeclarationKind.STR_TO_CODE)
.put(Kind.STRING_FROM_CODE, FunctionDeclarationKind.STR_FROM_CODE)
.put(Kind.STRING_LT, FunctionDeclarationKind.STR_LT)
.put(Kind.STRING_LEQ, FunctionDeclarationKind.STR_LE)
.put(Kind.REGEXP_PLUS, FunctionDeclarationKind.RE_PLUS)
Expand Down Expand Up @@ -609,7 +612,7 @@ public Object convertValue(Expr expForType, Expr value) {
return convertFloatingPoint(value);

} else if (valueType.isString()) {
return value.getConstString().toString();
return unescapeUnicodeForSmtlib(value.getConstString().toString());

} else {
// String serialization for unknown terms.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class CVC4StringFormulaManager extends AbstractStringFormulaManager<Expr, Type,
@Override
protected Expr makeStringImpl(String pValue) {
// The boolean enables escape characters!
return exprManager.mkConst(new CVC4String(pValue, true));
return exprManager.mkConst(new CVC4String(escapeUnicodeForSmtlib(pValue), true));
}

@Override
Expand Down Expand Up @@ -190,4 +190,14 @@ protected Expr toIntegerFormula(Expr pParam) {
protected Expr toStringFormula(Expr pParam) {
return exprManager.mkExpr(Kind.STRING_ITOS, pParam);
}

@Override
protected Expr toCodePoint(Expr pParam) {
return exprManager.mkExpr(Kind.STRING_TO_CODE, pParam);
}

@Override
protected Expr fromCodePoint(Expr pParam) {
return exprManager.mkExpr(Kind.STRING_FROM_CODE, pParam);
}
}
20 changes: 16 additions & 4 deletions src/org/sosy_lab/java_smt/solvers/cvc4/CVC4TheoremProver.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import com.google.common.base.Preconditions;
import com.google.common.collect.Collections2;
import com.google.common.collect.ImmutableList;
import edu.stanford.CVC4.Exception;
import edu.stanford.CVC4.Expr;
import edu.stanford.CVC4.ExprManager;
import edu.stanford.CVC4.ExprManagerMapCollection;
Expand Down Expand Up @@ -123,16 +124,25 @@ protected void popImpl() {
}

@Override
protected @Nullable Void addConstraintImpl(BooleanFormula pF) throws InterruptedException {
protected @Nullable Void addConstraintImpl(BooleanFormula pF)
throws InterruptedException, SolverException {
Preconditions.checkState(!closed);
setChanged();
Expr exp = creator.extractInfo(pF);
if (incremental) {
smtEngine.assertFormula(importExpr(exp));
assertFormula(pF);
}
return null;
}

private void assertFormula(BooleanFormula pF) throws SolverException {
try {
smtEngine.assertFormula(importExpr(creator.extractInfo(pF)));
} catch (Exception cvc4Exception) {
throw new SolverException(
String.format("CVC4 crashed while adding the constraint '%s'", pF), cvc4Exception);
}
}

@SuppressWarnings("resource")
@Override
public CVC4Model getModel() throws InterruptedException, SolverException {
Expand Down Expand Up @@ -188,7 +198,9 @@ public boolean isUnsat() throws InterruptedException, SolverException {
closeAllEvaluators();
changedSinceLastSatQuery = false;
if (!incremental) {
getAssertedFormulas().forEach(f -> smtEngine.assertFormula(creator.extractInfo(f)));
for (BooleanFormula f : getAssertedFormulas()) {
assertFormula(f);
}
}

Result result;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,8 @@ private Term normalize(Term operator) {
.put(Kind.STRING_IN_REGEXP, FunctionDeclarationKind.STR_IN_RE)
.put(Kind.STRING_FROM_INT, FunctionDeclarationKind.INT_TO_STR)
.put(Kind.STRING_TO_INT, FunctionDeclarationKind.STR_TO_INT)
.put(Kind.STRING_TO_CODE, FunctionDeclarationKind.STR_TO_CODE)
.put(Kind.STRING_FROM_CODE, FunctionDeclarationKind.STR_FROM_CODE)
.put(Kind.STRING_LT, FunctionDeclarationKind.STR_LT)
.put(Kind.STRING_LEQ, FunctionDeclarationKind.STR_LE)
.put(Kind.REGEXP_PLUS, FunctionDeclarationKind.RE_PLUS)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class CVC5StringFormulaManager extends AbstractStringFormulaManager<Term, Sort,

@Override
protected Term makeStringImpl(String pValue) {
return solver.mkString(pValue, true);
return solver.mkString(escapeUnicodeForSmtlib(pValue), true);
}

@Override
Expand Down Expand Up @@ -186,4 +186,14 @@ protected Term toStringFormula(Term pParam) {
"CVC5 only supports INT to STRING conversion.");
return solver.mkTerm(Kind.STRING_FROM_INT, pParam);
}

@Override
protected Term toCodePoint(Term pParam) {
return solver.mkTerm(Kind.STRING_TO_CODE, pParam);
}

@Override
protected Term fromCodePoint(Term pParam) {
return solver.mkTerm(Kind.STRING_FROM_CODE, pParam);
}
}
Loading