-
Notifications
You must be signed in to change notification settings - Fork 52
/
Copy pathStringFormulaManager.java
257 lines (212 loc) · 9.42 KB
/
StringFormulaManager.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
// This file is part of JavaSMT,
// an API wrapper for a collection of SMT solvers:
// https://github.com/sosy-lab/java-smt
//
// SPDX-FileCopyrightText: 2021 Alejandro Serrano Mena
//
// SPDX-License-Identifier: Apache-2.0
package org.sosy_lab.java_smt.api;
import com.google.common.base.Preconditions;
import java.util.Arrays;
import java.util.List;
import org.sosy_lab.java_smt.api.NumeralFormula.IntegerFormula;
import org.sosy_lab.java_smt.basicimpl.AbstractStringFormulaManager;
/**
* Manager for dealing with string formulas. Functions come from <a
* href="http://smtlib.cs.uiowa.edu/theories-UnicodeStrings.shtml">String theory in SMTLIB</a>.
*/
public interface StringFormulaManager {
/**
* Creates a {@link StringFormula} representing the given constant String.
*
* <p>The String argument is expected to be a regular Java String and may contain Unicode
* characters from the first 3 planes (codepoints 0x00000-0x2FFFFF). Higher codepoints are not
* allowed due to limitations in SMTLIB. We do not support SMTLIB escape sequences in this method,
* and String like <code>"\\u{abc}"</code> are read verbatim and are not substituted with their
* Unicode character. If you still want to use SMTLIB Strings with this method, the function
* {@link AbstractStringFormulaManager#unescapeUnicodeForSmtlib(String)} can be used to handle the
* conversion before calling this method. Note that you may then also have to use {@link
* AbstractStringFormulaManager#escapeUnicodeForSmtlib(String)} to convert Strings from the model
* back to a format that is compatible with other SMTLIB based solvers.
*
* @param value the string value the returned {@link StringFormula} should represent
* @return a {@link StringFormula} representing the given value
*/
StringFormula makeString(String value);
/**
* Creates a variable of type String with exactly the given name.
*
* <p>This variable (symbol) represents a "String" for which the SMT solver needs to find a model.
*
* <p>Please make sure that the given name is valid in SMT-LIB2. Take a look at {@link
* FormulaManager#isValidName} for further information.
*
* <p>This method does not quote or unquote the given name, but uses the plain name "AS IS".
* {@link Formula#toString} can return a different String than the given one.
*/
StringFormula makeVariable(String pVar);
// TODO: There is currently no way to use variables of type "Regex", i.e., that represent full
// regular expression for which the SMT solver need to find a model.
// The reason for this is that the current SMT solvers do not support this feature.
// However, we can build a RegexFormula from basic parts like Strings (including
// variables of type String) and operations like range, union, or complement.
BooleanFormula equal(StringFormula str1, StringFormula str2);
BooleanFormula greaterThan(StringFormula str1, StringFormula str2);
BooleanFormula greaterOrEquals(StringFormula str1, StringFormula str2);
BooleanFormula lessThan(StringFormula str1, StringFormula str2);
BooleanFormula lessOrEquals(StringFormula str1, StringFormula str2);
/** Check whether the given prefix is a real prefix of str. */
BooleanFormula prefix(StringFormula prefix, StringFormula str);
/** Check whether the given suffix is a real suffix of str. */
BooleanFormula suffix(StringFormula suffix, StringFormula str);
BooleanFormula contains(StringFormula str, StringFormula part);
/**
* Get the first index for a substring in a String, or -1 if the substring is not found.
* startIndex (inlcuded) denotes the start of the search for the index.
*/
IntegerFormula indexOf(StringFormula str, StringFormula part, IntegerFormula startIndex);
/**
* Get a substring of length 1 from the given String if the given index is within bounds.
* Otherwise, returns an empty string.
*/
StringFormula charAt(StringFormula str, IntegerFormula index);
/**
* Get a substring from the given String.
*
* <p>The result is underspecified, if the start index is out of bounds for the given String or if
* the requested length is negative. The length of the result is the minimum of the requested
* length and the remaining length of the given String.
*/
StringFormula substring(StringFormula str, IntegerFormula index, IntegerFormula length);
/** Replace the first appearances of target in fullStr with the replacement. */
StringFormula replace(StringFormula fullStr, StringFormula target, StringFormula replacement);
/** Replace all appearances of target in fullStr with the replacement. */
StringFormula replaceAll(StringFormula fullStr, StringFormula target, StringFormula replacement);
IntegerFormula length(StringFormula str);
default StringFormula concat(StringFormula... parts) {
return concat(Arrays.asList(parts));
}
StringFormula concat(List<StringFormula> parts);
/**
* @param str formula representing the string to match
* @param regex formula representing the regular expression
* @return a formula representing the acceptance of the string by the regular expression
*/
BooleanFormula in(StringFormula str, RegexFormula regex);
/**
* Returns a {@link RegexFormula} representing the given constant.
*
* <p>This method does not parse an existing regex from String, but uses the String directly as a
* constant.
*
* @param value the regular expression the returned <code>Formula</code> should represent
*/
RegexFormula makeRegex(String value);
// basic regex operations
/**
* @return formula denoting the empty set of strings
*/
RegexFormula none();
/**
* Note: The size of the used alphabet depends on the underlying SMT solver.
*
* @return formula denoting the set of all strings, also known as Regex <code>".*"</code>.
*/
RegexFormula all();
/**
* Note: The size of the used alphabet depends on the underlying SMT solver.
*
* @return formula denoting the set of all strings of length 1, also known as DOT operator which
* represents one arbitrary char, or as Regex <code>"."</code>.
*/
RegexFormula allChar();
/**
* @return formula denoting the range regular expression over two sequences of length 1.
*/
RegexFormula range(StringFormula start, StringFormula end);
/**
* @return formula denoting the range regular expression over two chars.
* @see #range(StringFormula, StringFormula)
*/
default RegexFormula range(char start, char end) {
Preconditions.checkArgument(
start <= end,
"Range from start '%s' (%s) to end '%s' (%s) is empty.",
start,
(int) start,
end,
(int) end);
return range(makeString(String.valueOf(start)), makeString(String.valueOf(end)));
}
/**
* @return formula denoting the concatenation
*/
default RegexFormula concat(RegexFormula... parts) {
return concatRegex(Arrays.asList(parts));
}
/**
* @return formula denoting the concatenation
*/
// TODO the naming of this function collides with #concat(List<StringFormula>).
// Maybe we should split String and Regex manager.
RegexFormula concatRegex(List<RegexFormula> parts);
/**
* @return formula denoting the union
*/
RegexFormula union(RegexFormula regex1, RegexFormula regex2);
/**
* @return formula denoting the intersection
*/
RegexFormula intersection(RegexFormula regex1, RegexFormula regex2);
/**
* @return formula denoting the Kleene closure
*/
RegexFormula complement(RegexFormula regex);
/**
* @return formula denoting the Kleene closure (0 or more), also known as STAR operand.
*/
RegexFormula closure(RegexFormula regex);
// derived regex operations
/**
* @return formula denoting the difference
*/
RegexFormula difference(RegexFormula regex1, RegexFormula regex2);
/**
* @return formula denoting the Kleene cross (1 or more), also known as PLUS operand.
*/
RegexFormula cross(RegexFormula regex);
/**
* @return formula denoting the optionality, also known as QUESTIONMARK operand.
*/
RegexFormula optional(RegexFormula regex);
/**
* @return formula denoting the concatenation n times
*/
RegexFormula times(RegexFormula regex, int repetitions);
/**
* Interpret a String formula as an Integer formula.
*
* <p>The number is interpreted in base 10 and has no leading zeros. The method works as expected
* for positive numbers, including zero. It returns the constant value of <code>-1</code> for
* negative numbers or invalid number representations, for example if any char is no digit.
*/
IntegerFormula toIntegerFormula(StringFormula str);
/**
* Interpret an Integer formula as a String formula.
*
* <p>The number is in base 10. The method works as expected for positive numbers, including zero.
* It returns the empty string <code>""</code> for negative numbers.
*/
StringFormula toStringFormula(IntegerFormula number);
/**
* Returns an Integer formula representing the code point of the only character of the given
* String formula, if it represents a single character. Otherwise, returns -1.
*/
IntegerFormula toCodePoint(StringFormula str);
/**
* Returns a String formula representing the single character with the given code point, if it is
* a valid Unicode code point within the Basic Multilingual Plane (BMP) or planes 1 and 2
* (codepoints in range [0x00000, 0x2FFFF]). Otherwise, returns the empty string.
*/
StringFormula fromCodePoint(IntegerFormula codePoint);
}