Skip to content

Commit 4620c32

Browse files
committed
change code unit width to 8
1 parent 5f3687a commit 4620c32

20 files changed

+259
-491
lines changed

charLowerUpper.go

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
package regexp2
2+
3+
import "unicode"
4+
5+
// toLowerChar lower-cases a single 8-bit code unit.
//
// Only ASCII bytes are case-mapped ('A'-'Z' become 'a'-'z'); every byte above
// unicode.MaxASCII is returned unchanged. Input text is handled as raw bytes,
// so a high byte is a piece of a multi-byte UTF-8 sequence rather than a
// character of its own. Feeding it to unicode.ToLower would interpret it as a
// Latin-1 code point and rewrite it (for example 0xC3 -> 0xE3), corrupting the
// sequence and making unrelated sequences compare equal.
func toLowerChar(ch byte) byte {
	if ch > unicode.MaxASCII {
		// Not a standalone character: leave UTF-8 lead/continuation bytes alone.
		return ch
	}
	// For ASCII input the result is always ASCII, so the conversion is lossless.
	return byte(unicode.ToLower(rune(ch)))
}
8+
9+
// toUpperChar upper-cases a single 8-bit code unit.
//
// Only ASCII bytes are case-mapped ('a'-'z' become 'A'-'Z'); every byte above
// unicode.MaxASCII is returned unchanged. Input text is handled as raw bytes,
// so a high byte is a piece of a multi-byte UTF-8 sequence rather than a
// character of its own. Feeding it to unicode.ToUpper would interpret it as a
// Latin-1 code point; besides corrupting the sequence, some results do not fit
// in a byte and would be silently truncated (0xFF -> U+0178 -> 0x78 'x').
func toUpperChar(ch byte) byte {
	if ch > unicode.MaxASCII {
		// Not a standalone character: leave UTF-8 lead/continuation bytes alone.
		return ch
	}
	// For ASCII input the result is always ASCII, so the conversion is lossless.
	return byte(unicode.ToUpper(rune(ch)))
}

match.go

+12-12
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,11 @@ type Group struct {
4343
// Capture is a single capture of text within the larger original string
4444
type Capture struct {
4545
// the original string
46-
text []rune
47-
// Index is the position in the underlying rune slice where the first character of
48-
// captured substring was found. Even if you pass in a string this will be in Runes.
46+
text []byte
47+
// Index is the position in the underlying byte slice where the first character of
48+
// captured substring was found. Even if you pass in a string, this is a byte offset.
4949
Index int
50-
// Length is the number of runes in the captured substring.
50+
// Length is the number of bytes in the captured substring.
5151
Length int
5252
}
5353

@@ -56,12 +56,12 @@ func (c *Capture) String() string {
5656
return string(c.text[c.Index : c.Index+c.Length])
5757
}
5858

59-
// Runes returns the captured text as a rune slice
60-
func (c *Capture) Runes() []rune {
59+
// Bytes returns the captured text as a byte slice
60+
func (c *Capture) Bytes() []byte {
6161
return c.text[c.Index : c.Index+c.Length]
6262
}
6363

64-
func newMatch(regex *Regexp, capcount int, text []rune, startpos int) *Match {
64+
func newMatch(regex *Regexp, capcount int, text []byte, startpos int) *Match {
6565
m := Match{
6666
regex: regex,
6767
matchcount: make([]int, capcount),
@@ -75,13 +75,13 @@ func newMatch(regex *Regexp, capcount int, text []rune, startpos int) *Match {
7575
return &m
7676
}
7777

78-
func newMatchSparse(regex *Regexp, caps map[int]int, capcount int, text []rune, startpos int) *Match {
78+
func newMatchSparse(regex *Regexp, caps map[int]int, capcount int, text []byte, startpos int) *Match {
7979
m := newMatch(regex, capcount, text, startpos)
8080
m.sparseCaps = caps
8181
return m
8282
}
8383

84-
func (m *Match) reset(text []rune, textstart int) {
84+
func (m *Match) reset(text []byte, textstart int) {
8585
m.text = text
8686
m.textstart = textstart
8787
for i := 0; i < len(m.matchcount); i++ {
@@ -288,11 +288,11 @@ func (m *Match) groupValueAppendToBuf(groupnum int, buf *bytes.Buffer) {
288288
last := index + matches[(c*2)-1]
289289

290290
for ; index < last; index++ {
291-
buf.WriteRune(m.text[index])
291+
buf.WriteByte(m.text[index])
292292
}
293293
}
294294

295-
func newGroup(name string, text []rune, caps []int, capcount int) Group {
295+
func newGroup(name string, text []byte, caps []int, capcount int) Group {
296296
g := Group{}
297297
g.text = text
298298
if capcount > 0 {
@@ -315,7 +315,7 @@ func newGroup(name string, text []rune, caps []int, capcount int) Group {
315315

316316
func (m *Match) dump() string {
317317
buf := &bytes.Buffer{}
318-
buf.WriteRune('\n')
318+
buf.WriteByte('\n')
319319
if len(m.sparseCaps) > 0 {
320320
for k, v := range m.sparseCaps {
321321
fmt.Fprintf(buf, "Slot %v -> %v\n", k, v)

regexp.go

+22-22
Original file line numberDiff line numberDiff line change
@@ -178,12 +178,12 @@ func (re *Regexp) ReplaceFunc(input string, evaluator MatchEvaluator, startAt, c
178178

179179
// FindStringMatch searches the input string for a Regexp match
180180
func (re *Regexp) FindStringMatch(s string) (*Match, error) {
181-
// convert string to runes
182-
return re.run(false, -1, getRunes(s))
181+
// convert string to bytes
182+
return re.run(false, -1, getBytes(s))
183183
}
184184

185-
// FindRunesMatch searches the input rune slice for a Regexp match
186-
func (re *Regexp) FindRunesMatch(r []rune) (*Match, error) {
185+
// FindBytesMatch searches the input byte slice for a Regexp match
186+
func (re *Regexp) FindBytesMatch(r []byte) (*Match, error) {
187187
return re.run(false, -1, r)
188188
}
189189

@@ -192,17 +192,17 @@ func (re *Regexp) FindStringMatchStartingAt(s string, startAt int) (*Match, erro
192192
if startAt > len(s) {
193193
return nil, errors.New("startAt must be less than the length of the input string")
194194
}
195-
r, startAt := re.getRunesAndStart(s, startAt)
195+
r, startAt := re.getBytesAndStart(s, startAt)
196196
if startAt == -1 {
197197
// we didn't find our start index in the string -- that's a problem
198-
return nil, errors.New("startAt must align to the start of a valid rune in the input string")
198+
return nil, errors.New("startAt must align to the start of a valid byte in the input string")
199199
}
200200

201201
return re.run(false, startAt, r)
202202
}
203203

204-
// FindRunesMatchStartingAt searches the input rune slice for a Regexp match starting at the startAt index
205-
func (re *Regexp) FindRunesMatchStartingAt(r []rune, startAt int) (*Match, error) {
204+
// FindBytesMatchStartingAt searches the input byte slice for a Regexp match starting at the startAt index
205+
func (re *Regexp) FindBytesMatchStartingAt(r []byte, startAt int) (*Match, error) {
206206
return re.run(false, startAt, r)
207207
}
208208

@@ -233,44 +233,44 @@ func (re *Regexp) FindNextMatch(m *Match) (*Match, error) {
233233
// MatchString returns true if the string matches the regex
234234
// error will be set if a timeout occurs
235235
func (re *Regexp) MatchString(s string) (bool, error) {
236-
m, err := re.run(true, -1, getRunes(s))
236+
m, err := re.run(true, -1, getBytes(s))
237237
if err != nil {
238238
return false, err
239239
}
240240
return m != nil, nil
241241
}
242242

243-
func (re *Regexp) getRunesAndStart(s string, startAt int) ([]rune, int) {
243+
func (re *Regexp) getBytesAndStart(s string, startAt int) ([]byte, int) {
244244
if startAt < 0 {
245245
if re.RightToLeft() {
246-
r := getRunes(s)
246+
r := getBytes(s)
247247
return r, len(r)
248248
}
249-
return getRunes(s), 0
249+
return getBytes(s), 0
250250
}
251-
ret := make([]rune, len(s))
251+
ret := make([]byte, len(s))
252252
i := 0
253-
runeIdx := -1
254-
for strIdx, r := range s {
253+
byteIdx := -1
254+
for strIdx, r := range []byte(s) {
255255
if strIdx == startAt {
256-
runeIdx = i
256+
byteIdx = i
257257
}
258258
ret[i] = r
259259
i++
260260
}
261261
if startAt == len(s) {
262-
runeIdx = i
262+
byteIdx = i
263263
}
264-
return ret[:i], runeIdx
264+
return ret[:i], byteIdx
265265
}
266266

267-
func getRunes(s string) []rune {
268-
return []rune(s)
267+
func getBytes(s string) []byte {
268+
return []byte(s)
269269
}
270270

271-
// MatchRunes return true if the runes matches the regex
271+
// MatchBytes returns true if the bytes match the regex
272272
// error will be set if a timeout occurs
273-
func (re *Regexp) MatchRunes(r []rune) (bool, error) {
273+
func (re *Regexp) MatchBytes(r []byte) (bool, error) {
274274
m, err := re.run(true, -1, r)
275275
if err != nil {
276276
return false, err

regexp_pcre_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -268,14 +268,14 @@ func containsEnder(line string, ender byte, allowFirst bool) bool {
268268
}
269269

270270
func unEscapeToMatch(line string) string {
271-
idx := strings.IndexRune(line, '\\')
271+
idx := strings.IndexByte(line, '\\')
272272
// no slashes means no unescape needed
273273
if idx == -1 {
274274
return line
275275
}
276276

277277
buf := bytes.NewBufferString(line[:idx])
278-
// get the runes for the rest of the string -- we're going full parser scan on this
278+
// get the bytes for the rest of the string -- we're going full parser scan on this
279279

280280
inEscape := false
281281
// take any \'s and convert them

regexp_performance_test.go

+13-13
Original file line numberDiff line numberDiff line change
@@ -86,17 +86,17 @@ func BenchmarkAnchoredLiteralLongNonMatch(b *testing.B) {
8686
b.StopTimer()
8787

8888
data := "abcdefghijklmnopqrstuvwxyz"
89-
x := make([]rune, 32768*len(data))
89+
x := make([]byte, 32768*len(data))
9090
for i := 0; i < 32768; /*(2^15)*/ i++ {
9191
for j := 0; j < len(data); j++ {
92-
x[i*len(data)+j] = rune(data[j])
92+
x[i*len(data)+j] = byte(data[j])
9393
}
9494
}
9595

9696
re := MustCompile("^zbc(d|e)", 0)
9797
b.StartTimer()
9898
for i := 0; i < b.N; i++ {
99-
if m, err := re.MatchRunes(x); m || err != nil {
99+
if m, err := re.MatchBytes(x); m || err != nil {
100100
b.Fatalf("unexpected match or error! %v", err)
101101
}
102102
}
@@ -117,17 +117,17 @@ func BenchmarkAnchoredShortMatch(b *testing.B) {
117117
func BenchmarkAnchoredLongMatch(b *testing.B) {
118118
b.StopTimer()
119119
data := "abcdefghijklmnopqrstuvwxyz"
120-
x := make([]rune, 32768*len(data))
120+
x := make([]byte, 32768*len(data))
121121
for i := 0; i < 32768; /*(2^15)*/ i++ {
122122
for j := 0; j < len(data); j++ {
123-
x[i*len(data)+j] = rune(data[j])
123+
x[i*len(data)+j] = byte(data[j])
124124
}
125125
}
126126

127127
re := MustCompile("^.bc(d|e)", 0)
128128
b.StartTimer()
129129
for i := 0; i < b.N; i++ {
130-
if m, err := re.MatchRunes(x); !m || err != nil {
130+
if m, err := re.MatchBytes(x); !m || err != nil {
131131
b.Fatalf("no match or error! %v", err)
132132
}
133133
}
@@ -205,13 +205,13 @@ func BenchmarkOnePassLongNotPrefix(b *testing.B) {
205205
}
206206
}
207207

208-
var text []rune
208+
var text []byte
209209

210-
func makeText(n int) []rune {
210+
func makeText(n int) []byte {
211211
if len(text) >= n {
212212
return text[:n]
213213
}
214-
text = make([]rune, n)
214+
text = make([]byte, n)
215215
x := ^uint32(0)
216216
for i := range text {
217217
x += x
@@ -222,7 +222,7 @@ func makeText(n int) []rune {
222222
if x%31 == 0 {
223223
text[i] = '\n'
224224
} else {
225-
text[i] = rune(x%(0x7E+1-0x20) + 0x20)
225+
text[i] = byte(x%(0x7E+1-0x20) + 0x20)
226226
}
227227
}
228228
return text
@@ -234,7 +234,7 @@ func benchmark(b *testing.B, re string, n int) {
234234
b.ResetTimer()
235235
b.SetBytes(int64(n))
236236
for i := 0; i < b.N; i++ {
237-
if m, err := r.MatchRunes(t); m {
237+
if m, err := r.MatchBytes(t); m {
238238
b.Fatal("match!")
239239
} else if err != nil {
240240
b.Fatalf("Err %v", err)
@@ -301,7 +301,7 @@ func BenchmarkLeading(b *testing.B) {
301301
inp := makeText(1000000)
302302
b.StartTimer()
303303
for i := 0; i < b.N; i++ {
304-
if m, err := r.MatchRunes(inp); !m {
304+
if m, err := r.MatchBytes(inp); !m {
305305
b.Errorf("Expected match")
306306
} else if err != nil {
307307
b.Errorf("Error: %v", err)
@@ -331,7 +331,7 @@ func BenchmarkShortSearch(b *testing.B) {
331331
t := makeText(100)
332332
b.SetBytes(int64(len(t)))
333333
matchOnce := func(r *Regexp) {
334-
if m, err := r.MatchRunes(t); m {
334+
if m, err := r.MatchBytes(t); m {
335335
b.Fatal("match!")
336336
} else if err != nil {
337337
b.Fatalf("Err %v", err)

regexp_re2_test.go

-38
Original file line numberDiff line numberDiff line change
@@ -122,33 +122,6 @@ func TestRE2Dollar_Multiline(t *testing.T) {
122122
}
123123
}
124124

125-
func TestRE2ExtendedZero(t *testing.T) {
126-
notZero := "߀" // \u07c0
127-
r := MustCompile(`^\d$`, RE2)
128-
if m, _ := r.MatchString(notZero); m {
129-
t.Fatal("Expected no match")
130-
}
131-
132-
r = MustCompile(`^\D$`, RE2)
133-
if m, _ := r.MatchString(notZero); !m {
134-
t.Fatal("Expected match")
135-
}
136-
}
137-
138-
func TestRegularExtendedZero(t *testing.T) {
139-
notZero := "߀" // \u07c0
140-
141-
r := MustCompile(`^\d$`, 0)
142-
if m, _ := r.MatchString(notZero); !m {
143-
t.Fatal("Expected match")
144-
}
145-
146-
r = MustCompile(`^\D$`, 0)
147-
if m, _ := r.MatchString(notZero); m {
148-
t.Fatal("Expected no match")
149-
}
150-
}
151-
152125
func TestRE2Word(t *testing.T) {
153126
r := MustCompile(`\w`, RE2)
154127
if m, _ := r.MatchString("å"); m {
@@ -162,17 +135,6 @@ func TestRE2Word(t *testing.T) {
162135

163136
}
164137

165-
func TestRegularWord(t *testing.T) {
166-
r := MustCompile(`\w`, 0)
167-
if m, _ := r.MatchString("å"); !m {
168-
t.Fatal("Expected match")
169-
}
170-
r = MustCompile(`\W`, 0)
171-
if m, _ := r.MatchString("å"); m {
172-
t.Fatal("Expected no match")
173-
}
174-
}
175-
176138
func TestRE2Space(t *testing.T) {
177139
r := MustCompile(`\s`, RE2)
178140
if m, _ := r.MatchString("\x0b"); m {

0 commit comments

Comments
 (0)