-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtokenizer.go
77 lines (62 loc) · 1.81 KB
/
tokenizer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
package addy
import (
"strings"
)
// separators is the set of runes that split a string into tokens: tab, comma
// and space. Looking up any other rune yields false.
var separators = map[rune]bool{
	'\t': true,
	',':  true,
	' ':  true,
}
// token is one piece of a tokenized string: either the text found between
// separators, or a single separator.
type token struct {
	// value is the text of the token.
	value string
	// isSeparator is true when the token holds a separator rather than text.
	isSeparator bool
}
// tokens is an ordered slice of token pointers. Its join method concatenates
// the token values back into a single string.
type tokens []*token
// join concatenates the value of every token, in slice order, into a single
// string. Separator tokens contribute their text like any other token. A nil
// or empty slice yields "".
func (t tokens) join() string {
	// Measure first so the builder allocates once, instead of re-copying the
	// accumulated string on every iteration as `+=` in a loop would.
	size := 0
	for _, tk := range t {
		size += len(tk.value)
	}

	var b strings.Builder
	b.Grow(size)
	for _, tk := range t {
		b.WriteString(tk.value)
	}
	return b.String()
}
// tokenize splits an address string into an ordered slice of tokens.
//
// Leading and trailing whitespace is trimmed first (strings.TrimSpace). The
// remainder is scanned rune by rune: every maximal run of non-separator runes
// becomes one token, and every rune found in separators becomes its own token
// with isSeparator set. Two rules thin out the separator tokens:
//
//   - a separator is skipped when the token just before it is that same
//     separator, so "a  b" yields a single space token;
//   - a separator seen before any token has been emitted is skipped, so
//     separators at the very start of the input (e.g. a leading comma) are
//     dropped.
//
// Different separators in a row are all kept ("a, b" yields "a", ",", " ",
// "b"). The result is nil when the input produces no tokens.
func tokenize(address string) tokens {
	var (
		result  tokens
		pending strings.Builder // runes of the token currently being built
	)

	// flush emits the pending non-separator token, if there is one, and
	// starts a fresh one.
	flush := func() {
		if pending.Len() == 0 {
			return
		}
		result = append(result, &token{value: pending.String()})
		pending.Reset()
	}

	// Ranging over a string decodes one rune per iteration and simply stops at
	// the end of the input, so no reader or end-of-input error is needed.
	for _, rn := range strings.TrimSpace(address) {
		// Not a separator: extend the current token and move on.
		if !separators[rn] {
			pending.WriteRune(rn)
			continue
		}

		// A separator ends whatever token was being built.
		flush()

		// Keep the separator unless it repeats the token right before it.
		// This removes things like duplicate spaces. The length guard also
		// means a separator is never the first token.
		sep := string(rn)
		if len(result) > 0 && result[len(result)-1].value != sep {
			result = append(result, &token{value: sep, isSeparator: true})
		}
	}

	// The input may end in the middle of a token.
	flush()
	return result
}