-
Notifications
You must be signed in to change notification settings - Fork 114
/
Copy pathverbal_expressions.py
122 lines (90 loc) · 2.93 KB
/
verbal_expressions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import functools
import re
def re_escape(fn):
    '''
    Decorator for builder methods: regex-escape every positional argument.

    Each argument is turned into a pattern fragment before `fn` is called:
      - a `VerEx` instance contributes its raw pattern (`.s`) untouched, so
        already-built expressions can be nested;
      - anything else is converted with `str()` and passed through
        `re.escape`, so it is matched literally.

    The wrapped method keeps its own `__name__` and docstring.
    '''
    @functools.wraps(fn)
    def arg_escaped(this, *args):
        escaped = [
            a.s if isinstance(a, VerEx) else re.escape(str(a))
            for a in args
        ]
        return fn(this, *escaped)
    return arg_escaped
class VerEx(object):
    '''
    --- VerbalExpressions class ---

    Fluent regular-expression builder. Every builder method appends a
    fragment to the pattern string kept in `self.s` and returns `self`,
    so calls can be chained. Attributes that are not defined here are
    looked up on the compiled pattern (see `__getattr__`).

    The following methods behave differently from the original JS lib:

        - end_of_line
        - start_of_line
        - or

    When you say you want `$`, `^` and `|`, we just insert it right there.
    No other tricks.

    Any string you insert will be automatically grouped,
    except for `tab` and `add`.
    '''

    def __init__(self):
        # Raw pattern text accumulated by the builder methods.
        self.s = ''
        # Flag values OR-ed together in `regex()`; 0 means "flag off".
        self.modifiers = {'I': 0, 'M': 0}

    def __getattr__(self, attr):
        ''' any other function will be sent to the regex object '''
        # __getattr__ only runs after normal lookup has failed, so being
        # asked for our own state means this instance was never initialised
        # (copy/pickle probe a bare object for `__setstate__`). Compiling
        # would read `self.s` again and recurse without end, so report the
        # attribute as missing instead.
        if attr in ('s', 'modifiers'):
            raise AttributeError(
                '%r object has no attribute %r' % (type(self).__name__, attr)
            )
        return getattr(self.regex(), attr)

    def __str__(self):
        return self.s

    def add(self, value):
        ''' append `value` to the pattern verbatim (no escaping, no group) '''
        self.s += value
        return self

    def regex(self):
        ''' get a regular expression object. '''
        flags = self.modifiers['I'] | self.modifiers['M']
        return re.compile(self.s, flags)
    compile = regex

    def source(self):
        ''' return the raw string'''
        return self.s
    raw = value = source

    # ---------------------------------------------

    def anything(self):
        ''' add `(.*)` '''
        return self.add('(.*)')

    @re_escape
    def anything_but(self, value):
        ''' add a group matching any run of characters not in `value` '''
        return self.add('([^' + value + ']*)')

    def end_of_line(self):
        ''' add `$` right here '''
        return self.add('$')

    @re_escape
    def maybe(self, value):
        ''' add `value` as an optional group '''
        return self.add("(" + value + ")?")

    def start_of_line(self):
        ''' add `^` right here '''
        return self.add('^')

    @re_escape
    def find(self, value):
        ''' add `value` as a group '''
        return self.add('(' + value + ')')
    then = find

    # special characters and groups

    @re_escape
    def any(self, value):
        ''' add a group matching one character out of `value` '''
        return self.add("([" + value + "])")
    any_of = any

    def line_break(self):
        ''' add a group matching `\\n` or `\\r\\n` '''
        return self.add(r"(\n|(\r\n))")
    br = line_break

    @re_escape
    def range(self, *args):
        '''
        add a character-class group built from pairs of arguments:
        `range('a', 'z', 0, 9)` adds `([a-z0-9])`.
        '''
        # Inside the method body `range` is the builtin; the class attribute
        # of the same name is not visible as a bare name here.
        pairs = [args[i:i + 2] for i in range(0, len(args), 2)]
        return self.add("([" + ''.join('-'.join(p) for p in pairs) + "])")

    def tab(self):
        ''' add `\\t` (not grouped) '''
        return self.add(r'\t')

    def word(self):
        ''' add `(\\w+)` '''
        return self.add(r"(\w+)")

    def number(self):
        ''' add `([1-9]\\d*)`: digits with a non-zero first digit '''
        return self.add(r"([1-9]\d*)")

    def one_character(self):
        ''' add `(.)` '''
        return self.add(r"(.)")

    def OR(self, value=None):
        ''' `or` is a python keyword so we use `OR` instead. '''
        self.add("|")
        # Only a missing operand is skipped. Falsy operands such as 0 are
        # real values and must reach `find`, the same as `find(0)` does.
        # An empty string stays a no-op so no extra empty group is added.
        if value is None or value == '':
            return self
        return self.find(value)

    def replace(self, string, repl):
        '''
        replace matches in `string` with `repl`; `sub` is resolved on the
        compiled pattern through `__getattr__`.
        '''
        return self.sub(repl, string)

    # --------------- modifiers ------------------------
    # no global option. It depends on which method
    # you called on the regex object.

    def with_any_case(self, value=False):
        '''
        turn `re.I` on for a truthy `value`, off otherwise. The default is
        False, so calling it without an argument clears the flag.
        '''
        self.modifiers['I'] = re.I if value else 0
        return self

    def search_one_line(self, value=False):
        '''
        turn `re.M` on for a truthy `value`, off otherwise.
        NOTE(review): given the method name this looks inverted (a truthy
        value enables MULTILINE) -- confirm the intended meaning before
        changing it, callers may rely on the current behaviour.
        '''
        self.modifiers['M'] = re.M if value else 0
        return self