-
Notifications
You must be signed in to change notification settings - Fork 114
/
Copy pathverbal_expressions.py
124 lines (91 loc) · 3.09 KB
/
verbal_expressions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import functools
import re
def re_escape(fn):
    '''
    Decorator that makes a builder method's positional arguments regex-safe.

    Every positional argument after the instance is converted before `fn`
    runs: a `VerEx` instance contributes its raw pattern (`.s`) as is,
    anything else is turned into a string and passed through `re.escape`.
    Keyword arguments are forwarded untouched.

    fn -- the method to wrap; invoked as fn(this, *converted, **kwargs).
    Returns the wrapper, which keeps `fn`'s name and docstring.
    '''
    @functools.wraps(fn)  # keep fn.__name__ / __doc__ instead of 'arg_escaped'
    def arg_escaped(this, *args, **kwargs):
        # A conditional expression rather than `cond and a or b`: the latter
        # silently takes the `or` branch whenever `a` is falsy.
        converted = [
            a.s if isinstance(a, VerEx) else re.escape(str(a))
            for a in args
        ]
        return fn(this, *converted, **kwargs)
    return arg_escaped
def group(val, name=None):
    '''
    Wrap the pattern fragment `val` in parentheses.

    When `name` is truthy the result is a named group `(?P<name>...)`,
    otherwise a plain capturing group `(...)`.
    '''
    if not name:
        return '(' + val + ')'
    opener = '(' + '?P<{0}>'.format(name)
    return opener + val + ')'
class VerEx(object):
    '''
    --- VerbalExpressions class ---
    The following methods behave differently from the original js lib!
    - end_of_line
    - start_of_line
    - or
    When you say you want `$`, `^` and `|`, we just insert it right there.
    No other tricks.
    And any string you insert will be automatically grouped,
    except for `tab` and `add`.

    Builder methods return `self`, so calls can be chained. Attributes
    that are not defined here are looked up on the compiled regex object.
    '''

    def __init__(self):
        # Raw pattern source accumulated by the builder methods.
        self.s = ''
        # Flag values for re.compile; regex() ORs the two entries together.
        self.modifiers = {'I': 0, 'M': 0}

    def __getattr__(self, attr):
        ''' any other function will be sent to the regex object '''
        # __getattr__ only runs after normal lookup has failed. If the
        # missing name is one of our own fields (instance created without
        # __init__, e.g. while being copied or unpickled), calling regex()
        # would read self.s and re-enter this method forever. Dunder probes
        # (copy, pickle, ...) must not be answered by the compiled pattern
        # either: its __deepcopy__ would make deepcopy() hand back the
        # pattern instead of a VerEx.
        if attr in ('s', 'modifiers') or (
                attr.startswith('__') and attr.endswith('__')):
            raise AttributeError(
                '{0!r} object has no attribute {1!r}'.format(
                    type(self).__name__, attr))
        return getattr(self.regex(), attr)

    def __str__(self):
        ''' the raw pattern string built so far '''
        return self.s

    def add(self, value):
        ''' append `value` verbatim (no escaping, no grouping) '''
        self.s += value
        return self

    def regex(self):
        ''' get a regular expression object. '''
        flags = self.modifiers['I'] | self.modifiers['M']
        return re.compile(self.s, flags)
    compile = regex

    def source(self):
        ''' return the raw string'''
        return self.s
    raw = value = source

    # ---------------------------------------------

    def anything(self, name=None):
        ''' append a group matching `.*`, optionally named `name` '''
        return self.add(group('.*', name))

    @re_escape
    def anything_but(self, value):
        ''' append a group matching any run of characters not in `value` '''
        return self.add(group('[^' + value + ']*'))

    def end_of_line(self):
        ''' append `$` right here '''
        return self.add('$')

    @re_escape
    def maybe(self, value):
        ''' append `value` as an optional group '''
        return self.add(group(value) + "?")

    def start_of_line(self):
        ''' append `^` right here '''
        return self.add('^')

    @re_escape
    def find(self, value):
        ''' append `value` as a group '''
        return self.add(group(value))
    then = find

    # special characters and groups

    @re_escape
    def any(self, value):
        ''' append a group matching one character out of `value` '''
        return self.add(group("[" + value + "]"))
    any_of = any

    def line_break(self):
        ''' append a group matching a LF or a CR LF line break '''
        return self.add(group(r"\n|(\r\n)"))
    br = line_break

    @re_escape
    def range(self, *args):
        '''
        append a character class built from consecutive (from, to) pairs,
        e.g. range('a', 'z', '0', '9') adds `([a-z0-9])`.
        '''
        # `range` below is the builtin: names defined in the class body are
        # not in scope inside method bodies.
        pairs = [args[start:start + 2] for start in range(0, len(args), 2)]
        spans = ''.join(['-'.join(pair) for pair in pairs])
        return self.add(group("[" + spans + "]"))

    def tab(self):
        ''' append a tab escape (not grouped) '''
        return self.add(r'\t')

    def word(self, name=None):
        ''' append a group matching `\\w+`, optionally named `name` '''
        return self.add(group(r"\w+", name))

    def number(self, name=None):
        ''' append a group matching `\\d+`, optionally named `name` '''
        return self.add(group(r"\d+", name))

    def OR(self, value=None):
        ''' `or` is a python keyword so we use `OR` instead. '''
        self.add("|")
        # A falsy value (None, '') adds the bare `|` only.
        return self.find(value) if value else self

    def replace(self, string, repl):
        ''' substitute matches in `string` with `repl` via the regex's sub() '''
        # `sub` is not defined on this class; it resolves through
        # __getattr__ to the compiled pattern.
        return self.sub(repl, string)

    # --------------- modifiers ------------------------
    # no global option. It depends on which method
    # you called on the regex object.

    def with_any_case(self, value=False):
        ''' set re.I when `value` is truthy, clear it otherwise (default) '''
        self.modifiers['I'] = re.I if value else 0
        return self

    def search_one_line(self, value=False):
        '''
        set re.M when `value` is truthy, clear it otherwise (default).
        NOTE(review): the name suggests the opposite of enabling
        multi-line mode -- confirm the intended meaning with callers.
        '''
        self.modifiers['M'] = re.M if value else 0
        return self