-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpsyh.py
389 lines (304 loc) · 13 KB
/
psyh.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
#!/usr/bin/env python
# psyh. Copyright (C) 2017 Yuval Sedan. Use, distribution and/or modification
# of this program are only permitted under the terms specified in the LICENSE
# file which should be included along with this program.
import re, sys, traceback
import collections, itertools
import regex
class RegexpMatcher(object):
    """Match lines against one or more regular expressions.

    All patterns given to set_patterns() are combined into a single
    alternation and compiled once with the selected engine. After
    construction, ``self.match`` is bound to either match_normal() or
    match_only_matching(), depending on *only_matching*.
    """

    class Engine(object):
        """Identifiers for the regular-expression back ends."""
        RE = 1     # selects the standard-library ``re`` module
        REGEX = 2  # selects the ``regex`` module imported by this file

    def __init__(self, engine, ignore_case=False, only_matching=False, line_match=False, extra_flags=0):
        """Configure the matcher.

        engine        -- one of the RegexpMatcher.Engine constants.
        ignore_case   -- add the engine's IGNORECASE flag.
        only_matching -- make match() return the matched substrings
                         instead of the whole line.
        line_match    -- anchor every pattern so it must match a whole line.
        extra_flags   -- additional engine flags OR-ed into the compile flags.

        Raises ValueError if *engine* is not a known Engine constant.
        """
        # Bind the public entry point once so callers simply use match().
        if only_matching:
            self.match = self.match_only_matching
        else:
            self.match = self.match_normal
        self.line_match = line_match
        if engine == RegexpMatcher.Engine.RE:
            self.re_module = re
        elif engine == RegexpMatcher.Engine.REGEX:
            self.re_module = regex
        else:
            # Fail here rather than with an AttributeError on re_module later.
            raise ValueError('Unknown regular expression engine: %r' % (engine,))
        self.flags = extra_flags
        if ignore_case:
            self.flags |= self.re_module.IGNORECASE

    def set_patterns(self, patterns):
        """Compile *patterns* (an iterable of pattern strings) into self.re.

        The patterns are joined with ``|``. No capturing group is added
        around them, so group numbers (and therefore backreferences) inside
        a single pattern keep their meaning. When several patterns are
        given, groups are still numbered across the whole alternation.
        """
        if self.line_match:
            # Non-capturing wrapper: anchors apply to the whole pattern
            # without shifting the pattern's own group numbers.
            patterns = ['^(?:' + pattern + ')$' for pattern in patterns]
        # Top-level alternation has the lowest precedence, so no outer
        # group is needed around the joined patterns.
        big_pattern = '|'.join(patterns)
        self.re = self.re_module.compile(big_pattern, self.flags)

    def match_only_matching(self, s):
        """Return (matched, substrings): every match of the pattern in *s*."""
        matches = [match.group(0) for match in self.re.finditer(s)]
        return bool(matches), matches

    def match_normal(self, s):
        """Return (matched, [s]): whether the pattern is found anywhere in *s*."""
        return self.re.search(s) is not None, [s]
class BasicMatcher(RegexpMatcher):
    """Placeholder for a basic-regular-expression (BRE) matcher.

    Not implemented: constructing one always raises NotImplementedError.
    """

    def __init__(self, *args, **kwargs):
        # NotImplementedError is the built-in exception; the previously used
        # name NotImplementedException does not exist and caused a NameError.
        raise NotImplementedError('BRE matcher is not implemented')
class PcreMatcher(RegexpMatcher):
    """RegexpMatcher preconfigured with the Engine.RE back end."""

    def __init__(self, *args, **kwargs):
        # Forward everything unchanged; only the engine choice is fixed here.
        super().__init__(*args, engine=RegexpMatcher.Engine.RE, **kwargs)
class PosixMatcher(RegexpMatcher):
    """RegexpMatcher preconfigured with Engine.REGEX and the regex.POSIX flag."""

    def __init__(self, *args, **kwargs):
        # Forward everything unchanged; engine and extra flag are fixed here.
        super().__init__(
            *args,
            engine=RegexpMatcher.Engine.REGEX,
            extra_flags=regex.POSIX,
            **kwargs
        )
class FixedStringsMatcher(PcreMatcher):
    """PcreMatcher that treats every pattern as a literal string."""

    def set_patterns(self, patterns):
        """Escape each pattern with re.escape() and compile the result."""
        escaped = list(map(re.escape, patterns))
        super().set_patterns(escaped)
def grep(matcher=None, pattern_files=[], patterns=[], invert_match=False, max_count=None, yield_counts=False, before_context=0, after_context=0, inputs=[]):
    """Search each input for lines selected by *matcher*.

    matcher        -- object providing set_patterns(patterns) and
                      match(line) -> (matched, matches).
    pattern_files  -- iterable of iterables of lines; each line (with trailing
                      CR/LF removed) is used as an additional pattern.
    patterns       -- iterable of pattern strings. It is never modified.
    invert_match   -- select the lines that do NOT match.
    max_count      -- stop reading an input after this many selected lines
                      (a false value means no limit).
    yield_counts   -- yield one (input, count) pair per input instead of
                      one tuple per selected line.
    before_context -- number of preceding lines reported with each match.
    after_context  -- number of following lines reported with each match.
    inputs         -- iterable of line iterables (files, lists, generators).

    This is a generator. Unless *yield_counts* is set it yields
    (input, line_number, matches, before_lines, after_lines), where line
    numbers start at 1 and the context values are tuples of lines with the
    trailing newline removed.

    Raises ValueError (on first iteration) if *yield_counts* is combined
    with context lines.
    """
    if yield_counts and (after_context or before_context):
        raise ValueError('Cannot specify after_context or before_context when yield_counts=True')

    # Work on a private copy: appending to the caller's list (or to the shared
    # default argument) would leak patterns from one call into the next.
    all_patterns = list(patterns)
    for pattern_file in pattern_files:
        for line in pattern_file:
            all_patterns.append(line.rstrip('\r\n'))
    matcher.set_patterns(all_patterns)

    empty_tuple = tuple()
    # Unique marker placed between the real input and the read-ahead buffer,
    # so the switch-over never depends on the identity of line strings.
    end_of_input = object()

    for input_sequence in inputs:
        # A single iterator shared by the read-ahead and the main loop, so
        # re-iterable inputs such as lists are not restarted half-way.
        lines = iter(input_sequence)
        count = 0
        line_number = 0
        before_context_lines = collections.deque([], before_context) if before_context > 0 else empty_tuple
        after_context_lines = collections.deque([], after_context) if after_context > 0 else empty_tuple

        if after_context > 0:
            # Read after_context lines ahead; stored lines are ready to be yielded.
            for line in itertools.islice(lines, after_context):
                after_context_lines.append(line.rstrip('\n'))

        draining = False
        after_context_offset = 0

        # NOTE: itertools.chain() only asks after_context_lines for an
        # iterator once the earlier iterables are exhausted, so mutating the
        # deque while the input is still being read is safe.
        for line in itertools.chain(lines, (end_of_input,), after_context_lines):
            if line is end_of_input:
                draining = True
                continue

            line_number += 1
            if draining:
                # The input is exhausted: the read-ahead buffer is no longer
                # refilled, so the available after-context shrinks by one.
                after_context_offset += 1
            elif after_context > 0:
                # Queue the freshly read line and process the oldest buffered one.
                oldest_line = after_context_lines.popleft()
                after_context_lines.append(line.rstrip('\n'))
                line = oldest_line
            else:
                line = line.rstrip('\n')

            matched, matches = matcher.match(line)
            if matched != invert_match:  # != acts as xor
                if not yield_counts:
                    before = tuple(before_context_lines) if before_context > 0 else empty_tuple
                    after = tuple(after_context_lines)[after_context_offset:] if after_context > 0 else empty_tuple
                    yield input_sequence, line_number, matches, before, after
                count += 1
                if max_count and count == max_count:
                    break  # next file

            if before_context > 0:
                before_context_lines.append(line)

        if yield_counts:
            yield input_sequence, count
def file_generator(filenames, mode='r', std_hypens=True, exc_handler=None, newline=None):
    """Yield an open file object for each name in *filenames*.

    filenames   -- iterable of file names.
    mode        -- mode passed to open().
    std_hypens  -- when true, the name '-' yields sys.stdin (mode 'r') or
                   sys.stdout (mode 'w') instead of opening a file.
    exc_handler -- optional callable invoked with the IOError when a file
                   cannot be processed; without it the error is dropped and
                   the generator moves on to the next name.
    newline     -- forwarded to open().

    Each opened file is closed when the generator is advanced past it.
    Raises ValueError (on first iteration) if std_hypens is used with a
    mode other than 'r' or 'w'.
    """
    if std_hypens and not (mode == 'r' or mode == 'w'):
        raise ValueError('Cannot use std_hypens=True with a mode other than "r" or "w"')

    for path in filenames:
        if std_hypens and path == '-':
            # The guard above ensures mode is 'r' or 'w' here.
            yield sys.stdin if mode == 'r' else sys.stdout
            continue

        try:
            with open(path, mode, newline=newline) as stream:
                yield stream
        except IOError as error:
            if exc_handler is not None:
                exc_handler(error)
def grep_sh(argv=None):
    """Run the grep command line described by *argv* and return an exit status.

    argv -- list of command-line arguments (without the program name);
            None lets argparse read sys.argv.

    Results are printed to standard output. Returns 2 if any file could not
    be processed, otherwise 1 if no line was selected in any input, otherwise
    0. Invalid option combinations are reported through parser.error().
    """
    import argparse

    parser = argparse.ArgumentParser(add_help=False)

    parser.add_argument('pattern', metavar='PATTERN', nargs='?')
    parser.add_argument('input_files', metavar='FILE', nargs='*')

    parser.add_argument('-e', '--regexp', metavar='PATTERN', action='append', dest='patterns')
    parser.add_argument('-f', '--file', metavar='FILE', action='append', dest='pattern_files')
    parser.add_argument('-i', '--ignore-case', action='store_true')
    parser.add_argument('-v', '--invert-match', action='store_true')
    parser.add_argument('-o', '--only-matching', action='store_true')
    parser.add_argument('-x', '--line-regexp', action='store_true', dest='line_match')
    parser.add_argument('-c', '--count', action='store_true')
    parser.add_argument('-m', '--max-count', type=int, metavar='NUM')
    parser.add_argument('-q', '--quiet', '--silent', dest='quiet', action='store_true')
    parser.add_argument('-s', '--no-messages', action='store_true')
    parser.add_argument('-A', '--after-context', metavar='NUM', type=int, default=0)
    parser.add_argument('-B', '--before-context', metavar='NUM', type=int, default=0)
    parser.add_argument('-C', '--context', metavar='NUM', type=int)
    parser.add_argument('-H', '--with-filename', action='store_const', dest='show_filename', const=True)
    parser.add_argument('-h', '--no-filename', action='store_const', dest='show_filename', const=False)
    parser.add_argument('-L', '--files-without-match', action='store_const', dest='files_matching', const=False)
    parser.add_argument('-l', '--files-with-matches', action='store_const', dest='files_matching', const=True)
    parser.add_argument('--label', default='(standard input)')
    parser.add_argument('-n', '--line-number', action='store_true')
    parser.add_argument('--help', action='help')

    group = parser.add_mutually_exclusive_group()
    group.add_argument('-F', '--fixed-strings', action='store_true')
    group.add_argument('-P', '--perl-regexp', action='store_true')
    group.add_argument('-E', '--extended-regexp', action='store_true')
    group.add_argument('-G', '--basic-regexp', action='store_true')

    args = parser.parse_args(args=argv)

    # Patterns may come from -e/--regexp and/or -f/--file. When either option
    # is present, the first positional argument is a file name, not a pattern.
    has_pattern_options = args.patterns is not None or args.pattern_files is not None
    if args.pattern is None:
        if not has_pattern_options:
            parser.error('Must specify at least one pattern')
    elif not has_pattern_options:
        args.patterns = [args.pattern]
    else:
        # The positional "pattern" came before the other file names on the
        # command line, so it goes to the front to keep the files in order.
        args.input_files.insert(0, args.pattern)
    args.pattern = None
    if args.patterns is None:
        args.patterns = []

    matcher_type = BasicMatcher
    if args.perl_regexp:
        matcher_type = PcreMatcher
    elif args.extended_regexp:
        matcher_type = PosixMatcher
    elif args.fixed_strings:
        matcher_type = FixedStringsMatcher

    if not args.input_files:
        # default to standard input
        args.input_files.append('-')

    if args.pattern_files is None:
        args.pattern_files = []

    if args.files_matching is not None:
        # One selected line is enough to classify a file.
        args.max_count = 1
        args.count = False

    if args.quiet:
        args.count = False
        args.files_matching = None
        args.max_count = 1

    # In these modes grep() reports one count per input instead of lines.
    yield_counts = args.count or args.files_matching is not None or args.quiet

    if yield_counts and (args.show_filename is not None or args.line_number):
        parser.error('Cannot use -H (--with-filename) or -n (--line-number) in conjunction with -c (--count), -L (--files-without-match), -l (--files-with-matches) or -q (--quiet or --silent).')

    if args.show_filename is None and not yield_counts:
        args.show_filename = len(args.input_files) > 1

    some_line_matched = False
    errors = []

    def file_exc_handler(e):
        # Report the failure unless messages are suppressed, but always
        # record it so that the exit status reflects the error.
        if not (args.no_messages or args.quiet):
            traceback.print_exc()
        errors.append(e)

    if args.context:
        # -C only supplies the values that -A / -B did not set explicitly.
        if not args.before_context:
            args.before_context = args.context
        if not args.after_context:
            args.after_context = args.context

    if args.only_matching and (args.after_context or args.before_context):
        parser.error('Cannot specify -o (--only-matching) together with context lines.')

    if args.only_matching and args.invert_match:
        parser.error('Cannot specify -o (--only-matching) together with -v (--invert-match).')

    count = 0
    save_after_context_lines = None
    last_line_number = 0
    last_input_file = None

    def get_filename(input_file):
        # Standard input has no useful name; show the configured label.
        if input_file is sys.stdin:
            return args.label
        return input_file.name

    def print_results(results, input_file, sep, first_line_number, lines):
        # Print each entry of results with the optional "file<sep>" and
        # "number<sep>" prefixes. When lines is true the entries are
        # consecutive lines and the number advances; otherwise they all
        # belong to the same line.
        filename = get_filename(input_file)
        line_number = first_line_number
        for line in results:
            if args.line_number:
                line = str(line_number) + sep + line
            if args.show_filename:
                line = filename + sep + line
            print(line)
            if lines:
                line_number += 1

    # cache these vars
    before_context = args.before_context
    after_context = args.after_context
    if yield_counts:
        # Context lines are never printed in count modes; dropping them here
        # avoids the ValueError grep() raises for that combination.
        before_context = after_context = 0
    any_context = after_context > 0 or before_context > 0

    # specifying newline='\n' is critical, otherwise python replaces '\r\n' with '\n'
    for match_tuple in grep(
        matcher=matcher_type(
            ignore_case=args.ignore_case,
            only_matching=args.only_matching,
            line_match=args.line_match
        ),
        patterns=args.patterns,
        invert_match=args.invert_match,
        max_count=args.max_count,
        before_context=before_context,
        after_context=after_context,
        yield_counts=yield_counts,
        pattern_files=file_generator(args.pattern_files, exc_handler=file_exc_handler, newline='\n'),
        inputs=file_generator(args.input_files, exc_handler=file_exc_handler, newline='\n')
    ):
        if yield_counts:
            input_file, count = match_tuple
        else:
            input_file, line_number, matches, before_context_lines, after_context_lines = match_tuple

        # Accumulate across results: a later input without matches must not
        # erase the fact that an earlier one matched.
        if not yield_counts or count > 0:
            some_line_matched = True

        if args.quiet:
            if some_line_matched:
                break
            continue

        if args.files_matching is not None:
            has_match = (count == 1)
            # if we want matching files and the file has a match
            # or if we want non-matching files and the file has no match
            if has_match == args.files_matching:
                print(get_filename(input_file))
            continue

        if yield_counts:
            # -c: there is no line number or context here, only the count.
            print(str(count))
            continue

        # The following bit handles the logic for overlapping before_context and after_context lines.
        # We never display the after_context_lines immediately. Instead we save them in
        # save_after_context_lines and print them before the next match (or after the very last match).
        # The possible cases are: (m=match, x=some line cached in either before_context_lines or save_after_context_lines)
        # ..mxxx..xxxm..  num_lines_before > before_context + after_context
        # ..mxxxxxm..     num_lines_before <= before_context + after_context
        # ..mxxm..        num_lines_before <= before_context
        # ..mm..          num_lines_before == 0
        if any_context:
            if save_after_context_lines is not None and input_file is not last_input_file:
                print_results(save_after_context_lines, last_input_file, '-', last_line_number + 1, lines=True)
                save_after_context_lines = None
                last_line_number = 0
                print('--')  # between files

            num_lines_before = line_number - last_line_number - 1
            if num_lines_before > 0:
                # this asks: should we use the save_after_context_lines lines at all?
                if num_lines_before > before_context and save_after_context_lines is not None:
                    save_after_context_lines = save_after_context_lines[:num_lines_before - before_context]
                    print_results(save_after_context_lines, input_file, '-', last_line_number + 1, lines=True)
                if num_lines_before > before_context + after_context and last_line_number > 0:
                    print('--')
                # for the calculation of line numbers in before_context_lines we need to make
                # sure we hold the accurate (and not excess) count of lines that will be printed
                if num_lines_before > before_context:
                    num_lines_before = before_context
                print_results(before_context_lines[-num_lines_before:], input_file, '-', line_number - num_lines_before, lines=True)

        print_results(matches, input_file, ':', line_number, lines=False)
        save_after_context_lines = after_context_lines
        last_line_number = line_number
        last_input_file = input_file

    if save_after_context_lines:
        # Trailing context of the very last match.
        print_results(save_after_context_lines, last_input_file, '-', last_line_number + 1, lines=True)

    if errors:
        return 2
    if not some_line_matched:
        return 1
    return 0
if __name__ == '__main__':
    # TODO : make this nicer
    # Script entry point: the first argument selects the sub-command and the
    # remaining arguments are passed on to it.
    if len(sys.argv) <= 1:
        raise Exception('Must specify command (options: grep)')
    command = sys.argv[1]
    if command != 'grep':
        raise Exception('Invalid command: %s' % (command,))
    sys.exit(grep_sh(sys.argv[2:]))