-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcsvunmerge
executable file
·81 lines (67 loc) · 2.38 KB
/
csvunmerge
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/python3
import csv
import sys
import argparse
import re
from collections import deque
def main():
    """Parse the command line, then run the unmerge processor on the result."""
    Processor(parse_args()).process()
def parse_args(argv=None):
    """Parse the command-line options.

    Args:
        argv: Sequence of argument strings to parse, without the program
            name.  ``None`` (the default) lets argparse read
            ``sys.argv[1:]``, which is what a call without arguments did
            before this parameter existed.

    Returns:
        The ``argparse.Namespace`` with the attributes ``file``,
        ``unmerge_index``, ``left_paren``, ``right_paren``, ``comma`` and
        ``trim``.
    """
    parser = argparse.ArgumentParser()
    # Optional positional: when omitted, the input is read from STDIN.
    parser.add_argument('file', metavar='FILE', type=argparse.FileType('r'),
                        nargs='?', default=sys.stdin,
                        help='input filename (default to STDIN)')
    parser.add_argument('-u', '--unmerge', metavar='COL', dest='unmerge_index',
                        type=int, required=True,
                        help='column index to be parsed (unmerged)')
    # The three delimiters below are matched literally in the unmerge column.
    parser.add_argument('-l', '--left-paren', default=' (',
                        help='left parenthesis (default to " (")')
    parser.add_argument('-r', '--right-paren', default=')',
                        help='right parenthesis (default to ")")')
    parser.add_argument('-c', '--comma', default=', ',
                        help='comma (default to ", ")')
    parser.add_argument('-t', '--trim', action='store_true',
                        help='trim parsed (unmerged) columns')
    return parser.parse_args(argv)
class Processor(object):
    """Unmerge one CSV column that holds a nested, delimited list.

    With the delimiters ``' ('``, ``', '`` and ``')'``, a cell such as
    ``A (B, C)`` expands its row into one output row per leaf path: the cell
    is replaced by the columns ``A, B`` in the first copy of the row and by
    ``A, C`` in the second.
    """

    def __init__(self, args):
        """Copy the settings from *args* and compile the delimiter pattern.

        *args* must provide the attributes ``file``, ``unmerge_index``,
        ``left_paren``, ``comma``, ``right_paren`` and ``trim``.
        """
        for k in ['file', 'unmerge_index', 'left_paren', 'comma',
                  'right_paren', 'trim']:
            setattr(self, k, getattr(args, k))
        # The capturing group makes re.split() keep the delimiters in its
        # result, so the parser can tell which delimiter separated two values.
        self.extracted_col_rows_p = re.compile('({}|{}|{})'.format(
            re.escape(self.left_paren), re.escape(self.comma),
            re.escape(self.right_paren)))

    def process(self):
        """Read CSV rows from ``self.file`` and write unmerged rows to stdout.

        Output is written row by row instead of being collected in memory
        first.  Rows that do not have the unmerge column at all (such as the
        empty row ``csv.reader`` yields for a blank line) are written
        unchanged instead of raising ``IndexError``.
        """
        index = self.unmerge_index
        writer = csv.writer(sys.stdout)
        for row in csv.reader(self.file):
            if not -len(row) <= index < len(row):
                # Nothing to unmerge in this row: pass it through as-is.
                writer.writerow(row)
                continue
            extracted = self.extracted_col_rows(row[index])
            writer.writerows(self.stamped_rows(row, index, extracted))

    def stamped_rows(self, mail_row, index, label_rows):
        """Duplicate mail for each label, stamp each label at index

        Returns a generator of new rows; in each one the single column at
        *index* is replaced by all columns of one entry of *label_rows*.
        A negative *index* counts from the end of *mail_row*.
        """
        if index < 0:
            # Normalise first: with a raw -1 the tail slice would start at
            # index 0 and append the entire row again.
            index += len(mail_row)
        return (mail_row[:index] + label_row + mail_row[index + 1:]
                for label_row in label_rows)

    def extracted_col_rows(self, s):
        """Extract (parse) a column into rows of columns

        Yields one list per leaf of the nested list in *s*; each list holds
        the values on the path from the outermost level down to that leaf.
        Values are stripped of surrounding whitespace when ``self.trim`` is
        set.  Text that directly follows a right delimiter is ignored, and
        surplus right delimiters are tolerated.
        """
        # The extra right delimiter closes the implicit outermost level so
        # that the last pending path is flushed.
        tokens = self.extracted_col_rows_p.split(s + self.right_paren)
        value = object()  # marks "the previous token was a value"
        stack = deque()   # values on the path to the current position
        prev = None
        for token in tokens:
            if token == self.left_paren:
                prev = token
            elif token == self.comma or token == self.right_paren:
                if prev is value:
                    # A value immediately before a separator is a leaf.
                    yield list(stack)
                # Leave the current level.  The stack is already empty when
                # the input has more right than left delimiters.
                if stack:
                    stack.pop()
                prev = token
            elif prev != self.right_paren:
                stack.append(token.strip() if self.trim else token)
                prev = value
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
# fikr4n 2015