2
2
3
3
# Sample annotation export used as demo input for ``process_content``.
# Tagged notes open with a start tag (e.g. ``#g``) and close with the
# matching end tag (e.g. ``g#``); a single note may span several lines.
corestring = """Extracted Annotations (7/13/2018, 9:14:41 PM)
sample notes extracted from PDF
#g eita ek
line g#
#b asd asd
asd b#
#p asd asd asd p#
#i asd asd asd i#
#g asd asd asd g#
#b asd asd asd b#
#p point point
point p#

"""
16
17
@@ -26,7 +27,7 @@ def __count_all_tags(text: str) -> int:
26
27
__count_tags (text , "#p" , "p#" ) + __count_tags (text , "#c" , "c#" )
27
28
28
29
29
- def __find_tag (text : str ):
30
+ def __find_start_tag_in_line (text : str ):
30
31
if list (tag_pairs )[0 ] in text :
31
32
return list (tag_pairs )[0 ]
32
33
if list (tag_pairs )[1 ] in text :
@@ -37,50 +38,41 @@ def __find_tag(text: str):
37
38
return list (tag_pairs )[3 ]
38
39
39
40
40
- # all_lines: List[str] = corestring.splitlines()
41
-
42
- # good_points: List[str] = []
43
- # bad_points: List[str] = []
44
- # comments: List[str] = []
45
- # i_points: List[str] = []
46
- # all_comments = {"b#": bad_points, "g#": good_points,
47
- # "c#": comments, "p#": i_points}
48
-
49
- # flag = False
50
- # end_tag: str = ""
51
- # for line in all_lines:
52
- # if list(tag_pairs)[0] in line or list(tag_pairs)[1] in line or \
53
- # list(tag_pairs)[2] in line or list(tag_pairs)[3] in line:
54
- # flag = True
55
- # end_tag = tag_pairs[__find_tag(line)]
56
- # if flag:
57
- # all_comments[end_tag].append(line)
58
- # if end_tag in line:
59
- # flag = False
60
-
61
-
62
41
def __beautify_output_lines(lines: List[str], tag_type: str, start_tag: str,
                            markdown: bool = False) -> str:
    """Merge the raw lines collected for one tag into a titled text section.

    Args:
        lines: Raw input lines belonging to this tag, in order. A note may
            span several lines; it ends on the line containing the end tag.
        tag_type: Section title; it is upper-cased for the heading.
        start_tag: Opening tag (a key of the module-level ``tag_pairs``); the
            matching end tag is looked up from ``tag_pairs``.
        markdown: When true, the heading is prefixed with ``"# "`` and every
            line that contains the start tag begins a ``-`` bullet.

    Returns:
        An empty string when ``lines`` is empty; otherwise a newline followed
        by the heading and one output line per note, with the start and end
        tags removed.

    Raises:
        KeyError: If ``lines`` is non-empty and ``start_tag`` is not a key of
            ``tag_pairs``.
    """
    if not lines:
        print("no lines found, exiting")
        return ""

    heading = tag_type.upper()
    if markdown:
        heading = "# " + heading
    end_tag: str = tag_pairs[start_tag]

    pieces: List[str] = [heading, "\n"]
    for line in lines:
        if markdown and start_tag in line:
            pieces.append("-")
        # Continuation lines are appended to the current note, separated by
        # a single space.
        pieces.append(" " + line.strip())
        if end_tag in line:
            # The end tag closes the note, so terminate the output line.
            pieces.append("\n")

    combined_line = "".join(pieces)
    for tag in (start_tag, end_tag):
        combined_line = combined_line.replace(tag, "")
    # Removing a tag leaves two adjacent spaces behind; collapse them.
    combined_line = combined_line.replace("  ", " ")
    return "\n" + combined_line
82
67
83
- def process_content (value : str , markdown : bool = False )-> str :
68
+
69
def __start_tag_in_line(line: str) -> bool:
    """Return True when ``line`` contains one of the known start tags.

    Start tags are the keys of the module-level ``tag_pairs`` mapping. Only
    the first four keys are considered, matching the four positions this
    check has always inspected; a mapping with fewer than four keys is
    handled without raising ``IndexError``.
    """
    # Materialize the key list once instead of once per comparison.
    start_tags = list(tag_pairs)[:4]
    return any(start_tag in line for start_tag in start_tags)
73
+
74
+
75
+ def process_content (value : str , markdown : bool = False ) -> str :
84
76
global tag_pairs
85
77
tag_count = __count_all_tags (value )
86
78
if tag_count % 2 is not 0 :
@@ -93,25 +85,31 @@ def process_content(value: str, markdown: bool=False)->str:
93
85
i_points : List [str ] = []
94
86
all_comments = {"b#" : bad_points , "g#" : good_points ,
95
87
"c#" : comments , "p#" : i_points }
96
- flag = False
88
+ is_looking_for_end_tag = False
97
89
end_tag : str = ""
98
90
for line in all_lines :
99
- if list (tag_pairs )[0 ] in line or list (tag_pairs )[1 ] in line or \
100
- list (tag_pairs )[2 ] in line or list (tag_pairs )[3 ] in line :
101
- flag = True
102
- end_tag = tag_pairs [__find_tag (line )]
103
- if flag :
91
+ # print("processing: "+line)
92
+ if __start_tag_in_line (line ):
93
+ is_looking_for_end_tag = True
94
+ end_tag = tag_pairs [__find_start_tag_in_line (line )]
95
+ if is_looking_for_end_tag :
96
+ # print("looking for end tag: " + line)
104
97
all_comments [end_tag ].append (line )
105
98
if end_tag in line :
106
- flag = False
99
+ # print("found end tag: " + line)
100
+ is_looking_for_end_tag = False
107
101
full_content : str = __beautify_output_lines (
108
- good_points , "Good Points" , "#g" , markdown ) + " \n "
102
+ good_points , "Good Points" , "#g" , markdown )
109
103
full_content += __beautify_output_lines (bad_points ,
110
104
"Bad Points" ,
111
- "#b" , markdown ) + " \n "
105
+ "#b" , markdown )
112
106
full_content += __beautify_output_lines (comments ,
113
- "Comments" , "#c" , markdown ) + " \n "
107
+ "Comments" , "#c" , markdown )
114
108
full_content += __beautify_output_lines (i_points ,
115
109
"Intersting Points" ,
116
- "#p" , markdown ) + " \n "
110
+ "#p" , markdown )
117
111
return full_content
112
+
113
+
114
if __name__ == "__main__":
    # Demo entry point: run the bundled sample text through process_content
    # with markdown output enabled and print the result.
    print(process_content(corestring, markdown=True))
0 commit comments