@@ -96,12 +96,16 @@ def parseNonTargetNamespace(
96
96
97
97
detector = mwreverts .Detector ()
98
98
99
+ pageEdits = 0
100
+
99
101
for revision in tqdm .tqdm (
100
102
page , desc = title , unit = " edits" , smoothing = 0 , disable = parallel
101
103
):
102
104
if not revision .user :
103
105
continue
104
106
107
+ pageEdits = pageEdits + 1
108
+
105
109
# Check if not None as there is a user 0, Larry Sanger
106
110
if revision .user .id is not None :
107
111
userId = revision .user .id
@@ -175,6 +179,11 @@ def parseNonTargetNamespace(
175
179
),
176
180
)
177
181
182
+
183
# Store the number of revisions counted for this page (revisions without a
# user were skipped by the loop and are not included in pageEdits).
# UPDATE requires "SET column = value"; the "(columns) VALUES (...)" form is
# INSERT syntax and is rejected by the database when used with UPDATE.
query = """UPDATE page
    SET number_of_edits = %s
    WHERE title = %s;"""
cursor.execute(query, (pageEdits, title))
178
187
179
188
def parseTargetNamespace (page , title : str , namespace : str , cursor , parallel : str ):
180
189
"""Extracts features from each revision of a page into a database
@@ -200,13 +209,17 @@ def parseTargetNamespace(page, title: str, namespace: str, cursor, parallel: str
200
209
201
210
detector = mwreverts .Detector ()
202
211
212
+ pageEdits = 0
213
+
203
214
## Extract page features from each revision
204
215
for revision in tqdm .tqdm (
205
216
page , desc = title , unit = " edits" , smoothing = 0 , disable = parallel
206
217
):
207
218
if not revision .user :
208
219
continue
209
220
221
+ pageEdits = pageEdits + 1
222
+
210
223
# Check if not None as there is a user 0, Larry Sanger
211
224
if revision .user .id is not None :
212
225
userId = revision .user .id
@@ -366,6 +379,11 @@ def parseTargetNamespace(page, title: str, namespace: str, cursor, parallel: str
366
379
## Insert page features into database
367
380
cursor .execute (query , editTuple )
368
381
382
# Store the number of revisions counted for this page (revisions without a
# user were skipped by the loop and are not included in pageEdits).
# UPDATE requires "SET column = value"; the "(columns) VALUES (...)" form is
# INSERT syntax and is rejected by the database when used with UPDATE.
query = """UPDATE page
    SET number_of_edits = %s
    WHERE title = %s;"""
cursor.execute(query, (pageEdits, title))
386
+
369
387
370
388
def getDiff (old : str , new : str , parallel : str ) -> Tuple [str , str ]:
371
389
"""Returns the diff between two edits using wdiff
0 commit comments