Skip to content

Commit 73db0ac

Browse files
committed
description of file
1 parent 78483e9 commit 73db0ac

File tree

3 files changed

+96
-20
lines changed

3 files changed

+96
-20
lines changed
953 Bytes
Binary file not shown.

lib/querying.py

+40
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,21 @@
33
import sys
44
from nltk.corpus import stopwords
55
import re
6+
import sys, os
7+
projectpath = os.path.dirname(os.path.realpath('querying.py'))
8+
#directory path
9+
libpath = projectpath + '/lib'
10+
#lib path
11+
sys.path.append(libpath)
12+
os.chdir(projectpath)
13+
import parsing
14+
import re
15+
import time
16+
collection = 'New Testament'
17+
#mongo folder
18+
# Switch to the collection's directory, located under <projectpath>/data/
19+
os.chdir(projectpath + '/data/' + collection)
20+
files = [file for file in os.listdir('.') if os.path.isfile(file)]
621

722
def cleanQuery(string):
823
frenchStopWords = stopwords.words('french')
@@ -33,4 +48,29 @@ def rankDocuments(index, words):
3348
rankings = list(reversed(sorted(rankings.items(), key=lambda x: x[1])))
3449
return rankings
3550

51+
def rankDocuments1(index, words, context=10):
    """Collect a text excerpt around each query word for every matching document.

    For every word of the query and every document listed for that word in
    ``index``, the tokens surrounding the word's first recorded position are
    taken from the corresponding file and appended to that document's excerpt.

    Args:
        index: mapping ``word -> {'document(s)': {document: {'position(s)': [...]}}}``.
        words: the query words; each one must be a key of ``index``.
        context: number of tokens kept on each side of the position
            (defaults to 10, the value that used to be hard-coded).

    Returns:
        A list of ``(document, excerpt)`` pairs, where ``excerpt`` is the
        concatenation of the token windows found for that document.

    Notes:
        Files come from the module-level ``files`` list and are opened by bare
        name, so the current working directory must be the collection folder.
        Positions are presumably offsets into the output of ``parsing.clean``
        -- TODO confirm against the indexing code.
    """
    excerpts = {}
    # Built on first use so that an empty query never touches the file list.
    files_by_stem = None
    # Each file is read and cleaned at most once per call.
    tokens_by_file = {}

    for word in words:
        for document, entry in index[word]['document(s)'].items():
            positions = entry['position(s)']
            if not positions:
                # Nothing to centre the excerpt on.
                continue

            if files_by_stem is None:
                files_by_stem = {}
                for filename in files:
                    # A document is identified by the file name up to its first dot.
                    stem = filename.partition('.')[0]
                    files_by_stem.setdefault(stem, []).append(filename)

            first = positions[0]
            # Clamp at 0: a negative start would count from the END of the
            # token list and return an empty or unrelated slice.
            start = max(0, first - context)
            stop = first + context

            for filename in files_by_stem.get(document, ()):
                if filename not in tokens_by_file:
                    with open(filename) as handle:
                        lines = handle.read().splitlines()
                    tokens_by_file[filename] = parsing.clean(lines)
                window = tokens_by_file[filename][start:stop]
                if document in excerpts:
                    excerpts[document] += window
                else:
                    excerpts[document] = window

    return list(excerpts.items())
75+
3676

main.py

+56-20
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,23 @@
11
import sys, os
2+
import itertools
23
projectpath = os.path.dirname(os.path.realpath('main.py'))
34
libpath = projectpath + '/lib'
45
sys.path.append(libpath)
56
os.chdir(projectpath)
67
from PyQt4 import QtCore, QtGui
78
from browser import Ui_MainWindow
8-
from querying import cleanQuery, rankDocuments
9+
from querying import cleanQuery, rankDocuments,rankDocuments1
910
from pymongo import MongoClient
1011

12+
import parsing
13+
import re
14+
import time
15+
collection = 'New Testament'
16+
#mongo folder
17+
# Switch to the collection's directory, located under <projectpath>/data/
18+
os.chdir(projectpath + '/data/' + collection)
19+
files = [file for file in os.listdir('.') if os.path.isfile(file)]
20+
1121
# Connect to the database containing inverted indexes
1222
client = MongoClient()
1323
db = client.Inverted_Index
@@ -17,30 +27,56 @@
1727

1828

1929
class browser(QtGui.QMainWindow):
    """Main window of the search UI.

    Wires the search button to ``query``, which looks the query words up in
    the inverted index and lists the best-ranked documents together with a
    text excerpt for each.
    """

    def __init__(self, parent = None):
        """Build the widgets from ``Ui_MainWindow`` and hook up the search button."""
        # Initialise the class we actually derive from.
        QtGui.QMainWindow.__init__(self, parent)
        self.ui = Ui_MainWindow()
        self.ui.setupUi(self)
        # Connect the query function with the search button
        self.ui.pushButton.clicked.connect(self.query)

    def query(self):
        """Run the query typed in the line edit and display the top 10 documents.

        For each displayed document one item ``"<document> : <score>"`` is
        added to the list widget, followed by an item holding the document's
        excerpt when ``rankDocuments1`` produced one.
        """
        # Empty the list
        self.ui.listWidget.clear()
        # Get the words in the query
        words = cleanQuery(self.ui.lineEdit.text())
        # Collect the information for each word of the query; words that are
        # not in the inverted index are ignored instead of raising.
        index = {}
        for word in words:
            entry = collection.find_one({'_id' : word})
            if entry is not None:
                index[word] = entry['info']
        known_words = [word for word in words if word in index]
        if not known_words:
            return
        # Rank the documents according to the query
        results = rankDocuments(index, known_words)
        # Look excerpts up by document name: the excerpt list may be shorter
        # than the ranking and is not in the same order.
        excerpts = dict(rankDocuments1(index, known_words))
        for document, score in results[:10]:
            self.ui.listWidget.addItem(document+' : '+str(round(score, 2)))
            excerpt = excerpts.get(document)
            if excerpt is not None:
                self.ui.listWidget.addItem(" ".join(excerpt))
79+
4480

4581
if __name__ == "__main__":
4682
app = QtGui.QApplication(sys.argv)

0 commit comments

Comments
 (0)