|
1 | 1 | import sys, os
|
| 2 | +import itertools |
2 | 3 | projectpath = os.path.dirname(os.path.realpath('main.py'))
|
3 | 4 | libpath = projectpath + '/lib'
|
4 | 5 | sys.path.append(libpath)
|
5 | 6 | os.chdir(projectpath)
|
6 | 7 | from PyQt4 import QtCore, QtGui
|
7 | 8 | from browser import Ui_MainWindow
|
8 |
| -from querying import cleanQuery, rankDocuments |
| 9 | +from querying import cleanQuery, rankDocuments,rankDocuments1 |
9 | 10 | from pymongo import MongoClient
|
10 | 11 |
|
| 12 | +import parsing |
| 13 | +import re |
| 14 | +import time |
| 15 | +collection = 'New Testament' |
| 16 | +#mongo folder |
| 17 | +# Move into the collection's data directory (projectpath + '/data/' + collection) |
| 18 | +os.chdir(projectpath + '/data/' + collection) |
| 19 | +files = [file for file in os.listdir('.') if os.path.isfile(file)] |
| 20 | + |
11 | 21 | # Connect to the database containing inverted indexes
|
12 | 22 | client = MongoClient()
|
13 | 23 | db = client.Inverted_Index
|
|
17 | 27 |
|
18 | 28 |
|
class browser(QtGui.QMainWindow):
    """Main search window: runs a query and lists the ranked documents.

    The widgets come from ``Ui_MainWindow``. This class only wires the
    search button to :meth:`query` and fills the result list.
    """

    # Maximum number of ranked documents displayed for one query.
    MAX_RESULTS = 10

    def __init__(self, parent=None):
        """Build the UI and connect the search button.

        Args:
            parent: optional parent widget, forwarded to the Qt base class.
        """
        # Initialise through the declared base class rather than QWidget.
        QtGui.QMainWindow.__init__(self, parent)
        self.ui = Ui_MainWindow()
        self.ui.setupUi(self)
        # Connect the query function with the search button
        self.ui.pushButton.clicked.connect(self.query)

    def query(self):
        """Run the query typed in the line edit and display the best matches.

        For each of the first ``MAX_RESULTS`` entries returned by
        ``rankDocuments`` the list shows ``"<name> : <score>"`` (score
        rounded to two decimals). It is followed by the space-joined words
        of the ``rankDocuments1`` entry with the same name, when one exists.

        Query words that have no entry in the index collection are ignored.
        If no query word is indexed, the list is simply left empty.
        """
        list_widget = self.ui.listWidget
        # Empty the list
        list_widget.clear()
        # Get the words in the query
        words = cleanQuery(self.ui.lineEdit.text())

        # Collect the information for each word of the query.
        # `collection` is the module-level index collection (bound outside
        # this class). find_one returns None for an unknown word, where
        # indexing an empty cursor would raise IndexError.
        index = {}
        for word in words:
            entry = collection.find_one({'_id': word})
            if entry is not None:
                index[word] = entry['info']

        # Rank only on words that actually have index information.
        words = [word for word in words if word in index]
        if not words:
            return

        # Rank the documents according to the query
        results = rankDocuments(index, words)
        results1 = rankDocuments1(index, words)

        # Map each document name to the words of its first rankDocuments1
        # entry. Built once, so the per-result lookup is O(1) and never
        # indexes past the end of results1.
        snippets = {}
        for entry in results1:
            snippets.setdefault(entry[0], entry[1])

        for result in results[:self.MAX_RESULTS]:
            name = result[0]
            list_widget.addItem(name + ' : ' + str(round(result[1], 2)))
            # Only show a snippet that belongs to this document; without a
            # match, nothing is shown rather than another document's text.
            if name in snippets:
                list_widget.addItem(" ".join(snippets[name]))
| 79 | + |
44 | 80 |
|
45 | 81 | if __name__ == "__main__":
|
46 | 82 | app = QtGui.QApplication(sys.argv)
|
|
0 commit comments