-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodels.py
125 lines (101 loc) · 4.56 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Database setup
# from ast import Or
from sqlalchemy import DateTime, Float, create_engine, Column, Integer, String
from sqlalchemy.orm import sessionmaker, declarative_base
# from config import DATABASE_URL, DEFAULT_CITY
from config import Config
import os
from datetime import datetime, timezone
# Whoosh imports for search functionality
from whoosh.index import create_in, open_dir
from whoosh.fields import Schema, TEXT, ID
from whoosh.qparser import QueryParser, MultifieldParser, OrGroup
# NOTE(review): this prints the absolute path of 'news.db' resolved against the
# current working directory, while the engine below is built from
# Config.DATABASE_URL -- confirm the two actually refer to the same file.
print(f"Database path: {os.path.abspath('news.db')}")
# Set up the database connection from the URL held in Config.
engine = create_engine(Config.DATABASE_URL)
# Session factory bound to the engine above.
Session = sessionmaker(bind=engine)
# Module-level session instance, created once when this module is imported.
session = Session()
# Declarative base class that the table-mapped classes in this file inherit from.
Base = declarative_base()
# Define the NewsArticle table
class NewsArticle(Base):
    """Declarative mapping for rows of the ``news_articles`` table."""

    __tablename__ = 'news_articles'

    # Integer primary key.
    id = Column(Integer, primary_key=True)

    # Article headline; a value is required.
    headline = Column(String, nullable=False)

    # Article summary; may be left empty (NULL).
    summary = Column(String, nullable=True)

    # URL of the article; a value is required.
    link = Column(String, nullable=False)

    # Tracks changes: filled with the current UTC time when a row is inserted
    # and refreshed with the current UTC time whenever the row is updated.
    updated_at = Column(
        DateTime,
        default=lambda: datetime.now(timezone.utc),
        onupdate=lambda: datetime.now(timezone.utc),
    )
# Define the Weather table
class Weather(Base):
    """Declarative mapping for rows of the ``weather`` table."""

    __tablename__ = 'weather'

    # Integer primary key.
    id = Column(Integer, primary_key=True)

    # Temperature reading; a value is required.
    temp = Column(Float, nullable=False)

    # City name; falls back to Config.DEFAULT_CITY when none is supplied.
    city = Column(String, nullable=False, default=Config.DEFAULT_CITY)

    # Textual weather description; a value is required.
    description = Column(String, nullable=False)

    # The default is a callable, so the UTC timestamp is computed at insert
    # time rather than once when the class is defined.
    last_updated = Column(
        DateTime,
        default=lambda: datetime.now(timezone.utc),
    )
# Create every table declared on Base that does not exist yet.
Base.metadata.create_all(engine)

# Whoosh schema for indexed news articles. Every field is stored so search
# hits can be rendered straight from the index; ``id`` is the unique key.
news_article_schema = Schema(
    id=ID(stored=True, unique=True),
    headline=TEXT(stored=True),
    summary=TEXT(stored=True),
    link=TEXT(stored=True),
)

# Make sure the Whoosh index directory exists.
# NOTE(review): create_or_open_index() hard-codes "indexdir" instead of using
# this path -- confirm which directory is meant to hold the index.
index_dir = 'whoosh_index'
# exist_ok avoids the check-then-create race of `exists()` + `mkdir()`, which
# can raise FileExistsError when two processes import this module together.
os.makedirs(index_dir, exist_ok=True)
def create_or_open_index():
    """Return the Whoosh index for news articles, creating it when missing.

    The index lives in the ``indexdir`` directory. A new index is created when
    the directory does not exist *or* exists without an index inside it (for
    example after an interrupted first run); otherwise the existing index is
    opened.

    Returns:
        The Whoosh index object produced by ``create_in`` or ``open_dir``.
    """
    # Local import: `exists_in` is not part of the file-level import list.
    from whoosh.index import exists_in

    # NOTE(review): this differs from the module-level ``index_dir``
    # ('whoosh_index'). The path is kept unchanged so index data that was
    # already written to "indexdir" stays reachable.
    index_dir = "indexdir"

    # Race-free directory creation (no separate exists() check).
    os.makedirs(index_dir, exist_ok=True)

    # Checking for an actual index -- not merely the directory -- prevents
    # open_dir() from failing on an empty directory.
    if exists_in(index_dir):
        return open_dir(index_dir)

    # Reuse the module-level schema instead of repeating the same field list.
    return create_in(index_dir, news_article_schema)
# Function to add a news article to the Whoosh index
def add_article_to_index(article):
    """Add a news article to the Whoosh index, replacing any earlier copy.

    Args:
        article: Object exposing ``id``, ``headline``, ``summary`` and
            ``link`` attributes. A falsy ``summary`` is indexed as ``''``.
    """
    ix = create_or_open_index()
    # As a context manager the writer commits on success and cancels on an
    # exception, so a failure does not leave the index write lock held.
    with ix.writer() as writer:
        # update_document() first removes documents matching the unique ``id``
        # field, so indexing the same article twice no longer produces
        # duplicate search hits (add_document() does not enforce uniqueness).
        writer.update_document(
            id=str(article.id),
            headline=article.headline,
            summary=article.summary or '',
            link=article.link,
        )
# Function to search articles in the Whoosh index
def search_articles(query):
    """Search the ``headline`` field of the article index.

    The raw query, its parsed form and the number of hits are printed to
    standard output.

    Args:
        query: Query string handed to Whoosh's ``QueryParser``.

    Returns:
        The results object returned by the searcher's ``search`` call.
    """
    article_index = create_or_open_index()
    # NOTE(review): the searcher is never closed here. The returned results
    # presumably still rely on it -- confirm before adding a close().
    index_searcher = article_index.searcher()
    parsed_query = QueryParser("headline", article_index.schema).parse(query)

    # Log the query and parsed query to see what is being searched
    print(f"Searching for: {query}")
    print(f"Parsed query: {parsed_query}")

    results = index_searcher.search(parsed_query)
    print(f"Search results: {len(results)} articles found")
    return results
# Complex Query 1: Search in both headline and summary
def search_articles_complex_1(query):
    """Search both the ``headline`` and ``summary`` fields of the index.

    Args:
        query: Query string; parsed by a ``MultifieldParser`` configured with
            ``OrGroup`` as its grouping.

    Returns:
        The results object returned by the searcher's ``search`` call.
    """
    article_index = create_or_open_index()
    # NOTE(review): the searcher is left open, as in the other search helpers.
    index_searcher = article_index.searcher()

    searched_fields = ["headline", "summary"]
    multifield_parser = MultifieldParser(
        searched_fields,
        article_index.schema,
        group=OrGroup,
    )
    return index_searcher.search(multifield_parser.parse(query))
# Complex Query 2: Search with wildcards or exact phrases
def search_articles_complex_2(query):
    """Search headlines for the query as an exact phrase or as a prefix.

    The text handed to the parser is ``"<query>" OR <query>*``, built by
    interpolating ``query`` unchanged.

    Args:
        query: User-supplied search text.
            NOTE(review): quotes or several words inside ``query`` change how
            the combined expression parses -- confirm the inputs callers send.

    Returns:
        The results object returned by the searcher's ``search`` call.
    """
    article_index = create_or_open_index()
    # NOTE(review): the searcher is left open, as in the other search helpers.
    index_searcher = article_index.searcher()

    # Exact phrase, or any term that starts with the query text.
    headline_parser = QueryParser("headline", article_index.schema)
    parsed_query = headline_parser.parse(f'"{query}" OR {query}*')
    return index_searcher.search(parsed_query)