@@ -1,10 +1,9 @@
-from abc import ABC ,abstractmethod
+from abc import ABC, abstractmethod
 import requests
 from bs4 import BeautifulSoup
 from summarize import summarizetext
 
 
-
 class extractor(ABC):
     def __init__(self):
         self.url = ""
@@ -27,51 +26,54 @@ def __init__(self):
     def htmlparser(self):
         content = self.extracthtml()
         # only process articles limited by the limit defined
-        newsdivs = content.find_all("div", class_="col-md-4 pt-2")[:self.limit]
+        newsdivs = content.find_all("div", class_="col-md-4 pt-2")[: self.limit]
         news = []
 
         for div in newsdivs:
-
+
             news.append(self.extractinfo(div=div))
-
+
         return news
 
-    def extractinfo(self,div):
+    def extractinfo(self, div):
         try:
-            soup = BeautifulSoup(str(div).encode('utf-8').decode('ascii', 'ignore'), "html.parser")
-
+            soup = BeautifulSoup(
+                str(div).encode("utf-8").decode("ascii", "ignore"), "html.parser"
+            )
+
             # Extract the article URL and title
-            article_link = soup.find('a', href=True)
-            article_url = article_link['href'] if article_link else None
-            title_tag = soup.find('h6')
+            article_link = soup.find("a", href=True)
+            article_url = article_link["href"] if article_link else None
+            title_tag = soup.find("h6")
             article_title = title_tag.text.strip() if title_tag else None
-
+
             # Extract the image URL
-            img_tag = soup.find('img')
-            image_url = img_tag.get('src') if img_tag else None
+            img_tag = soup.find("img")
+            image_url = img_tag.get("src") if img_tag else None
 
             # get article from url
             page = BeautifulSoup(requests.get(article_url).content, "html.parser")
             article = page.find("div", class_="news_reader")
 
-            date_span = page.find('span', class_="greytime2")
+            date_span = page.find("span", class_="greytime2")
             date = date_span.get_text().split(" ")[1]
 
             summary = summarizetext(article.text)
-
+
             return {
-                'id': article_url,
-                'title': article_title,
-                'summary': summary,
-                'article_url': article_url,
-                'publish_time': date,
-                'image_url': image_url,
-                'source': self.source
+                "id": article_url,
+                "title": article_title,
+                "summary": summary,
+                "article_url": article_url,
+                "publish_time": date,
+                "image_url": image_url,
+                "source": self.source,
             }
         except Exception as e:
             print(f"Error parsing HTML snippet: {e}")
             return None
-
-if __name__ == '__main__':
+
+
+if __name__ == "__main__":
     test = fijivillage()
-    print(test.htmlparser())
+    print(test.htmlparser())
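For context, the abstract extractor base class only does work through a concrete subclass such as fijivillage, which is instantiated in the __main__ block but whose definition is outside the changed hunks. A minimal sketch of how such a subclass could plug into htmlparser() is below; the url, limit, and source values and the extracthtml() body are illustrative assumptions, not the repository's actual implementation.

# Hypothetical subclass, for illustration only; the real fijivillage class
# lives elsewhere in this file and may differ.
class fijivillage(extractor):
    def __init__(self):
        super().__init__()
        self.url = "https://example.com/news"  # assumed placeholder, not the real listing URL
        self.limit = 5                         # assumed cap on articles read by htmlparser()
        self.source = "fijivillage"            # assumed source label copied into each result dict

    def extracthtml(self):
        # Assumed hook: fetch the listing page and return parsed HTML for
        # htmlparser() to split into per-article "col-md-4 pt-2" divs.
        return BeautifulSoup(requests.get(self.url).content, "html.parser")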