Skip to content

Commit

Permalink
Fix tests
Browse files Browse the repository at this point in the history
  • Loading branch information
serhii73 committed Jan 23, 2025
1 parent b9320b3 commit 0ef132a
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 3 deletions.
5 changes: 2 additions & 3 deletions dateparser/custom_language_detection/fasttext.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import os

import fasttext

from dateparser.custom_language_detection.fasttext_wrapper import load_model
from dateparser_cli.exceptions import FastTextModelNotFoundException
from dateparser_cli.fasttext_manager import fasttext_downloader
from dateparser_cli.utils import create_data_model_home, dateparser_model_home
Expand All @@ -27,7 +26,7 @@ def _load_fasttext_model():
model_path = os.path.join(dateparser_model_home, downloaded_models[0])
if not os.path.isfile(model_path):
raise FastTextModelNotFoundException("Fasttext model file not found")
_FastTextCache.model = fasttext.load_model(model_path)
_FastTextCache.model = load_model(model_path)
return _FastTextCache.model


Expand Down
35 changes: 35 additions & 0 deletions dateparser/custom_language_detection/fasttext_wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import fasttext
import numpy as np


class FastTextWrapper:
    """Wrap a fastText model and expose a ``predict`` method.

    The model is loaded with ``fasttext.load_model`` and stored on
    ``self.model``; predictions are delegated to the object found at
    ``self.model.f``.
    """

    def __init__(self, model_path):
        """Load the fastText model stored at ``model_path``."""
        self.model = fasttext.load_model(model_path)

    @staticmethod
    def _as_single_line(entry):
        """Return ``entry`` with a trailing newline appended.

        Raises:
            ValueError: if ``entry`` already contains a newline character.
        """
        if entry.find("\n") >= 0:
            raise ValueError("predict processes one line at a time (remove '\\n')")
        return entry + "\n"

    def predict(self, text, k=1, threshold=0.0, on_unicode_error="strict"):
        """Predict labels for one line of text or for a list of lines.

        Args:
            text: a single string, or a ``list`` of strings. No string may
                contain a newline; one is appended before prediction.
            k: forwarded unchanged to the underlying predict call.
            threshold: forwarded unchanged to the underlying predict call.
            on_unicode_error: forwarded unchanged to the underlying
                predict call.

        Returns:
            For a ``list`` input, the ``(all_labels, all_probs)`` pair
            produced by ``multilinePredict``. For any other input, a
            ``(labels, probs)`` pair where ``labels`` is a tuple and
            ``probs`` is a NumPy array; when nothing is predicted,
            ``labels`` is ``()`` and ``probs`` is an empty array.

        Raises:
            ValueError: if any input string contains a newline.
        """
        if isinstance(text, list):
            lines = [self._as_single_line(item) for item in text]
            labels_per_line, probs_per_line = self.model.f.multilinePredict(
                lines, k, threshold, on_unicode_error
            )
            return labels_per_line, probs_per_line

        line = self._as_single_line(text)
        raw = self.model.f.predict(line, k, threshold, on_unicode_error)
        # Each prediction is unpacked as a (probability, label) pair.
        probs, labels = zip(*raw) if raw else ([], ())

        # np.asarray is used in place of np.array(..., copy=False).
        # NOTE(review): presumably for NumPy 2.x compatibility, where
        # copy=False raises if a copy is unavoidable - confirm.
        return labels, np.asarray(probs)


def load_model(model_path):
    """Build and return a ``FastTextWrapper`` for the model at ``model_path``."""
    wrapper = FastTextWrapper(model_path)
    return wrapper

0 comments on commit 0ef132a

Please sign in to comment.