Skip to content

Commit

Permalink
Add a logger
Browse files Browse the repository at this point in the history
  • Loading branch information
David Moreno García committed Aug 31, 2016
1 parent 7bc524d commit 9b3f2bb
Showing 1 changed file with 18 additions and 3 deletions.
21 changes: 18 additions & 3 deletions normalizr/normalizr.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import codecs
import logging
import os
import re
import string
Expand All @@ -23,13 +24,27 @@ class Normalizr:
"""
__punctuation = set(string.punctuation)

def __init__(self, language='en', lazy_load=False):
def __init__(self, language='en', lazy_load=False, logger_level=logging.INFO):
"""
Store the language, create the logger and optionally load the stop words.
Params:
language (string): Language code; passed to _load_stop_words unless lazy_load is set.
lazy_load (boolean): If true, stop words are not loaded during construction.
logger_level (integer): Log level passed to _get_logger.
"""
self.__language = language
# The logger must exist before _load_stop_words runs, because that method logs through it.
self.__logger = self._get_logger(logger_level)
self.__stop_words = set()

if not lazy_load:
self._load_stop_words(language)

def _get_logger(self, level):
"""
Initialize logger.
Params:
level (integer): Log level as defined in logging.
"""
logging.basicConfig()
logger = logging.getLogger("Normalizr")
logger.setLevel(level)

return logger

def _load_stop_words(self, language):
"""
Load stop words into __stop_words set.
Expand All @@ -39,7 +54,7 @@ def _load_stop_words(self, language):
Params:
language (string): Language code.
"""
print('loading')
self.__logger.debug('loading stop words')
with codecs.open(os.path.join(path, 'data/stop-' + language), 'r', 'UTF-8') as file:
for line in file:
fields = line.split('|')
Expand Down Expand Up @@ -215,4 +230,4 @@ def replace_urls(self, text, replacement=''):
Returns:
The text without URLs.
"""
return re.sub(regex.URL_REGEX, replacement, text)
return re.sub(regex.URL_REGEX, replacement, text)

0 comments on commit 9b3f2bb

Please sign in to comment.