Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add property for given names #157

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 54 additions & 42 deletions nameparser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class HumanName(object):
* :py:attr:`title`
* :py:attr:`first`
* :py:attr:`middle`
* :py:attr:`given`
* :py:attr:`last`
* :py:attr:`suffix`
* :py:attr:`nickname`
Expand All @@ -58,7 +59,7 @@ class HumanName(object):
:param str encoding: string representing the encoding of your input
:param str string_format: python string formatting
:param str initials_format: python initials string formatting
:param str initials_delimter: string delimiter for initials
:param str initials_delimiter: string delimiter for initials
:param str first: first name
:param str middle: middle name
:param str last: last name
Expand Down Expand Up @@ -113,10 +114,10 @@ def __iter__(self):
return self

def __len__(self):
l = 0
length = 0
for x in self:
l += 1
return l
length += 1
return length

def __eq__(self, other):
"""
Expand Down Expand Up @@ -157,7 +158,8 @@ def __unicode__(self):
# string_format = "{title} {first} {middle} {last} {suffix} ({nickname})"
_s = self.string_format.format(**self.as_dict())
# remove trailing punctuation from missing nicknames
_s = _s.replace(str(self.C.empty_attribute_default), '').replace(" ()", "").replace(" ''", "").replace(' ""', "")
_s = _s.replace(str(self.C.empty_attribute_default), '').replace(" ()", "").replace(" ''", "").replace(
' ""', "")
return self.collapse_whitespace(_s).strip(', ')
return " ".join(self)

Expand Down Expand Up @@ -221,7 +223,7 @@ def __process_initial__(self, name_part, firstname=False):
initials = []
if len(parts) and isinstance(parts, list):
for part in parts:
if not (self.is_prefix(part) or self.is_conjunction(part)) or firstname == True:
if not (self.is_prefix(part) or self.is_conjunction(part)) or firstname is True:
initials.append(part[0])
if len(initials) > 0:
return " ".join(initials)
Expand Down Expand Up @@ -268,7 +270,7 @@ def initials(self):
last_initials_list = [self.__process_initial__(name) for name in self.last_list if name]

initials_dict = {
"first": (self.initials_delimiter + " ").join(first_initials_list) + self.initials_delimiter
"first": (self.initials_delimiter + " ").join(first_initials_list) + self.initials_delimiter
if len(first_initials_list) else self.C.empty_attribute_default,
"middle": (self.initials_delimiter + " ").join(middle_initials_list) + self.initials_delimiter
if len(middle_initials_list) else self.C.empty_attribute_default,
Expand Down Expand Up @@ -315,6 +317,14 @@ def middle(self):
"""
return " ".join(self.middle_list) or self.C.empty_attribute_default

@property
def given(self):
    """
    The person's given names: the pieces of :py:attr:`first` followed by
    the pieces of :py:attr:`middle`, joined with single spaces. Titles,
    last names, suffixes and nicknames are not included.

    Returns ``self.C.empty_attribute_default`` when there are neither
    first nor middle name pieces.
    """
    # Join the underlying lists rather than the ``first``/``middle`` string
    # properties: ``middle`` substitutes ``empty_attribute_default`` when its
    # list is empty, so a non-empty placeholder would leak into the result
    # and a ``None`` placeholder would make ``str.join`` raise ``TypeError``.
    return " ".join(self.first_list + self.middle_list) or self.C.empty_attribute_default

@property
def last(self):
"""
Expand All @@ -326,7 +336,7 @@ def last(self):
@property
def suffix(self):
"""
The persons's suffixes. Pieces at the end of the name that are found in
The person's suffixes. Pieces at the end of the name that are found in
:py:mod:`~nameparser.config.suffixes`, or pieces that are at the end
of comma separated formats, e.g.
"Lastname, Title Firstname Middle[,] Suffix [, Suffix]" parsed
Expand Down Expand Up @@ -369,7 +379,7 @@ def _set_list(self, attr, value):
raise TypeError(
"Can only assign strings, lists or None to name attributes."
" Got {0}".format(type(value)))
setattr(self, attr+"_list", self.parse_pieces(val))
setattr(self, attr + "_list", self.parse_pieces(val))

@title.setter
def title(self, value):
Expand Down Expand Up @@ -564,8 +574,8 @@ def handle_firstnames(self):
a first name.
"""
if self.title \
and len(self) == 2 \
and not lc(self.title) in self.C.first_name_titles:
and len(self) == 2 \
and lc(self.title) not in self.C.first_name_titles:
self.last, self.first = self.first, self.last

def parse_full_name(self):
Expand Down Expand Up @@ -615,8 +625,8 @@ def parse_full_name(self):

# title must have a next piece, unless it's just a title
if not self.first \
and (nxt or p_len == 1) \
and self.is_title(piece):
and (nxt or p_len == 1) \
and self.is_title(piece):
self.title_list.append(piece)
continue
if not self.first:
Expand All @@ -625,15 +635,15 @@ def parse_full_name(self):
continue
self.first_list.append(piece)
continue
if self.are_suffixes(pieces[i+1:]) or \
(
# if the next piece is the last piece and a roman
# numeral but this piece is not an initial
self.is_roman_numeral(nxt) and i == p_len - 2
and not self.is_an_initial(piece)
):
if self.are_suffixes(pieces[i + 1:]) or \
(
# if the next piece is the last piece and a roman
# numeral but this piece is not an initial
self.is_roman_numeral(nxt) and i == p_len - 2
and not self.is_an_initial(piece)
):
self.last_list.append(piece)
self.suffix_list += pieces[i+1:]
self.suffix_list += pieces[i + 1:]
break
if not nxt:
self.last_list.append(piece)
Expand All @@ -649,7 +659,7 @@ def parse_full_name(self):
post_comma_pieces = self.parse_pieces(parts[1].split(' '), 1)

if self.are_suffixes(parts[1].split(' ')) \
and len(parts[0].split(' ')) > 1:
and len(parts[0].split(' ')) > 1:

# suffix comma:
# title first middle last [suffix], suffix [suffix] [, suffix]
Expand All @@ -665,16 +675,16 @@ def parse_full_name(self):
nxt = None

if not self.first \
and (nxt or len(pieces) == 1) \
and self.is_title(piece):
and (nxt or len(pieces) == 1) \
and self.is_title(piece):
self.title_list.append(piece)
continue
if not self.first:
self.first_list.append(piece)
continue
if self.are_suffixes(pieces[i+1:]):
if self.are_suffixes(pieces[i + 1:]):
self.last_list.append(piece)
self.suffix_list = pieces[i+1:] + self.suffix_list
self.suffix_list = pieces[i + 1:] + self.suffix_list
break
if not nxt:
self.last_list.append(piece)
Expand Down Expand Up @@ -705,8 +715,8 @@ def parse_full_name(self):
nxt = None

if not self.first \
and (nxt or len(post_comma_pieces) == 1) \
and self.is_title(piece):
and (nxt or len(post_comma_pieces) == 1) \
and self.is_title(piece):
self.title_list.append(piece)
continue
if not self.first:
Expand Down Expand Up @@ -761,7 +771,7 @@ def parse_pieces(self, parts, additional_parts_count=0):
# split on periods, any of the split pieces titles or suffixes?
# ("Lt.Gov.")
period_chunks = part.split(".")
titles = list(filter(self.is_title, period_chunks))
titles = list(filter(self.is_title, period_chunks))
suffixes = list(filter(self.is_suffix, period_chunks))

# add the part to the constant so it will be found
Expand Down Expand Up @@ -813,7 +823,7 @@ def join_on_conjunctions(self, pieces, additional_parts_count=0):
contiguous_conj_i = []
for i, val in enumerate(conj_index):
try:
if conj_index[i+1] == val+1:
if conj_index[i + 1] == val + 1:
contiguous_conj_i += [val]
except IndexError:
pass
Expand All @@ -823,12 +833,12 @@ def join_on_conjunctions(self, pieces, additional_parts_count=0):
delete_i = []
for i in contiguous_conj_i:
if type(i) == tuple:
new_piece = " ".join(pieces[i[0]: i[1]+1])
delete_i += list(range(i[0]+1, i[1]+1))
new_piece = " ".join(pieces[i[0]: i[1] + 1])
delete_i += list(range(i[0] + 1, i[1] + 1))
pieces[i[0]] = new_piece
else:
new_piece = " ".join(pieces[i: i+2])
delete_i += [i+1]
new_piece = " ".join(pieces[i: i + 2])
delete_i += [i + 1]
pieces[i] = new_piece
# add newly joined conjunctions to constants to be found later
self.C.conjunctions.add(new_piece)
Expand All @@ -853,23 +863,23 @@ def join_on_conjunctions(self, pieces, additional_parts_count=0):
continue

if i == 0:
new_piece = " ".join(pieces[i:i+2])
if self.is_title(pieces[i+1]):
new_piece = " ".join(pieces[i:i + 2])
if self.is_title(pieces[i + 1]):
# when joining to a title, make new_piece a title too
self.C.titles.add(new_piece)
pieces[i] = new_piece
pieces.pop(i+1)
pieces.pop(i + 1)
# subtract 1 from the index of all the remaining conjunctions
for j, val in enumerate(conj_index):
if val > i:
conj_index[j] = val-1
conj_index[j] = val - 1

else:
new_piece = " ".join(pieces[i-1:i+2])
if self.is_title(pieces[i-1]):
new_piece = " ".join(pieces[i - 1:i + 2])
if self.is_title(pieces[i - 1]):
# when joining to a title, make new_piece a title too
self.C.titles.add(new_piece)
pieces[i-1] = new_piece
pieces[i - 1] = new_piece
pieces.pop(i)
rm_count = 2
try:
Expand Down Expand Up @@ -932,7 +942,7 @@ def join_on_conjunctions(self, pieces, additional_parts_count=0):

def cap_word(self, word, attribute):
if (self.is_prefix(word) and attribute in ('last', 'middle')) \
or self.is_conjunction(word):
or self.is_conjunction(word):
return word.lower()
exceptions = self.C.capitalization_exceptions
if lc(word) in exceptions:
Expand All @@ -941,6 +951,7 @@ def cap_word(self, word, attribute):
if mac_match:
def cap_after_mac(m):
return m.group(1).capitalize() + m.group(2).capitalize()

return self.C.regexes.mac.sub(cap_after_mac, word)
else:
return word.capitalize()
Expand All @@ -950,6 +961,7 @@ def cap_piece(self, piece, attribute):
return ""

def replacement(m): return self.cap_word(m.group(0), attribute)

return self.C.regexes.word.sub(replacement, piece)

def capitalize(self, force=None):
Expand Down
4 changes: 4 additions & 0 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,10 @@ def test_surnames_attribute(self):
hn = HumanName("John Edgar Casey Williams III")
self.m(hn.surnames, "Edgar Casey Williams", hn)

def test_given_names(self):
    # Given names are the first and middle names only: the title ("Dr.")
    # and the prefixed last name ("de la Vega") must be left out.
    parsed = HumanName("Dr. Juan Q. Xavier de la Vega")
    expected = "Juan Q. Xavier"
    self.m(parsed.given, expected, parsed)

def test_is_prefix_with_list(self):
hn = HumanName()
items = ['firstname', 'lastname', 'del']
Expand Down