Skip to content

Commit

Permalink
Merge pull request #57 from GNS-Science/feature/56-location-macrons
Browse files Browse the repository at this point in the history
Feature/56 location macrons
  • Loading branch information
chrisdicaprio authored Nov 20, 2024
2 parents a0f90c8 + d7bf3a1 commit 63110b9
Show file tree
Hide file tree
Showing 12 changed files with 2,605 additions and 12 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.8.3
current_version = 0.8.4
commit = True
tag = True

Expand Down
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.8.4] - 2024-11-20

### Added
- function `get_name_with_macrons` to map place names to correct Māori spelling with macrons

### Changed
- moved `location` package resources to sub-package and import with `importlib.resources`

## [0.8.3] - 2024-10-09

### Changed
Expand Down
2 changes: 1 addition & 1 deletion nzshm_common/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
__author__ = "GNS Science"
__email__ = '[email protected]'
__version__ = '0.8.3'
__version__ = '0.8.4'


# Common classes at the top level for convenience
Expand Down
2 changes: 1 addition & 1 deletion nzshm_common/location/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@
"""

from .coded_location import CodedLocation, CodedLocationBin
from .location import get_location_list, get_location_list_names, get_locations, location_by_id
from .location import get_location_list, get_location_list_names, get_locations, get_name_with_macrons, location_by_id
77 changes: 70 additions & 7 deletions nzshm_common/location/location.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import csv
import importlib.resources as resources
import json
from collections import namedtuple
from pathlib import Path
Expand All @@ -16,13 +17,15 @@
# Omitting country for now, focus on NZ
# https://service.unece.org/trade/locode/nz.htm

locations_filepath = Path(Path(__file__).parent, 'locations.json')
with open(locations_filepath, 'r') as locations_file:
LOCATIONS = json.load(locations_file)
resource_dir = resources.files('nzshm_common.location.resources')

nz_ids_filepath = Path(Path(__file__).parent, 'nz_ids.json')
with open(nz_ids_filepath, 'r') as nz_ids_file:
NZ_IDS = json.load(nz_ids_file)
# Load the packaged JSON resources. JSON is UTF-8 by specification, so decode
# explicitly rather than relying on the platform's default text encoding
# (which is not UTF-8 on every system).
with resources.as_file(resource_dir / 'locations.json') as path:
    with path.open(encoding='utf-8') as file:
        LOCATIONS = json.load(file)

with resources.as_file(resource_dir / 'nz_ids.json') as path:
    with path.open(encoding='utf-8') as file:
        NZ_IDS = json.load(file)

# Index of the location records keyed by their "id" field.
LOCATIONS_BY_ID: Dict[str, Any] = {location["id"]: location for location in LOCATIONS}

Expand All @@ -47,12 +50,72 @@
},
"ALL": {
"id": "ALL",
"name": "Seismic Risk Working Group NZ code locations",
"name": "All locations",
"locations": list(map(lambda loc: loc["id"], LOCATIONS)),
},
}


def _get_macron_word_mapping() -> Dict[str, str]:
    """Build a lookup from macron-free words to their spelling with macrons.

    The mapping is derived from the ``maori_names.csv`` resource as received from
    LINZ, rather than from a stored mapping, so the data can be updated by
    replacing the CSV without rebuilding a derived resource.

    Returns:
        A dict whose keys are words with every macron vowel replaced by the plain
        vowel, and whose values are the same words as spelled in the CSV. Only
        words containing at least one macron vowel are included. If the same
        macron-free word occurs more than once, the last occurrence wins.
    """
    char_map_lower = {
        'ā': 'a',
        'ē': 'e',
        'ī': 'i',
        'ō': 'o',
        'ū': 'u',
    }
    # Cover both lower- and upper-case macron vowels.
    char_map = {}
    for macron, plain in char_map_lower.items():
        char_map[macron] = plain
        char_map[macron.upper()] = plain.upper()
    translation_table = str.maketrans(char_map)

    word_mapping: Dict[str, str] = {}
    with resources.as_file(resource_dir / 'maori_names.csv') as path:
        # The file contains macron characters, so decode it explicitly as UTF-8
        # instead of using the platform default encoding; newline='' is what the
        # csv module requires for correct handling of embedded line breaks.
        with path.open(encoding='utf-8', newline='') as file:
            reader = csv.reader(file)
            next(reader, None)  # skip the header row (tolerates an empty file)
            for row in reader:
                if len(row) < 2:  # blank or short row: no name column to read
                    continue
                name = row[1]  # second column of LINZ file contains names
                for word in name.split():  # treat each whole word separately
                    # add to mapping if any characters have a macron
                    if any(char in char_map for char in word):
                        word_mapping[word.translate(translation_table)] = word
    return word_mapping


# Built once when the module is imported; maps a macron-free word to its spelling with macrons.
WORD_MAPPING = _get_macron_word_mapping()


def get_name_with_macrons(name_input: str) -> str:
    """
    Add macrons to the Māori words of a place name.

    Spellings are taken from the LINZ "Place names of New Zealand" list.
    See https://www.linz.govt.nz/products-services/place-names/place-names-new-zealand
    and https://gazetteer.linz.govt.nz/maori_names.csv

    The name is split on whitespace and every word is looked up on its own; a word
    with no entry is kept exactly as given. The words are then re-joined with
    single spaces.

    Args:
        name_input: the place name to correct with macrons

    Returns:
        the place name with the correct Māori spelling
    """
    corrected_words = map(_map_word, name_input.split())
    return " ".join(corrected_words)


def _map_word(word_input):
    """Return the spelling stored in WORD_MAPPING for ``word_input``, or the word itself if there is none."""
    # A missing (or falsy) entry falls back to the input word unchanged.
    return WORD_MAPPING.get(word_input) or word_input


def _lat_lon(_id) -> Optional[LatLon]:
loc = location_by_id(_id)
if loc:
Expand Down
1 change: 1 addition & 0 deletions nzshm_common/location/resources/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""init file needed for python3.9 compatibility"""
File renamed without changes.
Loading

0 comments on commit 63110b9

Please sign in to comment.