Skip to content

Commit

Permalink
fixup: clean column names
Browse files Browse the repository at this point in the history
  • Loading branch information
rgraber committed Feb 10, 2025
1 parent cb00206 commit 00be6b9
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 2 deletions.
1 change: 1 addition & 0 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# Install testing / development requirements
coverage[toml]==6.5.0
coveralls==3.3.1
ddt==1.7.2
flake8==7.1.1
funcsigs==1.0.2
geojson-rewind==1.1.0
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

setup(
name='formpack',
version='3.0.0',
version='3.0.1',
description='Manipulation tools for KoBo forms',
author='the formpack contributors (https://github.com/kobotoolbox/formpack/graphs/contributors)',
url='https://github.com/kobotoolbox/formpack/',
Expand Down
1 change: 1 addition & 0 deletions src/formpack/pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def load_version(self, schema):
unique across an entire FormPack. It can be None, but only for
one version in the FormPack.
"""
breakpoint()
replace_aliases(schema['content'], in_place=True)
expand_content(schema['content'], in_place=True)

Expand Down
63 changes: 62 additions & 1 deletion src/formpack/utils/expand_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def _expand_translatable_content(
row[_expandable_col][_nti] = _oldval
if col_shortname != _expandable_col:
row[_expandable_col][cur_translation_index] = row[col_shortname]
breakpoint()
del row[col_shortname]


Expand Down Expand Up @@ -94,7 +95,67 @@ def _get_translations_from_special_cols(
return translations, set(translated_cols)


def clean_column_name(column_name: str) -> str:
    """
    Lower-case the recognised part of a column header, leaving the rest as-is.

    Only the leading column "shortname" (and, for media columns, the optional
    ``media`` prefix and the media type) is lower-cased. Whatever follows the
    separator - typically a translation name - is returned exactly as given.

    Examples (assuming ``audio``/``image``/``video`` are media column names):

        "LaBeL"            -> "label"
        "Bind:Some:Thing"  -> "bind:Some:Thing"
        "Media:Audio::ES"  -> "media:audio::ES"
        "ViDeO : ES"       -> "video : ES"
        "Media: AuDiO"     -> "media: audio"
        "Label::English"   -> "label::English"
        "FOO"              -> "foo"

    :param column_name: the raw column header.
    :return: the header with its recognised leading part lower-cased.
    """
    lowered = column_name.lower()

    # "LaBeL" -> "label", "HiNT" -> "hint"
    if lowered in ('label', 'hint'):
        return lowered

    # "Bind:Some:Thing" -> "bind:Some:Thing", "BodY:" -> "body:"
    # Only the prefix itself is lower-cased; the remainder is spliced back
    # with slicing so it is never interpreted as a regex replacement template.
    match = re.match(r'(bind|body):', column_name, flags=re.IGNORECASE)
    if match:
        return match.group(1).lower() + column_name[match.end(1):]

    # Media columns, with or without a `media:` prefix and with or without a
    # trailing translation:
    # "Media:Audio::ES" -> "media:audio::ES", "ViDeO : ES" -> "video : ES",
    # "Media: AuDiO" -> "media: audio", "VIDEO" -> "video"
    media_names = '|'.join(re.escape(name) for name in MEDIA_COLUMN_NAMES)
    match = re.match(
        rf'^(media\s*::?\s*)?({media_names})(\s*::?\s*[^:]+)?$',
        column_name,
        flags=re.IGNORECASE,
    )
    if match:
        # Optional groups that did not participate come back as '' instead of
        # None, so `.lower()` is always safe.
        prefix, media_type, suffix = match.groups(default='')
        return f'{prefix.lower()}{media_type.lower()}{suffix}'

    # Generic translated column: label::x, constraint_message::x, hint::x
    match = re.match(r'^([^:]+)\s*::?\s*[^:]+$', column_name)
    if match:
        return match.group(1).lower() + column_name[match.end(1):]

    return lowered


def preprocess_columns(content: Dict[str, List[Any]]) -> None:
    """
    Normalise the column names of every row of every sheet, in place.

    Each key of each dict row is passed through `clean_column_name`; values
    and column order are kept. If two keys of the same row clean to the same
    name, the value of the later one wins.

    Sheets that are not lists, and rows that are not dicts, are left
    untouched. NOTE(review): this assumes `content` may carry non-row
    entries next to the sheets (e.g. a settings dict or a list of
    translation names) - confirm against callers.

    :param content: mapping of sheet name to its list of rows; mutated.
    """
    for rows in content.values():
        if not isinstance(rows, (list, tuple)):
            continue
        for row in rows:
            if not isinstance(row, dict):
                continue
            # Build the renamed mapping first so that the row is only
            # rewritten once every column name has been cleaned.
            renamed = {
                clean_column_name(column_name): value
                for column_name, value in row.items()
            }
            row.clear()
            row.update(renamed)

def expand_content_in_place(content: Dict[str, List[Any]]) -> None:
preprocess_columns(content)

specials, translations, transl_cols = _get_special_survey_cols(content)

if len(translations) > 0:
Expand Down Expand Up @@ -233,7 +294,7 @@ def _mark_special(**kwargs: str) -> None:
_pluck_uniq_cols('choices')

for column_name in uniq_cols.keys():
if column_name.lower() in ['label', 'hint']:
if column_name in ['label', 'hint']:
_mark_special(
column_name=column_name,
column=column_name,
Expand Down
22 changes: 22 additions & 0 deletions tests/test_expand_content.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# coding: utf-8
import copy
from collections import OrderedDict
from ddt import data, ddt, unpack
from unittest import TestCase

from formpack import FormPack
from formpack.constants import OR_OTHER_COLUMN as _OR_OTHER
Expand All @@ -12,6 +14,8 @@
from formpack.utils.flatten_content import flatten_content
from formpack.utils.string import orderable_with_none

from formpack.src.formpack.utils.expand_content import clean_column_name


def test_expand_selects_with_or_other():
assert _expand_type_to_dict('select_one xx or other').get(_OR_OTHER) == True
Expand Down Expand Up @@ -612,3 +616,21 @@ def test_expand_ignores_case():

def _s(rows):
return {'survey': [dict([[key, 'x']]) for key in rows]}

@ddt
class ColumnTestCase(TestCase):
    """Data-driven checks for `clean_column_name`."""

    # Each pair is (raw column header, expected cleaned header). Only the
    # leading column name / media prefix is expected to be lower-cased; the
    # part after the separator (e.g. a translation name) must come back
    # unchanged.
    @data(
        ('FOO', 'foo'),
        ('LABEL', 'label'),
        ('HINT', 'hint'),
        ('BIND::FOO', 'bind::FOO'),
        ('BODY : FOO', 'body : FOO'),
        ('MEDIA:AUDIO:Spanish', 'media:audio:Spanish'),
        ('VIDEO :: SPANISH', 'video :: SPANISH'),
        ('MEDIA:AUDIO', 'media:audio'),
        ('IMAGE', 'image'),
        ('LABEL : SPANISH', 'label : SPANISH'),
    )
    @unpack
    def test_clean_column_name(self, name, expected):
        assert clean_column_name(name) == expected

0 comments on commit 00be6b9

Please sign in to comment.