Skip to content

Commit 0599df1

Browse files
committed
Add unit tests for tagging_util and fix error message formatting
The validate_tag function passed the format string and str(type(tag)) to MetaflowTaggingError as two separate positional arguments instead of interpolating them with %, causing str() to fail when the exception was rendered; this commit fixes that error message formatting for non-string tag types. It also adds comprehensive test coverage for is_utf8_encodable, is_utf8_decodable, validate_tag, and validate_tags, including edge cases for type validation, length limits, and tag set size constraints.
1 parent f357565 commit 0599df1

2 files changed

Lines changed: 134 additions & 2 deletions

File tree

metaflow/tagging_util.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,8 @@ def validate_tag(tag):
6767
raise MetaflowTaggingError("Tags must be UTF-8 encodable")
6868
else:
6969
raise MetaflowTaggingError(
70-
"Tags must be some kind of string (bytes or unicode), got %s",
71-
str(type(tag)),
70+
"Tags must be some kind of string (bytes or unicode), got %s"
71+
% str(type(tag))
7272
)
7373
if not len(tag):
7474
raise MetaflowTaggingError("Tags must not be empty string")

test/unit/test_tagging_util.py

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
import pytest
2+
3+
from metaflow.exception import MetaflowTaggingError
4+
from metaflow.tagging_util import (
5+
MAX_TAG_SIZE,
6+
MAX_USER_TAG_SET_SIZE,
7+
is_utf8_decodable,
8+
is_utf8_encodable,
9+
validate_tag,
10+
validate_tags,
11+
)
12+
13+
14+
def test_is_utf8_encodable_with_ascii_string():
    """A plain ASCII string is reported as UTF-8 encodable."""
    outcome = is_utf8_encodable("hello")
    assert outcome is True
16+
17+
18+
def test_is_utf8_encodable_with_unicode_string():
    """Text containing accented, non-ASCII characters is UTF-8 encodable."""
    accented_text = "hello \u00e9\u00e0\u00fc"
    assert is_utf8_encodable(accented_text) is True
20+
21+
22+
def test_is_utf8_encodable_with_empty_string():
    """The empty string is reported as UTF-8 encodable."""
    empty_text = ""
    assert is_utf8_encodable(empty_text) is True
24+
25+
26+
def test_is_utf8_encodable_with_non_string():
    """Passing an int is expected to surface as an AttributeError."""
    not_a_string = 123
    with pytest.raises(AttributeError):
        is_utf8_encodable(not_a_string)
29+
30+
31+
@pytest.mark.parametrize(
    "input_bytes",
    [b"hello", b"valid utf-8: \xc3\xa9", b""],
)
def test_is_utf8_decodable_with_valid_bytes(input_bytes):
    """ASCII, multi-byte UTF-8 and empty byte strings are all decodable."""
    decodable = is_utf8_decodable(input_bytes)
    assert decodable is True
41+
42+
43+
def test_is_utf8_decodable_with_invalid_bytes():
    """A byte string that is not valid UTF-8 is reported as not decodable."""
    malformed = b"\xff\xfe"
    assert is_utf8_decodable(malformed) is False
45+
46+
47+
def test_is_utf8_decodable_with_non_bytes():
    """Passing a str instead of bytes is expected to raise AttributeError."""
    not_bytes = "string"
    with pytest.raises(AttributeError):
        is_utf8_decodable(not_bytes)
50+
51+
52+
class TestValidateTag:
    """Checks for validate_tag applied to a single tag value.

    The acceptance tests pass simply by not raising; the rejection tests
    expect MetaflowTaggingError with a message matching a given fragment.
    """

    def test_valid_unicode_tag(self):
        """A short text tag is accepted."""
        tag = "valid-tag"
        validate_tag(tag)

    def test_valid_bytes_tag(self):
        """A short bytes tag is accepted."""
        tag = b"valid-bytes-tag"
        validate_tag(tag)

    def test_valid_unicode_with_utf8_chars(self):
        """A text tag containing a non-ASCII character is accepted."""
        tag = "caf\u00e9"
        validate_tag(tag)

    def test_rejects_empty_string(self):
        """An empty text tag is rejected."""
        expected = "must not be empty"
        with pytest.raises(MetaflowTaggingError, match=expected):
            validate_tag("")

    def test_rejects_empty_bytes(self):
        """An empty bytes tag is rejected with the same message."""
        expected = "must not be empty"
        with pytest.raises(MetaflowTaggingError, match=expected):
            validate_tag(b"")

    def test_rejects_tag_too_long(self):
        """A text tag one character over MAX_TAG_SIZE is rejected."""
        one_over_limit = MAX_TAG_SIZE + 1
        oversized = "a" * one_over_limit
        with pytest.raises(MetaflowTaggingError, match="Tag is too long"):
            validate_tag(oversized)

    def test_rejects_tag_too_long_bytes(self):
        """A bytes tag one byte over MAX_TAG_SIZE is rejected."""
        one_over_limit = MAX_TAG_SIZE + 1
        oversized = b"a" * one_over_limit
        with pytest.raises(MetaflowTaggingError, match="Tag is too long"):
            validate_tag(oversized)

    def test_rejects_non_string_type(self):
        """An int tag is rejected with the 'some kind of string' message."""
        expected = "must be some kind of string"
        with pytest.raises(MetaflowTaggingError, match=expected):
            validate_tag(123)

    def test_rejects_none_type(self):
        """None is rejected with the 'some kind of string' message."""
        expected = "must be some kind of string"
        with pytest.raises(MetaflowTaggingError, match=expected):
            validate_tag(None)

    def test_rejects_list_type(self):
        """A list wrapping a tag is rejected as a non-string value."""
        expected = "must be some kind of string"
        with pytest.raises(MetaflowTaggingError, match=expected):
            validate_tag(["tag"])

    def test_tag_at_max_length_is_valid(self):
        """A tag of exactly MAX_TAG_SIZE characters is still accepted."""
        at_limit = "a" * MAX_TAG_SIZE
        validate_tag(at_limit)
94+
95+
96+
class TestValidateTags:
    """Checks for validate_tags applied to a collection of tags.

    The acceptance tests pass simply by not raising; the rejection tests
    expect MetaflowTaggingError with a message matching a given fragment.
    """

    def test_valid_single_tag(self):
        """A one-element list of a valid tag is accepted."""
        tags = ["valid-tag"]
        validate_tags(tags)

    def test_valid_multiple_tags(self):
        """Several distinct valid tags are accepted."""
        tags = ["tag1", "tag2", "tag3"]
        validate_tags(tags)

    def test_valid_empty_list(self):
        """An empty list of tags is accepted."""
        no_tags = []
        validate_tags(no_tags)

    def test_deduplicates_tags(self):
        """Repeated copies of the same tag are accepted without error."""
        repeated = ["tag"] * 3
        validate_tags(repeated)

    def test_validates_each_tag(self):
        """One empty tag among valid ones makes the whole call fail."""
        tags = ["valid", "", "also-valid"]
        with pytest.raises(MetaflowTaggingError, match="must not be empty"):
            validate_tags(tags)

    def test_rejects_too_many_tags(self):
        """One distinct tag more than MAX_USER_TAG_SET_SIZE is rejected."""
        count = MAX_USER_TAG_SET_SIZE + 1
        too_many = ["tag-%d" % idx for idx in range(count)]
        with pytest.raises(MetaflowTaggingError, match="Cannot increase size"):
            validate_tags(too_many)

    def test_allows_remediation_when_existing_tags_provided(self):
        """An over-limit set passes when the same set is given as existing_tags."""
        count = MAX_USER_TAG_SET_SIZE + 1
        large_set = ["tag-%d" % idx for idx in range(count)]
        validate_tags(large_set, existing_tags=large_set)

    def test_rejects_increase_even_with_existing(self):
        """Growing an already over-limit existing set is still rejected."""
        count = MAX_USER_TAG_SET_SIZE + 1
        existing = ["tag-%d" % idx for idx in range(count)]
        larger = [*existing, "new-tag"]
        with pytest.raises(MetaflowTaggingError, match="Cannot increase size"):
            validate_tags(larger, existing_tags=existing)

    def test_valid_bytes_tags(self):
        """A list made up only of bytes tags is accepted."""
        tags = [b"tag1", b"tag2"]
        validate_tags(tags)

    def test_valid_mixed_unicode_and_bytes(self):
        """Text and bytes tags may appear together in one list."""
        tags = ["unicode-tag", b"bytes-tag"]
        validate_tags(tags)

0 commit comments

Comments
 (0)