Skip to content

Commit 77fa762

Browse files
committed
Improve malaysian ITN implementation
Note that TF prefix is not implemented because that might be a typo, since it doesn't appear on the lists. Fixes #113 Closes #237
1 parent ddb2092 commit 77fa762

File tree

3 files changed

+167
-30
lines changed

3 files changed

+167
-30
lines changed

Diff for: stdnum/my/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# coding: utf-8
33
#
44
# Copyright (C) 2013 Arthur de Jong
5+
# Copyright (C) 2023 Leandro Regueiro
56
#
67
# This library is free software; you can redistribute it and/or
78
# modify it under the terms of the GNU Lesser General Public
@@ -19,3 +20,6 @@
1920
# 02110-1301 USA
2021

2122
"""Collection of Malaysian numbers."""
23+
24+
# provide aliases
25+
from stdnum.my import itn as vat # noqa: F401

Diff for: stdnum/my/itn.py

+81-30
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# itn.py - functions for handling ITN numbers
22
#
33
# Copyright (C) 2020 Sergi Almacellas Abellana
4+
# Copyright (C) 2023 Leandro Regueiro
45
#
56
# This library is free software; you can redistribute it and/or
67
# modify it under the terms of the GNU Lesser General Public
@@ -21,65 +22,115 @@
2122
2223
The number is assigned by The Inland Revenue Board of Malaysia (IRBM) and it
2324
is required to report the income. This unique number is known as
24-
"Nombor CukaiPendapatan" or Income Tax Number.
25+
"Nombor Cukai Pendapatan" or Income Tax Number (ITN).
2526
26-
The number consist of 11 or 12 digits. It is structured by two types, normally
27-
separated by an space. The first one consists of 1 or 2 leters and represents
28-
the type of the file number. The second one is always ten digits an represents
29-
the tax number.
27+
For individuals the ITN consists of the 2-letter Type of File Number (SG for
28+
individual resident or OG for individual non-resident) followed by a space, and
29+
ending with the Income Tax Number (maximum 11 digits).
3030
31-
>>> validate('C2584563202')
31+
For Non-Individuals the ITN consists of the Type of File Number (1 or 2 letters)
32+
followed by a space, and ending with the Income Tax Number (maximum 10 digits).
33+
The Type of File Number for Non-Individuals can be one of the following:
34+
35+
* C: Company, Pte. Ltd. Company, Limited Company or Non-Resident Company.
36+
* CS: Cooperative Society.
37+
* D: Partnership.
38+
* E: Employer.
39+
* F: Association.
40+
* FA: Non-Resident Public Entertainer.
41+
* PT: Limited Liability Partnership.
42+
* TA: Trust Body.
43+
* TC: Unit Trust/ Property Trust.
44+
* TN: Business Trust.
45+
* TR: Real Estate Investment Trust/ Property Trust Fund.
46+
* TP: Deceased Person's Estate.
47+
* TJ: Hindu Joint Family.
48+
* LE: Labuan Entity.
49+
50+
>>> validate('SG 10234567090')
51+
'SG10234567090'
52+
>>> validate('OG 25845632021')
53+
'OG25845632021'
54+
>>> validate('C 2584563202')
3255
'C2584563202'
33-
>>> validate('CDB2584563202') # Should contain the prefix
56+
>>> validate('1')
57+
Traceback (most recent call last):
58+
...
59+
InvalidComponent: ...
60+
>>> validate('12345678901234')
3461
Traceback (most recent call last):
3562
...
36-
InvalidLength: ...
37-
>>> validate('CD12346789012') # Should contain the prefix
63+
InvalidComponent: ...
64+
>>> validate('12345')
3865
Traceback (most recent call last):
3966
...
40-
InvalidLength: ...
41-
>>> validate('C258456320B') # number should only contain digits
67+
InvalidComponent: ...
68+
>>> validate('X 12345')
69+
Traceback (most recent call last):
70+
...
71+
InvalidComponent: ...
72+
>>> validate('C 12345X')
4273
Traceback (most recent call last):
4374
...
4475
InvalidFormat: ...
4576
>>> format('C2584563202')
4677
'C 2584563202'
78+
>>> format('SG10234567090')
79+
'SG 10234567090'
4780
"""
4881

4982
from stdnum.exceptions import *
5083
from stdnum.util import clean, isdigits
5184

5285

53-
def compact(number):
54-
"""Convert the number to the minimal representation. This strips the
55-
number of any valid separators and removes surrounding whitespace."""
56-
return clean(number, ' -*').strip()
86+
# Type of File Number prefixes whose numerical part may be up to 11 digits
# long (validate() rejects anything longer for these prefixes).
PREFIXES_11_DIGITS = ('SG', 'OG')
# Type of File Number prefixes whose numerical part may be up to 10 digits
# long (validate() rejects anything longer for these prefixes).
PREFIXES_10_DIGITS = (
    'C', 'CS', 'D', 'E', 'F', 'FA', 'PT',
    'TA', 'TC', 'TN', 'TR', 'TP', 'TJ', 'LE',
)
# Every prefix accepted by validate(), in the order 11-digit then 10-digit.
VALID_PREFIXES = PREFIXES_11_DIGITS + PREFIXES_10_DIGITS
5790

5891

59-
def split(number):
60-
number = compact(number)
61-
index = 10
62-
if len(number) > 12:
63-
index += 11
64-
return number[:-index], number[-index:]
92+
def _get_prefix_and_number(number):
93+
"""Return the number separated in prefix and numerical part.
94+
95+
This assumes the number has been previously compacted.
96+
"""
97+
for i, c in enumerate(number):
98+
if c.isdigit():
99+
return number[:i], number[i:]
100+
return number, ''
101+
102+
103+
def compact(number):
    """Convert the number to the minimal representation.

    The value is passed through clean() with the ' -*' separator set, then
    surrounding whitespace is stripped and the result is upper-cased.
    """
    without_separators = clean(number, ' -*')
    trimmed = without_separators.strip()
    return trimmed.upper()
65110

66111

67112
def validate(number):
    """Check if the number is a valid ITN number.

    This checks the prefix, the length of the numerical part and that the
    numerical part only contains digits. The compact representation of the
    number is returned when it is valid.

    Raises InvalidComponent when the prefix is not one of VALID_PREFIXES,
    when the numerical part is missing, or when it is longer than allowed
    for the prefix. Raises InvalidFormat when the numerical part contains
    anything other than digits.
    """
    number = compact(number)
    file_type, tax_number = _get_prefix_and_number(number)
    if file_type not in VALID_PREFIXES or not tax_number:
        raise InvalidComponent()
    # VALID_PREFIXES is the union of the two prefix families, so anything
    # that is not an 11-digit prefix here is a 10-digit one.
    max_digits = 11 if file_type in PREFIXES_11_DIGITS else 10
    if len(tax_number) > max_digits:
        raise InvalidComponent()
    if not isdigits(tax_number):
        raise InvalidFormat()
    return number
79130

80131

81132
def is_valid(number):
82-
"""Check if the number is a valid NRIC number."""
133+
"""Check if the number is a valid ITN number."""
83134
try:
84135
return bool(validate(number))
85136
except ValidationError:
@@ -88,4 +139,4 @@ def is_valid(number):
88139

89140
def format(number):
    """Reformat the number to the standard presentation format.

    The number is compacted and split into its prefix and numerical part,
    which are then joined with a single space (e.g. 'C2584563202' becomes
    'C 2584563202').

    A part that is empty (no prefix, or no numerical part) is left out, so
    the result never starts or ends with a stray space.
    """
    parts = _get_prefix_and_number(compact(number))
    # Skip empty parts: joining ('', '12345') would otherwise give ' 12345'.
    return ' '.join(part for part in parts if part)

Diff for: tests/test_my_itn.doctest

+82
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
test_my_itn.doctest - more detailed doctests for stdnum.my.itn module
2+
3+
Copyright (C) 2023 Leandro Regueiro
4+
5+
This library is free software; you can redistribute it and/or
6+
modify it under the terms of the GNU Lesser General Public
7+
License as published by the Free Software Foundation; either
8+
version 2.1 of the License, or (at your option) any later version.
9+
10+
This library is distributed in the hope that it will be useful,
11+
but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13+
Lesser General Public License for more details.
14+
15+
You should have received a copy of the GNU Lesser General Public
16+
License along with this library; if not, write to the Free Software
17+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18+
02110-1301 USA
19+
20+
21+
This file contains more detailed doctests for the stdnum.my.itn module. It
22+
tries to test more corner cases and detailed functionality that is not really
23+
useful as module documentation.
24+
25+
>>> from stdnum.my import itn
26+
27+
28+
Tests for some corner cases.
29+
30+
>>> itn.validate('SG 10234567090')
31+
'SG10234567090'
32+
>>> itn.validate('OG 25845632021')
33+
'OG25845632021'
34+
>>> itn.validate('C 2584563202')
35+
'C2584563202'
36+
>>> itn.validate('X 12345')
37+
Traceback (most recent call last):
38+
...
39+
InvalidComponent: ...
40+
>>> itn.validate('12345')
41+
Traceback (most recent call last):
42+
...
43+
InvalidComponent: ...
44+
>>> itn.validate('C')
45+
Traceback (most recent call last):
46+
...
47+
InvalidComponent: ...
48+
>>> itn.validate('SG 123456789012')
49+
Traceback (most recent call last):
50+
...
51+
InvalidComponent: ...
52+
>>> itn.validate('C 12345678901')
53+
Traceback (most recent call last):
54+
...
55+
InvalidComponent: ...
56+
>>> itn.validate('C 12345X')
57+
Traceback (most recent call last):
58+
...
59+
InvalidFormat: ...
60+
>>> itn.format('C2584563202')
61+
'C 2584563202'
62+
>>> itn.format('SG10234567090')
63+
'SG 10234567090'
64+
65+
66+
These have been found online and should all be valid numbers.
67+
68+
>>> numbers = '''
69+
...
70+
... C 2128186207
71+
... C 2354867110
72+
... C 2493192407
73+
... F 1064671704
74+
... OG 04455987090
75+
... SG 2178656-09
76+
... SG 10234567090
77+
... OG 25845632021
78+
... C 2584563202
79+
...
80+
... '''
81+
>>> [x for x in numbers.splitlines() if x and not itn.is_valid(x)]
82+
[]

0 commit comments

Comments
 (0)