Skip to content

Add type hints #467

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
strategy:
fail-fast: false
matrix:
tox_job: [docs, flake8, headers]
tox_job: [docs, flake8, mypy, headers]
steps:
- uses: actions/checkout@v3
- name: Set up Python
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ __pycache__

# /
/.coverage
/.mypy_cache
/.ruff_cache
/.tox
/.venv
/build
/coverage
/dist
Expand Down
4 changes: 4 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[mypy]
python_version = 3.9
strict = True
warn_unreachable = True
15 changes: 14 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@ owner=root
group=root

[tool:pytest]
addopts = --doctest-modules --doctest-glob="*.doctest" stdnum tests --ignore=stdnum/iso9362.py --cov=stdnum --cov-report=term-missing:skip-covered --cov-report=html
addopts = --doctest-modules --doctest-glob="*.doctest" stdnum tests --ignore=stdnum/iso9362.py --ignore=stdnum/_types.py --cov=stdnum --cov-report=term-missing:skip-covered --cov-report=html
doctest_optionflags = NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL

[coverage:run]
branch = true
omit =
_types.py
_typing.py

[coverage:report]
fail_under=100
Expand All @@ -32,16 +35,26 @@ ignore =
Q003 # don't force "" strings to avoid escaping quotes
T001,T201 # we use print statements in the update scripts
W504 # we put the binary operator on the preceding line
per-file-ignores =
# typing re-exports
stdnum/_typing.py: F401,I250
max-complexity = 15
max-line-length = 120
extend-exclude =
.github
.mypy_cache
.pytest_cache
.ruff_cache
.venv
build

[isort]
lines_after_imports = 2
multi_line_output = 4
extra_standard_library =
typing_extensions
classes =
IO
known_third_party =
lxml
openpyxl
Expand Down
7 changes: 5 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,11 @@
'Topic :: Text Processing :: General',
],
packages=find_packages(),
install_requires=[],
package_data={'': ['*.dat', '*.crt']},
python_requires='>=3.7',
install_requires=[
'importlib_resources >= 1.3 ; python_version < "3.9"',
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Type checkers don't have great support for the try/except style of providing backwards compatibility. So instead of relying on pkg_resources, which may be missing even for Python versions 3.7 and 3.8 if a tool like uv was used to set up the environment, it seemed better to just rely on the importlib_resources backport with the minimum version that supports the files function. That way we don't have a soft dependency on setuptools.

],
package_data={'': ['*.dat', '*.crt', 'py.typed']},
extras_require={
# The SOAP feature is only required for a number of online tests
# of numbers such as the EU VAT VIES lookup, the Dominican Republic
Expand Down
1 change: 1 addition & 0 deletions stdnum/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
Apart from the validate() function, many modules provide extra
parsing, validation, formatting or conversion functions.
"""
from __future__ import annotations

from stdnum.util import get_cc_module

Expand Down
77 changes: 77 additions & 0 deletions stdnum/_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# _types.py - module for defining custom types
#
# Copyright (C) 2025 David Salvisberg
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 USA

"""Module containing custom types.

This module is designed to be accessed through `stdnum._typing` so
you only incur the overhead and the runtime requirement of Python 3.9
and `typing_extensions` when one of these types is introspected at runtime.

As such this module should never be accessed directly.
"""

from __future__ import annotations

from typing import Protocol
from typing_extensions import Required, TypedDict


class NumberValidationModule(Protocol):
    """Minimal interface for a number validation module.

    This is a structural type: it only describes the three callables a
    conforming object has to expose. The method bodies are intentionally
    empty because only the signatures matter.
    """

    def compact(self, number: str) -> str:
        """Convert the number to the minimal representation."""

    def validate(self, number: str) -> str:
        """Check if the number provided is a valid number of its type.

        The result is a `str`; presumably the validated number itself and
        an exception on failure -- confirm against the concrete module.
        """

    def is_valid(self, number: str) -> bool:
        """Check if the number provided is a valid number of its type.

        Unlike `validate`, the outcome is reported as a plain `bool`.
        """


class IMSIInfo(TypedDict, total=False):
    """Info `dict` returned by `stdnum.imsi.info`.

    The class is declared with `total=False`, so a key is optional for
    type checkers unless it is wrapped in `Required`.
    """

    # Keys explicitly marked as required.
    number: Required[str]
    mcc: Required[str]
    mnc: Required[str]
    msin: Required[str]
    # Keys that may be absent (not wrapped in `Required`).
    country: str
    cc: str
    brand: str
    operator: str
    status: str
    bands: str


class GSTINInfo(TypedDict):
    """Info `dict` returned by `stdnum.in_.gstin.info`.

    No `total=False` is given, so every key is required.
    """

    # The only key whose value may be `None`; the `X | None` spelling relies
    # on the postponed evaluation of annotations enabled at the top of the
    # module.
    state: str | None
    pan: str
    holder_type: str
    initial: str
    registration_count: int


class PANInfo(TypedDict):
    """Info `dict` returned by `stdnum.in_.pan.info`.

    No `total=False` is given, so both keys are required.
    """

    holder_type: str
    initial: str
119 changes: 119 additions & 0 deletions stdnum/_typing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# _typing.py - module for typing shims with reduced runtime overhead
#
# Copyright (C) 2025 David Salvisberg
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 USA

"""Compatibility shims for the Python typing module.

This module is designed such that runtime use of
annotations is possible starting with Python 3.9, while it still
supports Python 3.6 - 3.8 if the package is used normally without
introspecting the annotations of the API.

You should never import *from* this module, you should always import
the entire module and then access the members via attribute access.

I.e. use the module like this:
```python
from stdnum import _typing as t

foo: t.Any = ...
```

Instead of like this:
```python
from stdnum._typing import Any

foo: Any = ...
```

The exceptions to that rule are `TYPE_CHECKING`, `cast` and `deprecated`,
which can be used at runtime.
"""

from __future__ import annotations


TYPE_CHECKING = False
if TYPE_CHECKING:
from collections.abc import Generator as Generator
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really understand why this code is here. I don't think there is any situation where you can have this if branch be executed.

This code appears to have another origin and to be more generic than is needed for python-stdnum. Where did it come from?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This branch is for type checkers only, conversely the else branch is never looked at by type checkers.

The entire point of this file is to defer the typing import, since it is quite expensive, which would make python-stdnum less attractive in embedded environments. But the magic for this is too dynamic for type checkers to be able to understand it, so this branch basically summarizes for type checkers what typing symbols this module re-exports.

We could avoid the branch by shipping a separate _typing.pyi file, but it seems easier to keep the typing-only branch in sync with the runtime branch if it's all in the same file.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still struggling with getting this merged. The whole _typing.py file feels like a pretty big hack just to get some type annotations. I will probably merge some bits of this earlier. I'm in the process of extracting some parts into separate commits which leaves the huge one with just mechanical changes.

Are imports from collections.abc also expensive? If there is an efficient way to be able to reference Any we can probably make something that has minimal runtime impact.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

collections.abc is not as bad as far as overhead is concerned I believe. I just think it's easier if all the types are collected in a common utility module when python-stdnum relies on such a small number of them. There's less confusion overall if there's only one place to import types from.

The only real friction I see with this approach is that if you need a type that hasn't been added to _typing.py yet, you need to add it to both branches in the correct way. But this should be rare, given how much code there is and how little need there is for anything fancy typing-wise.

There are, however, alternative approaches. We could simplify things: if you don't care to support runtime introspection of type hints at all, we could just put all the typing-only imports in an if TYPE_CHECKING block at the start of each file that needs it. This should only really negatively impact users of something like beartype.

There are however still things with runtime effects like cast and @deprecated, so for those alone we do need something like _typing.py, so we can provide our own implementation that doesn't depend on typing, but for type checkers to be able to understand what these functions do, we need to pretend it's their implementation, not ours.

It is unfortunately what you have to do currently if you want to minimize the runtime impact of type annotations. If you don't care to minimize the runtime impact we can add typing_extensions as a dependency and import everything at runtime.

Other than that the only option with no runtime overhead is separate stub files. But then you lose both runtime introspection of the type hints and the ability to type check the implementation. So it will not help you find any bugs in stdnum, just in code that uses it. So at that point it would probably be more ergonomic to contribute the stubs to typeshed, than maintain them in this project and deal with the hassle of updating two separate files each time.

Copy link
Contributor Author

@Daverball Daverball Mar 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also just in case this wasn't clear: The if TYPE_CHECKING: part isn't the hack. typing.TYPE_CHECKING exists as a constant and it's very commonly used to get around limitations of the type system. Doing TYPE_CHECKING = False is the currently supported way to avoid the typing import, since type checkers will still treat it like typing.TYPE_CHECKING (False at runtime, True at type checking time).

The only real hack is the module-level __getattr__ and providing our own implementation for deprecated/cast so we can avoid runtime imports in annotations, since we deferred their execution via from __future__ import annotations. So with this example:

import stdnum._typing as t

x: t.Any

What happens is that __annotations__ becomes {'x': 't.Any'}, so the code t.Any doesn't get executed until you (or a different dependency) run typing.get_type_hints or inspect.get_annotations on your modules/functions. Both use eval internally to execute the stringized type expressions, so that's the first time stdnum._typing.__getattr__ gets called and the import happens.

A module-level __getattr__ to avoid the typing import is admittedly less common, but there are some libraries that use that trick. What is a lot more common is the use of TYPE_CHECKING = False and not supporting runtime introspection of type hints.

The more commonly used style would look like this:

TYPE_CHECKING = False
if TYPE_CHECKING:
    from typing import Any

x: Any

In this case typing.get_type_hints and inspect.get_annotations would raise a NameError, since Any is never actually imported. Although PEP 649 will improve the situation for runtime introspection and add a mode where unresolvable names are replaced with typing.ForwardRef, so you get back {'x': typing.ForwardRef('Any')} instead of a NameError for the above.

from collections.abc import Iterable as Iterable
from collections.abc import Mapping as Mapping
from collections.abc import Sequence as Sequence
from typing import Any as Any
from typing import IO as IO
from typing import Literal as Literal
from typing import cast as cast
from typing_extensions import TypeAlias as TypeAlias
from typing_extensions import deprecated as deprecated

from stdnum._types import GSTINInfo as GSTINInfo
from stdnum._types import IMSIInfo as IMSIInfo
from stdnum._types import NumberValidationModule as NumberValidationModule
from stdnum._types import PANInfo as PANInfo
else:
def cast(typ, val):
    """Runtime stand-in for `typing.cast`.

    The `typ` argument is ignored; `val` is handed back untouched.
    """
    del typ  # only meaningful to type checkers
    return val

class deprecated:  # noqa: N801
    """Simplified backport of `warnings.deprecated`.

    This backport doesn't handle classes or async functions.

    Use it as a decorator factory: `@deprecated('message')`. The decorated
    function gets a `__deprecated__` attribute holding the message and,
    unless `category` is `None`, emits a warning of that category on
    every call.
    """

    def __init__(self, message, category=DeprecationWarning, stacklevel=1):  # noqa: D107
        # Reject non-string messages up front. The most common cause is
        # writing `@deprecated` without calling it, which would otherwise
        # silently replace the decorated function with a `deprecated`
        # instance and only fail later in a confusing way.
        if not isinstance(message, str):
            raise TypeError(
                "Expected an object of type str for 'message', not %r"
                % type(message).__name__)
        self.message = message
        self.category = category
        self.stacklevel = stacklevel

    def __call__(self, func):  # noqa: D102
        func.__deprecated__ = self.message

        # A category of None means: only mark the function, never warn.
        if self.category is None:
            return func

        # Imported lazily so that merely importing this module stays cheap.
        import functools
        import warnings

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # One extra stack level so the warning is attributed to the
            # caller of the deprecated function instead of this wrapper.
            warnings.warn(self.message, category=self.category, stacklevel=self.stacklevel + 1)
            return func(*args, **kwargs)

        wrapper.__deprecated__ = self.message
        return wrapper

def __getattr__(name):
    """Resolve a re-exported typing name lazily on attribute access.

    The imports happen inside the function so that the comparatively
    expensive modules are only loaded once one of these names is actually
    looked up on this module.

    Raises `AttributeError` for any name that is not re-exported here.
    """
    if name in {'Generator', 'Iterable', 'Mapping', 'Sequence'}:
        import collections.abc
        return getattr(collections.abc, name)
    if name in {'Any', 'IO', 'Literal'}:
        import typing
        return getattr(typing, name)
    if name == 'TypeAlias':
        import sys

        # TypeAlias only exists in the standard library from Python 3.10
        # on; older versions fall back to the typing_extensions backport.
        if sys.version_info >= (3, 10):
            import typing
        else:
            import typing_extensions as typing
        return getattr(typing, name)
    if name in {'GSTINInfo', 'IMSIInfo', 'NumberValidationModule', 'PANInfo'}:
        import stdnum._types
        return getattr(stdnum._types, name)
    # Use the same wording as the interpreter's own error for a missing
    # module attribute instead of a message consisting of the bare name.
    raise AttributeError('module %r has no attribute %r' % (__name__, name))
1 change: 1 addition & 0 deletions stdnum/ad/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
# 02110-1301 USA

"""Collection of Andorran numbers."""
from __future__ import annotations

# Provide aliases.
from stdnum.ad import nrt as vat # noqa: F401
9 changes: 5 additions & 4 deletions stdnum/ad/nrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,13 @@
>>> format('D059888N')
'D-059888-N'
""" # noqa: E501
from __future__ import annotations

from stdnum.exceptions import *
from stdnum.util import clean, isdigits


def compact(number):
def compact(number: str) -> str:
"""Convert the number to the minimal representation.

This strips the number of any valid separators and removes surrounding
Expand All @@ -57,7 +58,7 @@ def compact(number):
return clean(number, ' -.').upper().strip()


def validate(number):
def validate(number: str) -> str:
"""Check if the number is a valid Andorra NRT number.

This checks the length, formatting and other constraints. It does not check
Expand All @@ -79,15 +80,15 @@ def validate(number):
return number


def is_valid(number):
def is_valid(number: str) -> bool:
"""Check if the number is a valid Andorra NRT number."""
try:
return bool(validate(number))
except ValidationError:
return False


def format(number):
def format(number: str) -> str:
"""Reformat the number to the standard presentation format."""
number = compact(number)
return '-'.join([number[0], number[1:-1], number[-1]])
1 change: 1 addition & 0 deletions stdnum/al/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
# 02110-1301 USA

"""Collection of Albanian numbers."""
from __future__ import annotations

# provide vat as an alias
from stdnum.al import nipt as vat # noqa: F401
7 changes: 4 additions & 3 deletions stdnum/al/nipt.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
...
InvalidFormat: ...
"""
from __future__ import annotations

import re

Expand All @@ -60,7 +61,7 @@
_nipt_re = re.compile(r'^[A-M][0-9]{8}[A-Z]$')


def compact(number):
def compact(number: str) -> str:
"""Convert the number to the minimal representation. This strips the
number of any valid separators and removes surrounding whitespace."""
number = clean(number, ' ').upper().strip()
Expand All @@ -71,7 +72,7 @@ def compact(number):
return number


def validate(number):
def validate(number: str) -> str:
"""Check if the number is a valid VAT number. This checks the length and
formatting."""
number = compact(number)
Expand All @@ -82,7 +83,7 @@ def validate(number):
return number


def is_valid(number):
def is_valid(number: str) -> bool:
"""Check if the number is a valid VAT number."""
try:
return bool(validate(number))
Expand Down
1 change: 1 addition & 0 deletions stdnum/ar/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
# 02110-1301 USA

"""Collection of Argentinian numbers."""
from __future__ import annotations

# provide aliases
from stdnum.ar import cuit as vat # noqa: F401
Expand Down
Loading
Loading