Skip to content

Commit 9c54cc3

Browse files
authored
mypy: support namespace packages when passing files (#9742)
This is the successor to #9632. Things should basically be as discussed in that PR. Since #9616 is merged, this should now resolve #5759. We leave the Bazel integration with `--package-root` almost entirely untouched, save for a) one change that's a bugfix / doesn't affect the core of what `--package-root` is doing, b) another drive by bugfix that's not related to this PR. Change a) fixes the package root `__init__.py` hackery when passed absolute paths. Change b) fixes the validation logic for package roots above the current directory; it was broken if you passed `..` as a package root Since we're leaving `--package-root` alone for now, I named the new flag `--explicit-package-base` to try and avoid confusion. Doing so also matches the language used by BuildSource a little better. The new logic is summarised in the docstring of `SourceFinder.crawl_up`. Some commentary: - I change `find_sources_in_dir ` to call `crawl_up` directly to construct the BuildSource. This helps codify the fact that files discovered will use the same module names as if you passed them directly. - Doing so keeps things DRY with the more complicated logic and means, for instance, that we now do more sensible things in some cases when we recursively explore directories that have invalid package names. - Speaking of invalid package names, if we encounter a directory name with an invalid package name, we stop crawling. This is necessary because with namespace packages there's no guarantee that what we're crawling was meant to be a Python package. I add back in a check in the presence of `__init__.py` to preserve current unit tests where we raise InvalidSourceList. - The changes to modulefinder are purely cosmetic and can be ignored (there's some similar logic between the two files and this just makes sure they mirror each other closely) - One notable change is we now always use absolute paths to crawl. This makes the behaviour more predictable and addresses a common complaint: fixes #9677, fixes #8726 and others. - I figured this could use more extensive testing than a couple slow cmdline tests. Hopefully this test setup also helps clarify the behaviour :-) Co-authored-by: hauntsaninja <>
1 parent cdd5bad commit 9c54cc3

File tree

8 files changed

+413
-101
lines changed

8 files changed

+413
-101
lines changed

mypy/build.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2396,7 +2396,7 @@ def find_module_and_diagnose(manager: BuildManager,
23962396
and not options.use_builtins_fixtures
23972397
and not options.custom_typeshed_dir):
23982398
raise CompileError([
2399-
'mypy: "%s" shadows library module "%s"' % (result, id),
2399+
'mypy: "%s" shadows library module "%s"' % (os.path.relpath(result), id),
24002400
'note: A user-defined top-level module with name "%s" is not supported' % id
24012401
])
24022402
return (result, follow_imports)

mypy/find_sources.py

+125-88
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
"""Routines for finding the sources that mypy will check"""
22

3-
import os.path
3+
import functools
4+
import os
45

5-
from typing import List, Sequence, Set, Tuple, Optional, Dict
6+
from typing import List, Sequence, Set, Tuple, Optional
67
from typing_extensions import Final
78

8-
from mypy.modulefinder import BuildSource, PYTHON_EXTENSIONS
9+
from mypy.modulefinder import BuildSource, PYTHON_EXTENSIONS, mypy_path
910
from mypy.fscache import FileSystemCache
1011
from mypy.options import Options
1112

@@ -24,7 +25,7 @@ def create_source_list(paths: Sequence[str], options: Options,
2425
Raises InvalidSourceList on errors.
2526
"""
2627
fscache = fscache or FileSystemCache()
27-
finder = SourceFinder(fscache)
28+
finder = SourceFinder(fscache, options)
2829

2930
sources = []
3031
for path in paths:
@@ -34,7 +35,7 @@ def create_source_list(paths: Sequence[str], options: Options,
3435
name, base_dir = finder.crawl_up(path)
3536
sources.append(BuildSource(path, name, None, base_dir))
3637
elif fscache.isdir(path):
37-
sub_sources = finder.find_sources_in_dir(path, explicit_package_roots=None)
38+
sub_sources = finder.find_sources_in_dir(path)
3839
if not sub_sources and not allow_empty_dir:
3940
raise InvalidSourceList(
4041
"There are no .py[i] files in directory '{}'".format(path)
@@ -46,124 +47,161 @@ def create_source_list(paths: Sequence[str], options: Options,
4647
return sources
4748

4849

49-
def keyfunc(name: str) -> Tuple[int, str]:
50+
def keyfunc(name: str) -> Tuple[bool, int, str]:
5051
"""Determines sort order for directory listing.
5152
52-
The desirable property is foo < foo.pyi < foo.py.
53+
The desirable properties are:
54+
1) foo < foo.pyi < foo.py
55+
2) __init__.py[i] < foo
5356
"""
5457
base, suffix = os.path.splitext(name)
5558
for i, ext in enumerate(PY_EXTENSIONS):
5659
if suffix == ext:
57-
return (i, base)
58-
return (-1, name)
60+
return (base != "__init__", i, base)
61+
return (base != "__init__", -1, name)
62+
63+
64+
def normalise_package_base(root: str) -> str:
65+
if not root:
66+
root = os.curdir
67+
root = os.path.abspath(root)
68+
if root.endswith(os.sep):
69+
root = root[:-1]
70+
return root
71+
72+
73+
def get_explicit_package_bases(options: Options) -> Optional[List[str]]:
74+
"""Returns explicit package bases to use if the option is enabled, or None if disabled.
75+
76+
We currently use MYPYPATH and the current directory as the package bases. In the future,
77+
when --namespace-packages is the default could also use the values passed with the
78+
--package-root flag, see #9632.
79+
80+
Values returned are normalised so we can use simple string comparisons in
81+
SourceFinder.is_explicit_package_base
82+
"""
83+
if not options.explicit_package_bases:
84+
return None
85+
roots = mypy_path() + options.mypy_path + [os.getcwd()]
86+
return [normalise_package_base(root) for root in roots]
5987

6088

6189
class SourceFinder:
62-
def __init__(self, fscache: FileSystemCache) -> None:
90+
def __init__(self, fscache: FileSystemCache, options: Options) -> None:
6391
self.fscache = fscache
64-
# A cache for package names, mapping from directory path to module id and base dir
65-
self.package_cache = {} # type: Dict[str, Tuple[str, str]]
66-
67-
def find_sources_in_dir(
68-
self, path: str, explicit_package_roots: Optional[List[str]]
69-
) -> List[BuildSource]:
70-
if explicit_package_roots is None:
71-
mod_prefix, root_dir = self.crawl_up_dir(path)
72-
else:
73-
mod_prefix = os.path.basename(path)
74-
root_dir = os.path.dirname(path) or "."
75-
if mod_prefix:
76-
mod_prefix += "."
77-
return self.find_sources_in_dir_helper(path, mod_prefix, root_dir, explicit_package_roots)
78-
79-
def find_sources_in_dir_helper(
80-
self, dir_path: str, mod_prefix: str, root_dir: str,
81-
explicit_package_roots: Optional[List[str]]
82-
) -> List[BuildSource]:
83-
assert not mod_prefix or mod_prefix.endswith(".")
84-
85-
init_file = self.get_init_file(dir_path)
86-
# If the current directory is an explicit package root, explore it as such.
87-
# Alternatively, if we aren't given explicit package roots and we don't have an __init__
88-
# file, recursively explore this directory as a new package root.
89-
if (
90-
(explicit_package_roots is not None and dir_path in explicit_package_roots)
91-
or (explicit_package_roots is None and init_file is None)
92-
):
93-
mod_prefix = ""
94-
root_dir = dir_path
92+
self.explicit_package_bases = get_explicit_package_bases(options)
93+
self.namespace_packages = options.namespace_packages
9594

96-
seen = set() # type: Set[str]
97-
sources = []
95+
def is_explicit_package_base(self, path: str) -> bool:
96+
assert self.explicit_package_bases
97+
return normalise_package_base(path) in self.explicit_package_bases
9898

99-
if init_file:
100-
sources.append(BuildSource(init_file, mod_prefix.rstrip("."), None, root_dir))
99+
def find_sources_in_dir(self, path: str) -> List[BuildSource]:
100+
sources = []
101101

102-
names = self.fscache.listdir(dir_path)
103-
names.sort(key=keyfunc)
102+
seen = set() # type: Set[str]
103+
names = sorted(self.fscache.listdir(path), key=keyfunc)
104104
for name in names:
105105
# Skip certain names altogether
106106
if name == '__pycache__' or name.startswith('.') or name.endswith('~'):
107107
continue
108-
path = os.path.join(dir_path, name)
108+
subpath = os.path.join(path, name)
109109

110-
if self.fscache.isdir(path):
111-
sub_sources = self.find_sources_in_dir_helper(
112-
path, mod_prefix + name + '.', root_dir, explicit_package_roots
113-
)
110+
if self.fscache.isdir(subpath):
111+
sub_sources = self.find_sources_in_dir(subpath)
114112
if sub_sources:
115113
seen.add(name)
116114
sources.extend(sub_sources)
117115
else:
118116
stem, suffix = os.path.splitext(name)
119-
if stem == '__init__':
120-
continue
121-
if stem not in seen and '.' not in stem and suffix in PY_EXTENSIONS:
117+
if stem not in seen and suffix in PY_EXTENSIONS:
122118
seen.add(stem)
123-
src = BuildSource(path, mod_prefix + stem, None, root_dir)
124-
sources.append(src)
119+
module, base_dir = self.crawl_up(subpath)
120+
sources.append(BuildSource(subpath, module, None, base_dir))
125121

126122
return sources
127123

128124
def crawl_up(self, path: str) -> Tuple[str, str]:
129-
"""Given a .py[i] filename, return module and base directory
125+
"""Given a .py[i] filename, return module and base directory.
126+
127+
For example, given "xxx/yyy/foo/bar.py", we might return something like:
128+
("foo.bar", "xxx/yyy")
129+
130+
If namespace packages is off, we crawl upwards until we find a directory without
131+
an __init__.py
130132
131-
We crawl up the path until we find a directory without
132-
__init__.py[i], or until we run out of path components.
133+
If namespace packages is on, we crawl upwards until the nearest explicit base directory.
134+
Failing that, we return one past the highest directory containing an __init__.py
135+
136+
We won't crawl past directories with invalid package names.
137+
The base directory returned is an absolute path.
133138
"""
139+
path = os.path.abspath(path)
134140
parent, filename = os.path.split(path)
135-
module_name = strip_py(filename) or os.path.basename(filename)
136-
module_prefix, base_dir = self.crawl_up_dir(parent)
137-
if module_name == '__init__' or not module_name:
138-
module = module_prefix
139-
else:
140-
module = module_join(module_prefix, module_name)
141141

142+
module_name = strip_py(filename) or filename
143+
144+
parent_module, base_dir = self.crawl_up_dir(parent)
145+
if module_name == "__init__":
146+
return parent_module, base_dir
147+
148+
# Note that module_name might not actually be a valid identifier, but that's okay
149+
# Ignoring this possibility sidesteps some search path confusion
150+
module = module_join(parent_module, module_name)
142151
return module, base_dir
143152

144153
def crawl_up_dir(self, dir: str) -> Tuple[str, str]:
145-
"""Given a directory name, return the corresponding module name and base directory
154+
return self._crawl_up_helper(dir) or ("", dir)
146155

147-
Use package_cache to cache results.
148-
"""
149-
if dir in self.package_cache:
150-
return self.package_cache[dir]
156+
@functools.lru_cache()
157+
def _crawl_up_helper(self, dir: str) -> Optional[Tuple[str, str]]:
158+
"""Given a directory, maybe returns module and base directory.
151159
152-
parent_dir, base = os.path.split(dir)
153-
if not dir or not self.get_init_file(dir) or not base:
154-
module = ''
155-
base_dir = dir or '.'
156-
else:
157-
# Ensure that base is a valid python module name
158-
if base.endswith('-stubs'):
159-
base = base[:-6] # PEP-561 stub-only directory
160-
if not base.isidentifier():
161-
raise InvalidSourceList('{} is not a valid Python package name'.format(base))
162-
parent_module, base_dir = self.crawl_up_dir(parent_dir)
163-
module = module_join(parent_module, base)
164-
165-
self.package_cache[dir] = module, base_dir
166-
return module, base_dir
160+
We return a non-None value if we were able to find something clearly intended as a base
161+
directory (as adjudicated by being an explicit base directory or by containing a package
162+
with __init__.py).
163+
164+
This distinction is necessary for namespace packages, so that we know when to treat
165+
ourselves as a subpackage.
166+
"""
167+
# stop crawling if we're an explicit base directory
168+
if self.explicit_package_bases is not None and self.is_explicit_package_base(dir):
169+
return "", dir
170+
171+
parent, name = os.path.split(dir)
172+
if name.endswith('-stubs'):
173+
name = name[:-6] # PEP-561 stub-only directory
174+
175+
# recurse if there's an __init__.py
176+
init_file = self.get_init_file(dir)
177+
if init_file is not None:
178+
if not name.isidentifier():
179+
# in most cases the directory name is invalid, we'll just stop crawling upwards
180+
# but if there's an __init__.py in the directory, something is messed up
181+
raise InvalidSourceList("{} is not a valid Python package name".format(name))
182+
# we're definitely a package, so we always return a non-None value
183+
mod_prefix, base_dir = self.crawl_up_dir(parent)
184+
return module_join(mod_prefix, name), base_dir
185+
186+
# stop crawling if we're out of path components or our name is an invalid identifier
187+
if not name or not parent or not name.isidentifier():
188+
return None
189+
190+
# stop crawling if namespace packages is off (since we don't have an __init__.py)
191+
if not self.namespace_packages:
192+
return None
193+
194+
# at this point: namespace packages is on, we don't have an __init__.py and we're not an
195+
# explicit base directory
196+
result = self._crawl_up_helper(parent)
197+
if result is None:
198+
# we're not an explicit base directory and we don't have an __init__.py
199+
# and none of our parents are either, so return
200+
return None
201+
# one of our parents was an explicit base directory or had an __init__.py, so we're
202+
# definitely a subpackage! chain our name to the module.
203+
mod_prefix, base_dir = result
204+
return module_join(mod_prefix, name), base_dir
167205

168206
def get_init_file(self, dir: str) -> Optional[str]:
169207
"""Check whether a directory contains a file named __init__.py[i].
@@ -185,8 +223,7 @@ def module_join(parent: str, child: str) -> str:
185223
"""Join module ids, accounting for a possibly empty parent."""
186224
if parent:
187225
return parent + '.' + child
188-
else:
189-
return child
226+
return child
190227

191228

192229
def strip_py(arg: str) -> Optional[str]:

mypy/fscache.py

+4
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@
3232
import stat
3333
from typing import Dict, List, Set
3434
from mypy.util import hash_digest
35+
from mypy_extensions import mypyc_attr
3536

3637

38+
@mypyc_attr(allow_interpreted_subclasses=True) # for tests
3739
class FileSystemCache:
3840
def __init__(self) -> None:
3941
# The package root is not flushed with the caches.
@@ -112,6 +114,8 @@ def init_under_package_root(self, path: str) -> bool:
112114
return False
113115
ok = False
114116
drive, path = os.path.splitdrive(path) # Ignore Windows drive name
117+
if os.path.isabs(path):
118+
path = os.path.relpath(path)
115119
path = os.path.normpath(path)
116120
for root in self.package_root:
117121
if path.startswith(root):

mypy/main.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -786,6 +786,9 @@ def add_invertible_flag(flag: str,
786786
title="Running code",
787787
description="Specify the code you want to type check. For more details, see "
788788
"mypy.readthedocs.io/en/latest/running_mypy.html#running-mypy")
789+
code_group.add_argument(
790+
'--explicit-package-bases', action='store_true',
791+
help="Use current directory and MYPYPATH to determine module names of files passed")
789792
code_group.add_argument(
790793
'-m', '--module', action='append', metavar='MODULE',
791794
default=[],
@@ -862,6 +865,11 @@ def set_strict_flags() -> None:
862865
parser.error("Missing target module, package, files, or command.")
863866
elif code_methods > 1:
864867
parser.error("May only specify one of: module/package, files, or command.")
868+
if options.explicit_package_bases and not options.namespace_packages:
869+
parser.error(
870+
"Can only use --explicit-base-dirs with --namespace-packages, since otherwise "
871+
"examining __init__.py's is sufficient to determine module names for files"
872+
)
865873

866874
# Check for overlapping `--always-true` and `--always-false` flags.
867875
overlap = set(options.always_true) & set(options.always_false)
@@ -980,12 +988,12 @@ def process_package_roots(fscache: Optional[FileSystemCache],
980988
# Empty package root is always okay.
981989
if root:
982990
root = os.path.relpath(root) # Normalize the heck out of it.
991+
if not root.endswith(os.sep):
992+
root = root + os.sep
983993
if root.startswith(dotdotslash):
984994
parser.error("Package root cannot be above current directory: %r" % root)
985995
if root in trivial_paths:
986996
root = ''
987-
elif not root.endswith(os.sep):
988-
root = root + os.sep
989997
package_root.append(root)
990998
options.package_root = package_root
991999
# Pass the package root on the the filesystem cache.

0 commit comments

Comments
 (0)