Skip to content

Fix race condition for import of modules which use apipkg in Python 3.3+ #27

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
2.1.0
----------------------------------------

- fix race condition for import of modules using apipkg.initpkg in Python 3.3+
by updating existing modules in-place rather than replacing in sys.modules
with an apipkg.ApiModule instances. This race condition exists for
import statements (and __import__) in Python 3.3+ where sys.modules is
checked before obtaining an import lock, and for importlib.import_module
in Python 3.11+ for the same reason.

2.0.1
----------------------------------------

Expand Down
105 changes: 85 additions & 20 deletions src/apipkg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,19 @@
from .version import version as __version__ # NOQA:F401


_PY2 = sys.version_info[0] == 2
_PRESERVED_MODULE_ATTRS = {
"__file__",
"__version__",
"__loader__",
"__path__",
"__package__",
"__doc__",
"__spec__",
"__dict__",
}


def _py_abspath(path):
"""
special version of abspath
Expand Down Expand Up @@ -48,33 +61,85 @@ def distribution_version(name):
def initpkg(pkgname, exportdefs, attr=None, eager=False):
""" initialize given package from the export definitions. """
attr = attr or {}
oldmod = sys.modules.get(pkgname)
mod = sys.modules.get(pkgname)

if _PY2:
mod = _initpkg_py2(mod, pkgname, exportdefs, attr=attr)
else:
mod = _initpkg_py3(mod, pkgname, exportdefs, attr=attr)

# eagerload in bypthon to avoid their monkeypatching breaking packages
if "bpython" in sys.modules or eager:
for module in list(sys.modules.values()):
if isinstance(module, ApiModule):
module.__dict__

return mod


def _initpkg_py2(mod, pkgname, exportdefs, attr=None):
"""Python 2 helper for initpkg.

In Python 2 we can't update __class__ for an instance of types.Module, and
imports are protected by the global import lock anyway, so it is safe for a
module to replace itself during import.

"""
d = {}
f = getattr(oldmod, "__file__", None)
f = getattr(mod, "__file__", None)
if f:
f = _py_abspath(f)
d["__file__"] = f
if hasattr(oldmod, "__version__"):
d["__version__"] = oldmod.__version__
if hasattr(oldmod, "__loader__"):
d["__loader__"] = oldmod.__loader__
if hasattr(oldmod, "__path__"):
d["__path__"] = [_py_abspath(p) for p in oldmod.__path__]
if hasattr(oldmod, "__package__"):
d["__package__"] = oldmod.__package__
if "__doc__" not in exportdefs and getattr(oldmod, "__doc__", None):
d["__doc__"] = oldmod.__doc__
d["__spec__"] = getattr(oldmod, "__spec__", None)
if hasattr(mod, "__version__"):
d["__version__"] = mod.__version__
if hasattr(mod, "__loader__"):
d["__loader__"] = mod.__loader__
if hasattr(mod, "__path__"):
d["__path__"] = [_py_abspath(p) for p in mod.__path__]
if hasattr(mod, "__package__"):
d["__package__"] = mod.__package__
if "__doc__" not in exportdefs and getattr(mod, "__doc__", None):
d["__doc__"] = mod.__doc__
d["__spec__"] = getattr(mod, "__spec__", None)
d.update(attr)
if hasattr(oldmod, "__dict__"):
oldmod.__dict__.update(d)
if hasattr(mod, "__dict__"):
mod.__dict__.update(d)
mod = ApiModule(pkgname, exportdefs, implprefix=pkgname, attr=d)
sys.modules[pkgname] = mod
# eagerload in bypthon to avoid their monkeypatching breaking packages
if "bpython" in sys.modules or eager:
for module in list(sys.modules.values()):
if isinstance(module, ApiModule):
module.__dict__
return mod


def _initpkg_py3(mod, pkgname, exportdefs, attr=None):
"""Python 3 helper for initpkg.

Python 3.3+ uses finer grained locking for imports, and checks sys.modules before
acquiring the lock to avoid the overhead of the fine-grained locking. This
introduces a race condition when a module is imported by multiple threads
concurrently - some threads will see the initial module and some the replacement
ApiModule. We avoid this by updating the existing module in-place.

"""
if mod is None:
d = {"__file__": None, "__spec__": None}
d.update(attr)
mod = ApiModule(pkgname, exportdefs, implprefix=pkgname, attr=d)
sys.modules[pkgname] = mod
else:
f = getattr(mod, "__file__", None)
if f:
f = _py_abspath(f)
mod.__file__ = f
if hasattr(mod, "__path__"):
mod.__path__ = [_py_abspath(p) for p in mod.__path__]
if "__doc__" in exportdefs and hasattr(mod, "__doc__"):
del mod.__doc__
for name in dir(mod):
if name not in _PRESERVED_MODULE_ATTRS:
delattr(mod, name)

# Updating class of existing module as per importlib.util.LazyLoader
mod.__class__ = ApiModule
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This breaks the compatibility with Python 3.4: #29.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is unfortunate, I'm not sure there's a way to fix the race condition if we can't update the module in place.

Options:

  • Figure out a way to update the module in place in Python <= 3.4, maybe by copying from ApiModule into module instance?
  • Accept the race condition in Python <= 3.4 and don't try to update in place
  • Drop support for Python 3.4 completely, and update python_requires appropriately

I'm not sure what @RonnyPfannschmidt's intention was for 3.4 support, I just relied on the automated tests.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if setattr() would somehow work.

Copy link
Member

@webknjaz webknjaz Nov 25, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@GlenWalker dropping 3.4 from the matrix was accidental: #31

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if setattr() would somehow work.

Setting attributes directly on the original module instance would probably work for eager=True, but not for eager=False. I had a quick look, but so far the only options for lazy loading require support from the module class (i.e. Python calls __getattr__ from the module class, and not the module instance). We wouldn't want to update __getattr__ on types.ModuleType globally, so if we can't update __class__ on the module instance then we are stuck.

I suspect that for Python 3.4 we'll just have to accept the race condition, and replace the module, rather than updating it in place. Unless anyway has any good ideas?

(The race condition has existed for a long time without anyone complaining, so this might be acceptable)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds reasonable.

mod.__init__(pkgname, exportdefs, implprefix=pkgname, attr=attr)
return mod


Expand Down
72 changes: 66 additions & 6 deletions test_apipkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@

import apipkg


PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] == 3


#
# test support for importing modules
#
Expand Down Expand Up @@ -270,12 +275,12 @@ def parsenamespace(spec):
return ns


def test_initpkg_replaces_sysmodules(monkeypatch):
def test_initpkg_updates_sysmodules(monkeypatch):
mod = ModuleType("hello")
monkeypatch.setitem(sys.modules, "hello", mod)
apipkg.initpkg("hello", {"x": "os.path:abspath"})
newmod = sys.modules["hello"]
assert newmod != mod
assert (PY2 and newmod != mod) or (PY3 and newmod is mod)
assert newmod.x == os.path.abspath


Expand All @@ -286,15 +291,17 @@ def test_initpkg_transfers_attrs(monkeypatch):
mod.__loader__ = "loader"
mod.__package__ = "package"
mod.__doc__ = "this is the documentation"
mod.goodbye = "goodbye"
monkeypatch.setitem(sys.modules, "hello", mod)
apipkg.initpkg("hello", {})
newmod = sys.modules["hello"]
assert newmod != mod
assert (PY2 and newmod != mod) or (PY3 and newmod is mod)
assert newmod.__file__ == os.path.abspath(mod.__file__)
assert newmod.__version__ == mod.__version__
assert newmod.__loader__ == mod.__loader__
assert newmod.__package__ == mod.__package__
assert newmod.__doc__ == mod.__doc__
assert not hasattr(newmod, "goodbye")


def test_initpkg_nodoc(monkeypatch):
Expand All @@ -312,7 +319,7 @@ def test_initpkg_overwrite_doc(monkeypatch):
monkeypatch.setitem(sys.modules, "hello", hello)
apipkg.initpkg("hello", {"__doc__": "sys:__doc__"})
newhello = sys.modules["hello"]
assert newhello != hello
assert (PY2 and newhello != hello) or (PY3 and newhello is hello)
assert newhello.__doc__ == sys.__doc__


Expand All @@ -324,7 +331,7 @@ def test_initpkg_not_transfers_not_existing_attrs(monkeypatch):
monkeypatch.setitem(sys.modules, "hello", mod)
apipkg.initpkg("hello", {})
newmod = sys.modules["hello"]
assert newmod != mod
assert (PY2 and newmod != mod) or (PY3 and newmod is mod)
assert newmod.__file__ == os.path.abspath(mod.__file__)
assert not hasattr(newmod, "__path__")
assert not hasattr(newmod, "__package__") or mod.__package__ is None
Expand All @@ -337,7 +344,7 @@ def test_initpkg_not_changing_jython_paths(monkeypatch):
monkeypatch.setitem(sys.modules, "hello", mod)
apipkg.initpkg("hello", {})
newmod = sys.modules["hello"]
assert newmod != mod
assert (PY2 and newmod != mod) or (PY3 and newmod is mod)
assert newmod.__file__.startswith("__pyclasspath__")
unchanged, changed = newmod.__path__
assert changed != "ichange"
Expand Down Expand Up @@ -565,6 +572,59 @@ def run(self):
assert thread.attr == 42


@pytest.mark.skipif("threading" not in sys.modules, reason="requires thread support")
def test_import_race(tmpdir, monkeypatch):
pkgdir = tmpdir.mkdir("importrace")
pkgdir.join("__init__.py").write(
textwrap.dedent(
"""
import time
time.sleep(0.1)
import apipkg
apipkg.initpkg(__name__, exportdefs={
'attr': '.submod:attr',
},
)
"""
)
)
pkgdir.join("submod.py").write(
textwrap.dedent(
"""
attr = 43
"""
)
)
monkeypatch.syspath_prepend(tmpdir)

class TestThread(threading.Thread):
def __init__(self):
super(TestThread, self).__init__()
self.importrace = None

def run(self):
import importrace

self.importrace = importrace

threads = [TestThread() for _ in range(8)]
for thread in threads:
thread.start()
for thread in threads:
thread.join()
for thread in threads:
assert isinstance(thread.importrace, apipkg.ApiModule)
assert thread.importrace.attr == 43

import importrace

assert isinstance(importrace, apipkg.ApiModule)
assert importrace.attr == 43

for thread in threads:
assert thread.importrace is importrace


def test_bpython_getattr_override(tmpdir, monkeypatch):
def patchgetattr(self, name):
raise AttributeError(name)
Expand Down