Skip to content

Commit

Permalink
Make SoImport importable (#229)
Browse files Browse the repository at this point in the history
* move a bunch of stuff to plz.py

* Add it to the zipfile

* version
  • Loading branch information
peterebden authored Nov 19, 2024
1 parent 79abc76 commit ea37e5f
Show file tree
Hide file tree
Showing 7 changed files with 193 additions and 177 deletions.
4 changes: 4 additions & 0 deletions tools/ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
Version 1.6.0
-------------
* Import hooks are now added in the `plz` module and are hence more usefully importable (#229)

Version 1.5.5
-------------
* Fix get_code on ModuleDirImport (#226)
Expand Down
2 changes: 1 addition & 1 deletion tools/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.5.5
1.6.0
8 changes: 7 additions & 1 deletion tools/please_pex/pex/pex.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,14 @@ func (pw *Writer) Write(out, moduleDir string) error {
}
}

// Write plz.py, which contains many of our import hooks etc.
b := mustRead("plz.py")
if err := f.WriteFile(".bootstrap/plz.py", b, 0644); err != nil {
return err
}

// Always write pex_main.py, with some templating.
b := mustRead("pex_main.py")
b = mustRead("pex_main.py")
b = bytes.Replace(b, []byte("__MODULE_DIR__"), []byte(strings.ReplaceAll(moduleDir, ".", "/")), 1)
b = bytes.Replace(b, []byte("__ENTRY_POINT__"), []byte(pw.realEntryPoint), 1)
b = bytes.Replace(b, []byte("__ZIP_SAFE__"), []byte(pythonBool(pw.zipSafe)), 1)
Expand Down
180 changes: 9 additions & 171 deletions tools/please_pex/pex/pex_main.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,8 @@
"""Zipfile entry point which supports auto-extracting itself based on zip-safety."""

from collections import defaultdict
from importlib import import_module, machinery
from importlib.abc import MetaPathFinder
from importlib.metadata import Distribution
from importlib.util import spec_from_loader
from site import getsitepackages
import itertools
import os
import re
import runpy
import sys
import tempfile
import zipfile

# Put this pex on the path before anything else.
PEX = os.path.abspath(sys.argv[0])
Expand All @@ -26,168 +16,18 @@
ZIP_SAFE = __ZIP_SAFE__
PEX_STAMP = '__PEX_STAMP__'

# Workaround for https://bugs.python.org/issue15795
class ZipFileWithPermissions(zipfile.ZipFile):
    """ZipFile variant that re-applies recorded file modes after extraction."""

    def _extract_member(self, member, targetpath, pwd):
        """Extract a single member, then chmod it to the mode stored in the archive.

        Accepts either a ZipInfo or a member name and returns the path that the
        base class reports the member was extracted to.
        """
        info = member if isinstance(member, zipfile.ZipInfo) else self.getinfo(member)
        extracted = super()._extract_member(info, targetpath, pwd)

        # The high 16 bits of external_attr are used as the mode; zero means
        # nothing was recorded, so the extracted file is left untouched.
        mode = info.external_attr >> 16
        if mode:
            os.chmod(extracted, mode)
        return extracted

class SoImport(MetaPathFinder):
    """Meta path finder/loader for extension modules stored inside the pex.

    On construction the zipfile at sys.argv[0] is scanned for members ending in
    one of the interpreter's extension suffixes. Each one is registered under
    its dotted import path, and additionally under the path relative to the
    module directory, so that it can be imported by either name.
    """

    def __init__(self, module_dir=None):
        """Scan the pex for importable extension modules.

        :param module_dir: slash-separated directory within the pex whose
            contents should also be importable as top-level names. Defaults
            to the module-level MODULE_DIR when omitted.
        """
        if module_dir is None:
            module_dir = MODULE_DIR
        self.suffixes = machinery.EXTENSION_SUFFIXES  # list, as importlib will not be using the file description
        self.suffixes_by_length = sorted(self.suffixes, key=lambda x: -len(x))
        # Identify all the possible modules we could handle.
        self.modules = {}
        if zipfile.is_zipfile(sys.argv[0]):
            zf = ZipFileWithPermissions(sys.argv[0])
            for name in zf.namelist():
                self._register(name, module_dir)
            if self.modules:
                self.zf = zf
            else:
                # Nothing for us to load later, so don't hold the archive open.
                zf.close()

    def _register(self, name, module_dir):
        """Record zip member `name` in self.modules if it is an extension module."""
        path, _ = self.splitext(name)
        if not path:
            return
        path = path.removeprefix('.bootstrap/')
        self.modules.setdefault(path.replace('/', '.'), name)
        # Only alias paths that genuinely live inside module_dir: require a
        # path separator after it, and skip the alias entirely when it's empty
        # (otherwise we'd register a name with its first character chopped off).
        module_dir = module_dir.strip('/')
        if module_dir and path.startswith(module_dir + '/'):
            relative = path[len(module_dir) + 1:]
            self.modules.setdefault(relative.replace('/', '.'), name)

    def find_spec(self, name, path, target=None):
        """Implements abc.MetaPathFinder."""
        if name in self.modules:
            return spec_from_loader(name, self)

    def create_module(self, spec):
        """Create a module object that we're going to load."""
        filename = self.modules[spec.name]
        prefix, ext = self.splitext(filename)
        # The extension has to exist on disk to be loaded, so copy it out to a
        # temporary file which only needs to live until the module is created.
        with tempfile.NamedTemporaryFile(suffix=ext, prefix=os.path.basename(prefix)) as f:
            f.write(self.zf.read(filename))
            f.flush()
            spec.origin = f.name
            loader = machinery.ExtensionFileLoader(spec.name, f.name)
            spec.loader = loader
            mod = loader.create_module(spec)
        # Make it look like module came from the original location for nicer tracebacks.
        mod.__file__ = filename
        return mod

    def exec_module(self, mod):
        """Because we set spec.loader above, the ExtensionFileLoader's exec_module is called."""
        raise NotImplementedError("SoImport.exec_module isn't used")

    def splitext(self, path):
        """Similar to os.path.splitext, but splits our longest known suffix preferentially.

        Returns (None, None) when the path has none of the known suffixes.
        """
        for suffix in self.suffixes_by_length:
            if path.endswith(suffix):
                return path[:-len(suffix)], suffix
        return None, None


class PexDistribution(Distribution):
    """A distribution package whose metadata lives inside a pex (i.e. zip) file.

    Instances are built from the members of a dist-info or egg-info directory
    found in the pex; that directory may sit under a directory prefix rather
    than at the top level of the archive, in which case the prefix is supplied
    to the constructor.
    """
    def __init__(self, name, pex_file, zip_file, files, prefix):
        # Mapping of <path within distribution> -> <full path in zipfile>
        self._files = files
        self._prefix = prefix
        self._pex_file = pex_file
        self._zf = zip_file
        self._name = name

    def read_text(self, filename):
        member = self._files.get(filename)
        if not member:
            return None
        return self._zf.read(member).decode("utf-8")

    def locate_file(self, path):
        # Paths are relative to the prefix directory when one was given.
        location = path
        if self._prefix:
            location = os.path.join(self._prefix, path)
        return zipfile.Path(self._pex_file, at=location)

    read_text.__doc__ = Distribution.read_text.__doc__


class ModuleDirImport(MetaPathFinder):
    """Handles imports to a directory equivalently to them being at the top level.

    This means that if one writes `import third_party.python.six`, it's imported like `import six`,
    but becomes accessible under both names. This handles both the fully-qualified import names
    and packages importing as their expected top-level names internally.

    It also acts as a distribution finder for any dist-info / egg-info
    directories found under the module directory in the pex.
    """
    def __init__(self, module_dir=None):
        """
        :param module_dir: slash-separated directory within the pex. Defaults to
            the module-level MODULE_DIR, looked up at call time so that this
            class can be defined before (or without) that global.
        """
        if module_dir is None:
            module_dir = MODULE_DIR
        self.prefix = module_dir.replace("/", ".") + "."
        self._distributions = self._find_all_distributions(module_dir)

    @staticmethod
    def _dist_info_pattern(module_dir):
        """Compile the regex matching metadata files under module_dir.

        Group 1 is the distribution name, group 2 the path within its
        dist-info / egg-info directory.
        """
        # module_dir is matched literally, and zip member names always use '/'
        # regardless of the host platform's path separator.
        return re.compile(re.escape(module_dir) + r"/([^/]+)-[^/-]+?\.(?:dist|egg)-info/(.*)")

    def _find_all_distributions(self, module_dir):
        """Map distribution name -> [PexDistribution] for everything in the pex."""
        pex_file = sys.argv[0]
        if not zipfile.is_zipfile(pex_file):
            return {}
        zf = ZipFileWithPermissions(pex_file)
        pattern = self._dist_info_pattern(module_dir)
        filenames = defaultdict(dict)
        for name in zf.namelist():
            match = pattern.match(name)
            if match:
                filenames[match.group(1)][match.group(2)] = name
        if not filenames:
            # No distribution will ever read from the archive, so release it.
            zf.close()
            return {}
        return {mod: [PexDistribution(mod, pex_file, zf, files, prefix=module_dir)]
                for mod, files in filenames.items()}

    def find_spec(self, name, path, target=None):
        """Implements abc.MetaPathFinder."""
        if name.startswith(self.prefix):
            return spec_from_loader(name, self)

    def create_module(self, spec):
        """Actually load a module that we said we'd handle in find_spec."""
        module = import_module(spec.name.removeprefix(self.prefix))
        # Make the same module object reachable under the prefixed name too.
        sys.modules[spec.name] = module
        return module

    def exec_module(self, mod):
        """Nothing to do, create_module already did the work."""

    def find_distributions(self, context):
        """Return an iterable of all Distribution instances capable of
        loading the metadata for packages for the indicated ``context``.
        """
        if context.name:
            # The installed directories have underscores in the place of what might be a hyphen
            # in the package name (e.g. the package opentelemetry-sdk installs opentelemetry_sdk).
            return self._distributions.get(context.name.replace("-", "_"), [])
        else:
            return itertools.chain(*self._distributions.values())

    def get_code(self, fullname):
        """Return the code object from the loader of the underlying (unprefixed) module."""
        module = import_module(fullname.removeprefix(self.prefix))
        return module.__loader__.get_code(fullname)


def add_module_dir_to_sys_path(dirname):

def add_module_dir_to_sys_path(dirname, zip_safe=True):
"""Adds the given dirname to sys.path if it's nonempty."""
# Add .bootstrap dir to path, after the initial pex entry
sys.path = sys.path[:1] + [os.path.join(sys.path[0], '.bootstrap')] + sys.path[1:]
# Now we have .bootstrap on the path, we can import our own hooks.
import plz
if dirname:
sys.path = sys.path[:1] + [os.path.join(sys.path[0], dirname)] + sys.path[1:]
sys.meta_path.insert(0, ModuleDirImport(dirname))
sys.meta_path.insert(0, plz.ModuleDirImport(dirname))
if zip_safe:
sys.meta_path.append(plz.SoImport(MODULE_DIR))


def pex_basepath(temp=False):
Expand Down Expand Up @@ -297,8 +137,6 @@ def main():
N.B. This gets redefined by pex_test_main to run tests instead.
"""
# Add .bootstrap dir to path, after the initial pex entry
sys.path = sys.path[:1] + [os.path.join(sys.path[0], '.bootstrap')] + sys.path[1:]
# Starts a debugging session, if defined, before running the entry point.
if os.getenv("PLZ_DEBUG") is not None:
start_debugger()
Expand Down
3 changes: 1 addition & 2 deletions tools/please_pex/pex/pex_run.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
def run(explode=False):
if explode or not ZIP_SAFE:
with explode_zip()():
add_module_dir_to_sys_path(MODULE_DIR)
add_module_dir_to_sys_path(MODULE_DIR, zip_safe=False)
return main()
else:
add_module_dir_to_sys_path(MODULE_DIR)
sys.meta_path.append(SoImport())
return main()


Expand Down
2 changes: 0 additions & 2 deletions tools/please_pex/pex/pex_test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ def _xml_file(self, fr, analysis, *args, **kvargs):
def main():
"""Runs the tests. Returns an appropriate exit code."""
args = [arg for arg in sys.argv[1:]]
# Add .bootstrap dir to path, after the initial pex entry
sys.path = sys.path[:1] + [os.path.join(sys.path[0], '.bootstrap')] + sys.path[1:]
if os.getenv('COVERAGE'):
# It's important that we run coverage while we load the tests otherwise
# we get no coverage for import statements etc.
Expand Down
Loading

0 comments on commit ea37e5f

Please sign in to comment.