diff --git a/examples/client/client b/examples/client/client index 9eaffc2308..eeab472d3e 100755 --- a/examples/client/client +++ b/examples/client/client @@ -11,7 +11,8 @@ import sys import traceback from hashlib import sha256 from pathlib import Path -from urllib import request + +import urllib3 from tuf.api.exceptions import DownloadError, RepositoryError from tuf.ngclient import Updater @@ -29,19 +30,30 @@ def build_metadata_dir(base_url: str) -> str: def init_tofu(base_url: str) -> bool: """Initialize local trusted metadata (Trust-On-First-Use) and create a - directory for downloads""" + directory for downloads + + NOTE: This is unsafe and for demonstration only: the bootstrap root + should be deployed alongside your updater application + """ + metadata_dir = build_metadata_dir(base_url) if not os.path.isdir(metadata_dir): os.makedirs(metadata_dir) - root_url = f"{base_url}/metadata/1.root.json" - try: - request.urlretrieve(root_url, f"{metadata_dir}/root.json") - except OSError: - print(f"Failed to download initial root from {root_url}") + response = urllib3.request("GET", f"{base_url}/metadata/1.root.json") + if response.status != 200: + print(f"Failed to download initial root {base_url}/metadata/1.root.json") return False + Updater( + metadata_dir=metadata_dir, + metadata_base_url=f"{base_url}/metadata/", + target_base_url=f"{base_url}/targets/", + target_dir=DOWNLOAD_DIR, + bootstrap=response.data, + ) + print(f"Trust-on-First-Use: Initialized new root in {metadata_dir}") return True @@ -73,6 +85,9 @@ def download(base_url: str, target: str) -> bool: os.mkdir(DOWNLOAD_DIR) try: + # NOTE: initial root should be provided with ``bootstrap`` argument: + # This example uses unsafe Trust-On-First-Use initialization so it is + # not possible here. 
updater = Updater( metadata_dir=metadata_dir, metadata_base_url=f"{base_url}/metadata/", @@ -104,7 +119,7 @@ def download(base_url: str, target: str) -> bool: return True -def main() -> None: +def main() -> str | None: """Main TUF Client Example function""" client_args = argparse.ArgumentParser(description="TUF Client Example") @@ -169,6 +184,8 @@ def main() -> None: else: client_args.print_help() + return None + if __name__ == "__main__": sys.exit(main()) diff --git a/tests/test_updater_consistent_snapshot.py b/tests/test_updater_consistent_snapshot.py index 35497864f9..4ceb1fe7f9 100644 --- a/tests/test_updater_consistent_snapshot.py +++ b/tests/test_updater_consistent_snapshot.py @@ -62,7 +62,7 @@ def teardown_subtest(self) -> None: if self.dump_dir is not None: self.sim.write() - utils.cleanup_dir(self.metadata_dir) + utils.cleanup_metadata_dir(self.metadata_dir) def _init_repo( self, consistent_snapshot: bool, prefix_targets: bool = True diff --git a/tests/test_updater_delegation_graphs.py b/tests/test_updater_delegation_graphs.py index ce42a5f6e3..ecdecdd19e 100644 --- a/tests/test_updater_delegation_graphs.py +++ b/tests/test_updater_delegation_graphs.py @@ -92,7 +92,7 @@ def setup_subtest(self) -> None: self.sim.write() def teardown_subtest(self) -> None: - utils.cleanup_dir(self.metadata_dir) + utils.cleanup_metadata_dir(self.metadata_dir) def _init_repo(self, test_case: DelegationsTestCase) -> None: """Create a new RepositorySimulator instance and diff --git a/tests/test_updater_ng.py b/tests/test_updater_ng.py index 9a89c1deea..b37003bb3f 100644 --- a/tests/test_updater_ng.py +++ b/tests/test_updater_ng.py @@ -11,6 +11,7 @@ import sys import tempfile import unittest +from collections.abc import Iterable from typing import TYPE_CHECKING, Callable, ClassVar from unittest.mock import MagicMock, patch diff --git a/tests/test_updater_top_level_update.py b/tests/test_updater_top_level_update.py index fb053cb9f0..f4342f5f97 100644 --- 
a/tests/test_updater_top_level_update.py +++ b/tests/test_updater_top_level_update.py @@ -62,10 +62,6 @@ def setUp(self) -> None: self.sim = RepositorySimulator() - # boostrap client with initial root metadata - with open(os.path.join(self.metadata_dir, "root.json"), "bw") as f: - f.write(self.sim.signed_roots[0]) - if self.dump_dir is not None: # create test specific dump directory name = self.id().split(".")[-1] @@ -75,22 +71,13 @@ def setUp(self) -> None: def tearDown(self) -> None: self.temp_dir.cleanup() - def _run_refresh(self) -> Updater: + def _run_refresh(self, skip_bootstrap: bool = False) -> Updater: """Create a new Updater instance and refresh""" - if self.dump_dir is not None: - self.sim.write() - - updater = Updater( - self.metadata_dir, - "https://example.com/metadata/", - self.targets_dir, - "https://example.com/targets/", - self.sim, - ) + updater = self._init_updater(skip_bootstrap) updater.refresh() return updater - def _init_updater(self) -> Updater: + def _init_updater(self, skip_bootstrap: bool = False) -> Updater: """Create a new Updater instance""" if self.dump_dir is not None: self.sim.write() @@ -101,6 +88,7 @@ def _init_updater(self) -> Updater: self.targets_dir, "https://example.com/targets/", self.sim, + bootstrap=None if skip_bootstrap else self.sim.signed_roots[0], ) def _assert_files_exist(self, roles: Iterable[str]) -> None: @@ -126,9 +114,6 @@ def _assert_version_equals(self, role: str, expected_version: int) -> None: self.assertEqual(md.signed.version, expected_version) def test_first_time_refresh(self) -> None: - # Metadata dir contains only the mandatory initial root.json - self._assert_files_exist([Root.type]) - # Add one more root version to repository so that # refresh() updates from local trusted root (v1) to # remote root (v2) @@ -142,10 +127,11 @@ def test_first_time_refresh(self) -> None: version = 2 if role == Root.type else None self._assert_content_equals(role, version) - def test_trusted_root_missing(self) -> None: - 
os.remove(os.path.join(self.metadata_dir, "root.json")) + def test_cached_root_missing_without_bootstrap(self) -> None: + # Run update without a bootstrap, with empty cache: this fails since there is no + # trusted root with self.assertRaises(OSError): - self._run_refresh() + self._run_refresh(skip_bootstrap=True) # Metadata dir is empty self.assertFalse(os.listdir(self.metadata_dir)) @@ -178,15 +164,15 @@ def test_trusted_root_expired(self) -> None: self._assert_files_exist(TOP_LEVEL_ROLE_NAMES) self._assert_content_equals(Root.type, 3) - def test_trusted_root_unsigned(self) -> None: - # Local trusted root is not signed + def test_trusted_root_unsigned_without_bootstrap(self) -> None: + # Cached root is not signed, bootstrap root is not used root_path = os.path.join(self.metadata_dir, "root.json") - md_root = Metadata.from_file(root_path) + md_root = Metadata.from_bytes(self.sim.signed_roots[0]) md_root.signatures.clear() md_root.to_file(root_path) with self.assertRaises(UnsignedMetadataError): - self._run_refresh() + self._run_refresh(skip_bootstrap=True) # The update failed, no changes in metadata self._assert_files_exist([Root.type]) @@ -204,10 +190,7 @@ def test_max_root_rotations(self) -> None: self.sim.root.version += 1 self.sim.publish_root() - md_root = Metadata.from_file( - os.path.join(self.metadata_dir, "root.json") - ) - initial_root_version = md_root.signed.version + initial_root_version = 1 updater.refresh() @@ -712,26 +695,20 @@ def test_load_metadata_from_cache(self, wrapped_open: MagicMock) -> None: updater = self._run_refresh() updater.get_targetinfo("non_existent_target") - # Clean up calls to open during refresh() + # Clear statistics for open() calls and metadata requests wrapped_open.reset_mock() - # Clean up fetch tracker metadata self.sim.fetch_tracker.metadata.clear() # Create a new updater and perform a second update while # the metadata is already stored in cache (metadata dir) - updater = Updater( - self.metadata_dir, - 
"https://example.com/metadata/", - self.targets_dir, - "https://example.com/targets/", - self.sim, - ) + updater = self._init_updater() updater.get_targetinfo("non_existent_target") # Test that metadata is loaded from cache and not downloaded + root_dir = os.path.join(self.metadata_dir, "root_history") wrapped_open.assert_has_calls( [ - call(os.path.join(self.metadata_dir, "root.json"), "rb"), + call(os.path.join(root_dir, "2.root.json"), "rb"), call(os.path.join(self.metadata_dir, "timestamp.json"), "rb"), call(os.path.join(self.metadata_dir, "snapshot.json"), "rb"), call(os.path.join(self.metadata_dir, "targets.json"), "rb"), @@ -742,6 +719,96 @@ def test_load_metadata_from_cache(self, wrapped_open: MagicMock) -> None: expected_calls = [("root", 2), ("timestamp", None)] self.assertListEqual(self.sim.fetch_tracker.metadata, expected_calls) + @patch.object(builtins, "open", wraps=builtins.open) + def test_intermediate_root_cache(self, wrapped_open: MagicMock) -> None: + """Test that refresh uses the intermediate roots from cache""" + # Add root versions 2, 3 + self.sim.root.version += 1 + self.sim.publish_root() + self.sim.root.version += 1 + self.sim.publish_root() + + # Make a successful update of valid metadata which stores it in cache + self._run_refresh() + + # assert that cache lookups happened but data was downloaded from remote + root_dir = os.path.join(self.metadata_dir, "root_history") + wrapped_open.assert_has_calls( + [ + call(os.path.join(root_dir, "2.root.json"), "rb"), + call(os.path.join(root_dir, "3.root.json"), "rb"), + call(os.path.join(root_dir, "4.root.json"), "rb"), + call(os.path.join(self.metadata_dir, "timestamp.json"), "rb"), + call(os.path.join(self.metadata_dir, "snapshot.json"), "rb"), + call(os.path.join(self.metadata_dir, "targets.json"), "rb"), + ] + ) + expected_calls = [ + ("root", 2), + ("root", 3), + ("root", 4), + ("timestamp", None), + ("snapshot", 1), + ("targets", 1), + ] + 
self.assertListEqual(self.sim.fetch_tracker.metadata, expected_calls) + + # Clear statistics for open() calls and metadata requests + wrapped_open.reset_mock() + self.sim.fetch_tracker.metadata.clear() + + # Run update again, assert that metadata from cache was used (including intermediate roots) + self._run_refresh() + wrapped_open.assert_has_calls( + [ + call(os.path.join(root_dir, "2.root.json"), "rb"), + call(os.path.join(root_dir, "3.root.json"), "rb"), + call(os.path.join(root_dir, "4.root.json"), "rb"), + call(os.path.join(self.metadata_dir, "timestamp.json"), "rb"), + call(os.path.join(self.metadata_dir, "snapshot.json"), "rb"), + call(os.path.join(self.metadata_dir, "targets.json"), "rb"), + ] + ) + expected_calls = [("root", 4), ("timestamp", None)] + self.assertListEqual(self.sim.fetch_tracker.metadata, expected_calls) + + def test_intermediate_root_cache_poisoning(self) -> None: + """Test that refresh works as expected when intermediate roots in cache are poisoned""" + # Add root versions 2, 3 + self.sim.root.version += 1 + self.sim.publish_root() + self.sim.root.version += 1 + self.sim.publish_root() + + # Make a successful update of valid metadata which stores it in cache + self._run_refresh() + + # Modify cached intermediate root v2 so that it's no longer signed correctly + root_path = os.path.join( + self.metadata_dir, "root_history", "2.root.json" + ) + md = Metadata.from_file(root_path) + md.signatures.clear() + md.to_file(root_path) + + # Clear statistics for metadata requests + self.sim.fetch_tracker.metadata.clear() + + # Update again, assert that intermediate root v2 was downloaded again + self._run_refresh() + + expected_calls = [("root", 2), ("root", 4), ("timestamp", None)] + self.assertListEqual(self.sim.fetch_tracker.metadata, expected_calls) + + # Clear statistics for metadata requests + self.sim.fetch_tracker.metadata.clear() + + # Update again, this time assert that intermediate root v2 was used from cache + self._run_refresh() + + 
expected_calls = [("root", 4), ("timestamp", None)] + self.assertListEqual(self.sim.fetch_tracker.metadata, expected_calls) + def test_expired_metadata(self) -> None: """Verifies that expired local timestamp/snapshot can be used for updating from remote. diff --git a/tests/utils.py b/tests/utils.py index e020684d49..1f6d9ad9f1 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -155,12 +155,16 @@ def configure_test_logging(argv: list[str]) -> None: logging.basicConfig(level=loglevel) -def cleanup_dir(path: str) -> None: - """Delete all files inside a directory""" - for filepath in [ - os.path.join(path, filename) for filename in os.listdir(path) - ]: - os.remove(filepath) +def cleanup_metadata_dir(path: str) -> None: + """Delete the local metadata dir""" + with os.scandir(path) as it: + for entry in it: + if entry.name == "root_history": + cleanup_metadata_dir(entry.path) + elif entry.name.endswith(".json"): + os.remove(entry.path) + else: + raise ValueError(f"Unexpected local metadata file {entry.path}") class TestServerProcess: diff --git a/tuf/ngclient/updater.py b/tuf/ngclient/updater.py index 8c88a96ead..5af7cfe440 100644 --- a/tuf/ngclient/updater.py +++ b/tuf/ngclient/updater.py @@ -12,7 +12,8 @@ High-level description of ``Updater`` functionality: * Initializing an ``Updater`` loads and validates the trusted local root metadata: This root metadata is used as the source of trust for all other - metadata. + metadata. Updater should always be initialized with the ``bootstrap`` + argument: if this is not possible, it can be initialized from cache only. * ``refresh()`` can optionally be called to update and load all top-level metadata as described in the specification, using both locally cached metadata and metadata downloaded from the remote repository. 
If refresh is @@ -49,9 +50,9 @@ from tuf.api import exceptions from tuf.api.metadata import Root, Snapshot, TargetFile, Targets, Timestamp -from tuf.ngclient import urllib3_fetcher -from tuf.ngclient._internal import trusted_metadata_set +from tuf.ngclient._internal.trusted_metadata_set import TrustedMetadataSet from tuf.ngclient.config import EnvelopeType, UpdaterConfig +from tuf.ngclient.urllib3_fetcher import Urllib3Fetcher if TYPE_CHECKING: from tuf.ngclient.fetcher import FetcherInterface @@ -75,6 +76,9 @@ class Updater: download both metadata and targets. Default is ``Urllib3Fetcher`` config: ``Optional``; ``UpdaterConfig`` could be used to setup common configuration options. + bootstrap: ``Optional``; initial root metadata. A bootstrap root should + always be provided. If it is not, the current root.json in the + metadata cache is used as the initial root. Raises: OSError: Local root.json cannot be read @@ -89,6 +93,7 @@ def __init__( target_base_url: str | None = None, fetcher: FetcherInterface | None = None, config: UpdaterConfig | None = None, + bootstrap: bytes | None = None, ): self._dir = metadata_dir self._metadata_base_url = _ensure_trailing_slash(metadata_base_url) @@ -99,14 +104,12 @@ def __init__( self._target_base_url = _ensure_trailing_slash(target_base_url) self.config = config or UpdaterConfig() - if fetcher is not None: self._fetcher = fetcher else: - self._fetcher = urllib3_fetcher.Urllib3Fetcher( + self._fetcher = Urllib3Fetcher( app_user_agent=self.config.app_user_agent ) - supported_envelopes = [EnvelopeType.METADATA, EnvelopeType.SIMPLE] if self.config.envelope_type not in supported_envelopes: raise ValueError( f"got '{self.config.envelope_type}'" ) - # Read trusted local root metadata - data = self._load_local_metadata(Root.type) + if not bootstrap: + # if no root was provided, use the cached non-versioned root.json + bootstrap = self._load_local_metadata(Root.type) - self._trusted_set = 
trusted_metadata_set.TrustedMetadataSet( - data, self.config.envelope_type + # Load the initial root, make sure it's cached + self._trusted_set = TrustedMetadataSet( + bootstrap, self.config.envelope_type ) + self._persist_root(self._trusted_set.root.version, bootstrap) + self._update_root_symlink() def refresh(self) -> None: """Refresh top-level metadata. @@ -296,12 +303,32 @@ def _load_local_metadata(self, rolename: str) -> bytes: return f.read() def _persist_metadata(self, rolename: str, data: bytes) -> None: - """Write metadata to disk atomically to avoid data loss.""" - temp_file_name: str | None = None + """Write metadata to disk atomically to avoid data loss. + + Use a filename _not_ prefixed with version (e.g. "timestamp.json") + . Encode the rolename to avoid issues with e.g. path separators + """ + + encoded_name = parse.quote(rolename, "") + filename = os.path.join(self._dir, f"{encoded_name}.json") + self._persist_file(filename, data) + + def _persist_root(self, version: int, data: bytes) -> None: + """Write root metadata to disk atomically to avoid data loss. + + The metadata is stored with version prefix (e.g. + "root_history/1.root.json"). + """ + rootdir = os.path.join(self._dir, "root_history") + with contextlib.suppress(FileExistsError): + os.mkdir(rootdir) + self._persist_file(os.path.join(rootdir, f"{version}.root.json"), data) + + def _persist_file(self, filename: str, data: bytes) -> None: + """Write a file to disk atomically to avoid data loss.""" + temp_file_name = None + try: - # encode the rolename to avoid issues with e.g. 
path separators - encoded_name = parse.quote(rolename, "") - filename = os.path.join(self._dir, f"{encoded_name}.json") with tempfile.NamedTemporaryFile( dir=self._dir, delete=False ) as temp_file: @@ -316,32 +343,60 @@ def _persist_metadata(self, rolename: str, data: bytes) -> None: os.remove(temp_file_name) raise e + def _update_root_symlink(self) -> None: + """Symlink root.json to current trusted root version in root_history/""" + linkname = os.path.join(self._dir, "root.json") + version = self._trusted_set.root.version + current = os.path.join("root_history", f"{version}.root.json") + with contextlib.suppress(FileNotFoundError): + os.remove(linkname) + os.symlink(current, linkname) + def _load_root(self) -> None: - """Load remote root metadata. + """Load root metadata. - Sequentially load and persist on local disk every newer root metadata - version available on the remote. + Sequentially load newer root metadata versions. First try to load from + local cache and if that does not work, from the remote repository. + + If metadata is loaded from remote repository, store it in local cache. 
""" # Update the root role lower_bound = self._trusted_set.root.version + 1 upper_bound = lower_bound + self.config.max_root_rotations - for next_version in range(lower_bound, upper_bound): - try: - data = self._download_metadata( - Root.type, - self.config.root_max_length, - next_version, - ) - self._trusted_set.update_root(data) - self._persist_metadata(Root.type, data) - - except exceptions.DownloadHTTPError as exception: - if exception.status_code not in {403, 404}: - raise - # 404/403 means current root is newest available - break + try: + for next_version in range(lower_bound, upper_bound): + # look for next_version in local cache + try: + root_path = os.path.join( + self._dir, "root_history", f"{next_version}.root.json" + ) + with open(root_path, "rb") as f: + self._trusted_set.update_root(f.read()) + continue + except (OSError, exceptions.RepositoryError) as e: + # this root did not exist locally or is invalid + logger.debug("Local root is not valid: %s", e) + + # next_version was not found locally, try remote + try: + data = self._download_metadata( + Root.type, + self.config.root_max_length, + next_version, + ) + self._trusted_set.update_root(data) + self._persist_root(next_version, data) + + except exceptions.DownloadHTTPError as exception: + if exception.status_code not in {403, 404}: + raise + # 404/403 means current root is newest available + break + finally: + # Make sure the non-versioned root.json links to current version + self._update_root_symlink() def _load_timestamp(self) -> None: """Load local and remote timestamp metadata."""