Skip to content

Commit

Permalink
Use separate metadata files for onedrive (#137549)
Browse files Browse the repository at this point in the history
  • Loading branch information
zweckj authored and frenck committed Feb 7, 2025
1 parent 3390fb3 commit bec569c
Show file tree
Hide file tree
Showing 7 changed files with 178 additions and 37 deletions.
43 changes: 42 additions & 1 deletion homeassistant/components/onedrive/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

from collections.abc import Awaitable, Callable
from dataclasses import dataclass
from html import unescape
from json import dumps, loads
import logging
from typing import cast

Expand All @@ -13,6 +15,7 @@
HttpRequestException,
OneDriveException,
)
from onedrive_personal_sdk.models.items import ItemUpdate

from homeassistant.config_entries import ConfigEntry
from homeassistant.const import CONF_ACCESS_TOKEN
Expand Down Expand Up @@ -45,7 +48,6 @@ class OneDriveRuntimeData:
async def async_setup_entry(hass: HomeAssistant, entry: OneDriveConfigEntry) -> bool:
"""Set up OneDrive from a config entry."""
implementation = await async_get_config_entry_implementation(hass, entry)

session = OAuth2Session(hass, entry, implementation)

async def get_access_token() -> str:
Expand Down Expand Up @@ -89,6 +91,14 @@ async def get_access_token() -> str:
backup_folder_id=backup_folder.id,
)

try:
await _migrate_backup_files(client, backup_folder.id)
except OneDriveException as err:
raise ConfigEntryNotReady(
translation_domain=DOMAIN,
translation_key="failed_to_migrate_files",
) from err

_async_notify_backup_listeners_soon(hass)

return True
Expand All @@ -108,3 +118,34 @@ def _async_notify_backup_listeners(hass: HomeAssistant) -> None:
@callback
def _async_notify_backup_listeners_soon(hass: HomeAssistant) -> None:
    """Schedule the backup-listener notification via the event loop's call_soon."""
    event_loop = hass.loop
    event_loop.call_soon(_async_notify_backup_listeners, hass)


async def _migrate_backup_files(client: OneDriveClient, backup_folder_id: str) -> None:
    """Migrate backup files to metadata version 2.

    Version 1 kept the complete backup metadata as JSON in the description
    of the backup file itself. For every such file this function:

    1. uploads the metadata (without its ``metadata_version`` key) as a
       separate ``<backup name>.metadata.json`` file in the same folder,
    2. sets that file's description to a small JSON object holding the
       metadata version (2), the backup ID and the ID of the backup file,
    3. clears the description of the original backup file.

    Items whose description is not usable version 1 metadata (invalid JSON,
    not a JSON object, a different version, or no ``backup_id``) are left
    untouched instead of aborting the whole migration.

    Errors raised by the client calls are not handled here and propagate to
    the caller.
    """
    for file in await client.list_drive_items(backup_folder_id):
        if not file.description:
            continue

        # The description is HTML-unescaped before it is inspected or parsed.
        metadata_json = unescape(file.description)

        # Cheap pre-filter so unrelated descriptions are never parsed.
        if '"metadata_version": 1' not in metadata_json:
            continue

        try:
            metadata = loads(metadata_json)
        except ValueError:  # JSONDecodeError is a subclass of ValueError
            _LOGGER.warning(
                "Skipping migration of %s: description is not valid JSON",
                file.name,
            )
            continue

        # The substring test above also matches e.g. version 10 or a nested
        # value, so confirm the version on the parsed data.
        if not isinstance(metadata, dict) or metadata.get("metadata_version") != 1:
            continue

        if "backup_id" not in metadata:
            _LOGGER.warning(
                "Skipping migration of %s: metadata has no backup_id",
                file.name,
            )
            continue

        # The version now lives in the metadata file's description only.
        del metadata["metadata_version"]

        metadata_filename = file.name.rsplit(".", 1)[0] + ".metadata.json"
        metadata_file = await client.upload_file(
            backup_folder_id,
            metadata_filename,
            dumps(metadata),  # type: ignore[arg-type]
        )

        metadata_description = {
            "metadata_version": 2,
            "backup_id": metadata["backup_id"],
            "backup_file_id": file.id,
        }
        await client.update_drive_item(
            path_or_id=metadata_file.id,
            data=ItemUpdate(description=dumps(metadata_description)),
        )

        # Clear the old description last, so a failure before this point
        # leaves the version 1 metadata in place for the next attempt.
        await client.update_drive_item(
            path_or_id=file.id,
            data=ItemUpdate(description=""),
        )
        _LOGGER.debug("Migrated backup file %s", file.name)
95 changes: 62 additions & 33 deletions homeassistant/components/onedrive/backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

from collections.abc import AsyncIterator, Callable, Coroutine
from functools import wraps
import html
import json
from html import unescape
from json import dumps, loads
import logging
from typing import Any, Concatenate

Expand Down Expand Up @@ -34,6 +34,7 @@
_LOGGER = logging.getLogger(__name__)
UPLOAD_CHUNK_SIZE = 16 * 320 * 1024 # 5.2MB
TIMEOUT = ClientTimeout(connect=10, total=43200) # 12 hours
METADATA_VERSION = 2


async def async_get_backup_agents(
Expand Down Expand Up @@ -120,11 +121,19 @@ async def async_download_backup(
self, backup_id: str, **kwargs: Any
) -> AsyncIterator[bytes]:
"""Download a backup file."""
item = await self._find_item_by_backup_id(backup_id)
if item is None:
metadata_item = await self._find_item_by_backup_id(backup_id)
if (
metadata_item is None
or metadata_item.description is None
or "backup_file_id" not in metadata_item.description
):
raise BackupAgentError("Backup not found")

stream = await self._client.download_drive_item(item.id, timeout=TIMEOUT)
metadata_info = loads(unescape(metadata_item.description))

stream = await self._client.download_drive_item(
metadata_info["backup_file_id"], timeout=TIMEOUT
)
return stream.iter_chunked(1024)

@handle_backup_errors
Expand All @@ -136,31 +145,41 @@ async def async_upload_backup(
**kwargs: Any,
) -> None:
"""Upload a backup."""

filename = suggested_filename(backup)
file = FileInfo(
suggested_filename(backup),
filename,
backup.size,
self._folder_id,
await open_stream(),
)
try:
item = await LargeFileUploadClient.upload(
backup_file = await LargeFileUploadClient.upload(
self._token_function, file, session=async_get_clientsession(self._hass)
)
except HashMismatchError as err:
raise BackupAgentError(
"Hash validation failed, backup file might be corrupt"
) from err

# store metadata in description
backup_dict = backup.as_dict()
backup_dict["metadata_version"] = 1 # version of the backup metadata
description = json.dumps(backup_dict)
# store metadata in metadata file
description = dumps(backup.as_dict())
_LOGGER.debug("Creating metadata: %s", description)
metadata_filename = filename.rsplit(".", 1)[0] + ".metadata.json"
metadata_file = await self._client.upload_file(
self._folder_id,
metadata_filename,
description, # type: ignore[arg-type]
)

# add metadata to the metadata file
metadata_description = {
"metadata_version": METADATA_VERSION,
"backup_id": backup.backup_id,
"backup_file_id": backup_file.id,
}
await self._client.update_drive_item(
path_or_id=item.id,
data=ItemUpdate(description=description),
path_or_id=metadata_file.id,
data=ItemUpdate(description=dumps(metadata_description)),
)

@handle_backup_errors
Expand All @@ -170,46 +189,56 @@ async def async_delete_backup(
**kwargs: Any,
) -> None:
"""Delete a backup file."""
item = await self._find_item_by_backup_id(backup_id)
if item is None:
metadata_item = await self._find_item_by_backup_id(backup_id)
if (
metadata_item is None
or metadata_item.description is None
or "backup_file_id" not in metadata_item.description
):
return
await self._client.delete_drive_item(item.id)
metadata_info = loads(unescape(metadata_item.description))

await self._client.delete_drive_item(metadata_info["backup_file_id"])
await self._client.delete_drive_item(metadata_item.id)

@handle_backup_errors
async def async_list_backups(self, **kwargs: Any) -> list[AgentBackup]:
"""List backups."""
items = await self._client.list_drive_items(self._folder_id)
return [
self._backup_from_description(item.description)
for item in await self._client.list_drive_items(self._folder_id)
if item.description and "homeassistant_version" in item.description
await self._download_backup_metadata(item.id)
for item in items
if item.description
and "backup_id" in item.description
and f'"metadata_version": {METADATA_VERSION}' in unescape(item.description)
]

@handle_backup_errors
async def async_get_backup(
self, backup_id: str, **kwargs: Any
) -> AgentBackup | None:
"""Return a backup."""
item = await self._find_item_by_backup_id(backup_id)
return (
self._backup_from_description(item.description)
if item and item.description
else None
)
metadata_file = await self._find_item_by_backup_id(backup_id)
if metadata_file is None or metadata_file.description is None:
return None

def _backup_from_description(self, description: str) -> AgentBackup:
"""Create a backup object from a description."""
description = html.unescape(
description
) # OneDrive encodes the description on save automatically
return AgentBackup.from_dict(json.loads(description))
return await self._download_backup_metadata(metadata_file.id)

async def _find_item_by_backup_id(self, backup_id: str) -> File | Folder | None:
"""Find an item by backup ID."""
return next(
(
item
for item in await self._client.list_drive_items(self._folder_id)
if item.description and backup_id in item.description
if item.description
and backup_id in item.description
and f'"metadata_version": {METADATA_VERSION}'
in unescape(item.description)
),
None,
)

async def _download_backup_metadata(self, item_id: str) -> AgentBackup:
    """Download the drive item with the given ID and build an AgentBackup from its JSON content."""
    stream = await self._client.download_drive_item(item_id)
    raw_metadata = await stream.read()
    return AgentBackup.from_dict(loads(raw_metadata))
3 changes: 3 additions & 0 deletions homeassistant/components/onedrive/strings.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@
},
"failed_to_get_folder": {
"message": "Failed to get {folder} folder"
},
"failed_to_migrate_files": {
"message": "Failed to migrate metadata to separate files"
}
}
}
10 changes: 9 additions & 1 deletion tests/components/onedrive/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Fixtures for OneDrive tests."""

from collections.abc import AsyncIterator, Generator
from json import dumps
import time
from unittest.mock import AsyncMock, MagicMock, patch

Expand All @@ -15,11 +16,13 @@
from homeassistant.setup import async_setup_component

from .const import (
BACKUP_METADATA,
CLIENT_ID,
CLIENT_SECRET,
MOCK_APPROOT,
MOCK_BACKUP_FILE,
MOCK_BACKUP_FOLDER,
MOCK_METADATA_FILE,
)

from tests.common import MockConfigEntry
Expand Down Expand Up @@ -89,13 +92,17 @@ def mock_onedrive_client(mock_onedrive_client_init: MagicMock) -> Generator[Magi
client = mock_onedrive_client_init.return_value
client.get_approot.return_value = MOCK_APPROOT
client.create_folder.return_value = MOCK_BACKUP_FOLDER
client.list_drive_items.return_value = [MOCK_BACKUP_FILE]
client.list_drive_items.return_value = [MOCK_BACKUP_FILE, MOCK_METADATA_FILE]
client.get_drive_item.return_value = MOCK_BACKUP_FILE
client.upload_file.return_value = MOCK_METADATA_FILE

class MockStreamReader:
async def iter_chunked(self, chunk_size: int) -> AsyncIterator[bytes]:
yield b"backup data"

async def read(self) -> bytes:
return dumps(BACKUP_METADATA).encode()

client.download_drive_item.return_value = MockStreamReader()

return client
Expand All @@ -107,6 +114,7 @@ def mock_large_file_upload_client() -> Generator[AsyncMock]:
with patch(
"homeassistant.components.onedrive.backup.LargeFileUploadClient.upload"
) as mock_upload:
mock_upload.return_value = MOCK_BACKUP_FILE
yield mock_upload


Expand Down
25 changes: 24 additions & 1 deletion tests/components/onedrive/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,29 @@
quick_xor_hash="hash",
),
mime_type="application/x-tar",
description=escape(dumps(BACKUP_METADATA)),
description="",
created_by=CONTRIBUTOR,
)

MOCK_METADATA_FILE = File(
id="id",
name="23e64aec.tar",
size=34519040,
parent_reference=ItemParentReference(
drive_id="mock_drive_id", id="id", path="path"
),
hashes=Hashes(
quick_xor_hash="hash",
),
mime_type="application/x-tar",
description=escape(
dumps(
{
"metadata_version": 2,
"backup_id": "23e64aec",
"backup_file_id": "id",
}
)
),
created_by=CONTRIBUTOR,
)
2 changes: 1 addition & 1 deletion tests/components/onedrive/test_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ async def test_agents_delete(

assert response["success"]
assert response["result"] == {"agent_errors": {}}
mock_onedrive_client.delete_drive_item.assert_called_once()
assert mock_onedrive_client.delete_drive_item.call_count == 2


async def test_agents_upload(
Expand Down
Loading

0 comments on commit bec569c

Please sign in to comment.