Skip to content

Commit

Permalink
Updates to support additional parsing of blink serialized objects (go…
Browse files Browse the repository at this point in the history
…ogle#37)

* Initial  commit

* Pytype fix

* Updates

* Updates

* Updates

* Add more tests

* pylint/pytype fixes

* Add more tests

* Update docstring

* Updates

* Updates

* Updates

* Reformat test file

* Whitespace fix
  • Loading branch information
sydp authored Apr 13, 2024
1 parent 0f6a1b9 commit 3fedb1b
Show file tree
Hide file tree
Showing 8 changed files with 1,675 additions and 32 deletions.
933 changes: 915 additions & 18 deletions dfindexeddb/indexeddb/chromium/blink.py

Large diffs are not rendered by default.

66 changes: 66 additions & 0 deletions dfindexeddb/indexeddb/chromium/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,12 @@ class BlinkSerializationTag(IntEnum):
ENCODED_AUDIO_CHUNK = ord('y')
ENCODED_VIDEO_CHUNK = ord('z')
CROP_TARGET = ord('c')
RESTRICTION_TARGET = ord('D')
MEDIA_SOURCE_HANDLE = ord('S')
DEPRECATED_DETECTED_BARCODE = ord('B')
DEPRECATED_DETECTED_FACE = ord('F')
DEPRECATED_DETECTED_TEXT = ord('t')
FENCED_FRAME_CONFIG = ord('C')
DOM_EXCEPTION = ord('x')
TRAILER_OFFSET = 0xFE
VERSION = 0xFF
Expand Down Expand Up @@ -304,3 +306,67 @@ class V8ErrorTag(IntEnum):
CAUSE = ord('c')
STACK = ord('s')
END = ord('.')


class ImageSerializationTag(IntEnum):
  """Tags identifying the fields of a serialized image.

  Each member names one tagged field; END (0) is the terminating tag.
  NOTE(review): presumably mirrors the image serialization tags defined by
  Blink - confirm against the Chromium source before relying on the meaning
  of an individual tag.

  LAST is an alias (same value, same member object) of the highest tag and
  does not show up as a separate member when iterating the enumeration.
  """
  END = 0
  PREDEFINED_COLOR_SPACE = 1
  CANVAS_PIXEL_FORMAT = 2
  IMAGE_DATA_STORAGE_FORMAT = 3
  ORIGIN_CLEAN = 4
  IS_PREMULTIPLIED = 5
  CANVAS_OPACITY_MODE = 6
  PARAMETRIC_COLOR_SPACE = 7
  IMAGE_ORIENTATION = 8
  # Alias marking the upper bound of the known tag values.
  LAST = IMAGE_ORIENTATION


class SerializedPredefinedColorSpace(IntEnum):
  """Serialized predefined color space enumeration.

  Value 0 is kept only as a legacy/obsolete placeholder (per its name).
  LAST is an alias of the highest defined value (SRGB_LINEAR) and is not
  yielded as a separate member when iterating the enumeration.
  """
  LEGACY_OBSOLETE = 0
  SRGB = 1
  REC2020 = 2
  P3 = 3
  REC2100HLG = 4
  REC2100PQ = 5
  SRGB_LINEAR = 6
  # Alias marking the upper bound of the known color space values.
  LAST = SRGB_LINEAR


class SerializedPixelFormat(IntEnum):
  """Serialized pixel format enumeration.

  Value 0 is kept only as a legacy/obsolete placeholder (per its name).
  LAST is an alias of the highest defined value (RGBX8) and is not yielded
  as a separate member when iterating the enumeration.
  """
  NATIVE8_LEGACY_OBSOLETE = 0
  F16 = 1
  RGBA8 = 2
  BGRA8 = 3
  RGBX8 = 4
  # Alias marking the upper bound of the known pixel format values.
  LAST = RGBX8


class SerializedImageDataStorageFormat(IntEnum):
  """The serialized image data storage format.

  LAST is an alias of the highest defined value (FLOAT32) and is not yielded
  as a separate member when iterating the enumeration.
  """
  UINT8CLAMPED = 0
  UINT16 = 1
  FLOAT32 = 2
  # Alias marking the upper bound of the known storage format values.
  LAST = FLOAT32


class SerializedOpacityMode(IntEnum):
  """The serialized opacity mode.

  KLAST is an alias of the highest defined value (KOPAQUE) and is not yielded
  as a separate member when iterating the enumeration.
  """
  # NOTE(review): unlike the sibling enumerations in this module, these
  # members carry a "K" prefix (KLAST rather than LAST), presumably carried
  # over from k-prefixed upstream constants. The names are kept unchanged
  # because member names are part of the public interface.
  KNONOPAQUE = 0
  KOPAQUE = 1
  KLAST = KOPAQUE


class SerializedImageOrientation(IntEnum):
  """The serialized image orientation.

  NOTE(review): the member order resembles the EXIF orientation sequence
  shifted to start at 0 - confirm against the Chromium source before mapping
  these values to EXIF.

  LAST is an alias of the highest defined value (LEFT_BOTTOM) and is not
  yielded as a separate member when iterating the enumeration.
  """
  TOP_LEFT = 0
  TOP_RIGHT = 1
  BOTTOM_RIGHT = 2
  BOTTOM_LEFT = 3
  LEFT_TOP = 4
  RIGHT_TOP = 5
  RIGHT_BOTTOM = 6
  LEFT_BOTTOM = 7
  # Alias marking the upper bound of the known orientation values.
  LAST = LEFT_BOTTOM
16 changes: 8 additions & 8 deletions dfindexeddb/indexeddb/chromium/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ def FromDecoder(


@dataclass
class EarlistCompactionTimeKey(BaseIndexedDBKey):
class EarliestCompactionTimeKey(BaseIndexedDBKey):
"""An earliest compaction time IndexedDB key."""

def DecodeValue(self, decoder: utils.LevelDBDecoder) -> int:
Expand All @@ -558,11 +558,11 @@ def DecodeValue(self, decoder: utils.LevelDBDecoder) -> int:
def FromDecoder(
cls, decoder: utils.LevelDBDecoder, key_prefix: KeyPrefix,
base_offset: int = 0
) -> EarlistCompactionTimeKey:
) -> EarliestCompactionTimeKey:
"""Decodes the earliest compaction time key."""
offset, key_type = decoder.DecodeUint8()
if key_type != definitions.GlobalMetadataKeyType.EARLIEST_COMPACTION_TIME:
raise errors.ParserError('Not a EarlistCompactionTimeKey')
raise errors.ParserError('Not a EarliestCompactionTimeKey')
return cls(offset=base_offset + offset, key_prefix=key_prefix)


Expand Down Expand Up @@ -668,7 +668,7 @@ class GlobalMetaDataKey(BaseIndexedDBKey):
definitions.GlobalMetadataKeyType
.EARLIEST_SWEEP: EarliestSweepKey,
definitions.GlobalMetadataKeyType
.EARLIEST_COMPACTION_TIME: EarlistCompactionTimeKey,
.EARLIEST_COMPACTION_TIME: EarliestCompactionTimeKey,
definitions.GlobalMetadataKeyType
.SCOPES_PREFIX: ScopesPrefixKey,
definitions.GlobalMetadataKeyType
Expand All @@ -692,7 +692,7 @@ def FromDecoder(
Type[DatabaseFreeListKey],
Type[DatabaseNameKey],
Type[EarliestSweepKey],
Type[EarlistCompactionTimeKey],
Type[EarliestCompactionTimeKey],
Type[MaxDatabaseIdKey],
Type[RecoveryBlobJournalKey],
Type[SchemaVersionKey],
Expand Down Expand Up @@ -972,7 +972,7 @@ class ObjectStoreDataValue:
blob_offset: the blob offset, only valid if wrapped.
value: the blink serialized value, only valid if not wrapped.
"""
unkown: int
unknown: int
is_wrapped: bool
blob_size: Optional[int]
blob_offset: Optional[int]
Expand Down Expand Up @@ -1003,15 +1003,15 @@ def DecodeValue(
_, blob_size = decoder.DecodeVarint()
_, blob_offset = decoder.DecodeVarint()
return ObjectStoreDataValue(
unkown=unknown_integer,
unknown=unknown_integer,
is_wrapped=True,
blob_size=blob_size,
blob_offset=blob_offset,
value=None)
_, blink_bytes = decoder.ReadBytes()
blink_value = blink.V8ScriptValueDecoder.FromBytes(blink_bytes)
return ObjectStoreDataValue(
unkown=unknown_integer,
unknown=unknown_integer,
is_wrapped=False,
blob_size=None,
blob_offset=None,
Expand Down
11 changes: 8 additions & 3 deletions dfindexeddb/indexeddb/chromium/v8.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,12 @@ def _PeekTag(self) -> Optional[definitions.V8SerializationTag]:
_, tag_value = self.decoder.PeekBytes(1)
except errors.DecoderError:
return None
return definitions.V8SerializationTag(tag_value[0])
try:
return definitions.V8SerializationTag(tag_value[0])
except ValueError as error:
raise errors.ParserError(
f'Invalid v8 tag value {tag_value} at offset'
f' {self.decoder.stream.tell()}') from error

def _ReadTag(self) -> definitions.V8SerializationTag:
"""Returns the next non-padding serialization tag.
Expand Down Expand Up @@ -269,7 +274,7 @@ def _ReadObjectInternal(self) -> Tuple[definitions.V8SerializationTag, Any]:
self.version >= 15):
parsed_object = self.ReadSharedObject()
elif self.version < 13:
self.decoder.stream.seek(-1)
self.decoder.stream.seek(-1, os.SEEK_CUR)
parsed_object = self.ReadHostObject()
else:
parsed_object = None
Expand Down Expand Up @@ -492,7 +497,7 @@ def _ReadJSPrimitiveWrapper(
return value

def _ReadJSRegExp(self) -> RegExp:
"""Reads a Javscript regular expression from the current position."""
"""Reads a Javascript regular expression from the current position."""
next_id = self._GetNextId()
pattern = self.ReadString()
_, flags = self.decoder.DecodeUint32Varint() # TODO: verify flags
Expand Down
4 changes: 4 additions & 0 deletions dfindexeddb/indexeddb/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"""A CLI tool for dfindexeddb."""
import argparse
import dataclasses
import enum
from datetime import datetime
import json
import pathlib
Expand Down Expand Up @@ -57,6 +58,8 @@ def default(self, o):
return list(o)
if isinstance(o, v8.RegExp):
return str(o)
if isinstance(o, enum.Enum):
return o.name
return json.JSONEncoder.default(self, o)


Expand Down Expand Up @@ -85,6 +88,7 @@ def IndexeddbCommand(args):
(f'Error parsing blink value: {err} for {record.__class__.__name__} '
f'at offset {record.offset} in {db_record.path}'), file=sys.stderr)
print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
print(f'Record: {record}', file=sys.stderr)
_Output(db_record, output=args.output)


Expand Down
14 changes: 14 additions & 0 deletions tests/dfindexeddb/indexeddb/chromium/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# -*- coding: utf-8 -*-
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Loading

0 comments on commit 3fedb1b

Please sign in to comment.