Skip to content

Commit

Permalink
Changes to make year-less log helper support full dates
Browse files Browse the repository at this point in the history
  • Loading branch information
joachimmetz committed Mar 29, 2024
1 parent ecdf887 commit 8f34c3b
Show file tree
Hide file tree
Showing 11 changed files with 210 additions and 113 deletions.
95 changes: 83 additions & 12 deletions plaso/containers/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,20 +86,22 @@ class DateLessLogHelper(interface.AttributeContainer):
"""Attribute container to assist with logs without full dates.
Attributes:
earliest_year (int): earliest possible year the event data stream was
created.
last_relative_year (int): last relative year determined by the date-less
log helper.
latest_year (int): latest possible year the event data stream was created.
earliest_date (list[int, int, int]): earliest possible date the event data
stream was created. The date is a tuple of year, month and day of month.
last_relative_date (list[int, int, int]): last relative date determined by
the date-less log helper. The date is a tuple of year, month and day of
month.
latest_date (list[int, int, int]): latest possible date the event data stream was
created. The date is a tuple of year, month and day of month.
"""

CONTAINER_TYPE = 'date_less_log_helper'

SCHEMA = {
'_event_data_stream_identifier': 'AttributeContainerIdentifier',
'earliest_year': 'int',
'last_relative_year': 'int',
'latest_year': 'int'}
'earliest_date': 'List[int]',
'last_relative_date': 'List[int]',
'latest_date': 'List[int]'}

_SERIALIZABLE_PROTECTED_ATTRIBUTES = [
'_event_data_stream_identifier']
Expand All @@ -108,9 +110,21 @@ def __init__(self):
"""Initializes a date-less log helper attribute container."""
super(DateLessLogHelper, self).__init__()
self._event_data_stream_identifier = None
self.earliest_year = None
self.last_relative_year = None
self.latest_year = None
self.earliest_date = None
self.last_relative_date = None
self.latest_date = None

# TODO: the YearLessLogHelper attribute container is kept for backwards
# compatibility; remove it once storage format 20230327 is obsolete.
def CopyFromYearLessLogHelper(self, year_less_log_helper):
  """Copies the values of a year-less log helper.

  A year-less log helper only tracks years. Each year that is set is expanded
  into a (year, month, day of month) tuple: the earliest and latest year are
  mapped to January 1 of that year and the last relative year is mapped to
  a relative date with a month and day of month offset of 0.

  A year that is not set (None) results in a date that is not set (None),
  instead of a tuple that contains None, so that code that tests the date
  for None still detects the missing value.

  Args:
    year_less_log_helper (YearLessLogHelper): year-less log helper.
  """
  earliest_year = year_less_log_helper.earliest_year
  last_relative_year = year_less_log_helper.last_relative_year
  latest_year = year_less_log_helper.latest_year

  self.earliest_date = None
  if earliest_year is not None:
    self.earliest_date = (earliest_year, 1, 1)

  self.last_relative_date = None
  if last_relative_year is not None:
    self.last_relative_date = (last_relative_year, 0, 0)

  self.latest_date = None
  if latest_year is not None:
    self.latest_date = (latest_year, 1, 1)

def GetEventDataStreamIdentifier(self):
"""Retrieves the identifier of the associated event data stream.
Expand Down Expand Up @@ -438,5 +452,62 @@ def SetEventIdentifier(self, event_identifier):
self._event_identifier = event_identifier


# TODO: the YearLessLogHelper attribute container is kept for backwards
# compatibility; remove it once storage format 20230327 is obsolete.
class YearLessLogHelper(interface.AttributeContainer):
  """Attribute container to assist with logs that lack a year.

  Attributes:
    earliest_year (int): earliest possible year the event data stream was
        created.
    last_relative_year (int): last relative year determined by the year-less
        log helper.
    latest_year (int): latest possible year the event data stream was created.
  """

  CONTAINER_TYPE = 'year_less_log_helper'

  SCHEMA = {
      '_event_data_stream_identifier': 'AttributeContainerIdentifier',
      'earliest_year': 'int',
      'last_relative_year': 'int',
      'latest_year': 'int'}

  _SERIALIZABLE_PROTECTED_ATTRIBUTES = ['_event_data_stream_identifier']

  def __init__(self):
    """Initializes a year-less log helper with all values unset."""
    super().__init__()
    self._event_data_stream_identifier = None
    self.earliest_year = None
    self.last_relative_year = None
    self.latest_year = None

  def GetEventDataStreamIdentifier(self):
    """Retrieves the identifier of the associated event data stream.

    The identifier is a storage specific value, which is why it is kept in
    a protected attribute that requires special handling during
    serialization.

    Returns:
      AttributeContainerIdentifier: event data stream identifier or None when
          not set.
    """
    return self._event_data_stream_identifier

  def SetEventDataStreamIdentifier(self, event_data_stream_identifier):
    """Sets the identifier of the associated event data stream.

    The identifier is a storage specific value, which is why it is kept in
    a protected attribute that requires special handling during
    serialization.

    Args:
      event_data_stream_identifier (AttributeContainerIdentifier): event data
          stream identifier.
    """
    self._event_data_stream_identifier = event_data_stream_identifier


manager.AttributeContainersManager.RegisterAttributeContainers([
DateLessLogHelper, EventData, EventDataStream, EventObject, EventTag])
DateLessLogHelper, EventData, EventDataStream, EventObject, EventTag,
YearLessLogHelper])
32 changes: 16 additions & 16 deletions plaso/engine/timeliner.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,39 +115,39 @@ def _GetBaseYear(self, storage_writer, event_data):
base_year = self._current_year

else:
earliest_year = date_less_log_helpers[0].earliest_year
last_relative_year = date_less_log_helpers[0].last_relative_year
latest_year = date_less_log_helpers[0].latest_year
earliest_date = date_less_log_helpers[0].earliest_date
last_relative_date = date_less_log_helpers[0].last_relative_date
latest_date = date_less_log_helpers[0].latest_date

if earliest_year is None and latest_year is None:
if earliest_date is None and latest_date is None:
message = (
f'missing earliest and latest year in date-less log helper, '
f'defaulting to current year: {self._current_year:d}')
self._ProduceTimeliningWarning(storage_writer, event_data, message)

base_year = self._current_year

elif earliest_year + last_relative_year < self._current_year:
base_year = earliest_year
elif earliest_date[0] + last_relative_date[0] < self._current_year:
base_year = earliest_date[0]

elif latest_year < self._current_year:
elif latest_date[0] < self._current_year:
message = (
f'earliest year: {earliest_year:d} as base year would exceed '
f'current year: {self._current_year:d} + {last_relative_year:d}, '
f'using latest year: {latest_year:d}')
f'earliest year: {earliest_date[0]:d} as base year would exceed '
f'current year: {self._current_year:d} + '
f'{last_relative_date[0]:d}, using latest year: {latest_date[0]:d}')
self._ProduceTimeliningWarning(storage_writer, event_data, message)

base_year = latest_year - last_relative_year
base_year = latest_date[0] - last_relative_date[0]

else:
message = (
f'earliest year: {earliest_year:d} and latest: year: '
f'{latest_year:d} as base year would exceed current year: '
f'{self._current_year:d} + {last_relative_year:d}, using current '
f'year')
f'earliest year: {earliest_date[0]:d} and latest: year: '
f'{latest_date[0]:d} as base year would exceed current year: '
f'{self._current_year:d} + {last_relative_date[0]:d}, using '
f'current year')
self._ProduceTimeliningWarning(storage_writer, event_data, message)

base_year = self._current_year - last_relative_year
base_year = self._current_year - last_relative_date[0]

self._base_years[lookup_key] = base_year

Expand Down
75 changes: 40 additions & 35 deletions plaso/lib/dateless_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,42 +29,43 @@ class DateLessLogFormatHelper(object):
def __init__(self):
"""Initializes the date-less log format helper mix-in."""
super(DateLessLogFormatHelper, self).__init__()
self._base_year = None
self._maximum_year = None
self._base_date = None
self._maximum_date = None
self._month = None
self._relative_year = 0
self._relative_date = (0, 0, 0)
self._year = 0

def _GetYearsFromFileEntry(self, file_entry):
"""Retrieves the years from the file entry date and time values.
def _GetDatesFromFileEntry(self, file_entry):
"""Retrieves the dates from the file entry date and time values.
Args:
file_entry (dfvfs.FileEntry): file entry.
Returns:
set[int]: years of the file entry.
set[tuple[int, int, int]]: dates, as tuple of year, month, day, of the
file entry.
"""
if file_entry.type_indicator == dfvfs_definitions.TYPE_INDICATOR_GZIP:
# Ignore a gzip file that contains a modification timestamp of 0.
if (file_entry.modification_time and
file_entry.modification_time.timestamp > 0):
year, _, _ = file_entry.modification_time.GetDate()
return set([year])
date_tuple = file_entry.modification_time.GetDate()
return set([date_tuple])

years = set()
dates = set()

for attribute_name in ('change_time', 'creation_time', 'modification_time'):
date_time = getattr(file_entry, attribute_name, None)
if date_time:
year, _, _ = date_time.GetDate()
date_tuple = date_time.GetDate()

if year == 1970 and file_entry.type_indicator == (
dfvfs_definitions.TYPE_INDICATOR_GZIP):
if (date_tuple == (1970, 1, 1) and
file_entry.type_indicator == dfvfs_definitions.TYPE_INDICATOR_GZIP):
continue

years.add(year)
dates.add(date_tuple)

return years
return dates

def _GetMonthFromString(self, month_string):
"""Retrieves a numeric month value from a string.
Expand All @@ -84,7 +85,7 @@ def _GetRelativeYear(self):
Returns:
int: relative year.
"""
return self._relative_year
return self._relative_date[0]

def _GetYear(self):
"""Retrieves the year.
Expand All @@ -101,32 +102,32 @@ def _SetEstimatedYear(self, parser_mediator):
parser_mediator (ParserMediator): mediates interactions between parsers
and other components, such as storage and dfVFS.
"""
self._base_year = None
self._maximum_year = None
self._base_date = None
self._maximum_date = None
self._month = None
self._relative_year = 0
self._relative_date = (0, 0, 0)
self._year = 0

years = set()
dates = set()

file_entry = parser_mediator.GetFileEntry()
if file_entry:
years = self._GetYearsFromFileEntry(file_entry)
dates = self._GetDatesFromFileEntry(file_entry)

if not years and file_entry.type_indicator in (
if not dates and file_entry.type_indicator in (
dfvfs_definitions.TYPE_INDICATOR_COMPRESSED_STREAM,
dfvfs_definitions.TYPE_INDICATOR_GZIP):

parent_file_entry = path_spec_resolver.Resolver.OpenFileEntry(
file_entry.path_spec.parent,
resolver_context=parser_mediator.resolver_context)
if parent_file_entry:
years = self._GetYearsFromFileEntry(parent_file_entry)
dates = self._GetDatesFromFileEntry(parent_file_entry)

if years:
self._base_year = min(years)
self._maximum_year = max(years)
self._year = self._base_year
if dates:
self._base_date = min(dates)
self._maximum_date = max(dates)
self._year = self._base_date[0]

def _SetMonthAndYear(self, month, year):
"""Sets the month and year.
Expand All @@ -142,7 +143,7 @@ def _SetMonthAndYear(self, month, year):
raise ValueError('Invalid month: {0!s}'.format(month))

self._month = month
self._relative_year = 0
self._relative_date = (0, 0, 0)
self._year = year

def _UpdateYear(self, month):
Expand All @@ -158,17 +159,21 @@ def _UpdateYear(self, month):
raise ValueError('Invalid month: {0!s}'.format(month))

if self._month:
relative_year, relative_month, relative_day_of_month = self._relative_date

# Account for log formats that allow out-of-order date and time values
# (Apr->May->Apr) such as rsyslog with the RepeatedMsgReduction setting
# enabled.
if month + 1 < self._month:
self._relative_year += 1
self._relative_date = (
relative_year + 1, relative_month, relative_day_of_month)
self._year += 1

# Account for out-of-order Jan->Dec->Jan with the exception of the start
# of the log file.
elif self._relative_year > 0 and self._month == 1 and month == 12:
self._relative_year -= 1
elif relative_year > 0 and self._month == 1 and month == 12:
self._relative_date = (
relative_year - 1, relative_month, relative_day_of_month)
self._year -= 1

self._month = month
Expand All @@ -179,9 +184,9 @@ def GetDateLessLogHelper(self):
Returns:
DateLessLogHelper: date-less log helper.
"""
year_less_log_helper = events.DateLessLogHelper()
year_less_log_helper.earliest_year = self._base_year
year_less_log_helper.last_relative_year = self._relative_year
year_less_log_helper.latest_year = self._maximum_year
date_less_log_helper = events.DateLessLogHelper()
date_less_log_helper.earliest_date = self._base_date
date_less_log_helper.last_relative_date = self._relative_date
date_less_log_helper.latest_date = self._maximum_date

return year_less_log_helper
return date_less_log_helper
3 changes: 2 additions & 1 deletion plaso/multi_process/extraction_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,9 +757,10 @@ def _ProcessEventSources(self, storage_writer, session_identifier):
# All exceptions need to be caught here to prevent the foreman
# from being killed by an uncaught exception.
except Exception as exception: # pylint: disable=broad-except
path_spec = getattr(event_source, 'path_spec', None) or 'N/A'
self._ProduceExtractionWarning(storage_writer, (
f'unable to process path specification with error: '
f'{exception!s}'), event_source.path_spec)
f'{exception!s}'), path_spec)
event_source = None

for task in self._task_manager.GetFailedTasks():
Expand Down
4 changes: 2 additions & 2 deletions plaso/parsers/text_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,8 @@ def ParseFileObject(self, parser_mediator, file_object):
parser_mediator.SampleStopTiming(profiling_name)

if hasattr(plugin, 'GetDateLessLogHelper'):
year_less_log_helper = plugin.GetDateLessLogHelper()
parser_mediator.AddDateLessLogHelper(year_less_log_helper)
date_less_log_helper = plugin.GetDateLessLogHelper()
parser_mediator.AddDateLessLogHelper(date_less_log_helper)

break

Expand Down
4 changes: 3 additions & 1 deletion plaso/serializer/json_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,10 @@
'json': serializers.JSONDateTimeAttributeSerializer()},
'dfvfs.PathSpec': {
'json': serializers.JSONPathSpecAttributeSerializer()},
'List[int]': {
'json': serializers.JSONValueListAttributeSerializer()},
'List[str]': {
'json': serializers.JSONStringsListAttributeSerializer()}})
'json': serializers.JSONValueListAttributeSerializer()}})


class JSONAttributeContainerSerializer(
Expand Down
Loading

0 comments on commit 8f34c3b

Please sign in to comment.