diff --git a/ConfigExamples/CsvOutput.yaml b/ConfigExamples/CsvOutput.yaml
index f84809c..20ca9d1 100644
--- a/ConfigExamples/CsvOutput.yaml
+++ b/ConfigExamples/CsvOutput.yaml
@@ -5,8 +5,6 @@ collect:
     Audit.Exchange: True
     Audit.SharePoint: True
     DLP.All: True
-  skipKnownLogs: True
-  resume: True
 output:
   file:
     enabled: True
diff --git a/ConfigExamples/azureBlob.yaml b/ConfigExamples/azureBlob.yaml
new file mode 100644
index 0000000..a28e657
--- /dev/null
+++ b/ConfigExamples/azureBlob.yaml
@@ -0,0 +1,15 @@
+collect:
+  contentTypes:
+    Audit.General: True
+    Audit.AzureActiveDirectory: True
+    Audit.Exchange: True
+    Audit.SharePoint: True
+    DLP.All: True
+output:
+  azureBlob:
+    enabled: True
+    containerName: audit-logs
+    blobName: AuditLog
+    tempPath: 'output'
+    separateByContentType: True
+    separator: ';'
\ No newline at end of file
diff --git a/ConfigExamples/azureLogAnalytics.yaml b/ConfigExamples/azureLogAnalytics.yaml
index 7693cff..b5dba14 100644
--- a/ConfigExamples/azureLogAnalytics.yaml
+++ b/ConfigExamples/azureLogAnalytics.yaml
@@ -5,8 +5,6 @@ collect: # Settings determining which audit logs to collect and how to do it
     Audit.Exchange: True
     Audit.SharePoint: True
     DLP.All: True
-  skipKnownLogs: True
-  resume: True
 output:
   azureLogAnalytics:
     enabled: True
diff --git a/ConfigExamples/azureTable.yaml b/ConfigExamples/azureTable.yaml
new file mode 100644
index 0000000..20f75dd
--- /dev/null
+++ b/ConfigExamples/azureTable.yaml
@@ -0,0 +1,11 @@
+collect: # Settings determining which audit logs to collect and how to do it
+  contentTypes:
+    Audit.General: True
+    Audit.AzureActiveDirectory: True
+    Audit.Exchange: True
+    Audit.SharePoint: True
+    DLP.All: True
+output:
+  azureTable:
+    enabled: True
+    tableName: AuditLogs
\ No newline at end of file
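
The two new example configs are plain YAML, so they can be sanity-checked before a run. A minimal sketch (not part of this PR), assuming PyYAML is installed and the files are read from the ConfigExamples folder:

```python
# Hypothetical pre-flight check for the new example configs; PyYAML is an
# assumption here, not a dependency introduced by this PR.
import yaml

for example in ('ConfigExamples/azureBlob.yaml', 'ConfigExamples/azureTable.yaml'):
    with open(example) as config_file:
        config = yaml.safe_load(config_file)
    # 'output' holds exactly the blocks shown in the diffs above.
    print(example, '->', list(config['output']))  # e.g. azureBlob.yaml -> ['azureBlob']
```
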
diff --git a/ConfigExamples/filteredCsvOutput.yaml b/ConfigExamples/filteredCsvOutput.yaml
index 4e828e3..09a4d46 100644
--- a/ConfigExamples/filteredCsvOutput.yaml
+++ b/ConfigExamples/filteredCsvOutput.yaml
@@ -3,8 +3,6 @@ collect:
     Audit.General: True
     Audit.AzureActiveDirectory: True
     Audit.SharePoint: True
-  skipKnownLogs: True
-  resume: True
 # Collect logs concerning spoofing prevention in Audit.General, deleted files from Audit.SharePoint
 # and login failures from Audit.AzureActiveDirectory
   filter:
diff --git a/ConfigExamples/fullConfig.yaml b/ConfigExamples/fullConfig.yaml
index cfba813..a5864ee 100644
--- a/ConfigExamples/fullConfig.yaml
+++ b/ConfigExamples/fullConfig.yaml
@@ -25,13 +25,25 @@ output:
   file: # CSV output
     enabled: False
     separateByContentType: True # Creates a separate CSV file for each content type, appends content name to path
-    path: 'output.csv'
+    path: 'output'
     separator: ';'
+    cacheSize: 500000 # Amount of logs to cache until each CSV commit, larger=faster but eats more memory
   azureLogAnalytics:
     enabled: False
     workspaceId:
     sharedKey:
-  sql:
+  azureTable: # Provide connection string to executable at runtime with --table-string
+    enabled: False
+    tableName: AuditLogs # Name of the table inside the storage account
+  azureBlob: # Write CSV to a blob container. Provide connection string to executable at runtime with --blob-string
+    enabled: False
+    containerName: AuditLogs # Name of the container inside the storage account
+    blobName: AuditLog # When separateByContentType is true, this is used as file prefix and becomes e.g. AuditLog_AuditExchange.csv
+    tempPath: './output'
+    separateByContentType: True
+    separator: ';'
+    cacheSize: 500000 # Amount of logs to cache until each CSV commit, larger=faster but eats more memory
+  sql: # Provide connection string to executable at runtime with --sql-string
     enabled: False
     cacheSize: 500000 # Amount of logs to cache until each SQL commit, larger=faster but eats more memory
     chunkSize: 2000 # Amount of rows to write simultaneously to SQL, in most cases just set it as high as your DB allows. COUNT errors = too high
diff --git a/ConfigExamples/graylog.yaml b/ConfigExamples/graylog.yaml
index 13da73b..adf8dfa 100644
--- a/ConfigExamples/graylog.yaml
+++ b/ConfigExamples/graylog.yaml
@@ -5,8 +5,6 @@ collect:
     Audit.Exchange: True
     Audit.SharePoint: True
     DLP.All: True
-  skipKnownLogs: True
-  resume: True
 output:
   graylog:
     enabled: False
diff --git a/Linux/OfficeAuditLogCollector-V1.2 b/Linux/LINUX-OfficeAuditLogCollector-V1.3
similarity index 83%
rename from Linux/OfficeAuditLogCollector-V1.2
rename to Linux/LINUX-OfficeAuditLogCollector-V1.3
index 769f90e..54d5edd 100644
Binary files a/Linux/OfficeAuditLogCollector-V1.2 and b/Linux/LINUX-OfficeAuditLogCollector-V1.3 differ
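
With the renamed V1.3 binaries, a full run hands the new connection-string flags to whichever outputs fullConfig.yaml enables. A hedged sketch of an invocation; only 'secret_key', '--config', '--table-string' and '--blob-string' are confirmed by this diff, the other credential positionals are assumed to match earlier versions:

```python
# Sketch: launching the Linux V1.3 executable from Python. The positional
# credential arguments are assumptions; replace all placeholders before use.
import subprocess

subprocess.run([
    './Linux/LINUX-OfficeAuditLogCollector-V1.3',
    '<tenant_id>', '<client_key>', '<secret_key>',             # assumed positionals
    '--config', 'ConfigExamples/fullConfig.yaml',
    '--table-string', '<storage account connection string>',   # consumed by the azureTable output
    '--blob-string', '<storage account connection string>',    # consumed by the azureBlob output
], check=True)
```
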
diff --git a/README.md b/README.md
index c3a8803..f4a6343 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
 Collect/retrieve Office365, Azure and DLP audit logs, optionally filter them, then send them to one or more outputs such as file, PRTG, Azure Log Analytics or Graylog.
 Onboarding is easy and takes only a few minutes (steps described below). There are Windows and Linux executables, and an optional GUI for Windows only.
 Easy configuration with a YAML config file (see the 'ConfigExamples' folder for reference).
-If you have any issues or questions, feel free to create an issue in this repo.
+If you have any issues or questions, or requests for additional interfaces, feel free to create an issue in this repo.
 - The following Audit logs can be extracted:
   - Audit.General
   - Audit.AzureActiveDirectory
@@ -12,11 +12,13 @@ If you have any issues or questions, feel free to create an issue in this repo.
   - DLP.All
 - The following outputs are supported:
   - Azure Analytics Workspace (OMS)
+  - Azure Storage Table
+  - Azure Storage Blob
   - PRTG Network Monitor
   - ( Azure ) SQL server
   - Graylog (or any other source that accepts a simple socket connection)
   - CSV Local file
-  - Power BI (indirectly through SQL or CSV)
+  - Power BI (indirectly through SQL, CSV, Azure Tables or Azure Blob)
 
 Simply download the executable you need from the Windows or Linux folder and copy a config file from the ConfigExamples folder that suits your need:
 - Windows:
@@ -42,12 +44,12 @@ See the following link for more info on the management APIs: https://msdn.micros
 
 ## Roadmap:
 
-- Add AzureBlob and AzureTable outputs
 - Automate onboarding as much as possible to make it easier to use
 - Make a container that runs this script
 - Create a tutorial for automatic onboarding + docker container for the easiest way to run this
 
 ## Latest changes:
+- Added Azure Blob and Azure Table outputs
 - Added SQL output for Power BI
 - Changed file to CSV output
 - Added PRTG output
@@ -133,6 +135,13 @@ If you are running this script to get audit events in an Azure Analytics Workspa
 - Get the ID and key from "Agent management";
 - You do not need to prepare any tables or other settings.
 
+### (optional) Creating an Azure Table / Blob account:
+
+If you are running this script to get audit events in an Azure Table and/or Blob, you will need a storage account and a connection string:
+- Create a storage account from "Create resource" in Azure (no special configuration required);
+- Get the connection string from 'Access keys';
+- You do not need to prepare any tables or blob containers, as they are created in the storage account if they do not exist.
+
 ### (optional) Creating a PRTG sensor
 
 To run with PRTG you must create a sensor:
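
The connection string from 'Access keys' is all the new outputs need; the SDK clients create tables and containers on demand, which is why the README says no preparation is required. A sketch using the same azure-data-tables / azure-storage-blob calls the new interfaces below use (account name and key are placeholders):

```python
# Sketch of what the new outputs do with the 'Access keys' connection string.
from azure.data.tables import TableServiceClient
from azure.storage.blob import BlobServiceClient

conn_str = 'DefaultEndpointsProtocol=https;AccountName=<name>;AccountKey=<key>;EndpointSuffix=core.windows.net'
table_service = TableServiceClient.from_connection_string(conn_str=conn_str)
table_service.create_table_if_not_exists(table_name='AuditLogs')  # no manual table preparation
blob_service = BlobServiceClient.from_connection_string(conn_str=conn_str)
print([container['name'] for container in blob_service.list_containers()])
```
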
diff --git a/Source/AuditLogCollector.py b/Source/AuditLogCollector.py
index 1e868bc..38d160e 100644
--- a/Source/AuditLogCollector.py
+++ b/Source/AuditLogCollector.py
@@ -1,4 +1,5 @@
-from Interfaces import AzureOMSInterface, SqlInterface, GraylogInterface, PRTGInterface, FileInterface
+from Interfaces import AzureOMSInterface, SqlInterface, GraylogInterface, PRTGInterface, FileInterface, \
+    AzureTableInterface, AzureBlobInterface
 import AuditLogSubscriber
 import ApiConnection
 import os
@@ -17,8 +18,8 @@ class AuditLogCollector(ApiConnection.ApiConnection):
 
     def __init__(self, content_types=None, resume=True, fallback_time=None, skip_known_logs=True,
                  log_path='collector.log', debug=False, auto_subscribe=False, max_threads=20, retries=3,
-                 retry_cooldown=3, file_output=False, sql_output=False, graylog_output=False, azure_oms_output=False,
-                 prtg_output=False, **kwargs):
+                 retry_cooldown=3, file_output=False, sql_output=False, graylog_output=False, azure_table_output=False,
+                 azure_blob_output=False, azure_oms_output=False, prtg_output=False, **kwargs):
         """
         Object that can retrieve all available content blobs for a list of content types and then retrieve those
         blobs and output them to a file or Graylog input (i.e. send over a socket).
@@ -52,6 +53,10 @@ def __init__(self, content_types=None, resume=True, fallback_time=None, skip_kno
 
         self.file_output = file_output
         self.file_interface = FileInterface.FileInterface(**kwargs)
+        self.azure_table_output = azure_table_output
+        self.azure_table_interface = AzureTableInterface.AzureTableInterface(**kwargs)
+        self.azure_blob_output = azure_blob_output
+        self.azure_blob_interface = AzureBlobInterface.AzureBlobInterface(**kwargs)
         self.azure_oms_output = azure_oms_output
         self.azure_oms_interface = AzureOMSInterface.AzureOMSInterface(**kwargs)
         self.sql_output = sql_output
@@ -76,7 +81,8 @@ def __init__(self, content_types=None, resume=True, fallback_time=None, skip_kno
 
     @property
     def all_interfaces(self):
-        return {self.file_interface: self.file_output, self.azure_oms_interface: self.azure_oms_output,
+        return {self.file_interface: self.file_output, self.azure_table_interface: self.azure_table_output,
+                self.azure_blob_interface: self.azure_blob_output, self.azure_oms_interface: self.azure_oms_output,
                 self.sql_interface: self.sql_output, self.graylog_interface: self.graylog_output,
                 self.prtg_interface: self.prtg_output}
 
@@ -108,6 +114,8 @@ def _load_log_config(self, config):
         """
         :param config: str
         """
+        az_logger = logging.getLogger("azure.core.pipeline.policies.http_logging_policy")
+        az_logger.setLevel(logging.WARNING)
         if 'log' in config['collect']:
             if 'path' in config['collect']['log']:
                 self.log_path = config['collect']['log']['path']
@@ -153,6 +161,8 @@ def _load_output_config(self, config):
         if 'output' in config:
             self._load_file_output_config(config=config)
             self._load_azure_log_analytics_output_config(config=config)
+            self._load_azure_table_output_config(config=config)
+            self._load_azure_blob_output_config(config=config)
             self._load_sql_output_config(config=config)
             self._load_graylog_output_config(config=config)
             self._load_prtg_output_config(config=config)
@@ -170,6 +180,8 @@ def _load_file_output_config(self, config):
             self.file_interface.separate_by_content_type = config['output']['file']['separateByContentType']
         if 'separator' in config['output']['file']:
             self.file_interface.separator = config['output']['file']['separator']
+        if 'cacheSize' in config['output']['file']:
+            self.file_interface.cache_size = config['output']['file']['cacheSize']
 
     def _load_azure_log_analytics_output_config(self, config):
         """
@@ -195,6 +207,36 @@ def _load_sql_output_config(self, config):
             if 'chunkSize' in config['output']['sql']:
                 self.sql_interface.chunk_size = config['output']['sql']['chunkSize']
 
+    def _load_azure_table_output_config(self, config):
+        """
+        :param config: str
+        """
+        if 'azureTable' in config['output']:
+            if 'enabled' in config['output']['azureTable']:
+                self.azure_table_output = config['output']['azureTable']['enabled']
+            if 'tableName' in config['output']['azureTable']:
+                self.azure_table_interface.table_name = config['output']['azureTable']['tableName']
+
+    def _load_azure_blob_output_config(self, config):
+        """
+        :param config: str
+        """
+        if 'azureBlob' in config['output']:
+            if 'enabled' in config['output']['azureBlob']:
+                self.azure_blob_output = config['output']['azureBlob']['enabled']
+            if 'containerName' in config['output']['azureBlob']:
+                self.azure_blob_interface.container_name = config['output']['azureBlob']['containerName']
+            if 'blobName' in config['output']['azureBlob']:
+                self.azure_blob_interface.blob_name = config['output']['azureBlob']['blobName']
+            if 'tempPath' in config['output']['azureBlob']:
+                self.azure_blob_interface.output_path = config['output']['azureBlob']['tempPath']
+            if 'separateByContentType' in config['output']['azureBlob']:
+                self.azure_blob_interface.separate_by_content_type = config['output']['azureBlob']['separateByContentType']
+            if 'separator' in config['output']['azureBlob']:
+                self.azure_blob_interface.separator = config['output']['azureBlob']['separator']
+            if 'cacheSize' in config['output']['azureBlob']:
+                self.azure_blob_interface.cache_size = config['output']['azureBlob']['cacheSize']
+
     def _load_graylog_output_config(self, config):
         """
         :param config: str
         """
@@ -612,6 +654,10 @@ def known_content(self):
     parser.add_argument('secret_key', type=str, help='Secret key generated by Azure application', action='store')
     parser.add_argument('--config', metavar='config', type=str, help='Path to YAML config file', action='store',
                         dest='config')
+    parser.add_argument('--table-string', metavar='table_string', type=str,
+                        help='Connection string for Azure Table output interface', action='store', dest='table_string')
+    parser.add_argument('--blob-string', metavar='blob_string', type=str,
+                        help='Connection string for Azure Blob output interface', action='store', dest='blob_string')
     parser.add_argument('--sql-string', metavar='sql_string', type=str,
                         help='Connection string for SQL output interface', action='store', dest='sql_string')
     parser.add_argument('--interactive-subscriber', action='store_true',
@@ -686,7 +732,8 @@ def known_content(self):
                                    shared_key=argsdict['azure_key'],
                                    gl_address=argsdict['graylog_addr'], gl_port=argsdict['graylog_port'],
                                    graylog_output=argsdict['graylog'],
-                                   sql_connection_string=argsdict['sql_string'])
+                                   sql_connection_string=argsdict['sql_string'], table_connection_string=argsdict['table_string'],
+                                   blob_connection_string=argsdict['blob_string'])
     if argsdict['config']:
         collector.load_config(path=argsdict['config'])
     collector.init_logging()
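
The new flags travel from argparse into the interfaces purely through **kwargs: AuditLogCollector passes everything it does not consume to each interface constructor, and each interface picks out its own keyword. A condensed sketch of that flow (connection strings are placeholders):

```python
# Condensed from the diff above: the interfaces pick their kwargs out of **kwargs.
from Interfaces import AzureTableInterface, AzureBlobInterface

kwargs = {'table_connection_string': '<from --table-string>',
          'blob_connection_string': '<from --blob-string>'}
table_interface = AzureTableInterface.AzureTableInterface(**kwargs)
blob_interface = AzureBlobInterface.AzureBlobInterface(**kwargs)
assert table_interface.connection_string == kwargs['table_connection_string']
assert blob_interface.connection_string == kwargs['blob_connection_string']
```
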
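
AzureBlobInterface is essentially FileInterface plus an upload step: CSVs are written to tempPath as usual, then pushed to the container when the collector exits. A standalone usage sketch (run from the Source directory; Azurite's development connection string stands in for a real storage account, and the file names are illustrative):

```python
# Sketch: exercising write_blob outside the collector. 'UseDevelopmentStorage=true'
# targets a local Azurite emulator.
from Interfaces import AzureBlobInterface

interface = AzureBlobInterface.AzureBlobInterface(
    blob_connection_string='UseDevelopmentStorage=true',
    container_name='audit-logs', blob_name='AuditLog')
with open('AuditLog.csv', 'w') as csv_file:
    csv_file.write('Id;CreationTime\n1;2022-01-01T00:00:00\n')
# container_client creates 'audit-logs' on first use if it does not exist yet.
interface.write_blob(blob_name='AuditLog.csv', file_path='AuditLog.csv')
```
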
diff --git a/Source/Interfaces/AzureTableInterface.py b/Source/Interfaces/AzureTableInterface.py
new file mode 100644
index 0000000..d9153bb
--- /dev/null
+++ b/Source/Interfaces/AzureTableInterface.py
@@ -0,0 +1,87 @@
+import collections
+import logging
+import threading
+import time
+import azure.core.exceptions
+from . import _Interface
+from azure.data.tables import TableServiceClient
+
+
+class AzureTableInterface(_Interface.Interface):
+
+    def __init__(self, table_connection_string=None, table_name=None, max_threads=10, **kwargs):
+        """
+        Interface to send logs to an Azure Storage Table. Azure Tables are schemaless, so each log is written as
+        one entity with the content type as PartitionKey and the log Id as RowKey. Entities are sent on up to
+        max_threads threads to keep up with the collector.
+        """
+        super().__init__(**kwargs)
+        self.table_name = table_name
+        self.connection_string = table_connection_string
+        self._table_service = None
+        self._table_client = None
+        self._max_threads = max_threads
+        self._threads = collections.deque()
+
+    @property
+    def table_service(self):
+
+        if not self._table_service:
+            self._table_service = TableServiceClient.from_connection_string(conn_str=self.connection_string)
+        return self._table_service
+
+    @property
+    def table_client(self):
+
+        if not self._table_client:
+            self._table_client = self.table_service.create_table_if_not_exists(table_name=self.table_name)
+        return self._table_client
+
+    def _validate_fields(self, msg):
+
+        for k, v in msg.copy().items():
+            if (isinstance(v, int) and v > 2147483647) or isinstance(v, list) or isinstance(v, dict):
+                msg[k] = str(v)
+        return msg
+
+    def monitor_queue(self):
+        """
+        Overloaded for multithreading.
+        """
+        while 1:
+            self._threads = [running_thread for running_thread in self._threads if running_thread.is_alive()]
+            if self.queue and len(self._threads) < self._max_threads:
+                msg, content_type = self.queue.popleft()
+                if msg == 'stop monitor thread':
+                    [running_thread.join() for running_thread in self._threads]
+                    return
+                else:
+                    new_thread = threading.Thread(target=self._send_message,
+                                                  kwargs={"msg": msg, "content_type": content_type}, daemon=True)
+                    new_thread.start()
+                    self._threads.append(new_thread)
+            else:
+                time.sleep(0.1)  # don't busy-wait while the queue is empty or all threads are in use
+
+    def _send_message(self, msg, content_type, **kwargs):
+        try:
+            msg = self._validate_fields(msg=msg)
+            entity = {
+                'PartitionKey': content_type,
+                'RowKey': msg['Id'],
+            }
+            entity.update(msg)
+            self.table_client.create_entity(entity)
+        except azure.core.exceptions.ResourceExistsError:
+            self.successfully_sent += 1
+            return
+        except Exception as e:
+            self.unsuccessfully_sent += 1
+            logging.error("Error sending log to Azure Table. Log: {}. Error: {}.".format(msg, e))
+        else:
+            self.successfully_sent += 1
+
+    def exit_callback(self):
+
+        return [thread.join() for thread in self._threads]
+
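
Each audit log becomes one table entity: PartitionKey is the content type, RowKey the log Id, and _validate_fields stringifies anything Azure Tables cannot store natively. A worked example of the entity produced by _send_message (the log values are made up):

```python
# Worked example of the entity shape; mirrors _send_message above.
log = {'Id': '42ab-...', 'CreationTime': '2022-01-01T00:00:00',
       'ModifiedProperties': ['Name', 'Size'],  # list -> stringified by _validate_fields
       'ItemSizeBytes': 99999999999}            # > 2147483647 -> stringified as well
entity = {'PartitionKey': 'Audit.Exchange', 'RowKey': log['Id']}
entity.update(log)
# table_client.create_entity(entity) inserts the row; a duplicate Id raises
# ResourceExistsError, which the interface counts as already sent.
```
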
diff --git a/Source/Interfaces/FileInterface.py b/Source/Interfaces/FileInterface.py
index 500bb47..8acb1f4 100644
--- a/Source/Interfaces/FileInterface.py
+++ b/Source/Interfaces/FileInterface.py
@@ -3,8 +3,6 @@
 from . import _Interface
 import collections
 import pandas
-import threading
-import time
 
 
 class FileInterface(_Interface.Interface):
@@ -31,8 +29,13 @@ def total_cache_length(self):
     def _path_for(self, content_type):
 
         if content_type not in self.paths:
-            self.paths[content_type] = "{}_{}.csv".format(self.path, content_type.replace('.', '')) \
-                if self.separate_by_content_type else self.path
+            if not self.separate_by_content_type:
+                self.paths[content_type] = self.path
+            else:
+                path, file_name = os.path.split(self.path)
+                file_name = file_name[:-4] if file_name.endswith('.csv') else file_name
+                file_name = "{}_{}.csv".format(file_name, content_type.replace('.', ''))
+                self.paths[content_type] = os.path.join(path, file_name)
         return self.paths[content_type]
 
     def _send_message(self, msg, content_type, **kwargs):
diff --git a/Source/Interfaces/SqlInterface.py b/Source/Interfaces/SqlInterface.py
index 0b99d3f..6477b8d 100644
--- a/Source/Interfaces/SqlInterface.py
+++ b/Source/Interfaces/SqlInterface.py
@@ -89,7 +89,6 @@ def _validate_column_value(df):
         """
         Flatten columns that have a list as value. E.g. column "ColumnA: [1,2,3]" becomes:
         "ColumnA_0: 1, ColumnA_1: 2, ColumnA_2: 3".
-        Logs are processed individually as they come in, so only the first value of a column needs to be checked.
         :param df: pandas.DataFrame.
         :return: pandas.DataFrame
         """
diff --git a/Windows/OfficeAuditLogCollector-V1.2.exe b/Windows/WIN-OfficeAuditLogCollector-V1.3.exe
similarity index 89%
rename from Windows/OfficeAuditLogCollector-V1.2.exe
rename to Windows/WIN-OfficeAuditLogCollector-V1.3.exe
index c4830ed..86066df 100644
Binary files a/Windows/OfficeAuditLogCollector-V1.2.exe and b/Windows/WIN-OfficeAuditLogCollector-V1.3.exe differ
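
The FileInterface change above also fixes how per-content-type file names are derived: str.strip('.csv') strips a character set rather than a suffix and could mangle names such as 'csv_output.csv' (which strips down to '_output'). A worked example of the corrected logic:

```python
# Worked example of the fixed _path_for naming (mirrors the code above).
import os

path, content_type = 'output/logs.csv', 'Audit.Exchange'
folder, file_name = os.path.split(path)
file_name = file_name[:-4] if file_name.endswith('.csv') else file_name  # suffix-safe
print(os.path.join(folder, '{}_{}.csv'.format(file_name, content_type.replace('.', ''))))
# -> output/logs_AuditExchange.csv
```
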