| 1 | +""" |
| 2 | +Streams / aggregates data from a Kafka topic |
| 3 | +This daemon can run multiple instances in parallel, each instance handling a different topic. |
| 4 | +""" |
| 5 | +import os |
| 6 | +import json |
| 7 | +import subprocess |
| 8 | +from textwrap import dedent |
| 9 | + |
| 10 | +from confluent_kafka import Consumer |
| 11 | + |
| 12 | +from cwm_worker_operator.daemon import Daemon |
| 13 | +from cwm_worker_operator import config, common, logs |
| 14 | +from cwm_worker_operator.domains_config import DomainsConfig |
| 15 | + |
| 16 | + |
| 17 | +MINIO_TENANT_MAIN_AUDIT_LOGS_TOPIC = 'minio-tenant-main-audit-logs' |
| 18 | +DEPLOYMENT_API_METRICS_BASE_DATA = { |
| 19 | + 'bytes_in': 0, |
| 20 | + 'bytes_out': 0, |
| 21 | + 'num_requests_in': 0, |
| 22 | + 'num_requests_out': 0, |
| 23 | + 'num_requests_misc': 0, |
| 24 | +} |


def get_request_type(name):
    # classify a MinIO API operation name as incoming, outgoing or miscellaneous traffic
    if name in ['WebUpload', 'PutObject', 'DeleteObject']:
        return 'in'
    elif name in ['WebDownload', 'GetObject']:
        return 'out'
    else:
        return 'misc'
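

# The audit-log events consumed by the processor below are assumed to look
# roughly like this (shape inferred from the fields accessed; values are
# hypothetical):
#
#   {
#       "api": {
#           "name": "PutObject",     # classified by get_request_type()
#           "bucket": "my-bucket",   # mapped to a namespace name
#           "rx": 2048,              # counted as bytes_in
#           "tx": 1024               # counted as bytes_out
#       }
#   }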


def process_minio_tenant_main_audit_logs(data, agg_data):
    # aggregate a single audit-log event into agg_data, keyed by namespace name
    data_api = data.get('api', {})
    bucket = data_api.get('bucket')
    if bucket:
        namespace_name = common.get_namespace_name_from_bucket_name(bucket)
        if namespace_name:
            if namespace_name not in agg_data:
                logs.debug(f"process_minio_tenant_main_audit_logs: {namespace_name}", 8)
                agg_data[namespace_name] = DEPLOYMENT_API_METRICS_BASE_DATA.copy()
            logs.debug('process_minio_tenant_main_audit_logs', 10, data_api=data_api)
            tx = data_api.get('tx') or 0
            rx = data_api.get('rx') or 0
            agg_data[namespace_name]['bytes_in'] += rx
            agg_data[namespace_name]['bytes_out'] += tx
            request_type = get_request_type(data_api.get('name'))
            agg_data[namespace_name][f'num_requests_{request_type}'] += 1


def commit_minio_tenant_main_audit_logs(domains_config, agg_data):
    # persist the aggregated metrics via domains_config and record the
    # last action time for each namespace
    logs.debug(f"commit_minio_tenant_main_audit_logs: {agg_data}", 8)
    for namespace_name, data in agg_data.items():
        domains_config.update_deployment_api_metrics(namespace_name, data)
        domains_config.set_deployment_last_action(namespace_name)


def process_data(topic, data, agg_data):
    if topic == MINIO_TENANT_MAIN_AUDIT_LOGS_TOPIC:
        process_minio_tenant_main_audit_logs(data, agg_data)
    else:
        raise NotImplementedError(f"topic {topic} is not supported")


def commit(topic, consumer, domains_config, agg_data, no_kafka_commit=False):
    # persist the aggregated data first, then commit the Kafka offsets so the
    # messages are not re-consumed (unless no_kafka_commit is set)
    if topic == MINIO_TENANT_MAIN_AUDIT_LOGS_TOPIC:
        commit_minio_tenant_main_audit_logs(domains_config, agg_data)
    else:
        raise NotImplementedError(f"topic {topic} is not supported")
    if not no_kafka_commit:
        consumer.commit()


def delete_records(topic, latest_partition_offset):
    # reclaim disk space by deleting consumed records, using
    # kafka-delete-records.sh inside the Kafka pod; Kafka deletes all records
    # below each given offset per partition (the record at the offset itself is kept)
    partitions = [
        {'topic': topic, 'partition': p, 'offset': o}
        for p, o in latest_partition_offset.items()
    ]
    if len(partitions) > 0:
        offset_json = json.dumps({'partitions': partitions, 'version': 1})
        logs.debug(f"Deleting records: {offset_json}", 8)
        subprocess.check_call([
            'kubectl', 'exec', '-n', config.KAFKA_STREAMER_POD_NAMESPACE, config.KAFKA_STREAMER_POD_NAME, '--', 'bash', '-c', dedent(f'''
                TMPFILE=$(mktemp) &&\
                echo '{offset_json}' > $TMPFILE &&\
                bin/kafka-delete-records.sh --bootstrap-server localhost:9092 --offset-json-file $TMPFILE &&\
                rm $TMPFILE
            ''').strip()
        ], env={**os.environ, 'DEBUG': ''})  # blank out DEBUG so the subprocess does not inherit the operator's debug mode
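

# For reference, a hypothetical offset JSON as generated above:
#
#   {"version": 1, "partitions": [{"topic": "minio-tenant-main-audit-logs", "partition": 0, "offset": 1234}]}
#
# which asks Kafka to drop everything before offset 1234 in partition 0.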


def run_single_iteration(domains_config: DomainsConfig, topic=None, no_kafka_commit=False, no_kafka_delete=False, **_):
    start_time = common.now()
    if not topic:
        topic = config.KAFKA_STREAMER_TOPIC
    assert topic, "topic is required"
    logs.debug(f"running iteration for topic: {topic}", 8)
    consumer = Consumer({
        'bootstrap.servers': config.KAFKA_STREAMER_BOOTSTRAP_SERVERS,
        'group.id': config.KAFKA_STREAMER_OPERATOR_GROUP_ID,
        **config.KAFKA_STREAMER_CONSUMER_CONFIG
    })
    consumer.subscribe([topic])
    latest_partition_offset = {}
    try:
        agg_data = {}
        # poll and aggregate messages in-memory until the poll time elapses
        while (common.now() - start_time).total_seconds() < config.KAFKA_STREAMER_POLL_TIME_SECONDS:
            msg = consumer.poll(timeout=config.KAFKA_STREAMER_CONSUMER_POLL_TIMEOUT_SECONDS)
            if msg is None:
                # no message received within the poll timeout, keep polling
                pass
            elif msg.error():
                raise Exception(f"Message ERROR: {msg.error()}")
            else:
                offset = msg.offset()
                partition = msg.partition()
                # track the latest consumed offset per partition for delete_records
                latest_partition_offset[partition] = offset
                data = json.loads(msg.value())
                process_data(topic, data, agg_data)
        # persist the aggregated metrics, then commit the Kafka offsets
        commit(topic, consumer, domains_config, agg_data, no_kafka_commit=no_kafka_commit)
    except KeyboardInterrupt:
        pass
    finally:
        consumer.close()
        if not no_kafka_delete:
            delete_records(topic, latest_partition_offset)
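

# Delivery semantics note: the Kafka offsets are committed only after the
# aggregated metrics have been persisted, so a crash mid-iteration re-consumes
# the uncommitted messages on the next run - at-least-once delivery, with
# possible double counting of metrics for that window.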


def start_daemon(once=False, domains_config=None, topic=None, no_kafka_commit=False, no_kafka_delete=False):
    assert topic, "topic is required"
    Daemon(
        name=f"kafka_streamer_{topic}",
        sleep_time_between_iterations_seconds=config.KAFKA_STREAMER_SLEEP_TIME_BETWEEN_ITERATIONS_SECONDS,
        domains_config=domains_config,
        run_single_iteration_callback=run_single_iteration,
        run_single_iteration_extra_kwargs={'topic': topic, 'no_kafka_commit': no_kafka_commit, 'no_kafka_delete': no_kafka_delete},
    ).start(
        once=once,
        with_prometheus=False,
    )
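

# A hypothetical invocation sketch (the actual CLI wiring lives elsewhere in
# the package; the module path is assumed here for illustration):
#
#   from cwm_worker_operator import kafka_streamer
#   kafka_streamer.start_daemon(once=True, topic=kafka_streamer.MINIO_TENANT_MAIN_AUDIT_LOGS_TOPIC)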