Skip to content

Commit 158eb45

Browse files
committed
add cdn nginx cache and logs --skip-tests
1 parent c4d8bfb commit 158eb45

File tree

2 files changed

+176
-23
lines changed

2 files changed

+176
-23
lines changed

cwm_worker_operator/deployments_manager.py

Lines changed: 134 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -82,23 +82,94 @@
8282
# Pulled Feb 26, 2024
8383
image: nginx@sha256:c26ae7472d624ba1fafd296e73cecc4f93f853088e6a9c13c0d52f6ca5865107
8484
volumeMounts: {volume_mounts_json}
85+
- name: fluentbit
86+
# Pulled Apr 29, 2024
87+
image: cr.fluentbit.io/fluent/fluent-bit:3.0.2@sha256:ed0214b0b0c6bff7474c739d9c8c2e128d378b053769c2b12da06296be883898
88+
args: ["-c", "/fluent-bit/etc/fluent-bit.conf"]
89+
volumeMounts:
90+
- name: fluentbit-config
91+
mountPath: /fluent-bit/etc/cwmparsers.conf
92+
subPath: cwmparsers.conf
93+
- name: fluentbit-config
94+
mountPath: /fluent-bit/etc/fluent-bit.conf
95+
subPath: fluent-bit.conf
96+
- name: access-logs
97+
mountPath: /var/log/nginx/cwm-access-logs
8598
volumes: {volumes_json}
8699
''').strip()
100+
NGINX_INCLUDES_CONFIGMAP_TEMPLATE = dedent('''
101+
apiVersion: v1
102+
kind: ConfigMap
103+
metadata:
104+
name: nginx-includes
105+
data:
106+
cache_location.conf: |
107+
proxy_cache minio;
108+
109+
# Buffering is required to enable cache
110+
proxy_buffering on;
111+
112+
# Sets the number and size of the buffers used for reading a response from the
113+
# proxied server, for a single connection.
114+
proxy_buffers 8 16k;
115+
116+
# Sets the size of the buffer used for reading the first part of the response
117+
# received from the proxied server. This part usually contains a small response
118+
# header.
119+
proxy_buffer_size 16k;
120+
121+
# When buffering of responses from the proxied server is enabled, limits the
122+
# total size of buffers that can be busy sending a response to the client while
123+
# the response is not yet fully read. In the meantime, the rest of the buffers
124+
# can be used for reading the response and, if needed, buffering part of the
125+
# response to a temporary file.
126+
proxy_busy_buffers_size 32k;
127+
128+
proxy_cache_valid 200 1m;
129+
130+
# the following lines are required to fix handling of HEAD requests by minio
131+
proxy_cache_convert_head off;
132+
proxy_cache_key "$request_method$request_uri$is_args$args";
133+
proxy_cache_methods GET HEAD;
134+
135+
# when caching is enabled some headers are not passed, we need to explicitly pass them
136+
proxy_set_header If-Match $http_if_match;
137+
proxy_set_header Range $http_range;
138+
139+
add_header X-Cache-Status $upstream_cache_status;
140+
''').strip()
87141
NGINX_HOST_BUCKET_HTTP_CONFIGMAP_TEMPLATE = dedent('''
88142
apiVersion: v1
89143
kind: ConfigMap
90144
metadata:
91145
name: {name}
92146
data:
93-
conf: |
147+
default_conf: |
148+
proxy_cache_path /var/cache/nginx/minio/cache levels=1:2 keys_zone=minio:10m max_size=1g inactive=1m use_temp_path=on;
149+
proxy_temp_path /var/cache/nginx/minio/temp;
150+
log_format json escape=json '{{'
151+
'"bytes_sent": "$bytes_sent", '
152+
'"request_length": "$request_length", '
153+
'"request": "$request", '
154+
'"status": "$status", '
155+
'"server_name": "$server_name", '
156+
'"scheme": "$scheme", '
157+
'"https": "$https", '
158+
'"hostname": "$hostname", '
159+
'"host": "$host", '
160+
'"upstream_cache_status": "$upstream_cache_status", '
161+
'"request_time": "$request_time"'
162+
'}}';
163+
conf: |
94164
server {{
95165
listen 80;
96166
server_name {server_name};
97167
location /{bucket_name} {{
98168
proxy_pass http://cwm-minio.minio-tenant-main.svc.cluster.local;
169+
include /etc/nginx/includes/cache_location.conf;
99170
}}
171+
access_log syslog:server=unix:/var/log/nginx/cwm-access-logs/syslog.sock json;
100172
}}
101-
default_conf: ""
102173
''').strip()
103174
NGINX_HOST_BUCKET_HTTPS_CONFIGMAP_TEMPLATE = dedent('''
104175
apiVersion: v1
@@ -120,12 +191,35 @@
120191
ssl_prefer_server_ciphers on;
121192
location /{bucket_name} {{
122193
proxy_pass http://cwm-minio.minio-tenant-main.svc.cluster.local;
194+
include /etc/nginx/includes/cache_location.conf;
123195
}}
196+
access_log syslog:server=unix:/var/log/nginx/cwm-access-logs/syslog.sock json;
124197
}}
125198
fullchain: "{fullchain}"
126199
key: "{privkey}"
127200
chain: "{chain}"
128201
''').strip()
202+
FLUENT_BIT_CONFIGMAP_TEMPLATE = dedent('''
203+
apiVersion: v1
204+
kind: ConfigMap
205+
metadata:
206+
name: {name}
207+
data:
208+
fluent-bit.conf: |
209+
[SERVICE]
210+
Parsers_File parsers.conf
211+
Parsers_File cwmparsers.conf
212+
213+
[INPUT]
214+
Name syslog
215+
Path /var/log/nginx/cwm-access-logs/syslog.sock
216+
Unix_Perm 0666
217+
218+
[OUTPUT]
219+
Name kafka
220+
Brokers {kafka_brokers}
221+
Topics {kafka_topic}
222+
''').strip()
129223

130224

131225
def kubectl_create(obj, namespace_name='default'):
@@ -343,9 +437,44 @@ def deploy_cdn(self, deployment_config, dry_run=False):
343437
out.append(f"create namespace: {namespace_name}")
344438
else:
345439
subprocess.check_call(['kubectl', 'create', 'namespace', namespace_name])
346-
update_hash = []
347-
volumes = []
348-
volume_mounts = []
440+
from .kafka_streamer import MINIO_TENANT_MAIN_AUDIT_LOGS_TOPIC
441+
configmap_input = FLUENT_BIT_CONFIGMAP_TEMPLATE.format(
442+
name='fluentbit-config',
443+
kafka_brokers='minio-audit-kafka-bootstrap.strimzi.svc.cluster.local:9092',
444+
kafka_topic=MINIO_TENANT_MAIN_AUDIT_LOGS_TOPIC
445+
)
446+
if dry_run:
447+
out.append(f"create configmap: fluentbit-config")
448+
out.append(configmap_input)
449+
else:
450+
subprocess.run([
451+
'kubectl', '-n', namespace_name, 'apply', '-f', '-'
452+
], input=configmap_input.encode())
453+
update_hash = [
454+
configmap_input
455+
]
456+
configmap_input = NGINX_INCLUDES_CONFIGMAP_TEMPLATE.format()
457+
if dry_run:
458+
out.append(f"create configmap: nginx-includes")
459+
out.append(configmap_input)
460+
else:
461+
subprocess.run([
462+
'kubectl', '-n', namespace_name, 'apply', '-f', '-'
463+
], input=configmap_input.encode())
464+
update_hash.append(configmap_input)
465+
volumes = [
466+
{'name': 'access-logs', 'emptyDir': {}},
467+
{'name': 'fluentbit-config', 'configMap': {'name': 'fluentbit-config'}},
468+
{'name': 'nginx-includes', 'configMap': {'name': 'nginx-includes'}},
469+
{'name': 'nginx-cache', 'emptyDir': {}},
470+
{'name': 'nginx-cache-temp', 'emptyDir': {}},
471+
]
472+
volume_mounts = [
473+
{"name": "access-logs", "mountPath": "/var/log/nginx/cwm-access-logs"},
474+
{'name': 'nginx-includes', 'mountPath': '/etc/nginx/includes'},
475+
{'name': 'nginx-cache', 'mountPath': '/var/cache/nginx/minio/cache'},
476+
{'name': 'nginx-cache-temp', 'mountPath': '/var/cache/nginx/minio/temp'},
477+
]
349478
for i, hostname in enumerate(deployment_config['minio']['nginx']['hostnames']):
350479
id_ = hostname['id']
351480
name = hostname['name']

cwm_worker_operator/kafka_streamer.py

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
"""
55
import os
66
import json
7+
import functools
78
import subprocess
89
from textwrap import dedent
910

@@ -33,34 +34,56 @@ def get_request_type(name):
3334
return 'misc'
3435

3536

36-
def process_minio_tenant_main_audit_logs(data, agg_data):
37+
def process_minio_tenant_main_audit_logs_update_agg_data(agg_data, namespace_name, request_type, tx, rx):
    """Add a single request's traffic to the aggregated per-namespace metrics.

    The first time ``namespace_name`` is seen, its entry is created as a copy
    of ``DEPLOYMENT_API_METRICS_BASE_DATA``. The byte counters and the
    ``num_requests_<request_type>`` counter of that entry are then incremented
    in place.

    :param agg_data: dict mapping namespace name to its metrics dict; mutated.
    :param namespace_name: key of the metrics entry to update.
    :param request_type: suffix selecting the ``num_requests_<type>`` counter.
    :param tx: bytes sent for the request; any value accepted by ``int()``.
    :param rx: bytes received for the request; any value accepted by ``int()``.
    """
    if namespace_name not in agg_data:
        logs.debug(f"process_minio_tenant_main_audit_logs: {namespace_name}", 10)
        agg_data[namespace_name] = DEPLOYMENT_API_METRICS_BASE_DATA.copy()
    metrics = agg_data[namespace_name]
    # int() normalises the values, so numeric strings are accepted as well as
    # plain numbers.
    metrics['bytes_in'] += int(rx)
    metrics['bytes_out'] += int(tx)
    metrics[f'num_requests_{request_type}'] += 1
44+
45+
46+
def process_minio_tenant_main_audit_logs(data, agg_data, domains_config):
    """Fold one record from the audit-logs topic into ``agg_data``.

    Two record shapes are handled:

    * MinIO audit records, which carry an ``api`` object with ``bucket``,
      ``name``, ``tx`` and ``rx``. The namespace is derived from the bucket
      name.
    * nginx access-log records, which carry ``ident`` starting with
      ``nginx-`` and a JSON document in ``message``. Only requests with
      ``upstream_cache_status == 'HIT'`` are counted here. The namespace is
      derived from the worker id that ``domains_config`` resolves for the
      request host.

    Records matching neither shape, or for which no namespace can be
    resolved, are ignored.

    :param data: decoded record (dict) read from the topic.
    :param agg_data: dict mapping namespace name to its metrics dict; mutated.
    :param domains_config: object providing
        ``get_cwm_api_volume_config(hostname=...)``.
    :raises ValueError: if an nginx record's ``message`` is not valid JSON
        (propagated from ``json.loads``).
    """
    # ``or {}`` also covers a record where "api" is present but null.
    data_api = data.get('api') or {}
    bucket = data_api.get('bucket')
    if bucket:
        namespace_name = common.get_namespace_name_from_bucket_name(bucket)
        if namespace_name:
            tx = data_api.get('tx') or 0
            rx = data_api.get('rx') or 0
            request_type = get_request_type(data_api.get('name'))
            process_minio_tenant_main_audit_logs_update_agg_data(agg_data, namespace_name, request_type, tx, rx)
            logs.debug('process_minio_tenant_main_audit_logs (minio)', 10, data_api=data_api)
    elif data.get('message') and (data.get('ident') or '').startswith('nginx-'):
        message = json.loads(data['message'])
        host = message.get('host')
        upstream_cache_status = message.get('upstream_cache_status')
        if host and upstream_cache_status == 'HIT':
            try:
                worker_id = domains_config.get_cwm_api_volume_config(hostname=host).id
            except Exception:
                # Best effort: an unresolvable host just means the request is
                # not counted. Exception (not a bare except) keeps
                # KeyboardInterrupt / SystemExit propagating.
                worker_id = None
            if worker_id:
                namespace_name = common.get_namespace_name_from_worker_id(worker_id)
                if namespace_name:
                    request = message.get('request') or ''
                    request_type = 'out' if request.startswith('GET ') else 'misc'
                    tx = message.get('bytes_sent') or 0
                    rx = message.get('request_length') or 0
                    process_minio_tenant_main_audit_logs_update_agg_data(agg_data, namespace_name, request_type, tx, rx)
                    logs.debug('process_minio_tenant_main_audit_logs (cdn)', 10, message=message)
5275

5376

5477
def commit_minio_tenant_main_audit_logs(domains_config, agg_data):
    """Push the aggregated metrics of every namespace to ``domains_config``.

    For each ``(namespace, metrics)`` pair in ``agg_data`` the metrics are
    passed to ``update_deployment_api_metrics`` and the namespace is then
    passed to ``set_deployment_last_action``.

    :param domains_config: object providing ``update_deployment_api_metrics``
        and ``set_deployment_last_action``.
    :param agg_data: dict mapping namespace name to its metrics dict.
    """
    logs.debug(f"commit_minio_tenant_main_audit_logs: {agg_data}", 10)
    for namespace_name in agg_data:
        namespace_metrics = agg_data[namespace_name]
        domains_config.update_deployment_api_metrics(namespace_name, namespace_metrics)
        domains_config.set_deployment_last_action(namespace_name)
5982

6083

61-
def process_data(topic, data, agg_data):
84+
def process_data(topic, data, agg_data, domains_config):
    """Dispatch one decoded record to the handler for its topic.

    :param topic: name of the topic the record was read from.
    :param data: decoded record.
    :param agg_data: dict of aggregated metrics, forwarded to the handler.
    :param domains_config: forwarded to the handler.
    :raises NotImplementedError: for any topic other than
        ``MINIO_TENANT_MAIN_AUDIT_LOGS_TOPIC``.
    """
    if topic != MINIO_TENANT_MAIN_AUDIT_LOGS_TOPIC:
        raise NotImplementedError(f"topic {topic} is not supported")
    process_minio_tenant_main_audit_logs(data, agg_data, domains_config)
6689

@@ -81,7 +104,7 @@ def delete_records(topic, latest_partition_offset):
81104
]
82105
if len(partitions) > 0:
83106
offset_json = json.dumps({'partitions': partitions, 'version': 1})
84-
logs.debug(f"Deleting records: {offset_json}", 8)
107+
logs.debug(f"Deleting records: {offset_json}", 10)
85108
subprocess.check_call([
86109
'kubectl', 'exec', '-n', config.KAFKA_STREAMER_POD_NAMESPACE, config.KAFKA_STREAMER_POD_NAME, '--', 'bash', '-c', dedent(f'''
87110
TMPFILE=$(mktemp) &&\
@@ -96,7 +119,7 @@ def run_single_iteration(domains_config: DomainsConfig, topic, daemon, no_kafka_
96119
start_time = common.now()
97120
assert topic, "topic is required"
98121
assert config.KAFKA_STREAMER_BOOTSTRAP_SERVERS
99-
logs.debug(f"running iteration for topic: {topic}", 8)
122+
logs.debug(f"running iteration for topic: {topic}", 10)
100123
consumer = Consumer({
101124
'bootstrap.servers': config.KAFKA_STREAMER_BOOTSTRAP_SERVERS,
102125
'group.id': config.KAFKA_STREAMER_OPERATOR_GROUP_ID,
@@ -106,20 +129,21 @@ def run_single_iteration(domains_config: DomainsConfig, topic, daemon, no_kafka_
106129
latest_partition_offset = {}
107130
try:
108131
agg_data = {}
132+
commit_ = functools.partial(commit, topic, consumer, domains_config, agg_data, no_kafka_commit=no_kafka_commit)
109133
while (common.now() - start_time).total_seconds() < config.KAFKA_STREAMER_POLL_TIME_SECONDS and not daemon.terminate_requested:
110134
msg = consumer.poll(timeout=config.KAFKA_STREAMER_CONSUMER_POLL_TIMEOUT_SECONDS)
111135
if msg is None:
112-
# logs.debug("Waiting for messages...", 10)
113-
pass
136+
logs.debug("Waiting for messages...", 10)
137+
commit_()
114138
elif msg.error():
115139
raise Exception(f"Message ERROR: {msg.error()}")
116140
else:
117141
offset = msg.offset()
118142
partition = msg.partition()
119143
latest_partition_offset[partition] = offset
120144
data = json.loads(msg.value())
121-
process_data(topic, data, agg_data)
122-
commit(topic, consumer, domains_config, agg_data, no_kafka_commit=no_kafka_commit)
145+
process_data(topic, data, agg_data, domains_config)
146+
commit_()
123147
except KeyboardInterrupt:
124148
pass
125149
finally:

0 commit comments

Comments
 (0)