Skip to content

Commit f2cc8fc

Browse files
Adding containerd compatibility to oom_logger - COMPINFRA-3947 (#3904)
* Adding containerd compatibility to oom_logger * Addressing reviews + adding new regex for containerd * Adding in addition to the nerdctl regex a regex for capturing containerd-cri oom * Updating packages for the k8s_itest to pass * use wheels + main internal pypi * Revert "Updating packages for the k8s_itest to pass" This reverts commit 52b62c8. * update addict * update argcomplete * Revert "update argcomplete" This reverts commit 8a9f0d1. * Revert "update addict" This reverts commit aa1df40. * prefer binary * prefer binary harder * prefer binary this way * maybe? * missed a spot * upgrade?? * do we still need deadsnakes here * anotha one * more fixes * deadsnakes * distutils is fun * cleanup * Revert "Updating packages for the k8s_itest to pass" This reverts commit 52b62c8. * Removed the nerdctl regex and the adjusted ones for docker * Address getting from config when it's None * Add a less structured regex for containerd * Added two tests to test the regex if it's working * Adding a unit test for testing main with containerd=true * Addressing more reviews --------- Co-authored-by: Luis Perez <[email protected]>
1 parent 74508cc commit f2cc8fc

12 files changed

+251
-103
lines changed

Diff for: debian/paasta-tools.links

-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ opt/venvs/paasta-tools/bin/generate_deployments_for_service.py usr/bin/generate_
1313
opt/venvs/paasta-tools/bin/generate_services_file.py usr/bin/generate_services_file
1414
opt/venvs/paasta-tools/bin/generate_services_yaml.py usr/bin/generate_services_yaml
1515
opt/venvs/paasta-tools/bin/generate_authenticating_services.py usr/bin/generate_authenticating_services
16-
opt/venvs/paasta-tools/bin/kill_orphaned_docker_containers.py usr/bin/kill_orphaned_docker_containers
1716
opt/venvs/paasta-tools/bin/kubernetes_remove_evicted_pods.py usr/bin/kubernetes_remove_evicted_pods
1817
opt/venvs/paasta-tools/bin/paasta-api usr/bin/paasta-api
1918
opt/venvs/paasta-tools/bin/paasta-fsm usr/bin/paasta-fsm

Diff for: docs/source/generated/paasta_tools.monitoring.kill_orphaned_docker_containers.rst

-7
This file was deleted.

Diff for: docs/source/generated/paasta_tools.monitoring.rst

-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ Submodules
77
.. toctree::
88

99
paasta_tools.monitoring.check_k8s_api_performance
10-
paasta_tools.monitoring.kill_orphaned_docker_containers
1110

1211
Module contents
1312
---------------

Diff for: paasta_tools/monitoring/kill_orphaned_docker_containers.py

-72
This file was deleted.

Diff for: paasta_tools/oom_logger.py

+73-10
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,16 @@
3333
destination(paasta_oom_logger);
3434
};
3535
"""
36+
import argparse
37+
import json
3638
import re
3739
import sys
3840
from collections import namedtuple
41+
from typing import Dict
3942

43+
import grpc
44+
from containerd.services.containers.v1 import containers_pb2
45+
from containerd.services.containers.v1 import containers_pb2_grpc
4046
from docker.errors import APIError
4147

4248
from paasta_tools.cli.utils import get_instance_config
@@ -76,6 +82,16 @@
7682
)
7783

7884

85+
def parse_args() -> argparse.Namespace:
86+
parser = argparse.ArgumentParser(description="paasta_oom_logger")
87+
parser.add_argument(
88+
"--containerd",
89+
action="store_true",
90+
help="Use containerd to inspect containers, otherwise use docker",
91+
)
92+
return parser.parse_args()
93+
94+
7995
def capture_oom_events_from_stdin():
8096
process_name_regex = re.compile(
8197
r"^\d+\s[a-zA-Z0-9\-]+\s.*\]\s(.+)\sinvoked\soom-killer:"
@@ -93,6 +109,25 @@ def capture_oom_events_from_stdin():
93109
""",
94110
re.VERBOSE,
95111
)
112+
oom_regex_kubernetes_containerd_systemd_cgroup = re.compile(
113+
r"""
114+
^(\d+)\s # timestamp
115+
([a-zA-Z0-9\-]+) # hostname
116+
\s.*oom-kill:.*task_memcg=/.*\.slice/.* # loosely match systemd slice and containerid
117+
cri-containerd:(\w{64}).*$ # containerid
118+
""",
119+
re.VERBOSE,
120+
)
121+
122+
oom_regex_kubernetes_containerd_systemd_cgroup_structured = re.compile(
123+
r"""
124+
^(\d+)\s # timestamp
125+
([a-zA-Z0-9\-]+) # hostname
126+
\s.*oom-kill:.*task_memcg=/kubepods\.slice/.* # match systemd slice and containerid
127+
cri-containerd-(\w{64}).*$ # containerid
128+
""",
129+
re.VERBOSE,
130+
)
96131
oom_regex_kubernetes_structured = re.compile(
97132
r"""
98133
^(\d+)\s # timestamp
@@ -115,6 +150,8 @@ def capture_oom_events_from_stdin():
115150
oom_regex_kubernetes,
116151
oom_regex_kubernetes_structured,
117152
oom_regex_kubernetes_systemd_cgroup,
153+
oom_regex_kubernetes_containerd_systemd_cgroup,
154+
oom_regex_kubernetes_containerd_systemd_cgroup_structured,
118155
]
119156

120157
process_name = ""
@@ -136,11 +173,18 @@ def capture_oom_events_from_stdin():
136173
break
137174

138175

139-
def get_container_env_as_dict(docker_inspect):
176+
def get_container_env_as_dict(
177+
is_cri_containerd: bool, container_inspect: dict
178+
) -> Dict[str, str]:
140179
env_vars = {}
141-
config = docker_inspect.get("Config")
180+
if is_cri_containerd:
181+
config = container_inspect.get("process")
182+
env_key = "env"
183+
else:
184+
config = container_inspect.get("Config")
185+
env_key = "Env"
142186
if config is not None:
143-
env = config.get("Env", [])
187+
env = config.get(env_key, [])
144188
for i in env:
145189
name, _, value = i.partition("=")
146190
env_vars[name] = value
@@ -209,18 +253,26 @@ def send_sfx_event(service, instance, cluster):
209253
counter.count()
210254

211255

256+
def get_containerd_container(container_id: str) -> containers_pb2.Container:
257+
with grpc.insecure_channel("unix:///run/containerd/containerd.sock") as channel:
258+
containersv1 = containers_pb2_grpc.ContainersStub(channel)
259+
return containersv1.Get(
260+
containers_pb2.GetContainerRequest(id=container_id),
261+
metadata=(("containerd-namespace", "k8s.io"),),
262+
).container
263+
264+
212265
def main():
213266
if clog is None:
214267
print("CLog logger unavailable, exiting.", file=sys.stderr)
215268
sys.exit(1)
216-
269+
args = parse_args()
217270
clog.config.configure(
218271
scribe_host="169.254.255.254",
219272
scribe_port=1463,
220273
monk_disable=False,
221274
scribe_disable=False,
222275
)
223-
224276
cluster = load_system_paasta_config().get_cluster()
225277
client = get_docker_client()
226278
for (
@@ -229,11 +281,22 @@ def main():
229281
container_id,
230282
process_name,
231283
) in capture_oom_events_from_stdin():
232-
try:
233-
docker_inspect = client.inspect_container(resource_id=container_id)
234-
except (APIError):
235-
continue
236-
env_vars = get_container_env_as_dict(docker_inspect)
284+
if args.containerd:
285+
# then we're using containerd to inspect containers
286+
try:
287+
container_info = get_containerd_container(container_id)
288+
except grpc.RpcError as e:
289+
print("An error occurred while getting the container:", e)
290+
continue
291+
container_spec_raw = container_info.spec.value.decode("utf-8")
292+
container_inspect = json.loads(container_spec_raw)
293+
else:
294+
# we're using docker to inspect containers
295+
try:
296+
container_inspect = client.inspect_container(resource_id=container_id)
297+
except (APIError):
298+
continue
299+
env_vars = get_container_env_as_dict(args.containerd, container_inspect)
237300
service = env_vars.get("PAASTA_SERVICE", "unknown")
238301
instance = env_vars.get("PAASTA_INSTANCE", "unknown")
239302
mesos_container_id = env_vars.get("MESOS_CONTAINER_NAME", "mesos-null")

Diff for: requirements-minimal.txt

+2
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,14 @@ botocore
88
bravado >= 10.2.0
99
certifi
1010
choice >= 0.1
11+
containerd
1112
cookiecutter >= 1.4.0
1213
croniter
1314
docker
1415
dulwich >= 0.17.3
1516
ephemeral-port-reserve >= 1.0.1
1617
graphviz
18+
grpcio
1719
gunicorn
1820
humanfriendly
1921
humanize >= 0.5.1

Diff for: requirements.txt

+2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ certifi==2017.11.5
1818
chardet==3.0.4
1919
choice==0.1
2020
click==6.6
21+
containerd==1.5.3
2122
cookiecutter==1.4.0
2223
croniter==1.3.4
2324
decorator==4.1.2
@@ -27,6 +28,7 @@ ephemeral-port-reserve==1.1.0
2728
future==0.16.0
2829
google-auth==1.2.0
2930
graphviz==0.8.2
31+
grpcio==1.62.2
3032
gunicorn==19.8.1
3133
http-parser==0.9.0
3234
humanfriendly==4.18

Diff for: setup.py

-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ def get_install_requires():
6262
"paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py",
6363
"paasta_tools/kubernetes/bin/paasta_secrets_sync.py",
6464
"paasta_tools/log_task_lifecycle_events.py",
65-
"paasta_tools/monitoring/kill_orphaned_docker_containers.py",
6665
"paasta_tools/paasta_deploy_tron_jobs",
6766
"paasta_tools/paasta_execute_docker_command.py",
6867
"paasta_tools/paasta_remote_run.py",

0 commit comments

Comments
 (0)