Custom profiling commands & flamegraphs #890

Status: Open. oschaaf wants to merge 43 commits into base: master from the profiling branch.

Commits (43)
1d5d128 Custom profiling commands & flamegraphs (oschaaf, Apr 15, 2020)
70b6e52 Docker linting, fix TODO (oschaaf, Apr 16, 2020)
93da236 Merge remote-tracking branch 'upstream/master' into profiling (oschaaf, Apr 16, 2020)
81d4d7a Docker lint tweak (oschaaf, Apr 16, 2020)
ee62a80 Add perf label to flamegraph filename (oschaaf, Apr 16, 2020)
689021b Deduplicate redundant functionality (oschaaf, Apr 16, 2020)
a3f5587 Merge remote-tracking branch 'upstream/master' into profiling (oschaaf, Apr 17, 2020)
bffad09 Dockerfile.perf linting: pin package versions (oschaaf, Apr 17, 2020)
6425935 bash linting: double quote arg (oschaaf, Apr 17, 2020)
81f4d4d Add licence / copyright banner (oschaaf, Apr 17, 2020)
b37cea8 Python whitespace linting fix (oschaaf, Apr 17, 2020)
d5d5d59 Markdown linting fixes (oschaaf, Apr 17, 2020)
4871583 Merge remote-tracking branch 'upstream/master' into profiling (oschaaf, Apr 18, 2020)
eb6090f Move profiling thread start into function (oschaaf, Apr 20, 2020)
6730880 Python linting fix (oschaaf, Apr 20, 2020)
5e5ce41 Small fixes (oschaaf, Apr 27, 2020)
f265cfd Merge remote-tracking branch 'upstream/master' into profiling (oschaaf, Apr 27, 2020)
9e9320a lint whitespace (oschaaf, Apr 27, 2020)
4dda5a9 Merge remote-tracking branch 'upstream/master' into profiling (oschaaf, Apr 27, 2020)
1377ee3 Merge remote-tracking branch 'upstream/master' into profiling (oschaaf, Apr 28, 2020)
2886e04 Merge remote-tracking branch 'upstream/master' into profiling (oschaaf, Apr 29, 2020)
fa42cab Sync up with the latest changes (oschaaf, Apr 29, 2020)
680b10c linting fixes + fix in fortio.yaml (oschaaf, Apr 29, 2020)
30342ff Changes to minimize the diff (oschaaf, Apr 29, 2020)
0b9e851 Merge remote-tracking branch 'upstream/master' into profiling (oschaaf, Apr 30, 2020)
2fccdd6 Merge remote-tracking branch 'upstream/master' into profiling (oschaaf, May 3, 2020)
83a8e40 Tweak flamegraph file naming (oschaaf, May 3, 2020)
94f6120 Fix NH-mode --ingress option (oschaaf, May 3, 2020)
7f28c4c Lint fix (oschaaf, May 4, 2020)
e274665 Small enhancements/fixes (oschaaf, May 4, 2020)
b96687e Fix hang, improve error handling. Doc enhancements. (oschaaf, May 5, 2020)
17d136a Merge remote-tracking branch 'upstream/master' into profiling (oschaaf, May 5, 2020)
aae1f19 Lint change in runner.py (oschaaf, May 5, 2020)
fdfe910 Flag for allowing short runs. Doc pagefault flamegraphing. (oschaaf, May 5, 2020)
47c63f5 runner.py: add --envoy_profiler option (oschaaf, May 8, 2020)
b2aa8f0 Lint fixes (oschaaf, May 8, 2020)
1c5a3dd Merge remote-tracking branch 'upstream/master' into profiling (oschaaf, May 8, 2020)
b19423b Markdown lint fixes (oschaaf, May 8, 2020)
70e8f5a Add scrape annotations for prom. node exporter (oschaaf, May 13, 2020)
e641309 network flakes in CI: Add hard coded single retry per test execution (oschaaf, May 13, 2020)
2d71dd1 Remove line of code for debugging (oschaaf, May 13, 2020)
bce6a4f Tweaks for bleeding edge istio (oschaaf, May 13, 2020)
50a2a63 Merge remote-tracking branch 'upstream/master' into profiling (oschaaf, May 24, 2020)
Files changed
24 changes: 24 additions & 0 deletions perf/benchmark/README.md
@@ -141,12 +141,36 @@ optional arguments:
--no_clientsidecar do not run clientsidecar-only for all
--bothsidecar run both clientsidecar and serversidecar
--no_sidecar do not run clientsidecar and serversidecar
--custom_profiling_command
runs a custom profiling command on the nodes for the client and server,
and produces a flamegraph based on its output.
Example on-cpu profile using bcc tools for the envoy sidecar proxy:
--custom_profiling_command=\"profile-bpfcc -df {duration} -p {sidecar_pid}\"
- runner.py will replace {duration} with whatever was specified for --duration.
- runner.py will replace {sidecar_pid} with the actual process id of the envoy
sidecar process.
--custom_profiling_name
filename prefix for the result of any --custom_profiling_command
```

Note:
- `runner.py` will run all combinations of the parameters given. However, to keep the resulting graphs unambiguous, it is
better to vary one parameter at a time and hold the others fixed
- if you want to run with the `--perf` flag to generate a flame graph, make sure you have permission to gather perf data; refer to step 2 of this [README](https://github.com/istio/tools/tree/master/perf/benchmark/flame#setup-perf-tool)
- if you want to run with `--custom_profiling_command`, `profilingMode` must be set to `true` in `values.yaml`. Doing so will set up the client and server pods to run the perf/profiling container. It's worth noting that this container runs privileged, and that `hostIPC` and `hostPID` will also be enabled,
weakening security. Resulting flamegraphs will be written to `flame/flameoutput`.
- sample sidecar profiling commands for `--custom_profiling_command` (see the invocation sketch after this list):
- "profile-bpfcc -df {duration} -p {sidecar_pid}" sidecar on-cpu profile
- "offcputime-bpfcc -df {duration} -p {sidecar_pid}" sidecar off-cpu profile
- "offwaketime-bpfcc -df {duration} -p {sidecar_pid}" sidecar offwaktime profile
- "wakeuptime-bpfcc -f -p {sidecar_pid} {duration}" sidecar wakeuptime profile
- "perf record -F 99 -a -g -p {sidecar_pid} -- sleep {duration} && perf script | ~/FlameGraph/stackcollapse-perf.pl | c++filt -n" on-cpu perf-generated profile
- "stackcount-bpfcc c:*alloc* -df -D {duration} -p {sidecar_pid}" profile calls to `*alloc*`
- It's also possible to run machine-wide profiling, for example:
- "profile-bpfcc -df {duration}" for obtaining a machine-wide on-cpu flamegraph.
- See http://www.brendangregg.com/FlameGraphs/ for more examples and information.
- Enabling `profilingMode` in `values.yaml` will also bring up and expose Prometheus's `node_exporter` at the configured port (default: 9100),
accessible over HTTP via `/metrics`.
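
For illustration, a minimal invocation sketch that ties these flags together (the load-related flag values are placeholders rather than recommendations; `{duration}` and `{sidecar_pid}` are expanded by `runner.py` as described above):

```bash
# Collect an on-CPU flamegraph of the Envoy sidecar on both the client and
# server pods; results are copied to flame/flameoutput/oncpu-<podname>.svg.
python runner/runner.py --conn 16 --qps 1000 --duration 100 \
  --custom_profiling_command="profile-bpfcc -df {duration} -p {sidecar_pid}" \
  --custom_profiling_name=oncpu
```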

90 changes: 88 additions & 2 deletions perf/benchmark/runner/runner.py
@@ -22,8 +22,11 @@
import shlex
import uuid
import sys

from subprocess import getoutput
from urllib.parse import urlparse
from threading import Thread
from time import sleep
import yaml
from fortio import METRICS_START_SKIP_DURATION, METRICS_END_SKIP_DURATION

@@ -114,7 +117,9 @@ def __init__(
ingress=None,
mesh="istio",
cacert=None,
load_gen_type="fortio"):
load_gen_type="fortio",
custom_profiling_command=None,
custom_profiling_name="default-profile"):
self.run_id = str(uuid.uuid4()).partition('-')[0]
self.headers = headers
self.conn = conn
@@ -127,6 +132,8 @@ def __init__(
self.r = "0.00005"
self.telemetry_mode = telemetry_mode
self.perf_record = perf_record
self.custom_profiling_command = custom_profiling_command
self.custom_profiling_name = custom_profiling_name
self.server = pod_info("-lapp=" + server, namespace=self.ns)
self.client = pod_info("-lapp=" + client, namespace=self.ns)
self.additional_args = additional_args
@@ -245,6 +252,70 @@ def run(self, headers, conn, qps, size, duration):
headers_cmd = self.generate_headers_cmd(headers)
fortio_cmd = self.generate_fortio_cmd(headers_cmd, conn, qps, duration, grpc, cacert_arg, labels)

def run_profiling_in_background(exec_cmd, podname, filename_prefix, profiling_command):
filename = "{filename_prefix}-{podname}".format(
filename_prefix=filename_prefix, podname=podname)
profiler_cmd = "{exec_cmd} \"{profiling_command} > {filename}.profile\"".format(
profiling_command=profiling_command,
exec_cmd=exec_cmd,
filename=filename
)
# Run the profile collection tool, and wait for it to finish.
process = subprocess.Popen(shlex.split(profiler_cmd))
process.wait()
# Next we feed the profiling data to the flamegraphing script.
flamegraph_cmd = "{exec_cmd} \"./FlameGraph/flamegraph.pl --title='{profiling_command} Flame Graph' < {filename}.profile > {filename}.svg\"".format(
exec_cmd=exec_cmd,
profiling_command=profiling_command,
filename=filename
)
process = subprocess.Popen(shlex.split(flamegraph_cmd))
process.wait()
# Lastly copy the resulting flamegraph out of the container
kubectl_cp(podname + ":{filename}.svg".format(filename=filename),
"flame/flameoutput/{filename}.svg".format(filename=filename), "perf")

threads = []

if self.custom_profiling_command:
# We run any custom profiling command on both pods, as one runs on each node we're interested in.
for pod in [self.client.name, self.server.name]:
exec_cmd_on_pod = "kubectl exec -n {namespace} {podname} -c perf -it -- bash -c ".format(
namespace=os.environ.get("NAMESPACE", "twopods"),
podname=pod
)

# Wait for node_exporter to run, which indicates that the profiling initialization in the perf container has finished.
# Once init probes are supported, replace this with an HTTP probe in fortio.yaml.
ne_pid = ""
attempts = 0
while ne_pid == "" and attempts < 60:
ne_pid = getoutput("{exec_cmd} \"pgrep 'node_exporter'\"".format(exec_cmd=exec_cmd_on_pod)).strip()
attempts = attempts + 1
print(".")
sleep(1)

# Find the sidecar process ids in case the profiling command needs them. Envoy runs as a child of pilot-agent, so resolve the parent pid first.
sidecar_ppid = getoutput("{exec_cmd} \"pgrep -f 'pilot-agent proxy sidecar'\"".format(exec_cmd=exec_cmd_on_pod)).strip()
sidecar_pid = getoutput("{exec_cmd} \"pgrep -P {sidecar_ppid}\"".format(exec_cmd=exec_cmd_on_pod, sidecar_ppid=sidecar_ppid)).strip()
profiling_command = self.custom_profiling_command.format(
duration=self.duration, sidecar_pid=sidecar_pid)
threads.append(Thread(target=run_profiling_in_background, args=[
exec_cmd_on_pod, pod, self.custom_profiling_name, profiling_command]))

for thread in threads:
thread.start()

if self.run_ingress:
print('-------------- Running in ingress mode --------------')
kubectl_exec(self.client.name, self.ingress(fortio_cmd))
if self.perf_record:
run_perf(
self.mesh,
self.server.name,
labels + "_srv_ingress",
duration=40)

if self.run_baseline:
self.execute_sidecar_mode("baseline", self.load_gen_type, fortio_cmd, self.nosidecar, labels, "")

@@ -267,6 +338,11 @@ def run(self, headers, conn, qps, size, duration):
labels + "_srv_ingress",
duration=40)

if threads:
for thread in threads:
thread.join()
print("background profiler threads finished - flamegraphs are available in flame/flameoutput")

PERFCMD = "/usr/lib/linux-tools/4.4.0-131-generic/perf"
FLAMESH = "flame.sh"
@@ -365,7 +441,9 @@ def run_perf_test(args):
mesh=args.mesh,
telemetry_mode=args.telemetry_mode,
cacert=args.cacert,
load_gen_type=args.load_gen_type)
load_gen_type=args.load_gen_type,
custom_profiling_command=args.custom_profiling_command,
custom_profiling_name=args.custom_profiling_name)

if fortio.duration <= min_duration:
print("Duration must be greater than {min_duration}".format(
@@ -425,6 +503,14 @@ def get_parser():
"--perf",
help="also run perf and produce flame graph",
default=False)
parser.add_argument(
"--custom_profiling_command",
help="Run custom profiling commands on the nodes for the client and server, and produce a flamegraph based on their outputs. E.g. --custom_profiling_command=\"/usr/share/bcc/tools/profile -df 40\"",
default=False)
parser.add_argument(
"--custom_profiling_name",
help="Name to be added to the flamegraph resulting from --custom_profiling_command",
default="default-profile")
parser.add_argument(
"--ingress",
help="run traffic through ingress, should be a valid URL",
54 changes: 53 additions & 1 deletion perf/benchmark/templates/fortio.yaml
@@ -40,7 +40,11 @@ spec:
protocol: TCP
- name: grpc-pinga
port: 8076
{{- if $.Values.profilingMode }}
- name: node-exporter
port: {{ $.Values.nodeExporterPort }}
protocol: TCP
{{- end }}
selector:
app: {{ $.name }}
{{- if $.V.expose }}
@@ -98,7 +102,7 @@ spec:
config.linkerd.io/skip-inbound-ports: "8077"
{{- end }}
# exclude inbound ports of the uncaptured container
traffic.sidecar.istio.io/excludeInboundPorts: "8076,8077,8078"
traffic.sidecar.istio.io/excludeInboundPorts: "8076,8077,8078,{{ $.Values.nodeExporterPort }}"
sidecar.istio.io/proxyCPU: {{ $.Values.proxy.cpu }}
sidecar.istio.io/proxyMemory: {{ $.Values.proxy.memory }}
labels:
@@ -118,9 +122,30 @@ spec:
- "fortioclient"
{{- end }}
topologyKey: "kubernetes.io/hostname"
{{- if $.Values.profilingMode }}
hostIPC: true
hostPID: true
{{- end }}
volumes:
- name: shared-data
emptyDir: {}
{{- if $.Values.profilingMode }}
- name: sys
hostPath:
path: /sys
- name: lsb-release
hostPath:
path: /etc/lsb-release
- name: modules-generated
hostPath:
path: /var/cache/kernel/modules
- name: headers-generated
hostPath:
path: /var/cache/kernel/headers
- name: usr-host
hostPath:
path: /usr
{{- end }}
containers:
- name: captured
securityContext:
@@ -152,6 +177,33 @@ spec:
args:
- /bin/sleep
- infinity
{{- if $.Values.profilingMode }}
- name: perf
image: {{ $.Values.perfImage }}
imagePullPolicy: Always
securityContext:
privileged: true
capabilities:
add:
- SYS_ADMIN
- SYS_PTRACE
command: ["/bin/bash"]
args: ["-c", "./setup-node-for-profiling.sh :{{ $.Values.nodeExporterPort }}"]
ports:
- containerPort: {{ $.Values.nodeExporterPort }}
protocol: TCP
volumeMounts:
- mountPath: /sys
name: sys
- mountPath: /etc/lsb-release.host
name: lsb-release
- mountPath: /lib/modules
name: modules-generated
- mountPath: /usr/src
name: headers-generated
- mountPath: /usr-host
name: usr-host
{{- end }}
- name: uncaptured
securityContext:
runAsUser: 1
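
Once the perf container is up, the profiling plumbing can also be exercised by hand. A hypothetical ad-hoc session (the pod name is a placeholder; the namespace assumes the `twopods` default used by `runner.py`):

```bash
# Open a shell in the profiling container of the client pod.
kubectl exec -n twopods -it <fortioclient-pod> -c perf -- bash
# Inside the container: take a 30s machine-wide on-CPU profile with folded
# stacks, then render it with the FlameGraph scripts baked into the image.
profile-bpfcc -df 30 > /tmp/oncpu.profile
./FlameGraph/flamegraph.pl < /tmp/oncpu.profile > /tmp/oncpu.svg
```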
4 changes: 3 additions & 1 deletion perf/benchmark/values.yaml
@@ -43,5 +43,7 @@ client: # client overrides

cert: false
interceptionMode: REDIRECT

profilingMode: true
perfImage: oschaaf/istio-tools:profiling
nodeExporterPort: 9100
namespace: ""
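
With `profilingMode: true`, a quick way to sanity-check the node exporter from a workstation is a sketch like the following (the pod name is a placeholder; 9100 is the default `nodeExporterPort` above):

```bash
# Forward the node_exporter port from the client pod and scrape a few metrics.
kubectl -n twopods port-forward pod/<fortioclient-pod> 9100:9100 &
curl -s http://localhost:9100/metrics | head
```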
21 changes: 21 additions & 0 deletions perf/docker/Dockerfile.profiling
@@ -0,0 +1,21 @@
FROM ubuntu:18.04

WORKDIR /root

COPY perf/setup-node-for-profiling.sh setup-node-for-profiling.sh

RUN apt update && \
apt install -y git gcc make curl wget libelf-dev bc bpfcc-tools \
bison flex \
libdw-dev systemtap-sdt-dev libunwind-dev libaudit-dev \
libssl-dev libslang2-dev libgtk2.0-dev libperl-dev python-dev && \
chmod +x setup-node-for-profiling.sh && \
wget -qO- https://github.com/prometheus/node_exporter/releases/download/v0.18.1/node_exporter-0.18.1.linux-amd64.tar.gz | tar -C . -xvzf - && \
cp node_exporter-*/node_exporter /usr/bin/ && \
rm -rf node_exporter-* && \
git clone --depth=1 https://github.com/BrendanGregg/FlameGraph && \
rm -rf /var/lib/apt/lists/* && \
rm -rf /tmp/*

CMD ["setup-node-for-profiling.sh"]

42 changes: 42 additions & 0 deletions perf/docker/perf/setup-node-for-profiling.sh
@@ -0,0 +1,42 @@
#!/bin/bash

set -ex

USR_SRC="/usr/src"
KERNEL_VERSION="$(uname -r)"
CHROMEOS_RELEASE_VERSION="$(grep 'CHROMEOS_RELEASE_VERSION' /etc/lsb-release.host | cut -d '=' -f 2)"

build_kernel()
{
# Build the headers
cd "${WORKING_DIR}"
zcat /proc/config.gz > .config
make ARCH=x86 oldconfig > /dev/null
make ARCH=x86 prepare > /dev/null

# Build perf
cd tools/perf/
make ARCH=x86 > /dev/null
mv perf /usr/sbin/
}

prepare_node()
{
WORKING_DIR="/linux-lakitu-${CHROMEOS_RELEASE_VERSION}"
SOURCES_DIR="${USR_SRC}/linux-lakitu-${CHROMEOS_RELEASE_VERSION}"
mkdir -p "${WORKING_DIR}"
curl -s "https://storage.googleapis.com/cos-tools/${CHROMEOS_RELEASE_VERSION}/kernel-src.tar.gz" \
| tar -xzf - -C "${WORKING_DIR}"
build_kernel
rm -rf "${USR_SRC}${WORKING_DIR}"
mv "${WORKING_DIR}" "${USR_SRC}"
}

prepare_node
mkdir -p "/lib/modules/${KERNEL_VERSION}"
ln -sf "${SOURCES_DIR}" "/lib/modules/${KERNEL_VERSION}/source"
ln -sf "${SOURCES_DIR}" "/lib/modules/${KERNEL_VERSION}/build"

# Fire up the node exporter process, listening on the passed-in address:port.
node_exporter --web.listen-address "$1"