Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enhance: enable iptables modify nat output chain to redirect outbound traffic by init container #36

Merged
merged 1 commit into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 42 additions & 42 deletions artifacts/scripts/proxy-init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,27 @@ if [ ! -f "${SA_DIR}/token" ]; then
fi

# Remove the old chains, to generate new configs.
iptables -t nat -D PREROUTING -p tcp -j ctrlmesh_PROXY_INBOUND 2>/dev/null
iptables -t mangle -D PREROUTING -p tcp -j ctrlmesh_PROXY_INBOUND 2>/dev/null
iptables -t nat -D OUTPUT -p tcp -j ctrlmesh_PROXY_OUTPUT 2>/dev/null
iptables -t nat -D PREROUTING -p tcp -j CTRLMESH_INBOUND 2>/dev/null
iptables -t mangle -D PREROUTING -p tcp -j CTRLMESH_INBOUND 2>/dev/null
iptables -t nat -D OUTPUT -p tcp -j CTRLMESH_OUTPUT 2>/dev/null

# Flush and delete the ctrlmesh chains.
iptables -t nat -F ctrlmesh_PROXY_OUTPUT 2>/dev/null
iptables -t nat -X ctrlmesh_PROXY_OUTPUT 2>/dev/null
iptables -t nat -F ctrlmesh_PROXY_INBOUND 2>/dev/null
iptables -t nat -X ctrlmesh_PROXY_INBOUND 2>/dev/null
iptables -t mangle -F ctrlmesh_PROXY_INBOUND 2>/dev/null
iptables -t mangle -X ctrlmesh_PROXY_INBOUND 2>/dev/null
iptables -t mangle -F ctrlmesh_PROXY_DIVERT 2>/dev/null
iptables -t mangle -X ctrlmesh_PROXY_DIVERT 2>/dev/null
iptables -t mangle -F ctrlmesh_PROXY_TPROXY 2>/dev/null
iptables -t mangle -X ctrlmesh_PROXY_TPROXY 2>/dev/null
iptables -t nat -F CTRLMESH_OUTPUT 2>/dev/null
iptables -t nat -X CTRLMESH_OUTPUT 2>/dev/null
iptables -t nat -F CTRLMESH_INBOUND 2>/dev/null
iptables -t nat -X CTRLMESH_INBOUND 2>/dev/null
iptables -t mangle -F CTRLMESH_INBOUND 2>/dev/null
iptables -t mangle -X CTRLMESH_INBOUND 2>/dev/null
iptables -t mangle -F CTRLMESH_DIVERT 2>/dev/null
iptables -t mangle -X CTRLMESH_DIVERT 2>/dev/null
iptables -t mangle -F CTRLMESH_TPROXY 2>/dev/null
iptables -t mangle -X CTRLMESH_TPROXY 2>/dev/null

# Must be last, the others refer to it
iptables -t nat -F ctrlmesh_PROXY_REDIRECT 2>/dev/null
iptables -t nat -X ctrlmesh_PROXY_REDIRECT 2>/dev/null
iptables -t nat -F ctrlmesh_PROXY_IN_REDIRECT 2>/dev/null
iptables -t nat -X ctrlmesh_PROXY_IN_REDIRECT 2>/dev/null
iptables -t nat -F CTRLMESH_REDIRECT 2>/dev/null
iptables -t nat -X CTRLMESH_REDIRECT 2>/dev/null
iptables -t nat -F CTRLMESH_IN_REDIRECT 2>/dev/null
iptables -t nat -X CTRLMESH_IN_REDIRECT 2>/dev/null

if [ "${1:-}" = "clean" ]; then
echo "Only cleaning, no new rules added"
Expand Down Expand Up @@ -70,13 +70,13 @@ set -o pipefail
set -x # echo on

# Create a new chain for redirecting outbound traffic to the apiserver port.
# In both chains, '-j RETURN' bypasses Proxy and '-j ctrlmesh_PROXY_REDIRECT' redirects to Proxy.
iptables -t nat -N ctrlmesh_PROXY_REDIRECT
iptables -t nat -A ctrlmesh_PROXY_REDIRECT -p tcp -j REDIRECT --to-port "${PROXY_APISERVER_PORT}"
# In both chains, '-j RETURN' bypasses Proxy and '-j CTRLMESH_REDIRECT' redirects to Proxy.
iptables -t nat -N CTRLMESH_REDIRECT
iptables -t nat -A CTRLMESH_REDIRECT -p tcp -j REDIRECT --to-port "${PROXY_APISERVER_PORT}"

# Use this chain also for redirecting inbound traffic to the webhook port when not using TPROXY.
iptables -t nat -N ctrlmesh_PROXY_IN_REDIRECT
iptables -t nat -A ctrlmesh_PROXY_IN_REDIRECT -p tcp -j REDIRECT --to-port "${PROXY_WEBHOOK_PORT}"
iptables -t nat -N CTRLMESH_IN_REDIRECT
iptables -t nat -A CTRLMESH_IN_REDIRECT -p tcp -j REDIRECT --to-port "${PROXY_WEBHOOK_PORT}"

# Handling of inbound ports. Traffic will be redirected to Proxy, which will process and forward
# to the local webhook. If not set, no inbound port will be intercepted by the iptables.
Expand All @@ -85,56 +85,56 @@ if [ -n "${INBOUND_WEBHOOK_PORT}" ]; then
# When using TPROXY, create a new chain for routing all inbound traffic to
# Proxy. Any packet entering this chain gets marked with the ${INBOUND_TPROXY_MARK} mark,
# so that they get routed to the loopback interface in order to get redirected to Proxy.
# In the ctrlmesh_PROXY_INBOUND chain, '-j ctrlmesh_PROXY_DIVERT' reroutes to the loopback
# In the CTRLMESH_INBOUND chain, '-j CTRLMESH_DIVERT' reroutes to the loopback
# interface.
# Mark all inbound packets.
iptables -t mangle -N ctrlmesh_PROXY_DIVERT
iptables -t mangle -A ctrlmesh_PROXY_DIVERT -j MARK --set-mark "${INBOUND_TPROXY_MARK}"
iptables -t mangle -A ctrlmesh_PROXY_DIVERT -j ACCEPT
iptables -t mangle -N CTRLMESH_DIVERT
iptables -t mangle -A CTRLMESH_DIVERT -j MARK --set-mark "${INBOUND_TPROXY_MARK}"
iptables -t mangle -A CTRLMESH_DIVERT -j ACCEPT

# Route all packets marked in chain ctrlmesh_PROXY_DIVERT using routing table ${INBOUND_TPROXY_ROUTE_TABLE}.
# Route all packets marked in chain CTRLMESH_DIVERT using routing table ${INBOUND_TPROXY_ROUTE_TABLE}.
ip -f inet rule add fwmark "${INBOUND_TPROXY_MARK}" lookup "${INBOUND_TPROXY_ROUTE_TABLE}"
# In routing table ${INBOUND_TPROXY_ROUTE_TABLE}, create a single default rule to route all traffic to
# the loopback interface.
ip -f inet route add local default dev lo table "${INBOUND_TPROXY_ROUTE_TABLE}" || ip route show table all

# Create a new chain for redirecting inbound traffic to the common Envoy
# port.
# In the ctrlmesh_PROXY_INBOUND chain, '-j RETURN' bypasses Envoy and
# '-j ctrlmesh_PROXY_TPROXY' redirects to Envoy.
iptables -t mangle -N ctrlmesh_PROXY_TPROXY
iptables -t mangle -A ctrlmesh_PROXY_TPROXY ! -d 127.0.0.1/32 -p tcp -j TPROXY --tproxy-mark "${INBOUND_TPROXY_MARK}"/0xffffffff --on-port "${PROXY_PORT}"
# In the CTRLMESH_INBOUND chain, '-j RETURN' bypasses Envoy and
# '-j CTRLMESH_TPROXY' redirects to Envoy.
iptables -t mangle -N CTRLMESH_TPROXY
iptables -t mangle -A CTRLMESH_TPROXY ! -d 127.0.0.1/32 -p tcp -j TPROXY --tproxy-mark "${INBOUND_TPROXY_MARK}"/0xffffffff --on-port "${PROXY_PORT}"

table=mangle
else
table=nat
fi
iptables -t "${table}" -N ctrlmesh_PROXY_INBOUND
iptables -t "${table}" -A PREROUTING -p tcp -j ctrlmesh_PROXY_INBOUND
iptables -t "${table}" -N CTRLMESH_INBOUND
iptables -t "${table}" -A PREROUTING -p tcp -j CTRLMESH_INBOUND

if [ "${INBOUND_INTERCEPTION_MODE}" = "TPROXY" ]; then
iptables -t mangle -A ctrlmesh_PROXY_INBOUND -p tcp --dport "${INBOUND_WEBHOOK_PORT}" -m socket -j ctrlmesh_PROXY_DIVERT || echo "No socket match support"
iptables -t mangle -A ctrlmesh_PROXY_INBOUND -p tcp --dport "${INBOUND_WEBHOOK_PORT}" -m socket -j ctrlmesh_PROXY_DIVERT || echo "No socket match support"
iptables -t mangle -A ctrlmesh_PROXY_INBOUND -p tcp --dport "${INBOUND_WEBHOOK_PORT}" -j ctrlmesh_PROXY_TPROXY
iptables -t mangle -A CTRLMESH_INBOUND -p tcp --dport "${INBOUND_WEBHOOK_PORT}" -m socket -j CTRLMESH_DIVERT || echo "No socket match support"
iptables -t mangle -A CTRLMESH_INBOUND -p tcp --dport "${INBOUND_WEBHOOK_PORT}" -m socket -j CTRLMESH_DIVERT || echo "No socket match support"
iptables -t mangle -A CTRLMESH_INBOUND -p tcp --dport "${INBOUND_WEBHOOK_PORT}" -j CTRLMESH_TPROXY
else
iptables -t nat -A ctrlmesh_PROXY_INBOUND -p tcp --dport "${INBOUND_WEBHOOK_PORT}" -j ctrlmesh_PROXY_IN_REDIRECT
iptables -t nat -A CTRLMESH_INBOUND -p tcp --dport "${INBOUND_WEBHOOK_PORT}" -j CTRLMESH_IN_REDIRECT
fi
fi

# Create a new chain for selectively redirecting outbound packets to Proxy.
iptables -t nat -N ctrlmesh_PROXY_OUTPUT
iptables -t nat -N CTRLMESH_OUTPUT

# Jump to the ctrlmesh_PROXY_OUTPUT chain from OUTPUT chain for all tcp traffic.
iptables -t nat -A OUTPUT -p tcp -j ctrlmesh_PROXY_OUTPUT
# Jump to the CTRLMESH_OUTPUT chain from OUTPUT chain for all tcp traffic.
iptables -t nat -A OUTPUT -p tcp -j CTRLMESH_OUTPUT

for uid in ${PROXY_UID}; do
# Avoid infinite loops. Don't redirect Proxy traffic directly back to
# Proxy for non-loopback traffic.
iptables -t nat -A ctrlmesh_PROXY_OUTPUT -m owner --uid-owner "${uid}" -j RETURN
iptables -t nat -A CTRLMESH_OUTPUT -m owner --uid-owner "${uid}" -j RETURN
done

# Redirect all apiserver outbound traffic to Proxy.
iptables -t nat -A ctrlmesh_PROXY_OUTPUT -d "${KUBERNETES_SERVICE_HOST}" -j ctrlmesh_PROXY_REDIRECT
iptables -t nat -A CTRLMESH_OUTPUT -d "${KUBERNETES_SERVICE_HOST}" -j CTRLMESH_REDIRECT

# Generate certs
mount -o remount,rw "${SA_DIR}"
Expand Down
12 changes: 7 additions & 5 deletions pkg/apis/ctrlmesh/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,13 @@ const (

// Labels
const (
CtrlmeshControlPrefix = "ctrlmesh.kusionstack.io/"
CtrlmeshIgnoreWebhookLabel = "ctrlmesh.kusionstack.io/ignore-webhook"
CtrlmeshIgnoreValidateLabel = "ctrlmesh.kusionstack.io/ignore-validate"
CtrlmeshDefaultReplicasLabel = "ctrlmesh.kusionstack.io/default-replicas"
CtrlmeshEnableProxyLabel = "ctrlmesh.kusionstack.io/enable-proxy"
CtrlmeshControlPrefix = "ctrlmesh.kusionstack.io/"
CtrlmeshIgnoreWebhookLabel = "ctrlmesh.kusionstack.io/ignore-webhook"
CtrlmeshIgnoreValidateLabel = "ctrlmesh.kusionstack.io/ignore-validate"
CtrlmeshDefaultReplicasLabel = "ctrlmesh.kusionstack.io/default-replicas"
CtrlmeshEnableProxyLabel = "ctrlmesh.kusionstack.io/enable-proxy"
CtrlmeshEnableIptableMode = "ctrlmesh.kusionstack.io/enable-iptables"

CtrlmeshAutoShardingRootLabel = "ctrlmesh.kusionstack.io/auto-sharding-root"
CtrlmeshInRollingLabel = "ctrlmesh.kusionstack.io/rolling"
CtrlmeshDisableFakeKubeconfigArgLabel = "ctrlmesh.kusionstack.io/disable-fake-kubeconfig-arg"
Expand Down
53 changes: 51 additions & 2 deletions pkg/cmd/proxy/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ import (

"github.com/KusionStack/controller-mesh/pkg/apis/ctrlmesh/constants"
"github.com/KusionStack/controller-mesh/pkg/client"

proxyapiserver "github.com/KusionStack/controller-mesh/pkg/proxy/apiserver"
proxycache "github.com/KusionStack/controller-mesh/pkg/proxy/cache"
"github.com/KusionStack/controller-mesh/pkg/proxy/circuitbreaker"
Expand All @@ -56,6 +55,8 @@ var (
webhookCertDir = flag.String(constants.ProxyWebhookCertDirFlag, "", "The directory where the webhook certs generated or mounted.")

proxyIptablePort = flag.Int(constants.ProxyIptablesFlag, constants.ProxyIptablesPort, "port that http-tproxy listens on")

enableIpTable = os.Getenv(constants.EnvIPTable) == "true"
)

func main() {
Expand All @@ -66,7 +67,17 @@ func main() {
klog.Fatalf("Environment %s=%s %s=%s not exist.",
constants.EnvPodNamespace, os.Getenv(constants.EnvPodNamespace), constants.EnvPodName, os.Getenv(constants.EnvPodName))
}
cfg := ctrl.GetConfigOrDie()
var cfg *rest.Config

if enableIpTable {
var err error
cfg, err = getRestConfig()
if err != nil {
klog.Fatalf("Failed to get rest config: %v", err)
}
} else {
cfg = ctrl.GetConfigOrDie()
}
cfg.UserAgent = "ctrlmesh"
if err := client.NewRegistry(cfg); err != nil {
klog.Fatalf("Failed to new client registry: %v", err)
Expand Down Expand Up @@ -165,3 +176,41 @@ func serveHTTP(ctx context.Context, readyHandler *healthz.Handler) {
klog.Fatalf("Serve HTTP shutting down on :%d: %v", *metricsHealthPort, err)
}
}

// getRestConfig builds the API-server client configuration that main uses
// when iptables mode is enabled, instead of ctrl.GetConfigOrDie.
//
// The server address is assembled from the KUBERNETES_SERVICE_HOST and
// KUBERNETES_SERVICE_PORT environment variables, and the bearer token is read
// from the mounted service account token file.
//
// It returns rest.ErrNotInCluster when either environment variable is empty,
// and the underlying read error when the token file cannot be read.
func getRestConfig() (*rest.Config, error) {
	const (
		tokenFile = "/var/run/secrets/kubernetes.io/serviceaccount/token"
		//rootCAFile = "/var/run/secrets/kubernetes.io/serviceaccount/..data/ca.crt"
	)
	host, port := os.Getenv("KUBERNETES_SERVICE_HOST"), os.Getenv("KUBERNETES_SERVICE_PORT")
	if len(host) == 0 || len(port) == 0 {
		return nil, rest.ErrNotInCluster
	}

	token, err := os.ReadFile(tokenFile)
	if err != nil {
		return nil, err
	}

	// Server certificate verification is switched off and the root CA lookup
	// below is left disabled.
	// NOTE(review): presumably because, in iptables mode, the connection is
	// redirected to the local proxy rather than the real apiserver — confirm.
	tlsClientConfig := rest.TLSClientConfig{Insecure: true}

	//if _, err := certutil.NewPool(rootCAFile); err != nil {
	//	klog.Errorf("Expected to load root CA config from %s, but got err: %v", rootCAFile, err)
	//} else {
	//	tlsClientConfig.CAFile = rootCAFile
	//}

	cfg := &rest.Config{
		// TODO: switch to using cluster DNS.
		Host:            "https://" + net.JoinHostPort(host, port),
		TLSClientConfig: tlsClientConfig,
		BearerToken:     string(token),
		BearerTokenFile: tokenFile,

		Burst: 3000,
		QPS:   2000.0,
	}
	// Log the config through rest.Config's own fmt.Stringer, which sanitizes
	// credential fields. A generic JSON dump of the struct would either fail on
	// its func-typed fields or write the bearer token into the log.
	klog.V(3).Infof("Starting with rest config: %v", cfg)

	return cfg, nil
}
20 changes: 9 additions & 11 deletions pkg/webhook/pod/injector.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ func (h *MutatingHandler) injectByShardingConfig(ctx context.Context, pod *v1.Po
if *proxyImage == "" {
return fmt.Errorf("the images for ControllerMesh init or proxy container have not set in args")
}

enableIpTable := pod.Labels[ctrlmesh.CtrlmeshEnableIptableMode] == "true"
imagePullPolicy := v1.PullAlways
if *proxyImagePullPolicy == string(v1.PullIfNotPresent) {
imagePullPolicy = v1.PullIfNotPresent
Expand Down Expand Up @@ -177,6 +177,13 @@ func (h *MutatingHandler) injectByShardingConfig(ctx context.Context, pod *v1.Po
},
}

if enableIpTable {
proxyContainer.Env = append(proxyContainer.Env, v1.EnvVar{
Name: constants.EnvIPTable,
Value: "true",
})
}

if val, ok := pod.Annotations[ctrlmesh.CtrlmeshProxyContainerResourceAnno]; ok {
req := &v1.ResourceRequirements{}
if err := json.Unmarshal([]byte(val), req); err != nil {
Expand Down Expand Up @@ -213,15 +220,6 @@ func (h *MutatingHandler) injectByShardingConfig(ctx context.Context, pod *v1.Po
proxyContainer.Env = append(proxyContainer.Env, apiserverHostPortEnvs...)
}

ipTableEnvs := getEnv(pod, constants.EnvIPTable)
enableIpTable := false
if len(ipTableEnvs) > 0 {
initContainer.Env = append(initContainer.Env, ipTableEnvs...)
//proxyContainer.Env = append(proxyContainer.Env, ipTableEnvs...)
if ipTableEnvs[0].Value == "true" {
enableIpTable = true
}
}
if !enableIpTable {
if err := h.applyFakeConfigMap(pod); err != nil {
return err
Expand Down Expand Up @@ -271,7 +269,7 @@ func (h *MutatingHandler) injectByShardingConfig(ctx context.Context, pod *v1.Po
proxyContainer.VolumeMounts = append(proxyContainer.VolumeMounts, certVolumeMounts[0])
}
}
if *initImage != "" {
if enableIpTable && *initImage != "" {
pod.Spec.InitContainers = append([]v1.Container{*initContainer}, pod.Spec.InitContainers...)
}
if pod.Labels == nil {
Expand Down
Loading