diff --git a/.cloudtest.yaml b/.cloudtest.yaml
index 17ec795a..bceb398a 100644
--- a/.cloudtest.yaml
+++ b/.cloudtest.yaml
@@ -1,7 +1,7 @@
 ---
 version: 1.0
 root: "./.tests/cloud_test/"
-timeout: 7200 # 2 hour total total timeout
+timeout: 10800 # 3 hour total timeout
 shuffle-enabled: true
 statistics:
   enabled: true
diff --git a/.cloudtest_calico.yaml b/.cloudtest_calico.yaml
new file mode 100644
index 00000000..1127f7b2
--- /dev/null
+++ b/.cloudtest_calico.yaml
@@ -0,0 +1,31 @@
+---
+version: 1.0
+root: "./.tests/cloud_test_calico/"
+timeout: 10800 # 3 hour total timeout
+shuffle-enabled: true
+statistics:
+  enabled: true
+  interval: 60 # 60 seconds for statistics
+import:
+  - cloudtest/packet.yaml
+  - cloudtest/tests.yaml
+
+retest: # Allow a test re-run if some kind of failure is detected, like CNI network plugin errors.
+  count: 1 # Allow 1 time to do restart
+  warmup-time: 15 # Put 15 seconds warmup for cluster instance to be used again.
+  allowed-retests: 2 # If a cluster instance has a few retest requests one after another, we need to restart the cluster.
+  pattern:
+    - "NetworkPlugin cni failed to set up pod" # Error in AWS due to a leak of IPs or inability to assign them.
+    - "etcdserver: request timed out" # Error in any cloud, reason unknown.
+    - "unable to establish connection to VPP (VPP API socket file /run/vpp/api.sock does not exist)" # VPP is not started; it will be restarted in general, but will cause the test to fail.
+    # Sometimes (rarely) docker registry is unavailable for a moment
+    - "Error response from daemon: Get https://.*docker.io/.*: dial tcp: lookup registry"
+    - "Error response from daemon: Get https://.*docker.io/.*: net/http: request canceled while waiting for connection"
+    - "Failed create pod sandbox"
+reporting:
+  junit-report: "results/junit.xml"
+health-check:
+  - message: "Branch is not up to date"
+    interval: 60 # 1 minute
+    run: |
+      echo "Health check!"
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index f3005e01..02265945 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -66,8 +66,12 @@ jobs:
           git diff --name-only --exit-code go.sum || ( echo "Run go tidy" && false )
 
   packet:
-    name: packet
+    name: packet (Calico ${{ matrix.calico }})
     runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        calico: ["off", "on"]
     steps:
       - name: Set up /bin permissions # 1. Set up /bin permissions
         run: |
@@ -79,6 +83,7 @@ jobs:
       - name: Install cloudtest # 3. Install cloudtest
         run: |
           go get github.com/networkservicemesh/cloudtest@master
+# GOPROXY=direct go get github.com/Mixaster995/cloudtest@no-cleanup
         env:
           GO111MODULE: on
           GOBIN: /bin
@@ -87,31 +92,42 @@ jobs:
         with:
           repository: networkservicemesh/deployments-k8s
           path: networkservicemesh/deployments-k8s
-      - name: Checkout files # 5. Checkout files
+      - name: Compute suffix # 5. Compute suffix for cloudtest input and output paths
+        id: suffix
+        run: |
+          if [[ "${CALICO}" == "on" ]]; then
+            echo ::set-output name=val::_calico
+          fi
+        env:
+          CALICO: ${{ matrix.calico }}
+      - name: Checkout files # 6. Checkout files
         uses: actions/checkout@v2
         with:
           path: ${{ github.repository }}
-      - name: Run tests with cloudtest # 6. Run tests with cloudtest
+      - name: Run tests with cloudtest # 7. Run tests with cloudtest
         working-directory: ${{ github.repository }}
         run: |
-          cloudtest
+          cloudtest --config=.cloudtest${suffix}.yaml
         env:
           PACKET_AUTH_TOKEN: ${{ secrets.PACKET_AUTH_TOKEN }}
           PACKET_PROJECT_ID: 383890d0-f5d1-4de1-881a-4d1ede549d18
           KUBERNETES_VERSION: ${{ secrets.NSM_KUBERNETES_VERSION }}
-      - name: Publish test report # 7. Publish test report
+          CALICO: ${{ matrix.calico }}
+          suffix: ${{ steps.suffix.outputs.val }}
+      - name: Publish test report # 8. Publish test report
         uses: mikepenz/action-junit-report@v2.1.0
         if: ${{ always() }}
         with:
-          report_paths: "**/cloud_test/results/junit.xml"
+          report_paths: "**/cloud_test${{ steps.suffix.outputs.val }}/results/junit.xml"
           suite_regex: "Test*"
           github_token: ${{ secrets.GITHUB_TOKEN }}
-      - name: Upload logs # 8. Upload logs
+          check_name: "JUnit Test Report (Calico ${{ matrix.calico }})"
+      - name: Upload logs # 9. Upload logs
         uses: actions/upload-artifact@v2
         if: ${{ always() }}
         with:
           name: logs-${{ github.run_number }}
-          path: ${{ github.repository }}/.tests/cloud_test/
+          path: ${{ github.repository }}/.tests/
 
   packet-cleanup:
     name: packet cleanup
diff --git a/cloudtest/packet.yaml b/cloudtest/packet.yaml
index 7a6acac4..08d2fbb0 100644
--- a/cloudtest/packet.yaml
+++ b/cloudtest/packet.yaml
@@ -25,13 +25,15 @@ providers:
           os: "ubuntu_20_04"
           billing-cycle: "hourly"
           port-vlans:
-            eth3: 1044
+            eth1: 3000 # calico VLAN
+            eth3: 1044 # SR-IOV VLAN
         - name: "Worker"
           host-name: "SR-IOV-Worker-${CLUSTER_NAME}"
           os: "ubuntu_20_04"
           billing-cycle: "hourly"
           port-vlans:
-            eth3: 1044
+            eth1: 3000 # calico VLAN
+            eth3: 1044 # SR-IOV VLAN
       hardware-reservations:
         - 2cf78481-53b0-46c8-a084-6e9815acdb0b
         - 2361d3c2-f694-4fa7-a683-a9f69e2abe7c
diff --git a/go.mod b/go.mod
index 25d0e36e..eff65c8a 100644
--- a/go.mod
+++ b/go.mod
@@ -1,10 +1,10 @@
 module github.com/networkservicemesh/integration-k8s-packet
 
 go 1.16
 
 require (
 	github.com/googleapis/gnostic v0.5.1 // indirect
-	github.com/networkservicemesh/integration-tests v0.0.0-20220118134157-d3e1be7ce81a
+	github.com/networkservicemesh/integration-tests v0.0.0-20220120091746-3e71046e4a08
 	github.com/stretchr/testify v1.7.0
 	gopkg.in/yaml.v2 v2.4.0 // indirect
 )
diff --git a/go.sum b/go.sum
index 18f0f5a8..c084f351 100644
--- a/go.sum
+++ b/go.sum
@@ -158,8 +158,8 @@ github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8m
 github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
 github.com/networkservicemesh/gotestmd v0.0.0-20211116145945-871d2aaf07ab h1:/dIr8Nky77grI3s9Rc78eFH9M1Svobyj2XJBaKm27ts=
 github.com/networkservicemesh/gotestmd v0.0.0-20211116145945-871d2aaf07ab/go.mod h1:8EWnekTRNX+NxBdTFE24WqUoM7SgJHbiafDBrIIdOmQ=
-github.com/networkservicemesh/integration-tests v0.0.0-20220118134157-d3e1be7ce81a h1:BpCtVcuuc5nGN0ihT3IC6XRVSWLfwwjwBVwHZkH8aX0=
-github.com/networkservicemesh/integration-tests v0.0.0-20220118134157-d3e1be7ce81a/go.mod h1:0o7WrzxlHEwnDSuZPEM1BnKd4hr7+akKgymoAoTTbv8=
+github.com/networkservicemesh/integration-tests v0.0.0-20220120091746-3e71046e4a08 h1:CSMRodhf4Lz8o52yO3iX/u0bTAJkt3Bje0NG6GH2jAU=
+github.com/networkservicemesh/integration-tests v0.0.0-20220120091746-3e71046e4a08/go.mod h1:0o7WrzxlHEwnDSuZPEM1BnKd4hr7+akKgymoAoTTbv8=
 github.com/onsi/ginkgo v0.0.0-20170829012221-11459a886d9c/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
 github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
 github.com/onsi/ginkgo v1.11.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
diff --git a/main_test.go b/main_test.go
index 790e2266..5f9afcbf 100644
--- a/main_test.go
+++ b/main_test.go
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2021 Doc.ai and/or its affiliates.
+// Copyright (c) 2020-2022 Doc.ai and/or its affiliates.
 //
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -17,28 +17,53 @@
 package main_test
 
 import (
+	"os"
 	"testing"
 
 	"github.com/stretchr/testify/suite"
 
+	"github.com/networkservicemesh/integration-tests/suites/calico"
 	"github.com/networkservicemesh/integration-tests/suites/heal"
 	"github.com/networkservicemesh/integration-tests/suites/memory"
 	"github.com/networkservicemesh/integration-tests/suites/multiforwarder"
 	"github.com/networkservicemesh/integration-tests/suites/sriov"
 )
 
+func isCalico() bool {
+	return os.Getenv("CALICO") == "on"
+}
+
 func TestMemory(t *testing.T) {
+	if isCalico() {
+		t.Skip("not available with Calico")
+	}
 	suite.Run(t, new(memory.Suite))
 }
 
 func TestSRIOV(t *testing.T) {
+	if isCalico() {
+		t.Skip("not available with Calico")
+	}
 	suite.Run(t, new(sriov.Suite))
 }
 
 func TestMultiForwarder(t *testing.T) {
+	if isCalico() {
+		t.Skip("not available with Calico")
+	}
 	suite.Run(t, new(multiforwarder.Suite))
 }
 
 func TestHeal(t *testing.T) {
+	if isCalico() {
+		t.Skip("not available with Calico")
+	}
 	suite.Run(t, new(heal.Suite))
 }
+
+func TestCalico(t *testing.T) {
+	if !isCalico() {
+		t.Skip("not available without Calico")
+	}
+	suite.Run(t, new(calico.Suite))
+}
diff --git a/scripts/calico/deploy-calico.sh b/scripts/calico/deploy-calico.sh
new file mode 100755
index 00000000..b1753164
--- /dev/null
+++ b/scripts/calico/deploy-calico.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+function on_error() {
+  kubectl describe pods --all-namespaces
+  exit 1
+}
+trap 'on_error' ERR
+
+kubectl apply -k scripts/calico
+
+kubectl -n calico-vpp-dataplane rollout status daemonset/calico-vpp-node --timeout=5m
+kubectl -n kube-system rollout status deployment/calico-kube-controllers --timeout=5m
diff --git a/scripts/calico/kustomization.yaml b/scripts/calico/kustomization.yaml
new file mode 100644
index 00000000..446d8683
--- /dev/null
+++ b/scripts/calico/kustomization.yaml
@@ -0,0 +1,9 @@
+---
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - https://raw.githubusercontent.com/projectcalico/vpp-dataplane/v0.17.0-calicov3.20.2/yaml/generated/calico-vpp-nohuge.yaml
+
+patchesStrategicMerge:
+  - patch.yaml
diff --git a/scripts/calico/patch.yaml b/scripts/calico/patch.yaml
new file mode 100644
index 00000000..adf9f207
--- /dev/null
+++ b/scripts/calico/patch.yaml
@@ -0,0 +1,8 @@
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: calico-vpp-config
+  namespace: calico-vpp-dataplane
+data:
+  vpp_dataplane_interface: eno2
diff --git a/scripts/calico/setup-interfaces.sh b/scripts/calico/setup-interfaces.sh
new file mode 100755
index 00000000..c093f56b
--- /dev/null
+++ b/scripts/calico/setup-interfaces.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+set -e
+
+ip="$1"
+
+ip addr add "${ip}" dev eno2
+ip link set up dev eno2
diff --git a/scripts/calico/setup-node-ip.sh b/scripts/calico/setup-node-ip.sh
new file mode 100755
index 00000000..ee9664bd
--- /dev/null
+++ b/scripts/calico/setup-node-ip.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+set -e
+
+ip="$1"
+
+sed -Ei "s/(.*)\"/\1 --node-ip=${ip}\"/g" /var/lib/kubelet/kubeadm-flags.env
+systemctl restart kubelet
diff --git a/scripts/create-kubernetes-cluster.sh b/scripts/create-kubernetes-cluster.sh
index 9fd84e99..b5529dba 100755
--- a/scripts/create-kubernetes-cluster.sh
+++ b/scripts/create-kubernetes-cluster.sh
@@ -1,82 +1,142 @@
 #!/bin/bash -x
-# shellcheck disable=SC2086
+# shellcheck disable=SC2086,SC2029
 
 master_ip=$1
 worker_ip=$2
 sshkey=$3
 
-SSH_OPTS="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentitiesOnly=yes -i ${sshkey}"
-
-function wait_pids() {
-  pids="$1"
-  message="$2"
-  for pid in ${pids}; do
-    echo "waiting for PID ${pid}"
-    wait ${pid}
-    code=$?
-    if test $code -ne 0; then
-      echo "${message}: process exited with code $code, aborting..." && return 1
-    fi
-  done
-  return 0
-}
-
-# Setup SR-IOV
+SSH_CONFIG="ssh_config"
+SSH_OPTS="-F ${SSH_CONFIG} -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentitiesOnly=yes -i ${sshkey}"
+
+if [[ "$CALICO" == "on" ]]; then # calico
+  # Use a new 10.0.0.${base_ip}/30 subnet to prevent IP addresses collisions
+  # ${base_ip} should be <= 248, because 10.0.0.252/30 subnet is reserved for manual testing
+  base_ip=$(( GITHUB_RUN_NUMBER % 63 * 4 ))
+
+  CALICO_MASTER_IP="10.0.0.$(( base_ip + 1 ))"
+  CALICO_WORKER_IP="10.0.0.$(( base_ip + 2 ))"
+  CALICO_SUBNET_MASK="30"
+fi
+
+ENVS="KUBERNETES_VERSION CALICO"
+
+# wait_pids pid_1 ... pid_n
+source scripts/include/wait-pids.sh
+# wait_start ip_1 ... ip_n
+source scripts/include/wait-start.sh
+
+# 0. Setup SendEnv on the local side.
+cp /etc/ssh/ssh_config ${SSH_CONFIG} || exit 1
+echo "Host *
+ SendEnv ${ENVS}" >> ${SSH_CONFIG} || exit 2
+
+wait_start ${master_ip} ${worker_ip} || exit 3
+
+# 1. Setup AcceptEnv on the server side and wait for sshd to restart.
+scp ${SSH_OPTS} scripts/setup-sshd.sh root@${master_ip}:setup-sshd.sh || exit 11
+scp ${SSH_OPTS} scripts/setup-sshd.sh root@${worker_ip}:setup-sshd.sh || exit 12
+
+pids=""
+ssh ${SSH_OPTS} root@${master_ip} ./setup-sshd.sh "${ENVS}" &
+pids+=" $!"
+ssh ${SSH_OPTS} root@${worker_ip} ./setup-sshd.sh "${ENVS}" &
+pids+=" $!"
+wait_pids "${pids}" "sshd config failed" || exit 13
+
+wait_start ${master_ip} ${worker_ip} || exit 14
+
+# 2. Setup SR-IOV.
 pids=""
 /bin/bash scripts/sriov/setup-SRIOV.sh "${master_ip}" "${worker_ip}" "${SSH_OPTS}" &
 pids+=" $!"
-wait_pids "${pids}" "SR-IOV config failed" || exit 1
+wait_pids "${pids}" "SR-IOV config failed" || exit 21
+
+if [[ "$CALICO" == "on" ]]; then # calico
+  # 3. Create Calico scripts directory on nodes.
+  ssh ${SSH_OPTS} root@${master_ip} mkdir calico || exit 31
+  ssh ${SSH_OPTS} root@${worker_ip} mkdir calico || exit 32
+
+  # 4. Setup Calico interfaces.
+  scp ${SSH_OPTS} scripts/calico/setup-interfaces.sh root@${master_ip}:calico/setup-interfaces.sh || exit 41
+  scp ${SSH_OPTS} scripts/calico/setup-interfaces.sh root@${worker_ip}:calico/setup-interfaces.sh || exit 42
 
-# Create k8s scripts directory on nodes
-ssh ${SSH_OPTS} root@${master_ip} mkdir k8s
-ssh ${SSH_OPTS} root@${worker_ip} mkdir k8s
+  pids=""
+  ssh ${SSH_OPTS} root@${master_ip} ./calico/setup-interfaces.sh "${CALICO_MASTER_IP}/${CALICO_SUBNET_MASK}" &
+  pids+=" $!"
+  ssh ${SSH_OPTS} root@${worker_ip} ./calico/setup-interfaces.sh "${CALICO_WORKER_IP}/${CALICO_SUBNET_MASK}" &
+  pids+=" $!"
+  wait_pids "${pids}" "setup Calico interfaces failed" || exit 43
+fi
 
-# Setup docker ulimit
-scp ${SSH_OPTS} scripts/k8s/docker-ulimit.sh root@${master_ip}:k8s/docker-ulimit.sh || exit 2
-scp ${SSH_OPTS} scripts/k8s/docker-ulimit.sh root@${worker_ip}:k8s/docker-ulimit.sh || exit 3
+# 5. Create k8s scripts directory on nodes.
+ssh ${SSH_OPTS} root@${master_ip} mkdir k8s || exit 51
+ssh ${SSH_OPTS} root@${worker_ip} mkdir k8s || exit 52
+
+# 6. Config docker.
+scp ${SSH_OPTS} scripts/k8s/config-docker.sh root@${master_ip}:k8s/config-docker.sh || exit 61
+scp ${SSH_OPTS} scripts/k8s/config-docker.sh root@${worker_ip}:k8s/config-docker.sh || exit 62
 
 pids=""
-ssh ${SSH_OPTS} root@${master_ip} ./k8s/docker-ulimit.sh &
+ssh ${SSH_OPTS} root@${master_ip} ./k8s/config-docker.sh &
 pids+=" $!"
-ssh ${SSH_OPTS} root@${worker_ip} ./k8s/docker-ulimit.sh &
+ssh ${SSH_OPTS} root@${worker_ip} ./k8s/config-docker.sh &
 pids+=" $!"
-wait_pids "${pids}" "kubernetes install failed" || exit 4
+wait_pids "${pids}" "docker config failed" || exit 63
 
-# Install kubeadm, kubelet and kubectl
-scp ${SSH_OPTS} scripts/k8s/install-kubernetes.sh root@${master_ip}:k8s/install-kubernetes.sh || exit 5
-scp ${SSH_OPTS} scripts/k8s/install-kubernetes.sh root@${worker_ip}:k8s/install-kubernetes.sh || exit 6
+# 7. Install kubeadm, kubelet and kubectl.
+scp ${SSH_OPTS} scripts/k8s/install-kubernetes.sh root@${master_ip}:k8s/install-kubernetes.sh || exit 71
+scp ${SSH_OPTS} scripts/k8s/install-kubernetes.sh root@${worker_ip}:k8s/install-kubernetes.sh || exit 72
 
 pids=""
-ssh ${SSH_OPTS} root@${master_ip} ./k8s/install-kubernetes.sh ${KUBERNETES_VERSION} &
+ssh ${SSH_OPTS} root@${master_ip} ./k8s/install-kubernetes.sh &
 pids+=" $!"
-ssh ${SSH_OPTS} root@${worker_ip} ./k8s/install-kubernetes.sh ${KUBERNETES_VERSION} &
+ssh ${SSH_OPTS} root@${worker_ip} ./k8s/install-kubernetes.sh &
 pids+=" $!"
-wait_pids "${pids}" "kubernetes install failed" || exit 7
+wait_pids "${pids}" "kubernetes install failed" || exit 73
 
-# master: start kubernetes and create join script
-# worker: download kubernetes images
-scp ${SSH_OPTS} scripts/k8s/start-master.sh root@${master_ip}:k8s/start-master.sh || exit 8
-scp ${SSH_OPTS} scripts/k8s/download-worker-images.sh root@${worker_ip}:k8s/download-worker-images.sh || exit 9
+# 8.
+# master: start kubernetes and create join script.
+# worker: download kubernetes images.
+scp ${SSH_OPTS} scripts/k8s/start-master.sh root@${master_ip}:k8s/start-master.sh || exit 81
+scp ${SSH_OPTS} scripts/k8s/download-worker-images.sh root@${worker_ip}:k8s/download-worker-images.sh || exit 82
 
 pids=""
-ssh ${SSH_OPTS} root@${master_ip} ./k8s/start-master.sh ${KUBERNETES_VERSION} &
+ssh ${SSH_OPTS} root@${master_ip} ./k8s/start-master.sh ${master_ip} ${CALICO_MASTER_IP} &
 pids+=" $!"
 ssh ${SSH_OPTS} root@${worker_ip} ./k8s/download-worker-images.sh &
 pids+=" $!"
-wait_pids "${pids}" "node setup failed" || exit 10
+wait_pids "${pids}" "nodes setup failed" || exit 83
 
-# Download worker join script
+# 9. Download, upload and run worker join script.
 mkdir -p /tmp/${master_ip}
-scp ${SSH_OPTS} root@${master_ip}:k8s/join-cluster.sh /tmp/${master_ip}/join-cluster.sh || exit 11
-chmod +x /tmp/${master_ip}/join-cluster.sh || exit 12
+scp ${SSH_OPTS} root@${master_ip}:k8s/join-cluster.sh /tmp/${master_ip}/join-cluster.sh || exit 91
+chmod +x /tmp/${master_ip}/join-cluster.sh || exit 92
 
-# Upload and run worker join script
-scp ${SSH_OPTS} /tmp/${master_ip}/join-cluster.sh root@${worker_ip}:k8s/join-cluster.sh || exit 13
+scp ${SSH_OPTS} /tmp/${master_ip}/join-cluster.sh root@${worker_ip}:k8s/join-cluster.sh || exit 93
 
 pids=""
 ssh ${SSH_OPTS} root@${worker_ip} ./k8s/join-cluster.sh &
 pids+=" $!"
-wait_pids "${pids}" "worker join failed" || exit 14
+wait_pids "${pids}" "worker join failed" || exit 94
+
+# 10. Save KUBECONFIG to file.
+scp ${SSH_OPTS} root@${master_ip}:.kube/config ${KUBECONFIG} || exit 101
+
+if [[ "$CALICO" == "on" ]]; then # calico
+  # 11. Setup cluster nodes IPs.
+  scp ${SSH_OPTS} scripts/calico/setup-node-ip.sh root@${master_ip}:calico/setup-node-ip.sh || exit 111
+  scp ${SSH_OPTS} scripts/calico/setup-node-ip.sh root@${worker_ip}:calico/setup-node-ip.sh || exit 112
+
+  pids=""
+  ssh ${SSH_OPTS} root@${master_ip} ./calico/setup-node-ip.sh "${CALICO_MASTER_IP}" &
+  pids+=" $!"
+  ssh ${SSH_OPTS} root@${worker_ip} ./calico/setup-node-ip.sh "${CALICO_WORKER_IP}" &
+  pids+=" $!"
+  wait_pids "${pids}" "nodes IPs setup failed" || exit 113
+
+  # 12. Deploy Calico CNI.
+  /bin/bash scripts/calico/deploy-calico.sh || exit 121
+fi
 
-echo "Save KUBECONFIG to file"
-scp ${SSH_OPTS} root@${master_ip}:.kube/config ${KUBECONFIG} || exit 15
+# Get pods
+kubectl get pods --all-namespaces
diff --git a/scripts/destroy-old-clusters.sh b/scripts/destroy-old-clusters.sh
index 3b698f12..ba18e21d 100755
--- a/scripts/destroy-old-clusters.sh
+++ b/scripts/destroy-old-clusters.sh
@@ -1,3 +1,5 @@
-#!/bin/bash
+#!/bin/bash -x
+
+set -e
 
 go run github.com/networkservicemesh/cloudtest/pkg/providers/packet/packet_cleanup -k y -c y
diff --git a/scripts/download-postmortem-data.sh b/scripts/download-postmortem-data.sh
index a5c18f79..a099f6f2 100755
--- a/scripts/download-postmortem-data.sh
+++ b/scripts/download-postmortem-data.sh
@@ -1,6 +1,8 @@
 #!/bin/bash -x
 # shellcheck disable=SC2086
 
+set -e
+
 master_ip=$1
 worker_ip=$2
 cluster_id=$3
diff --git a/scripts/include/wait-pids.sh b/scripts/include/wait-pids.sh
new file mode 100755
index 00000000..fbd3ed50
--- /dev/null
+++ b/scripts/include/wait-pids.sh
@@ -0,0 +1,15 @@
+function wait_pids() {
+  pids="$1"
+  message="$2"
+  for pid in ${pids}; do
+    echo "waiting for PID ${pid}"
+    # shellcheck disable=SC2086
+    wait ${pid}
+    code=$?
+    if test $code -ne 0; then
+      echo "${message}: process exited with code $code, aborting..."
+      return 1
+    fi
+  done
+  return 0
+}
diff --git a/scripts/include/wait-start.sh b/scripts/include/wait-start.sh
new file mode 100755
index 00000000..a6c612af
--- /dev/null
+++ b/scripts/include/wait-start.sh
@@ -0,0 +1,26 @@
+function wait_start() {
+  for ip in "$@"; do
+    success_attempts=0
+    # ~15 minutes to start
+    for i in {1..60}; do
+      if [[ ${i} == 60 ]]; then
+        echo "timeout waiting for the ${ip} to start, aborting..."
+        return 1
+      fi
+
+      # shellcheck disable=SC2086
+      if ssh ${SSH_OPTS} -o ConnectTimeout=1 -o BatchMode=yes root@${ip} true; then
+        ((success_attempts++))
+      else
+        success_attempts=0
+      fi
+
+      if [[ ${success_attempts} == 3 ]]; then
+        break
+      fi
+
+      sleep 15
+    done
+  done
+  return 0
+}
diff --git a/scripts/k8s/docker-ulimit.sh b/scripts/k8s/config-docker.sh
similarity index 73%
rename from scripts/k8s/docker-ulimit.sh
rename to scripts/k8s/config-docker.sh
index 723885b7..e9887f0b 100755
--- a/scripts/k8s/docker-ulimit.sh
+++ b/scripts/k8s/config-docker.sh
@@ -1,4 +1,6 @@
-#!/bin/bash
+#!/bin/bash -x
+
+set -e
 
 mkdir -p /etc/docker
 
@@ -11,5 +13,6 @@ echo \
       "soft": 67108864,
       "hard": 67108864
     }
-  }
+  },
+  "exec-opts": ["native.cgroupdriver=systemd"]
 }' >/etc/docker/daemon.json
diff --git a/scripts/k8s/download-worker-images.sh b/scripts/k8s/download-worker-images.sh
index 2c962726..9dc13d5d 100755
--- a/scripts/k8s/download-worker-images.sh
+++ b/scripts/k8s/download-worker-images.sh
@@ -1,2 +1,5 @@
-#!/bin/sh
+#!/bin/bash -x
+
+set -e
+
 kubeadm config images pull
diff --git a/scripts/k8s/install-kubernetes.sh b/scripts/k8s/install-kubernetes.sh
index beb0e472..3a056349 100755
--- a/scripts/k8s/install-kubernetes.sh
+++ b/scripts/k8s/install-kubernetes.sh
@@ -1,6 +1,8 @@
-#!/bin/sh
+#!/bin/bash -x
 
-KUBERNETES_VERSION="$1-00"
+set -e
+
+VERSION="${KUBERNETES_VERSION}-00"
 
 curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
 cat <<EOF >/etc/apt/sources.list.d/kubernetes.list
@@ -9,7 +11,7 @@ EOF
 
 apt-get update
 apt-get install -y docker.io
-apt-get install -qy kubelet="${KUBERNETES_VERSION}" kubectl="${KUBERNETES_VERSION}" kubeadm="${KUBERNETES_VERSION}"
+apt-get install -qy kubelet="${VERSION}" kubectl="${VERSION}" kubeadm="${VERSION}"
 
 systemctl daemon-reload
 systemctl restart kubelet
diff --git a/scripts/k8s/start-master.sh b/scripts/k8s/start-master.sh
index 7d4628c5..42c3ef32 100755
--- a/scripts/k8s/start-master.sh
+++ b/scripts/k8s/start-master.sh
@@ -1,19 +1,43 @@
-#!/bin/sh
-
-KUBERNETES_VERSION="$1"
+#!/bin/bash -x
+# shellcheck disable=SC2086
 
 set -e
 
+public_ip="$1"
+calico_ip="$2"
+
 K8S_DIR=$(dirname "$0")
 
-kubeadm init --kubernetes-version "${KUBERNETES_VERSION}" --pod-network-cidr=192.168.0.0/16 --skip-token-print
+if [[ "$CALICO" != "on" ]]; then # not calico
+  ip="${public_ip}"
+else
+  ip="${calico_ip}"
+fi
+
+kubeadm init \
+  --kubernetes-version "${KUBERNETES_VERSION}" \
+  --pod-network-cidr=192.168.0.0/16 \
+  --skip-token-print \
+  --apiserver-advertise-address=$ip
 
-mkdir -p "$HOME"/.kube
-sudo cp -f /etc/kubernetes/admin.conf "$HOME"/.kube/config
-sudo chown "$(id -u):$(id -g)" "$HOME"/.kube/config
+mkdir -p ~/.kube
+cp -f /etc/kubernetes/admin.conf ~/.kube/config
+chown "$(id -u):$(id -g)" ~/.kube/config
 
-kubectl apply -f "https://cloud.weave.works/k8s/net?k8s-version=$(kubectl version | base64 | tr -d '\n')&env.IPALLOC_RANGE=192.168.0.0/16"
+if [[ "$CALICO" != "on" ]]; then # not calico
+  kubectl apply -f "https://cloud.weave.works/k8s/net?k8s-version=$(kubectl version | base64 | tr -d '\n')&env.IPALLOC_RANGE=192.168.0.0/16"
+fi
 
 kubectl taint nodes --all node-role.kubernetes.io/master-
 
+if [[ "$CALICO" == "on" ]]; then # calico
+  kubectl -n kube-system get configmap kubeadm-config -o jsonpath='{.data.ClusterConfiguration}' > kubeadm.yaml
+  sed -i "/^apiServer:$/a \ \ certSANs:\n - \"${public_ip}\"\n - \"${calico_ip}\"" kubeadm.yaml
+
+  rm /etc/kubernetes/pki/apiserver.{crt,key}
+  kubeadm init phase certs apiserver --config kubeadm.yaml
+
+  sed -i "s/${calico_ip//./\.}/${public_ip}/g" ~/.kube/config
+fi
+
 kubeadm token create --print-join-command > "${K8S_DIR}/join-cluster.sh"
diff --git a/scripts/setup-sshd.sh b/scripts/setup-sshd.sh
new file mode 100755
index 00000000..f32090b7
--- /dev/null
+++ b/scripts/setup-sshd.sh
@@ -0,0 +1,9 @@
+#!/bin/bash -x
+
+set -e
+
+ENVS="$*"
+
+echo "AcceptEnv ${ENVS}" >> /etc/ssh/sshd_config
+
+nohup bash -c "sleep 5; systemctl restart sshd" >/dev/null 2>&1 &
diff --git a/scripts/sriov/config-SRIOV.sh b/scripts/sriov/config-SRIOV.sh
index fe61fce7..3852e2f1 100755
--- a/scripts/sriov/config-SRIOV.sh
+++ b/scripts/sriov/config-SRIOV.sh
@@ -1,6 +1,8 @@
-#!/bin/bash
+#!/bin/bash -x
 # shellcheck disable=SC2064,SC2129
 
+set -e
+
 CONFIG_DIRECTORY="/var/lib/networkservicemesh"
 CONFIG_FILE="${CONFIG_DIRECTORY}/sriov.config"
 
diff --git a/scripts/sriov/enable-SRIOV.sh b/scripts/sriov/enable-SRIOV.sh
index 012c097c..964edd30 100755
--- a/scripts/sriov/enable-SRIOV.sh
+++ b/scripts/sriov/enable-SRIOV.sh
@@ -1,4 +1,6 @@
-#!/bin/bash
+#!/bin/bash -x
+
+set -e
 
 sed -Ei "s/(GRUB_CMDLINE_LINUX=.*)'/\1 intel_iommu=on'/" /etc/default/grub
 grub-mkconfig -o /boot/grub/grub.cfg
diff --git a/scripts/sriov/enable-VFIO.sh b/scripts/sriov/enable-VFIO.sh
index 0e4de224..7490c808 100755
--- a/scripts/sriov/enable-VFIO.sh
+++ b/scripts/sriov/enable-VFIO.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/bash -x
 # shellcheck disable=SC2002,SC2064
 
 device="/sys/class/net/$1/device"
diff --git a/scripts/sriov/setup-SRIOV.sh b/scripts/sriov/setup-SRIOV.sh
index bf9ac1e5..498dc063 100755
--- a/scripts/sriov/setup-SRIOV.sh
+++ b/scripts/sriov/setup-SRIOV.sh
@@ -5,23 +5,13 @@ master_ip="$1"
 worker_ip="$2"
 SSH_OPTS="$3"
 
-function wait_pids() {
-  pids="$1"
-  message="$2"
-  for pid in ${pids}; do
-    echo "waiting for PID ${pid}"
-    wait ${pid}
-    code=$?
-    if test $code -ne 0; then
-      echo "${message}: process exited with code $code, aborting..."
-      return 1
-    fi
-  done
-  return 0
-}
-
 SRIOV_DIR=$(dirname "$0")
 
+# wait_pids pid_1 ... pid_n
+source scripts/include/wait-pids.sh
+# wait_start ip_1 ... ip_n
+source scripts/include/wait-start.sh
+
 # Create SR-IOV scripts directory on nodes
 ssh ${SSH_OPTS} root@${master_ip} mkdir sriov
 ssh ${SSH_OPTS} root@${worker_ip} mkdir sriov
@@ -37,30 +27,7 @@ ssh ${SSH_OPTS} root@${worker_ip} ./sriov/enable-SRIOV.sh &
 pids+=" $!"
 wait_pids "${pids}" "SR-IOV setup failed" || exit 3
 
-sleep 5
-
-for ip in ${master_ip} ${worker_ip}; do
-  success_attempts=0
-  # ~15 minutes to start
-  for i in {1..60}; do
-    if [[ ${i} == 60 ]]; then
-      echo "timeout waiting for the ${ip} to start, aborting..."
-      exit 4
-    fi
-
-    if ssh ${SSH_OPTS} -o ConnectTimeout=1 -o BatchMode=yes root@${ip} true; then
-      ((success_attempts++))
-    else
-      success_attempts=0
-    fi
-
-    if [[ ${success_attempts} == 3 ]]; then
-      break
-    fi
-
-    sleep 15
-  done
-done
+wait_start ${master_ip} ${worker_ip} || exit 4
 
 # Create SR-IOV config
 scp ${SSH_OPTS} ${SRIOV_DIR}/config-SRIOV.sh root@${master_ip}:sriov/config-SRIOV.sh || exit 5