From bd50586f8b8e5f6255746e6ba85bd8d1aac98255 Mon Sep 17 00:00:00 2001 From: Zenghui Shi Date: Thu, 6 Apr 2023 18:52:00 +0800 Subject: [PATCH 1/2] Multinode: ovn-kubernetes This commit is the first step to introduce multinode capability in microshift, focusing on the networking part. - add a hidden flag --multinode - split ovnk manifests to common, single-node and multi-node - apply multi-node manifests for ovnk - change cluster mtu based on --multinode flag To run microshift in multinode mode: $ microshift run --multinode Signed-off-by: Zenghui Shi Co-authored-by: Doug Hellmann --- .../ovn/{ => common}/clusterrole.yaml | 0 .../ovn/{ => common}/clusterrolebinding.yaml | 0 .../ovn/{ => common}/configmap.yaml | 0 .../master-serviceaccount.yaml} | 0 .../ovn/{ => common}/namespace.yaml | 0 .../node-serviceaccount.yaml} | 0 assets/components/ovn/{ => common}/role.yaml | 0 .../ovn/{ => common}/rolebinding.yaml | 0 .../ovn/multi-node/master/daemonset.yaml | 492 ++++++++++++++++++ .../ovn/multi-node/node/daemonset.yaml | 276 ++++++++++ .../{ => single-node}/master/daemonset.yaml | 0 .../ovn/{ => single-node}/node/daemonset.yaml | 0 pkg/cmd/run.go | 39 +- pkg/components/networking.go | 35 +- pkg/config/config.go | 4 + pkg/config/multinode.go | 24 + pkg/config/ovn/ovn.go | 31 +- pkg/config/ovn/ovn_test.go | 2 +- scripts/auto-rebase/assets.yaml | 39 +- 19 files changed, 894 insertions(+), 48 deletions(-) rename assets/components/ovn/{ => common}/clusterrole.yaml (100%) rename assets/components/ovn/{ => common}/clusterrolebinding.yaml (100%) rename assets/components/ovn/{ => common}/configmap.yaml (100%) rename assets/components/ovn/{master/serviceaccount.yaml => common/master-serviceaccount.yaml} (100%) rename assets/components/ovn/{ => common}/namespace.yaml (100%) rename assets/components/ovn/{node/serviceaccount.yaml => common/node-serviceaccount.yaml} (100%) rename assets/components/ovn/{ => common}/role.yaml (100%) rename assets/components/ovn/{ => 
common}/rolebinding.yaml (100%) create mode 100644 assets/components/ovn/multi-node/master/daemonset.yaml create mode 100644 assets/components/ovn/multi-node/node/daemonset.yaml rename assets/components/ovn/{ => single-node}/master/daemonset.yaml (100%) rename assets/components/ovn/{ => single-node}/node/daemonset.yaml (100%) create mode 100644 pkg/config/multinode.go diff --git a/assets/components/ovn/clusterrole.yaml b/assets/components/ovn/common/clusterrole.yaml similarity index 100% rename from assets/components/ovn/clusterrole.yaml rename to assets/components/ovn/common/clusterrole.yaml diff --git a/assets/components/ovn/clusterrolebinding.yaml b/assets/components/ovn/common/clusterrolebinding.yaml similarity index 100% rename from assets/components/ovn/clusterrolebinding.yaml rename to assets/components/ovn/common/clusterrolebinding.yaml diff --git a/assets/components/ovn/configmap.yaml b/assets/components/ovn/common/configmap.yaml similarity index 100% rename from assets/components/ovn/configmap.yaml rename to assets/components/ovn/common/configmap.yaml diff --git a/assets/components/ovn/master/serviceaccount.yaml b/assets/components/ovn/common/master-serviceaccount.yaml similarity index 100% rename from assets/components/ovn/master/serviceaccount.yaml rename to assets/components/ovn/common/master-serviceaccount.yaml diff --git a/assets/components/ovn/namespace.yaml b/assets/components/ovn/common/namespace.yaml similarity index 100% rename from assets/components/ovn/namespace.yaml rename to assets/components/ovn/common/namespace.yaml diff --git a/assets/components/ovn/node/serviceaccount.yaml b/assets/components/ovn/common/node-serviceaccount.yaml similarity index 100% rename from assets/components/ovn/node/serviceaccount.yaml rename to assets/components/ovn/common/node-serviceaccount.yaml diff --git a/assets/components/ovn/role.yaml b/assets/components/ovn/common/role.yaml similarity index 100% rename from assets/components/ovn/role.yaml rename to 
assets/components/ovn/common/role.yaml diff --git a/assets/components/ovn/rolebinding.yaml b/assets/components/ovn/common/rolebinding.yaml similarity index 100% rename from assets/components/ovn/rolebinding.yaml rename to assets/components/ovn/common/rolebinding.yaml diff --git a/assets/components/ovn/multi-node/master/daemonset.yaml b/assets/components/ovn/multi-node/master/daemonset.yaml new file mode 100644 index 0000000000..ad21ca543d --- /dev/null +++ b/assets/components/ovn/multi-node/master/daemonset.yaml @@ -0,0 +1,492 @@ +--- +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: ovnkube-master + namespace: openshift-ovn-kubernetes + annotations: + kubernetes.io/description: | + This daemonset launches the ovn-kubernetes controller (master) networking components. +spec: + selector: + matchLabels: + app: ovnkube-master + updateStrategy: + type: RollingUpdate + rollingUpdate: + # by default, Deployments spin up the new pod before terminating the old one + # but we don't want that - because ovsdb holds the lock. 
+ maxSurge: 0 + maxUnavailable: 1 + template: + metadata: + annotations: + target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' + labels: + app: ovnkube-master + ovn-db-pod: "true" + component: network + type: infra + openshift.io/component: network + kubernetes.io/os: "linux" + spec: + serviceAccountName: ovn-kubernetes-controller + hostNetwork: true + dnsPolicy: Default + priorityClassName: "system-cluster-critical" + # volumes in all containers: + # (container) -> (host) + # /etc/openvswitch -> /var/lib/ovn/etc - ovsdb data + # /var/lib/openvswitch -> /var/lib/ovn/data - ovsdb pki state + # /run/openvswitch -> tmpfs - sockets + # /env -> configmap env-overrides - debug overrides + containers: + # ovn-northd: convert network objects in nbdb to flows in sbdb + - name: northd + image: {{ .ReleaseImage.ovn_kubernetes_microshift }} + command: + - /bin/bash + - -c + - | + set -xem + if [[ -f /env/_master ]]; then + set -o allexport + source /env/_master + set +o allexport + fi + + quit() { + echo "$(date -Iseconds) - stopping ovn-northd" + OVN_MANAGE_OVSDB=no /usr/share/ovn/scripts/ovn-ctl stop_northd + echo "$(date -Iseconds) - ovn-northd stopped" + rm -f /var/run/ovn/ovn-northd.pid + exit 0 + } + # end of quit + trap quit TERM INT + + echo "$(date -Iseconds) - starting ovn-northd" + exec ovn-northd \ + --no-chdir "-vconsole:${OVN_LOG_LEVEL}" -vfile:off "-vPATTERN:console:%D{%Y-%m-%dT%H:%M:%S.###Z}|%05N|%c%T|%p|%m" \ + --ovnnb-db "{{.OVN_NB_DB_LIST}}" \ + --ovnsb-db "{{.OVN_SB_DB_LIST}}" \ + --pidfile /var/run/ovn/ovn-northd.pid & + + wait $! 
+ lifecycle: + preStop: + exec: + command: + - /bin/bash + - -c + - OVN_MANAGE_OVSDB=no /usr/share/ovn/scripts/ovn-ctl stop_northd + env: + - name: OVN_LOG_LEVEL + value: info + - name: K8S_NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + volumeMounts: + - mountPath: /run/openvswitch/ + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /env + name: env-overrides + resources: + requests: + cpu: 10m + memory: 10Mi + terminationMessagePolicy: FallbackToLogsOnError + + # nbdb: the northbound, or logical network object DB. In raft mode + - name: nbdb + image: {{ .ReleaseImage.ovn_kubernetes_microshift }} + command: + - /bin/bash + - -c + - | + set -xem + if [[ -f /env/_master ]]; then + set -o allexport + source /env/_master + set +o allexport + fi + + quit() { + echo "$(date -Iseconds) - stopping nbdb" + /usr/share/ovn/scripts/ovn-ctl stop_nb_ovsdb + echo "$(date -Iseconds) - nbdb stopped" + rm -f /var/run/ovn/ovnnb_db.pid + exit 0 + } + # end of quit + trap quit TERM INT + + bracketify() { case "$1" in *:*) echo "[$1]" ;; *) echo "$1" ;; esac } + compact() { + sleep 15 + while true; do + /usr/bin/ovn-appctl -t /var/run/ovn/ovn${1}_db.ctl --timeout=5 ovsdb-server/compact 2>/dev/null || true + sleep 600 + done + } + # initialize variables + db="nb" + ovn_db_file="/etc/ovn/ovn${db}_db.db" + + OVN_ARGS="--db-nb-cluster-local-port=9643 \ + --db-nb-cluster-local-addr=$(bracketify ${K8S_NODE_IP}) \ + --db-nb-cluster-local-proto=tcp \ + --no-monitor" + + echo "$(date -Iseconds) - starting nbdb" + + exec /usr/share/ovn/scripts/ovn-ctl \ + ${OVN_ARGS} \ + --ovn-nb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:%D{%Y-%m-%dT%H:%M:%S.###Z}|%05N|%c%T|%p|%m" \ + --db-nb-election-timer=10000 \ + run_nb_ovsdb & + + db_pid=$! 
+ compact $db & + wait $db_pid + + lifecycle: + postStart: + exec: + command: + - /bin/bash + - -c + - | + set -x + rm -f /var/run/ovn/ovnnb_db.pid + + retries=0 + while [[ "${retries}" -lt 60 ]]; do + ovn-nbctl --no-leader-only -t 5 set-connection ptcp:{{.OVN_NB_PORT}} -- set connection . inactivity_probe=60000 + if [[ $? == 0 ]]; then + echo "Successfully set nbdb connection" + break + else + sleep 2 + (( retries += 1 )) + fi + echo "Failed to set nbdb connection" + done + + #configure northd_probe_interval + OVN_NB_CTL="ovn-nbctl --db "{{.OVN_NB_DB_LIST}}"" + northd_probe_interval=${OVN_NORTHD_PROBE_INTERVAL:-10000} + echo "Setting northd probe interval to ${northd_probe_interval} ms" + retries=0 + current_probe_interval=0 + while [[ "${retries}" -lt 10 ]]; do + current_probe_interval=$(${OVN_NB_CTL} --if-exists get NB_GLOBAL . options:northd_probe_interval) + if [[ $? == 0 ]]; then + current_probe_interval=$(echo ${current_probe_interval} | tr -d '\"') + break + else + sleep 2 + (( retries += 1 )) + fi + done + + if [[ "${current_probe_interval}" != "${northd_probe_interval}" ]]; then + retries=0 + while [[ "${retries}" -lt 10 ]]; do + ${OVN_NB_CTL} set NB_GLOBAL . options:northd_probe_interval=${northd_probe_interval} + if [[ $? != 0 ]]; then + echo "Failed to set northd probe interval to ${northd_probe_interval}. retrying....." 
+ sleep 2 + (( retries += 1 )) + else + echo "Successfully set northd probe interval to ${northd_probe_interval} ms" + break + fi + done + fi + + preStop: + exec: + command: + - /bin/bash + - -c + - | + echo "$(date -Iseconds) - stopping nbdb" + /usr/share/ovn/scripts/ovn-ctl stop_nb_ovsdb + echo "$(date -Iseconds) - nbdb stopped" + rm -f /var/run/ovn/ovnnb_db.pid + readinessProbe: + timeoutSeconds: 5 + exec: + command: + - /bin/bash + - -c + - | + set -xeo pipefail + /usr/bin/ovn-appctl -t /var/run/ovn/ovnnb_db.ctl --timeout=5 ovsdb-server/memory-trim-on-compaction on 2>/dev/null + + env: + - name: OVN_LOG_LEVEL + value: info + - name: OVN_NORTHD_PROBE_INTERVAL + value: "5000" + - name: K8S_NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + volumeMounts: + - mountPath: /run/openvswitch/ + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /env + name: env-overrides + resources: + requests: + cpu: 10m + memory: 10Mi + + ports: + - name: nb-db-port + containerPort: {{.OVN_NB_PORT}} + terminationMessagePolicy: FallbackToLogsOnError + + # sbdb: The southbound, or flow DB. 
In raft mode + - name: sbdb + image: {{ .ReleaseImage.ovn_kubernetes_microshift }} + command: + - /bin/bash + - -c + - | + set -xem + if [[ -f /env/_master ]]; then + set -o allexport + source /env/_master + set +o allexport + fi + + quit() { + echo "$(date -Iseconds) - stopping sbdb" + /usr/share/ovn/scripts/ovn-ctl stop_sb_ovsdb + echo "$(date -Iseconds) - sbdb stopped" + rm -f /var/run/ovn/ovnsb_db.pid + exit 0 + } + # end of quit + trap quit TERM INT + + bracketify() { case "$1" in *:*) echo "[$1]" ;; *) echo "$1" ;; esac } + compact() { + sleep 15 + while true; do + /usr/bin/ovn-appctl -t /var/run/ovn/ovn${1}_db.ctl --timeout=5 ovsdb-server/compact 2>/dev/null || true + sleep 600 + done + } + + # initialize variables + db="sb" + ovn_db_file="/etc/ovn/ovn${db}_db.db" + + OVN_ARGS="--db-sb-cluster-local-port=9644 \ + --db-sb-cluster-local-addr=$(bracketify ${K8S_NODE_IP}) \ + --db-sb-cluster-local-proto=tcp \ + --no-monitor" + + echo "$(date -Iseconds) - starting sbdb " + exec /usr/share/ovn/scripts/ovn-ctl \ + ${OVN_ARGS} \ + --ovn-sb-log="-vconsole:${OVN_LOG_LEVEL} -vfile:off -vPATTERN:console:%D{%Y-%m-%dT%H:%M:%S.###Z}|%05N|%c%T|%p|%m" \ + --db-sb-election-timer=16000 \ + run_sb_ovsdb & + db_pid=$! + compact $db & + wait $db_pid + lifecycle: + postStart: + exec: + command: + - /bin/bash + - -c + - | + set -x + rm -f /var/run/ovn/ovnsb_db.pid + + retries=0 + while [[ "${retries}" -lt 60 ]]; do + ovn-sbctl --no-leader-only -t 5 set-connection ptcp:{{.OVN_SB_PORT}} -- set connection . inactivity_probe=60000 + if [[ $? 
== 0 ]]; then + echo "Successfully set sbdb connection" + break + else + sleep 2 + (( retries += 1 )) + fi + echo "Failed to set sbdb connection" + done + + + preStop: + exec: + command: + - /bin/bash + - -c + - | + echo "$(date -Iseconds) - stopping sbdb" + /usr/share/ovn/scripts/ovn-ctl stop_sb_ovsdb + echo "$(date -Iseconds) - sbdb stopped" + rm -f /var/run/ovn/ovnsb_db.pid + readinessProbe: + timeoutSeconds: 5 + exec: + command: + - /bin/bash + - -c + - | + set -xeo pipefail + /usr/bin/ovn-appctl -t /var/run/ovn/ovnsb_db.ctl --timeout=5 ovsdb-server/memory-trim-on-compaction on 2>/dev/null + env: + - name: OVN_LOG_LEVEL + value: info + - name: K8S_NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + volumeMounts: + - mountPath: /run/openvswitch/ + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /env + name: env-overrides + resources: + requests: + cpu: 10m + memory: 10Mi + + ports: + - name: sb-db-port + containerPort: {{.OVN_SB_PORT}} + terminationMessagePolicy: FallbackToLogsOnError + + # ovnkube master: convert kubernetes objects in to nbdb logical network components + - name: ovnkube-master + image: {{ .ReleaseImage.ovn_kubernetes_microshift }} + command: + - /bin/bash + - -c + - | + set -xe + if [[ -f "/env/_master" ]]; then + set -o allexport + source "/env/_master" + set +o allexport + fi + + # K8S_NODE_IP triggers reconcilation of this daemon when node IP changes + echo "$(date -Iseconds) - starting ovnkube-master, Node: ${K8S_NODE} IP: ${K8S_NODE_IP}" + + gateway_mode_flags="--gateway-mode local --gateway-interface br-ex" + + sysctl net.ipv4.ip_forward=1 + + echo "I$(date "+%m%d %H:%M:%S.%N") - ovnkube-master - start ovnkube --init-master ${K8S_NODE}" + exec /usr/bin/ovnkube \ + --init-master "${K8S_NODE}" \ + --nb-address "{{.OVN_NB_DB_LIST}}" \ + --sb-address "{{.OVN_SB_DB_LIST}}" \ + --config-file=/run/ovnkube-config/ovnkube.conf \ + --loglevel "${OVN_KUBE_LOG_LEVEL}" \ + ${gateway_mode_flags} \ + 
--inactivity-probe="180000" \ + --enable-multicast \ + --disable-snat-multiple-gws \ + --acl-logging-rate-limit "20" + volumeMounts: + # for checking ovs-configuration service + - mountPath: /etc/systemd/system + name: systemd-units + readOnly: true + - mountPath: /run/openvswitch/ + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /run/ovnkube-config/ + name: ovnkube-config + - mountPath: {{.KubeconfigDir}} + name: kubeconfig + - mountPath: /env + name: env-overrides + - mountPath: /run/ovn-kubernetes/ + name: host-run-ovn-kubernetes + - mountPath: /dev/log + name: log-socket + - mountPath: /var/log/ovn + name: node-log + - mountPath: /etc/openvswitch + name: etc-openvswitch-node + - mountPath: /etc/ovn/ + name: etc-openvswitch-node + resources: + requests: + cpu: 10m + memory: 60Mi + env: + - name: OVN_KUBE_LOG_LEVEL + value: "4" + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: K8S_NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + securityContext: + privileged: true + terminationMessagePolicy: FallbackToLogsOnError + nodeSelector: + node-role.kubernetes.io/master: "" + kubernetes.io/os: "linux" + volumes: + # for checking ovs-configuration service + - name: systemd-units + hostPath: + path: /etc/systemd/system + - name: run-openvswitch + hostPath: + path: /var/run/openvswitch + - name: run-ovn + hostPath: + path: /var/run/ovn + + - name: etc-openvswitch-node + hostPath: + path: /etc/openvswitch + # Used for placement of ACL audit logs + - name: node-log + hostPath: + path: /var/log/ovn + - name: log-socket + hostPath: + path: /dev/log + # For CNI server + - name: host-run-ovn-kubernetes + hostPath: + path: /run/ovn-kubernetes + + - name: kubeconfig + hostPath: + path: {{.KubeconfigDir}} + - name: ovnkube-config + configMap: + name: ovnkube-config + - name: env-overrides + configMap: + name: env-overrides + optional: true + tolerations: + - operator: "Exists" diff --git 
a/assets/components/ovn/multi-node/node/daemonset.yaml b/assets/components/ovn/multi-node/node/daemonset.yaml new file mode 100644 index 0000000000..3402bb8c74 --- /dev/null +++ b/assets/components/ovn/multi-node/node/daemonset.yaml @@ -0,0 +1,276 @@ +--- +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: ovnkube-node + namespace: openshift-ovn-kubernetes + annotations: + kubernetes.io/description: | + This daemonset launches the ovn-kubernetes per node networking components. +spec: + selector: + matchLabels: + app: ovnkube-node + updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 10% + template: + metadata: + annotations: + target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' + labels: + app: ovnkube-node + component: network + type: infra + openshift.io/component: network + kubernetes.io/os: "linux" + spec: + serviceAccountName: ovn-kubernetes-node + hostNetwork: true + dnsPolicy: Default + hostPID: true + priorityClassName: "system-node-critical" + # volumes in all containers: + # (container) -> (host) + # /etc/openvswitch -> /etc/openvswitch - ovsdb system id + # /var/lib/openvswitch -> /var/lib/openvswitch/data - ovsdb data + # /run/openvswitch -> tmpfs - ovsdb sockets + # /env -> configmap env-overrides - debug overrides + containers: + # ovn-controller: programs the vswitch with flows from the sbdb + - name: ovn-controller + image: {{ .ReleaseImage.ovn_kubernetes_microshift }} + command: + - /bin/bash + - -c + - | + set -e + if [[ -f "/env/${K8S_NODE}" ]]; then + set -o allexport + source "/env/${K8S_NODE}" + set +o allexport + fi + + # K8S_NODE_IP triggers reconcilation of this daemon when node IP changes + echo "$(date -Iseconds) - starting ovn-controller, Node: ${K8S_NODE} IP: ${K8S_NODE_IP}" + + exec ovn-controller unix:/var/run/openvswitch/db.sock -vfile:off \ + --no-chdir --pidfile=/var/run/ovn/ovn-controller.pid \ + --syslog-method="null" \ + --log-file=/var/log/ovn/acl-audit-log.log \ + 
-vFACILITY:"local0" \ + -vconsole:"${OVN_LOG_LEVEL}" -vconsole:"acl_log:off" \ + -vPATTERN:console:"%D{%Y-%m-%dT%H:%M:%S.###Z}|%05N|%c%T|%p|%m" \ + -vsyslog:"acl_log:info" \ + -vfile:"acl_log:info" + securityContext: + privileged: true + env: + - name: OVN_LOG_LEVEL + value: info + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: K8S_NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + volumeMounts: + - mountPath: /run/openvswitch + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /etc/openvswitch + name: etc-openvswitch + - mountPath: /etc/ovn/ + name: etc-openvswitch + - mountPath: /var/lib/openvswitch + name: var-lib-openvswitch + - mountPath: /env + name: env-overrides + - mountPath: /var/log/ovn + name: node-log + - mountPath: /dev/log + name: log-socket + + terminationMessagePolicy: FallbackToLogsOnError + resources: + requests: + cpu: 10m + memory: 10Mi + # ovnkube node: does node level bookkeeping and configuration + - name: ovnkube-node + image: {{ .ReleaseImage.ovn_kubernetes_microshift }} + command: + - /bin/bash + - -c + - | + set -xe + if [[ -f "/env/_master" ]]; then + set -o allexport + source "/env/_master" + set +o allexport + fi + + # K8S_NODE_IP triggers reconcilation of this daemon when node IP changes + echo "$(date -Iseconds) - starting ovnkube-node, Node: ${K8S_NODE} IP: ${K8S_NODE_IP}" + + echo "I$(date "+%m%d %H:%M:%S.%N") - copy ovn-k8s-cni-overlay" + cp -f /usr/libexec/cni/ovn-k8s-cni-overlay /cni-bin-dir/ + + echo "I$(date "+%m%d %H:%M:%S.%N") - disable conntrack on geneve port" + iptables -t raw -A PREROUTING -p udp --dport 6081 -j NOTRACK + iptables -t raw -A OUTPUT -p udp --dport 6081 -j NOTRACK + ip6tables -t raw -A PREROUTING -p udp --dport 6081 -j NOTRACK + ip6tables -t raw -A OUTPUT -p udp --dport 6081 -j NOTRACK + echo "I$(date "+%m%d %H:%M:%S.%N") - starting ovnkube-node" + + gateway_mode_flags="--gateway-mode local --gateway-interface br-ex" + + sysctl 
net.ipv4.ip_forward=1 + + gw_interface_flag= + # if br-ex1 is configured on the node, we want to use it for external gateway traffic + if [ -d /sys/class/net/br-ex1 ]; then + gw_interface_flag="--exgw-interface=br-ex1" + # the functionality depends on ip_forwarding being enabled + fi + + echo "I$(date "+%m%d %H:%M:%S.%N") - ovnkube-node - start ovnkube --init-node ${K8S_NODE}" + exec /usr/bin/ovnkube \ + --init-node "${K8S_NODE}" \ + --nb-address "{{.OVN_NB_DB_LIST}}" \ + --sb-address "{{.OVN_SB_DB_LIST}}" \ + --config-file=/run/ovnkube-config/ovnkube.conf \ + --loglevel "${OVN_KUBE_LOG_LEVEL}" \ + ${gateway_mode_flags} \ + ${gw_interface_flag} \ + --inactivity-probe="180000" \ + --disable-snat-multiple-gws \ + lifecycle: + preStop: + exec: + command: ["rm","-f","/etc/cni/net.d/10-ovn-kubernetes.conf"] + readinessProbe: + exec: + command: ["test", "-f", "/etc/cni/net.d/10-ovn-kubernetes.conf"] + initialDelaySeconds: 5 + periodSeconds: 5 + volumeMounts: + # for checking ovs-configuration service + - mountPath: /etc/systemd/system + name: systemd-units + readOnly: true + - mountPath: /run/openvswitch/ + name: run-openvswitch + - mountPath: /run/ovn/ + name: run-ovn + - mountPath: /run/ovnkube-config/ + name: ovnkube-config + - mountPath: {{.KubeconfigDir}} + name: kubeconfig + - mountPath: /env + name: env-overrides + - mountPath: /etc/cni/net.d + name: host-cni-netd + - mountPath: /cni-bin-dir + name: host-cni-bin + - mountPath: /run/ovn-kubernetes/ + name: host-run-ovn-kubernetes + - mountPath: /host + name: host-slash + readOnly: true + mountPropagation: HostToContainer + - mountPath: /run/netns + name: host-run-netns + readOnly: true + mountPropagation: HostToContainer + - mountPath: /etc/openvswitch + name: etc-openvswitch-node + - mountPath: /etc/ovn/ + name: etc-openvswitch-node + + resources: + requests: + cpu: 10m + memory: 60Mi + env: + - name: OVN_KUBE_LOG_LEVEL + value: "4" + - name: K8S_NODE + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: 
K8S_NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + securityContext: + privileged: true + terminationMessagePolicy: FallbackToLogsOnError + + nodeSelector: + kubernetes.io/os: "linux" + volumes: + # for checking ovs-configuration service + - name: systemd-units + hostPath: + path: /etc/systemd/system + # used for iptables wrapper scripts + - name: host-slash + hostPath: + path: / + - name: host-run-netns + hostPath: + path: /run/netns + - name: var-lib-openvswitch + hostPath: + path: /var/lib/openvswitch/data + - name: etc-openvswitch + hostPath: + path: /etc/openvswitch + - name: run-openvswitch + hostPath: + path: /var/run/openvswitch + - name: run-ovn + hostPath: + path: /var/run/ovn + + - name: etc-openvswitch-node + hostPath: + path: /etc/openvswitch + # Used for placement of ACL audit logs + - name: node-log + hostPath: + path: /var/log/ovn + - name: log-socket + hostPath: + path: /dev/log + + # For CNI server + - name: host-run-ovn-kubernetes + hostPath: + path: /run/ovn-kubernetes + - name: host-cni-netd + hostPath: + path: "/etc/cni/net.d" + - name: host-cni-bin + hostPath: + path: "/opt/cni/bin" + - name: host-var-lib-cni-networks-ovn-kubernetes + hostPath: + path: /var/lib/cni/networks/ovn-k8s-cni-overlay + - name: kubeconfig + hostPath: + path: /var/lib/microshift/resources/kubeadmin + - name: ovnkube-config + configMap: + name: ovnkube-config + - name: env-overrides + configMap: + name: env-overrides + optional: true + tolerations: + - operator: "Exists" diff --git a/assets/components/ovn/master/daemonset.yaml b/assets/components/ovn/single-node/master/daemonset.yaml similarity index 100% rename from assets/components/ovn/master/daemonset.yaml rename to assets/components/ovn/single-node/master/daemonset.yaml diff --git a/assets/components/ovn/node/daemonset.yaml b/assets/components/ovn/single-node/node/daemonset.yaml similarity index 100% rename from assets/components/ovn/node/daemonset.yaml rename to 
assets/components/ovn/single-node/node/daemonset.yaml diff --git a/pkg/cmd/run.go b/pkg/cmd/run.go index 12e3b0853a..bed7e4eb7c 100644 --- a/pkg/cmd/run.go +++ b/pkg/cmd/run.go @@ -34,19 +34,32 @@ func NewRunMicroshiftCommand() *cobra.Command { cmd := &cobra.Command{ Use: "run", Short: "Run MicroShift", - RunE: func(cmd *cobra.Command, args []string) error { - cfg, err := config.ActiveConfig() - if err != nil { - return err - } - // Things to very badly if the node's name has changed - // since the last time the server started. - err = cfg.EnsureNodeNameHasNotChanged() - if err != nil { - return err - } - return RunMicroshift(cfg) - }, + } + + var multinode bool + + flags := cmd.Flags() + flags.BoolVar(&multinode, "multinode", false, "enable multinode mode") + err := flags.MarkHidden("multinode") + if err != nil { + panic(err) + } + + cmd.RunE = func(cmd *cobra.Command, args []string) error { + cfg, err := config.ActiveConfig() + if err != nil { + return err + } + + cfg = config.ConfigMultiNode(cfg, multinode) + + // Things go very badly if the node's name has changed + // since the last time the server started. 
+ err = cfg.EnsureNodeNameHasNotChanged() + if err != nil { + return err + } + return RunMicroshift(cfg) } return cmd diff --git a/pkg/components/networking.go b/pkg/components/networking.go index 1cc9735d07..44934f2541 100644 --- a/pkg/components/networking.go +++ b/pkg/components/networking.go @@ -14,34 +14,41 @@ import ( func startCNIPlugin(ctx context.Context, cfg *config.Config, kubeconfigPath string) error { var ( ns = []string{ - "components/ovn/namespace.yaml", + "components/ovn/common/namespace.yaml", } sa = []string{ - "components/ovn/node/serviceaccount.yaml", - "components/ovn/master/serviceaccount.yaml", + "components/ovn/common/master-serviceaccount.yaml", + "components/ovn/common/node-serviceaccount.yaml", } r = []string{ - "components/ovn/role.yaml", + "components/ovn/common/role.yaml", } rb = []string{ - "components/ovn/rolebinding.yaml", + "components/ovn/common/rolebinding.yaml", } cr = []string{ - "components/ovn/clusterrole.yaml", + "components/ovn/common/clusterrole.yaml", } crb = []string{ - "components/ovn/clusterrolebinding.yaml", + "components/ovn/common/clusterrolebinding.yaml", } cm = []string{ - "components/ovn/configmap.yaml", + "components/ovn/common/configmap.yaml", } apps = []string{ - "components/ovn/master/daemonset.yaml", - "components/ovn/node/daemonset.yaml", + "components/ovn/single-node/master/daemonset.yaml", + "components/ovn/single-node/node/daemonset.yaml", } ) - ovnConfig, err := ovn.NewOVNKubernetesConfigFromFileOrDefault(filepath.Dir(config.ConfigFile)) + if cfg.MultiNode.Enabled { + apps = []string{ + "components/ovn/multi-node/master/daemonset.yaml", + "components/ovn/multi-node/node/daemonset.yaml", + } + } + + ovnConfig, err := ovn.NewOVNKubernetesConfigFromFileOrDefault(filepath.Dir(config.ConfigFile), cfg.MultiNode.Enabled) if err != nil { return err } @@ -74,10 +81,16 @@ func startCNIPlugin(ctx context.Context, cfg *config.Config, kubeconfigPath stri klog.Warningf("Failed to apply clusterRoleBinding %v %v", crb, 
err) return err } + + // Multinode only params: OVN_NB_DB_LIST, OVN_SB_DB_LIST, OVN_NB_PORT, OVN_SB_PORT extraParams := assets.RenderParams{ "OVNConfig": ovnConfig, "KubeconfigPath": kubeconfigPath, "KubeconfigDir": filepath.Join(config.DataDir, "/resources/kubeadmin"), + "OVN_NB_DB_LIST": fmt.Sprintf("tcp:%s:%s", cfg.MultiNode.Controlplane, ovn.OVN_NB_PORT), + "OVN_SB_DB_LIST": fmt.Sprintf("tcp:%s:%s", cfg.MultiNode.Controlplane, ovn.OVN_SB_PORT), + "OVN_NB_PORT": ovn.OVN_NB_PORT, + "OVN_SB_PORT": ovn.OVN_SB_PORT, } if err := assets.ApplyConfigMaps(ctx, cm, renderTemplate, renderParamsFromConfig(cfg, extraParams), kubeconfigPath); err != nil { klog.Warningf("Failed to apply configMap %v %v", cm, err) diff --git a/pkg/config/config.go b/pkg/config/config.go index 54ea70cd60..0f888e20b1 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -36,6 +36,8 @@ type Config struct { // Internal-only fields Ingress IngressConfig `json:"-"` userSettings *Config `json:"-"` // the values read from the config file + + MultiNode MultiNodeConfig `json:"-"` // the value read from command line } // NewDefault creates a new Config struct populated with the @@ -110,6 +112,8 @@ func (c *Config) fillDefaults() error { KustomizePaths: []string{defaultManifestDirLib, defaultManifestDirEtc}, } + c.MultiNode.Enabled = false + return nil } diff --git a/pkg/config/multinode.go b/pkg/config/multinode.go new file mode 100644 index 0000000000..4770e2dc42 --- /dev/null +++ b/pkg/config/multinode.go @@ -0,0 +1,24 @@ +package config + +type MultiNodeConfig struct { + Enabled bool `json:"enabled"` + // only one controlplane node is supported + // IP address of control plane node + Controlplane string `json:"controlplane"` +} + +// ConfigMultiNode populates multinode configurations to Config.MultiNode +func ConfigMultiNode(c *Config, enabled bool) *Config { + if !enabled { + return c + } + c.MultiNode.Enabled = enabled + c.MultiNode.Controlplane = c.Node.NodeIP + + // Use controlplane node 
IP as APIServer backend (instead of 10.44.0.0) + c.ApiServer.AdvertiseAddress = c.Node.NodeIP + // Don't configure 10.44.0.0 on lo device + c.ApiServer.SkipInterface = true + + return c +} diff --git a/pkg/config/ovn/ovn.go b/pkg/config/ovn/ovn.go index fff4384ffe..f064b333c0 100644 --- a/pkg/config/ovn/ovn.go +++ b/pkg/config/ovn/ovn.go @@ -19,6 +19,15 @@ const ( defaultMTU = 1500 OVNKubernetesV4MasqueradeIP = "169.254.169.2" OVNKubernetesV6MasqueradeIP = "fd69::2" + + // used for multinode ovn database transport + OVN_NB_PORT = "9641" + OVN_SB_PORT = "9642" + + // geneve header length for IPv4 + GeneveHeaderLengthIPv4 = 58 + // geneve header length for IPv6 + GeneveHeaderLengthIPv6 = GeneveHeaderLengthIPv4 + 20 ) type OVNKubernetesConfig struct { @@ -90,25 +99,29 @@ func (o *OVNKubernetesConfig) validateConfig() error { return nil } -// getSystemMTU retrieves MTU from ovn-kubernetes gateway interafce "br-ex", +// getClusterMTU retrieves MTU from ovn-kubernetes gateway interface "br-ex", // and falls back to use 1500 when "br-ex" mtu is unable to get or less than 0. 
-func (o *OVNKubernetesConfig) getSystemMTU() { +func (o *OVNKubernetesConfig) getClusterMTU(multinode bool) { link, err := net.InterfaceByName(OVNGatewayInterface) if err == nil && link.MTU > 0 { o.MTU = link.MTU } else { o.MTU = defaultMTU } + + if multinode { + o.MTU = o.MTU - GeneveHeaderLengthIPv6 + } } // withDefaults returns the default values when ovn.yaml is not provided -func (o *OVNKubernetesConfig) withDefaults() *OVNKubernetesConfig { +func (o *OVNKubernetesConfig) withDefaults(multinode bool) *OVNKubernetesConfig { o.OVSInit.DisableOVSInit = false - o.getSystemMTU() + o.getClusterMTU(multinode) return o } -func newOVNKubernetesConfigFromFile(path string) (*OVNKubernetesConfig, error) { +func newOVNKubernetesConfigFromFile(path string, multinode bool) (*OVNKubernetesConfig, error) { o := new(OVNKubernetesConfig) buf, err := os.ReadFile(path) if err != nil { @@ -121,24 +134,24 @@ func newOVNKubernetesConfigFromFile(path string) (*OVNKubernetesConfig, error) { } // in case mtu is not defined if o.MTU == 0 { - o.getSystemMTU() + o.getClusterMTU(multinode) } klog.Infof("parsed OVNKubernetes config from file %q: %+v", path, o) return o, nil } -func NewOVNKubernetesConfigFromFileOrDefault(dir string) (*OVNKubernetesConfig, error) { +func NewOVNKubernetesConfigFromFileOrDefault(dir string, multinode bool) (*OVNKubernetesConfig, error) { path := filepath.Join(dir, ovnConfigFileName) if _, err := os.Stat(path); err != nil { if errors.Is(err, os.ErrNotExist) { klog.Infof("OVNKubernetes config file not found, assuming default values") - return new(OVNKubernetesConfig).withDefaults(), nil + return new(OVNKubernetesConfig).withDefaults(multinode), nil } return nil, fmt.Errorf("failed to get OVNKubernetes config file: %v", err) } - o, err := newOVNKubernetesConfigFromFile(path) + o, err := newOVNKubernetesConfigFromFile(path, multinode) if err == nil { return o, nil } diff --git a/pkg/config/ovn/ovn_test.go b/pkg/config/ovn/ovn_test.go index b8a8de4505..54a885b0fe 
100644 --- a/pkg/config/ovn/ovn_test.go +++ b/pkg/config/ovn/ovn_test.go @@ -15,7 +15,7 @@ func TestNewOVNKubernetesConfigFromFileOrDefault(t *testing.T) { } for _, tt := range ttests { - _, err := NewOVNKubernetesConfigFromFileOrDefault(tt.configFile) + _, err := NewOVNKubernetesConfigFromFileOrDefault(tt.configFile, false) if (err != nil) != (tt.err != nil) { t.Errorf("NewOVNKubernetesConfigFromFileOrDefault() error = %v, wantErr %v", err, tt.err) } diff --git a/scripts/auto-rebase/assets.yaml b/scripts/auto-rebase/assets.yaml index e937441a8b..0f214d4452 100644 --- a/scripts/auto-rebase/assets.yaml +++ b/scripts/auto-rebase/assets.yaml @@ -76,22 +76,33 @@ assets: - dir: components/ovn/ ignore: "it's not covered by rebase script yet" - files: - - file: clusterrole.yaml - - file: clusterrolebinding.yaml - - file: configmap.yaml - - file: namespace.yaml - - file: role.yaml - - file: rolebinding.yaml dirs: - - dir: master/ - files: - - file: daemonset.yaml - - file: serviceaccount.yaml - - dir: node/ + - dir: common/ files: - - file: daemonset.yaml - - file: serviceaccount.yaml + - file: clusterrole.yaml + - file: clusterrolebinding.yaml + - file: configmap.yaml + - file: master-serviceaccount.yaml + - file: namespace.yaml + - file: node-serviceaccount.yaml + - file: role.yaml + - file: rolebinding.yaml + - dir: multi-node/ + dirs: + - dir: master/ + files: + - file: daemonset.yaml + - dir: node/ + files: + - file: daemonset.yaml + - dir: single-node/ + dirs: + - dir: master/ + files: + - file: daemonset.yaml + - dir: node/ + files: + - file: daemonset.yaml - dir: components/service-ca/ src: service-ca-operator/bindata/v4.0.0/controller/ From bc3a956cf5d3f29f25c7a3f3dbb5dbe5c74ab4d3 Mon Sep 17 00:00:00 2001 From: Zenghui Shi Date: Mon, 10 Apr 2023 16:28:58 +0800 Subject: [PATCH 2/2] update etcd/vendor --- .../openshift/microshift/pkg/config/config.go | 4 +++ .../microshift/pkg/config/multinode.go | 24 ++++++++++++++ .../microshift/pkg/config/ovn/ovn.go | 31 
+++++++++++++------ 3 files changed, 50 insertions(+), 9 deletions(-) create mode 100644 etcd/vendor/github.com/openshift/microshift/pkg/config/multinode.go diff --git a/etcd/vendor/github.com/openshift/microshift/pkg/config/config.go b/etcd/vendor/github.com/openshift/microshift/pkg/config/config.go index 54ea70cd60..0f888e20b1 100644 --- a/etcd/vendor/github.com/openshift/microshift/pkg/config/config.go +++ b/etcd/vendor/github.com/openshift/microshift/pkg/config/config.go @@ -36,6 +36,8 @@ type Config struct { // Internal-only fields Ingress IngressConfig `json:"-"` userSettings *Config `json:"-"` // the values read from the config file + + MultiNode MultiNodeConfig `json:"-"` // the value read from the command line } // NewDefault creates a new Config struct populated with the @@ -110,6 +112,8 @@ func (c *Config) fillDefaults() error { KustomizePaths: []string{defaultManifestDirLib, defaultManifestDirEtc}, } + c.MultiNode.Enabled = false + return nil } diff --git a/etcd/vendor/github.com/openshift/microshift/pkg/config/multinode.go b/etcd/vendor/github.com/openshift/microshift/pkg/config/multinode.go new file mode 100644 index 0000000000..4770e2dc42 --- /dev/null +++ b/etcd/vendor/github.com/openshift/microshift/pkg/config/multinode.go @@ -0,0 +1,24 @@ +package config + +type MultiNodeConfig struct { + Enabled bool `json:"enabled"` + // only one controlplane node is supported + // IP address of control plane node + Controlplane string `json:"controlplane"` +} + +// ConfigMultiNode populates multinode configurations to Config.MultiNode +func ConfigMultiNode(c *Config, enabled bool) *Config { + if !enabled { + return c + } + c.MultiNode.Enabled = enabled + c.MultiNode.Controlplane = c.Node.NodeIP + + // Use controlplane node IP as APIServer backend (instead of 10.44.0.0) + c.ApiServer.AdvertiseAddress = c.Node.NodeIP + // Don't configure 10.44.0.0 on lo device + c.ApiServer.SkipInterface = true + + return c +} diff --git
a/etcd/vendor/github.com/openshift/microshift/pkg/config/ovn/ovn.go b/etcd/vendor/github.com/openshift/microshift/pkg/config/ovn/ovn.go index fff4384ffe..f064b333c0 100644 --- a/etcd/vendor/github.com/openshift/microshift/pkg/config/ovn/ovn.go +++ b/etcd/vendor/github.com/openshift/microshift/pkg/config/ovn/ovn.go @@ -19,6 +19,15 @@ const ( defaultMTU = 1500 OVNKubernetesV4MasqueradeIP = "169.254.169.2" OVNKubernetesV6MasqueradeIP = "fd69::2" + + // used for multinode ovn database transport + OVN_NB_PORT = "9641" + OVN_SB_PORT = "9642" + + // geneve header length for IPv4 + GeneveHeaderLengthIPv4 = 58 + // geneve header length for IPv6 + GeneveHeaderLengthIPv6 = GeneveHeaderLengthIPv4 + 20 ) type OVNKubernetesConfig struct { @@ -90,25 +99,29 @@ func (o *OVNKubernetesConfig) validateConfig() error { return nil } -// getSystemMTU retrieves MTU from ovn-kubernetes gateway interafce "br-ex", +// getClusterMTU retrieves MTU from ovn-kubernetes gateway interface "br-ex", // and falls back to use 1500 when "br-ex" mtu is unable to get or less than 0. 
-func (o *OVNKubernetesConfig) getSystemMTU() { +func (o *OVNKubernetesConfig) getClusterMTU(multinode bool) { link, err := net.InterfaceByName(OVNGatewayInterface) if err == nil && link.MTU > 0 { o.MTU = link.MTU } else { o.MTU = defaultMTU } + + if multinode { + o.MTU = o.MTU - GeneveHeaderLengthIPv6 + } } // withDefaults returns the default values when ovn.yaml is not provided -func (o *OVNKubernetesConfig) withDefaults() *OVNKubernetesConfig { +func (o *OVNKubernetesConfig) withDefaults(multinode bool) *OVNKubernetesConfig { o.OVSInit.DisableOVSInit = false - o.getSystemMTU() + o.getClusterMTU(multinode) return o } -func newOVNKubernetesConfigFromFile(path string) (*OVNKubernetesConfig, error) { +func newOVNKubernetesConfigFromFile(path string, multinode bool) (*OVNKubernetesConfig, error) { o := new(OVNKubernetesConfig) buf, err := os.ReadFile(path) if err != nil { @@ -121,24 +134,24 @@ func newOVNKubernetesConfigFromFile(path string) (*OVNKubernetesConfig, error) { } // in case mtu is not defined if o.MTU == 0 { - o.getSystemMTU() + o.getClusterMTU(multinode) } klog.Infof("parsed OVNKubernetes config from file %q: %+v", path, o) return o, nil } -func NewOVNKubernetesConfigFromFileOrDefault(dir string) (*OVNKubernetesConfig, error) { +func NewOVNKubernetesConfigFromFileOrDefault(dir string, multinode bool) (*OVNKubernetesConfig, error) { path := filepath.Join(dir, ovnConfigFileName) if _, err := os.Stat(path); err != nil { if errors.Is(err, os.ErrNotExist) { klog.Infof("OVNKubernetes config file not found, assuming default values") - return new(OVNKubernetesConfig).withDefaults(), nil + return new(OVNKubernetesConfig).withDefaults(multinode), nil } return nil, fmt.Errorf("failed to get OVNKubernetes config file: %v", err) } - o, err := newOVNKubernetesConfigFromFile(path) + o, err := newOVNKubernetesConfigFromFile(path, multinode) if err == nil { return o, nil }