1.Network Services Controller
kubectl expose deployment myip --port=8080 --target-port=8080 --type=NodePort --session-affinity=ClientIP
--session-affinity=ClientIP
不加该参数时,通过cluster_ip访问服务采用轮询策略
加上该参数表示开启会话保持:来自同一个客户端IP的请求会被固定转发到同一个后端pod上
2.Network Policy Controller (同一个namespace和不同namespace之间)
控制namespace内部和外部网络隔离
kubectl annotate ns production "net.beta.kubernetes.io/network-policy={\"ingress\": {\"isolation\": \"DefaultDeny\"}}"
3.Network Routes Controller(跨主机通过BGP)
1.路由规则会被添加到每台主机上
查看bgp的网络邻居
gobgp neighbor -u kube-node3
查看RIB
gobgp global rib -u kube-node3
查看节点上的路由表
ip route
4.工作原理
其中每个controller遵循如下的结构
func Run() {
for {
Sync() // control loop that runs forever and performs sync at a periodic interval
}
}
func OnUpdate() {
Sync() // on receiving update of a watched API object (namespace, node, pod, network policy etc)
}
Sync() {
// reconcile any state changes
}
Cleanup() {
// cleanup any changes (to iptables, ipvs, network etc) done to the system
}
5.kube-router安装
5.1 说明
kube-router组件取代kube-proxy,用lvs做svc负载均衡,更快稳定。
cluster-ip,external-ip 全网路由
解决iptables 性能和负载聚合问题
还有iptables 负载NAT 丢失源ip问题
5.2 参数说明及部署
① Kube-router 从API server 获取pods, services, endpoints, network policies等信息,所有这些信息都必须通过API server获得。API server的地址可以通过 kube-router --master=http://192.168.1.99:8080/ 或者 kube-router --kubeconfig=<path to kubeconfig file> 指定。
②如果想把kube-router 作为agent运行在节点上,必须事先在每个节点上安装ipset (如果作为daemonset运行,镜像中已经预置ipset)。
③如果想通过kube-router实现pod-to-pod连接,那么需要在controller manager中配置CIDRs用于各pod分配IP.通过如下参数设置:
--allocate-node-cidrs=true
--cluster-cidr=10.1.0.0/16
④kube-router作为daemonset时,需要 kube-apiserver设置--allow-privileged=true
⑤如果想通过kube-router实现pod-to-pod连接,必须安装CNI 网络插件,主要会用到bridge 和 host-local 两个CNI插件。配置文件示例见 https://raw.githubusercontent.com/cloudnativelabs/kube-router/master/cni/10-kuberouter.conf
参数说明
--run-router=true
#启用Pod网络 - 通过iBGP发布并学习到Pod的路由。 (默认为true)
--run-firewall=true
#启用网络策略 - 设置iptables为pod提供入口防火墙。 (默认为true)
--run-service-proxy=true
#启用服务代理 - 为Kubernetes服务设置IPVS。 (默认为true)
--advertise-cluster-ip=true
#将该服务的集群IP添加到RIB,以便通告给BGP peers.
--advertise-external-ip=true
#将服务的外部IP添加到RIB,以便将其通告给BGP peers.
--cluster-asn=64512
#集群自身节点运行iBGP的ASN编号.
--peer-router-ips=10.129.6.8
#所有节点将与之建立BGP对等关系的外部路由器的IP地址,节点会向其通告集群ip和pod cidr。 (默认[])
--peer-router-asns=64513
#集群节点将向其通告集群ip和节点的pod cidr的BGP peers的ASN编号。 (默认[])
部署 kubectl apply -f kube-router.yml
apiVersion: v1
kind: ConfigMap
metadata:
name: kube-router-cfg
namespace: kube-system
labels:
tier: node
k8s-app: kube-router
data:
cni-conf.json: |
{
"name":"kubernetes",
"type":"bridge",
"bridge":"kube-bridge",
"isDefaultGateway":true,
"ipam": {
"type":"host-local"
}
}
---
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
labels:
k8s-app: kube-router
tier: node
name: kube-router
namespace: kube-system
spec:
template:
metadata:
labels:
k8s-app: kube-router
tier: node
annotations:
scheduler.alpha.kubernetes.io/critical-pod: ''
spec:
serviceAccountName: kube-router
serviceAccount: kube-router
containers:
- name: kube-router
image: registry.ipscloud.com/kube-router:v1.0 #修改镜像
# imagePullPolicy: Always #注释默认使用本地已有的
args:
- --run-router=true
- --run-firewall=true
- --run-service-proxy=true
- --kubeconfig=/var/lib/kube-router/kubeconfig
env:
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
resources:
requests:
cpu: 250m
memory: 250Mi
securityContext:
privileged: true
volumeMounts:
- name: lib-modules
mountPath: /lib/modules
readOnly: true
- name: cni-conf-dir
mountPath: /etc/cni/net.d
- name: kubeconfig
mountPath: /var/lib/kube-router/kubeconfig
- name: run
mountPath: /var/run/docker.sock
readOnly: true
initContainers:
- name: install-cni
image: registry.ipscloud.com/busybox:v1.0 #修改镜像
# imagePullPolicy: Always #注释默认使用本地已有的
command:
- /bin/sh
- -c
- set -e -x;
if [ ! -f /etc/cni/net.d/10-kuberouter.conf ]; then
TMP=/etc/cni/net.d/.tmp-kuberouter-cfg;
cp /etc/kube-router/cni-conf.json ${TMP};
mv ${TMP} /etc/cni/net.d/10-kuberouter.conf;
fi
volumeMounts:
- name: cni-conf-dir
mountPath: /etc/cni/net.d
- name: kube-router-cfg
mountPath: /etc/kube-router
hostNetwork: true
hostIPC: true
hostPID: true
tolerations:
- key: CriticalAddonsOnly
operator: Exists
- effect: NoSchedule
key: node-role.kubernetes.io/master
operator: Exists
volumes:
- name: lib-modules
hostPath:
path: /lib/modules
- name: cni-conf-dir
hostPath:
path: /etc/cni/net.d
- name: run
hostPath:
path: /var/run/docker.sock
- name: kube-router-cfg
configMap:
name: kube-router-cfg
- name: kubeconfig
hostPath:
path: /data/cloud/ssl/admin.conf #修改成自己的认证文件位置
# configMap:
# name: kube-proxy
# items:
# - key: kubeconfig.conf
# path: kubeconfig
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: kube-router
namespace: kube-system
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
name: kube-router
namespace: kube-system
rules:
- apiGroups:
- ""
resources:
- namespaces
- pods
- services
- nodes
- endpoints
verbs:
- list
- get
- watch
- apiGroups:
- "networking.k8s.io"
resources:
- networkpolicies
verbs:
- list
- get
- watch
- apiGroups:
- extensions
resources:
- networkpolicies
verbs:
- get
- list
- watch
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
name: kube-router
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: kube-router
subjects:
- kind: ServiceAccount
name: kube-router
namespace: kube-system
5.3 验证
Network Services Controller
#查看主机分配网段
[root@host229 ~]# kubectl get nodes -o json | jq '.items[] | .spec'
{
"podCIDR": "172.16.1.0/24"
}
{
"podCIDR": "172.16.3.0/24"
}
{
"podCIDR": "172.16.2.0/24"
}
{
"podCIDR": "172.16.0.0/24"
}
[root@host229 yaml]# kubectl run myip --image=cloudnativelabs/whats-my-ip --replicas=3 --port=8080
deployment.apps/myip created
[root@host229 yaml]# kubectl get pod -o wide
NAME READY STATUS RESTARTS AGE IP NODE
myip-5fc5cf6476-jmjzh 1/1 Running 0 28s 172.16.0.2 host229
myip-5fc5cf6476-qh546 1/1 Running 0 28s 172.16.1.2 host227
myip-5fc5cf6476-z7ccm 1/1 Running 0 28s 172.16.2.2 host228
[root@host229 yaml]# kubectl expose deployment myip --port=8080 --target-port=8080 --type=NodePort
#-------------------不同主机都可以访问------------------
[root@host229 yaml]# curl host227:31007
HOSTNAME:myip-5fc5cf6476-qh546 IP:172.16.1.2
[root@host229 yaml]# curl host228:31007
HOSTNAME:myip-5fc5cf6476-z7ccm IP:172.16.2.2
[root@host229 yaml]# curl host229:31007
HOSTNAME:myip-5fc5cf6476-qh546 IP:172.16.1.2
[root@host229 yaml]# ipvsadm -Ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 10.20.16.229:31007 rr
-> 172.16.0.2:8080 Masq 1 0 0
-> 172.16.1.2:8080 Masq 1 0 1
-> 172.16.2.2:8080 Masq 1 0 0
TCP 10.254.0.1:443 rr
-> 10.20.16.229:6443 Masq 1 0 0
TCP 10.254.62.77:8080 rr
-> 172.16.0.2:8080 Masq 1 0 0
-> 172.16.1.2:8080 Masq 1 0 0
-> 172.16.2.2:8080 Masq 1 0 0
# ----------------------删除svc,创建新的svc,并设置session persistence--------
[root@host229 yaml]# kubectl get svc -o wide
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE SELECTOR
kubernetes ClusterIP 10.254.0.1 <none> 443/TCP 52m <none>
myip NodePort 10.254.62.77 <none> 8080:31007/TCP 4m run=myip
[root@host229 yaml]# kubectl delete svc myip
service "myip" deleted
[root@host229 yaml]# kubectl expose deployment myip --port=8080 --target-port=8080 --type=NodePort --session-affinity=ClientIP
service/myip exposed
#------------------再次访问发现同一个终端每次请求到pod为同一个-------------
[root@host229 yaml]# curl 10.254.102.139:8080
HOSTNAME:myip-5fc5cf6476-z7ccm IP:172.16.2.2
[root@host229 yaml]# curl 10.254.102.139:8080
HOSTNAME:myip-5fc5cf6476-z7ccm IP:172.16.2.2
[root@host229 yaml]# curl 10.254.102.139:8080
HOSTNAME:myip-5fc5cf6476-z7ccm IP:172.16.2.2
[root@host229 yaml]# curl 10.254.102.139:8080
HOSTNAME:myip-5fc5cf6476-z7ccm IP:172.16.2.2
Network Policy Controller(基于iptables )
# 创建两个命名空间production 和 staging
[root@host229 ~]# kubectl create namespace production
namespace/production created
[root@host229 ~]# kubectl create namespace staging
namespace/staging created
# 在两个命名空间中创建guestbook-all-in-one.yaml
# yaml 样例可到github的kubernetes的example中找,见https://github.com/kubernetes/examples/tree/master/guestbook/all-in-one
[root@host229 yaml]# kubectl -n production apply -f guestbook-all-in-one.yml
service/redis-master created
deployment.apps/redis-master created
service/redis-slave created
deployment.apps/redis-slave created
service/frontend created
deployment.apps/frontend created
[root@host229 yaml]# kubectl -n staging apply -f guestbook-all-in-one.yml
service/redis-master created
deployment.apps/redis-master created
service/redis-slave created
deployment.apps/redis-slave created
service/frontend created
deployment.apps/frontend created
[root@host229 ~]# kubectl get pod --all-namespaces -o wide
NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE
kube-system kube-router-49nc6 1/1 Running 0 2h 10.20.16.229 host229
kube-system kube-router-lqzmq 1/1 Running 0 2h 10.20.16.228 host228
kube-system kube-router-nd46r 1/1 Running 0 2h 10.20.16.227 host227
kube-system kube-router-vvdp8 1/1 Running 0 2h 10.20.16.214 host214
production frontend-56f7975f44-654lz 1/1 Running 0 43m 172.16.2.3 host228
production frontend-56f7975f44-jlxcd 1/1 Running 0 43m 172.16.0.4 host229
production frontend-56f7975f44-s5wrg 1/1 Running 0 43m 172.16.1.3 host227
production redis-master-6b464554c8-cwbk2 1/1 Running 0 43m 172.16.3.2 host214
production redis-slave-b58dc4644-lqgrm 1/1 Running 0 43m 172.16.3.3 host214
production redis-slave-b58dc4644-wmqzc 1/1 Running 0 43m 172.16.0.3 host229
staging frontend-56f7975f44-7kncx 1/1 Running 0 40m 172.16.0.5 host229
staging frontend-56f7975f44-stvsq 1/1 Running 0 40m 172.16.1.5 host227
staging frontend-56f7975f44-zm7km 1/1 Running 0 40m 172.16.2.5 host228
staging redis-master-6b464554c8-rjl68 1/1 Running 0 40m 172.16.2.4 host228
staging redis-slave-b58dc4644-744sz 1/1 Running 0 40m 172.16.3.4 host214
staging redis-slave-b58dc4644-kv859 1/1 Running 0 40m 172.16.1.4 host227
# ----------- 相同命名空间-----------
[root@host229 ~]# kubectl -n production exec -it frontend-56f7975f44-654lz ping 172.16.3.2
PING 172.16.3.2 (172.16.3.2): 56 data bytes
64 bytes from 172.16.3.2: icmp_seq=0 ttl=62 time=0.535 ms
64 bytes from 172.16.3.2: icmp_seq=1 ttl=62 time=0.227 ms
64 bytes from 172.16.3.2: icmp_seq=2 ttl=62 time=0.231 ms
# ----------- 不相同命名空间-----------
[root@host229 ~]# kubectl -n production exec -it frontend-56f7975f44-654lz ping 172.16.2.4
PING 172.16.2.4 (172.16.2.4): 56 data bytes
64 bytes from 172.16.2.4: icmp_seq=0 ttl=64 time=0.206 ms
64 bytes from 172.16.2.4: icmp_seq=1 ttl=64 time=0.085 ms
64 bytes from 172.16.2.4: icmp_seq=2 ttl=64 time=0.072 ms
网络的隔离策略
#允许所有入口和出口
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: allow-all
spec:
podSelector: {}
ingress:
- {}
egress:
- {}
#禁止所有入口和出口
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: default-deny
spec:
podSelector: {}
policyTypes:
- Ingress
- Egress
#禁止所有出口
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: default-deny
spec:
podSelector: {}
policyTypes:
- Egress
#开放特定入口和出口
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: test-network-policy
namespace: default
spec:
podSelector:
matchLabels:
role: db
policyTypes:
- Ingress
- Egress
ingress:
- from:
- ipBlock:
cidr: 172.17.0.0/16
except:
- 172.17.1.0/24
- namespaceSelector:
matchLabels:
project: myproject
- podSelector:
matchLabels:
role: frontend
ports:
- protocol: TCP
port: 6379
egress:
- to:
- ipBlock:
cidr: 10.0.0.0/24
ports:
- protocol: TCP
port: 5978
Network Routes Controller
①启用hairpin traffic
kubectl annotate service my-service "kube-router.io/service.hairpin="
②启用DSR(DSR将仅适用于外部IP)
kubectl annotate service my-service "kube-router.io/service.dsr=tunnel"
③负载均衡调度算法(默认轮询)
#最少连接
kubectl annotate service my-service "kube-router.io/service.scheduler=lc"
#轮询
kubectl annotate service my-service "kube-router.io/service.scheduler=rr"
#源地址hash
kubectl annotate service my-service "kube-router.io/service.scheduler=sh"
#目标hash
kubectl annotate service my-service "kube-router.io/service.scheduler=dh"