Set Up a BGP Route Reflector for Calico Nodes

| 分类 Network  | 标签 calico 

By default, Calico enables a full node-to-node mesh, in which each Calico node automatically sets up a BGP peering with every other Calico node in the network.

However, a full node-to-node mesh is only useful for small-scale deployments, and only where all Calico nodes are on the same L2 network.

We can disable the full node-to-node mesh by setting up a Route Reflector (or a set of Route Reflectors), so that each Calico node peers only with the Route Reflector(s).

For more details, see references [1] and [2].

Environment

172.17.42.30 kube-master
172.17.42.31 kube-node1
172.17.42.32 kube-node2
172.17.42.40 node1
[root@kube-node1 ~]# calicoctl bgp node-mesh                
on

[root@kube-node1 ~]# ip route show
default via 172.17.42.1 dev eth0 
172.17.0.0/16 dev eth0  proto kernel  scope link  src 172.17.42.31 
blackhole 192.168.0.0/26  proto bird 
192.168.0.2 dev cali1f3c9fa633a  scope link 
192.168.0.64/26 via 172.17.42.32 dev eth0  proto bird

[root@kube-node2 ~]# ip route show
default via 172.17.42.1 dev eth0 
172.17.0.0/16 dev eth0  proto kernel  scope link  src 172.17.42.32 
192.168.0.0/26 via 172.17.42.31 dev eth0  proto bird 
192.168.0.64 dev cali03adc9f233a  scope link 
blackhole 192.168.0.64/26  proto bird 
192.168.0.65 dev calicb1a3b2633b  scope link

Setup Route Reflector

[root@kube-node1 ~]# calicoctl bgp node-mesh off

[root@kube-node1 ~]# ip route show              
default via 172.17.42.1 dev eth0 
172.17.0.0/16 dev eth0  proto kernel  scope link  src 172.17.42.31 
blackhole 192.168.0.0/26  proto bird 
192.168.0.2 dev cali1f3c9fa633a  scope link

[root@kube-node2 ~]# ip route show
default via 172.17.42.1 dev eth0 
172.17.0.0/16 dev eth0  proto kernel  scope link  src 172.17.42.32 
192.168.0.64 dev cali03adc9f233a  scope link 
blackhole 192.168.0.64/26  proto bird 
192.168.0.65 dev calicb1a3b2633b  scope link

The route entry 192.168.0.64/26 on kube-node1 is removed after disabling the full node-to-node BGP mesh.

  • Run BIRD Route Reflector on node1
# docker run --privileged --net=host -d -e IP=172.17.42.40 -e ETCD_AUTHORITY=172.17.42.30:2379 -v /var/log/:/var/log/ calico/routereflector:latest
  • Add the Route Reflector to etcd
# curl -L http://172.17.42.30:2379/v2/keys/calico/bgp/v1/rr_v4/172.17.42.40 -XPUT -d value="{\"ip\":\"172.17.42.40\",\"cluster_id\":\"1.0.0.1\"}"
  • Configure every node to peer with each of the Route Reflectors
[root@kube-node1 ~]# calicoctl bgp peer add 172.17.42.40 as 65100    
[root@kube-node1 ~]# calicoctl bgp peer show                     
+----------------------+--------+
| Global IPv4 BGP Peer | AS Num |
+----------------------+--------+
| 172.17.42.40         | 65100  |
+----------------------+--------+
No global IPv6 BGP Peers defined.

The BIRD daemon on the Route Reflector will connect to every Calico node, and the route entries will be recreated automatically.

[root@node1 ~]# netstat -tnp|grep 179
tcp        0      0 172.17.42.40:54395      172.17.42.31:179        ESTABLISHED 27782/bird          
tcp        0      0 172.17.42.40:56733      172.17.42.30:179        ESTABLISHED 27782/bird          
tcp        0      0 172.17.42.40:58889      172.17.42.32:179        ESTABLISHED 27782/bird

[root@kube-node1 ~]# ip route show
default via 172.17.42.1 dev eth0 
172.17.0.0/16 dev eth0  proto kernel  scope link  src 172.17.42.31 
blackhole 192.168.0.0/26  proto bird 
192.168.0.2 dev cali1f3c9fa633a  scope link 
192.168.0.64/26 via 172.17.42.32 dev eth0  proto bird

[root@kube-master ~]# ip route show
default via 172.17.42.1 dev eth0 
172.17.0.0/16 dev eth0  proto kernel  scope link  src 172.17.42.30  
192.168.0.0/26 via 172.17.42.31 dev eth0  proto bird 
192.168.0.64/26 via 172.17.42.32 dev eth0  proto bird

[root@kube-node2 ~]# ip route show
default via 172.17.42.1 dev eth0 
172.17.0.0/16 dev eth0  proto kernel  scope link  src 172.17.42.32 
192.168.0.0/26 via 172.17.42.31 dev eth0  proto bird 
192.168.0.64 dev cali03adc9f233a  scope link 
blackhole 192.168.0.64/26  proto bird 
192.168.0.65 dev calicb1a3b2633b  scope link

For redundancy, multiple BGP route reflectors can be deployed seamlessly. The route reflectors are purely involved in the control of the network: no endpoint data passes through them.

  • BIRD configuration of the Route Reflector
[root@node1 ~]# docker exec 56854e7cb79a cat /config/bird.cfg
# Generated by confd
router id 172.17.42.40;

# Watch interface up/down events.
protocol device {
  scan time 2;    # Scan interfaces every 2 seconds
}

# Template for all BGP clients
template bgp bgp_template {
  debug all;
  description "Connection to BGP peer";
  multihop;
  import all;        # Import all routes, since we don't know what the upstream
                     # topology is and therefore have to trust the ToR/RR.
  export all;        # Export all.
  source address 172.17.42.40;  # The local address we use for the TCP connection
  graceful restart;  # See comment in kernel section about graceful restart.
}




# ------------- RR-to-RR full mesh -------------



# For RR 172.17.42.40
# Skipping ourselves




# ------------- RR as a global peer -------------



# This RR is a global peer with *all* calico nodes.




# Peering with Calico node kube-master
protocol bgp Global_172_17_42_30 from bgp_template {
  local as 65100;
  neighbor 172.17.42.30 as 65100;
  rr client;
  rr cluster id 1.0.0.1;
}




# Peering with Calico node kube-node1
protocol bgp Global_172_17_42_31 from bgp_template {
  local as 65100;
  neighbor 172.17.42.31 as 65100;
  rr client;
  rr cluster id 1.0.0.1;
}




# Peering with Calico node kube-node2
protocol bgp Global_172_17_42_32 from bgp_template {
  local as 65100;
  neighbor 172.17.42.32 as 65100;
  rr client;
  rr cluster id 1.0.0.1;
}






# ------------- RR as a node-specific peer -------------
  • BIRD configuration of a Calico node
[root@kube-node1 ~]# docker exec e234b4e9dce7 cat /etc/calico/confd/config/bird.cfg
# Generated by confd
include "bird_aggr.cfg";
include "bird_ipam.cfg";

router id 172.17.42.31;



# Configure synchronization between routing tables and kernel.
protocol kernel {
  learn;             # Learn all alien routes from the kernel
  persist;           # Don't remove routes on bird shutdown
  scan time 2;       # Scan kernel routing table every 2 seconds
  import all;
  export filter calico_ipip; # Default is export none
  graceful restart;  # Turn on graceful restart to reduce potential flaps in
                     # routes when reloading BIRD configuration.  With a full
                     # automatic mesh, there is no way to prevent BGP from
                     # flapping since multiple nodes update their BGP
                     # configuration at the same time, GR is not guaranteed to
                     # work correctly in this scenario.
}

# Watch interface up/down events.
protocol device {
  
  debug { states };

  scan time 2;    # Scan interfaces every 2 seconds
}

protocol direct {
  
  debug { states };

  interface -"cali*", "*"; # Exclude cali* but include everything else.
}


# Template for all BGP clients
template bgp bgp_template {
  
  debug { states };

  description "Connection to BGP peer";
  local as 65100;
  multihop;
  gateway recursive; # This should be the default, but just in case.
  import all;        # Import all routes, since we don't know what the upstream
                     # topology is and therefore have to trust the ToR/RR.
  export filter calico_pools;  # Only want to export routes for workloads.
  next hop self;     # Disable next hop processing and always advertise our
                     # local address as nexthop
  source address 172.17.42.31;  # The local address we use for the TCP connection
  add paths on;
  graceful restart;  # See comment in kernel section about graceful restart.
}


# ------------- Global peers -------------



# For peer /global/peer_v4/172.17.42.40
protocol bgp Global_172_17_42_40 from bgp_template {
  neighbor 172.17.42.40 as 65100;
}




# ------------- Node-specific peers -------------

# No node-specific peers configured.

上一篇     下一篇