-
-
Notifications
You must be signed in to change notification settings - Fork 123
Expand file tree
/
Copy pathsetup
More file actions
executable file
·439 lines (391 loc) · 19 KB
/
Copy pathsetup
File metadata and controls
executable file
·439 lines (391 loc) · 19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
#!/bin/sh
# Lab "l3-hyperv": three hypervisors (HV1-HV3) in front of four VMs
# (VM1-VM4), one "internet" router and BGP route reflectors (RR*).
#
# NOTE(review): spawn, run and service are provided by
# ../common/lab-setup, which is not visible from this file. Presumably
# everything after `run` executes inside each spawned VM with $uts set
# to the VM name -- TODO confirm against lab-setup.

LABNAME="l3-hyperv"

# Work from the directory holding this script: lab-setup, rt_tables and
# radvd.HV.conf are all referenced through relative paths. Stop right
# away when the directory cannot be entered instead of sourcing or
# bind-mounting files from whatever the current directory happens to be.
cd "$(dirname "$(readlink -f "$0")")" || exit 1
. ../common/lab-setup

# Networks 8 and 9 are shared by the hypervisors, the internet router
# and the route reflectors. Each VM shares two networks with its
# hypervisor (e.g. VM1 uses 1 and 11, both also attached to HV1).
spawn vm HV1 networks 8,9,1,2,11,22
spawn vm VM1 network 1,11
spawn vm VM2 network 2,22
spawn vm HV2 networks 8,9,3,33
spawn vm VM3 network 3,33
spawn vm HV3 networks 8,9,4,44
spawn vm VM4 network 4,44
spawn vm internet networks 8,9
spawn vm RR1 network 8

# Second route reflector on network 9. Choose one of the following:
spawn vm RR2 network 9 # Route reflector using BIRD
# spawn juniper-vrr RR3 networks 9 # Route reflector using Juniper vRR
# spawn juniper-vrr RR6 networks 9 # Route reflector using Juniper vRR (w/o logical systems)
# spawn vm RR4 network 9 # Route reflector using Quagga
# spawn vm RR5 network 9 # Route reflector using GoBGP
# spawn cumulus-vx RR7 network 9 # Route reflector using Cumulus VX

run

# Use the lab's own rt_tables file. The per-role setup refers to routing
# tables by name (public, private, local-out); presumably this file is
# where those names are declared -- TODO confirm.
mount --bind rt_tables /etc/iproute2/rt_tables
case $uts in
HV*)
# VLAN sub-interfaces on both uplinks: id 20 is "private", id 90 is
# "public". Socket priorities 6 and 7 (~ CS6/CS7) are mapped in both
# directions to the same IEEE 802.1p values (IC/NC). A lot of equipment
# ships with default CoS settings for IEEE 802.1p but does not look at
# the DS field, and BIRD does the right thing for BFD once commit
# d6cf99615130 is applied.
for vlan in private:20 public:90; do
    for uplink in eth0 eth1; do
        ip link add name "${uplink}.${vlan%:*}" link "${uplink}" \
            type vlan id "${vlan#*:}" \
            egress-qos-map 6:6 7:7 ingress-qos-map 6:6 7:7
    done
done
# Bring them up in the same order they were created.
for vlan in private public; do
    for uplink in eth0 eth1; do
        ip link set up dev "${uplink}.${vlan}"
    done
done
# HV1 has two VM-facing interfaces and two bridge-facing ones; every
# other hypervisor has one of each. vmifaces and brifaces are reused by
# the rest of the hypervisor setup.
if [ "$uts" = HV1 ]; then
    vmifaces="eth2 eth3"
    brifaces="eth4 eth5"
else
    vmifaces="eth2"
    brifaces="eth3"
fi
for port in $vmifaces; do
    ip link set up dev "$port"
done
# Fake router IP for the VM: every VM-facing interface carries the same
# gateway address.
for port in $vmifaces; do
    ip addr add 203.0.113.254/32 dev "$port" scope link
    # IPv6 does not need a special address, a link-local gateway is
    # enough. fe80::1 cannot collide with an EUI-64-generated address
    # since it lacks the FF:FE bytes.
    ip -6 addr add fe80::1/128 dev "$port" scope link
done
# Numeric suffix of the hypervisor name (HV1 -> 1); it is used as the
# last digit(s) of every address assigned below.
hvnum=${uts#HV}
# Addresses used for ECMP routing, one per uplink and per VLAN.
ip addr add "198.51.100.10${hvnum}/25" dev eth0.public
ip addr add "198.51.100.20${hvnum}/25" dev eth1.public
ip addr add "172.22.15.10${hvnum}/25" dev eth0.private
ip addr add "172.22.15.20${hvnum}/25" dev eth1.private
ip -6 addr add "2001:db8:c633:6400::${hvnum}/120" dev eth0.public
ip -6 addr add "2001:db8:c633:6401::${hvnum}/120" dev eth1.public
# Last-resort default routes (highest possible metric): blackhole for
# the public and private tables, unreachable for local-out.
for tbl in public private; do
    ip route add blackhole default metric 4294967294 table "$tbl"
done
ip route add unreachable default metric 4294967294 table local-out
ip -6 route add blackhole default metric 4294967294 table public
# Own IP for private purpose (not really used)
ip addr add "172.22.2.${hvnum}/32" dev dummy0
ip route add "172.22.2.${hvnum}/32" dev dummy0 table private
# Firewall rules acting as a second level of defense. Going stateless
# would be possible if needed. Yet another line of defense would be
# containers, to get distinct routing domains, but the orchestrator
# would then have to be able to work with containers too.
#
# ip46tables applies the very same arguments to iptables, then to
# ip6tables. The here-document delimiter is quoted so that "$@" is
# written literally into the helper.
cat > /tmp/tools/ip46tables <<'EOF'
#!/bin/sh
iptables "$@" && ip6tables "$@"
EOF
chmod +x /tmp/tools/ip46tables
# Safety net: enforce routing rules. Only VM <-> public uplink,
# public uplink <-> public uplink and VM <-> VM forwarding is accepted.
for vmiface in $vmifaces; do
    for uplink in eth0.public eth1.public; do
        ip46tables -A FORWARD -i "$vmiface" -o "$uplink" -j ACCEPT
        ip46tables -A FORWARD -i "$uplink" -o "$vmiface" -j ACCEPT
        for peer in eth0.public eth1.public; do
            ip46tables -A FORWARD -i "$uplink" -o "$peer" -j ACCEPT
        done
    done
    for other in $vmifaces; do
        ip46tables -A FORWARD -i "$vmiface" -o "$other" -j ACCEPT
    done
done
# Anything else is logged (rate-limited) and dropped.
ip46tables -A FORWARD -m limit --limit 5/s --limit-burst 5 \
    -j LOG --log-prefix "NF: routing evasion: " --log-level warning
ip46tables -A FORWARD -j DROP
# IPv4 TTL: routing is a bit too complex to let Linux guess a
# sensible source address for TTL exceeded packets (we assume
# there is no problem with IPv6). Three options are listed below;
# only the first one is active, the other two are kept commented
# out as alternatives.
# 1. Use interface address
sysctl -qw net.ipv4.icmp_errors_use_inbound_ifaddr=1
# 2. Drop packets with too low TTL
# iptables -t mangle -A PREROUTING -m ttl --ttl-eq 1 -j DROP
# 3. Don't decrease TTL (dangerous)
# iptables -t mangle -A PREROUTING -m ttl --ttl-eq 1 -j TTL --set-ttl 2
# RP filtering. We use Netfilter for IPv6 (3.3+) and rp_filter for IPv4 (it also filters ARP).
# IPv6: dedicated RPFILTER chain in the raw table. Rules are evaluated
# in the order they are appended, so the order below matters.
ip6tables -t raw -N RPFILTER
# Packets matching the rpfilter check leave the chain right away.
ip6tables -t raw -A RPFILTER -m rpfilter -j RETURN
# Packets reaching this rule did not match the plain check above: those
# matching with --accept-local and having a multicast destination are
# dropped without being logged.
# NOTE(review): the reason for this special case is not stated here -- confirm.
ip6tables -t raw -A RPFILTER -m rpfilter --accept-local -m addrtype --dst-type MULTICAST -j DROP
# Everything else is logged (rate-limited) then dropped.
ip6tables -t raw -A RPFILTER -m limit --limit 5/s --limit-burst 5 \
-j LOG --log-prefix "NF: rpfilter: " --log-level warning
ip6tables -t raw -A RPFILTER -j DROP
# IPv4: rp_filter is set to 0 for "all" and to 1 on each VM-facing
# interface only.
sysctl -qw net.ipv4.conf.all.rp_filter=0
for vmiface in $vmifaces; do
# Only traffic entering through a VM-facing interface goes through the
# IPv6 RPFILTER chain.
ip6tables -t raw -A PREROUTING -i $vmiface -j RPFILTER
sysctl -qw net.ipv4.conf.$vmiface.rp_filter=1
done
# A VM could reach BGP/BFD on the hypervisor. TTL security is in place,
# but better safe than sorry: BGP (tcp) and BFD (udp/3784) traffic goes
# through a dedicated BGP-BFD chain.
ip46tables -N BGP-BFD
ip46tables -A INPUT -p tcp --dport bgp -j BGP-BFD
ip46tables -A INPUT -p udp --dport 3784 -j BGP-BFD
# udp/4789 (the port used for the vx1 VXLAN device in this script) is
# accepted from 172.22.2.0/24 with TTL 255.
iptables -A INPUT -p udp --dport 4789 -s 172.22.2.0/24 \
    -m ttl --ttl-eq 255 -j ACCEPT
# BGP/BFD is accepted from the public and private interconnection
# subnets only, and only with TTL / hop limit 255.
for trusted in 198.51.100.0/24 172.22.15.0/24; do
    iptables -A BGP-BFD -s "$trusted" -m ttl --ttl-eq 255 -j ACCEPT
done
ip6tables -A BGP-BFD -s 2001:db8:c633:6400::/63 -m hl --hl-eq 255 -j ACCEPT
# The rest is logged (rate-limited) then dropped.
ip46tables -A BGP-BFD -m limit --limit 5/s --limit-burst 5 \
    -j LOG --log-prefix "NF: BGP/BFD: " --log-level warning
ip46tables -A BGP-BFD -j DROP
# With a full conntrack table, BFD packets would be dropped. Not
# useful. Make sure this cannot happen by not tracking them at all.
ip46tables -t raw -A PREROUTING -p udp --dport 3784 \
    -m addrtype --dst-type LOCAL -j CT --notrack
ip46tables -t raw -A OUTPUT -p udp --dport 3784 \
    -m addrtype --src-type LOCAL -j CT --notrack
# When virnet-host is enabled, checksums of locally generated UDP
# packets sent to vnet interfaces are not computed. The remote
# userspace should cope with that when using AF_PACKET (notably
# dhclient) but older versions do not. See:
# https://www.spinics.net/lists/kvm/msg37660.html
# https://lwn.net/Articles/396466/
for port in $vmifaces; do
    iptables -t mangle -A OUTPUT -o "$port" -p udp --dport 68 \
        -j CHECKSUM --checksum-fill
done
# As a router, we should not accept redirects: enabling
# forwarding should disable redirect acceptance. However,
# better safe than sorry.
#
# accept_redirects is "AND" when forwarding is enabled
sysctl -qw net.ipv4.conf.all.accept_redirects=0
sysctl -qw net.ipv6.conf.all.accept_redirects=0
# We shouldn't need to generate them either but there are some
# situations where we will (by configuring the wrong route on
# an interface). This should be harmless as the host will then
# try to do an ARP request which will be answered by the
# hypervisor. However, we don't want to trigger some edge case
# where an OS is confused by the bogus redirect.
#
# send_redirects is "OR": clearing "all" is not enough, the value must
# also be cleared on every interface that forwards traffic. The VLAN
# interfaces already exist at this point, so changing "default" has no
# effect on them and each one has to be listed explicitly. This covers
# the private VLAN interfaces as well as the public ones, since
# forwarding is enabled on both sets later in this script.
sysctl -qw net.ipv4.conf.all.send_redirects=0
sysctl -qw net.ipv4.conf.default.send_redirects=0
# "/" stands for the "." of the interface name in sysctl keys.
for iface in eth0/public eth1/public eth0/private eth1/private; do
    sysctl -qw "net.ipv4.conf.$iface.send_redirects=0"
done
for vmiface in $vmifaces; do
    sysctl -qw "net.ipv4.conf.$vmiface.send_redirects=0"
    # Unfortunately, not possible to disable that for IPv6: filter
    # outgoing ICMPv6 redirects instead.
    ip6tables -A OUTPUT -o "$vmiface" -p ipv6-icmp --icmpv6-type redirect -j DROP
done
# We do ARP proxying on every VM-facing interface, with proxy_delay
# set to 0.
for vmiface in $vmifaces; do
sysctl -qw net.ipv4.conf.$vmiface.proxy_arp=1
sysctl -qw net.ipv4.neigh.$vmiface.proxy_delay=0
done
# IPv4 policy routing. The replacement rules are added first and the
# stock rules (priorities 32766 and 32767) are removed afterwards.
# Local lookups: locally generated traffic (iif lo) uses main, then
# local-out.
ip rule add iif lo lookup main priority 21
ip rule add iif lo lookup local-out priority 22 # mostly local-out = private + public
# Private lookups: traffic received on the private VLAN interfaces
ip rule add iif eth0.private lookup private priority 26
ip rule add iif eth1.private lookup private priority 28
# Public lookups (the remaining)
ip rule add lookup public priority 99
# Remove other rules
ip rule del lookup main priority 32766
ip rule del lookup default priority 32767
# For IPv6, we don't have private stuff. So, simpler
# rules. Also, route lookup is a bit different in IPv6. Routes
# to next-hop need to be looked up as well.
ip -6 rule add lookup main priority 51
ip -6 rule add lookup public priority 53 # Last: has a default
ip -6 rule del lookup main priority 32766
# Add routes to VM. Of course, it should be done by some kind
# of automatic registration process. Note the use of "onlink"
# for "via" routes. This is needed for BIRD to advertise the
# address.
#
# The selector is the numeric suffix of the hypervisor name. All routes
# go to the "public" table with metric 10. The IPv4 host addresses are
# the ones handed out by the dnsmasq --dhcp-host entries of this script
# (203.0.113.1 = VM1, .2 = VM2, .3 = VM3, .4 = VM4).
case ${uts#HV} in
1)
# HV1: 203.0.113.1 behind eth2, 203.0.113.2 behind eth3.
# Use a /32. If you need a /24, look at
# "noprefixroute" and commit
# f311a9e12540f45db7a497ba9561484bd2b04fa5
ip route add 203.0.113.1/32 dev eth2 table public metric 10
ip route add 203.0.113.10/32 \
via 203.0.113.1 dev eth2 onlink \
table public metric 10 # Anycast
ip route add 203.0.113.2/32 dev eth3 table public metric 10
ip -6 route add 2001:db8:cb00:7100::/64 \
via fe80::5254:33ff:fe00:7 dev eth2 \
table public metric 10
ip -6 route add 2001:db8:cb00:710a::/64 \
via fe80::5254:33ff:fe00:7 dev eth2 \
table public metric 10 # Anycast
ip -6 route add 2001:db8:cb00:7101::/64 \
via fe80::5254:33ff:fe00:9 dev eth3 \
table public metric 10
;;
2)
# HV2: 203.0.113.3 behind eth2; the anycast addresses
# (203.0.113.10 and 2001:db8:cb00:710a::/64) are routed here too.
ip route add 203.0.113.3/32 dev eth2 table public metric 10
ip route add 203.0.113.10/32 \
via 203.0.113.3 dev eth2 onlink \
table public metric 10 # Anycast
ip -6 route add 2001:db8:cb00:7102::/64 \
via fe80::5254:33ff:fe00:f dev eth2 \
table public metric 10
ip -6 route add 2001:db8:cb00:710a::/64 \
via fe80::5254:33ff:fe00:f dev eth2 \
table public metric 10 # Anycast
;;
3)
# HV3: 203.0.113.4 behind eth2, no anycast address.
ip route add 203.0.113.4/32 dev eth2 table public metric 10
ip -6 route add 2001:db8:cb00:7103::/64 \
via fe80::5254:33ff:fe00:15 dev eth2 \
table public metric 10
# We also configure an additional IP that will be
# NATed to the main IP. We keep the route so that
# RP filtering keeps working.
ip route add 203.0.113.11/32 dev eth2 table public metric 10
iptables -t nat -I PREROUTING -d 203.0.113.11 \
-j DNAT --to-destination 203.0.113.4
# This should also work with IPv6 but as each host
# has a /64, this makes less sense.
;;
esac
# We are a router. As a safety net, IPv4 forwarding is only turned on
# for a selection of interfaces: the VM-facing ones and the four VLAN
# sub-interfaces ("/" stands for the "." of the interface name in
# sysctl keys). "default" and "all" are at 0 by default; "all" is
# special as it would also enable forwarding on every individual
# interface, including "default".
for name in $vmifaces eth0/public eth1/public eth0/private eth1/private; do
    sysctl -qw "net.ipv4.conf.${name}.forwarding=1"
done
# For some reason, IPv6 does not work like IPv4: forwarding has to be
# enabled globally.
sysctl -qw net.ipv6.conf.all.forwarding=1
# The IPv6 route cache size is low (4096). However, it is only used
# for PMTU exceptions (starting from Linux 4.2). Its current value is
# the before-last value in /proc/net/rt6_stats. If it reaches the
# limit in normal use, consider increasing the route cache size.
# sysctl -qw net.ipv6.route.max_size=65536
service bird
# Build /tmp/radvd.conf from the radvd.HV.conf template, one copy per
# VM-facing interface: IFACE becomes the interface name and "@" a
# per-interface index. HV1 starts at 0 (and uses 0 and 1 for its two
# interfaces), HV2 uses 2 and HV3 uses 3.
# NOTE(review): the template is not visible here; presumably "@" is the
# last digit of the 2001:db8:cb00:710x::/64 prefix -- confirm.
if [ "$uts" = HV1 ]; then
    ra_idx=0
elif [ "$uts" = HV2 ]; then
    ra_idx=2
elif [ "$uts" = HV3 ]; then
    ra_idx=3
fi
for port in $vmifaces; do
    sed -e "s/IFACE/${port}/" -e "s/@/${ra_idx}/" radvd.HV.conf \
        >> /tmp/radvd.conf
    ra_idx=$((ra_idx + 1))
done
service radvd ../../tmp/radvd.conf
# DHCP only (DNS is disabled with --port 0): static leases for the four
# VMs, with the fake router address as default gateway.
service dnsmasq \
    --no-ping \
    --no-resolv \
    --port 0 \
    --dhcp-authoritative \
    --dhcp-range=tag:known,203.0.113.0,static,255.255.255.0,infinite \
    --dhcp-host=50:54:33:00:00:07,203.0.113.1,VM1 \
    --dhcp-host=50:54:33:00:00:09,203.0.113.2,VM2 \
    --dhcp-host=50:54:33:00:00:0f,203.0.113.3,VM3 \
    --dhcp-host=50:54:33:00:00:15,203.0.113.4,VM4 \
    --dhcp-option=option:router,203.0.113.254
# Overlay network: bridge br1 (STP off) holds the bridge-facing
# interfaces plus a VXLAN device.
brctl addbr br1
brctl stp br1 off
for port in $brifaces; do
    brctl addif br1 "$port"
done
# vx1 uses the hypervisor's own private address on dummy0 as local
# endpoint.
ip link add vx1 type vxlan \
    dstport 4789 id 100 ttl 255 \
    dev dummy0 local "172.22.2.${uts#HV}"
brctl addif br1 vx1
# All-zero FDB entries pointing to each of the other hypervisors
# (1 to 3, skipping ourselves).
for peer in 1 2 3; do
    if [ "$peer" -ne "${uts#HV}" ]; then
        bridge fdb append 00:00:00:00:00:00 dev vx1 dst "172.22.2.$peer"
    fi
done
# Disable any local traffic on the bridge.
# See: https://vincent.bernat.ch/en/blog/2017-linux-bridge-isolation
echo 1 > /sys/class/net/br1/bridge/vlan_filtering
bridge vlan del dev br1 vid 1 self
# Mostly cosmetic: disable IPv6 on all related interfaces to get rid of
# any link-local address.
for dev in br1 vx1 $brifaces; do
    sysctl -qw "net.ipv6.conf.${dev}.disable_ipv6=1"
done
for dev in br1 vx1; do
    ip link set up dev "$dev"
done
;;
RR*)
# Route reflectors have a single uplink carrying both VLANs (90 is
# public, 20 is private), with the same 802.1p priority mapping as on
# the hypervisors.
for vlan in public:90 private:20; do
    ip link add name "eth0.${vlan%:*}" link eth0 \
        type vlan id "${vlan#*:}" \
        egress-qos-map 6:6 7:7 ingress-qos-map 6:6 7:7
done
ip link set up dev eth0.public
ip link set up dev eth0.private
# RR1 gets the x.x.x.126 addresses, any other reflector the x.x.x.226
# ones; for IPv6 this selects the ...:6400:: or ...:6401:: subnet.
if [ "$uts" = RR1 ]; then
    n=1
else
    n=2
fi
ip addr add "198.51.100.${n}26/25" dev eth0.public
ip addr add "172.22.15.${n}26/25" dev eth0.private
ip -6 addr add "2001:db8:c633:640$((n - 1))::26/120" dev eth0.public
# Start the routing daemon matching the reflector flavor. Names not
# listed here start nothing.
case $uts in
    RR1|RR2)
        service bird
        ;;
    RR4)
        service quagga
        ;;
    RR5)
        log_begin_msg "Start gobgpd"
        # Assuming GoBGP 1.22 (with proper TTL security support).
        # One background instance per configuration file, each with
        # its own API port starting from 50051.
        api_port=50051
        for inst in public-ipv4 public-ipv6 private-ipv4; do
            gobgpd -t yaml -f "/mnt/lab/gobgpd.${inst}.yaml" \
                --syslog yes --disable-stdlog --pprof-disable \
                --api-hosts "127.0.0.1:${api_port}" &
            api_port=$((api_port + 1))
        done
        sleep 1
        log_ok_msg "gobgpd started"
        ;;
esac
;;
VM*)
# Classic DHCP request for IPv4. When /etc/dhcp exists, a tmpfs is
# mounted over it first.
if [ -d /etc/dhcp ]; then
    mount -n -t tmpfs tmpfs /etc/dhcp -o rw
fi
service dhclient -v -4 -1 eth0 -lf /tmp/dhcp.leases -cf /dev/null
# For IPv6, VMs get their configuration from RA.

# Additional IPs: VM1 and VM3 both carry the anycast addresses on lo.
if [ "$uts" = VM1 ] || [ "$uts" = VM3 ]; then
    ip addr add 203.0.113.10/32 dev lo
    ip -6 addr add 2001:db8:cb00:710a::1/128 dev lo
fi
# Private IP in overlay network, numbered after the VM.
ip link set up dev eth1
ip addr add "192.168.5.${uts#VM}/24" dev eth1
# VM3 will use temporary addresses.
if [ "$uts" = VM3 ]; then
    sysctl -qw net.ipv6.conf.eth0.use_tempaddr=2
fi
;;
internet)
# The internet router only takes part in the public VLAN (id 90), on
# both uplinks, with the same 802.1p priority mapping as elsewhere.
for uplink in eth0 eth1; do
    ip link add name "${uplink}.public" link "${uplink}" type vlan id 90 \
        egress-qos-map 6:6 7:7 ingress-qos-map 6:6 7:7
done
for uplink in eth0 eth1; do
    ip link set up dev "${uplink}.public"
done
# For ECMP routing: one address per uplink and per address family.
ip addr add 198.51.100.1/25 dev eth0.public
ip addr add 198.51.100.254/25 dev eth1.public
ip -6 addr add 2001:db8:c633:6400::ff/120 dev eth0.public
ip -6 addr add 2001:db8:c633:6401::ff/120 dev eth1.public
# Routing: look into main first, then into public (last, as it has a
# default route). The stock rules are removed once the replacements are
# in place.
ip rule add lookup main priority 51
ip rule add lookup public priority 53
ip rule del lookup main priority 32766
ip rule del lookup default priority 32767
ip -6 rule add lookup main priority 51
ip -6 rule add lookup public priority 53
ip -6 rule del lookup main priority 32766
# We are a router, for both address families.
sysctl -qw net.ipv4.ip_forward=1
sysctl -qw net.ipv6.conf.all.forwarding=1
service bird
# "Internet": well-known addresses hosted locally on dummy0.
ip addr add 8.8.8.8/32 dev dummy0
ip -6 addr add 2001:4860:4860::8888/128 dev dummy0
;;
esac
# Common tail, placed after the role-specific case statement.
# Set arp_announce to 2 for all interfaces (see the kernel ip-sysctl
# documentation for the exact source address selection this implies).
sysctl -qw net.ipv4.conf.all.arp_announce=2
# Start nginx through the lab's service helper.
service nginx