Skip to content

Commit e813778

Browse files
SpCombkke
authored and committed
Fix agent to unregister LB service backends earlier during container shutdown (kontena#3287)
* agent ServicePodWorker#on_container_die(exit_code: ...)
* log service:instance_exit event on container die
* add e2e specs for shutdown of lb service backends
* agent: unregister container from lb service on container kill event
* add testcase for service without healthchecks
* change event type to service:instance_crash
* add affirmative-failure spec using the system default SIGTERM handler
* fixup instance_crash
* use eq for better diff
* switch to POST requests to avoid retries on read timeouts
* tweak timings to make requests more busy
* fix !TRAP case to actually hard-exit! on SIGTERM
* tweak spec name
* show service logs in case of crashes
1 parent 5d36625 commit e813778

File tree

8 files changed

+307
-1
lines changed

8 files changed

+307
-1
lines changed

agent/lib/kontena/load_balancers/registrator.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def on_container_event(topic, event)
4141
if container && container.service_container? && container.load_balanced?
4242
self.register_container(container)
4343
end
44-
elsif event.status == 'die'
44+
elsif event.status == 'die' || event.status == 'kill'
4545
self.unregister_container(event.id)
4646
end
4747
rescue => exc
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
require 'net/http'
2+
3+
# End-to-end checks of what the client service observes while the load-balanced
# server service is force re-deployed, for several server shutdown behaviours.
# Each scenario installs its own copy of the shutdown-test stack and inspects
# the client service's events for 'instance_crash' entries.
describe 'kontena service shutdown' do
  include ServiceHelper

  context 'for a http test service' do
    # Build the stack images once for all scenarios below.
    before(:all) do
      with_fixture_dir('stack/shutdown-test') do
        run! 'kontena stack build --no-push'
      end
    end

    context "using ungraceful shutdown to fail requests" do
      before(:all) do
        with_fixture_dir('stack/shutdown-test') do
          run! 'kontena stack install -n shutdown-test-fail -v trap=false'
        end
      end

      after(:all) do
        run! 'kontena stack rm --force shutdown-test-fail'
      end

      describe "after re-deploying the server" do
        before do
          run! 'kontena service deploy --force shutdown-test-fail/server'
        end

        it "will result in client errors" do
          client_events = service_events('shutdown-test-fail/client')
          crashes = client_events.select { |event| event[:type] == 'instance_crash' }

          # The client logs are attached as the failure message.
          expect(crashes).to_not eq([]), service_logs('shutdown-test-fail/client').join
        end
      end
    end

    context "using graceful shutdown to close the listening socket" do
      before(:all) do
        with_fixture_dir('stack/shutdown-test') do
          run! 'kontena stack install -n shutdown-test-graceful -v graceful_shutdown=true'
        end
      end

      after(:all) do
        run! 'kontena stack rm --force shutdown-test-graceful'
      end

      describe "after re-deploying the server" do
        before do
          run! 'kontena service deploy --force shutdown-test-graceful/server'
        end

        it "does not result in any client errors" do
          client_events = service_events('shutdown-test-graceful/client')
          crashes = client_events.select { |event| event[:type] == 'instance_crash' }

          expect(crashes).to eq([]), service_logs('shutdown-test-graceful/client').join
        end
      end
    end

    context "using graceful shutdown without a healthcheck" do
      before(:all) do
        with_fixture_dir('stack/shutdown-test') do
          run! 'kontena stack install -n shutdown-test-nohealthcheck -v graceful_shutdown=true -v healthcheck=false'
        end
      end

      after(:all) do
        run! 'kontena stack rm --force shutdown-test-nohealthcheck'
      end

      describe "after re-deploying the server" do
        before do
          run! 'kontena service deploy --force shutdown-test-nohealthcheck/server'
        end

        it "does not result in any client errors" do
          client_events = service_events('shutdown-test-nohealthcheck/client')
          crashes = client_events.select { |event| event[:type] == 'instance_crash' }

          expect(crashes).to eq([]), service_logs('shutdown-test-nohealthcheck/client').join
        end
      end
    end

    context "using delayed shutdown without closing the listening socket" do
      before(:all) do
        with_fixture_dir('stack/shutdown-test') do
          run! 'kontena stack install -n shutdown-test-delay -v graceful_shutdown=false'
        end
      end

      after(:all) do
        run! 'kontena stack rm --force shutdown-test-delay'
      end

      describe "after re-deploying the server" do
        before do
          run! 'kontena service deploy --force shutdown-test-delay/server'
        end

        it "does not result in any client errors" do
          client_events = service_events('shutdown-test-delay/client')
          crashes = client_events.select { |event| event[:type] == 'instance_crash' }

          expect(crashes).to eq([]), service_logs('shutdown-test-delay/client').join
        end
      end
    end
  end
end
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
FROM alpine
2+
3+
RUN apk update && apk --update add ruby
4+
5+
ADD client.rb .
6+
7+
CMD ["ruby", "client.rb"]
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
FROM alpine
2+
3+
RUN apk update && apk --update add ruby
4+
5+
ADD server.rb .
6+
7+
CMD ["ruby", "server.rb"]
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
require 'logger'
2+
require 'net/http'
3+
require 'thread'
4+
require 'thwait'
5+
6+
# Reads a setting from the process environment.
#
# @param name [String] environment variable to look up
# @param default [Object, nil] value returned when the variable is unset or
#   empty; +nil+ means "no default"
# @yield [value] optional conversion applied to the raw String read from ENV
#   (it is never applied to +default+)
# @return [Object] the (optionally converted) ENV value, or +default+
# @raise [RuntimeError] when the variable is unset/empty and there is no default
def getenv(name, default = nil, &block)
  value = ENV[name]

  if value && !value.empty?
    block ? yield(value) : value
  elsif !default.nil?
    # Checked against nil rather than truthiness so `false` is a usable default.
    default
  else
    fail "Missing ENV #{name}"
  end
end
16+
17+
# Client settings, read from the environment once at load time.
URL     = getenv('URL', &method(:URI))              # required, parsed into a URI
THREADS = getenv('THREADS', 8, &method(:Integer))   # number of client threads
SKEW    = getenv('SKEW', 1.0, &method(:Float))      # upper bound of the random sleep before each request

# Shared logger writing to stderr; the first CLI argument is used as progname.
$logger = Logger.new($stderr)
$logger.progname = ARGV[0]
23+
24+
def time
25+
t = Time.now
26+
r = yield
27+
return Time.now - t, r
28+
end
29+
30+
def client_request(url = URL)
31+
Net::HTTP.start(url.hostname, url.port) do |http|
32+
return http.request_post(url.path, '')
33+
end
34+
end
35+
36+
def client_thread(i)
37+
t = Time.now
38+
$logger.info "Start #{i}/#{THREADS}..."
39+
40+
loop do
41+
skew = rand() * SKEW
42+
sleep(skew)
43+
44+
latency, response = time { client_request(URL) }
45+
46+
dt = Time.now - t
47+
t = Time.now
48+
49+
$logger.info "[thread #{'%2d' % i}/#{THREADS}] POST #{URL} => HTTP #{response.code} in #{'%.3fs' % latency} req + #{'%.3fs' % skew} skew + #{'%.3fs' % (dt - skew - latency)} overhead: #{response.body.strip}"
50+
51+
response.value # raises
52+
end
53+
end
54+
55+
# Start THREADS client threads and wait for the first one to finish. Each
# thread pushes itself onto this queue when it ends, so the main thread can
# block on the first finisher. A core Queue is used instead of ThreadsWait
# because the 'thwait' library is no longer part of Ruby's standard library
# (removed in Ruby 2.7); once nothing else uses it, the require can go too.
finished = Queue.new

threads = (1..THREADS).map do |i|
  Thread.new do
    begin
      client_thread(i)
    rescue => exc
      $logger.error exc
      raise
    ensure
      finished << Thread.current
    end
  end
end

$logger.info "Waiting for #{threads.size} threads..."

# Thread#value joins the finished thread and re-raises its exception here, so
# a failure in any client thread propagates out of the main script.
finished.pop.value # raises
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
stack: test/shutdown-test
2+
variables:
3+
trap:
4+
type: boolean
5+
default: true
6+
graceful_shutdown:
7+
type: boolean
8+
healthcheck:
9+
type: boolean
10+
services:
11+
lb:
12+
image: kontena/lb
13+
server:
14+
build:
15+
context: .
16+
dockerfile: Dockerfile.server
17+
image: shutdown-test-server
18+
instances: 3
19+
# {% if healthcheck %}
20+
health_check:
21+
protocol: http
22+
port: 8000
23+
uri: "/health?status=200"
24+
initial_delay: 5
25+
# {% endif %}
26+
links:
27+
- lb
28+
environment:
29+
- KONTENA_LB_INTERNAL_PORT=8000
30+
- KONTENA_LB_VIRTUAL_PATH=/
31+
- REQUEST_DELAY=1.0
32+
# {% if trap %}
33+
- TRAP=true
34+
# {% if graceful_shutdown %}
35+
- SHUTDOWN=true
36+
# {% endif %}
37+
# {% endif %}
38+
client:
39+
build:
40+
context: .
41+
dockerfile: Dockerfile.client
42+
image: shutdown-test-client
43+
instances: 3
44+
environment:
45+
- SKEW=0.5
46+
- THREADS=32
47+
- URL=http://lb/
48+
depends_on:
49+
- server
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
require 'logger'
2+
require 'socket'
3+
require 'webrick'
4+
5+
# Test HTTP server: answers every request after a fixed delay, and reacts to
# SIGTERM according to the TRAP / SHUTDOWN environment flags.
NAME = Socket.gethostname
PORT = 8000
STATUS = 200
# Both flags are on whenever the variable is present, whatever its value.
SHUTDOWN = ENV['SHUTDOWN'] ? true : false
TRAP = ENV['TRAP'] ? true : false
REQUEST_DELAY = ENV['REQUEST_DELAY'].to_f

$logger = Logger.new($stderr)
$logger.progname = ARGV[0]

server = WEBrick::HTTPServer.new(Port: PORT)

# Every path responds with STATUS, or with the code given in ?status=...,
# after sleeping for REQUEST_DELAY seconds.
server.mount_proc '/' do |req, res|
  query = req.query
  delay = REQUEST_DELAY

  sleep delay

  res.status = query['status'] ? query['status'].to_i : STATUS
  res.body = "Response from #{NAME} (delay=#{'%.3fs' % delay})\n"
end

Signal.trap('TERM') do
  # kill the process instead of allowing webrick to handle the SignalException: SIGTERM
  exit! unless TRAP

  if SHUTDOWN
    # NOTE(review): tokens.max - tokens.size is reported as the active client
    # count; presumably one token is held per in-flight connection - confirm.
    active = server.tokens.max - server.tokens.size
    $stderr.puts "shutdown on SIGTERM with #{active} active clients"
    server.shutdown # closes listeners after stopping
  else
    $stderr.puts "skip SIGTERM"
  end
end

$logger.info "start :#{PORT}"
server.start
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
require 'json'
2+
3+
# Spec helpers that read service events and logs through the kontena CLI.
# The including context must provide `run!`, whose result responds to `out`.
module ServiceHelper
  # Fetches recent events of a service.
  #
  # @param service [String] service name, e.g. "stack/service"
  # @param lines [Integer] value passed to the CLI's --lines option
  # @return [Array<Hash>] one {time:, type:, data:} hash per event line; each
  #   line is split on whitespace into at most three fields
  def service_events(service, lines: 1000)
    k = run! "kontena service events --lines=#{lines} #{service}"

    # Skip the first output line (presumably a column header). drop(1) returns
    # [] for empty output, where [1..-1] would return nil and break the map.
    k.out.lines.drop(1).map do |line|
      time, type, data = line.split(' ', 3)

      { time: time, type: type, data: data }
    end
  end

  # Fetches recent log lines of a service.
  #
  # @param service [String] service name, e.g. "stack/service"
  # @param lines [Integer] value passed to the CLI's --lines option
  # @return [Array<String>] the CLI output split into lines (newlines kept)
  def service_logs(service, lines: 1000)
    k = run! "kontena service logs --lines=#{lines} #{service}"

    k.out.lines
  end
end

0 commit comments

Comments
 (0)