Commit 5445c28

harden even more (#30)
* harden even more
* harden even more
* fix smoke test image
* harden more
* harden more
* harden more
* harden more
* try harder
1 parent 0ba46a0 commit 5445c28

File tree

11 files changed: +489 additions, -49 deletions


.github/workflows/ci.yml

Lines changed: 85 additions & 0 deletions

@@ -47,6 +47,17 @@ jobs:
       - name: Sync dependencies
         run: uv sync --dev

+      - name: Generate .env and secrets
+        run: ./scripts/manage.sh create-env --non-interactive --force
+
+      - name: Preflight stack bring-up
+        run: |
+          set -euo pipefail
+          cleanup() { ./scripts/manage.sh down >/dev/null 2>&1 || true; }
+          trap cleanup EXIT
+          ./scripts/manage.sh build-image
+          ./scripts/manage.sh up
+
       - name: Run core_data smoke workflow
         run: uv run python -m pytest -k full_workflow

@@ -56,6 +67,17 @@ jobs:
           docker ps -a
           docker compose logs || true

+      - name: Collect diagnostics bundle
+        if: failure()
+        run: ./scripts/collect_diagnostics.sh --output diagnostics-smoke-${{ matrix.profile_name }}
+
+      - name: Upload diagnostics bundle
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: diagnostics-smoke-${{ matrix.profile_name }}
+          path: diagnostics-smoke-${{ matrix.profile_name }}
+
       - name: Upload generated backups
         if: always()
         uses: actions/upload-artifact@v4

@@ -102,6 +124,17 @@ jobs:
       - name: Sync dependencies
         run: uv sync --dev

+      - name: Generate .env and secrets
+        run: ./scripts/manage.sh create-env --non-interactive --force
+
+      - name: Preflight stack bring-up
+        run: |
+          set -euo pipefail
+          cleanup() { ./scripts/manage.sh down >/dev/null 2>&1 || true; }
+          trap cleanup EXIT
+          ./scripts/manage.sh build-image
+          ./scripts/manage.sh up
+
       - name: Run marker tests
         run: uv run python -m pytest -m ${{ matrix.marker }}

@@ -111,6 +144,16 @@ jobs:
           docker ps -a
           docker compose logs || true

+      - name: Collect diagnostics bundle
+        if: failure()
+        run: ./scripts/collect_diagnostics.sh --output diagnostics-marker-${{ matrix.marker }}
+
+      - name: Upload diagnostics bundle
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: diagnostics-marker-${{ matrix.marker }}
+          path: diagnostics-marker-${{ matrix.marker }}
   docker-build:
     name: Validate Docker Build
     runs-on: ubuntu-latest

@@ -128,10 +171,52 @@ jobs:
          file: ./postgres/Dockerfile
          platforms: linux/amd64
          push: false
+          load: true
          tags: core-data-postgres:test
          cache-from: type=gha
          cache-to: type=gha,mode=max

+      - name: Smoke-test Docker image
+        env:
+          PGPASSWORD: thinice-test
+        run: |
+          set -euo pipefail
+          cleanup() { docker rm -f postgres-smoke >/dev/null 2>&1 || true; }
+          trap cleanup EXIT
+          docker run -d --name postgres-smoke \
+            -e POSTGRES_USER=thinice-test \
+            -e POSTGRES_PASSWORD=thinice-test \
+            -e POSTGRES_DB=thinice-test \
+            -e CORE_DATA_SKIP_CONFIG_RENDER=1 \
+            core-data-postgres:test
+          tries=0
+          max_tries=150
+          until docker exec postgres-smoke pg_isready -h localhost -U thinice-test >/dev/null 2>&1; do
+            tries=$((tries + 1))
+            if ((tries >= max_tries)); then
+              echo "[smoke] postgres-smoke never became ready; printing logs."
+              docker logs postgres-smoke || true
+              exit 1
+            fi
+            sleep 2
+          done
+          docker exec postgres-smoke psql -U thinice-test -d thinice-test -c "SELECT 1" >/dev/null
+
+      - name: Stop Docker smoke container
+        if: always()
+        run: docker rm -f postgres-smoke >/dev/null 2>&1 || true
+
+      - name: Collect diagnostics bundle
+        if: failure()
+        run: ./scripts/collect_diagnostics.sh --output diagnostics-docker-build
+
+      - name: Upload diagnostics bundle
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: diagnostics-docker-build
+          path: diagnostics-docker-build
+
       - name: Validate Dockerfile with hadolint
         uses: hadolint/[email protected]
         with:
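
The preflight and smoke steps above only call into scripts/manage.sh and pytest, so the same bring-up can be rehearsed locally before pushing. A minimal sketch under that assumption (Docker and uv available on the host, run from the repository root):

#!/usr/bin/env bash
# Local rehearsal of the CI preflight: generate env/secrets, build the image,
# bring the stack up, run the smoke workflow, and always tear it down again.
set -euo pipefail
cleanup() { ./scripts/manage.sh down >/dev/null 2>&1 || true; }
trap cleanup EXIT
./scripts/manage.sh create-env --non-interactive --force
./scripts/manage.sh build-image
./scripts/manage.sh up
uv run python -m pytest -k full_workflow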

docker-compose.yml

Lines changed: 5 additions & 1 deletion

@@ -217,7 +217,11 @@ services:
     networks:
       - core_data
     healthcheck:
-      test: ["CMD-SHELL", "pg_isready -h $${POSTGRES_HOST:-postgres} -U $${POSTGRES_SUPERUSER:-postgres}"]
+      test:
+        [
+          "CMD-SHELL",
+          "PGPASSWORD=$(cat /run/secrets/postgres_superuser_password 2>/dev/null) pg_isready -h $${POSTGRES_HOST:-postgres} -U $${POSTGRES_SUPERUSER:-postgres} -d $${POSTGRES_DB:-postgres}",
+        ]
       interval: 30s
       timeout: 5s
       retries: 5
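
The healthcheck now supplies the superuser password from the mounted secret and probes the configured database rather than only the socket. A hedged way to replay the same probe by hand, assuming the compose service is named postgres and the secret is mounted at the path shown in the diff:

# Manual replay of the healthcheck command inside the running container.
docker compose exec postgres sh -c \
  'PGPASSWORD=$(cat /run/secrets/postgres_superuser_password 2>/dev/null) \
   pg_isready -h "${POSTGRES_HOST:-postgres}" -U "${POSTGRES_SUPERUSER:-postgres}" -d "${POSTGRES_DB:-postgres}"'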

pgbouncer/entrypoint.sh

Lines changed: 32 additions & 0 deletions

@@ -18,6 +18,36 @@ export POSTGRES_HOST=${POSTGRES_HOST:-postgres}
 export POSTGRES_PORT=${POSTGRES_PORT:-5432}
 export PGBOUNCER_AUTH_USER=${PGBOUNCER_AUTH_USER:-pgbouncer_auth}

+wait_for_backend() {
+  local attempts=${PGBOUNCER_BACKEND_WAIT_ATTEMPTS:-120}
+  local delay=2
+  local attempt=1
+  while ((attempt <= attempts)); do
+    if command -v pg_isready >/dev/null 2>&1; then
+      if pg_isready -h "${POSTGRES_HOST}" -p "${POSTGRES_PORT}" >/dev/null 2>&1; then
+        return 0
+      fi
+    elif command -v nc >/dev/null 2>&1; then
+      if nc -z "${POSTGRES_HOST}" "${POSTGRES_PORT}" >/dev/null 2>&1; then
+        return 0
+      fi
+    else
+      if bash -c "exec 3<>/dev/tcp/${POSTGRES_HOST}/${POSTGRES_PORT}" >/dev/null 2>&1; then
+        exec 3>&-
+        return 0
+      fi
+    fi
+    echo "[pgbouncer] waiting for PostgreSQL at ${POSTGRES_HOST}:${POSTGRES_PORT} (attempt ${attempt})" >&2
+    sleep "${delay}"
+    if ((attempt % 10 == 0 && delay < 10)); then
+      delay=$((delay + 1))
+    fi
+    attempt=$((attempt + 1))
+  done
+  echo "[pgbouncer] timed out waiting for PostgreSQL at ${POSTGRES_HOST}:${POSTGRES_PORT}" >&2
+  exit 1
+}
+
 NETWORK_ACCESS_DIR=${NETWORK_ACCESS_DIR:-/opt/core_data/network_access}
 NETWORK_ALLOW_FILE=${NETWORK_ALLOW_FILE:-${NETWORK_ACCESS_DIR}/allow.list}

@@ -53,6 +83,8 @@ fi
 mkdir -p "${log_dir}" "${run_dir}" "$(dirname "${config_path}")" "$(dirname "${userlist_path}")" "$(dirname "${hba_path}")"
 umask 077

+wait_for_backend
+
 auth_hba_config=""
 if [[ -r "${NETWORK_ALLOW_FILE}" ]]; then
   {
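
wait_for_backend blocks PgBouncer start-up until PostgreSQL answers, preferring pg_isready, falling back to nc, and finally to bash's /dev/tcp; the attempt budget comes from PGBOUNCER_BACKEND_WAIT_ATTEMPTS (default 120) with a delay that creeps upward every ten attempts. A quick standalone check of the /dev/tcp fallback, with localhost:5432 as placeholder values:

# Probe a TCP port the same way the last-resort branch does (no pg_isready, no nc).
if bash -c 'exec 3<>/dev/tcp/localhost/5432' 2>/dev/null; then
  echo "PostgreSQL port reachable"
else
  echo "PostgreSQL port not reachable"
fi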

postgres/initdb/00-render-config.sh

Lines changed: 26 additions & 5 deletions

@@ -4,6 +4,10 @@

 set -euo pipefail

+if [[ "${CORE_DATA_SKIP_CONFIG_RENDER:-0}" == "1" ]]; then
+  exit 0
+fi
+
 TEMPLATE_DIR="/opt/core_data/conf"
 SENTINEL="${PGDATA}/.core_data_config_rendered"
 PGBACKREST_CONF_PATH="${PGDATA}/pgbackrest.conf"

@@ -81,19 +85,30 @@ export \

 mkdir -p "${PGDATA}"

+first_render=1
 if [[ -f "${SENTINEL}" ]]; then
+  first_render=0
   if [[ "${FORCE_RENDER_CONFIG}" != "1" ]]; then
     echo "[core_data] Configuration already rendered; refreshing network allow entries." >&2
     apply_network_allow_entries
-    if ! pg_ctl -D "${PGDATA}" reload >/dev/null 2>&1; then
-      echo "[core_data] WARNING: pg_ctl reload failed while refreshing network allow entries." >&2
+    if pg_ctl -D "${PGDATA}" status >/dev/null 2>&1; then
+      if ! pg_ctl -D "${PGDATA}" reload >/dev/null 2>&1; then
+        echo "[core_data] WARNING: pg_ctl reload failed while refreshing network allow entries." >&2
+      fi
     fi
     exit 0
   fi
   echo "[core_data] FORCE_RENDER_CONFIG=1 set; re-rendering templates." >&2
   rm -f "${SENTINEL}"
 fi

+if [[ "${first_render}" -eq 1 ]]; then
+  if pg_ctl -D "${PGDATA}" status >/dev/null 2>&1; then
+    echo "[core_data] Stopping PostgreSQL before initial configuration render." >&2
+    pg_ctl -D "${PGDATA}" -m fast -w stop >/dev/null 2>&1 || true
+  fi
+fi
+
 if [[ "${POSTGRES_SSL_ENABLED}" == "on" ]]; then
   CERT_DIR=$(dirname "${POSTGRES_SSL_CERT_FILE}")
   KEY_DIR=$(dirname "${POSTGRES_SSL_KEY_FILE}")

@@ -161,8 +176,14 @@ CONF

 echo "[core_data] Rendered PostgreSQL configs and pgBackRest configuration." >&2

-pg_ctl -D "${PGDATA}" -m fast -w restart >/dev/null 2>&1 || {
-  echo "[core_data] WARNING: pg_ctl restart failed during initialization." >&2
-}
+if [[ "${first_render}" -eq 1 ]]; then
+  if ! pg_ctl -D "${PGDATA}" -w start >/dev/null 2>&1; then
+    echo "[core_data] WARNING: pg_ctl start failed during initial configuration." >&2
+  fi
+elif pg_ctl -D "${PGDATA}" status >/dev/null 2>&1; then
+  if ! pg_ctl -D "${PGDATA}" -m fast -w restart >/dev/null 2>&1; then
+    echo "[core_data] WARNING: pg_ctl restart failed during configuration refresh." >&2
+  fi
+fi

 touch "${SENTINEL}"
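
Two knobs now steer this script: CORE_DATA_SKIP_CONFIG_RENDER=1 exits before any rendering (the image smoke test in CI relies on this), and FORCE_RENDER_CONFIG=1 discards the sentinel so templates are re-rendered. A hedged sketch of both; the image tag mirrors the CI job, while the idea that docker-compose.yml forwards FORCE_RENDER_CONFIG to a service named postgres is an assumption for illustration:

# Run the bare image without rendering any core_data configuration.
docker run --rm -d --name render-skip-demo \
  -e POSTGRES_PASSWORD=example \
  -e CORE_DATA_SKIP_CONFIG_RENDER=1 \
  core-data-postgres:test

# Ask for a re-render on the next start of an existing stack
# (assumes the compose file passes FORCE_RENDER_CONFIG through to the service).
FORCE_RENDER_CONFIG=1 docker compose up -d postgres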

postgres/initdb/02-enable-extensions.sh

Lines changed: 8 additions & 0 deletions

@@ -4,6 +4,12 @@

 set -euo pipefail

+if [[ "${CORE_DATA_SKIP_CONFIG_RENDER:-0}" == "1" ]]; then
+  exit 0
+fi
+
+BOOTSTRAP_SENTINEL=${CORE_DATA_BOOTSTRAP_SENTINEL:-${PGDATA}/.core_data_bootstrap_complete}
+
 if [[ -z "${POSTGRES_PASSWORD:-}" && -n "${POSTGRES_PASSWORD_FILE:-}" && -r "${POSTGRES_PASSWORD_FILE}" ]]; then
   POSTGRES_PASSWORD=$(<"${POSTGRES_PASSWORD_FILE}")
 fi

@@ -121,3 +127,5 @@ SQL

 # Ensure template1 ships with extensions and helper functions so new databases inherit them.
 configure_database "template1"
+
+touch "${BOOTSTRAP_SENTINEL}"
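
The new bootstrap sentinel lets callers detect when extension setup has finished instead of guessing from logs. A hedged polling example from the host; the container name core_data-postgres-1 and the default PGDATA of /var/lib/postgresql/data are assumptions for illustration:

#!/usr/bin/env bash
# Wait up to two minutes for the bootstrap sentinel to appear inside the container.
sentinel=/var/lib/postgresql/data/.core_data_bootstrap_complete
for _ in $(seq 1 60); do
  if docker exec core_data-postgres-1 test -f "${sentinel}"; then
    echo "bootstrap complete"
    exit 0
  fi
  sleep 2
done
echo "timed out waiting for bootstrap sentinel" >&2
exit 1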

scripts/collect_diagnostics.sh

Lines changed: 86 additions & 0 deletions

@@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: 2025 Blackcat Informatics® Inc.
+# SPDX-License-Identifier: MIT
+
+set -euo pipefail
+
+output_dir="diagnostics"
+logs_tail=${CORE_DATA_DIAG_LOG_TAIL:-400}
+
+usage() {
+  cat <<'USAGE'
+Usage: collect_diagnostics.sh [--output DIR]
+
+Gather docker/container state, sanitized env vars, and service logs to help debug CI failures.
+USAGE
+}
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --output)
+      output_dir=$2
+      shift 2
+      ;;
+    -h | --help)
+      usage
+      exit 0
+      ;;
+    *)
+      echo "[diagnostics] Unknown argument: $1" >&2
+      exit 1
+      ;;
+  esac
+done
+
+mkdir -p "${output_dir}"
+timestamp=$(date -Iseconds)
+
+run_cmd() {
+  local name=$1
+  shift
+  if command -v "$1" >/dev/null 2>&1; then
+    "$@" >"${output_dir}/${name}.txt" 2>&1 || true
+  fi
+}
+
+run_cmd "docker-ps" docker ps -a
+run_cmd "docker-compose-ls" docker compose ls
+run_cmd "docker-network-ls" docker network ls
+
+sanitize_env() {
+  local source_env=$1
+  local dest=$2
+  if [[ ! -f "${source_env}" ]]; then
+    return
+  fi
+  python3 - "$source_env" "$dest" <<'PY'
+import os
+import re
+import sys
+source, dest = sys.argv[1], sys.argv[2]
+pattern = re.compile(r"(PASSWORD|SECRET|TOKEN|KEY|COOKIE)", re.IGNORECASE)
+with open(source, "r", encoding="utf-8") as fh, open(dest, "w", encoding="utf-8") as out:
+    for line in fh:
+        if "=" in line and not line.lstrip().startswith("#"):
+            key, val = line.rstrip("\n").split("=", 1)
+            if pattern.search(key):
+                line = f"{key}=<redacted>\n"
+        out.write(line)
+PY
+}
+
+sanitize_env "${ENV_FILE:-${PWD}/.env}" "${output_dir}/env.redacted"
+
+collect_container_artifacts() {
+  local container=$1
+  local safe_name=${container//\//_}
+  docker inspect "${container}" >"${output_dir}/${safe_name}--inspect.json" 2>/dev/null || true
+  docker logs --tail "${logs_tail}" "${container}" >"${output_dir}/${safe_name}--logs.txt" 2>&1 || true
+}
+
+containers=$(docker ps -a --format '{{.Names}}' 2>/dev/null | grep -E 'core_data|postgres' || true)
+for name in ${containers}; do
+  collect_container_artifacts "${name}"
+done
+
+echo "[diagnostics] Wrote troubleshooting bundle to ${output_dir} (${timestamp})."
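
CI calls this script with only --output; locally the same interface applies, plus the two environment overrides the script reads (CORE_DATA_DIAG_LOG_TAIL for the log tail length and ENV_FILE for the env file to redact):

# Collect a local bundle with longer log tails and an explicit env file.
CORE_DATA_DIAG_LOG_TAIL=1000 ENV_FILE=./.env \
  ./scripts/collect_diagnostics.sh --output diagnostics-local

# The bundle contains docker-ps.txt, env.redacted, and per-container
# <name>--inspect.json / <name>--logs.txt files.
ls diagnostics-local/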
