diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 91419dc..f6c6a27 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,6 +47,17 @@ jobs: - name: Sync dependencies run: uv sync --dev + - name: Generate .env and secrets + run: ./scripts/manage.sh create-env --non-interactive --force + + - name: Preflight stack bring-up + run: | + set -euo pipefail + cleanup() { ./scripts/manage.sh down >/dev/null 2>&1 || true; } + trap cleanup EXIT + ./scripts/manage.sh build-image + ./scripts/manage.sh up + - name: Run core_data smoke workflow run: uv run python -m pytest -k full_workflow @@ -56,6 +67,17 @@ jobs: docker ps -a docker compose logs || true + - name: Collect diagnostics bundle + if: failure() + run: ./scripts/collect_diagnostics.sh --output diagnostics-smoke-${{ matrix.profile_name }} + + - name: Upload diagnostics bundle + if: failure() + uses: actions/upload-artifact@v4 + with: + name: diagnostics-smoke-${{ matrix.profile_name }} + path: diagnostics-smoke-${{ matrix.profile_name }} + - name: Upload generated backups if: always() uses: actions/upload-artifact@v4 @@ -102,6 +124,17 @@ jobs: - name: Sync dependencies run: uv sync --dev + - name: Generate .env and secrets + run: ./scripts/manage.sh create-env --non-interactive --force + + - name: Preflight stack bring-up + run: | + set -euo pipefail + cleanup() { ./scripts/manage.sh down >/dev/null 2>&1 || true; } + trap cleanup EXIT + ./scripts/manage.sh build-image + ./scripts/manage.sh up + - name: Run marker tests run: uv run python -m pytest -m ${{ matrix.marker }} @@ -111,6 +144,16 @@ jobs: docker ps -a docker compose logs || true + - name: Collect diagnostics bundle + if: failure() + run: ./scripts/collect_diagnostics.sh --output diagnostics-marker-${{ matrix.marker }} + + - name: Upload diagnostics bundle + if: failure() + uses: actions/upload-artifact@v4 + with: + name: diagnostics-marker-${{ matrix.marker }} + path: diagnostics-marker-${{ matrix.marker }} docker-build: 
name: Validate Docker Build runs-on: ubuntu-latest @@ -128,10 +171,52 @@ jobs: file: ./postgres/Dockerfile platforms: linux/amd64 push: false + load: true tags: core-data-postgres:test cache-from: type=gha cache-to: type=gha,mode=max + - name: Smoke-test Docker image + env: + PGPASSWORD: thinice-test + run: | + set -euo pipefail + cleanup() { docker rm -f postgres-smoke >/dev/null 2>&1 || true; } + trap cleanup EXIT + docker run -d --name postgres-smoke \ + -e POSTGRES_USER=thinice-test \ + -e POSTGRES_PASSWORD=thinice-test \ + -e POSTGRES_DB=thinice-test \ + -e CORE_DATA_SKIP_CONFIG_RENDER=1 \ + core-data-postgres:test + tries=0 + max_tries=150 + until docker exec postgres-smoke pg_isready -h localhost -U thinice-test >/dev/null 2>&1; do + tries=$((tries + 1)) + if ((tries >= max_tries)); then + echo "[smoke] postgres-smoke never became ready; printing logs." + docker logs postgres-smoke || true + exit 1 + fi + sleep 2 + done + docker exec postgres-smoke psql -U thinice-test -d thinice-test -c "SELECT 1" >/dev/null + + - name: Stop Docker smoke container + if: always() + run: docker rm -f postgres-smoke >/dev/null 2>&1 || true + + - name: Collect diagnostics bundle + if: failure() + run: ./scripts/collect_diagnostics.sh --output diagnostics-docker-build + + - name: Upload diagnostics bundle + if: failure() + uses: actions/upload-artifact@v4 + with: + name: diagnostics-docker-build + path: diagnostics-docker-build + - name: Validate Dockerfile with hadolint uses: hadolint/hadolint-action@v3.1.0 with: diff --git a/docker-compose.yml b/docker-compose.yml index 80b9c58..3ceabab 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -217,7 +217,11 @@ services: networks: - core_data healthcheck: - test: ["CMD-SHELL", "pg_isready -h $${POSTGRES_HOST:-postgres} -U $${POSTGRES_SUPERUSER:-postgres}"] + test: + [ + "CMD-SHELL", + "PGPASSWORD=$$(cat /run/secrets/postgres_superuser_password 2>/dev/null) pg_isready -h $${POSTGRES_HOST:-postgres} -U 
$${POSTGRES_SUPERUSER:-postgres} -d $${POSTGRES_DB:-postgres}", + ] interval: 30s timeout: 5s retries: 5 diff --git a/pgbouncer/entrypoint.sh b/pgbouncer/entrypoint.sh index db110e9..5f2af63 100755 --- a/pgbouncer/entrypoint.sh +++ b/pgbouncer/entrypoint.sh @@ -18,6 +18,36 @@ export POSTGRES_HOST=${POSTGRES_HOST:-postgres} export POSTGRES_PORT=${POSTGRES_PORT:-5432} export PGBOUNCER_AUTH_USER=${PGBOUNCER_AUTH_USER:-pgbouncer_auth} +wait_for_backend() { + local attempts=${PGBOUNCER_BACKEND_WAIT_ATTEMPTS:-120} + local delay=2 + local attempt=1 + while ((attempt <= attempts)); do + if command -v pg_isready >/dev/null 2>&1; then + if pg_isready -h "${POSTGRES_HOST}" -p "${POSTGRES_PORT}" >/dev/null 2>&1; then + return 0 + fi + elif command -v nc >/dev/null 2>&1; then + if nc -z "${POSTGRES_HOST}" "${POSTGRES_PORT}" >/dev/null 2>&1; then + return 0 + fi + else + if bash -c "exec 3<>/dev/tcp/${POSTGRES_HOST}/${POSTGRES_PORT}" >/dev/null 2>&1; then + exec 3>&- + return 0 + fi + fi + echo "[pgbouncer] waiting for PostgreSQL at ${POSTGRES_HOST}:${POSTGRES_PORT} (attempt ${attempt})" >&2 + sleep "${delay}" + if ((attempt % 10 == 0 && delay < 10)); then + delay=$((delay + 1)) + fi + attempt=$((attempt + 1)) + done + echo "[pgbouncer] timed out waiting for PostgreSQL at ${POSTGRES_HOST}:${POSTGRES_PORT}" >&2 + exit 1 +} + NETWORK_ACCESS_DIR=${NETWORK_ACCESS_DIR:-/opt/core_data/network_access} NETWORK_ALLOW_FILE=${NETWORK_ALLOW_FILE:-${NETWORK_ACCESS_DIR}/allow.list} @@ -53,6 +83,8 @@ fi mkdir -p "${log_dir}" "${run_dir}" "$(dirname "${config_path}")" "$(dirname "${userlist_path}")" "$(dirname "${hba_path}")" umask 077 +wait_for_backend + auth_hba_config="" if [[ -r "${NETWORK_ALLOW_FILE}" ]]; then { diff --git a/postgres/initdb/00-render-config.sh b/postgres/initdb/00-render-config.sh index 2578e67..c5ff264 100755 --- a/postgres/initdb/00-render-config.sh +++ b/postgres/initdb/00-render-config.sh @@ -4,6 +4,10 @@ set -euo pipefail +if [[ "${CORE_DATA_SKIP_CONFIG_RENDER:-0}" == 
"1" ]]; then + exit 0 +fi + TEMPLATE_DIR="/opt/core_data/conf" SENTINEL="${PGDATA}/.core_data_config_rendered" PGBACKREST_CONF_PATH="${PGDATA}/pgbackrest.conf" @@ -81,12 +85,16 @@ export \ mkdir -p "${PGDATA}" +first_render=1 if [[ -f "${SENTINEL}" ]]; then + first_render=0 if [[ "${FORCE_RENDER_CONFIG}" != "1" ]]; then echo "[core_data] Configuration already rendered; refreshing network allow entries." >&2 apply_network_allow_entries - if ! pg_ctl -D "${PGDATA}" reload >/dev/null 2>&1; then - echo "[core_data] WARNING: pg_ctl reload failed while refreshing network allow entries." >&2 + if pg_ctl -D "${PGDATA}" status >/dev/null 2>&1; then + if ! pg_ctl -D "${PGDATA}" reload >/dev/null 2>&1; then + echo "[core_data] WARNING: pg_ctl reload failed while refreshing network allow entries." >&2 + fi fi exit 0 fi @@ -94,6 +102,13 @@ if [[ -f "${SENTINEL}" ]]; then rm -f "${SENTINEL}" fi +if [[ "${first_render}" -eq 1 ]]; then + if pg_ctl -D "${PGDATA}" status >/dev/null 2>&1; then + echo "[core_data] Stopping PostgreSQL before initial configuration render." >&2 + pg_ctl -D "${PGDATA}" -m fast -w stop >/dev/null 2>&1 || true + fi +fi + if [[ "${POSTGRES_SSL_ENABLED}" == "on" ]]; then CERT_DIR=$(dirname "${POSTGRES_SSL_CERT_FILE}") KEY_DIR=$(dirname "${POSTGRES_SSL_KEY_FILE}") @@ -161,8 +176,14 @@ CONF echo "[core_data] Rendered PostgreSQL configs and pgBackRest configuration." >&2 -pg_ctl -D "${PGDATA}" -m fast -w restart >/dev/null 2>&1 || { - echo "[core_data] WARNING: pg_ctl restart failed during initialization." >&2 -} +if [[ "${first_render}" -eq 1 ]]; then + if ! pg_ctl -D "${PGDATA}" -w start >/dev/null 2>&1; then + echo "[core_data] WARNING: pg_ctl start failed during initial configuration." >&2 + fi +elif pg_ctl -D "${PGDATA}" status >/dev/null 2>&1; then + if ! pg_ctl -D "${PGDATA}" -m fast -w restart >/dev/null 2>&1; then + echo "[core_data] WARNING: pg_ctl restart failed during configuration refresh." 
>&2 + fi +fi touch "${SENTINEL}" diff --git a/postgres/initdb/02-enable-extensions.sh b/postgres/initdb/02-enable-extensions.sh index 7b13b8d..296800d 100755 --- a/postgres/initdb/02-enable-extensions.sh +++ b/postgres/initdb/02-enable-extensions.sh @@ -4,6 +4,12 @@ set -euo pipefail +if [[ "${CORE_DATA_SKIP_CONFIG_RENDER:-0}" == "1" ]]; then + exit 0 +fi + +BOOTSTRAP_SENTINEL=${CORE_DATA_BOOTSTRAP_SENTINEL:-${PGDATA}/.core_data_bootstrap_complete} + if [[ -z "${POSTGRES_PASSWORD:-}" && -n "${POSTGRES_PASSWORD_FILE:-}" && -r "${POSTGRES_PASSWORD_FILE}" ]]; then POSTGRES_PASSWORD=$(<"${POSTGRES_PASSWORD_FILE}") fi @@ -121,3 +127,5 @@ SQL # Ensure template1 ships with extensions and helper functions so new databases inherit them. configure_database "template1" + +touch "${BOOTSTRAP_SENTINEL}" diff --git a/scripts/collect_diagnostics.sh b/scripts/collect_diagnostics.sh new file mode 100755 index 0000000..f3a0619 --- /dev/null +++ b/scripts/collect_diagnostics.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: 2025 Blackcat InformaticsĀ® Inc. +# SPDX-License-Identifier: MIT + +set -euo pipefail + +output_dir="diagnostics" +logs_tail=${CORE_DATA_DIAG_LOG_TAIL:-400} + +usage() { + cat <<'USAGE' +Usage: collect_diagnostics.sh [--output DIR] + +Gather docker/container state, sanitized env vars, and service logs to help debug CI failures. +USAGE +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --output) + output_dir=$2 + shift 2 + ;; + -h | --help) + usage + exit 0 + ;; + *) + echo "[diagnostics] Unknown argument: $1" >&2 + exit 1 + ;; + esac +done + +mkdir -p "${output_dir}" +timestamp=$(date -Iseconds) + +run_cmd() { + local name=$1 + shift + if command -v "$1" >/dev/null 2>&1; then + "$@" >"${output_dir}/${name}.txt" 2>&1 || true + fi +} + +run_cmd "docker-ps" docker ps -a +run_cmd "docker-compose-ls" docker compose ls +run_cmd "docker-network-ls" docker network ls + +sanitize_env() { + local source_env=$1 + local dest=$2 + if [[ ! 
-f "${source_env}" ]]; then + return + fi + python3 - "$source_env" "$dest" <<'PY' +import os +import re +import sys +source, dest = sys.argv[1], sys.argv[2] +pattern = re.compile(r"(PASSWORD|SECRET|TOKEN|KEY|COOKIE)", re.IGNORECASE) +with open(source, "r", encoding="utf-8") as fh, open(dest, "w", encoding="utf-8") as out: + for line in fh: + if "=" in line and not line.lstrip().startswith("#"): + key, val = line.rstrip("\n").split("=", 1) + if pattern.search(key): + line = f"{key}=\n" + out.write(line) +PY +} + +sanitize_env "${ENV_FILE:-${PWD}/.env}" "${output_dir}/env.redacted" + +collect_container_artifacts() { + local container=$1 + local safe_name=${container//\//_} + docker inspect "${container}" >"${output_dir}/${safe_name}--inspect.json" 2>/dev/null || true + docker logs --tail "${logs_tail}" "${container}" >"${output_dir}/${safe_name}--logs.txt" 2>&1 || true +} + +containers=$(docker ps -a --format '{{.Names}}' 2>/dev/null | grep -E 'core_data|postgres' || true) +for name in ${containers}; do + collect_container_artifacts "${name}" +done + +echo "[diagnostics] Wrote troubleshooting bundle to ${output_dir} (${timestamp})." 
diff --git a/scripts/lib/common.sh b/scripts/lib/common.sh index dbd9e4a..cf2755b 100755 --- a/scripts/lib/common.sh +++ b/scripts/lib/common.sh @@ -13,6 +13,8 @@ ENV_FILE=${ENV_FILE:-${ROOT_DIR}/.env} PGBACKREST_CONF=${PGBACKREST_CONF:-/var/lib/postgresql/data/pgbackrest.conf} POSTGRES_HOST=${POSTGRES_HOST:-localhost} POSTGRES_EXEC_USER=${POSTGRES_EXEC_USER:-postgres} +CORE_DATA_BOOTSTRAP_SENTINEL=${CORE_DATA_BOOTSTRAP_SENTINEL:-/var/lib/postgresql/data/.core_data_bootstrap_complete} +CORE_DATA_HEALTH_GUARD_SERVICES=${CORE_DATA_HEALTH_GUARD_SERVICES:-"postgres pgbouncer pghero"} if [[ -f "${ENV_FILE}" ]]; then set -a @@ -71,6 +73,116 @@ compose_has_service() { compose config --services 2>/dev/null | grep -Fxq "${service}" } +wait_for_service_healthy() { + local service=$1 + local timeout=${2:-180} + local poll_interval=${3:-2} + local stable_window=${4:-5} + if ! compose_has_service "${service}"; then + echo "[core_data] Service '${service}' not defined; skipping health wait." >&2 + return 0 + fi + local elapsed=0 + local announced=false + local healthy_started=-1 + while ((elapsed <= timeout)); do + local container_id + container_id=$(compose ps -q "${service}" 2>/dev/null | head -n 1 || true) + if [[ -z "${container_id}" ]]; then + if [[ "${announced}" == false ]]; then + echo "[core_data] Waiting for container '${service}' to start..." >&2 + announced=true + fi + else + local status + status=$(docker inspect --format '{{if .State.Health}}{{.State.Health.Status}}{{else}}missing{{end}}' "${container_id}" 2>/dev/null || echo "missing") + case "${status}" in + healthy) + if ((healthy_started < 0)); then + healthy_started=${elapsed} + elif (((elapsed - healthy_started) >= stable_window)); then + if [[ "${announced}" == true ]]; then + echo "[core_data] Service '${service}' is healthy." >&2 + fi + return 0 + fi + ;; + missing) + echo "[core_data] Service '${service}' has no healthcheck; skipping health wait." 
>&2 + return 0 + ;; + unhealthy) + echo "[core_data] Service '${service}' reported unhealthy status; continuing to wait (${elapsed}s elapsed)." >&2 + announced=true + healthy_started=-1 + ;; + starting) + if [[ "${announced}" == false ]]; then + echo "[core_data] Waiting for service '${service}' healthcheck..." >&2 + announced=true + fi + healthy_started=-1 + ;; + *) + echo "[core_data] Service '${service}' health status '${status}'; continuing to wait (${elapsed}s elapsed)." >&2 + announced=true + healthy_started=-1 + ;; + esac + fi + sleep "${poll_interval}" + elapsed=$((elapsed + poll_interval)) + done + echo "[core_data] Service '${service}' did not become healthy within ${timeout}s." >&2 + return 1 +} + +ensure_bootstrap_complete() { + local sentinel=${CORE_DATA_BOOTSTRAP_SENTINEL} + if compose_exec bash -lc "[[ -f '${sentinel}' ]]" >/dev/null 2>&1; then + return 0 + fi + echo "[core_data] WARNING: bootstrap sentinel '${sentinel}' missing inside container; verifying cluster state." >&2 + if compose_exec env PGPASSWORD="${POSTGRES_SUPERUSER_PASSWORD:-}" \ + psql --host localhost --username "${POSTGRES_SUPERUSER:-postgres}" \ + --dbname "${POSTGRES_DB:-postgres}" --tuples-only --command "SELECT 1;" >/dev/null 2>&1; then + if compose_exec bash -lc "touch '${sentinel}'" >/dev/null 2>&1; then + echo "[core_data] Re-created bootstrap sentinel for existing data directory." >&2 + return 0 + fi + fi + echo "[core_data] PostgreSQL initialization appears incomplete; check container logs and rerun './scripts/manage.sh up'." >&2 + return 1 +} + +stabilize_postgres() { + local required_stable=${POSTGRES_STABLE_WINDOW_SECONDS:-15} + local max_window=${POSTGRES_STABILIZATION_TIMEOUT:-120} + local db=${POSTGRES_DB:-postgres} + local superuser=${POSTGRES_SUPERUSER:-postgres} + local elapsed=0 + local consecutive=0 + while ((elapsed < max_window)); do + if ! 
compose_exec env PGPASSWORD="${POSTGRES_SUPERUSER_PASSWORD:-}" pg_isready -h localhost -U "${superuser}" >/dev/null 2>&1; then + echo "[core_data] PostgreSQL failed readiness check during stabilization window." >&2 + consecutive=0 + elif ! compose_exec env PGPASSWORD="${POSTGRES_SUPERUSER_PASSWORD:-}" \ + psql --host localhost --username "${superuser}" --dbname "${db}" --command "SELECT 1;" >/dev/null 2>&1; then + echo "[core_data] PostgreSQL query probe failed while waiting for stability." >&2 + consecutive=0 + else + consecutive=$((consecutive + 1)) + if ((consecutive >= required_stable)); then + return 0 + fi + fi + sleep 1 + elapsed=$((elapsed + 1)) + done + echo "[core_data] PostgreSQL did not remain stable for ${required_stable}s within ${max_window}s." >&2 + return 1 +} + # compose runs docker compose with the arguments provided. compose() { ${COMPOSE_BIN} "$@" diff --git a/scripts/logical_backup_runner.sh b/scripts/logical_backup_runner.sh index 942fe48..d9788ab 100755 --- a/scripts/logical_backup_runner.sh +++ b/scripts/logical_backup_runner.sh @@ -50,10 +50,22 @@ RUNNING=true trap 'RUNNING=false' TERM INT wait_for_postgres() { - until "${PG_ENV[@]}" pg_isready -q; do - log "waiting for postgres at ${POSTGRES_HOST}:${POSTGRES_PORT}" - sleep 5 + local attempts=${LOGICAL_BACKUP_WAIT_ATTEMPTS:-120} + local delay=2 + local attempt=1 + while ((attempt <= attempts)); do + if "${PG_ENV[@]}" pg_isready -q >/dev/null 2>&1 && "${PG_ENV[@]}" psql -Atqc "SELECT 1;" >/dev/null 2>&1; then + return 0 + fi + log "waiting for postgres at ${POSTGRES_HOST}:${POSTGRES_PORT} (attempt ${attempt})" + sleep "${delay}" + if ((attempt % 10 == 0 && delay < 10)); then + delay=$((delay + 1)) + fi + attempt=$((attempt + 1)) done + log "postgres never became ready; giving up" + return 1 } perform_backup() { @@ -87,7 +99,9 @@ perform_backup() { } main_loop() { - wait_for_postgres + if ! 
wait_for_postgres; then + exit 1 + fi while ${RUNNING}; do local cycle_start cycle_start=$(date +%s) @@ -107,7 +121,9 @@ main_loop() { log "sleeping ${sleep_seconds}s before next backup" sleep "${sleep_seconds}" & wait $! || true - wait_for_postgres + if ! wait_for_postgres; then + exit 1 + fi done } diff --git a/scripts/manage.sh b/scripts/manage.sh index 0df23ab..7bb3877 100755 --- a/scripts/manage.sh +++ b/scripts/manage.sh @@ -432,6 +432,33 @@ build-image) up) ensure_env compose up -d + if ! wait_for_service_healthy "${POSTGRES_SERVICE_NAME:-postgres}" "${POSTGRES_HEALTH_TIMEOUT:-180}" 2; then + echo "[core_data] PostgreSQL service failed to become healthy." >&2 + exit 1 + fi + if ! ensure_bootstrap_complete; then + echo "[core_data] PostgreSQL bootstrap did not finish successfully." >&2 + exit 1 + fi + if ! stabilize_postgres; then + echo "[core_data] PostgreSQL restarted or became unhealthy during the stabilization window." >&2 + exit 1 + fi + if [[ -n "${CORE_DATA_HEALTH_GUARD_SERVICES:-}" ]]; then + IFS=' ' read -r -a guard_services <<<"${CORE_DATA_HEALTH_GUARD_SERVICES}" + for svc in "${guard_services[@]}"; do + [[ -z "${svc}" ]] && continue + if [[ "${svc}" == "${POSTGRES_SERVICE_NAME:-postgres}" ]]; then + continue + fi + if compose_has_service "${svc}"; then + if ! wait_for_service_healthy "${svc}" "${SERVICE_HEALTH_TIMEOUT:-120}" 2; then + echo "[core_data] Service '${svc}' failed to become healthy." >&2 + exit 1 + fi + fi + done + fi warn_if_config_drift ;; down) diff --git a/scripts/pghero_entrypoint.sh b/scripts/pghero_entrypoint.sh index ce9f2f1..6a0bd90 100755 --- a/scripts/pghero_entrypoint.sh +++ b/scripts/pghero_entrypoint.sh @@ -1,8 +1,8 @@ -#!/usr/bin/env bash +#!/bin/sh # SPDX-FileCopyrightText: 2025 Blackcat InformaticsĀ® Inc. 
# SPDX-License-Identifier: MIT -set -euo pipefail +set -eu POSTGRES_SUPERUSER=${POSTGRES_SUPERUSER:-postgres} POSTGRES_DB=${POSTGRES_DB:-postgres} @@ -10,7 +10,7 @@ POSTGRES_HOST=${POSTGRES_HOST:-postgres} POSTGRES_PORT=${POSTGRES_PORT:-5432} PASSWORD_FILE=${POSTGRES_SUPERUSER_PASSWORD_FILE:-/run/secrets/postgres_superuser_password} -if [[ ! -r "${PASSWORD_FILE}" ]]; then +if [ ! -r "${PASSWORD_FILE}" ]; then echo "[pghero] password file ${PASSWORD_FILE} not readable" >&2 exit 1 fi @@ -19,4 +19,25 @@ PASSWORD=$(cat "${PASSWORD_FILE}") export DATABASE_URL="postgres://${POSTGRES_SUPERUSER}:${PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB}?sslmode=prefer" export PGHERO_DATABASE_URL="${DATABASE_URL}" +wait_for_database() { + attempts=${PGHERO_DB_WAIT_ATTEMPTS:-90} + delay=2 + attempt=1 + while [ "${attempt}" -le "${attempts}" ]; do + if DATABASE_URL="${DATABASE_URL}" bundle exec ruby -e "require 'pg'; conn = PG.connect(ENV['DATABASE_URL']); conn.exec('SELECT 1'); conn.close" >/dev/null 2>&1; then + return 0 + fi + echo "[pghero] waiting for PostgreSQL at ${POSTGRES_HOST}:${POSTGRES_PORT} (attempt ${attempt})" >&2 + sleep "${delay}" + if [ $((attempt % 10)) -eq 0 ] && [ "${delay}" -lt 10 ]; then + delay=$((delay + 1)) + fi + attempt=$((attempt + 1)) + done + echo "[pghero] timed out waiting for PostgreSQL" >&2 + exit 1 + } + +wait_for_database + exec bundle exec puma -C /app/config/puma.rb diff --git a/tests/test_manage.py b/tests/test_manage.py index 6aa2292..bb4c760 100644 --- a/tests/test_manage.py +++ b/tests/test_manage.py @@ -386,51 +386,60 @@ def seed_secret(relative_path): f"service {service} should define a seccomp security option" ) + keep_stack = os.environ.get("CORE_DATA_TEST_KEEP_STACK") == "1" + try: yield env, project_name finally: - subprocess.run(["docker", "compose", "down", "-v"], cwd=ROOT, env=env, check=False) - subprocess.run( - ["docker", "pull", "busybox"], - check=False, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - 
if backups_target.exists(): + if keep_stack: + print( + f"[core_data tests] CORE_DATA_TEST_KEEP_STACK=1; " + f"leaving project '{project_name}' running for debugging. " + f"ENV_FILE={env_file}" + ) + else: + subprocess.run(["docker", "compose", "down", "-v"], cwd=ROOT, env=env, check=False) subprocess.run( - [ - "docker", - "run", - "--rm", - "-v", - f"{backups_target.resolve()}:/target", - "busybox", - "sh", - "-c", - "rm -rf /target/* /target/.[!.]* /target/..?*", - ], + ["docker", "pull", "busybox"], check=False, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, ) - for path, existed, backup in managed_secrets: - if existed and backup is not None: - path.write_bytes(backup) + if backups_target.exists(): + subprocess.run( + [ + "docker", + "run", + "--rm", + "-v", + f"{backups_target.resolve()}:/target", + "busybox", + "sh", + "-c", + "rm -rf /target/* /target/.[!.]* /target/..?*", + ], + check=False, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + for path, existed, backup in managed_secrets: + if existed and backup is not None: + path.write_bytes(backup) + else: + path.unlink(missing_ok=True) + env_file.unlink(missing_ok=True) + if had_env and backup_env_bytes is not None: + repo_env_path.write_bytes(backup_env_bytes) else: - path.unlink(missing_ok=True) - env_file.unlink(missing_ok=True) - if had_env and backup_env_bytes is not None: - repo_env_path.write_bytes(backup_env_bytes) - else: - repo_env_path.unlink(missing_ok=True) + repo_env_path.unlink(missing_ok=True) - for data_path, backup_entry in managed_data_dirs: - rel = data_path.relative_to(data_root) - if data_path.exists(): - busybox_volume_command(f"rm -rf /data/{rel}") - if backup_entry is not None: - backup_rel = backup_entry.relative_to(data_root) - busybox_volume_command(f"mv /data/{backup_rel} /data/{rel}") + for data_path, backup_entry in managed_data_dirs: + rel = data_path.relative_to(data_root) + if data_path.exists(): + busybox_volume_command(f"rm -rf /data/{rel}") + if 
backup_entry is not None: + backup_rel = backup_entry.relative_to(data_root) + busybox_volume_command(f"mv /data/{backup_rel} /data/{rel}") def run_manage(env, *args, check=True): @@ -1361,6 +1370,25 @@ def worker(idx): compose_down(env, volumes=True) +@pytest.mark.pool +def test_database_recreation_cycles(manage_env): + env, _ = manage_env + run_manage(env, "build-image") + run_manage(env, "up") + try: + wait_for_ready(env) + run_manage(env, "create-user", "ci_user", "ci_password") + for cycle in range(3): + db_name = f"ci_regression_{cycle}" + run_manage(env, "create-db", db_name, "ci_user") + run_manage(env, "psql", "-d", db_name, "-c", "SELECT current_database();") + run_manage(env, "drop-db", db_name) + wait_for_ready(env) + finally: + run_manage(env, "down") + compose_down(env, volumes=True) + + @pytest.mark.pool def test_test_dataset_bootstrap(manage_env): env, _ = manage_env