diff --git a/README.md b/README.md index 44583ab..aefaefb 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,7 @@ An example is shown here: curl -X POST http://localhost:8080/stackql \ -H "Content-Type: application/json" \ -d '{ - "query": "SELECT COUNT(*) FROM aws.ec2.instances_list_only WHERE region = '\''ap-southeast-2'\''", + "query": "SELECT COUNT(*) as count FROM aws.ec2.instances_list_only WHERE region = '\''ap-southeast-2'\''", "params": { "region": "ap-southeast-2" }, diff --git a/demo.sh b/demo.sh index 580988c..669548f 100644 --- a/demo.sh +++ b/demo.sh @@ -1,11 +1,12 @@ LOGLEVEL=DEBUG docker compose up --build docker compose exec api deno test --allow-net tests/api.test.ts +docker compose exec api deno test --allow-net tests/duckdb.test.ts curl -X POST http://localhost:8080/stackql \ -H "Content-Type: application/json" \ -d '{ - "query": "SELECT COUNT(*) FROM aws.ec2.instances_list_only WHERE region = '\''$region'\''", + "query": "SELECT COUNT(*) as count FROM aws.ec2.instances_list_only WHERE region = '\''$region'\''", "params": { "region": "ap-southeast-2" }, @@ -15,10 +16,15 @@ curl -X POST http://localhost:8080/stackql \ curl -X POST http://localhost:8080/stackql \ -H "Content-Type: application/json" \ -d '{ - "query": "SELECT COUNT(*) FROM aws.ec2.instances_list_only WHERE region = '\''ap-southeast-2'\''", + "query": "SELECT COUNT(*) as count FROM aws.ec2.instances_list_only WHERE region = '\''ap-southeast-2'\''", "showMetadata": true }' # SELECT COUNT(*) FROM aws.ec2.instances_list_only WHERE region = 'ap-southeast-2'; +export SQL_QUERY="SELECT country_region AS country, CAST(SUM(confirmed) AS INTEGER) AS total_confirmed, CAST(SUM(deaths) AS INTEGER) AS total_deaths, COUNT(*) AS record_count FROM read_csv_auto('s3://covid19-lake/archived/enigma-jhu/csv/Enigma-JHU.csv.gz') GROUP BY country_region ORDER BY total_confirmed DESC LIMIT 5" +curl -X POST http://localhost:8080/duckdb \ + -H "Content-Type: application/json" \ + -d "{\"query\": \"${SQL_QUERY}\", \"showMetadata\": true}" + docker compose down \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 9bede20..812711c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -42,6 +42,24 @@ services: retries: 3 command: stackql --pgsrv.port=5444 srv + duckdb: + build: + context: . + dockerfile: duckdb.Dockerfile + expose: + - 5433 + environment: + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_REGION + volumes: + - duckdb-data:/app/data + healthcheck: + test: ["CMD", "/app/healthcheck.sh"] + interval: 10s + timeout: 5s + retries: 3 + api: build: context: ./src @@ -49,9 +67,16 @@ services: environment: DB_HOST: runner DB_PORT: 5444 + DUCKDB_HOST: duckdb + DUCKDB_PORT: 5433 LOGLEVEL: ${LOGLEVEL:-INFO} ports: - "8080:8080" depends_on: runner: - condition: service_healthy \ No newline at end of file + condition: service_healthy + duckdb: + condition: service_healthy + +volumes: + duckdb-data: diff --git a/duckdb.Dockerfile b/duckdb.Dockerfile new file mode 100644 index 0000000..d34874b --- /dev/null +++ b/duckdb.Dockerfile @@ -0,0 +1,25 @@ +FROM debian:bullseye-slim + +RUN apt-get update && apt-get install -y wget \ + && wget https://github.com/duckdb/duckdb/releases/download/v1.2.0/duckdb_cli-linux-amd64.zip \ + && apt-get install -y unzip \ + && unzip duckdb_cli-linux-amd64.zip \ + && rm duckdb_cli-linux-amd64.zip \ + && apt-get remove -y wget unzip \ + && apt-get autoremove -y \ + && apt-get clean + +WORKDIR /app + +COPY scripts/startup.sh /app/ +COPY scripts/healthcheck.sh /app/ + +RUN chmod +x /app/startup.sh /app/healthcheck.sh && \ + mv /duckdb /app/duckdb + +EXPOSE 5433 + +HEALTHCHECK --interval=10s --timeout=5s --start-period=5s --retries=3 \ + CMD ["/app/healthcheck.sh"] + +CMD ["/app/startup.sh"] \ No newline at end of file diff --git a/scripts/healthcheck.sh b/scripts/healthcheck.sh new file mode 100644 index 0000000..4427e73 --- /dev/null +++ b/scripts/healthcheck.sh @@ -0,0 +1,2 @@ +#!/bin/sh +test -f /app/healthy && ./duckdb mydatabase.db ".databases" > /dev/null 2>&1 \ No newline at end of file diff --git a/scripts/startup.sh b/scripts/startup.sh new file mode 100644 index 0000000..d84d543 --- /dev/null +++ b/scripts/startup.sh @@ -0,0 +1,4 @@ +#!/bin/sh +./duckdb mydatabase.db ".databases" > /dev/null 2>&1 || ./duckdb mydatabase.db +touch /app/healthy +tail -f /dev/null \ No newline at end of file diff --git a/src/Dockerfile b/src/Dockerfile index b06ae31..20bff00 100644 --- a/src/Dockerfile +++ b/src/Dockerfile @@ -1,7 +1,23 @@ # src/Dockerfile FROM denoland/deno:latest + +# Install DuckDB dependencies +RUN apt-get update && apt-get install -y \ + wget \ + unzip \ + && rm -rf /var/lib/apt/lists/* + +# Install DuckDB shared library +RUN wget https://github.com/duckdb/duckdb/releases/download/v1.2.0/libduckdb-linux-amd64.zip \ + && unzip libduckdb-linux-amd64.zip \ + && mv libduckdb.so /usr/lib/ \ + && rm libduckdb-linux-amd64.zip + WORKDIR /app + COPY . . -RUN deno cache --reload --unstable app.ts -EXPOSE 8080 -CMD ["run", "--allow-net", "--allow-env", "--watch", "app.ts"] \ No newline at end of file + +# Cache the dependencies +RUN deno cache deps.ts + +CMD ["deno", "run", "--allow-net", "--allow-env", "--allow-read", "--allow-write", "--allow-ffi", "--unstable", "--watch", "app.ts"] \ No newline at end of file diff --git a/src/app.ts b/src/app.ts index 57da775..3bd922a 100644 --- a/src/app.ts +++ b/src/app.ts @@ -9,6 +9,7 @@ import { errorHandler } from "./middleware/error.middleware.ts"; import { healthRouter } from "./routes/health.routes.ts"; import { queryRouter } from "./routes/query.routes.ts"; import { metaRouter } from "./routes/meta.routes.ts"; +import { duckdbRouter } from "./routes/duckdb.routes.ts"; const app = new Application(); @@ -28,10 +29,12 @@ app.use(errorHandler); app.use(healthRouter.routes()); app.use(queryRouter.routes()); app.use(metaRouter.routes()); +app.use(duckdbRouter.routes()); app.use(healthRouter.allowedMethods()); app.use(queryRouter.allowedMethods()); app.use(metaRouter.allowedMethods()); +app.use(duckdbRouter.allowedMethods()); app.addEventListener("error", (evt) => { logger.error(`Uncaught error: ${evt.error}`); diff --git a/src/config/environment.ts b/src/config/environment.ts index 7f4f0f2..1fd5eca 100644 --- a/src/config/environment.ts +++ b/src/config/environment.ts @@ -10,6 +10,10 @@ export const config = { name: 'stackql', user: 'stackql', }, + duckdb: { + host: Deno.env.get('DUCKDB_HOST') || 'localhost', + port: Number(Deno.env.get('DUCKDB_PORT')) || 5433, + }, logging: { level: Deno.env.get('LOGLEVEL') || 'INFO' } diff --git a/src/controllers/duckdb.controller.ts b/src/controllers/duckdb.controller.ts new file mode 100644 index 0000000..4487d8b --- /dev/null +++ b/src/controllers/duckdb.controller.ts @@ -0,0 +1,61 @@ +// controllers/duckdb.controller.ts +import { Context } from "../types/context.types.ts"; +import { QueryRequest, QueryResponse } from "../types/api.types.ts"; +import { duckdb } from "../services/duckdb.service.ts"; +import { logger } from "../services/logger.service.ts"; + +export async function executeQuery(ctx: Context) { + if (!ctx.request.hasBody) { + ctx.response.status = 400; + ctx.response.body = { error: "Request body required" }; + return; + } + + try { + const body = await ctx.request.body.json(); + const reqData: QueryRequest = typeof body === "string" ? JSON.parse(body) : body; + logger.debug(`DuckDB request: ${JSON.stringify(reqData)}`); + + const startTime = new Date().toISOString(); + const t0 = performance.now(); + + const rows = duckdb.query(reqData.query); + logger.debug('DuckDB raw result:', JSON.stringify(rows)); + + // Try wrapping result in a more basic structure + const data = Array.isArray(rows) ? rows : [rows]; + logger.debug('Processed result:', JSON.stringify(data)); + + const response: QueryResponse = { + data, + ...((reqData.showMetadata !== false) && { + metadata: { + operation: { + startTime, + endTime: new Date().toISOString(), + duration: `${performance.now() - t0} ms`, + status: 'OK' + }, + result: { + rowCount: data.length + }, + request: { + query: reqData.query + } + } + }) + }; + + logger.debug('Final response:', JSON.stringify(response)); + + ctx.response.type = 'application/json'; + ctx.response.status = 200; + ctx.response.body = response; + + } catch (error) { + logger.error(`DuckDB query execution failed: ${error.message}`); + logger.error(`Stack trace: ${error.stack}`); + ctx.response.status = 400; + ctx.response.body = { error: error.message }; + } +} \ No newline at end of file diff --git a/src/routes/duckdb.routes.ts b/src/routes/duckdb.routes.ts new file mode 100644 index 0000000..c5393fd --- /dev/null +++ b/src/routes/duckdb.routes.ts @@ -0,0 +1,6 @@ +// routes/duckdb.routes.ts +import { Router } from "../deps.ts"; +import * as duckdbController from "../controllers/duckdb.controller.ts"; + +export const duckdbRouter = new Router() + .post("/duckdb", duckdbController.executeQuery); \ No newline at end of file diff --git a/src/services/duckdb.service.ts b/src/services/duckdb.service.ts new file mode 100644 index 0000000..c752b21 --- /dev/null +++ b/src/services/duckdb.service.ts @@ -0,0 +1,68 @@ +// services/duckdb.service.ts +import { open } from "https://deno.land/x/duckdb/mod.ts"; +import { config } from "../config/environment.ts"; +import { logger } from "./logger.service.ts"; + +class DuckDBService { + private db; + private connection; + + constructor() { + this.db = open(":memory:"); + this.connection = this.db.connect(); + this.init(); + } + + private async init() { + try { + // Install and load httpfs extension for S3 access + this.connection.query("INSTALL httpfs;"); + this.connection.query("LOAD httpfs;"); + + // Configure S3 credentials if available + const awsAccessKey = Deno.env.get("AWS_ACCESS_KEY_ID"); + const awsSecretKey = Deno.env.get("AWS_SECRET_ACCESS_KEY"); + const awsRegion = Deno.env.get("AWS_REGION"); + + if (awsAccessKey && awsSecretKey && awsRegion) { + this.connection.query( + `SET s3_access_key_id='${awsAccessKey}'; + SET s3_secret_access_key='${awsSecretKey}'; + SET s3_region='${awsRegion}';` + ); + } + logger.info("DuckDB initialized successfully"); + } catch (error) { + logger.error(`DuckDB initialization failed: ${error.message}`); + throw error; + } + } + + query(sql: string): T[] { + try { + logger.debug(`Executing query: ${sql}`); + const results: T[] = []; + + for (const row of this.connection.stream(sql)) { + logger.debug('Row:', row); + results.push(row as T); + } + + return results; + } catch (error) { + logger.error(`DuckDB query failed: ${error.message}`); + throw error; + } + } + + close() { + try { + this.connection.close(); + this.db.close(); + } catch (error) { + logger.error(`DuckDB closing failed: ${error.message}`); + } + } +} + +export const duckdb = new DuckDBService(); \ No newline at end of file diff --git a/src/tests/duckdb.test.ts b/src/tests/duckdb.test.ts new file mode 100644 index 0000000..8be4dd6 --- /dev/null +++ b/src/tests/duckdb.test.ts @@ -0,0 +1,41 @@ +// tests/duckdb.test.ts +import { assertEquals, assertExists } from "https://deno.land/std@0.208.0/assert/mod.ts"; + +const SQL_QUERY = ` + SELECT country_region AS country, + CAST(SUM(confirmed) AS INTEGER) AS total_confirmed, + CAST(SUM(deaths) AS INTEGER) AS total_deaths, + COUNT(*) AS record_count + FROM read_csv_auto('s3://covid19-lake/archived/enigma-jhu/csv/Enigma-JHU.csv.gz') + GROUP BY country_region + ORDER BY total_confirmed DESC + LIMIT 5 +`; + +Deno.test("DuckDB Integration Tests", async (t) => { + await t.step("Query execution - COVID-19 Dataset", async () => { + const response = await fetch("http://localhost:8080/duckdb", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + query: SQL_QUERY, + showMetadata: true + }) + }); + + assertEquals(response.status, 200); + + const data = await response.json(); + assertExists(data.data); + assertExists(data.metadata); + assertEquals(Array.isArray(data.data), true); + + if (data.data.length > 0) { + const firstRow = data.data[0]; + assertExists(firstRow.country); + assertExists(firstRow.total_confirmed); + assertExists(firstRow.total_deaths); + assertExists(firstRow.record_count); + } + }); +});