diff --git a/.errcheck_excludes.txt b/.errcheck_excludes.txt deleted file mode 100644 index 306a6f62c..000000000 --- a/.errcheck_excludes.txt +++ /dev/null @@ -1 +0,0 @@ -(github.com/go-kit/log.Logger).Log diff --git a/.github/workflows/buf.yml b/.github/workflows/buf.yml new file mode 100644 index 000000000..f8cbf61cf --- /dev/null +++ b/.github/workflows/buf.yml @@ -0,0 +1,19 @@ +name: Buf + +on: + push: + branches: [main] + pull_request: + branches: [main] + paths: + - "proto/**/*.proto" + +jobs: + format: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: bufbuild/buf-setup-action@v1 + + - name: Format + run: buf format --exit-code diff --git a/.github/workflows/dst.yml b/.github/workflows/dst.yml new file mode 100644 index 000000000..afc853c6c --- /dev/null +++ b/.github/workflows/dst.yml @@ -0,0 +1,49 @@ +name: DST + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + dst: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Go + run: | + curl -L -o go-custom-linux-amd64.tar.gz https://github.com/polarsignals/go/releases/download/v0.0.2/go-custom-linux-amd64.tar.gz + tar -xzf go-custom-linux-amd64.tar.gz + sudo mv go /usr/local/go + echo "/usr/local/go/bin" >> $GITHUB_PATH + echo "GOROOT=/usr/local/go" >> $GITHUB_ENV + + - name: Setup Go module cache + uses: actions/cache@v4 + with: + path: | + ~/.cache/go-build + ~/go/pkg/mod + key: ${{ runner.os }}-golang-${{ hashFiles('**/go.sum') }} + restore-keys: | + ${{ runner.os }}-golang- + + - name: Compile DST Test and runtime + run: | + # ldflags=-checklinkname=0 is required for parquet-go to build with + # Go 1.23. + GOOS=wasip1 GOARCH=wasm go test -tags=faketime -ldflags=-checklinkname=0 -c -o dst.wasm ./dst + go build ./dst/runtime + + - name: Run DST Test + run: | + echo "Running DST Test. The first run is slow due to module compilation. A compilation cache is used for subsequent runs." 
+ iterations=10 + for ((i=0; i < iterations; i++)) + do + export GORANDSEED=$RANDOM$RANDOM$RANDOM$RANDOM$RANDOM$RANDOM + echo "Running DST test iteration $((i+1))/${iterations} with GORANDSEED=${GORANDSEED}" + ./runtime --module=./dst.wasm + done diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index d70599340..7d481bf9b 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -11,12 +11,12 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Go - uses: actions/setup-go@v2 + uses: actions/setup-go@v5 with: - go-version: 1.19 + go-version: '1.24' - name: Test - run: go test -race -tags debug -v ./... + run: go test -short -race -tags assert,debug -v ./... diff --git a/.github/workflows/golangci-lint-skip.yml b/.github/workflows/golangci-lint-skip.yml new file mode 100644 index 000000000..17f685777 --- /dev/null +++ b/.github/workflows/golangci-lint-skip.yml @@ -0,0 +1,28 @@ +name: golangci-lint +# This job is run when the golangci-lint job is not triggered (e.g. no go files +# were modified). The reason this no-op job is required is to report success, +# otherwise the lint job will be perpetually stuck in a "pending" state, +# resulting in an inability to merge given that the lint check is required to +# pass. 
+on: + push: + tags: + - v* + branches: [ main ] + paths-ignore: + - '**.go' + - .golangci.yml + - .github/workflows/golangci-lint.yml + pull_request: + branches: [ main ] + paths-ignore: + - '**.go' + - .golangci.yml + - .github/workflows/golangci-lint.yml + +jobs: + lint: + name: lint + runs-on: ubuntu-latest + steps: + - run: 'echo "no golangci-lint required"' diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml index 9981226d6..0ab9a8939 100644 --- a/.github/workflows/golangci-lint.yml +++ b/.github/workflows/golangci-lint.yml @@ -21,13 +21,14 @@ jobs: runs-on: ubuntu-latest continue-on-error: true steps: - - uses: actions/setup-go@v2 + - uses: actions/setup-go@v5 with: - go-version: ^1.19 + go-version: ^1.24 - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: golangci-lint - uses: golangci/golangci-lint-action@v3.1.0 + uses: golangci/golangci-lint-action@v7.0.0 with: - version: v1.45.2 + args: --timeout=10m + version: v2.0.2 diff --git a/.github/workflows/proto-gen.yaml b/.github/workflows/proto-gen.yaml new file mode 100644 index 000000000..8c20745f6 --- /dev/null +++ b/.github/workflows/proto-gen.yaml @@ -0,0 +1,52 @@ +name: proto-gen + +on: + pull_request: + merge_group: + branches: + - main + +concurrency: + group: ${{ github.workflow }}-${{ github.event.number || github.ref }} + cancel-in-progress: true + +jobs: + skip-check: + name: Skip check + continue-on-error: true + runs-on: ubuntu-latest + timeout-minutes: 10 + outputs: + should_skip: ${{ steps.skip-check.outputs.should_skip }} + permissions: + actions: write + contents: read + steps: + - id: skip-check + uses: fkirc/skip-duplicate-actions@f75f66ce1886f00957d99748a42c724f4330bdcf # v5.3.1 + with: + do_not_skip: '["schedule", "workflow_dispatch"]' + paths: |- + [ + ".github/workflows/proto-gen.yaml", + ".go-version", + "buf.gen.yaml", + "buf.work.yaml", + "proto/**" + ] + skip_after_successful_duplicate: false + + build: + name: Proto Generate + 
needs: skip-check + if: ${{ needs.skip-check.outputs.should_skip != 'true' }} + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - uses: bufbuild/buf-setup-action@f0475db2e1b1b2e8d121066b59dfb7f7bd6c4dc4 # v1.32.1 + + - name: Generate + run: + buf generate && git diff --exit-code diff --git a/.github/workflows/proto-pr.yaml b/.github/workflows/proto-pr.yaml new file mode 100644 index 000000000..d502fea62 --- /dev/null +++ b/.github/workflows/proto-pr.yaml @@ -0,0 +1,64 @@ +name: proto-pr + +on: + pull_request: + merge_group: + branches: + - main + +concurrency: + group: ${{ github.workflow }}-${{ github.event.number || github.ref }} + cancel-in-progress: true + +jobs: + skip-check: + name: Skip check + continue-on-error: true + runs-on: ubuntu-latest + timeout-minutes: 10 + outputs: + should_skip: ${{ steps.skip-check.outputs.should_skip }} + permissions: + actions: write + contents: read + steps: + - id: skip-check + uses: fkirc/skip-duplicate-actions@f75f66ce1886f00957d99748a42c724f4330bdcf # v5.3.1 + with: + do_not_skip: '["schedule", "workflow_dispatch"]' + paths: |- + [ + ".github/workflows/proto-pr.yaml", + ".go-version", + "buf.gen.yaml", + "buf.work.yaml", + "proto/**" + ] + skip_after_successful_duplicate: false + + build: + name: Proto PR Checks + needs: skip-check + if: ${{ needs.skip-check.outputs.should_skip != 'true' }} + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - uses: bufbuild/buf-setup-action@f0475db2e1b1b2e8d121066b59dfb7f7bd6c4dc4 # v1.32.1 + + - name: version + run: buf --version + + - name: Format + run: buf format --diff --exit-code + + - uses: bufbuild/buf-lint-action@06f9dd823d873146471cfaaf108a993fe00e5325 # v1.1.1 + with: + input: 'proto' + + - uses: bufbuild/buf-breaking-action@c57b3d842a5c3f3b454756ef65305a50a587c5ba # v1.1.4 + with: + input: 'proto' + # The 
'main' branch of the GitHub repository that defines the module. + against: 'https://github.com/${GITHUB_REPOSITORY}.git#branch=main,subdir=proto' diff --git a/.github/workflows/proto-push.yaml b/.github/workflows/proto-push.yaml new file mode 100644 index 000000000..9e0010a8e --- /dev/null +++ b/.github/workflows/proto-push.yaml @@ -0,0 +1,72 @@ +name: proto-push + +on: + push: + branches: + - main + - release-* + merge_group: + branches: + - main + +concurrency: + group: ${{ github.workflow }}-${{ github.event.number || github.ref }} + cancel-in-progress: true + +jobs: + skip-check: + name: Skip check + continue-on-error: true + runs-on: ubuntu-latest + timeout-minutes: 10 + outputs: + should_skip: ${{ steps.skip-check.outputs.should_skip }} + permissions: + actions: write + contents: read + steps: + - id: skip-check + uses: fkirc/skip-duplicate-actions@f75f66ce1886f00957d99748a42c724f4330bdcf # v5.3.1 + with: + do_not_skip: '["schedule", "workflow_dispatch"]' + paths: |- + [ + ".github/workflows/proto-push.yaml", + ".go-version", + "buf.gen.yaml", + "buf.work.yaml", + "proto/**" + ] + skip_after_successful_duplicate: false + + build: + name: Proto Push + needs: skip-check + if: ${{ needs.skip-check.outputs.should_skip != 'true' }} + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - uses: bufbuild/buf-setup-action@f0475db2e1b1b2e8d121066b59dfb7f7bd6c4dc4 # v1.32.1 + + - name: version + run: buf --version + + - name: Format + run: buf format --diff --exit-code + + - uses: bufbuild/buf-lint-action@06f9dd823d873146471cfaaf108a993fe00e5325 # v1.1.1 + with: + input: 'proto' + + - uses: bufbuild/buf-breaking-action@c57b3d842a5c3f3b454756ef65305a50a587c5ba # v1.1.4 + with: + input: 'proto' + # The 'main' branch of the GitHub repository that defines the module. 
+ against: 'https://github.com/${GITHUB_REPOSITORY}.git#branch=main,ref=HEAD~1,subdir=proto' + + - uses: bufbuild/buf-push-action@a654ff18effe4641ebea4a4ce242c49800728459 # v1.2.0 + with: + input: 'proto' + buf_token: ${{ secrets.BUF_TOKEN }} diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml new file mode 100644 index 000000000..bc42109c9 --- /dev/null +++ b/.github/workflows/stale.yml @@ -0,0 +1,15 @@ +name: 'Close stale issues and PRs' +on: + schedule: + - cron: '30 1 * * *' + +jobs: + stale: + runs-on: ubuntu-latest + steps: + - uses: actions/stale@v9 + with: + stale-issue-message: 'This issue is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 5 days.' + days-before-stale: 30 + days-before-close: 5 + exempt-issue-labels: bug,planned diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..0e50b92eb --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.idea +testdata/oldwal/databases \ No newline at end of file diff --git a/.golangci.yml b/.golangci.yml index 98ee4c013..1891d5b62 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,44 +1,62 @@ -run: - deadline: 5m - skip-files: - # Skip autogenerated files. 
- - ^.*\.(pb|y)\.go$ - -output: - sort-results: true - +version: "2" linters: enable: - depguard - godot - - gofumpt - - goimports - revive - whitespace - -issues: - exclude-rules: - - path: bench_test.go - text: 'is unused' - -linters-settings: - depguard: - list-type: blacklist - include-go-root: true - packages-with-error-message: - - go.uber.org/atomic: "Use sync/atomic instead of go.uber.org/atomic" - - debug/elf: "Use github.com/parca-dev/parca/internal/go/debug/elf instead of debug/elf" - - github.com/stretchr/testify/assert: "Use github.com/stretchr/testify/require instead of github.com/stretchr/testify/assert" - - github.com/go-kit/kit/log: "Use github.com/go-kit/log instead of github.com/go-kit/kit/log" - - github.com/pkg/errors: "Use fmt.Errorf instead" - errcheck: - exclude: ./.errcheck_excludes.txt - goimports: - local-prefixes: github.com/polarsignals/frostdb - gofumpt: - extra-rules: true - misspell: - locale: US - staticcheck: - checks: - - all + settings: + depguard: + rules: + main: + deny: + - pkg: go.uber.org/atomic + desc: Use sync/atomic instead of go.uber.org/atomic + - pkg: debug/elf + desc: Use github.com/parca-dev/parca/internal/go/debug/elf instead of debug/elf + - pkg: github.com/stretchr/testify/assert + desc: Use github.com/stretchr/testify/require instead of github.com/stretchr/testify/assert + - pkg: github.com/go-kit/kit/log + desc: Use github.com/go-kit/log instead of github.com/go-kit/kit/log + - pkg: github.com/pkg/errors + desc: Use fmt.Errorf instead + - pkg: github.com/segmentio/parquet-go + desc: Use github.com/parquet-go/parquet-go instead + errcheck: + exclude-functions: + - (github.com/go-kit/log.Logger).Log + misspell: + locale: US + staticcheck: 
+ checks: + - all + exclusions: + generated: lax + presets: + - comments + - common-false-positives + - legacy + - std-error-handling + rules: + - path: bench_test.go + text: is unused + paths: + - third_party$ + - builtin$ + - examples$ +formatters: + enable: + - gofumpt + - goimports + settings: + gofumpt: + extra-rules: true + goimports: + local-prefixes: + - github.com/polarsignals/frostdb + exclusions: + generated: lax + paths: + - third_party$ + - builtin$ + - examples$ diff --git a/Makefile b/Makefile index d2bd43eec..bd17e1569 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,9 @@ test: - go test -tags debug -race ./... + go test -tags assert,debug -race ./... .PHONY: gen/proto gen/proto: buf generate + +lint: + golangci-lint --timeout=5m run --fix \ No newline at end of file diff --git a/README.md b/README.md index 1a75746ad..eb5c112b6 100644 --- a/README.md +++ b/README.md @@ -12,24 +12,24 @@ > This project is still in its infancy, consider it not production-ready, probably has various consistency and correctness problems and all API will change! -FrostDB is an embeddable columnar database written in Go. It features semi-structured schemas (could also be described as typed wide-columns), and uses [Apache Parquet](https://parquet.apache.org/) for storage, and [Apache Arrow](https://arrow.apache.org/) at query time. Building on top of Apache Arrow, FrostDB provides a query builder and various optimizers (it reminds of DataFrame-like APIs). +FrostDB is an embeddable wide-column columnar database written in Go. It features semi-structured schemas, uses [Apache Parquet](https://parquet.apache.org/) for storage, and [Apache Arrow](https://arrow.apache.org/) at query time. Building on top of Apache Arrow, FrostDB provides a query builder and various optimizers (using DataFrame-like APIs). 
-FrostDB is optimized for use cases where the majority of interactions are writes, and when data is queried, a lot of data is queried at once (our use case at Polar Signals can be broadly described as Observability and specifically for [Parca](https://parca.dev/)). It could also be described as a wide-column columnar database. +FrostDB is optimized for use cases where the majority of interactions are writes, with occasional analytical queries over this data. FrostDB was built specifically for [Parca](https://parca.dev/) for Observability use cases. -Read the annoucement blog post to learn about what made us create it: https://www.polarsignals.com/blog/posts/2022/05/04/introducing-arcticdb/ (FrostDB was originally called ArcticDB) +Read the announcement blog post to learn about what made us create it: https://www.polarsignals.com/blog/posts/2022/05/04/introducing-arcticdb/ (FrostDB was originally called ArcticDB) ## Why you should use FrostDB -Columnar data stores have become incredibly popular for analytics data. Structuring data in columns instead of rows leverages the architecture of modern hardware, allowing for efficient processing of data. +Columnar data stores have become incredibly popular for analytics. Structuring data in columns instead of rows leverages the architecture of modern hardware, allowing for efficient processing of data. A columnar data store might be right for you if you have workloads where you write a lot of data and need to perform analytics on that data. -FrostDB is similar to many other in-memory columnar databases such as [DuckDB](https://duckdb.org/) or [InfluxDB IOx](https://github.com/influxdata/influxdb_iox). 
+FrostDB is similar to many other embeddable columnar databases such as [DuckDB](https://duckdb.org/) FrostDB may be a better fit for you if: - Are developing a Go program -- Want to embed a columnar database in your program instead of running a separate server +- Want to embed a columnar database in your program instead of running a separate database server - Have immutable datasets that don't require updating or deleting -- Your data contains dynamic columns, where a column may expand during runtime +- Your data contains dynamic columns, where the number of columns in the schema may increase at runtime FrostDB is likely not suitable for your needs if: - You aren't developing in Go @@ -41,69 +41,12 @@ FrostDB is likely not suitable for your needs if: You can explore the [examples](https://github.com/polarsignals/frostdb/tree/main/examples) directory for sample code using FrostDB. Below is a snippet from the simple database example. It creates a database with a dynamic column schema, inserts some data, and queries it back out. 
-```go -// Create a new column store -columnstore, _ := frostdb.New() - -// Open up a database in the column store -database, _ := columnstore.DB(context.Background(), "simple_db") - -// Define our simple schema of labels and values -schema, _ := simpleSchema() - -// Create a table named simple in our database -table, _ := database.Table( - "simple_table", - frostdb.NewTableConfig(schema), -) - -// Create values to insert into the database these first rows have dynamic label names of 'firstname' and 'surname' -buf, _ := schema.NewBuffer(map[string][]string{ - "names": {"firstname", "surname"}, -}) - -// firstname:Frederic surname:Brancz 100 -buf.WriteRows([]parquet.Row{{ - parquet.ValueOf("Frederic").Level(0, 1, 0), - parquet.ValueOf("Brancz").Level(0, 1, 1), - parquet.ValueOf(100).Level(0, 0, 2), -}}) - -// firstname:Thor surname:Hansen 10 -buf.WriteRows([]parquet.Row{{ - parquet.ValueOf("Thor").Level(0, 1, 0), - parquet.ValueOf("Hansen").Level(0, 1, 1), - parquet.ValueOf(10).Level(0, 0, 2), -}}) -table.InsertBuffer(context.Background(), buf) - -// Now we can insert rows that have middle names into our dynamic column -buf, _ = schema.NewBuffer(map[string][]string{ - "names": {"firstname", "middlename", "surname"}, -}) -// firstname:Matthias middlename:Oliver surname:Loibl 1 -buf.WriteRows([]parquet.Row{{ - parquet.ValueOf("Matthias").Level(0, 1, 0), - parquet.ValueOf("Oliver").Level(0, 1, 1), - parquet.ValueOf("Loibl").Level(0, 1, 2), - parquet.ValueOf(1).Level(0, 0, 3), -}}) -table.InsertBuffer(context.Background(), buf) - -// Create a new query engine to retrieve data and print the results -engine := query.NewEngine(memory.DefaultAllocator, database.TableProvider()) -engine.ScanTable("simple_table"). 
- Filter( - logicalplan.Col("names.firstname").Eq(logicalplan.Literal("Frederic")), - ).Execute(context.Background(), func(ctx context.Context, r arrow.Record) error { - fmt.Println(r) - return nil -}) -``` +https://github.com/polarsignals/frostdb/blob/ee6970eff139c58a45998a87c02b661f32be5cbe/examples/simple/simple.go#L17-L69 + ## Design choices -FrostDB was specifically built for Observability workloads. This resulted in several characteristics that make it unique in its combination. +FrostDB was specifically built for Observability workloads. This resulted in several characteristics that make it unique. Table Of Contents: @@ -114,11 +57,11 @@ Table Of Contents: ### Columnar layout -Observability data is most useful when highly dimensional and those dimensions can be searched and aggregated by efficiently. Contrary to many relational databases like (MySQL, PostgreSQL, CockroachDB, TiDB, etc.) that store data all data belonging to a single row together, in a columnar layout all data of the same column in a table is available in one contiguous chunk of data, making it very efficient to scan and more importantly, only the data truly necessary for a query is loaded in the first place. FrostDB uses [Apache Parquet](https://parquet.apache.org/) for storage, and [Apache Arrow](https://arrow.apache.org/) at query time. Apache Parquet is used for storage to make use of its efficient encodings to save on memory and disk space. Apache Arrow is used at query time as a foundation to vectorize the query execution. +Observability data is most useful when it is highly dimensional and those dimensions can be searched and aggregated efficiently. Contrary to many relational databases (MySQL, PostgreSQL, CockroachDB, TiDB, etc.) that store all data belonging to a single row together, a columnar layout stores all data of the same column in one contiguous chunk of data, making it very efficient to scan and aggregate data for any column. 
FrostDB uses [Apache Parquet](https://parquet.apache.org/) for storage, and [Apache Arrow](https://arrow.apache.org/) at query time. Apache Parquet is used for storage to make use of its efficient encodings to save on memory and disk space. Apache Arrow is used at query time as a foundation to vectorize the query execution. ### Dynamic Columns -While columnar databases already exist, most require a static schema, however, Observability workloads differ in that their schemas are not static, meaning not all columns are pre-defined. On the other hand, wide column databases also already exist, but typically are not strictly typed, and most wide-column databases are row-based databases, not columnar databases. +While columnar databases already exist, most require a static schema. However, Observability workloads differ in that their schemas are not static, meaning not all columns are pre-defined. Wide column databases already exist, but typically are not strictly typed (e.g. document databases), and most wide-column databases are row-based databases, not columnar databases. Take a [Prometheus](https://prometheus.io/) time-series for example. Prometheus time-series are uniquely identified by the combination of their label-sets: @@ -126,51 +69,15 @@ Take a [Prometheus](https://prometheus.io/) time-series for example. Prometheus http_requests_total{path="/api/v1/users", code="200"} 12 ``` -This model does not map well into a static schema, as label-names cannot be known upfront. The most suitable data-type some columnar databases have to offer is a map, however, maps have the same problems as row-based databases, where all values of a map in a row are stored together, unable to exploit the advantages of a columnar layout. An FrostDB schema can define a column to be dynamic, causing a column to be created on the fly when a new label-name is seen. +This model does not map well into a static schema, as label-names cannot be known upfront. 
The most suitable data-type some columnar databases have to offer is a map, however, maps have the same problems as row-based databases, where all values of a map in a row are stored together, resulting in an inability to exploit the advantages of a columnar layout. A FrostDB schema can define a column to be dynamic, causing a column to be created on the fly when a new label-name is seen. -An FrostDB schema for Prometheus could look like this: +A FrostDB schema for Prometheus could look like this: ```go -package arcticprometheus - -import ( - "github.com/polarsignals/frostdb/dynparquet" - schemapb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha1" -) - -func Schema() (*dynparquet.Schema, error) { - return dynparquet.SchemaFromDefinition(&schemapb.Schema{ - Name: "prometheus", - Columns: []*schemapb.Column{{ - Name: "labels", - StorageLayout: &schemapb.StorageLayout{ - Type: schemapb.StorageLayout_TYPE_STRING, - Encoding: schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, - Nullable: true, - }, - Dynamic: true, - }, { - Name: "timestamp", - StorageLayout: &schemapb.StorageLayout{ - Type: schemapb.StorageLayout_TYPE_INT64, - }, - Dynamic: false, - }, { - Name: "value", - StorageLayout: &schemapb.StorageLayout{ - Type: schemapb.StorageLayout_TYPE_DOUBLE, - }, - Dynamic: false, - }}, - SortingColumns: []*schemapb.SortingColumn{{ - Name: "labels", - NullsFirst: true, - Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, - }, { - Name: "timestamp", - Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, - }}, - }) +type Prometheus struct { + Labels map[string]string `frostdb:",rle_dict,asc(1),null_first"` + Timestamp int64 `frostdb:",asc(0)"` + Value float64 } ``` @@ -178,21 +85,21 @@ func Schema() (*dynparquet.Schema, error) { With this schema, all rows are expected to have a `timestamp` and a `value` but can vary in their columns prefixed with `labels.`. 
In this schema all dynamically created columns are still Dictionary and run-length encoded and must be of type `string`. -### Immutable & Sorted - -There are only writes and reads. All data is immutable and sorted. Having all data sorted allows FrostDB to avoid maintaining an index per column, and still serve queries with low latency. +### Immutable -To maintain global sorting FrostDB requires all inserts to be sorted if they contain multiple rows. Combined with immutability, global sorting of all data can be maintained at a reasonable cost. To optimize throughput, it is preferable to perform inserts in as large batches as possible. FrostDB maintains inserted data in batches of a configurable amount of rows (by default 8192), called a _Granule_. To directly jump to data needed for a query, FrostDB maintains a sparse index of Granules. The sparse index is small enough to fully reside in memory, it is currently implemented as a [b-tree](https://github.com/google/btree) of Granules. +There are only writes and reads. All data is immutable. -![Sparse index of Granules](https://docs.google.com/drawings/d/1DbGqLKsloKAEG7ydJ5n5-Vr03j4jQMqdipJyEu0goIE/export/svg) +FrostDB maintains inserted data in a Log-structured merge-tree (LSM)-like index. This index is implemented as lists of Parts. A Part contains either an Arrow record or a +Parquet file. The first level (L0) contains a list of Arrow records inserted as-is into the list. Upon reaching the maximum configured size of the level the level will be compacted + into a single Parquet file and added to the next level of the index. This process continues for each configured level of the index until a file is written into the final level of the index. -At insert time, FrostDB splits the inserted rows into the appropriate Granule according to their lower and upper bound, to maintain global sorting. Once a Granule exceeds the configured amount, the Granule is split into `N` new Granules depending. 
+![LSM Index compacting into higher levels](https://docs.google.com/drawings/d/e/2PACX-1vRckTzb-D57UaxDUSCQjyD4mZN_3_Cu032oA-2kLCu_owrYeaT4nrYCKBP1QYS8EMqv3bI3Kiudt_jV/pub?w=960&h=720) -![Split of Granule](https://docs.google.com/drawings/d/1c38HQfpTPVtzatGenQaqF7oA_7NiEDbfeudxiUV5lSg/export/svg) +Upon the size of the entire index reaching the configured max in-memory size the index is rotated out. It can be either configured to be dropped entirely or to be written out to +your storage of choice. -Under the hood, Granules are a list of sorted Parts, and only if a query requires it are all parts merged into a sorted stream using a [direct k-way merge](https://en.wikipedia.org/wiki/K-way_merge_algorithm#Direct_k-way_merge) using a [min-heap](https://en.wikipedia.org/wiki/Binary_heap). An example of an operation that requires the whole Granule to be read as a single sorted stream are the aforementioned Granule splits. - -![A Granule is organized in Parts](https://docs.google.com/drawings/d/1Ex4hKLwoQ_IgYARj0aEjoFEjQRt6-B0fO8K9E7syyHc/export/svg) +At query time FrostDB will scan each part in the index. To maintain fast queries FrostDB leverages the sparse index features of Parquet files, such as bloom filters and min +and max values of columns in each row group such that only the row groups that contain data that can satisfy the query are processed. ### Snapshot isolation @@ -200,14 +107,10 @@ FrostDB has snapshot isolation, however, it comes with a few caveats that should More concretely, FrostDB maintains a watermark indicating that all transactions equal and lower to the watermark are safe to be read. Only write transactions obtain a _new_ transaction ID, while reads use the transaction ID of the watermark to identify data that is safe to be read. The watermark is only increased when strictly monotonic, consecutive transactions have finished. This means that a low write transaction can block higher write transactions to become available to be read. 
To ensure progress is made, write transactions have a timeout. -This mechanism inspired by a mix of [Google Spanner](https://research.google/pubs/pub39966/), [Google Percolator](https://research.google/pubs/pub36726/) and [Highly Available Transactions](https://www.vldb.org/pvldb/vol7/p181-bailis.pdf). +This mechanism is inspired by a mix of [Google Spanner](https://research.google/pubs/pub39966/), [Google Percolator](https://research.google/pubs/pub36726/) and [Highly Available Transactions](https://www.vldb.org/pvldb/vol7/p181-bailis.pdf). ![Transactions are released in batches indicated by the watermark](https://docs.google.com/drawings/d/1qmcMg9sXnDZix9eWSvOtWJD06yHsLpgho8M-DGF84bU/export/svg) -## Roadmap - -* Persistence: FrostDB is currently fully in-memory. - ## Acknowledgments -FrostDB stands on the shoulders of giants. Shout out to Segment for creating the incredible [`parquet-go`](https://github.com/segmentio/parquet-go) library as well as InfluxData for starting and various contributors after them working on [Go support for Apache Arrow](https://pkg.go.dev/github.com/apache/arrow/go/arrow). +FrostDB stands on the shoulders of giants. Shout out to Segment for creating the incredible [`parquet-go`](https://github.com/parquet-go/parquet-go) library as well as InfluxData for starting and various contributors after them working on [Go support for Apache Arrow](https://pkg.go.dev/github.com/apache/arrow/go/arrow). 
diff --git a/aggregate_test.go b/aggregate_test.go index 4ea7bf286..d4f1b249d 100644 --- a/aggregate_test.go +++ b/aggregate_test.go @@ -2,24 +2,27 @@ package frostdb import ( "context" + "math/rand" "sort" "strconv" "testing" + "time" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" "github.com/google/uuid" "github.com/stretchr/testify/require" "github.com/polarsignals/frostdb/dynparquet" + schemapb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha1" "github.com/polarsignals/frostdb/query" "github.com/polarsignals/frostdb/query/logicalplan" ) func TestAggregateInconsistentSchema(t *testing.T) { config := NewTableConfig( - dynparquet.NewSampleSchema(), + dynparquet.SampleDefinition(), ) logger := newTestLogger(t) @@ -35,8 +38,8 @@ func TestAggregateInconsistentSchema(t *testing.T) { require.NoError(t, err) samples := dynparquet.Samples{{ - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, + Labels: map[string]string{ + "label1": "value1", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -45,8 +48,8 @@ func TestAggregateInconsistentSchema(t *testing.T) { Timestamp: 1, Value: 1, }, { - Labels: []dynparquet.Label{ - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -55,8 +58,8 @@ func TestAggregateInconsistentSchema(t *testing.T) { Timestamp: 2, Value: 2, }, { - Labels: []dynparquet.Label{ - {Name: "label2", Value: "value2"}, + Labels: 
map[string]string{ + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -67,10 +70,10 @@ func TestAggregateInconsistentSchema(t *testing.T) { }} for i := range samples { - buf, err := samples[i : i+1].ToBuffer(table.Schema()) + r, err := samples[i : i+1].ToRecord() require.NoError(t, err) - _, err = table.InsertBuffer(context.Background(), buf) + _, err = table.InsertRecord(context.Background(), r) require.NoError(t, err) } @@ -114,19 +117,23 @@ func TestAggregateInconsistentSchema(t *testing.T) { var res arrow.Record err = engine.ScanTable("test"). Aggregate( - []logicalplan.Expr{testCase.fn(logicalplan.Col("value")).Alias(testCase.alias)}, + []*logicalplan.AggregationFunction{ + testCase.fn(logicalplan.Col("value")), + }, []logicalplan.Expr{logicalplan.Col("labels.label2")}, - ).Execute(context.Background(), func(ctx context.Context, r arrow.Record) error { - r.Retain() - res = r - return nil - }) + ). + Project(testCase.fn(logicalplan.Col("value")).Alias(testCase.alias)). 
+ Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + r.Retain() + res = r + return nil + }) require.NoError(t, err) require.NotNil(t, res) defer res.Release() cols := res.Columns() - require.Equal(t, 2, len(cols)) + require.Equal(t, 1, len(cols)) for i, col := range cols { require.Equal(t, 2, col.Len(), "unexpected number of values in column %s", res.Schema().Field(i).Name) } @@ -142,7 +149,7 @@ func TestAggregateInconsistentSchema(t *testing.T) { func TestAggregationProjection(t *testing.T) { config := NewTableConfig( - dynparquet.NewSampleSchema(), + dynparquet.SampleDefinition(), ) logger := newTestLogger(t) @@ -158,9 +165,9 @@ func TestAggregationProjection(t *testing.T) { require.NoError(t, err) samples := dynparquet.Samples{{ - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -169,10 +176,10 @@ func TestAggregationProjection(t *testing.T) { Timestamp: 1, Value: 1, }, { - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value2"}, - {Name: "label2", Value: "value2"}, - {Name: "label3", Value: "value3"}, + Labels: map[string]string{ + "label1": "value2", + "label2": "value2", + "label3": "value3", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -181,10 +188,10 @@ func TestAggregationProjection(t *testing.T) { Timestamp: 2, Value: 2, }, { - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value3"}, - {Name: "label2", Value: "value2"}, - {Name: "label4", Value: "value4"}, + Labels: map[string]string{ + "label1": "value3", + "label2": "value2", + "label4": "value4", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -195,10 +202,10 @@ func 
TestAggregationProjection(t *testing.T) { }} for i := 0; i < len(samples); i++ { - buf, err := samples[i : i+1].ToBuffer(table.Schema()) + r, err := samples[i : i+1].ToRecord() require.NoError(t, err) - _, err = table.InsertBuffer(context.Background(), buf) + _, err = table.InsertRecord(context.Background(), r) require.NoError(t, err) } @@ -209,8 +216,13 @@ func TestAggregationProjection(t *testing.T) { records := []arrow.Record{} err = engine.ScanTable("test"). + Project( + logicalplan.DynCol("labels"), + logicalplan.Col("timestamp"), + logicalplan.Col("value"), + ). Aggregate( - []logicalplan.Expr{ + []*logicalplan.AggregationFunction{ logicalplan.Sum(logicalplan.Col("value")), logicalplan.Max(logicalplan.Col("value")), }, @@ -225,7 +237,7 @@ func TestAggregationProjection(t *testing.T) { logicalplan.DynCol("labels"), logicalplan.Col("timestamp").Gt(logicalplan.Literal(1)).Alias("timestamp"), ). - Execute(context.Background(), func(ctx context.Context, ar arrow.Record) error { + Execute(context.Background(), func(_ context.Context, ar arrow.Record) error { records = append(records, ar) ar.Retain() return nil @@ -245,6 +257,91 @@ func TestAggregationProjection(t *testing.T) { require.True(t, record.Schema().HasField("max(value)")) } +func TestDurationAggregation(t *testing.T) { + c, err := New() + require.NoError(t, err) + defer c.Close() + + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) + + type Record struct { + Timestamp int64 `frostdb:",asc"` + Stacktrace string + Value int64 + } + + table, err := NewGenericTable[Record](db, "test", memory.NewGoAllocator()) + require.NoError(t, err) + defer table.Release() + + records := []Record{ + { + Timestamp: 1 * int64(time.Second), + Stacktrace: "stack1", + Value: 3, + }, + { + Timestamp: 1 * int64(time.Second), + Stacktrace: "stack2", + Value: 5, + }, + { + Timestamp: 1 * int64(time.Second), + Stacktrace: "stack3", + Value: 8, + }, + { + Timestamp: 2 * int64(time.Second), + Stacktrace: 
"stack1", + Value: 2, + }, + { + Timestamp: 2 * int64(time.Second), + Stacktrace: "stack2", + Value: 3, + }, + } + _, err = table.Write(context.Background(), records...) + require.NoError(t, err) + + engine := query.NewEngine(memory.DefaultAllocator, db.TableProvider()) + + results := []arrow.Record{} + defer func() { + for _, r := range results { + r.Release() + } + }() + + _ = engine.ScanTable("test"). + Aggregate( + []*logicalplan.AggregationFunction{ + logicalplan.Sum(logicalplan.Col("value")), + }, + []logicalplan.Expr{ + logicalplan.Duration(time.Second), + }, + ). + Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + r.Retain() + results = append(results, r) + return nil + }) + + require.Equal(t, 1, len(results)) + require.Equal(t, int64(2), results[0].NumRows()) + + for i := 0; int64(i) < results[0].NumRows(); i++ { + switch results[0].Column(0).(*array.Int64).Value(i) { + case 1 * int64(time.Second): + require.Equal(t, int64(16), results[0].Column(1).(*array.Int64).Value(i)) + case 2 * int64(time.Second): + require.Equal(t, int64(5), results[0].Column(1).(*array.Int64).Value(i)) + } + } +} + // go test -bench=BenchmarkAggregation -benchmem -count=10 . 
| tee BenchmarkAggregation func BenchmarkAggregation(b *testing.B) { @@ -259,16 +356,16 @@ func BenchmarkAggregation(b *testing.B) { // Insert sample data { - config := NewTableConfig(dynparquet.NewSampleSchema()) + config := NewTableConfig(dynparquet.SampleDefinition()) table, err := db.Table("test", config) require.NoError(b, err) samples := make(dynparquet.Samples, 0, 10_000) for i := 0; i < cap(samples); i++ { samples = append(samples, dynparquet.Sample{ - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value" + strconv.Itoa(i%3)}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value" + strconv.Itoa(i%3), }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -278,9 +375,9 @@ func BenchmarkAggregation(b *testing.B) { }) } - buf, err := samples.ToBuffer(table.Schema()) + r, err := samples.ToRecord() require.NoError(b, err) - _, err = table.InsertBuffer(ctx, buf) + _, err = table.InsertRecord(ctx, r) require.NoError(b, err) } @@ -296,7 +393,7 @@ func BenchmarkAggregation(b *testing.B) { name: "sum", builder: engine.ScanTable("test"). Aggregate( - []logicalplan.Expr{ + []*logicalplan.AggregationFunction{ logicalplan.Sum(logicalplan.Col("value")), }, []logicalplan.Expr{ @@ -307,7 +404,7 @@ func BenchmarkAggregation(b *testing.B) { name: "count", builder: engine.ScanTable("test"). Aggregate( - []logicalplan.Expr{ + []*logicalplan.AggregationFunction{ logicalplan.Count(logicalplan.Col("value")), }, []logicalplan.Expr{ @@ -318,7 +415,7 @@ func BenchmarkAggregation(b *testing.B) { name: "max", builder: engine.ScanTable("test"). 
Aggregate( - []logicalplan.Expr{ + []*logicalplan.AggregationFunction{ logicalplan.Max(logicalplan.Col("value")), }, []logicalplan.Expr{ @@ -328,10 +425,95 @@ func BenchmarkAggregation(b *testing.B) { }} { b.Run(bc.name, func(b *testing.B) { for i := 0; i < b.N; i++ { - _ = bc.builder.Execute(ctx, func(ctx context.Context, r arrow.Record) error { + _ = bc.builder.Execute(ctx, func(_ context.Context, _ arrow.Record) error { return nil }) } }) } } + +func Test_Aggregation_DynCol(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(t, 0) + + config := NewTableConfig( + &schemapb.Schema{ + Name: "test", + Columns: []*schemapb.Column{{ + Name: "foo", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_INT64, + Nullable: true, + }, + Dynamic: true, + }}, + SortingColumns: []*schemapb.SortingColumn{{ + Name: "foo", + Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, + }}, + }, + ) + + logger := newTestLogger(t) + + c, err := New( + WithLogger(logger), + ) + require.NoError(t, err) + defer c.Close() + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) + table, err := db.Table("test", config) + require.NoError(t, err) + + fields := []arrow.Field{ + {Name: "foo.bar", Type: arrow.PrimitiveTypes.Int64}, + {Name: "foo.baz", Type: arrow.PrimitiveTypes.Int64}, + {Name: "foo.bah", Type: arrow.PrimitiveTypes.Int64}, + } + + records := make([]arrow.Record, 0) + // For each field, create one record with only that field. + for i := 0; i < len(fields); i++ { + func() { + bldr := array.NewRecordBuilder(memory.DefaultAllocator, arrow.NewSchema(fields[i:i+1], nil)) + defer bldr.Release() + bldr.Field(0).(*array.Int64Builder).Append(int64(rand.Intn(100))) + records = append(records, bldr.NewRecord()) + }() + } + // One more record with all concrete columns. 
+ bldr := array.NewRecordBuilder(memory.DefaultAllocator, arrow.NewSchema(fields, nil)) + defer bldr.Release() + for i := 0; i < len(fields); i++ { + bldr.Field(i).(*array.Int64Builder).Append(int64(rand.Intn(100))) + } + + records = append(records, bldr.NewRecord()) + defer func() { + for _, r := range records { + r.Release() + } + }() + + ctx := context.Background() + for _, r := range records { + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + } + + engine := query.NewEngine(mem, db.TableProvider()) + + err = engine.ScanTable("test"). + Aggregate( + []*logicalplan.AggregationFunction{logicalplan.Max(logicalplan.DynCol("foo"))}, + nil, + ). + Execute(context.Background(), func(_ context.Context, ar arrow.Record) error { + require.Equal(t, 3, int(ar.NumCols())) + require.Equal(t, 1, int(ar.NumRows())) + return nil + }) + require.NoError(t, err) +} diff --git a/bench_test.go b/bench_test.go index 0c728f691..2f06dd002 100644 --- a/bench_test.go +++ b/bench_test.go @@ -3,17 +3,20 @@ package frostdb import ( "context" "errors" + "io" + "math/rand" "sort" "strings" "testing" "time" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/google/uuid" "github.com/stretchr/testify/require" - "github.com/polarsignals/frostdb/pqarrow/arrowutils" + "github.com/polarsignals/frostdb/dynparquet" "github.com/polarsignals/frostdb/query" "github.com/polarsignals/frostdb/query/logicalplan" ) @@ -34,6 +37,8 @@ const ( func newDBForBenchmarks(ctx context.Context, b testing.TB) (*ColumnStore, *DB, error) { b.Helper() + b.Logf("recovering DB") + start := time.Now() col, err := 
New( WithWAL(), WithStoragePath(storagePath), @@ -41,13 +46,7 @@ func newDBForBenchmarks(ctx context.Context, b testing.TB) (*ColumnStore, *DB, e if err != nil { return nil, nil, err } - - b.Logf("replaying WAL") - start := time.Now() - if err := col.ReplayWALs(ctx); err != nil { - return nil, nil, err - } - b.Logf("replayed WAL in %s", time.Since(start)) + b.Logf("recovered DB in %s", time.Since(start)) colDB, err := col.DB(ctx, dbName) if err != nil { @@ -65,7 +64,7 @@ func newDBForBenchmarks(ctx context.Context, b testing.TB) (*ColumnStore, *DB, e } b.Logf("ensured compaction in %s", time.Since(start)) - b.Logf("db initialized and WAL replayed, starting benchmark %s", b.Name()) + b.Logf("db initialized and recovered, starting benchmark %s", b.Name()) return col, colDB, nil } @@ -75,12 +74,12 @@ func getLatest15MinInterval(ctx context.Context, b testing.TB, engine *query.Loc var result arrow.Record require.NoError(b, engine.ScanTable(tableName). Aggregate( - []logicalplan.Expr{ + []*logicalplan.AggregationFunction{ logicalplan.Max(logicalplan.Col("timestamp")), }, nil, ).Execute(ctx, - func(ctx context.Context, r arrow.Record) error { + func(_ context.Context, r arrow.Record) error { r.Retain() result = r return nil @@ -146,20 +145,22 @@ func (t *typesResult) Swap(i, j int) { (*t)[i], (*t)[j] = (*t)[j], (*t)[i] } -// getDeterministicTypeFilterExpr will always return a deterministic profile +// getCPUTypeFilter will always return a deterministic profile // type across benchmark runs, as well as a pretty string to print this type. 
-func getDeterministicTypeFilterExpr( +func getCPUTypeFilter( ctx context.Context, engine *query.LocalEngine, ) ([]logicalplan.Expr, string, error) { results := make(typesResult, 0) if err := getTypesQuery(engine).Execute(ctx, func(_ context.Context, r arrow.Record) error { + nameIdx := r.Schema().FieldIndices("name")[0] for i := 0; i < int(r.NumRows()); i++ { + if !strings.Contains(string(r.Column(nameIdx).(*array.Dictionary).GetOneForMarshal(i).([]byte)), "cpu") { + // Not a CPU profile type, ignore. + continue + } row := make([]string, 0, len(typeColumns)) for j := range typeColumns { - v, err := arrowutils.GetValue(r.Column(j), i) - if err != nil { - return err - } + v := r.Column(j).GetOneForMarshal(i) row = append(row, string(v.([]byte))) } results = append(results, row) @@ -170,7 +171,7 @@ func getDeterministicTypeFilterExpr( } if len(results) == 0 { - return nil, "", errors.New("no types found") + return nil, "", errors.New("no cpu types found") } sort.Sort(&results) @@ -186,7 +187,7 @@ func getDeterministicTypeFilterExpr( // getDeterministicLabel will always return a deterministic label/value pair // across benchmarks. 
-func getDeterministicLabelValuePair(ctx context.Context, engine *query.LocalEngine) (string, string, error) { +func getDeterministicLabelValuePairForType(ctx context.Context, engine *query.LocalEngine, typeFilter []logicalplan.Expr) (string, string, error) { labels := make([]string, 0) if err := getLabelsQuery(engine).Execute(ctx, func(_ context.Context, r arrow.Record) error { arr := r.Column(0) @@ -200,20 +201,20 @@ func getDeterministicLabelValuePair(ctx context.Context, engine *query.LocalEngi sort.Strings(labels) for _, label := range labels { values := make([]string, 0) - if err := getValuesForLabelQuery(engine, label).Execute(ctx, func(ctx context.Context, r arrow.Record) error { - arr := r.Column(0) - for i := 0; i < arr.Len(); i++ { - if arr.IsNull(i) { - continue - } - v, err := arrowutils.GetValue(arr, i) - if err != nil { - return err + if err := engine.ScanTable(tableName). + Filter(logicalplan.And(typeFilter...)). + Distinct(logicalplan.Col(label)). + Execute(ctx, func(_ context.Context, r arrow.Record) error { + arr := r.Column(0) + for i := 0; i < arr.Len(); i++ { + if arr.IsNull(i) { + continue + } + v := arr.GetOneForMarshal(i) + values = append(values, string(v.([]byte))) } - values = append(values, string(v.([]byte))) - } - return nil - }); err != nil { + return nil + }); err != nil { return "", "", err } if len(values) == 0 { @@ -227,7 +228,6 @@ func getDeterministicLabelValuePair(ctx context.Context, engine *query.LocalEngi func BenchmarkQuery(b *testing.B) { b.Skip(skipReason) - ctx := context.Background() c, db, err := newDBForBenchmarks(ctx, b) require.NoError(b, err) @@ -237,14 +237,15 @@ func BenchmarkQuery(b *testing.B) { memory.NewGoAllocator(), db.TableProvider(), ) - start, end := getLatest15MinInterval(ctx, b, engine) - label, value, err := getDeterministicLabelValuePair(ctx, engine) + typeFilter, filterPretty, err := getCPUTypeFilter(ctx, engine) require.NoError(b, err) - typeFilter, filterPretty, err := 
getDeterministicTypeFilterExpr(ctx, engine) + + start, end := getLatest15MinInterval(ctx, b, engine) + label, value, err := getDeterministicLabelValuePairForType(ctx, engine, typeFilter) require.NoError(b, err) - b.Logf("using label/value pair: (label=%s,value=%s)", label, value) b.Logf("using types filter: %s", filterPretty) + b.Logf("using label/value pair: (label=%s,value=%s)", label, value) fullFilter := append( typeFilter, @@ -256,7 +257,7 @@ func BenchmarkQuery(b *testing.B) { b.Run("Types", func(b *testing.B) { for i := 0; i < b.N; i++ { if err := getTypesQuery(engine). - Execute(ctx, func(ctx context.Context, r arrow.Record) error { + Execute(ctx, func(_ context.Context, r arrow.Record) error { if r.NumRows() == 0 { b.Fatal("expected at least one row") } @@ -269,7 +270,7 @@ func BenchmarkQuery(b *testing.B) { b.Run("Labels", func(b *testing.B) { for i := 0; i < b.N; i++ { - if err := getLabelsQuery(engine).Execute(ctx, func(ctx context.Context, r arrow.Record) error { + if err := getLabelsQuery(engine).Execute(ctx, func(_ context.Context, r arrow.Record) error { if r.NumRows() == 0 { b.Fatal("expected at least one row") } @@ -283,7 +284,7 @@ func BenchmarkQuery(b *testing.B) { b.Run("Values", func(b *testing.B) { for i := 0; i < b.N; i++ { if err := getValuesForLabelQuery(engine, label). - Execute(ctx, func(ctx context.Context, r arrow.Record) error { + Execute(ctx, func(_ context.Context, r arrow.Record) error { if r.NumRows() == 0 { b.Fatal("expected at least one row") } @@ -302,14 +303,14 @@ func BenchmarkQuery(b *testing.B) { logicalplan.And(fullFilter...), ). Aggregate( - []logicalplan.Expr{ + []*logicalplan.AggregationFunction{ logicalplan.Sum(logicalplan.Col("value")), }, []logicalplan.Expr{ logicalplan.Col("stacktrace"), }, ). 
- Execute(ctx, func(ctx context.Context, r arrow.Record) error { + Execute(ctx, func(_ context.Context, r arrow.Record) error { if r.NumRows() == 0 { b.Fatal("expected at least one row") } @@ -328,7 +329,7 @@ func BenchmarkQuery(b *testing.B) { logicalplan.And(fullFilter...), ). Aggregate( - []logicalplan.Expr{ + []*logicalplan.AggregationFunction{ logicalplan.Sum(logicalplan.Col("value")), }, []logicalplan.Expr{ @@ -336,7 +337,7 @@ func BenchmarkQuery(b *testing.B) { logicalplan.Col("timestamp"), }, ). - Execute(ctx, func(ctx context.Context, r arrow.Record) error { + Execute(ctx, func(_ context.Context, r arrow.Record) error { if r.NumRows() == 0 { b.Fatal("expected at least one row") } @@ -346,6 +347,30 @@ func BenchmarkQuery(b *testing.B) { } } }) + + // BenchmarkFilter benchmarks performing a simple filter. This is useful to get an idea of our scan speed (minus the + // execution engine). + b.Run("Filter", func(b *testing.B) { + schema := dynparquet.SampleDefinition() + table, err := db.Table(tableName, NewTableConfig(schema)) + require.NoError(b, err) + size := table.ActiveBlock().Size() + for i := 0; i < b.N; i++ { + if err := engine.ScanTable(tableName). + Filter( + logicalplan.And(fullFilter...), + ). 
+ Execute(ctx, func(_ context.Context, r arrow.Record) error { + if r.NumRows() == 0 { + b.Fatal("expected at least one row") + } + return nil + }); err != nil { + b.Fatalf("query returned error: %v", err) + } + } + b.ReportMetric(float64(size)/(float64(b.Elapsed().Milliseconds())/float64(b.N)), "B/msec") + }) } func BenchmarkReplay(b *testing.B) { @@ -359,9 +384,93 @@ func BenchmarkReplay(b *testing.B) { ) require.NoError(b, err) defer col.Close() - if err := col.ReplayWALs(context.Background()); err != nil { - b.Fatal(err) - } }() } } + +type writeCounter struct { + io.Writer + count int +} + +func (wc *writeCounter) Write(p []byte) (int, error) { + count, err := wc.Writer.Write(p) + wc.count += count + return count, err +} + +func BenchmarkSnapshot(b *testing.B) { + b.Skip(skipReason) + + ctx := context.Background() + col, err := New( + WithWAL(), + WithStoragePath(storagePath), + ) + require.NoError(b, err) + defer col.Close() + + db, err := col.DB(ctx, dbName) + require.NoError(b, err) + + b.Log("recovered DB, starting benchmark") + + b.ResetTimer() + bytesWritten := 0 + for i := 0; i < b.N; i++ { + wc := &writeCounter{Writer: io.Discard} + require.NoError(b, WriteSnapshot(ctx, db.HighWatermark(), db, wc)) + bytesWritten += wc.count + } + b.ReportMetric(float64(bytesWritten)/float64(b.N), "size/op") +} + +func NewTestSamples(num int) dynparquet.Samples { + samples := make(dynparquet.Samples, 0, num) + for i := 0; i < num; i++ { + samples = append(samples, + dynparquet.Sample{ + ExampleType: "cpu", + Labels: map[string]string{ + "node": "test3", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: rand.Int63n(100000), + Value: rand.Int63(), + }, + ) + } + return samples +} + +func Benchmark_Serialize(b *testing.B) { + ctx := context.Background() + schema := dynparquet.SampleDefinition() + + col, err 
:= New() + require.NoError(b, err) + defer col.Close() + + db, err := col.DB(ctx, "test") + require.NoError(b, err) + + tbl, err := db.Table("test", NewTableConfig(schema)) + require.NoError(b, err) + + // Insert 10k rows + samples := NewTestSamples(10000) + r, err := samples.ToRecord() + require.NoError(b, err) + + _, err = tbl.InsertRecord(ctx, r) + require.NoError(b, err) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + // Serialize the table + require.NoError(b, tbl.active.Serialize(io.Discard)) + } +} diff --git a/binaryscalarexpr.go b/binaryscalarexpr.go deleted file mode 100644 index 5c8d2100b..000000000 --- a/binaryscalarexpr.go +++ /dev/null @@ -1,153 +0,0 @@ -package frostdb - -import ( - "errors" - "fmt" - - "github.com/segmentio/parquet-go" - - "github.com/polarsignals/frostdb/query/logicalplan" -) - -type ColumnRef struct { - ColumnName string -} - -func (c *ColumnRef) Column(p Particulate) (parquet.ColumnChunk, bool, error) { - columnIndex := findColumnIndex(p.Schema(), c.ColumnName) - var columnChunk parquet.ColumnChunk - // columnChunk can be nil if the column is not present in the row group. - if columnIndex != -1 { - columnChunk = p.ColumnChunks()[columnIndex] - } - - return columnChunk, columnIndex != -1, nil -} - -func findColumnIndex(s *parquet.Schema, columnName string) int { - for i, field := range s.Fields() { - if field.Name() == columnName { - return i - } - } - return -1 -} - -type BinaryScalarExpr struct { - Left *ColumnRef - Op logicalplan.Op - Right parquet.Value -} - -func (e BinaryScalarExpr) Eval(p Particulate) (bool, error) { - leftData, exists, err := e.Left.Column(p) - if err != nil { - return false, err - } - - // TODO: This needs a bunch of test cases to validate edge cases like non - // existant columns or null values. I'm pretty sure this is completely - // wrong and needs per operation, per type specific behavior. - if !exists { - // only handling string for now. 
- if e.Right.Kind() == parquet.ByteArray || e.Right.Kind() == parquet.FixedLenByteArray { - switch { - case e.Op == logicalplan.OpEq && e.Right.String() == "": - return true, nil - case e.Op == logicalplan.OpNotEq && e.Right.String() != "": - return true, nil - } - } - return false, nil - } - - return BinaryScalarOperation(leftData, e.Right, e.Op) -} - -var ErrUnsupportedBinaryOperation = errors.New("unsupported binary operation") - -func BinaryScalarOperation(left parquet.ColumnChunk, right parquet.Value, operator logicalplan.Op) (bool, error) { - switch operator { - case logicalplan.OpEq: - if right == parquet.NullValue() { - // Assume all ColumnChunk have NULLs for now. - // They will be read and added to a bitmap later on. - // TODO: Maybe there's a nice way of reading the NumNulls from the Pages, for me they always return 0 - return true, nil - } - - bloomFilter := left.BloomFilter() - if bloomFilter == nil { - // If there is no bloom filter then we cannot make a statement about true negative, instead check the min max values of the column chunk - return compare(right, Max(left)) <= 0 || compare(right, Min(left)) >= -1, nil - } - - ok, err := bloomFilter.Check(right) - if err != nil { - return true, err - } - if !ok { - // Bloom filters may return false positives, but never return false - // negatives, we know this column chunk does not contain the value. - return false, nil - } - - return true, nil - case logicalplan.OpLtEq: - return compare(Min(left), right) <= 0, nil - case logicalplan.OpLt: - return compare(Min(left), right) < 0, nil - case logicalplan.OpGt: - return compare(Max(left), right) > 0, nil - case logicalplan.OpGtEq: - return compare(Max(left), right) >= 0, nil - default: - return true, nil - } -} - -// Min returns the minimum value found in the column chunk across all pages. 
-func Min(chunk parquet.ColumnChunk) parquet.Value { - columnIndex := chunk.ColumnIndex() - min := columnIndex.MinValue(0) - for i := 1; i < columnIndex.NumPages(); i++ { - if v := columnIndex.MinValue(i); compare(min, v) == 1 { - min = v - } - } - - return min -} - -// Max returns the maximum value found in the column chunk across all pages. -func Max(chunk parquet.ColumnChunk) parquet.Value { - columnIndex := chunk.ColumnIndex() - max := columnIndex.MaxValue(0) - for i := 1; i < columnIndex.NumPages(); i++ { - if v := columnIndex.MaxValue(i); compare(max, v) == -1 { - max = v - } - } - - return max -} - -// compares two parquet values. 0 if they are equal, -1 if v1 < v2, 1 if v1 > v2. -func compare(v1, v2 parquet.Value) int { - switch v1.Kind() { - case parquet.Int32: - return parquet.Int32Type.Compare(v1, v2) - case parquet.Int64: - return parquet.Int64Type.Compare(v1, v2) - case parquet.Float: - return parquet.FloatType.Compare(v1, v2) - case parquet.Double: - return parquet.DoubleType.Compare(v1, v2) - case parquet.ByteArray, parquet.FixedLenByteArray: - return parquet.ByteArrayType.Compare(v1, v2) - case parquet.Boolean: - return parquet.BooleanType.Compare(v1, v2) - default: - panic(fmt.Sprintf("unsupported value comparison: %v", v1.Kind())) - } -} diff --git a/buf.gen.yaml b/buf.gen.yaml index 44a57a0d5..96f31fe08 100644 --- a/buf.gen.yaml +++ b/buf.gen.yaml @@ -6,12 +6,12 @@ managed: plugins: # renovate: datasource=github-releases depName=protocolbuffers/protobuf-go - - remote: buf.build/protocolbuffers/plugins/go:v1.28.0-1 + - plugin: buf.build/protocolbuffers/go:v1.34.2 out: gen/proto/go opt: paths=source_relative # renovate: datasource=github-releases depName=planetscale/vtprotobuf - - remote: buf.build/planetscale/plugins/vtproto:v0.3.0 + - plugin: buf.build/community/planetscale-vtprotobuf:v0.6.0 out: gen/proto/go opt: - - paths=source_relative,features=marshal+unmarshal+size+pool + - paths=source_relative,features=marshal+unmarshal+size+pool+grpc 
diff --git a/bufutils/dyncols.go b/bufutils/dyncols.go deleted file mode 100644 index 0408043a7..000000000 --- a/bufutils/dyncols.go +++ /dev/null @@ -1,25 +0,0 @@ -package bufutils - -import "sort" - -// Dedupe deduplicates the slices of values for each key in the map. -func Dedupe(s map[string][]string) map[string][]string { - final := map[string][]string{} - set := map[string]map[string]struct{}{} - for k, v := range s { - if set[k] == nil { - set[k] = map[string]struct{}{} - } - for _, i := range v { - if _, ok := set[k][i]; !ok { - set[k][i] = struct{}{} - final[k] = append(final[k], i) - } - } - } - - for _, s := range final { - sort.Strings(s) - } - return final -} diff --git a/cmd/parquet-reencode/main.go b/cmd/parquet-reencode/main.go index b8f07262c..00c6d99e5 100644 --- a/cmd/parquet-reencode/main.go +++ b/cmd/parquet-reencode/main.go @@ -5,7 +5,7 @@ import ( "log" "os" - "github.com/segmentio/parquet-go" + "github.com/parquet-go/parquet-go" "google.golang.org/protobuf/encoding/protojson" "github.com/polarsignals/frostdb/dynparquet" @@ -52,7 +52,7 @@ func main() { log.Fatal(fmt.Errorf("create output file: %w", err)) } - w, err := newSchema.GetWriter(outf, serBuf.DynamicColumns()) + w, err := newSchema.GetWriter(outf, serBuf.DynamicColumns(), false) if err != nil { log.Fatal(fmt.Errorf("get writer: %w", err)) } diff --git a/cmd/parquet-tool/cmd/column.go b/cmd/parquet-tool/cmd/column.go new file mode 100644 index 000000000..9e620e492 --- /dev/null +++ b/cmd/parquet-tool/cmd/column.go @@ -0,0 +1,73 @@ +package cmd + +import ( + "fmt" + "io" + "strconv" + + "github.com/parquet-go/parquet-go" + "github.com/spf13/cobra" +) + +var columnCmd = &cobra.Command{ + Use: "col", + Example: "parquet-tool col ", + Short: "print out a column for a given row group in the file", + Args: cobra.ExactArgs(3), + RunE: func(cmd *cobra.Command, args []string) error { + rg, err := strconv.Atoi(args[2]) + if 
err != nil { + return err + } + return column(args[0], args[1], rg) + }, +} + +func column(file string, col string, rg int) error { + pf, closer, err := openParquetFile(file) + if err != nil { + return fmt.Errorf("failed to open file: %w", err) + } + defer closer.Close() + + rowgroup := pf.RowGroups()[rg] + fields := rowgroup.Schema().Fields() + + for i, f := range fields { + if f.Name() == col { + for j, chunk := range rowgroup.ColumnChunks() { + if j%len(fields) == i { + if err := printPage(chunk.Pages()); err != nil { + return err + } + } + } + return nil + } + } + + fmt.Println("Column not found") + return nil +} + +func printPage(page parquet.Pages) error { + defer page.Close() + + i := 0 + for pg, err := page.ReadPage(); err != io.EOF; pg, err = page.ReadPage() { + if err != nil { + return err + } + + vr := pg.Values() + values := make([]parquet.Value, pg.NumValues()) + _, err = vr.ReadValues(values) + if err != nil && err != io.EOF { + return err + } + + fmt.Println("Values(", i, "):", values) + i++ + } + return nil +} diff --git a/cmd/parquet-tool/cmd/dump.go b/cmd/parquet-tool/cmd/dump.go new file mode 100644 index 000000000..d48597359 --- /dev/null +++ b/cmd/parquet-tool/cmd/dump.go @@ -0,0 +1,58 @@ +package cmd + +import ( + "fmt" + "os" + "strings" + + "github.com/dustin/go-humanize" + "github.com/olekukonko/tablewriter" + "github.com/spf13/cobra" +) + +var dumpCmd = &cobra.Command{ + Use: "dump", + Example: "parquet-tool dump <file>", + Short: "dump the database", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + return dump(args[0]) + }, +} + +func dump(file string) error { + pf, closer, err := openParquetFile(file) + if err != nil { + return err + } + defer closer.Close() + + fmt.Println("schema:", pf.Schema()) + meta := pf.Metadata() + fmt.Println("Num Rows:", meta.NumRows) + + for i, rg := range meta.RowGroups { + fmt.Println("\t Row group:", i) + fmt.Println("\t\t Row Count:", 
rg.NumRows) + fmt.Println("\t\t Row size:", humanize.Bytes(uint64(rg.TotalByteSize))) + fmt.Println("\t\t Columns:") + table := tablewriter.NewWriter(os.Stdout) + table.SetHeader([]string{"Col", "Type", "NumVal", "Encoding", "TotalCompressedSize", "TotalUncompressedSize", "Compression", "%"}) + for _, ds := range rg.Columns { + table.Append( + []string{ + strings.Join(ds.MetaData.PathInSchema, "/"), + ds.MetaData.Type.String(), + fmt.Sprintf("%d", ds.MetaData.NumValues), + fmt.Sprintf("%s", ds.MetaData.Encoding), + humanize.Bytes(uint64(ds.MetaData.TotalCompressedSize)), + humanize.Bytes(uint64(ds.MetaData.TotalUncompressedSize)), + fmt.Sprintf("%.2f", float64(ds.MetaData.TotalUncompressedSize-ds.MetaData.TotalCompressedSize)/float64(ds.MetaData.TotalCompressedSize)*100), + fmt.Sprintf("%.2f", float64(ds.MetaData.TotalUncompressedSize)/float64(rg.TotalByteSize)*100), + }) + } + table.Render() + } + + return nil +} diff --git a/cmd/parquet-tool/cmd/find.go b/cmd/parquet-tool/cmd/find.go new file mode 100644 index 000000000..3376ff154 --- /dev/null +++ b/cmd/parquet-tool/cmd/find.go @@ -0,0 +1,168 @@ +package cmd + +import ( + "fmt" + "io/fs" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/charmbracelet/lipgloss" + "github.com/charmbracelet/lipgloss/table" + "github.com/parquet-go/parquet-go" + "github.com/spf13/cobra" +) + +var findCmd = &cobra.Command{ + Use: "find", + Example: "parquet-tool find timestamp=1698684986287 ", + Short: "Find value(s) in a parquet file", + Args: cobra.MinimumNArgs(2), + RunE: func(cmd *cobra.Command, args []string) error { + return findAll(args[1], args[0]) + }, +} + +var HeaderStyle = lipgloss.NewStyle(). + Bold(true). + Foreground(lipgloss.Color("#FAFAFA")). + Background(lipgloss.Color("#7D56F4")) + +var EvenRowStyle = lipgloss.NewStyle(). + Bold(false). + Foreground(lipgloss.Color("#FAFAFA")) + +var OddRowStyle = lipgloss.NewStyle(). + Bold(false). 
+ Foreground(lipgloss.Color("#a6a4a4")) + +func parseColumnArg(columnArg string) (map[string]string, error) { + columns := make(map[string]string) + matchers := strings.Split(columnArg, ",") // csv separated list + for _, matcher := range matchers { + splits := strings.Split(matcher, "=") + if len(splits) != 2 { + return nil, fmt.Errorf("invalid column argument: %s; expected format of =", matcher) + } + + columns[splits[0]] = splits[1] + } + + return columns, nil +} + +func findAll(fileOrDir, column string) error { + info, err := os.Stat(fileOrDir) + if err != nil { + return err + } + + t := table.New(). + Border(lipgloss.NormalBorder()). + BorderStyle(lipgloss.NewStyle().Foreground(lipgloss.Color("99"))). + StyleFunc(func(row, col int) lipgloss.Style { + switch { + case row == 0: + return HeaderStyle + case row%2 == 0: + return EvenRowStyle + default: + return OddRowStyle + } + }). + Headers("FILE", "ROW GROUP") + defer fmt.Println(t) + + if !info.IsDir() { + return find(fileOrDir, column, t) + } + + return filepath.WalkDir(fileOrDir, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + + if d.IsDir() { + return nil + } + + return find(path, column, t) + }) +} + +func find(file, column string, t *table.Table) error { + pf, closer, err := openParquetFile(file) + if err != nil { + return err + } + defer closer.Close() + + columns, err := parseColumnArg(column) + if err != nil { + return err + } + + for i, rg := range pf.RowGroups() { + schema := rg.Schema() + found := 0 // We must find all columns that match or the whole row group does not match + for j, field := range schema.Fields() { + val, ok := columns[field.Name()] + if !ok { + continue // skip if column not found + } + + v, err := getValue(val, field.Type().Kind()) + if err != nil { + return err + } + + // Check the min max values of each column + index, err := rg.ColumnChunks()[j].ColumnIndex() + if err != nil { + return err + } + for k := 0; k < index.NumPages(); k++ { + 
if compare(index.MinValue(k), v) <= 0 &&
+					compare(index.MaxValue(k), v) >= 0 {
+					found++
+					break
+				}
+			}
+		}
+
+		if found == len(columns) {
+			t.Row(file, fmt.Sprint(i))
+		}
+	}
+
+	return nil
+}
+
+// getValue parses val into a parquet.Value of the given physical kind.
+// Only Int64 and ByteArray are currently supported.
+func getValue(val string, kind parquet.Kind) (parquet.Value, error) {
+	switch kind {
+	case parquet.Int64:
+		i, err := strconv.ParseInt(val, 10, 64)
+		if err != nil {
+			return parquet.Value{}, err
+		}
+
+		return parquet.ValueOf(i), nil
+	case parquet.ByteArray:
+		return parquet.ValueOf([]byte(val)), nil
+	case parquet.Int96:
+		fallthrough
+	case parquet.Boolean:
+		fallthrough
+	case parquet.Int32:
+		fallthrough
+	case parquet.Float:
+		fallthrough
+	case parquet.Double:
+		fallthrough
+	case parquet.FixedLenByteArray:
+		fallthrough
+	default:
+		return parquet.Value{}, fmt.Errorf("unsupported kind: %v", kind)
+	}
+}
diff --git a/cmd/parquet-tool/cmd/root.go b/cmd/parquet-tool/cmd/root.go
new file mode 100644
index 000000000..dd5d09cdc
--- /dev/null
+++ b/cmd/parquet-tool/cmd/root.go
@@ -0,0 +1,34 @@
+package cmd
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/spf13/cobra"
+)
+
+var rootCmd = &cobra.Command{
+	Use: "parquet-tool",
+	// Fixed typo in user-facing help text: "Explort" -> "Explore".
+	Short: "Explore parquet files",
+	Run: func(cmd *cobra.Command, args []string) {
+		cmd.Help()
+	},
+}
+
+func Execute() {
+	if err := rootCmd.Execute(); err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+}
+
+func init() {
+	rootCmd.AddCommand(dumpCmd)
+	rootCmd.AddCommand(findCmd)
+	rootCmd.AddCommand(walCmd)
+	rootCmd.AddCommand(snapshotCmd)
+	rootCmd.AddCommand(rowgroupCmd)
+	rootCmd.AddCommand(columnCmd)
+	rootCmd.AddCommand(rowCmd)
+	rootCmd.AddCommand(statsCmd)
+}
diff --git a/cmd/parquet-tool/cmd/row.go b/cmd/parquet-tool/cmd/row.go
new file mode 100644
index 000000000..6f31c5f22
--- /dev/null
+++ b/cmd/parquet-tool/cmd/row.go
@@ -0,0 +1,120 @@
+package cmd
+
+import (
+	"fmt"
+	"io"
+	"strconv"
+
+	"github.com/charmbracelet/lipgloss"
+	"github.com/charmbracelet/lipgloss/table"
+	
"github.com/parquet-go/parquet-go" + "github.com/spf13/cobra" +) + +var rowCmd = &cobra.Command{ + Use: "row", + Example: "parquet-tool row ", + Short: "print out row(s) for a given file", + Args: cobra.ExactArgs(3), + RunE: func(cmd *cobra.Command, args []string) error { + rowStart, err := strconv.Atoi(args[0]) + if err != nil { + return err + } + numRows, err := strconv.Atoi(args[1]) + if err != nil { + return err + } + return row(args[2], rowStart, numRows) + }, +} + +func row(file string, rowStart, numRows int) error { + pf, closer, err := openParquetFile(file) + if err != nil { + return fmt.Errorf("failed to open file: ", err) + } + defer closer.Close() + + // Find the row group that contains the row we want + var rowgroup parquet.RowGroup + rowsSeen := 0 + rgoffset := 0 + for _, rg := range pf.RowGroups() { + if rowsSeen+int(rg.NumRows()) >= rowStart { // This row group contains the row we want + rowgroup = rg + rgoffset = rowStart - rowsSeen + break + } + rowsSeen += int(rg.NumRows()) + } + + headers := []string{"column"} + for i := rowStart; i < rowStart+numRows; i++ { + headers = append(headers, strconv.Itoa(i)) + } + fields := rowgroup.Schema().Fields() + tbl := table.New(). + Border(lipgloss.NormalBorder()). + BorderStyle(lipgloss.NewStyle().Foreground(lipgloss.Color("99"))). + StyleFunc(func(row, col int) lipgloss.Style { + switch { + case row == 0: + return HeaderStyle + case row%2 == 0: + return EvenRowStyle + default: + return OddRowStyle + } + }). + Headers(headers...) 
+	defer fmt.Println(tbl)
+
+	for i, chunk := range rowgroup.ColumnChunks() {
+		if err := printPageSubset(tbl, fields[i].Name(), chunk.Pages(), rgoffset, numRows); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// printPageSubset appends to tbl the values of column `name` starting at row
+// `start` (relative to the first page) for `num` rows, spanning pages as
+// needed.
+func printPageSubset(tbl *table.Table, name string, page parquet.Pages, start, num int) error {
+	defer page.Close()
+
+	for pg, err := page.ReadPage(); err != io.EOF; pg, err = page.ReadPage() {
+		if err != nil {
+			return err
+		}
+
+		if int(pg.NumRows()) < start {
+			start -= int(pg.NumRows())
+			continue
+		}
+
+		vr := pg.Values()
+		values := make([]parquet.Value, pg.NumValues())
+		_, err = vr.ReadValues(values)
+		if err != nil && err != io.EOF {
+			return err
+		}
+
+		end := start + num
+		remainder := 0
+		if end > int(pg.NumRows()) {
+			// Clamp to this page and carry the remaining rows over to the
+			// next page. (Previously end was set to NumRows()-start, which
+			// is the wrong slice bound and could make end < start.)
+			end = int(pg.NumRows())
+			remainder = num - (end - start)
+		}
+
+		strs := make([]string, 0, len(values))
+		for _, v := range values[start:end] {
+			strs = append(strs, fmt.Sprintf("%v", v))
+		}
+		tbl.Row(append([]string{name}, strs...)...)
+
+		if remainder <= 0 {
+			break
+		}
+		// The next page starts at its first row with the carried-over count.
+		start = 0
+		num = remainder
+	}
+	return nil
+}
diff --git a/cmd/parquet-tool/cmd/rowgroup.go b/cmd/parquet-tool/cmd/rowgroup.go
new file mode 100644
index 000000000..7342f62b7
--- /dev/null
+++ b/cmd/parquet-tool/cmd/rowgroup.go
@@ -0,0 +1,68 @@
+package cmd
+
+import (
+	"fmt"
+	"strconv"
+
+	"github.com/charmbracelet/lipgloss"
+	"github.com/charmbracelet/lipgloss/table"
+	"github.com/spf13/cobra"
+)
+
+var rowgroupCmd = &cobra.Command{
+	Use:     "rowgroup",
+	Example: "parquet-tool rg ",
+	Short:   "Dump the column index for a row group",
+	Args:    cobra.ExactArgs(2),
+	RunE: func(cmd *cobra.Command, args []string) error {
+		rg, err := strconv.Atoi(args[1])
+		if err != nil {
+			return err
+		}
+		return rowgroup(args[0], rg)
+	},
+}
+
+// rowgroup prints the per-page column index (min/max/null count) of every
+// column chunk in the given row group.
+func rowgroup(file string, rg int) error {
+	f, closer, err := openParquetFile(file)
+	if err != nil {
+		// NOTE: was fmt.Errorf("failed to open file :", err) — missing
+		// format verb (and stray space) dropped the underlying error.
+		return fmt.Errorf("failed to open file: %w", err)
+	}
+	defer closer.Close()
+
+	t := table.New().
+ Border(lipgloss.NormalBorder()). + BorderStyle(lipgloss.NewStyle().Foreground(lipgloss.Color("99"))). + StyleFunc(func(row, col int) lipgloss.Style { + switch { + case row == 0: + return HeaderStyle + case row%2 == 0: + return EvenRowStyle + default: + return OddRowStyle + } + }). + Headers("Column", "Page", "Min", "Max", "Nulls") + defer fmt.Println(t) + + rowgroup := f.RowGroups()[rg] + fields := rowgroup.Schema().Fields() + + for i, chunk := range rowgroup.ColumnChunks() { + index, err := chunk.ColumnIndex() + if err != nil { + return err + } + for j := 0; j < index.NumPages(); j++ { + t.Row( + fields[i%len(fields)].Name(), + strconv.Itoa(j), + fmt.Sprintf("%v", index.MinValue(j)), + fmt.Sprintf("%v", index.MaxValue(j)), + fmt.Sprintf("%v", index.NullCount(j)), + ) + } + } + return nil +} diff --git a/cmd/parquet-tool/cmd/snapshot.go b/cmd/parquet-tool/cmd/snapshot.go new file mode 100644 index 000000000..505302e96 --- /dev/null +++ b/cmd/parquet-tool/cmd/snapshot.go @@ -0,0 +1,162 @@ +package cmd + +import ( + "bytes" + "encoding/binary" + "fmt" + "hash" + "hash/crc32" + "io" + "io/fs" + "os" + "path/filepath" + + "github.com/apache/arrow-go/v18/arrow/ipc" + "github.com/spf13/cobra" + + "github.com/polarsignals/frostdb/dynparquet" + snapshotpb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/snapshot/v1alpha1" +) + +var ( + snapshotMagic = "FDBS" + + snapshotVersion = 1 + minReadVersion = snapshotVersion +) + +var snapshotCmd = &cobra.Command{ + Use: "snapshot", + Example: "parquet-tool snapshot [columns to dump]", + Short: "Interact with a snapshot directory", + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + return inspectSnapshots(args[0], args[1:]...) 
+ }, +} + +func inspectSnapshots(dir string, columns ...string) error { + return filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + + if d.IsDir() { + return nil + } + + info, err := d.Info() + if err != nil { + return err + } + return inspectSnapshot(path, info.Size(), columns) + }) +} + +func inspectSnapshot(path string, size int64, columns []string) error { + f, err := os.Open(path) + if err != nil { + return err + } + defer f.Close() + + footer, err := readFooter(f, size) + if err != nil { + return err + } + + for _, tableMeta := range footer.TableMetadata { + for _, granuleMeta := range tableMeta.GranuleMetadata { + for _, partMeta := range granuleMeta.PartMetadata { + startOffset := partMeta.StartOffset + endOffset := partMeta.EndOffset + partBytes := make([]byte, endOffset-startOffset) + if _, err := f.ReadAt(partBytes, startOffset); err != nil { + return err + } + switch partMeta.Encoding { + case snapshotpb.Part_ENCODING_PARQUET: + _, err := dynparquet.ReaderFromBytes(partBytes) // TODO: do something with the serialized buffer + if err != nil { + return err + } + fmt.Println("Parquet: ", partMeta.CompactionLevel) + case snapshotpb.Part_ENCODING_ARROW: + arrowReader, err := ipc.NewReader(bytes.NewReader(partBytes)) + if err != nil { + return err + } + + record, err := arrowReader.Read() + if err != nil { + return err + } + + inspectRecord(record, columns) + default: + return fmt.Errorf("unknown part encoding: %s", partMeta.Encoding) + } + } + } + } + + return nil +} + +// Copied from FrostDB directly +func readFooter(r io.ReaderAt, size int64) (*snapshotpb.FooterData, error) { + buffer := make([]byte, 16) + if _, err := r.ReadAt(buffer[:4], 0); err != nil { + return nil, err + } + if string(buffer[:4]) != snapshotMagic { + return nil, fmt.Errorf("invalid snapshot magic: %q", buffer[:4]) + } + if _, err := r.ReadAt(buffer, size-int64(len(buffer))); err != nil { + return nil, err + } + if 
string(buffer[12:]) != snapshotMagic { + return nil, fmt.Errorf("invalid snapshot magic: %q", buffer[4:]) + } + + // The checksum does not include the last 8 bytes of the file, which is the + // magic and the checksum. Create a section reader of all but the last 8 + // bytes to compute the checksum and validate it against the read checksum. + checksum := binary.LittleEndian.Uint32(buffer[8:12]) + checksumWriter := newChecksumWriter() + if _, err := io.Copy(checksumWriter, io.NewSectionReader(r, 0, size-8)); err != nil { + return nil, fmt.Errorf("failed to compute checksum: %w", err) + } + if checksum != checksumWriter.Sum32() { + return nil, fmt.Errorf( + "snapshot file corrupt: invalid checksum: expected %x, got %x", checksum, checksumWriter.Sum32(), + ) + } + + version := binary.LittleEndian.Uint32(buffer[4:8]) + if int(version) > snapshotVersion { + return nil, fmt.Errorf( + "cannot read snapshot with version %d: max version supported: %d", version, snapshotVersion, + ) + } + if int(version) < minReadVersion { + return nil, fmt.Errorf( + "cannot read snapshot with version %d: min version supported: %d", version, minReadVersion, + ) + } + + footerSize := binary.LittleEndian.Uint32(buffer[:4]) + footerBytes := make([]byte, footerSize) + if _, err := r.ReadAt(footerBytes, size-(int64(len(buffer))+int64(footerSize))); err != nil { + return nil, err + } + footer := &snapshotpb.FooterData{} + if err := footer.UnmarshalVT(footerBytes); err != nil { + return nil, fmt.Errorf("could not unmarshal footer: %v", err) + } + return footer, nil +} + +func newChecksumWriter() hash.Hash32 { + return crc32.New(crc32.MakeTable(crc32.Castagnoli)) +} diff --git a/cmd/parquet-tool/cmd/stats.go b/cmd/parquet-tool/cmd/stats.go new file mode 100644 index 000000000..57a43b5ca --- /dev/null +++ b/cmd/parquet-tool/cmd/stats.go @@ -0,0 +1,124 @@ +package cmd + +import ( + "fmt" + "os" + "strings" + + "github.com/dustin/go-humanize" + 
"github.com/olekukonko/tablewriter" + "github.com/spf13/cobra" + "golang.org/x/exp/maps" + "golang.org/x/exp/slices" +) + +var statsCmd = &cobra.Command{ + Use: "stats", + Example: "parquet-tool stats ", + Short: "print total stats of a parquet file", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + return runStats(args[0]) + }, +} + +type stats struct { + Type string + NumVal int64 + Encoding string + TotalCompressedSize int64 + TotalUncompressedSize int64 + TotalByteSize int64 +} + +func runStats(file string) error { + pf, closer, err := openParquetFile(file) + if err != nil { + return err + } + defer closer.Close() + + meta := pf.Metadata() + + s := map[string]stats{} + for _, rg := range meta.RowGroups { + for _, ds := range rg.Columns { + col := strings.Join(ds.MetaData.PathInSchema, "/") + typ := ds.MetaData.Type.String() + enc := "" + for _, e := range ds.MetaData.Encoding { + enc += e.String() + " " + } + + if _, ok := s[col]; !ok { + s[col] = stats{ + Type: typ, + NumVal: ds.MetaData.NumValues, + Encoding: enc, + TotalCompressedSize: ds.MetaData.TotalCompressedSize, + TotalUncompressedSize: ds.MetaData.TotalUncompressedSize, + TotalByteSize: rg.TotalByteSize, + } + } else { + s[col] = stats{ + Type: typ, + NumVal: s[col].NumVal + ds.MetaData.NumValues, + Encoding: enc, + TotalCompressedSize: s[col].TotalCompressedSize + ds.MetaData.TotalCompressedSize, + TotalUncompressedSize: s[col].TotalUncompressedSize + ds.MetaData.TotalUncompressedSize, + TotalByteSize: s[col].TotalByteSize + rg.TotalByteSize, + } + } + } + } + + table := tablewriter.NewWriter(os.Stdout) + table.SetHeader([]string{"Col", "Type", "NumVal", "Encoding", "TotalCompressedSize", "TotalUncompressedSize", "Compression", "%"}) + keys := maps.Keys(s) + slices.Sort(keys) + + var ( + totalCompressedSize int64 + totalUncompressedSize int64 + totalByteSize int64 + ) + for _, k := range keys { + row := s[k] + table.Append([]string{ + 
k, + row.Type, + fmt.Sprintf("%d", row.NumVal), + row.Encoding, + humanize.Bytes(uint64(row.TotalCompressedSize)), + humanize.Bytes(uint64(row.TotalUncompressedSize)), + fmt.Sprintf("%.2f", float64(row.TotalUncompressedSize-row.TotalCompressedSize)/float64(row.TotalCompressedSize)*100), + fmt.Sprintf("%.2f", float64(row.TotalUncompressedSize)/float64(row.TotalByteSize)*100), + }) + + totalCompressedSize += row.TotalCompressedSize + totalUncompressedSize += row.TotalUncompressedSize + totalByteSize += row.TotalByteSize + } + + table.Append([]string{ + "Total", + "", + "", + "", + humanize.Bytes(uint64(totalCompressedSize)), + humanize.Bytes(uint64(totalUncompressedSize)), + "", + "", + }) + table.Render() + + return nil +} + +func sum(a []int64) int64 { + var s int64 + for _, v := range a { + s += v + } + return s +} diff --git a/cmd/parquet-tool/cmd/util.go b/cmd/parquet-tool/cmd/util.go new file mode 100644 index 000000000..fce2a3b6c --- /dev/null +++ b/cmd/parquet-tool/cmd/util.go @@ -0,0 +1,82 @@ +package cmd + +import ( + "fmt" + "io" + "os" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/parquet-go/parquet-go" +) + +func openParquetFile(file string) (*parquet.File, io.Closer, error) { + f, err := os.Open(file) + if err != nil { + return nil, nil, err + } + stats, err := f.Stat() + if err != nil { + return nil, f, err + } + pf, err := parquet.OpenFile(f, stats.Size()) + if err != nil { + return nil, f, err + } + + return pf, f, nil +} + +func compare(v1, v2 parquet.Value) int { + if v1.IsNull() { + if v2.IsNull() { + return 0 + } + return 1 + } + + if v2.IsNull() { + return -1 + } + + switch v1.Kind() { + case parquet.Int32: + return parquet.Int32Type.Compare(v1, v2) + case parquet.Int64: + return parquet.Int64Type.Compare(v1, v2) + case parquet.Float: + return parquet.FloatType.Compare(v1, v2) + case parquet.Double: + return parquet.DoubleType.Compare(v1, v2) + 
case parquet.ByteArray, parquet.FixedLenByteArray: + return parquet.ByteArrayType.Compare(v1, v2) + case parquet.Boolean: + return parquet.BooleanType.Compare(v1, v2) + default: + panic(fmt.Sprintf("unsupported value comparison: %v", v1.Kind())) + } +} + +func inspectRecord(record arrow.Record, columns []string) { + if len(columns) == 0 { + fmt.Println(record) + } else { + fields := make([]arrow.Field, 0, len(columns)) + cols := make([]arrow.Array, 0, len(columns)) + for i := 0; i < record.Schema().NumFields(); i++ { + field := record.Schema().Field(i) + for _, col := range columns { + if col == field.Name { + fields = append(fields, field) + cols = append(cols, record.Column(i)) + } + } + } + + subschema := arrow.NewSchema(fields, nil) + + r := array.NewRecord(subschema, cols, record.NumRows()) + defer r.Release() + fmt.Println(r) + } +} diff --git a/cmd/parquet-tool/cmd/wal.go b/cmd/parquet-tool/cmd/wal.go new file mode 100644 index 000000000..8885dfd84 --- /dev/null +++ b/cmd/parquet-tool/cmd/wal.go @@ -0,0 +1,51 @@ +package cmd + +import ( + "bytes" + + "github.com/apache/arrow-go/v18/arrow/ipc" + "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" + "github.com/spf13/cobra" + + walpb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/wal/v1alpha1" + "github.com/polarsignals/frostdb/wal" +) + +var walCmd = &cobra.Command{ + Use: "wal", + Example: "parquet-tool wal [columns to dump]", + Short: "Interact with a WAL directory", + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + return inspectWAL(args[0], args[1:]...) 
+ }, +} + +func inspectWAL(dir string, columns ...string) error { + wal, err := wal.Open(log.NewNopLogger(), prometheus.NewRegistry(), dir) + if err != nil { + return err + } + + return wal.Replay(0, func(tx uint64, record *walpb.Record) error { + switch e := record.Entry.EntryType.(type) { + case *walpb.Entry_Write_: + reader, err := ipc.NewReader(bytes.NewReader(e.Write.Data)) + if err != nil { + return err + } + + record, err := reader.Read() + if err != nil { + return err + } + + inspectRecord(record, columns) + default: + // NOTE: just looking for writes + return nil + } + return nil + }) +} diff --git a/cmd/parquet-tool/go.mod b/cmd/parquet-tool/go.mod new file mode 100644 index 000000000..f0ef0d28a --- /dev/null +++ b/cmd/parquet-tool/go.mod @@ -0,0 +1,67 @@ +module github.com/polarsignals/frostdb/cmd/parquet-tool + +go 1.23.0 + +toolchain go1.24.5 + +require ( + github.com/apache/arrow-go/v18 v18.4.0 + github.com/apache/arrow/go/v16 v16.1.0 + github.com/apache/arrow/go/v17 v17.0.0 + github.com/charmbracelet/lipgloss v0.13.1 + github.com/dustin/go-humanize v1.0.1 + github.com/go-kit/log v0.2.1 + github.com/olekukonko/tablewriter v0.0.5 + github.com/parquet-go/parquet-go v0.22.0 + github.com/polarsignals/frostdb v0.0.0-20240531143051-eaf80c711e0a + github.com/prometheus/client_golang v1.20.5 + github.com/spf13/cobra v1.8.0 + golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 +) + +require ( + github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect + github.com/andybalholm/brotli v1.2.0 // indirect + github.com/apache/arrow/go/v15 v15.0.2 // indirect + github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect + github.com/benbjohnson/immutable v0.4.3 // indirect + github.com/beorn7/perks 
v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/charmbracelet/x/ansi v0.3.2 // indirect + github.com/coreos/etcd v3.3.27+incompatible // indirect + github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf // indirect + github.com/coreos/pkg v0.0.0-20230601102743-20bbbf26f4d8 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/dgryski/go-metro v0.0.0-20211217172704-adc40b04c140 // indirect + github.com/go-logfmt/logfmt v0.6.0 // indirect + github.com/goccy/go-json v0.10.5 // indirect + github.com/google/flatbuffers v25.2.10+incompatible // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/klauspost/compress v1.18.0 // indirect + github.com/klauspost/cpuid/v2 v2.2.11 // indirect + github.com/lucasb-eyer/go-colorful v1.2.0 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mattn/go-runewidth v0.0.16 // indirect + github.com/muesli/reflow v0.3.0 // indirect + github.com/muesli/termenv v0.15.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/pierrec/lz4/v4 v4.1.22 // indirect + github.com/planetscale/vtprotobuf v0.6.0 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/polarsignals/wal v0.0.0-20240514152147-1cd4b81c9b88 // indirect + github.com/prometheus/client_model v0.6.1 // indirect + github.com/prometheus/common v0.55.0 // indirect + github.com/prometheus/procfs v0.15.1 // indirect + 
github.com/rivo/uniseg v0.4.7 // indirect + github.com/segmentio/encoding v0.3.6 // indirect + github.com/spf13/pflag v1.0.5 // indirect + github.com/zeebo/xxh3 v1.0.2 // indirect + go.etcd.io/bbolt v1.3.7 // indirect + golang.org/x/mod v0.25.0 // indirect + golang.org/x/sync v0.15.0 // indirect + golang.org/x/sys v0.33.0 // indirect + golang.org/x/tools v0.34.0 // indirect + golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect + google.golang.org/protobuf v1.36.6 // indirect +) diff --git a/cmd/parquet-tool/go.sum b/cmd/parquet-tool/go.sum new file mode 100644 index 000000000..23798c2ce --- /dev/null +++ b/cmd/parquet-tool/go.sum @@ -0,0 +1,237 @@ +github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvKIROun94nF7v2cua9qP+thov/7M50KEoeSU= +github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c/go.mod h1:X0CRv0ky0k6m906ixxpzmDRLvX58TFUKS2eePweuyxk= +github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8= +github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo= +github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= +github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= +github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ= +github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY= +github.com/apache/arrow-go/v18 v18.4.0 h1:/RvkGqH517iY8bZKc4FD5/kkdwXJGjxf28JIXbJ/oB0= +github.com/apache/arrow-go/v18 v18.4.0/go.mod h1:Aawvwhj8x2jURIzD9Moy72cF0FyJXOpkYpdmGRHcw14= +github.com/apache/arrow/go/v15 v15.0.2 h1:60IliRbiyTWCWjERBCkO1W4Qun9svcYoZrSLcyOsMLE= 
+github.com/apache/arrow/go/v15 v15.0.2/go.mod h1:DGXsR3ajT524njufqf95822i+KTh+yea1jass9YXgjA= +github.com/apache/arrow/go/v16 v16.1.0 h1:dwgfOya6s03CzH9JrjCBx6bkVb4yPD4ma3haj9p7FXI= +github.com/apache/arrow/go/v16 v16.1.0/go.mod h1:9wnc9mn6vEDTRIm4+27pEjQpRKuTvBaessPoEXQzxWA= +github.com/apache/arrow/go/v17 v16.0.0 h1:qRLbJRPj4zaseZrjbDHa7mUoZDDIU+4pu+mE2Lucs5g= +github.com/apache/arrow/go/v17 v16.0.0/go.mod h1:9wnc9mn6vEDTRIm4+27pEjQpRKuTvBaessPoEXQzxWA= +github.com/apache/arrow/go/v17 v16.1.0 h1:dwgfOya6s03CzH9JrjCBx6bkVb4yPD4ma3haj9p7FXI= +github.com/apache/arrow/go/v17 v16.1.0/go.mod h1:9wnc9mn6vEDTRIm4+27pEjQpRKuTvBaessPoEXQzxWA= +github.com/apache/arrow/go/v17 v17.0.0 h1:RRR2bdqKcdbss9Gxy2NS/hK8i4LDMh23L6BbkN5+F54= +github.com/apache/arrow/go/v17 v17.0.0/go.mod h1:jR7QHkODl15PfYyjM2nU+yTLScZ/qfj7OSUZmJ8putc= +github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= +github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= +github.com/benbjohnson/immutable v0.4.3 h1:GYHcksoJ9K6HyAUpGxwZURrbTkXA0Dh4otXGqbhdrjA= +github.com/benbjohnson/immutable v0.4.3/go.mod h1:qJIKKSmdqz1tVzNtst1DZzvaqOU1onk1rc03IeM3Owk= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/charmbracelet/lipgloss v0.10.0 h1:KWeXFSexGcfahHX+54URiZGkBFazf70JNMtwg/AFW3s= +github.com/charmbracelet/lipgloss v0.10.0/go.mod 
h1:Wig9DSfvANsxqkRsqj6x87irdy123SR4dOXlKa91ciE= +github.com/charmbracelet/lipgloss v0.13.0 h1:4X3PPeoWEDCMvzDvGmTajSyYPcZM4+y8sCA/SsA3cjw= +github.com/charmbracelet/lipgloss v0.13.0/go.mod h1:nw4zy0SBX/F/eAO1cWdcvy6qnkDUxr8Lw7dvFrAIbbY= +github.com/charmbracelet/lipgloss v0.13.1 h1:Oik/oqDTMVA01GetT4JdEC033dNzWoQHdWnHnQmXE2A= +github.com/charmbracelet/lipgloss v0.13.1/go.mod h1:zaYVJ2xKSKEnTEEbX6uAHabh2d975RJ+0yfkFpRBz5U= +github.com/charmbracelet/x/ansi v0.1.4 h1:IEU3D6+dWwPSgZ6HBH+v6oUuZ/nVawMiWj5831KfiLM= +github.com/charmbracelet/x/ansi v0.1.4/go.mod h1:dk73KoMTT5AX5BsX0KrqhsTqAnhZZoCBjs7dGWp4Ktw= +github.com/charmbracelet/x/ansi v0.3.2 h1:wsEwgAN+C9U06l9dCVMX0/L3x7ptvY1qmjMwyfE6USY= +github.com/charmbracelet/x/ansi v0.3.2/go.mod h1:dk73KoMTT5AX5BsX0KrqhsTqAnhZZoCBjs7dGWp4Ktw= +github.com/coreos/etcd v3.3.27+incompatible h1:QIudLb9KeBsE5zyYxd1mjzRSkzLg9Wf9QlRwFgd6oTA= +github.com/coreos/etcd v3.3.27+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= +github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf h1:iW4rZ826su+pqaw19uhpSCzhj44qo35pNgKFGqzDKkU= +github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/coreos/pkg v0.0.0-20230601102743-20bbbf26f4d8 h1:NrLmX9HDyGvQhyZdrDx89zCvPdxQ/EHCo+xGNrjNmHc= +github.com/coreos/pkg v0.0.0-20230601102743-20bbbf26f4d8/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= +github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgryski/go-metro v0.0.0-20211217172704-adc40b04c140 h1:y7y0Oa6UawqTFPCDw9JG6pdKt4F9pAhHv0B7FMGaGD0= +github.com/dgryski/go-metro v0.0.0-20211217172704-adc40b04c140/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/go-kit/log v0.2.1 h1:MRVx0/zhvdseW+Gza6N9rVzU/IVzaeE1SFI4raAhmBU= +github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0= +github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4= +github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= +github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= +github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= +github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= +github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= +github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= +github.com/google/flatbuffers v24.3.25+incompatible h1:CX395cjN9Kke9mmalRoL3d81AtFUxJM+yDthflgJGkI= +github.com/google/flatbuffers v24.3.25+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/flatbuffers v25.2.10+incompatible h1:F3vclr7C3HpB1k9mxCGRMXq6FdUalZ6H/pNX4FP1v0Q= +github.com/google/flatbuffers v25.2.10+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= 
+github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= +github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/klauspost/compress v1.17.7 h1:ehO88t2UGzQK66LMdE8tibEd1ErmzZjNEqWkjLAKQQg= +github.com/klauspost/compress v1.17.7/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU= +github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= +github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM= +github.com/klauspost/cpuid/v2 
v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= +github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/klauspost/cpuid/v2 v2.2.11 h1:0OwqZRYI2rFrjS4kvkDnqJkKHdHaRnCm68/DY4OxRzU= +github.com/klauspost/cpuid/v2 v2.2.11/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= +github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY= +github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= +github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= +github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= +github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk= +github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= +github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= +github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s= +github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8= +github.com/muesli/termenv v0.15.2 
h1:GohcuySI0QmI3wN8Ok9PtKGkgkFIk7y6Vpb5PvrY+Wo= +github.com/muesli/termenv v0.15.2/go.mod h1:Epx+iuz8sNs7mNKhxzH4fWXGNpZwUaJKRS1noLXviQ8= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= +github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= +github.com/parquet-go/parquet-go v0.20.1 h1:r5UqeMqyH2DrahZv6dlT41hH2NpS2F8atJWmX1ST1/U= +github.com/parquet-go/parquet-go v0.20.1/go.mod h1:4YfUo8TkoGoqwzhA/joZKZ8f77wSMShOLHESY4Ys0bY= +github.com/parquet-go/parquet-go v0.21.0 h1:cBIT1S7dA00LRVB4k9ZSrjPC1rQbiryIducp6nWDqZs= +github.com/parquet-go/parquet-go v0.21.0/go.mod h1:wMYanjuaE900FTDTNY00JU+67Oqh9uO0pYWRNoPGctQ= +github.com/parquet-go/parquet-go v0.22.0 h1:9G32efs+11L/MDc0Zt05AuvBubRGAp5lRKufv6pB/B8= +github.com/parquet-go/parquet-go v0.22.0/go.mod h1:3VBP+djJCNuV+D5uSUs2pWQufk2yKO+9pwYvXglsB8Y= +github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= +github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pierrec/lz4/v4 v4.1.22 h1:cKFw6uJDK+/gfw5BcDL0JL5aBsAFdsIT18eRtLj7VIU= +github.com/pierrec/lz4/v4 v4.1.22/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/planetscale/vtprotobuf v0.6.0 h1:nBeETjudeJ5ZgBHUz1fVHvbqUKnYOXNhsIEabROxmNA= +github.com/planetscale/vtprotobuf v0.6.0/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 
h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/polarsignals/frostdb v0.0.0-20240422161053-bbe850c694aa h1:I4rRK4Kds5VOQET794Wb+ZdINprMJ9f3wRUpszINPC0= +github.com/polarsignals/frostdb v0.0.0-20240422161053-bbe850c694aa/go.mod h1:528m/MX6D4sPbY/T4IgUxzwezY4pW1AH5E7u2JzzFt8= +github.com/polarsignals/frostdb v0.0.0-20240422175033-44d6bc5deb6f h1:xw2DBhzdz32VEZ+UDQM4ei4xEidBsmGdwiKAWzW0z/s= +github.com/polarsignals/frostdb v0.0.0-20240422175033-44d6bc5deb6f/go.mod h1:VYg5obSoG93TBBF8AFkNzG1txtzyWBIiB/t2JY8DiCY= +github.com/polarsignals/frostdb v0.0.0-20240422193019-ddfc5534c809 h1:9IHOjvAsyt5CjfzQDItMLlOujPxGNn1MDAu1IyZ90FE= +github.com/polarsignals/frostdb v0.0.0-20240422193019-ddfc5534c809/go.mod h1:YHgVYKw5Ynnw8/dMdFz8LL9SPjMXXPxm5X2Yzd5a/rA= +github.com/polarsignals/frostdb v0.0.0-20240531143051-eaf80c711e0a h1:R5W7iCLaP2w1s9LBnONPXrJDEPwzvMvviUbo/uBxwzA= +github.com/polarsignals/frostdb v0.0.0-20240531143051-eaf80c711e0a/go.mod h1:lzPdZ78hhdb/Hx1ljqhfNO3aBy+Sgh34pdaxfxu+j3Y= +github.com/polarsignals/wal v0.0.0-20231123092250-5d233119cfc9 h1:SwUso/MRikI7aLlEelX4k6N107fT4uTAzmtyMTfjr44= +github.com/polarsignals/wal v0.0.0-20231123092250-5d233119cfc9/go.mod h1:EVDHAAe+7GQ33A1/x+/gE+sBPN4toQ0XG5RoLD49xr8= +github.com/polarsignals/wal v0.0.0-20240514152147-1cd4b81c9b88 h1:FZvQW8MXcNjwLfWDRAatOA83Pof5+iKW7veuInygBXY= +github.com/polarsignals/wal v0.0.0-20240514152147-1cd4b81c9b88/go.mod h1:EVDHAAe+7GQ33A1/x+/gE+sBPN4toQ0XG5RoLD49xr8= +github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU= +github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k= 
+github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= +github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= +github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= +github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= +github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.48.0 h1:QO8U2CdOzSn1BBsmXJXduaaW+dY/5QLjfB8svtSzKKE= +github.com/prometheus/common v0.48.0/go.mod h1:0/KsvlIEfPQCQ5I2iNSAWKPZziNCvRs5EC6ILDTlAPc= +github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= +github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= +github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= +github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= 
+github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/segmentio/asm v1.1.3/go.mod h1:Ld3L4ZXGNcSLRg4JBsZ3//1+f/TjYl0Mzen/DQy1EJg= +github.com/segmentio/encoding v0.3.6 h1:E6lVLyDPseWEulBmCmAKPanDd3jiyGDo5gMcugCRwZQ= +github.com/segmentio/encoding v0.3.6/go.mod h1:n0JeuIqEQrQoPDGsjo8UNd1iA0U8d8+oHAA4E3G3OxM= +github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= +github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= +github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= +github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= +github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= +go.etcd.io/bbolt v1.3.7 h1:j+zJOnnEjF/kyHlDDgGnVL/AIqIJPq8UoB2GSNfkUfQ= +go.etcd.io/bbolt v1.3.7/go.mod h1:N9Mkw9X8x5fupy0IKsmuqVtoGDyxsaDlbk4Rd05IAQw= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +golang.org/x/exp v0.0.0-20240409090435-93d18d7e34b8 h1:ESSUROHIBHg7USnszlcdmjBEwdMj9VUvU+OPk4yl2mc= +golang.org/x/exp v0.0.0-20240409090435-93d18d7e34b8/go.mod 
h1:/lliqkxwWAhPjf5oSOIJup2XcqJaw8RGS6k3TGEc7GI= +golang.org/x/exp v0.0.0-20240416160154-fe59bbe5cc7f h1:99ci1mjWVBWwJiEKYY6jWa4d2nTQVIEhZIptnrVb1XY= +golang.org/x/exp v0.0.0-20240416160154-fe59bbe5cc7f/go.mod h1:/lliqkxwWAhPjf5oSOIJup2XcqJaw8RGS6k3TGEc7GI= +golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM= +golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc= +golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c h1:KL/ZBHXgKGVmuZBZ01Lt57yE5ws8ZPSkkihmEyq7FXc= +golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c/go.mod h1:tujkw807nyEEAamNbDrEGzRav+ilXA7PCRAd6xsmwiU= +golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 h1:R84qjqJb5nVJMxqWYb3np9L5ZsaDtB+a39EqjV0JSUM= +golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0/go.mod h1:S9Xr4PYopiDyqSyp5NjCrhFrqg6A5zA2E/iPHPhqnS8= +golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0= +golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4= +golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= +golang.org/x/mod v0.25.0 h1:n7a+ZbQKQA/Ysbyb0/6IbB1H/X41mKgbhfv7AfG/44w= +golang.org/x/mod v0.25.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= +golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sys 
v0.0.0-20211110154304-99a53858aa08/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= +golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= +golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= +golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= +golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/tools v0.20.0 h1:hz/CVckiOxybQvFw6h7b/q80NTr9IUQb4s1IIzW7KNY= +golang.org/x/tools v0.20.0/go.mod h1:WvitBU7JJf6A4jOdg4S1tviW9bhUxkgeCui/0JHctQg= +golang.org/x/tools v0.21.0 h1:qc0xYgIbsSDt9EyWz05J5wfa7LOVW0YTLOXrqdLAWIw= +golang.org/x/tools v0.21.0/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA= +golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c= +golang.org/x/tools v0.29.0 h1:Xx0h3TtM9rzQpQuR4dKLrdglAmCEN5Oi+P74JdhdzXE= +golang.org/x/tools v0.29.0/go.mod h1:KMQVMRsVxU6nHCFXrBPhDB8XncLNLM0lIy/F14RP588= +golang.org/x/tools v0.34.0 h1:qIpSLOxeCYGg9TrcJokLBG4KFA6d795g0xkBkiESGlo= +golang.org/x/tools v0.34.0/go.mod h1:pAP9OwEaY1CAW3HOmg3hLZC5Z0CCmzjAF2UQMSqNARg= +golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= +golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod 
h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= +golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY= +golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= +gonum.org/v1/gonum v0.15.0 h1:2lYxjRbTYyxkJxlhC+LvJIx3SsANPdRybu1tGj9/OrQ= +gonum.org/v1/gonum v0.15.0/go.mod h1:xzZVBJBtS+Mz4q0Yl2LJTk+OxOg4jiXZ7qBoM0uISGo= +google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= +google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= +google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/cmd/parquet-tool/main.go b/cmd/parquet-tool/main.go index 38a8cb2c8..1f9f6947a 100644 --- a/cmd/parquet-tool/main.go +++ b/cmd/parquet-tool/main.go @@ -1,56 +1,7 @@ package main -import ( - "fmt" - "os" - "strings" - - "github.com/dustin/go-humanize" - "github.com/olekukonko/tablewriter" - "github.com/segmentio/parquet-go" -) +import "github.com/polarsignals/frostdb/cmd/parquet-tool/cmd" func main() { - if len(os.Args) < 2 { - fmt.Println("Usage: parquet-tool ") - os.Exit(1) - } - f, err := os.Open(os.Args[1]) - if err != nil { - panic(err) - } - defer f.Close() - stats, err := f.Stat() - if err != nil { - panic(err) - } - pf, err := parquet.OpenFile(f, stats.Size()) - if err != nil { - panic(err) - } - 
fmt.Println("schema:", pf.Schema()) - meta := pf.Metadata() - fmt.Println("Num Rows:", meta.NumRows) - - for i, rg := range meta.RowGroups { - fmt.Println("\t Row group:", i) - fmt.Println("\t\t Row Count:", rg.NumRows) - fmt.Println("\t\t Row size:", humanize.Bytes(uint64(rg.TotalByteSize))) - fmt.Println("\t\t Columns:") - table := tablewriter.NewWriter(os.Stdout) - table.SetHeader([]string{"Col", "Type", "NumVal", "TotalCompressedSize", "TotalUncompressedSize", "Compression", "%"}) - for _, ds := range rg.Columns { - table.Append( - []string{ - strings.Join(ds.MetaData.PathInSchema, "/"), - ds.MetaData.Type.String(), - fmt.Sprintf("%d", ds.MetaData.NumValues), - humanize.Bytes(uint64(ds.MetaData.TotalCompressedSize)), - humanize.Bytes(uint64(ds.MetaData.TotalUncompressedSize)), - fmt.Sprintf("%.2f", float64(ds.MetaData.TotalUncompressedSize-ds.MetaData.TotalCompressedSize)/float64(ds.MetaData.TotalCompressedSize)*100), - fmt.Sprintf("%.2f", float64(ds.MetaData.TotalCompressedSize)/float64(rg.TotalByteSize)*100), - }) - } - table.Render() - } + cmd.Execute() } diff --git a/compaction.go b/compaction.go deleted file mode 100644 index 6ade07e21..000000000 --- a/compaction.go +++ /dev/null @@ -1,680 +0,0 @@ -package frostdb - -import ( - "bytes" - "context" - "fmt" - "math" - "runtime" - "sort" - "sync" - "time" - - "github.com/dustin/go-humanize" - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/google/btree" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/thanos-io/objstore/errutil" - - "github.com/polarsignals/frostdb/dynparquet" - "github.com/polarsignals/frostdb/parts" -) - -type CompactionOption func(*CompactionConfig) - -type CompactionConfig struct { - concurrency int - interval time.Duration - l1ToGranuleSizeRatio float64 -} - -// 
NewCompactionConfig creates a new compaction config with the given options. -// If none are specified, a default compaction config is created. -func NewCompactionConfig(options ...CompactionOption) *CompactionConfig { - c := &CompactionConfig{ - concurrency: runtime.GOMAXPROCS(0), - interval: 100 * time.Millisecond, - // 0.5 was chosen so that a level1 part takes up around half the space - // in a full granule, allowing for level0 parts to grow up to the - // remaining space before compaction. The higher this ratio, the less - // new data is compacted and the more compactions occur. However, a - // lower ratio implies leaving some parquet compression size savings on - // the table. - l1ToGranuleSizeRatio: 0.5, - } - for _, o := range options { - o(c) - } - return c -} - -// WithConcurrency specifies the number of concurrent goroutines compacting data -// for each database. -func WithConcurrency(concurrency int) CompactionOption { - return func(c *CompactionConfig) { - c.concurrency = concurrency - } -} - -// WithInterval specifies the compaction sweep interval. -func WithInterval(i time.Duration) CompactionOption { - return func(c *CompactionConfig) { - c.interval = i - } -} - -// WithL1ToGranuleSizeRatio sets the target level1 part size relative to the -// granule size. The closer this value is to 1, the more compacted data becomes -// with an expected rise in memory and CPU usage. 
-func WithL1ToGranuleSizeRatio(r float64) CompactionOption { - return func(c *CompactionConfig) { - c.l1ToGranuleSizeRatio = r - } -} - -type compactorPool struct { - db *DB - - cfg *CompactionConfig - wg sync.WaitGroup - cancel context.CancelFunc -} - -func newCompactorPool(db *DB, cfg *CompactionConfig) *compactorPool { - return &compactorPool{ - db: db, - cfg: cfg, - } -} - -func (c *compactorPool) start() { - ctx, cancelFn := context.WithCancel(context.Background()) - c.cancel = cancelFn - for i := 0; i < c.cfg.concurrency; i++ { - c.wg.Add(1) - go func() { - defer c.wg.Done() - c.compactLoop(ctx) - }() - } -} - -func (c *compactorPool) stop() { - if c.cancel == nil { - // Pool was not started. - return - } - c.cancel() - c.wg.Wait() -} - -func (c *compactorPool) compactLoop(ctx context.Context) { - for { - select { - case <-ctx.Done(): - return - case <-time.After(c.cfg.interval): - c.db.mtx.RLock() - tablesToCompact := make([]*Table, 0, len(c.db.tables)) - for _, table := range c.db.tables { - tablesToCompact = append(tablesToCompact, table) - } - c.db.mtx.RUnlock() - - for _, table := range tablesToCompact { - if err := table.ActiveBlock().compact(c.cfg); err != nil { - level.Warn(c.db.logger).Log("msg", "compaction failed", "err", err) - } - } - } - } -} - -func (t *TableBlock) compact(cfg *CompactionConfig) error { - var compactionErrors errutil.MultiError - index := t.Index() - index.Ascend(func(i btree.Item) bool { - granuleToCompact := i.(*Granule) - if granuleToCompact.metadata.size.Load() < uint64(t.table.db.columnStore.granuleSizeBytes) { - // Skip granule since its size is under the target size. - return true - } - if !granuleToCompact.metadata.pruned.CompareAndSwap(0, 1) { - // Someone else claimed this granule compaction. 
- return true - } - - defer t.table.metrics.compactions.Inc() - if successful, err := t.compactGranule(granuleToCompact, cfg); !successful || err != nil { - t.abortCompaction(granuleToCompact) - if err != nil { - compactionErrors.Add(err) - } - } - return true - }) - return compactionErrors.Err() -} - -// compactionStats is a helper struct to collect metrics during compaction. -type compactionStats struct { - level0SizeBefore int64 - level0CountBefore int - level0NumRowsBefore int64 - level1SizeBefore int64 - level1CountBefore int - level1NumRowsBefore int64 - numPartsOverlap int - uncompletedTxnPartCount int64 - level1SizeAfter uint64 - level1CountAfter int - splits int - totalDuration time.Duration -} - -func (s compactionStats) recordAndLog(m *compactionMetrics, l log.Logger) { - m.level0SizeBefore.Observe(float64(s.level0SizeBefore)) - m.level0CountBefore.Observe(float64(s.level0CountBefore)) - m.level1SizeBefore.Observe(float64(s.level1SizeBefore)) - m.level1CountBefore.Observe(float64(s.level1CountBefore)) - m.numPartsOverlap.Observe(float64(s.numPartsOverlap)) - m.uncompletedTxnPartCount.Observe(float64(s.uncompletedTxnPartCount)) - m.level1SizeAfter.Observe(float64(s.level1SizeAfter)) - m.level1CountAfter.Observe(float64(s.level1CountAfter)) - m.splits.Observe(float64(s.splits)) - m.totalDuration.Observe(float64(s.totalDuration.Seconds())) - - level.Debug(l).Log( - "msg", "compaction complete", - "duration", s.totalDuration, - "l0Before", fmt.Sprintf( - "[sz=%s,cnt=%d]", humanize.IBytes(uint64(s.level0SizeBefore)), s.level0CountBefore, - ), - "l1Before", fmt.Sprintf( - "[sz=%s,cnt=%d]", humanize.IBytes(uint64(s.level1SizeBefore)), s.level1CountBefore, - ), - "l1After", fmt.Sprintf( - "[sz=%s,cnt=%d]", humanize.IBytes(uint64(s.level1SizeAfter)), s.level1CountAfter, - ), - "overlaps", s.numPartsOverlap, - "splits", s.splits, - ) -} - -// compactionMetrics are metrics recorded on each successful compaction. 
-type compactionMetrics struct { - level0SizeBefore prometheus.Histogram - level0CountBefore prometheus.Histogram - level1SizeBefore prometheus.Histogram - level1CountBefore prometheus.Histogram - numPartsOverlap prometheus.Histogram - uncompletedTxnPartCount prometheus.Histogram - level1SizeAfter prometheus.Histogram - level1CountAfter prometheus.Histogram - splits prometheus.Histogram - totalDuration prometheus.Histogram -} - -func newCompactionMetrics(reg prometheus.Registerer, granuleSize float64) *compactionMetrics { - const ( - twoKiB = 2 << 10 - // metricResolution is the number of buckets in a histogram for sizes - // and times. - metricResolution = 25 - ) - minSize := float64(twoKiB) - if granuleSize < minSize { - // This should only happen in tests. - minSize = 1 - granuleSize = twoKiB - } - sizeBuckets := prometheus.ExponentialBucketsRange(minSize, granuleSize, metricResolution) - timeBuckets := prometheus.ExponentialBuckets(0.5, 2, metricResolution) - countBuckets := prometheus.ExponentialBuckets(1, 2, 10) - return &compactionMetrics{ - level0SizeBefore: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ - Name: "compaction_level_0_size_before", - Help: "Total level 0 size when beginning compaction", - Buckets: sizeBuckets, - }), - level0CountBefore: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ - Name: "compaction_level_0_count_before", - Help: "Number of level 0 parts when beginning compaction", - Buckets: countBuckets, - }), - level1SizeBefore: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ - Name: "compaction_level_1_size_before", - Help: "Total level 1 size when beginning compaction", - Buckets: sizeBuckets, - }), - level1CountBefore: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ - Name: "compaction_level_1_count_before", - Help: "Number of level 0 parts when beginning compaction", - Buckets: countBuckets, - }), - numPartsOverlap: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ - Name: 
"compaction_num_parts_overlap", - Help: "Number of level 1 parts that overlapped with level 0 parts", - Buckets: countBuckets, - }), - uncompletedTxnPartCount: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ - Name: "compaction_uncompleted_txn_part_count", - Help: "Number of parts with uncompleted txns that could not be compacted", - Buckets: countBuckets, - }), - level1SizeAfter: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ - Name: "compaction_level_1_size_after", - Help: "Total level 1 size after compaction", - Buckets: sizeBuckets, - }), - level1CountAfter: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ - Name: "compaction_level_1_count_after", - Help: "Number of level 1 parts after compaction", - Buckets: countBuckets, - }), - splits: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ - Name: "compaction_splits", - Help: "Number of granule splits", - Buckets: countBuckets, - }), - totalDuration: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ - Name: "compaction_total_duration_seconds", - Help: "Total compaction duration", - Buckets: timeBuckets, - }), - } -} - -// compactGranule is the "meat" of granule compaction. It returns whether the -// compaction succeeded or not as well as an error (there are cases in which -// false, nil is returned). -// The goal of this method is to optimize both memory usage and scan speed. -// Memory usage is optimized by merging multiple row groups into fewer row -// groups, since the more data there is in a row group, the higher the benefits -// of things like parquet compression (this is applied at the row group level). -// Optimizing for memory usage is a tradeoff, since the more data there is in a -// row group, the more sequential reads need to be done to find data that -// queries want. 
This is why we aim to have a granule maximum size, since -// searching for granules that satisfy query filters is an -// O(log(number of granules)) operation, so the more granules we have, the less -// O(n) work we have to do to search for row groups that satisfy query filters. -// The compaction logic is as follows: -// -// 1. Before any compaction has happened, granules contain a variable number of -// parts (basically an encapsulation of a parquet file). These parts contain -// a variable number of variable-length row groups. The row group size in -// this case is the buffer size that was used by the client on the -// corresponding insert call. The compaction strategy in this simple case is -// to rewrite the row groups into multiple parts that contain optimally -// sized row groups (set by the user using the WithRowGroupSize table -// option). The new part sizes aim to be -// l1ToGranuleSizeRatio*granuleSize bytes large (specified in the config). -// The resulting parts are marked as compacted using compactionLevel1. -// -// 2. As more data is added to a granule compacted in point 1, the new data -// will again be stored in a suboptimal multi-part format with multiple -// variable length row groups. The same compaction as point 1 is performed -// but only on the new data as long as it does not overlap with any level1 -// data (otherwise the overlapping parts are merged into new level1 parts). -// If the sum of the part sizes is above the target granule size, a split is -// performed, which simply means that the parts will be assigned to new -// granules. -// -// Some things to be aware of: -// - The current compaction strategy assumes there is no benefit to having -// muliple parts over a single part as long as the row groups are of equal -// length. -// - Currently, our compaction strategy is based on a sweep on a timed -// interval. 
This is OK for now, but ideally we would track metrics that -// are proxies for memory size/scan speed that would trigger compactions. -// For example, the ratio of uncompacted bytes to total bytes. Or performing -// split operations based on load. -// -// A note on ordering guarantees: -// FrostDB guarantees ordering amongst granules. This means that if there are -// two granules in the index, all rows in granule A are less than granule B. -// However, within a granule, rows are only ordered at a part level since parts -// are created when a client appends rows and clients are not required to -// append in order. The current compaction strategy ensures that ordering -// guarantees between granules are upheld. Additionally, level1 parts will never -// overlap. -func (t *TableBlock) compactGranule(granule *Granule, cfg *CompactionConfig) (bool, error) { - // Use the latest watermark as the tx id. - tx := t.table.db.tx.Load() - - start := time.Now() - level0Parts, level1Parts, partsWithUncompletedTxns, stats, err := collectPartsForCompaction( - // Start compaction by adding a sentinel node to its parts list. - tx, granule.parts.Sentinel(parts.Compacting), - ) - if err != nil { - return false, err - } - - if len(level0Parts) == 0 && len(level1Parts) == 0 { - // Edge case in which only parts with uncompleted txns are found and - // they are already bigger than the max size. A future compaction cycle - // will take care of this, so return that the compaction was not - // successful - return false, nil - } - - partsBefore := len(level0Parts) + len(level1Parts) - if len(level0Parts) > 0 { - var err error - level1Parts, err = compactLevel0IntoLevel1( - t, tx, level0Parts, level1Parts, cfg.l1ToGranuleSizeRatio, &stats, - ) - if err != nil { - return false, err - } - stats.level1CountAfter += len(level1Parts) - } - - // This sort is done to simplify the splitting of non-overlapping parts - // amongst new granules. 
There might be something more clever we could do - // here if we maintain stronger guarantees about the part ordering given - // that level1 parts should already be sorted when collected at the start - // of this method (since this is the only place level1 parts get added to - // granules). However, it seems like new parts are prepended so the sort - // order is inverted. Another option is to recursively split using - // addPartToGranule, which would be cheap in the general case. - sorter := parts.NewPartSorter(t.table.config.schema, level1Parts) - sort.Sort(sorter) - if sorter.Err() != nil { - return false, fmt.Errorf("error sorting level1: %w", sorter.Err()) - } - - newParts, err := divideLevel1PartsForGranule( - t, - tx, - level1Parts, - // Set a maximum size of the l1 ratio to give "breathing room" to new - // level0 parts. Otherwise, compactions will be initiated more often - // and work on less data. This ends up creating granules half filled - // up with a level1 part. - int64(float64(t.table.db.columnStore.granuleSizeBytes)*cfg.l1ToGranuleSizeRatio), - ) - if err != nil { - return false, err - } - stats.splits = len(newParts) - 1 - - newGranules := make([]*Granule, 0, len(newParts)) - partsAfter := 0 - for _, parts := range newParts { - partsAfter += len(parts) - newGranule, err := NewGranule(t.table.config, parts...) - if err != nil { - if err != nil { - return false, fmt.Errorf("failed to create new granule: %w", err) - } - } - t.table.metrics.granulesCreated.Inc() - newGranules = append(newGranules, newGranule) - } - - // Size calculation is done before adding the ignored parts to the new - // granules. - for _, g := range newGranules { - stats.level1SizeAfter += g.metadata.size.Load() - } - - // Add remaining parts onto new granules. 
- for _, p := range partsWithUncompletedTxns { - if err := addPartToGranule(newGranules, p); err != nil { - return false, fmt.Errorf("add parts to granules: %w", err) - } - } - - // We disable compaction for new granules before allowing new inserts to be - // propagated to them. - for _, childGranule := range newGranules { - childGranule.metadata.pruned.Store(1) - } - - // We restore the possibility to trigger compaction after we exit the - // function. - defer func() { - for _, childGranule := range newGranules { - childGranule.metadata.pruned.Store(0) - } - }() - - // Set the newGranules pointer, so new writes will propogate into these new - // granules. - granule.newGranules = newGranules - - // Mark compaction complete in the granule; this will cause new writes to - // start using the newGranules pointer. Iterate over the resulting part - // list to copy any new parts that were added while we were compacting. - var addPartErr error - granule.parts.Sentinel(parts.Compacted).Iterate(func(p *parts.Part) bool { - if err := addPartToGranule(newGranules, p); err != nil { - addPartErr = err - return false - } - return true - }) - if addPartErr != nil { - return false, fmt.Errorf("add part to granules: %w", addPartErr) - } - - for { - index := t.Index() - t.mtx.Lock() - newIdx := index.Clone() // TODO(THOR): we can't clone concurrently - t.mtx.Unlock() - - if newIdx.Delete(granule) == nil { - level.Error(t.logger).Log("msg", "failed to delete granule during split") - return false, fmt.Errorf("failed to delete granule") - } - - for _, g := range newGranules { - if dupe := newIdx.ReplaceOrInsert(g); dupe != nil { - level.Error(t.logger).Log("duplicate insert performed") - } - } - - // Point to the new index. 
- if t.index.CompareAndSwap(index, newIdx) { - sizeDiff := int64(stats.level1SizeAfter) - (stats.level0SizeBefore + stats.level1SizeBefore) - t.size.Add(sizeDiff) - - t.table.metrics.numParts.Add(float64(int(stats.level1CountAfter) - partsBefore)) - break - } - } - stats.totalDuration = time.Since(start) - stats.recordAndLog(t.table.metrics.compactionMetrics, t.logger) - // Release all records in L0 parts - for _, p := range level0Parts { - if r := p.Record(); r != nil { - r.Release() - } - } - return true, nil -} - -// compactLevel0IntoLevel1 compacts the given level0Parts into level1Parts by -// merging them with overlapping level1 parts and producing parts that are up -// to the TableBlock's maximum granule size. These parts are guaranteed to be -// non-overlapping. -// The size limit is a best effort as there is currently no easy way to -// determine the size of a parquet file while writing row groups to it. -func compactLevel0IntoLevel1( - t *TableBlock, - tx uint64, - level0Parts, - level1Parts []*parts.Part, - l1ToGranuleSizeRatio float64, - stats *compactionStats, -) ([]*parts.Part, error) { - // Verify whether the level0 parts overlap with level1 parts. If they do, - // we need to merge the overlapping parts. Not merging these parts could - // cause them to be split to different granules, rendering the index - // useless. - nonOverlapping := make([]*parts.Part, 0, len(level1Parts)) - partsToCompact := level0Parts - // size and numRows are used to estimate the number of bytes per row so that - // we can provide a maximum number of rows to write in writeRowGroups which - // will more or less keep the compacted part under the maximum granule size. - size := stats.level1SizeBefore - numRows := stats.level1NumRowsBefore - if len(level1Parts) == 0 { - // If no level1 parts exist, then we estimate the parquet rows to - // write based on level0. 
- size = stats.level0SizeBefore - numRows = stats.level0NumRowsBefore - } - for _, p1 := range level1Parts { - overlapped := false - for _, p0 := range level0Parts { - if overlaps, err := p0.OverlapsWith(t.table.config.schema, p1); err != nil { - return nil, err - } else if overlaps { - stats.numPartsOverlap++ - partsToCompact = append(partsToCompact, p1) - overlapped = true - break - } - } - if !overlapped { - nonOverlapping = append(nonOverlapping, p1) - } - } - - bufs := make([]dynparquet.DynamicRowGroup, 0, len(level0Parts)) - for _, p := range partsToCompact { - buf, err := p.AsSerializedBuffer(t.table.config.schema) - if err != nil { - return nil, err - } - - // All the row groups in a part are wrapped in a single row group given - // that all rows are sorted within a part. This reduces the number of - // cursors open when merging the row groups. - bufs = append(bufs, buf.MultiDynamicRowGroup()) - } - - cursor := 0 - merged, err := t.table.config.schema.MergeDynamicRowGroups(bufs) - if err != nil { - return nil, err - } - - estimatedBytesPerRow := float64(size) / float64(numRows) - estimatedRowsPerPart := int( - math.Ceil( - (float64(t.table.db.columnStore.granuleSizeBytes) * l1ToGranuleSizeRatio) / - estimatedBytesPerRow, - ), - ) - if err := func() error { - rows := merged.Rows() - defer rows.Close() - - for { - var mergedBytes bytes.Buffer - n, err := t.writeRows(&mergedBytes, rows, merged.DynamicColumns(), estimatedRowsPerPart) - if err != nil { - return err - } - if n == 0 { - break - } - cursor += n - serBuf, err := dynparquet.ReaderFromBytes(mergedBytes.Bytes()) - if err != nil { - return err - } - compactedPart := parts.NewPart(tx, serBuf, parts.WithCompactionLevel(parts.CompactionLevel1)) - nonOverlapping = append(nonOverlapping, compactedPart) - } - return nil - }(); err != nil { - return nil, fmt.Errorf("failed level0->level1 compaction: %w", err) - } - - return nonOverlapping, nil -} - -func collectPartsForCompaction(tx uint64, list *parts.List) ( 
- level0Parts, level1Parts, partsWithUncompletedTxns []*parts.Part, - stats compactionStats, err error, -) { - list.Iterate(func(p *parts.Part) bool { - if p.TX() > tx { - if !p.HasTombstone() { - partsWithUncompletedTxns = append(partsWithUncompletedTxns, p) - stats.uncompletedTxnPartCount++ - } - // Parts with a tombstone are dropped. - return true - } - - switch cl := p.CompactionLevel(); cl { - case parts.CompactionLevel0: - level0Parts = append(level0Parts, p) - stats.level0CountBefore++ - stats.level0SizeBefore += p.Size() - stats.level0NumRowsBefore += p.NumRows() - case parts.CompactionLevel1: - level1Parts = append(level1Parts, p) - stats.level1CountBefore++ - stats.level1SizeBefore += p.Size() - stats.level1NumRowsBefore += p.NumRows() - default: - err = fmt.Errorf("unexpected part compaction level %d", cl) - return false - } - return true - }) - return -} - -// divideLevel1PartsForGranule returns a two-dimensional slice of parts where -// each element is a slice of parts that all together have a sum of sizes less -// than maxSize. -// The global sort order of the input parts is maintained. -func divideLevel1PartsForGranule(t *TableBlock, tx uint64, level1 []*parts.Part, maxSize int64) ([][]*parts.Part, error) { - var totalSize int64 - sizes := make([]int64, len(level1)) - for i, p := range level1 { - size := p.Size() - totalSize += size - sizes[i] = size - } - if totalSize <= maxSize { - // No splits needed. - return [][]*parts.Part{level1}, nil - } - - // We want to maximize the size of each split slice, so we follow a greedy - // approach. Note that in practice because we create level1 parts that are - // around half the size of a granule (level1ToGranuleSizeRatio), if a - // granule is split, it will be split into two granules that are half filled - // up with a level1 part. 
- var ( - runningSize int64 - newParts [][]*parts.Part - ) - for i, size := range sizes { - runningSize += size - if runningSize > maxSize { - // Close the current subslice and create a new one. - newParts = append(newParts, []*parts.Part{level1[i]}) - runningSize = size - continue - } - if i == 0 { - newParts = append(newParts, []*parts.Part{level1[i]}) - } else { - newParts[len(newParts)-1] = append(newParts[len(newParts)-1], level1[i]) - } - } - return newParts, nil -} diff --git a/compaction_test.go b/compaction_test.go deleted file mode 100644 index ccb2fdd11..000000000 --- a/compaction_test.go +++ /dev/null @@ -1,455 +0,0 @@ -package frostdb - -import ( - "context" - "io" - "testing" - - "github.com/google/btree" - "github.com/google/uuid" - "github.com/segmentio/parquet-go" - "github.com/stretchr/testify/require" - - "github.com/polarsignals/frostdb/dynparquet" - "github.com/polarsignals/frostdb/parts" - "github.com/polarsignals/frostdb/pqarrow" - "github.com/polarsignals/frostdb/query/logicalplan" -) - -// insertSamples is a helper function to insert a deterministic sample with a -// given timestamp. Note that rows inserted should be sorted by timestamp since -// it is a sorting column. -func insertSamples(ctx context.Context, t *testing.T, table *Table, timestamps ...int64) uint64 { - t.Helper() - samples := make([]any, 0, len(timestamps)) - for _, ts := range timestamps { - samples = append(samples, dynparquet.Sample{ - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - }, - Timestamp: ts, - }) - } - tx, err := table.Write(ctx, samples...) - require.NoError(t, err) - return tx -} - -// insertSampleRecords is the same helper function as insertSamples but it inserts arrow records instead. 
-func insertSampleRecords(ctx context.Context, t *testing.T, table *Table, timestamps ...int64) uint64 { - t.Helper() - var samples dynparquet.Samples - samples = make([]dynparquet.Sample, 0, len(timestamps)) - for _, ts := range timestamps { - samples = append(samples, dynparquet.Sample{ - ExampleType: "ex", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - }, - Timestamp: ts, - }) - } - - ps, err := table.Schema().DynamicParquetSchema(map[string][]string{ - "labels": {"label1"}, - }) - require.NoError(t, err) - - sc, err := pqarrow.ParquetSchemaToArrowSchema(ctx, ps, logicalplan.IterOptions{}) - require.NoError(t, err) - - ar, err := samples.ToRecord(sc) - require.NoError(t, err) - - tx, err := table.InsertRecord(ctx, ar) - require.NoError(t, err) - return tx -} - -func TestCompaction(t *testing.T) { - // expectedPart specifies the expected part data the test should verify. - type expectedPart struct { - compactionLevel parts.CompactionLevel - numRowGroups int - numRows int - // data is the expected data. Only the timestamps are verified in this - // test for simplicity. - data []int64 - } - // expectedGranule specifies the expected granule data the test should - // verify. - type expectedGranule struct { - parts []expectedPart - } - - const ( - // comactCommand indicates a compaction should be performed. Note that - // compactions need to be specified explicitly. - compactCommand = -1 - // recordGranuleSizeCommand records the granule size at that moment. - recordGranuleSizeCommand = -2 - // setRecordedGranuleSizeCommand sets the granule size to the size that - // was recorded when recordGranuleSizeCommand was executed. - setRecordedGranuleSizeCommand = -3 - // acc accumulates the following inserts into a buffer. - acc = -4 - // flushAcc inserts the accumulated data into the table. 
Useful to - // create row groups larger than one row. - flushAcc = -5 - ) - - testCases := []struct { - name string - // rgSize is the desired row group size. If unspecified, will default to - // 2 rows. - rgSize int - // inserts are the timestamps to insert at. Negative int64s are - // interpreted as commands to do something, see the const declaration - // above. - inserts []int64 - expected []expectedGranule - }{ - { - name: "SimpleLevel0ToLevel1", - // Insert three rows. - inserts: []int64{1, 2, 3, compactCommand}, - // Expect compaction into a single part with level1 compaction. - expected: []expectedGranule{ - { - []expectedPart{ - { - numRowGroups: 2, - compactionLevel: parts.CompactionLevel1, - data: []int64{1, 2, 3}, - }, - }, - }, - }, - }, - { - // This test is the same as above, but inserts a couple more rows, - // expecting a second part with compaction level 0 to be created. - name: "AddLevel0ToLevel1", - // Insert three rows. - inserts: []int64{1, 2, 3, compactCommand, 4}, - expected: []expectedGranule{ - { - []expectedPart{ - { - numRowGroups: 1, - compactionLevel: parts.CompactionLevel0, - data: []int64{4}, - }, - { - numRowGroups: 2, - compactionLevel: parts.CompactionLevel1, - data: []int64{1, 2, 3}, - }, - }, - }, - }, - }, - { - // This test is the same as above, but adds a command to set the - // granule size artificially low followed by a compaction. This - // should trigger a split: i.e. the new part is reassigned to a new - // granule. 
- name: "Split", - inserts: []int64{ - 1, 2, 3, - compactCommand, recordGranuleSizeCommand, - 4, 5, - setRecordedGranuleSizeCommand, compactCommand, - }, - expected: []expectedGranule{ - { - []expectedPart{ - { - numRowGroups: 2, - compactionLevel: parts.CompactionLevel1, - data: []int64{1, 2, 3}, - }, - }, - }, - { - []expectedPart{ - { - numRowGroups: 1, - compactionLevel: parts.CompactionLevel1, - data: []int64{4, 5}, - }, - }, - }, - }, - }, - { - // SimpleOverlap tests out of order inserts after a compaction. The - // new inserts should be merged into the existing part, because - // otherwise a new granule could be created with overlapping data, - // rendering the index useless. - name: "SimpleOverlap", - inserts: []int64{1, 2, 3, compactCommand, 2, compactCommand}, - expected: []expectedGranule{ - { - []expectedPart{ - { - numRowGroups: 2, - compactionLevel: parts.CompactionLevel1, - data: []int64{1, 2, 2, 3}, - }, - }, - }, - }, - }, - { - name: "MultiOverlap", - inserts: []int64{ - 1, 3, // Don't insert 2 to test non-equality overlapping. 
- compactCommand, - 4, 5, 6, - compactCommand, - 7, 8, 9, - compactCommand, - 5, 2, 1, - compactCommand, - }, - expected: []expectedGranule{ - { - []expectedPart{ - { - numRowGroups: 2, - compactionLevel: parts.CompactionLevel1, - data: []int64{7, 8, 9}, - }, - { - numRowGroups: 4, - compactionLevel: parts.CompactionLevel1, - data: []int64{1, 1, 2, 3, 4, 5, 5, 6}, - }, - }, - }, - }, - }, - { - name: "SingleL0MultipleL1Overlaps", - inserts: []int64{ - 1, 3, - compactCommand, - 4, 5, 6, - compactCommand, - acc, 2, 4, flushAcc, - compactCommand, - }, - expected: []expectedGranule{ - { - []expectedPart{ - { - numRowGroups: 4, - compactionLevel: parts.CompactionLevel1, - data: []int64{1, 2, 3, 4, 4, 5, 6}, - }, - }, - }, - }, - }, - { - name: "MultipleL0ToL1", - inserts: []int64{ - acc, 1, 2, 3, flushAcc, - acc, 4, 5, 6, flushAcc, - recordGranuleSizeCommand, - acc, 7, 8, 9, flushAcc, - setRecordedGranuleSizeCommand, - compactCommand, - }, - expected: []expectedGranule{ - { - []expectedPart{ - { - numRowGroups: 2, - compactionLevel: parts.CompactionLevel1, - data: []int64{1, 2, 3, 4}, - }, - }, - }, - { - []expectedPart{ - { - numRowGroups: 2, - compactionLevel: parts.CompactionLevel1, - data: []int64{5, 6, 7, 8}, - }, - }, - }, - { - []expectedPart{ - { - numRowGroups: 1, - compactionLevel: parts.CompactionLevel1, - data: []int64{9}, - }, - }, - }, - }, - }, - } - - numParts := func(g *Granule) int { - numparts := 0 - g.parts.Iterate(func(p *parts.Part) bool { - numparts++ - return true - }) - return numparts - } - - for _, tc := range testCases { - f := func(asArrow bool) func(t *testing.T) { - return func(t *testing.T) { - c, table := basicTable(t) - defer c.Close() - // Disable interval compaction for tests. These are triggered - // manually. 
- table.db.compactorPool.stop() - table.db.compactorPool = nil - - table.config.rowGroupSize = 2 - if tc.rgSize != 0 { - table.config.rowGroupSize = tc.rgSize - } - - var ( - numInserts int - accumulating bool - accBuf []int64 - lastTx uint64 - recordedGranuleSize uint64 - ) - for _, v := range tc.inserts { - switch v { - case compactCommand: - table.db.Wait(lastTx) - require.Equal( - t, - 1, - table.active.Index().Len(), - "tests assume only a single granule as input", - ) - cfg := table.db.columnStore.compactionConfig - if asArrow { - cfg.l1ToGranuleSizeRatio = 0.6 // use a different ratio for arrow records - } - success, err := table.active.compactGranule((table.active.Index().Min()).(*Granule), cfg) - require.True(t, success) - require.NoError(t, err) - case recordGranuleSizeCommand: - table.db.Wait(lastTx) - require.Equal( - t, - 1, - table.active.Index().Len(), - "tests assume only a single granule as input", - ) - recordedGranuleSize = (table.active.Index().Min()).(*Granule).metadata.size.Load() - case setRecordedGranuleSizeCommand: - table.db.columnStore.granuleSizeBytes = int64(recordedGranuleSize) - case acc: - accumulating = true - case flushAcc: - accumulating = false - switch asArrow { - case true: - lastTx = insertSampleRecords(context.Background(), t, table, accBuf...) - default: - lastTx = insertSamples(context.Background(), t, table, accBuf...) 
- } - accBuf = accBuf[:0] - numInserts++ - default: - if accumulating { - accBuf = append(accBuf, v) - continue - } - switch asArrow { - case true: - lastTx = insertSampleRecords(context.Background(), t, table, v) - default: - lastTx = insertSamples(context.Background(), t, table, v) - } - numInserts++ - } - } - - require.Equal(t, len(tc.expected), table.active.Index().Len()) - i := 0 - table.active.Index().Ascend(func(item btree.Item) bool { - g := item.(*Granule) - expected := tc.expected[i] - require.Equal(t, len(expected.parts), numParts(g)) - - j := 0 - g.parts.Iterate(func(p *parts.Part) bool { - expectedPart := expected.parts[j] - if expectedPart.numRowGroups == 0 { - require.Equal(t, int64(expectedPart.numRows), p.Record().NumRows()) - } else { - buf, err := p.AsSerializedBuffer(table.Schema()) - require.NoError(t, err) - rgs := buf.ParquetFile().RowGroups() - require.Equal(t, expectedPart.numRowGroups, len(rgs)) - require.Equal(t, expectedPart.compactionLevel, p.CompactionLevel()) - rowsRead := make([]parquet.Row, 0) - for _, rg := range rgs { - func() { - rows := rg.Rows() - defer rows.Close() - - for { - rowBuf := make([]parquet.Row, 1) - n, err := rows.ReadRows(rowBuf) - if err != nil && err != io.EOF { - require.NoError(t, err) - } - if n > 0 { - rowsRead = append(rowsRead, rowBuf...) - } - - if err == io.EOF { - break - } - } - }() - } - require.Equal( - t, - len(expectedPart.data), - len(rowsRead), - "different number of rows read for granule %d part %d", - i, - j, - ) - - // This is a bit of a hack. If the check below fails - // unexpectedly after a change to the default schema, think - // about a more robust search of the timestamp column index. 
- const timestampColumnIdx = 3 - for k, expectedTimestamp := range expectedPart.data { - require.Equal(t, rowsRead[k][timestampColumnIdx].Int64(), expectedTimestamp) - } - } - - j++ - return true - }) - i++ - return true - }) - } - } - t.Run(tc.name+"-parquet", f(false)) - t.Run(tc.name+"-arrow", f(true)) - } -} diff --git a/db.go b/db.go index 1b2118259..33f8fcf74 100644 --- a/db.go +++ b/db.go @@ -5,6 +5,8 @@ import ( "context" "errors" "fmt" + "io" + "math" "os" "path/filepath" "runtime" @@ -13,49 +15,70 @@ import ( "sync/atomic" "time" - "github.com/apache/arrow/go/v10/arrow/ipc" + "github.com/apache/arrow-go/v18/arrow/ipc" + "github.com/apache/arrow-go/v18/arrow/util" "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/oklog/ulid" + "github.com/oklog/ulid/v2" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/thanos-io/objstore" "go.opentelemetry.io/otel/trace" + "go.opentelemetry.io/otel/trace/noop" + "golang.org/x/exp/maps" "golang.org/x/sync/errgroup" "google.golang.org/protobuf/proto" "github.com/polarsignals/frostdb/dynparquet" + schemapb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha1" + schemav2pb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha2" + tablepb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/table/v1alpha1" walpb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/wal/v1alpha1" + "github.com/polarsignals/frostdb/index" + "github.com/polarsignals/frostdb/parts" "github.com/polarsignals/frostdb/query/logicalplan" - "github.com/polarsignals/frostdb/storage" "github.com/polarsignals/frostdb/wal" ) +const ( + B = 1 + KiB = 1024 * B + 
MiB = 1024 * KiB + GiB = 1024 * MiB + TiB = 1024 * GiB +) + type ColumnStore struct { - mtx *sync.RWMutex - dbs map[string]*DB - reg prometheus.Registerer - logger log.Logger - tracer trace.Tracer - granuleSizeBytes int64 - activeMemorySize int64 - storagePath string - bucket storage.Bucket - ignoreStorageOnQuery bool - enableWAL bool - compactionConfig *CompactionConfig - metrics metrics + mtx sync.RWMutex + dbs map[string]*DB + dbReplaysInProgress map[string]chan struct{} + reg prometheus.Registerer + logger log.Logger + tracer trace.Tracer + activeMemorySize int64 + storagePath string + enableWAL bool + manualBlockRotation bool + snapshotTriggerSize int64 + metrics globalMetrics + recoveryConcurrency int // indexDegree is the degree of the btree index (default = 2) indexDegree int // splitSize is the number of new granules that are created when granules are split (default =2) splitSize int -} + // indexConfig is the configuration settings for the lsm index + indexConfig []*index.LevelConfig + + sources []DataSource + sinks []DataSink + + compactAfterRecovery bool + compactAfterRecoveryTableNames []string -type metrics struct { - shutdownDuration prometheus.Histogram - shutdownStarted prometheus.Counter - shutdownCompleted prometheus.Counter + // testingOptions are options only used for testing purposes. 
+ testingOptions struct { + disableReclaimDiskSpaceOnSnapshot bool + walTestingOptions []wal.Option + } } type Option func(*ColumnStore) error @@ -64,16 +87,15 @@ func New( options ...Option, ) (*ColumnStore, error) { s := &ColumnStore{ - mtx: &sync.RWMutex{}, - dbs: map[string]*DB{}, - reg: prometheus.NewRegistry(), - logger: log.NewNopLogger(), - tracer: trace.NewNoopTracerProvider().Tracer(""), - indexDegree: 2, - splitSize: 2, - granuleSizeBytes: 1 * 1024 * 1024, // 1MB granule size before splitting - activeMemorySize: 512 * 1024 * 1024, // 512MB - compactionConfig: NewCompactionConfig(), + dbs: make(map[string]*DB), + dbReplaysInProgress: make(map[string]chan struct{}), + reg: prometheus.NewRegistry(), + logger: log.NewNopLogger(), + tracer: noop.NewTracerProvider().Tracer(""), + indexConfig: DefaultIndexConfig(), + indexDegree: 2, + splitSize: 2, + activeMemorySize: 512 * MiB, } for _, option := range options { @@ -82,25 +104,26 @@ func New( } } - s.metrics = metrics{ - shutdownDuration: promauto.With(s.reg).NewHistogram(prometheus.HistogramOpts{ - Name: "shutdown_duration", - Help: "time it takes for the columnarstore to complete a full shutdown.", - }), - shutdownStarted: promauto.With(s.reg).NewCounter(prometheus.CounterOpts{ - Name: "shutdown_started", - Help: "Indicates a shutdown of the columnarstore has started.", - }), - shutdownCompleted: promauto.With(s.reg).NewCounter(prometheus.CounterOpts{ - Name: "shutdown_completed", - Help: "Indicates a shutdown of the columnarstore has completed.", - }), - } + // Register metrics that are updated by the collector. 
+ s.reg.MustRegister(&collector{s: s}) + s.metrics = makeAndRegisterGlobalMetrics(s.reg) if s.enableWAL && s.storagePath == "" { return nil, fmt.Errorf("storage path must be configured if WAL is enabled") } + for _, cfg := range s.indexConfig { + if cfg.Type == index.CompactionTypeParquetDisk { + if !s.enableWAL || s.storagePath == "" { + return nil, fmt.Errorf("persistent disk compaction requires WAL and storage path to be enabled") + } + } + } + + if err := s.recoverDBsFromStorage(context.Background()); err != nil { + return nil, err + } + return s, nil } @@ -125,13 +148,6 @@ func WithRegistry(reg prometheus.Registerer) Option { } } -func WithGranuleSizeBytes(bytes int64) Option { - return func(s *ColumnStore) error { - s.granuleSizeBytes = bytes - return nil - } -} - func WithActiveMemorySize(size int64) Option { return func(s *ColumnStore) error { s.activeMemorySize = size @@ -153,16 +169,31 @@ func WithSplitSize(size int) Option { } } -func WithBucketStorage(bucket objstore.Bucket) Option { +func WithReadWriteStorage(ds DataSinkSource) Option { return func(s *ColumnStore) error { - s.bucket = storage.NewBucketReaderAt(bucket) + s.sources = append(s.sources, ds) + s.sinks = append(s.sinks, ds) return nil } } -func WithStorage(bucket storage.Bucket) Option { +func WithReadOnlyStorage(ds DataSource) Option { return func(s *ColumnStore) error { - s.bucket = bucket + s.sources = append(s.sources, ds) + return nil + } +} + +func WithWriteOnlyStorage(ds DataSink) Option { + return func(s *ColumnStore) error { + s.sinks = append(s.sinks, ds) + return nil + } +} + +func WithManualBlockRotation() Option { + return func(s *ColumnStore) error { + s.manualBlockRotation = true return nil } } @@ -181,17 +212,48 @@ func WithStoragePath(path string) Option { } } -// WithIgnoreStorageOnQuery storage paths aren't included in queries. 
-func WithIgnoreStorageOnQuery() Option { +func WithIndexConfig(indexConfig []*index.LevelConfig) Option { return func(s *ColumnStore) error { - s.ignoreStorageOnQuery = true + s.indexConfig = indexConfig return nil } } -func WithCompactionConfig(c *CompactionConfig) Option { +func WithCompactionAfterRecovery(tableNames []string) Option { return func(s *ColumnStore) error { - s.compactionConfig = c + s.compactAfterRecovery = true + s.compactAfterRecoveryTableNames = tableNames + return nil + } +} + +// WithSnapshotTriggerSize specifies a size in bytes of uncompressed inserts +// that will trigger a snapshot of the whole database. This can be larger than +// the active memory size given that the active memory size tracks the size of +// *compressed* data, while snapshots are triggered based on the *uncompressed* +// data inserted into the database. The reason this choice was made is that +// if a database instance crashes, it is forced to reread all uncompressed +// inserts since the last snapshot from the WAL, which could potentially lead +// to unrecoverable OOMs on startup. Defining the snapshot trigger in terms of +// uncompressed bytes limits the memory usage on recovery to at most the +// snapshot trigger size (as long as snapshots were successful). +// If 0, snapshots are disabled. Note that snapshots (if enabled) are also +// triggered on block rotation of any database table. +// Snapshots are complementary to the WAL and will also be disabled if the WAL +// is disabled. +func WithSnapshotTriggerSize(size int64) Option { + return func(s *ColumnStore) error { + s.snapshotTriggerSize = size + return nil + } +} + +// WithRecoveryConcurrency limits the number of databases that are recovered +// simultaneously when calling frostdb.New. This helps limit memory usage on +// recovery. 
+func WithRecoveryConcurrency(concurrency int) Option { + return func(s *ColumnStore) error { + s.recoveryConcurrency = concurrency return nil } } @@ -210,7 +272,14 @@ func (s *ColumnStore) Close() error { errg := &errgroup.Group{} errg.SetLimit(runtime.GOMAXPROCS(0)) for _, db := range s.dbs { - errg.Go(db.Close) + toClose := db + errg.Go(func() error { + err := toClose.Close() + if err != nil { + level.Error(s.logger).Log("msg", "error closing DB", "db", toClose.name, "err", err) + } + return err + }) } return errg.Wait() @@ -220,8 +289,8 @@ func (s *ColumnStore) DatabasesDir() string { return filepath.Join(s.storagePath, "databases") } -// ReplayWALs replays the write-ahead log for each database. -func (s *ColumnStore) ReplayWALs(ctx context.Context) error { +// recoverDBsFromStorage replays the snapshots and write-ahead logs for each database. +func (s *ColumnStore) recoverDBsFromStorage(ctx context.Context) error { if !s.enableWAL { return nil } @@ -241,27 +310,30 @@ func (s *ColumnStore) ReplayWALs(ctx context.Context) error { } g, ctx := errgroup.WithContext(ctx) + // Limit this operation since WAL recovery could be very memory intensive. + if s.recoveryConcurrency == 0 { + s.recoveryConcurrency = runtime.GOMAXPROCS(0) + } + g.SetLimit(s.recoveryConcurrency) for _, f := range files { databaseName := f.Name() g.Go(func() error { - db, err := s.DB(ctx, databaseName) - if err != nil { - return err - } - return db.replayWAL(ctx) + // Open the DB for the side effect of the snapshot and WALs being loaded as part of the open operation. 
+ _, err := s.DB(ctx, + databaseName, + WithCompactionAfterOpen( + s.compactAfterRecovery, s.compactAfterRecoveryTableNames, + ), + ) + return err }) } return g.Wait() } -type dbMetrics struct { - txHighWatermark prometheus.GaugeFunc -} - type DB struct { columnStore *ColumnStore - reg prometheus.Registerer logger log.Logger tracer trace.Tracer name string @@ -270,32 +342,84 @@ type DB struct { roTables map[string]*Table tables map[string]*Table - storagePath string - wal WAL - bucket storage.Bucket - ignoreStorageOnQuery bool + storagePath string + wal WAL + + // The database supports multiple data sources and sinks. + sources []DataSource + sinks []DataSink + // Databases monotonically increasing transaction id - tx *atomic.Uint64 + tx atomic.Uint64 + // highWatermark maintains the highest consecutively completed txn. + highWatermark atomic.Uint64 // TxPool is a waiting area for finished transactions that haven't been added to the watermark txPool *TxPool - compactorPool *compactorPool + compactAfterRecovery bool + compactAfterRecoveryTableNames []string + + snapshotInProgress atomic.Bool + + metrics snapshotMetrics + metricsProvider tableMetricsProvider +} + +// DataSinkSource is a convenience interface for a data source and sink. +type DataSinkSource interface { + DataSink + DataSource +} - // highWatermark maintains the highest consecutively completed tx number - highWatermark *atomic.Uint64 +// DataSource is remote source of data that can be queried. +type DataSource interface { + fmt.Stringer + Scan(ctx context.Context, prefix string, schema *dynparquet.Schema, filter logicalplan.Expr, lastBlockTimestamp uint64, callback func(context.Context, any) error) error + Prefixes(ctx context.Context, prefix string) ([]string, error) +} - metrics *dbMetrics +// DataSink is a remote destination for data. 
+type DataSink interface { + fmt.Stringer + Upload(ctx context.Context, name string, r io.Reader) error + Delete(ctx context.Context, name string) error } -func (s *ColumnStore) DB(ctx context.Context, name string) (*DB, error) { +type DBOption func(*DB) error + +func WithCompactionAfterOpen(compact bool, tableNames []string) DBOption { + return func(db *DB) error { + db.compactAfterRecovery = compact + db.compactAfterRecoveryTableNames = tableNames + return nil + } +} + +// DB gets or creates a database on the given ColumnStore with the given +// options. Note that if the database already exists, the options will be +// applied cumulatively to the database. +func (s *ColumnStore) DB(ctx context.Context, name string, opts ...DBOption) (*DB, error) { if !validateName(name) { return nil, errors.New("invalid database name") } + applyOptsToDB := func(db *DB) error { + db.mtx.Lock() + defer db.mtx.Unlock() + for _, opt := range opts { + if err := opt(db); err != nil { + return err + } + } + return nil + } s.mtx.RLock() db, ok := s.dbs[name] s.mtx.RUnlock() if ok { + if err := applyOptsToDB(db); err != nil { + return nil, err + } return db, nil } @@ -304,116 +428,326 @@ func (s *ColumnStore) DB(ctx context.Context, name string) (*DB, error) { // Need to double-check that in the meantime a database with the same name // wasn't concurrently created. - db, ok = s.dbs[name] - if ok { - return db, nil + for { + db, ok = s.dbs[name] + if ok { + if err := applyOptsToDB(db); err != nil { + return nil, err + } + return db, nil + } + + // DB has not yet been created. However, another goroutine might be + // replaying the WAL in the background (the store mutex is released + // during replay.). + waitForReplay, ok := s.dbReplaysInProgress[name] + if !ok { + // No replay in progress, it is safe to create the DB. 
+ break + } + s.mtx.Unlock() + <-waitForReplay + s.mtx.Lock() } - reg := prometheus.WrapRegistererWith(prometheus.Labels{"db": name}, s.reg) logger := log.WithPrefix(s.logger, "db", name) db = &DB{ - columnStore: s, - name: name, - mtx: &sync.RWMutex{}, - tables: map[string]*Table{}, - roTables: map[string]*Table{}, - reg: reg, - logger: logger, - tracer: s.tracer, - tx: &atomic.Uint64{}, - highWatermark: &atomic.Uint64{}, - storagePath: filepath.Join(s.DatabasesDir(), name), - wal: &wal.NopWAL{}, - ignoreStorageOnQuery: s.ignoreStorageOnQuery, - } - - if s.bucket != nil { - db.bucket = storage.NewPrefixedBucket(s.bucket, db.name) - } - - if s.enableWAL { - var err error - db.wal, err = db.openWAL() - if err != nil { - return nil, err - } + columnStore: s, + name: name, + mtx: &sync.RWMutex{}, + tables: map[string]*Table{}, + roTables: map[string]*Table{}, + logger: logger, + tracer: s.tracer, + wal: &wal.NopWAL{}, + sources: s.sources, + sinks: s.sinks, + metrics: s.metrics.snapshotMetricsForDB(name), + metricsProvider: tableMetricsProvider{dbName: name, m: s.metrics}, + } + + if s.storagePath != "" { + db.storagePath = filepath.Join(s.DatabasesDir(), name) + } + + if err := applyOptsToDB(db); err != nil { + return nil, err } if dbSetupErr := func() error { - db.txPool = NewTxPool(db.highWatermark) - db.compactorPool = newCompactorPool(db, s.compactionConfig) - // If bucket storage is configured; scan for existing tables in the database - if db.bucket != nil { - if err := db.bucket.Iter(ctx, "", func(tableName string) error { - _, err := db.readOnlyTable(strings.TrimSuffix(tableName, "/")) + if db.storagePath != "" { + if err := os.RemoveAll(db.trashDir()); err != nil { + return err + } + if err := os.RemoveAll(db.indexDir()); err != nil { // Remove the index directory. These are either restored from snapshots or rebuilt from the WAL. 
+ return err + } + } + db.txPool = NewTxPool(&db.highWatermark) + // Wait to start the compactor pool since benchmarks show that WAL + // replay is a lot more efficient if it is not competing against + // compaction. Additionally, if the CompactAfterRecovery option is + // specified, we don't want the user-specified compaction to race with + // our compactor pool. + if len(db.sources) != 0 { + for _, source := range db.sources { + prefixes, err := source.Prefixes(ctx, name) if err != nil { return err } - return nil - }); err != nil { - return fmt.Errorf("bucket iter on database open: %w", err) + for _, prefix := range prefixes { + _, err := db.readOnlyTable(prefix) + if err != nil { + return err + } + } } } - // Register metrics last to avoid duplicate registration should and of the WAL or storage replay errors occur - db.metrics = &dbMetrics{ - txHighWatermark: promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{ - Name: "tx_high_watermark", - Help: "The highest transaction number that has been released to be read", - }, func() float64 { - return float64(db.highWatermark.Load()) - }), + if s.enableWAL { + if err := func() error { + // Unlock the store mutex while the WAL is replayed, otherwise + // if multiple DBs are opened in parallel, WAL replays will not + // happen in parallel. However, create a channel for any + // goroutines that might concurrently try to open the same DB + // to listen on. 
+ s.dbReplaysInProgress[name] = make(chan struct{}) + s.mtx.Unlock() + defer func() { + s.mtx.Lock() + close(s.dbReplaysInProgress[name]) + delete(s.dbReplaysInProgress, name) + }() + var err error + db.wal, err = db.openWAL( + ctx, + append( + []wal.Option{ + wal.WithMetrics(s.metrics.metricsForFileWAL(name)), + wal.WithStoreMetrics(s.metrics.metricsForWAL(name)), + }, s.testingOptions.walTestingOptions..., + )..., + ) + return err + }(); err != nil { + return err + } + // WAL pointers of tables need to be updated to the DB WAL since + // they are loaded from object storage and snapshots with a no-op + // WAL by default. + for _, table := range db.tables { + if !table.config.Load().DisableWal { + table.wal = db.wal + } + } + for _, table := range db.roTables { + if !table.config.Load().DisableWal { + table.wal = db.wal + } + } } return nil }(); dbSetupErr != nil { - // Close handles closing partially set fields in the db. - db.Close() + level.Warn(s.logger).Log( + "msg", "error setting up db", + "name", name, + "err", dbSetupErr, + ) + // closeInternal handles closing partially set fields in the db without + // rotating blocks etc... that the public Close method does. + _ = db.closeInternal() return nil, dbSetupErr } - db.compactorPool.start() + // Compact tables after recovery if requested. + if db.compactAfterRecovery { + tables := db.compactAfterRecoveryTableNames + if len(tables) == 0 { + // Run compaction on all tables. 
+ tables = maps.Keys(db.tables) + } + for _, name := range tables { + tbl, err := db.GetTable(name) + if err != nil { + level.Warn(db.logger).Log("msg", "get table during db setup", "err", err) + continue + } + + start := time.Now() + if err := tbl.EnsureCompaction(); err != nil { + level.Warn(db.logger).Log("msg", "compaction during setup", "err", err) + } + level.Info(db.logger).Log( + "msg", "compacted table after recovery", "table", name, "took", time.Since(start), + ) + } + } + s.dbs[name] = db return db, nil } -func (db *DB) openWAL() (WAL, error) { - return wal.Open( +// DBs returns all the DB names of this column store. +func (s *ColumnStore) DBs() []string { + s.mtx.RLock() + defer s.mtx.RUnlock() + return maps.Keys(s.dbs) +} + +func (s *ColumnStore) GetDB(name string) (*DB, error) { + s.mtx.RLock() + defer s.mtx.RUnlock() + db, ok := s.dbs[name] + if !ok { + return nil, fmt.Errorf("db %s not found", name) + } + return db, nil +} + +func (s *ColumnStore) DropDB(name string) error { + db, err := s.GetDB(name) + if err != nil { + return err + } + if err := db.Close(WithClearStorage()); err != nil { + return err + } + s.mtx.Lock() + defer s.mtx.Unlock() + delete(s.dbs, name) + return os.RemoveAll(filepath.Join(s.DatabasesDir(), name)) +} + +func (db *DB) openWAL(ctx context.Context, opts ...wal.Option) (WAL, error) { + wal, err := wal.Open( db.logger, - db.reg, db.walDir(), + opts..., ) + if err != nil { + return nil, err + } + + if err := db.recover(ctx, wal); err != nil { + return nil, err + } + + wal.RunAsync() + return wal, nil } +const ( + walPath = "wal" + snapshotsPath = "snapshots" + indexPath = "index" + trashPath = "trash" +) + func (db *DB) walDir() string { - return filepath.Join(db.storagePath, "wal") + return filepath.Join(db.storagePath, walPath) +} + +func (db *DB) snapshotsDir() string { + return filepath.Join(db.storagePath, snapshotsPath) +} + +func (db *DB) trashDir() string { + return filepath.Join(db.storagePath, trashPath) } -func (db 
*DB) replayWAL(ctx context.Context) error { +func (db *DB) indexDir() string { + return filepath.Join(db.storagePath, indexPath) +} + +// recover attempts to recover database state from a combination of snapshots and the WAL. +// +// The recovery process is as follows: +// 1. Load the latest snapshot (if one should exist). +// 1.a. If on-disk LSM index files exist: Upon table creation during snapshot loading, the index files shall be recovered from, inserting parts into the index. +// 2. Replay the WAL starting from the latest snapshot transaction. +// 2.a. If on-disk LSM index files were loaded: Insertion into the index may drop the insertion if a part with a higher transaction already exists in the WAL. +func (db *DB) recover(ctx context.Context, wal WAL) error { + level.Info(db.logger).Log( + "msg", "recovering db", + "name", db.name, + ) + snapshotLoadStart := time.Now() + snapshotTx, err := db.loadLatestSnapshot(ctx) + if err != nil { + level.Info(db.logger).Log( + "msg", "failed to load latest snapshot", "db", db.name, "err", err, + ) + snapshotTx = 0 + } + snapshotLogArgs := make([]any, 0) + if snapshotTx != 0 { + snapshotLogArgs = append( + snapshotLogArgs, + "snapshot_tx", snapshotTx, + "snapshot_load_duration", time.Since(snapshotLoadStart), + ) + if err := db.cleanupSnapshotDir(ctx, snapshotTx); err != nil { + // Truncation is best-effort. If it fails, move on. + level.Info(db.logger).Log( + "msg", "failed to truncate snapshots not equal to loaded snapshot", + "err", err, + "snapshot_tx", snapshotTx, + ) + } + // snapshotTx can correspond to a write at that txn that is contained in + // the snapshot. We want the first entry of the WAL to be the subsequent + // txn to not replay duplicate writes. 
+ if err := wal.Truncate(snapshotTx + 1); err != nil { + level.Info(db.logger).Log( + "msg", "failed to truncate WAL after loading snapshot", + "err", err, + "snapshot_tx", snapshotTx, + ) + } + } + // persistedTables is a map from a table name to the last transaction // persisted. - persistedTables := map[string]uint64{} - if err := db.wal.Replay(func(tx uint64, record *walpb.Record) error { + persistedTables := make(map[string]uint64) + var lastTx uint64 + + start := time.Now() + if err := wal.Replay(snapshotTx+1, func(_ uint64, record *walpb.Record) error { + if err := ctx.Err(); err != nil { + return err + } switch e := record.Entry.EntryType.(type) { case *walpb.Entry_TableBlockPersisted_: - persistedTables[e.TableBlockPersisted.TableName] = tx + persistedTables[e.TableBlockPersisted.TableName] = e.TableBlockPersisted.NextTx + // The loaded snapshot might have persisted data, this is handled in + // the replay loop below. + return nil + default: + return nil } - return nil }); err != nil { - return fmt.Errorf("first WAL replay: %w", err) + return err } - lastTx := uint64(0) - if err := db.wal.Replay(func(tx uint64, record *walpb.Record) error { + // performSnapshot is set to true if a snapshot should be performed after + // replay. This is set in cases where there could be "dead bytes" in the + // WAL (i.e. entries that occupy space on disk but are useless). 
+ performSnapshot := false + + if err := wal.Replay(snapshotTx+1, func(tx uint64, record *walpb.Record) error { + if err := ctx.Err(); err != nil { + return err + } lastTx = tx switch e := record.Entry.EntryType.(type) { case *walpb.Entry_NewTableBlock_: entry := e.NewTableBlock var schema proto.Message - switch v := entry.Schema.(type) { - case *walpb.Entry_NewTableBlock_DeprecatedSchema: + switch v := entry.Config.Schema.(type) { + case *tablepb.TableConfig_DeprecatedSchema: schema = v.DeprecatedSchema - case *walpb.Entry_NewTableBlock_SchemaV2: + case *tablepb.TableConfig_SchemaV2: schema = v.SchemaV2 default: return fmt.Errorf("unhandled schema type: %T", v) @@ -424,9 +758,11 @@ func (db *DB) replayWAL(ctx context.Context) error { return err } - if lastPersistedTx, ok := persistedTables[entry.TableName]; ok && tx < lastPersistedTx { + nextNonPersistedTxn, wasPersisted := persistedTables[entry.TableName] + if wasPersisted && tx < nextNonPersistedTxn { // This block has already been successfully persisted, so we can - // skip it. + // skip it. Note that if this new table block is the active + // block after persistence tx == nextNonPersistedTxn. return nil } @@ -434,52 +770,65 @@ func (db *DB) replayWAL(ctx context.Context) error { table, err := db.GetTable(tableName) var tableErr ErrTableNotFound if errors.As(err, &tableErr) { - schema, err := dynparquet.SchemaFromDefinition(schema) - if err != nil { - return fmt.Errorf("initialize schema: %w", err) - } - config := NewTableConfig(schema) - // TODO: May be we should acquire mutux lock when mutating d.roTables and d.tables? - // s.DB creates read only Table instance when BucketStore is configured. - // Make sure to use the existing Table instace instead of creating new one to avoid dangling instance. 
- table, ok := db.roTables[tableName] - if ok { - delete(db.roTables, tableName) - table.config = config - } else { - table, err = newTable( - db, - tableName, - config, - db.reg, - db.logger, - db.tracer, - db.wal, - ) - } - if err != nil { - return fmt.Errorf("instantiate table: %w", err) - } - - table.active, err = newTableBlock(table, 0, tx, id) - if err != nil { - return err - } - db.mtx.Lock() - db.tables[tableName] = table - db.mtx.Unlock() - return nil + return func() error { + db.mtx.Lock() + defer db.mtx.Unlock() + config := NewTableConfig(schema, FromConfig(entry.Config)) + if _, ok := db.roTables[tableName]; ok { + table, err = db.promoteReadOnlyTableLocked(tableName, config) + if err != nil { + return fmt.Errorf("promoting read only table: %w", err) + } + } else { + table, err = newTable( + db, + tableName, + config, + db.metricsProvider.metricsForTable(tableName), + db.logger, + db.tracer, + wal, + ) + if err != nil { + return fmt.Errorf("instantiate table: %w", err) + } + } + + table.active, err = newTableBlock(table, 0, tx, id) + if err != nil { + return err + } + db.tables[tableName] = table + return nil + }() } if err != nil { return fmt.Errorf("get table: %w", err) } - // If we get to this point it means a block was finished but did - // not get persisted. - table.pendingBlocks[table.active] = struct{}{} - go table.writeBlock(table.active) + level.Info(db.logger).Log( + "msg", "writing unfinished block in recovery", + "table", tableName, + "tx", tx, + ) + if snapshotTx == 0 || tx != nextNonPersistedTxn { + // If we get to this point it means a block was finished but did + // not get persisted. If a snapshot was loaded, then the table + // already exists but the active block is outdated. If + // tx == nextNonPersistedTxn, we should not persist the active + // block, but just create a new block. 
+ table.pendingBlocks[table.active] = struct{}{} + go table.writeBlock(table.active, tx, db.columnStore.manualBlockRotation) + } - if !proto.Equal(schema, table.config.schema.Definition()) { + protoEqual := false + switch schema.(type) { + case *schemav2pb.Schema: + protoEqual = proto.Equal(schema, table.config.Load().GetSchemaV2()) + case *schemapb.Schema: + protoEqual = proto.Equal(schema, table.config.Load().GetDeprecatedSchema()) + } + if !protoEqual { // If schemas are identical from block to block we should we // reuse the previous schema in order to retain pooled memory // for it. @@ -488,7 +837,7 @@ func (db *DB) replayWAL(ctx context.Context) error { return fmt.Errorf("initialize schema: %w", err) } - table.config.schema = schema + table.schema = schema } table.active, err = newTableBlock(table, table.active.minTx, tx, id) @@ -526,90 +875,192 @@ func (db *DB) replayWAL(ctx context.Context) error { if err != nil { return fmt.Errorf("read record: %w", err) } - - if err := table.active.InsertRecord(ctx, tx, record); err != nil { - return fmt.Errorf("insert record into block: %w", err) - } + defer reader.Release() + size := util.TotalRecordSize(record) + table.active.index.InsertPart(parts.NewArrowPart(tx, record, uint64(size), table.schema, parts.WithCompactionLevel(int(index.L0)))) default: - serBuf, err := dynparquet.ReaderFromBytes(entry.Data) - if err != nil { - return fmt.Errorf("deserialize buffer: %w", err) - } - - if err := table.active.Insert(ctx, tx, serBuf); err != nil { - return fmt.Errorf("insert buffer into block: %w", err) - } + panic("parquet writes are deprecated") } - - // After every insert we're setting the tx and highWatermark to the replayed tx. - // This allows the block's compaction to start working on the inserted data. 
- db.tx.Store(tx) - db.highWatermark.Store(tx) + return nil case *walpb.Entry_TableBlockPersisted_: + // If a block was persisted but the entry still exists in the WAL, + // a snapshot was not performed after persisting the block. Perform + // one now to clean up the WAL. + performSnapshot = true + return nil + case *walpb.Entry_Snapshot_: return nil default: return fmt.Errorf("unexpected WAL entry type: %t", e) } return nil }); err != nil { - return fmt.Errorf("second WAL replay: %w", err) + return err + } + + resetTxn := snapshotTx + if lastTx > resetTxn { + resetTxn = lastTx } - db.tx.Store(lastTx) - db.highWatermark.Store(lastTx) + db.mtx.Lock() + for _, table := range db.tables { + block := table.ActiveBlock() + block.uncompressedInsertsSize.Store(block.Index().LevelSize(index.L0)) + } + db.mtx.Unlock() + db.resetToTxn(resetTxn, nil) + if performSnapshot && db.columnStore.snapshotTriggerSize != 0 { + level.Info(db.logger).Log( + "msg", "performing snapshot after recovery", + ) + db.snapshot(ctx, false, func() { + if err := db.reclaimDiskSpace(ctx, wal); err != nil { + level.Error(db.logger).Log( + "msg", "failed to reclaim disk space after snapshot during recovery", + "err", err, + ) + } + }) + } + level.Info(db.logger).Log( + append( + []any{ + "msg", "db recovered", + "wal_replay_duration", time.Since(start), + "watermark", resetTxn, + }, + snapshotLogArgs..., + )..., + ) return nil } -func (db *DB) Close() error { +type CloseOption func(*closeOptions) + +type closeOptions struct { + clearStorage bool +} + +func WithClearStorage() CloseOption { + return func(o *closeOptions) { + o.clearStorage = true + } +} + +func (db *DB) Close(options ...CloseOption) error { + opts := &closeOptions{} + for _, opt := range options { + opt(opts) + } + + shouldPersist := len(db.sinks) > 0 && !db.columnStore.manualBlockRotation + if !shouldPersist && db.columnStore.snapshotTriggerSize != 0 && !opts.clearStorage { + start := time.Now() + db.snapshot(context.Background(), 
false, func() { + level.Info(db.logger).Log("msg", "snapshot on close completed", "duration", time.Since(start)) + if err := db.reclaimDiskSpace(context.Background(), nil); err != nil { + level.Error(db.logger).Log( + "msg", "failed to reclaim disk space after snapshot", + "err", err, + ) + } + }) + } + + level.Info(db.logger).Log("msg", "closing DB") for _, table := range db.tables { table.close() - if db.bucket != nil { - table.writeBlock(table.ActiveBlock()) + if shouldPersist { + // Write the blocks but no snapshots since they are long-running + // jobs. Use db.tx.Load as the block's max txn since the table was + // closed above, so no writes are in flight at this stage. + // TODO(asubiotto): Maybe we should snapshot in any case since it + // should be faster to write to local disk than upload to object + // storage. This would avoid a slow WAL replay on startup if we + // don't manage to persist in time. + table.writeBlock(table.ActiveBlock(), db.tx.Load(), false) } } + level.Info(db.logger).Log("msg", "closed all tables") - if db.bucket != nil { - // If we've successfully persisted all the table blocks we can remove the wal - if err := os.RemoveAll(db.walDir()); err != nil { - return err - } + if err := db.closeInternal(); err != nil { + return err } - if db.columnStore.enableWAL { - if err := db.wal.Close(); err != nil { + if (shouldPersist || opts.clearStorage) && db.storagePath != "" { + if err := db.dropStorage(); err != nil { return err } + level.Info(db.logger).Log("msg", "cleaned up wal & snapshots") } - if db.txPool != nil { - db.txPool.Stop() + return nil +} + +func (db *DB) closeInternal() error { + defer func() { + // Clean up the txPool even on error. 
+ if db.txPool != nil { + db.txPool.Stop() + } + }() + + if !db.columnStore.enableWAL || db.wal == nil { + return nil } + return db.wal.Close() +} - if db.compactorPool != nil { - db.compactorPool.stop() +func (db *DB) maintainWAL() { + if minTx := db.getMinTXPersisted(); minTx > 0 { + if err := db.wal.Truncate(minTx); err != nil { + return + } } +} - return nil +// reclaimDiskSpace attempts to read the latest valid snapshot txn and removes +// any snapshots/wal entries that are older than the snapshot tx. Since this can +// be called before db.wal is set, the caller may optionally pass in a WAL to +// truncate. +func (db *DB) reclaimDiskSpace(ctx context.Context, wal WAL) error { + if db.columnStore.testingOptions.disableReclaimDiskSpaceOnSnapshot { + return nil + } + validSnapshotTxn, err := db.getLatestValidSnapshotTxn(ctx) + if err != nil { + return err + } + if validSnapshotTxn == 0 { + return nil + } + if err := db.cleanupSnapshotDir(ctx, validSnapshotTxn); err != nil { + return err + } + if wal == nil { + wal = db.wal + } + // Snapshots are taken with a read txn and are inclusive, so therefore + // include a potential write at validSnapshotTxn. We don't want this to be + // the first entry in the WAL after truncation, given it is already + // contained in the snapshot, so Truncate at validSnapshotTxn + 1. 
+ return wal.Truncate(validSnapshotTxn + 1) } -func (db *DB) maintainWAL() { +func (db *DB) getMinTXPersisted() uint64 { db.mtx.RLock() defer db.mtx.RUnlock() - minTx := uint64(0) + minTx := uint64(math.MaxUint64) for _, table := range db.tables { table.mtx.RLock() tableMinTxPersisted := table.lastCompleted table.mtx.RUnlock() - if minTx == 0 || tableMinTxPersisted < minTx { + if tableMinTxPersisted < minTx { minTx = tableMinTxPersisted } } - - if minTx > 0 { - if err := db.wal.Truncate(minTx); err != nil { - return - } - } + return minTx } func (db *DB) readOnlyTable(name string) (*Table, error) { @@ -621,8 +1072,8 @@ func (db *DB) readOnlyTable(name string) (*Table, error) { table, err := newTable( db, name, - NewTableConfig(nil), - db.reg, + nil, + db.metricsProvider.metricsForTable(name), db.logger, db.tracer, db.wal, @@ -635,7 +1086,34 @@ func (db *DB) readOnlyTable(name string) (*Table, error) { return table, nil } -func (db *DB) Table(name string, config *TableConfig) (*Table, error) { +// promoteReadOnlyTableLocked promotes a read-only table to a read-write table. +// The read-write table is returned but not added to the database. Callers must +// do so. +// db.mtx must be held while calling this method. +func (db *DB) promoteReadOnlyTableLocked(name string, config *tablepb.TableConfig) (*Table, error) { + table, ok := db.roTables[name] + if !ok { + return nil, fmt.Errorf("read only table %s not found", name) + } + schema, err := schemaFromTableConfig(config) + if err != nil { + return nil, err + } + table.config.Store(config) + table.schema = schema + delete(db.roTables, name) + return table, nil +} + +// Table will get or create a new table with the given name and config. If a table already exists with the given name, it will have it's configuration updated. 
+func (db *DB) Table(name string, config *tablepb.TableConfig) (*Table, error) { + return db.table(name, config, generateULID()) +} + +func (db *DB) table(name string, config *tablepb.TableConfig, id ulid.ULID) (*Table, error) { + if config == nil { + return nil, fmt.Errorf("table config cannot be nil") + } if !validateName(name) { return nil, errors.New("invalid table name") } @@ -643,6 +1121,7 @@ func (db *DB) Table(name string, config *TableConfig) (*Table, error) { table, ok := db.tables[name] db.mtx.RUnlock() if ok { + table.config.Store(config) return table, nil } @@ -657,17 +1136,19 @@ func (db *DB) Table(name string, config *TableConfig) (*Table, error) { } // Check if this table exists as a read only table - table, ok = db.roTables[name] - if ok { - table.config = config - delete(db.roTables, name) + if _, ok := db.roTables[name]; ok { + var err error + table, err = db.promoteReadOnlyTableLocked(name, config) + if err != nil { + return nil, err + } } else { var err error table, err = newTable( db, name, config, - db.reg, + db.metricsProvider.metricsForTable(name), db.logger, db.tracer, db.wal, @@ -680,7 +1161,6 @@ func (db *DB) Table(name string, config *TableConfig) (*Table, error) { tx, _, commit := db.begin() defer commit() - id := generateULID() if err := table.newTableBlock(0, tx, id); err != nil { return nil, err } @@ -711,6 +1191,14 @@ func (db *DB) TableProvider() *DBTableProvider { return NewDBTableProvider(db) } +// TableNames returns the names of all the db's tables. 
+func (db *DB) TableNames() []string { + db.mtx.RLock() + tables := maps.Keys(db.tables) + db.mtx.RUnlock() + return tables +} + type DBTableProvider struct { db *DB } @@ -749,15 +1237,18 @@ func (db *DB) beginRead() uint64 { // The current high watermark // A function to complete the transaction func (db *DB) begin() (uint64, uint64, func()) { - tx := db.tx.Add(1) + txn := db.tx.Add(1) watermark := db.highWatermark.Load() - return tx, watermark, func() { - if mark := db.highWatermark.Load(); mark+1 == tx { // This is the next consecutive transaction; increate the watermark - db.highWatermark.Add(1) + return txn, watermark, func() { + if mark := db.highWatermark.Load(); mark+1 == txn { + // This is the next consecutive transaction; increase the watermark. + db.highWatermark.Store(txn) + db.txPool.notifyWatermark() + return } // place completed transaction in the waiting pool - db.txPool.Insert(tx) + db.txPool.Insert(txn) } } @@ -772,7 +1263,81 @@ func (db *DB) Wait(tx uint64) { } } +// HighWatermark returns the current high watermark. +func (db *DB) HighWatermark() uint64 { + return db.highWatermark.Load() +} + +// resetToTxn resets the DB's internal state to resume from the given +// transaction. If the given wal is non-nil, it is also reset so that the next +// expected transaction will log correctly to the WAL. Note that db.wal is not +// used since callers might be calling resetToTxn before db.wal has been +// initialized or might not want the WAL to be reset. +func (db *DB) resetToTxn(txn uint64, wal WAL) { + db.tx.Store(txn) + db.highWatermark.Store(txn) + if wal != nil { + // This call resets the WAL to a zero state so that new records can be + // logged. + if err := wal.Reset(txn + 1); err != nil { + level.Warn(db.logger).Log( + "msg", "failed to reset WAL when resetting DB to txn", + "txnID", txn, + "err", err, + ) + } + } +} + // validateName ensures that the passed in name doesn't violate any constrainsts. 
func validateName(name string) bool { return !strings.Contains(name, "/") } + +// dropStorage removes all data from the storage directory, but leaves the empty +// storage directory. +func (db *DB) dropStorage() error { + trashDir := db.trashDir() + + entries, err := os.ReadDir(db.storagePath) + if err != nil { + if os.IsNotExist(err) { + // Nothing to drop. + return nil + } + return err + } + // Try to rename all entries as this is O(1) per entry. We want to preserve + // the storagePath for future opens of this database. Callers that want to + // drop the DB remove storagePath themselves. + if moveErr := func() error { + if err := os.MkdirAll(trashDir, os.FileMode(0o755)); err != nil { + return fmt.Errorf("making trash dir: %w", err) + } + // Create a temporary directory in the trash dir to avoid clashing + // with other wal/snapshot dirs that might not have been removed + // previously. + tmpPath, err := os.MkdirTemp(trashDir, "") + if err != nil { + return err + } + errs := make([]error, 0, len(entries)) + for _, e := range entries { + if err := os.Rename(filepath.Join(db.storagePath, e.Name()), filepath.Join(tmpPath, e.Name())); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } + } + return errors.Join(errs...) + }(); moveErr != nil { + // If we failed to move storage path entries to the trash dir, fall back + // to attempting to remove them with RemoveAll. + errs := make([]error, 0, len(entries)) + for _, e := range entries { + if err := os.RemoveAll(filepath.Join(db.storagePath, e.Name())); err != nil { + errs = append(errs, err) + } + } + return errors.Join(errs...) 
+ } + return os.RemoveAll(trashDir) +} diff --git a/db_test.go b/db_test.go index e2580ff96..2fc00c0fc 100644 --- a/db_test.go +++ b/db_test.go @@ -5,44 +5,56 @@ import ( "errors" "fmt" "io" + "io/fs" + "math/rand" "os" + "path/filepath" "strings" + "sync" "testing" "time" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/go-kit/log/level" "github.com/google/uuid" - "github.com/segmentio/parquet-go" + "github.com/polarsignals/iceberg-go" + "github.com/polarsignals/iceberg-go/catalog" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" "github.com/thanos-io/objstore" - "github.com/thanos-io/objstore/providers/filesystem" + "golang.org/x/sync/errgroup" + "google.golang.org/protobuf/proto" "github.com/polarsignals/frostdb/dynparquet" - "github.com/polarsignals/frostdb/pqarrow" + walpb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/wal/v1alpha1" + "github.com/polarsignals/frostdb/index" "github.com/polarsignals/frostdb/query" "github.com/polarsignals/frostdb/query/logicalplan" - - schemapb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha1" + "github.com/polarsignals/frostdb/query/physicalplan" + "github.com/polarsignals/frostdb/recovery" + "github.com/polarsignals/frostdb/storage" ) func TestDBWithWALAndBucket(t *testing.T) { config := NewTableConfig( - dynparquet.NewSampleSchema(), + dynparquet.SampleDefinition(), ) logger := newTestLogger(t) dir := t.TempDir() - bucket, err := 
filesystem.NewBucket(dir) - require.NoError(t, err) + bucket := objstore.NewInMemBucket() + + sinksource := NewDefaultObjstoreBucket(bucket) c, err := New( WithLogger(logger), WithWAL(), WithStoragePath(dir), - WithBucketStorage(bucket), - WithActiveMemorySize(100*1024), + WithReadWriteStorage(sinksource), + WithActiveMemorySize(100*KiB), ) require.NoError(t, err) db, err := c.DB(context.Background(), "test") @@ -54,9 +66,9 @@ func TestDBWithWALAndBucket(t *testing.T) { ctx := context.Background() for i := 0; i < 100; i++ { - buf, err := samples.ToBuffer(table.Schema()) + r, err := samples.ToRecord() require.NoError(t, err) - _, err = table.InsertBuffer(ctx, buf) + _, err = table.InsertRecord(ctx, r) require.NoError(t, err) } require.NoError(t, table.EnsureCompaction()) @@ -66,244 +78,208 @@ func TestDBWithWALAndBucket(t *testing.T) { WithLogger(logger), WithWAL(), WithStoragePath(dir), - WithBucketStorage(bucket), - WithActiveMemorySize(100*1024), + WithReadWriteStorage(sinksource), + WithActiveMemorySize(100*KiB), ) require.NoError(t, err) defer c.Close() - require.NoError(t, c.ReplayWALs(context.Background())) + db, err = c.DB(context.Background(), "test") + require.NoError(t, err) + table, err = db.Table("test", config) + require.NoError(t, err) + + // Validate that a read can be performed of the persisted data + pool := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer pool.AssertSize(t, 0) + rows := int64(0) + err = table.View(ctx, func(ctx context.Context, tx uint64) error { + return table.Iterator( + ctx, + tx, + pool, + []logicalplan.Callback{func(_ context.Context, ar arrow.Record) error { + rows += ar.NumRows() + return nil + }}, + ) + }) + require.NoError(t, err) + require.Equal(t, int64(300), rows) } func TestDBWithWAL(t *testing.T) { ctx := context.Background() - test := func(t *testing.T, isArrow bool) { - config := NewTableConfig( - dynparquet.NewSampleSchema(), - ) - - logger := newTestLogger(t) - - dir := t.TempDir() - c, err := New( - 
WithLogger(logger), - WithWAL(), - WithStoragePath(dir), - ) - require.NoError(t, err) - defer c.Close() - - db, err := c.DB(context.Background(), "test") - require.NoError(t, err) - table, err := db.Table("test", config) - require.NoError(t, err) - - samples := dynparquet.Samples{{ - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 1, - Value: 1, - }, { - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value2"}, - {Name: "label2", Value: "value2"}, - {Name: "label3", Value: "value3"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 2, - Value: 2, - }, { - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value3"}, - {Name: "label2", Value: "value2"}, - {Name: "label4", Value: "value4"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 3, - Value: 3, - }} - - switch isArrow { - case true: - - ps, err := table.Schema().DynamicParquetSchema(map[string][]string{ - "labels": {"label1", "label2", "label3", "label4"}, - }) - require.NoError(t, err) - - sc, err := pqarrow.ParquetSchemaToArrowSchema(ctx, ps, logicalplan.IterOptions{}) - require.NoError(t, err) - - rec, err := samples.ToRecord(sc) - require.NoError(t, err) - - ctx := context.Background() - _, err = table.InsertRecord(ctx, rec) - require.NoError(t, err) + config := NewTableConfig( + dynparquet.SampleDefinition(), + ) - default: - 
buf, err := samples.ToBuffer(table.Schema()) - require.NoError(t, err) + logger := newTestLogger(t) - ctx := context.Background() - _, err = table.InsertBuffer(ctx, buf) - require.NoError(t, err) - } + dir := t.TempDir() + c, err := New( + WithLogger(logger), + WithWAL(), + WithStoragePath(dir), + ) + require.NoError(t, err) + defer c.Close() - samples = dynparquet.Samples{{ - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 2, - Value: 2, - }} + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) + table, err := db.Table("test", config) + require.NoError(t, err) - switch isArrow { - case true: - ps, err := table.Schema().DynamicParquetSchema(map[string][]string{ - "labels": {"label1", "label2"}, - }) - require.NoError(t, err) + samples := dynparquet.Samples{{ + ExampleType: "test", + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: 1, + Value: 1, + }, { + ExampleType: "test", + Labels: map[string]string{ + "label1": "value2", + "label2": "value2", + "label3": "value3", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: 2, + Value: 2, + }, { + ExampleType: "test", + Labels: map[string]string{ + "label1": "value3", + "label2": "value2", + "label4": "value4", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: 3, + Value: 3, + }} - sc, err := pqarrow.ParquetSchemaToArrowSchema(ctx, ps, logicalplan.IterOptions{}) - require.NoError(t, err) + rec, err := samples.ToRecord() + require.NoError(t, err) - rec, err := samples.ToRecord(sc) - require.NoError(t, err) + _, err = table.InsertRecord(ctx, rec) + require.NoError(t, err) - ctx := context.Background() - _, err = table.InsertRecord(ctx, rec) - require.NoError(t, err) - default: - buf, err := samples.ToBuffer(table.Schema()) - require.NoError(t, err) + samples = dynparquet.Samples{{ + ExampleType: "test", + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: 2, + Value: 2, + }} - _, err = table.InsertBuffer(ctx, buf) - require.NoError(t, err) - } + rec, err = samples.ToRecord() + require.NoError(t, err) - samples = dynparquet.Samples{{ - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, - {Name: "label3", Value: "value3"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 3, - Value: 3, - }} + _, err = table.InsertRecord(ctx, rec) + require.NoError(t, err) - switch isArrow { - case true: - ps, err := table.Schema().DynamicParquetSchema(map[string][]string{ - "labels": {"label1", "label2", "label3"}, - }) - require.NoError(t, err) + samples = dynparquet.Samples{{ + ExampleType: "test", + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", + "label3": "value3", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: 3, + Value: 3, + }} - sc, err := pqarrow.ParquetSchemaToArrowSchema(ctx, ps, logicalplan.IterOptions{}) - require.NoError(t, err) + rec, err = samples.ToRecord() + require.NoError(t, err) - rec, err := samples.ToRecord(sc) - require.NoError(t, err) + _, err = table.InsertRecord(ctx, rec) + require.NoError(t, err) - ctx := context.Background() - _, err = table.InsertRecord(ctx, rec) - require.NoError(t, err) - default: - buf, err := samples.ToBuffer(table.Schema()) - require.NoError(t, err) + require.NoError(t, c.Close()) - _, err = table.InsertBuffer(ctx, buf) - require.NoError(t, err) - } + c, err = New( + WithLogger(logger), + WithWAL(), + WithStoragePath(dir), + ) + require.NoError(t, err) + defer c.Close() - require.NoError(t, c.Close()) + db, err = c.DB(context.Background(), "test") + require.NoError(t, err) + table, err = db.Table("test", config) + require.NoError(t, err) - c, err = New( - WithLogger(logger), - WithWAL(), - WithStoragePath(dir), + pool := memory.NewGoAllocator() + records := []arrow.Record{} + err = table.View(ctx, func(ctx context.Context, tx uint64) error { + return table.Iterator( + ctx, + tx, + pool, + []logicalplan.Callback{func(_ context.Context, ar arrow.Record) error { + ar.Retain() + records = append(records, ar) + return nil + }}, ) - require.NoError(t, err) - defer c.Close() - - require.NoError(t, c.ReplayWALs(context.Background())) - - db, err = c.DB(context.Background(), "test") - require.NoError(t, err) - table, err = db.Table("test", config) - require.NoError(t, err) - - pool := memory.NewGoAllocator() - records := []arrow.Record{} - err = table.View(ctx, func(ctx context.Context, tx uint64) error { - return table.Iterator( - ctx, - tx, - pool, - []logicalplan.Callback{func(ctx context.Context, ar arrow.Record) error { - ar.Retain() - records = append(records, ar) - return 
nil - }}, - ) - }) - require.NoError(t, err) + }) + require.NoError(t, err) - // Validate returned data - rows := int64(0) - for _, r := range records { - rows += r.NumRows() - r.Release() - } - require.Equal(t, int64(5), rows) + // Validate returned data + rows := int64(0) + for _, r := range records { + rows += r.NumRows() + r.Release() } + require.Equal(t, int64(5), rows) - t.Run("parquet", func(t *testing.T) { - test(t, false) - }) - t.Run("arrow", func(t *testing.T) { - test(t, true) - }) + // Perform an aggregate query against the replayed data + engine := query.NewEngine(pool, db.TableProvider()) + err = engine.ScanTable("test"). + Aggregate( + []*logicalplan.AggregationFunction{logicalplan.Sum(logicalplan.Col("value"))}, + []logicalplan.Expr{logicalplan.Col("labels.label2")}, + ). + Execute(context.Background(), func(_ context.Context, _ arrow.Record) error { + return nil + }) + require.NoError(t, err) } func Test_DB_WithStorage(t *testing.T) { config := NewTableConfig( - dynparquet.NewSampleSchema(), + dynparquet.SampleDefinition(), ) - bucket, err := filesystem.NewBucket(t.TempDir()) - require.NoError(t, err) - + bucket := objstore.NewInMemBucket() + sinksource := NewDefaultObjstoreBucket(bucket) logger := newTestLogger(t) c, err := New( WithLogger(logger), - WithBucketStorage(bucket), + WithReadWriteStorage(sinksource), ) require.NoError(t, err) @@ -315,9 +291,9 @@ func Test_DB_WithStorage(t *testing.T) { samples := dynparquet.Samples{{ ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -327,10 +303,10 @@ func Test_DB_WithStorage(t *testing.T) { Value: 1, }, { ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value2"}, - {Name: "label2", Value: "value2"}, - {Name: 
"label3", Value: "value3"}, + Labels: map[string]string{ + "label1": "value2", + "label2": "value2", + "label3": "value3", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -340,10 +316,10 @@ func Test_DB_WithStorage(t *testing.T) { Value: 2, }, { ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value3"}, - {Name: "label2", Value: "value2"}, - {Name: "label4", Value: "value4"}, + Labels: map[string]string{ + "label1": "value3", + "label2": "value2", + "label4": "value4", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -353,65 +329,104 @@ func Test_DB_WithStorage(t *testing.T) { Value: 3, }} - buf, err := samples.ToBuffer(table.Schema()) + r, err := samples.ToRecord() require.NoError(t, err) ctx := context.Background() - _, err = table.InsertBuffer(ctx, buf) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + + pool := memory.NewGoAllocator() + engine := query.NewEngine(pool, db.TableProvider()) + var inMemory arrow.Record + err = engine.ScanTable(t.Name()). + Filter(logicalplan.Col("timestamp").GtEq(logicalplan.Literal(2))). + Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + r.Retain() + inMemory = r + return nil + }) require.NoError(t, err) // Gracefully close the db to persist blocks c.Close() - pool := memory.NewGoAllocator() - engine := query.NewEngine(pool, db.TableProvider()) + c, err = New( + WithLogger(logger), + WithReadWriteStorage(sinksource), + ) + require.NoError(t, err) + defer c.Close() + + db, err = c.DB(context.Background(), t.Name()) + require.NoError(t, err) + + _, err = db.Table(t.Name(), config) + require.NoError(t, err) + + engine = query.NewEngine(pool, db.TableProvider()) + var onDisk arrow.Record err = engine.ScanTable(t.Name()). Filter(logicalplan.Col("timestamp").GtEq(logicalplan.Literal(2))). 
- Execute(context.Background(), func(ctx context.Context, r arrow.Record) error { - require.Equal(t, int64(1), r.NumCols()) - require.Equal(t, int64(2), r.NumRows()) + Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + r.Retain() + onDisk = r return nil }) require.NoError(t, err) + + require.True(t, array.RecordEqual(inMemory, onDisk)) + require.Equal(t, int64(1), onDisk.NumCols()) + require.Equal(t, int64(2), onDisk.NumRows()) } -func Test_DB_ColdStart(t *testing.T) { +func Test_DB_Filter_Block(t *testing.T) { sanitize := func(name string) string { - return strings.Replace(name, "/", "-", -1) + return strings.ReplaceAll(name, "/", "-") } config := NewTableConfig( - dynparquet.NewSampleSchema(), + dynparquet.SampleDefinition(), ) - bucket, err := filesystem.NewBucket(t.TempDir()) - require.NoError(t, err) - t.Cleanup(func() { - os.RemoveAll(sanitize(t.Name())) - }) - + bucket := objstore.NewInMemBucket() + sinksource := NewDefaultObjstoreBucket(bucket) logger := newTestLogger(t) tests := map[string]struct { newColumnstore func(t *testing.T) *ColumnStore + filterExpr logicalplan.Expr + projections []logicalplan.Expr + distinct []logicalplan.Expr + rows int64 + cols int64 }{ - "cold start with storage": { + "dynamic projection no optimization": { + filterExpr: logicalplan.And( + logicalplan.Col("timestamp").GtEq(logicalplan.Literal(2)), + ), + projections: []logicalplan.Expr{logicalplan.DynCol("labels")}, + rows: 2, + cols: 2, newColumnstore: func(t *testing.T) *ColumnStore { c, err := New( WithLogger(logger), - WithBucketStorage(bucket), + WithReadWriteStorage(sinksource), ) require.NoError(t, err) return c }, }, - "cold start with storage and wal": { + "distinct": { + filterExpr: nil, + distinct: []logicalplan.Expr{logicalplan.DynCol("labels")}, + projections: nil, + rows: 1, + cols: 2, newColumnstore: func(t *testing.T) *ColumnStore { c, err := New( WithLogger(logger), - WithBucketStorage(bucket), - WithWAL(), - 
WithStoragePath(t.TempDir()), + WithReadWriteStorage(sinksource), ) require.NoError(t, err) return c @@ -433,9 +448,9 @@ func Test_DB_ColdStart(t *testing.T) { samples := dynparquet.Samples{ { ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -446,9 +461,9 @@ func Test_DB_ColdStart(t *testing.T) { }, { ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -459,9 +474,9 @@ func Test_DB_ColdStart(t *testing.T) { }, { ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -472,11 +487,11 @@ func Test_DB_ColdStart(t *testing.T) { }, } - buf, err := samples.ToBuffer(table.Schema()) + r, err := samples.ToRecord() require.NoError(t, err) ctx := context.Background() - _, err = table.InsertBuffer(ctx, buf) + _, err = table.InsertRecord(ctx, r) require.NoError(t, err) // Gracefully close the db to persist blocks @@ -485,7 +500,7 @@ func Test_DB_ColdStart(t *testing.T) { // Open a new database pointed to the same bucket storage c, err = New( WithLogger(logger), - WithBucketStorage(bucket), + WithReadWriteStorage(sinksource), ) require.NoError(t, err) defer c.Close() @@ -494,288 +509,7 @@ func Test_DB_ColdStart(t *testing.T) { db, err = c.DB(context.Background(), sanitize(t.Name())) require.NoError(t, err) - pool := 
memory.NewGoAllocator() - engine := query.NewEngine(pool, db.TableProvider()) - require.NoError(t, engine.ScanTable(sanitize(t.Name())).Execute( - context.Background(), func(ctx context.Context, r arrow.Record) error { - require.Equal(t, int64(6), r.NumCols()) - require.Equal(t, int64(3), r.NumRows()) - return nil - }, - )) - }) - } -} - -func Test_DB_ColdStart_MissingColumn(t *testing.T) { - schemaDef := &schemapb.Schema{ - Name: "test", - Columns: []*schemapb.Column{ - { - Name: "example_type", - StorageLayout: &schemapb.StorageLayout{ - Type: schemapb.StorageLayout_TYPE_STRING, - Encoding: schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, - }, - Dynamic: false, - }, - { - Name: "labels", - StorageLayout: &schemapb.StorageLayout{ - Type: schemapb.StorageLayout_TYPE_STRING, - Nullable: true, - Encoding: schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, - }, - Dynamic: true, - }, - { - Name: "pprof_labels", - StorageLayout: &schemapb.StorageLayout{ - Type: schemapb.StorageLayout_TYPE_STRING, - Nullable: true, - Encoding: schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, - }, - Dynamic: true, - }, - }, - SortingColumns: []*schemapb.SortingColumn{ - { - Name: "example_type", - Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, - }, - { - Name: "labels", - Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, - NullsFirst: true, - }, - { - Name: "pprof_labels", - Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, - NullsFirst: true, - }, - }, - } - - s, err := dynparquet.SchemaFromDefinition(schemaDef) - require.NoError(t, err) - config := NewTableConfig(s) - - bucket, err := filesystem.NewBucket(t.TempDir()) - require.NoError(t, err) - t.Cleanup(func() { - os.RemoveAll(t.Name()) - }) - - logger := newTestLogger(t) - - c, err := New( - WithLogger(logger), - WithBucketStorage(bucket), - ) - require.NoError(t, err) - - db, err := c.DB(context.Background(), t.Name()) - require.NoError(t, err) - table, err := db.Table(t.Name(), config) - require.NoError(t, err) - 
t.Cleanup(func() { - os.RemoveAll(t.Name()) - }) - - buf, err := s.NewBuffer(map[string][]string{ - "labels": { - "label1", - "label2", - }, - "pprof_labels": {}, - }) - require.NoError(t, err) - - _, err = buf.WriteRows([]parquet.Row{ - { - parquet.ValueOf("test").Level(0, 0, 0), - parquet.ValueOf("value1").Level(0, 1, 1), - parquet.ValueOf("value1").Level(0, 1, 2), - }, - }) - require.NoError(t, err) - - ctx := context.Background() - _, err = table.InsertBuffer(ctx, buf) - require.NoError(t, err) - - // Gracefully close the db to persist blocks - c.Close() - - // Open a new database pointed to the same bucket storage - c, err = New( - WithLogger(logger), - WithBucketStorage(bucket), - ) - require.NoError(t, err) - defer c.Close() - - // connect to our test db - db, err = c.DB(context.Background(), t.Name()) - require.NoError(t, err) - - // fetch new table - table, err = db.Table(t.Name(), config) - require.NoError(t, err) - - buf, err = s.NewBuffer(map[string][]string{ - "labels": { - "label1", - "label2", - }, - "pprof_labels": {}, - }) - require.NoError(t, err) - - _, err = buf.WriteRows([]parquet.Row{ - { - parquet.ValueOf("test").Level(0, 0, 0), - parquet.ValueOf("value2").Level(0, 1, 1), - parquet.ValueOf("value2").Level(0, 1, 2), - }, - }) - require.NoError(t, err) - - _, err = table.InsertBuffer(ctx, buf) - require.NoError(t, err) -} - -func Test_DB_Filter_Block(t *testing.T) { - sanitize := func(name string) string { - return strings.Replace(name, "/", "-", -1) - } - - config := NewTableConfig( - dynparquet.NewSampleSchema(), - ) - - bucket, err := filesystem.NewBucket(t.TempDir()) - require.NoError(t, err) - t.Cleanup(func() { - os.RemoveAll(sanitize(t.Name())) - }) - - logger := newTestLogger(t) - - tests := map[string]struct { - newColumnstore func(t *testing.T) *ColumnStore - filterExpr logicalplan.Expr - projections []logicalplan.Expr - distinct []logicalplan.Expr - rows int64 - cols int64 - }{ - "dynamic projection no optimization": { - filterExpr: 
logicalplan.And( - logicalplan.Col("timestamp").GtEq(logicalplan.Literal(2)), - ), - projections: []logicalplan.Expr{logicalplan.DynCol("labels")}, - rows: 2, - cols: 2, - newColumnstore: func(t *testing.T) *ColumnStore { - c, err := New( - WithLogger(logger), - WithBucketStorage(bucket), - ) - require.NoError(t, err) - return c - }, - }, - "distinct": { - filterExpr: nil, - distinct: []logicalplan.Expr{logicalplan.DynCol("labels")}, - projections: nil, - rows: 1, - cols: 2, - newColumnstore: func(t *testing.T) *ColumnStore { - c, err := New( - WithLogger(logger), - WithBucketStorage(bucket), - ) - require.NoError(t, err) - return c - }, - }, - } - - for name, test := range tests { - t.Run(name, func(t *testing.T) { - c := test.newColumnstore(t) - db, err := c.DB(context.Background(), sanitize(t.Name())) - require.NoError(t, err) - table, err := db.Table(sanitize(t.Name()), config) - require.NoError(t, err) - t.Cleanup(func() { - os.RemoveAll(sanitize(t.Name())) - }) - - samples := dynparquet.Samples{ - { - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 1, - Value: 1, - }, - { - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 2, - Value: 2, - }, - { - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 3, - Value: 3, - }, - } - - buf, err := samples.ToBuffer(table.Schema()) - require.NoError(t, err) - - ctx := context.Background() - _, err = table.InsertBuffer(ctx, buf) - require.NoError(t, err) - - // Gracefully close the db to persist blocks - c.Close() - - // Open a new database pointed to the same bucket storage - c, err = New( - WithLogger(logger), - WithBucketStorage(bucket), - ) - require.NoError(t, err) - defer c.Close() - - // connect to our test db - db, err = c.DB(context.Background(), sanitize(t.Name())) + _, err = db.Table(sanitize(t.Name()), config) require.NoError(t, err) engine := query.NewEngine( @@ -793,7 +527,7 @@ func Test_DB_Filter_Block(t *testing.T) { if test.distinct != nil { query = query.Distinct(test.distinct...) } - err = query.Execute(context.Background(), func(ctx context.Context, ar arrow.Record) error { + err = query.Execute(context.Background(), func(_ context.Context, ar arrow.Record) error { require.Equal(t, test.rows, ar.NumRows()) require.Equal(t, test.cols, ar.NumCols()) return nil @@ -805,18 +539,23 @@ func Test_DB_Filter_Block(t *testing.T) { // ErrorBucket is an objstore.Bucket implementation that supports error injection. 
type ErrorBucket struct { - iter func(ctx context.Context, dir string, f func(string) error, options ...objstore.IterOption) error - get func(ctx context.Context, name string) (io.ReadCloser, error) - getRange func(ctx context.Context, name string, off, length int64) (io.ReadCloser, error) - exists func(ctx context.Context, name string) (bool, error) - isObjNotFoundErr func(err error) bool - attributes func(ctx context.Context, name string) (objstore.ObjectAttributes, error) + iter func(ctx context.Context, dir string, f func(string) error, options ...objstore.IterOption) error + get func(ctx context.Context, name string) (io.ReadCloser, error) + getRange func(ctx context.Context, name string, off, length int64) (io.ReadCloser, error) + exists func(ctx context.Context, name string) (bool, error) + isObjNotFoundErr func(err error) bool + isCustomerManagedKeyError func(err error) bool + attributes func(ctx context.Context, name string) (objstore.ObjectAttributes, error) upload func(ctx context.Context, name string, r io.Reader) error delete func(ctx context.Context, name string) error close func() error } +func (e *ErrorBucket) IsAccessDeniedErr(err error) bool { + return err != nil +} + func (e *ErrorBucket) Iter(ctx context.Context, dir string, f func(string) error, options ...objstore.IterOption) error { if e.iter != nil { return e.iter(ctx, dir, f, options...) 
@@ -857,6 +596,14 @@ func (e *ErrorBucket) IsObjNotFoundErr(err error) bool { return false } +func (e *ErrorBucket) IsCustomerManagedKeyError(err error) bool { + if e.isCustomerManagedKeyError != nil { + return e.isCustomerManagedKeyError(err) + } + + return false +} + func (e *ErrorBucket) Attributes(ctx context.Context, name string) (objstore.ObjectAttributes, error) { if e.attributes != nil { return e.attributes(ctx, name) @@ -905,10 +652,11 @@ func Test_DB_OpenError(t *testing.T) { return nil }, } + sinksource := NewDefaultObjstoreBucket(e) c, err := New( WithLogger(logger), - WithBucketStorage(e), + WithReadWriteStorage(sinksource), ) require.NoError(t, err) defer c.Close() @@ -926,19 +674,15 @@ func Test_DB_OpenError(t *testing.T) { func Test_DB_Block_Optimization(t *testing.T) { sanitize := func(name string) string { - return strings.Replace(name, "/", "-", -1) + return strings.ReplaceAll(name, "/", "-") } config := NewTableConfig( - dynparquet.NewSampleSchema(), + dynparquet.SampleDefinition(), ) - bucket, err := filesystem.NewBucket(t.TempDir()) - require.NoError(t, err) - t.Cleanup(func() { - os.RemoveAll(sanitize(t.Name())) - }) - + bucket := objstore.NewInMemBucket() + sinksource := NewDefaultObjstoreBucket(bucket) logger := newTestLogger(t) now := time.Now() @@ -960,7 +704,7 @@ func Test_DB_Block_Optimization(t *testing.T) { newColumnstore: func(t *testing.T) *ColumnStore { c, err := New( WithLogger(logger), - WithBucketStorage(bucket), + WithReadWriteStorage(sinksource), ) require.NoError(t, err) return c @@ -974,7 +718,7 @@ func Test_DB_Block_Optimization(t *testing.T) { newColumnstore: func(t *testing.T) *ColumnStore { c, err := New( WithLogger(logger), - WithBucketStorage(bucket), + WithReadWriteStorage(sinksource), ) require.NoError(t, err) return c @@ -996,9 +740,9 @@ func Test_DB_Block_Optimization(t *testing.T) { samples := dynparquet.Samples{ { ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: 
"label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -1009,9 +753,9 @@ func Test_DB_Block_Optimization(t *testing.T) { }, { ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -1022,9 +766,9 @@ func Test_DB_Block_Optimization(t *testing.T) { }, { ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -1035,11 +779,11 @@ func Test_DB_Block_Optimization(t *testing.T) { }, } - buf, err := samples.ToBuffer(table.Schema()) + r, err := samples.ToRecord() require.NoError(t, err) ctx := context.Background() - _, err = table.InsertBuffer(ctx, buf) + _, err = table.InsertRecord(ctx, r) require.NoError(t, err) // Gracefully close the db to persist blocks @@ -1048,7 +792,7 @@ func Test_DB_Block_Optimization(t *testing.T) { // Open a new database pointed to the same bucket storage c, err = New( WithLogger(logger), - WithBucketStorage(bucket), + WithReadWriteStorage(sinksource), ) require.NoError(t, err) defer c.Close() @@ -1057,6 +801,9 @@ func Test_DB_Block_Optimization(t *testing.T) { db, err = c.DB(context.Background(), sanitize(t.Name())) require.NoError(t, err) + _, err = db.Table(sanitize(t.Name()), config) + require.NoError(t, err) + engine := query.NewEngine( memory.NewGoAllocator(), db.TableProvider(), @@ -1074,7 +821,7 @@ func Test_DB_Block_Optimization(t *testing.T) { } rows := int64(0) cols := int64(0) 
- err = query.Execute(context.Background(), func(ctx context.Context, ar arrow.Record) error { + err = query.Execute(context.Background(), func(_ context.Context, ar arrow.Record) error { rows += ar.NumRows() cols += ar.NumCols() return nil @@ -1088,39 +835,12 @@ func Test_DB_Block_Optimization(t *testing.T) { func Test_DB_TableWrite_FlatSchema(t *testing.T) { ctx := context.Background() - flatDefinition := &schemapb.Schema{ - Name: "test", - Columns: []*schemapb.Column{{ - Name: "example_type", - StorageLayout: &schemapb.StorageLayout{ - Type: schemapb.StorageLayout_TYPE_STRING, - Encoding: schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, - }, - Dynamic: false, - }, { - Name: "timestamp", - StorageLayout: &schemapb.StorageLayout{ - Type: schemapb.StorageLayout_TYPE_INT64, - }, - Dynamic: false, - }, { - Name: "value", - StorageLayout: &schemapb.StorageLayout{ - Type: schemapb.StorageLayout_TYPE_INT64, - }, - Dynamic: false, - }}, - SortingColumns: []*schemapb.SortingColumn{{ - Name: "example_type", - Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, - }, { - Name: "timestamp", - Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, - }}, + + type Flat struct { + ExampleType string `frostdb:",rle_dict,asc(0)"` + Timestamp int64 `frostdb:",asc(1)"` + Value int64 } - schema, err := dynparquet.SchemaFromDefinition(flatDefinition) - require.NoError(t, err) - config := NewTableConfig(schema) c, err := New(WithLogger(newTestLogger(t))) require.NoError(t, err) @@ -1129,20 +849,15 @@ func Test_DB_TableWrite_FlatSchema(t *testing.T) { db, err := c.DB(ctx, "flatschema") require.NoError(t, err) - table, err := db.Table("test", config) + table, err := NewGenericTable[Flat](db, "test", memory.NewGoAllocator()) require.NoError(t, err) + defer table.Release() - s := struct { - ExampleType string - Timestamp int64 - Value int64 - }{ + _, err = table.Write(ctx, Flat{ ExampleType: "hello-world", Timestamp: 7, Value: 8, - } - - _, err = table.Write(ctx, s) + }) 
require.NoError(t, err) engine := query.NewEngine( @@ -1150,7 +865,7 @@ func Test_DB_TableWrite_FlatSchema(t *testing.T) { db.TableProvider(), ) - err = engine.ScanTable("test").Execute(ctx, func(ctx context.Context, ar arrow.Record) error { + err = engine.ScanTable("test").Execute(ctx, func(_ context.Context, ar arrow.Record) error { require.Equal(t, int64(1), ar.NumRows()) require.Equal(t, int64(3), ar.NumCols()) return nil @@ -1160,9 +875,6 @@ func Test_DB_TableWrite_FlatSchema(t *testing.T) { func Test_DB_TableWrite_DynamicSchema(t *testing.T) { ctx := context.Background() - config := NewTableConfig( - dynparquet.NewSampleSchema(), - ) c, err := New(WithLogger(newTestLogger(t))) require.NoError(t, err) @@ -1171,17 +883,18 @@ func Test_DB_TableWrite_DynamicSchema(t *testing.T) { db, err := c.DB(ctx, "sampleschema") require.NoError(t, err) - table, err := db.Table("test", config) + table, err := NewGenericTable[dynparquet.Sample](db, "test", memory.NewGoAllocator()) require.NoError(t, err) + defer table.Release() now := time.Now() ts := now.UnixMilli() samples := dynparquet.Samples{ { ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -1192,10 +905,10 @@ func Test_DB_TableWrite_DynamicSchema(t *testing.T) { }, { ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, - {Name: "label3", Value: "value3"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", + "label3": "value3", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -1206,9 +919,9 @@ func Test_DB_TableWrite_DynamicSchema(t *testing.T) { }, { ExampleType: "test", - Labels: []dynparquet.Label{ - 
{Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -1227,7 +940,7 @@ func Test_DB_TableWrite_DynamicSchema(t *testing.T) { db.TableProvider(), ) - err = engine.ScanTable("test").Execute(ctx, func(ctx context.Context, ar arrow.Record) error { + err = engine.ScanTable("test").Execute(ctx, func(_ context.Context, ar arrow.Record) error { require.Equal(t, int64(3), ar.NumRows()) require.Equal(t, int64(7), ar.NumCols()) return nil @@ -1250,137 +963,2389 @@ func Test_DB_TableNotExist(t *testing.T) { db.TableProvider(), ) - err = engine.ScanTable("does-not-exist").Execute(ctx, func(ctx context.Context, ar arrow.Record) error { + err = engine.ScanTable("does-not-exist").Execute(ctx, func(_ context.Context, _ arrow.Record) error { return nil }) require.Error(t, err) } -func TestReplayBackwardsCompatibility(t *testing.T) { - ctx := context.Background() - const storagePath = "testdata/oldwal" - c, err := New(WithWAL(), WithStoragePath(storagePath)) - require.NoError(t, err) - defer c.Close() - - require.NoError(t, c.ReplayWALs(ctx)) -} - func Test_DB_TableWrite_ArrowRecord(t *testing.T) { - ctx := context.Background() - config := NewTableConfig( - dynparquet.NewSampleSchema(), - ) + for _, schema := range []proto.Message{ + dynparquet.SampleDefinition(), + dynparquet.PrehashedSampleDefinition(), + } { + ctx := context.Background() + config := NewTableConfig( + schema, + ) - c, err := New(WithLogger(newTestLogger(t))) - require.NoError(t, err) - defer c.Close() + c, err := New(WithLogger(newTestLogger(t))) + require.NoError(t, err) + defer c.Close() - db, err := c.DB(ctx, "sampleschema") - require.NoError(t, err) + db, err := c.DB(ctx, "sampleschema") + require.NoError(t, err) - table, err := db.Table("test", config) - require.NoError(t, err) + table, err := 
db.Table("test", config) + require.NoError(t, err) - samples := dynparquet.Samples{ - { - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + samples := dynparquet.Samples{ + { + ExampleType: "test", + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: 10, + Value: 1, }, - Timestamp: 10, - Value: 1, + { + ExampleType: "test", + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", + "label3": "value3", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: 11, + Value: 2, + }, + { + ExampleType: "test", + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: 12, + Value: 3, + }, + } + + r, err := samples.ToRecord() + require.NoError(t, err) + + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + + engine := query.NewEngine( + memory.NewGoAllocator(), + db.TableProvider(), + ) + + tests := map[string]struct { + filter logicalplan.Expr + distinct logicalplan.Expr + rows int64 + cols int64 + }{ + "none": { + rows: 3, + cols: 7, + }, + "timestamp filter": { + filter: logicalplan.Col("timestamp").GtEq(logicalplan.Literal(12)), + rows: 1, + cols: 1, 
+ }, + "distinct": { + distinct: logicalplan.DynCol("labels"), + rows: 2, + cols: 3, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + bldr := engine.ScanTable("test") + if test.filter != nil { + bldr = bldr.Filter(test.filter) + } + if test.distinct != nil { + bldr = bldr.Distinct(test.distinct) + } + err = bldr.Execute(ctx, func(_ context.Context, ar arrow.Record) error { + require.Equal(t, test.rows, ar.NumRows()) + require.Equal(t, test.cols, ar.NumCols()) + return nil + }) + require.NoError(t, err) + }) + } + } +} + +func Test_DB_ReadOnlyQuery(t *testing.T) { + config := NewTableConfig( + dynparquet.SampleDefinition(), + ) + + logger := newTestLogger(t) + + dir := t.TempDir() + bucket := objstore.NewInMemBucket() + sinksource := NewDefaultObjstoreBucket(bucket) + + c, err := New( + WithLogger(logger), + WithWAL(), + WithStoragePath(dir), + WithReadWriteStorage(sinksource), + WithActiveMemorySize(100*KiB), + ) + require.NoError(t, err) + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) + table, err := db.Table("test", config) + require.NoError(t, err) + + samples := dynparquet.NewTestSamples() + + ctx := context.Background() + for i := 0; i < 100; i++ { + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + } + require.NoError(t, table.EnsureCompaction()) + require.NoError(t, c.Close()) + + c, err = New( + WithLogger(logger), + WithWAL(), + WithStoragePath(dir), + WithReadWriteStorage(sinksource), + WithActiveMemorySize(100*KiB), + ) + require.NoError(t, err) + defer c.Close() + + // Query with an aggregat query + pool := memory.NewGoAllocator() + engine := query.NewEngine(pool, db.TableProvider()) + err = engine.ScanTable("test"). + Aggregate( + []*logicalplan.AggregationFunction{logicalplan.Sum(logicalplan.Col("value"))}, + []logicalplan.Expr{logicalplan.Col("labels.label2")}, + ). 
+ Execute(context.Background(), func(_ context.Context, _ arrow.Record) error { + return nil + }) + require.NoError(t, err) +} + +// TestDBRecover verifies correct DB recovery with both a WAL and snapshots as +// well as a block rotation (in which case no duplicate data should be in the +// database). +func TestDBRecover(t *testing.T) { + ctx := context.Background() + const ( + dbAndTableName = "test" + numInserts = 3 + ) + setup := func(t *testing.T, blockRotation bool, options ...Option) string { + dir := t.TempDir() + c, err := New( + append([]Option{ + WithLogger(newTestLogger(t)), + WithStoragePath(dir), + WithWAL(), + WithSnapshotTriggerSize(1), + // Disable reclaiming disk space on snapshot (i.e. deleting + // old snapshots and WAL). This allows us to modify on-disk + // state for some tests. + WithTestingOptions(WithTestingNoDiskSpaceReclaimOnSnapshot()), + }, + options..., + )..., + ) + require.NoError(t, err) + defer c.Close() + + db, err := c.DB(ctx, dbAndTableName) + require.NoError(t, err) + schema := dynparquet.SampleDefinition() + table, err := db.Table(dbAndTableName, NewTableConfig(schema)) + require.NoError(t, err) + + // Insert 3 txns. + for i := 0; i < numInserts; i++ { + samples := dynparquet.NewTestSamples() + for j := range samples { + samples[j].Timestamp = int64(i) + } + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + // Wait until a snapshot is written for each write (it is the txn + // immediately preceding the write). This has to be done in a loop, + // otherwise writes may not cause a snapshot given that there + // might be a snapshot in progress. + if i > 0 { + require.Eventually(t, func() bool { + files, err := os.ReadDir(db.snapshotsDir()) + if err != nil { + return false + } + return len(files) == i && !db.snapshotInProgress.Load() + }, 30*time.Second, 100*time.Millisecond) + } + } + // At this point, there should be 2 snapshots. 
One was triggered before + // the second write, and the second was triggered before the third write. + if blockRotation { + // A block rotation should trigger the third snapshot. + require.NoError(t, table.RotateBlock(ctx, table.ActiveBlock())) + // Wait for the snapshot to complete + require.Eventually(t, func() bool { + files, err := os.ReadDir(db.snapshotsDir()) + if err != nil { + return false + } + return len(files) == 3 && !db.snapshotInProgress.Load() + }, 30*time.Second, 100*time.Millisecond) + } + + // Verify that there are now 3 snapshots and their txns. + files, err := os.ReadDir(db.snapshotsDir()) + require.NoError(t, err) + snapshotTxns := make([]uint64, 0, len(files)) + for _, f := range files { + tx, err := getTxFromSnapshotFileName(f.Name()) + require.NoError(t, err) + snapshotTxns = append(snapshotTxns, tx) + } + expectedSnapshots := []uint64{3, 5} + if blockRotation { + expectedSnapshots = append(expectedSnapshots, 8) + } + require.Equal(t, expectedSnapshots, snapshotTxns) + return dir + } + + t.Run("BlockRotation", func(t *testing.T) { + dir := setup(t, true) + c, err := New( + WithLogger(newTestLogger(t)), + WithStoragePath(dir), + WithWAL(), + WithSnapshotTriggerSize(1), + ) + require.NoError(t, err) + defer c.Close() + + db, err := c.DB(ctx, dbAndTableName) + require.NoError(t, err) + + engine := query.NewEngine(memory.DefaultAllocator, db.TableProvider()) + nrows := 0 + require.NoError(t, engine.ScanTable(dbAndTableName). + Distinct(logicalplan.Col("timestamp")). + Execute( + ctx, + func(_ context.Context, r arrow.Record) error { + nrows += int(r.NumRows()) + return nil + })) + // No more timestamps if querying in-memory only, since the data has + // been rotated. + require.Equal(t, 0, nrows) + }) + + // The ability to write and expect a WAL record to be logged is vital on + // database recovery. If it is not the case, writing to the WAL will be + // stuck. 
+ newWriteAndExpectWALRecord := func(t *testing.T, db *DB, table *Table) { + t.Helper() + samples := dynparquet.NewTestSamples() + for i := range samples { + samples[i].Timestamp = numInserts + } + r, err := samples.ToRecord() + require.NoError(t, err) + + writeTx, err := table.InsertRecord(ctx, r) + require.NoError(t, err) + + require.Eventually(t, func() bool { + lastIndex, err := db.wal.LastIndex() + require.NoError(t, err) + return lastIndex >= writeTx + }, time.Second, 10*time.Millisecond) + } + + // Ensure that the WAL is written to after loading from a snapshot. This + // tests a regression detailed in: + // https://github.com/polarsignals/frostdb/issues/390 + t.Run("Issue390", func(t *testing.T) { + dir := setup(t, false) + c, err := New( + WithLogger(newTestLogger(t)), + WithStoragePath(dir), + WithWAL(), + WithSnapshotTriggerSize(1), + ) + require.NoError(t, err) + defer c.Close() + + db, err := c.DB(ctx, dbAndTableName) + require.NoError(t, err) + table, err := db.GetTable(dbAndTableName) + require.NoError(t, err) + newWriteAndExpectWALRecord(t, db, table) + }) + + // OutOfDateSnapshots verifies a scenario in which the WAL has records with + // higher txns than the latest snapshot + t.Run("OutOfDateSnapshots", func(t *testing.T) { + dir := setup(t, false) + + snapshotsPath := filepath.Join(dir, "databases", dbAndTableName, "snapshots") + // Since we snapshot on close, the latest snapshot might not have been + // written yet. 
+ var files []os.DirEntry + require.Eventually(t, func() bool { + var err error + files, err = os.ReadDir(snapshotsPath) + require.NoError(t, err) + return len(files) == 3 + }, 1*time.Second, 100*time.Millisecond) + require.NoError(t, os.RemoveAll(filepath.Join(snapshotsPath, files[len(files)-1].Name()))) + files, err := os.ReadDir(snapshotsPath) + require.NoError(t, err) + require.Equal(t, 2, len(files)) + + c, err := New( + WithLogger(newTestLogger(t)), + WithStoragePath(dir), + WithWAL(), + WithSnapshotTriggerSize(1), + ) + require.NoError(t, err) + defer c.Close() + }) + + // WithBucket ensures normal behavior of recovery in case of graceful + // shutdown of a column store with bucket storage. + t.Run("WithBucket", func(t *testing.T) { + bucket := objstore.NewInMemBucket() + sinksource := NewDefaultObjstoreBucket(bucket) + dir := setup(t, true, WithReadWriteStorage(sinksource)) + + // The previous wal and snapshots directories should be empty since data + // is persisted on Close, rendering the directories useless. 
+ databasesDir := filepath.Join(dir, "databases") + entries, err := os.ReadDir(databasesDir) + require.NoError(t, err) + for _, e := range entries { + dbEntries, err := os.ReadDir(filepath.Join(databasesDir, e.Name())) + require.NoError(t, err) + if len(dbEntries) > 0 { + entryNames := make([]string, 0, len(dbEntries)) + for _, e := range dbEntries { + entryNames = append(entryNames, e.Name()) + } + t.Fatalf("expected an empty dir but found the following entries: %v", entryNames) + } + } + + c, err := New( + WithLogger(newTestLogger(t)), + WithStoragePath(dir), + WithWAL(), + WithSnapshotTriggerSize(1), + WithReadWriteStorage(sinksource), + ) + require.NoError(t, err) + defer c.Close() + + db, err := c.DB(ctx, dbAndTableName) + require.NoError(t, err) + table, err := db.Table(dbAndTableName, NewTableConfig(dynparquet.SampleDefinition())) + require.NoError(t, err) + newWriteAndExpectWALRecord(t, db, table) + }) + + // SnapshotOnRecovery verifies that a snapshot is taken on recovery if the + // WAL indicates that a block was rotated but no snapshot was taken. + t.Run("SnapshotOnRecovery", func(t *testing.T) { + dir := setup(t, false) + c, err := New( + WithLogger(newTestLogger(t)), + WithStoragePath(dir), + WithWAL(), + // This option will disable snapshots on block rotation. 
+ WithSnapshotTriggerSize(0), + ) + require.NoError(t, err) + + snapshotsPath := filepath.Join(dir, "databases", dbAndTableName, "snapshots") + snapshots, err := os.ReadDir(snapshotsPath) + require.NoError(t, err) + + seenSnapshots := make(map[string]struct{}) + for _, s := range snapshots { + seenSnapshots[s.Name()] = struct{}{} + } + + db, err := c.DB(ctx, dbAndTableName) + require.NoError(t, err) + table, err := db.GetTable(dbAndTableName) + require.NoError(t, err) + + require.NoError(t, table.RotateBlock(ctx, table.ActiveBlock())) + + rec, err := dynparquet.NewTestSamples().ToRecord() + require.NoError(t, err) + + insertTx, err := table.InsertRecord(ctx, rec) + require.NoError(t, err) + + // RotateBlock again, this should log a couple of persisted block WAL + // entries. + require.NoError(t, table.RotateBlock(ctx, table.ActiveBlock())) + require.NoError(t, c.Close()) + + c, err = New( + WithLogger(newTestLogger(t)), + WithStoragePath(dir), + WithWAL(), + // Enable snapshots. + WithSnapshotTriggerSize(1), + ) + require.NoError(t, err) + defer c.Close() + + snapshots, err = os.ReadDir(snapshotsPath) + require.NoError(t, err) + + for _, s := range snapshots { + tx, err := getTxFromSnapshotFileName(s.Name()) + require.NoError(t, err) + require.GreaterOrEqual(t, tx, insertTx, "expected only snapshots after insert txn") + } + db, err = c.DB(ctx, dbAndTableName) + require.NoError(t, err) + + require.Eventually(t, func() bool { + numBlockPersists := 0 + require.NoError(t, db.wal.Replay(0, func(_ uint64, entry *walpb.Record) error { + if _, ok := entry.Entry.EntryType.(*walpb.Entry_TableBlockPersisted_); ok { + numBlockPersists++ + } + return nil + })) + return numBlockPersists <= 1 + }, 1*time.Second, 10*time.Millisecond, + "expected at most one block persist entry; the others should have been snapshot and truncated", + ) + }) + + // This test is a regression test to verify that writes completed during + // a block rotation are not lost on recovery. 
+ t.Run("RotationDoesntDropWrites", func(t *testing.T) { + dir := setup(t, false) + c, err := New( + WithLogger(newTestLogger(t)), + WithStoragePath(dir), + WithWAL(), + ) + require.NoError(t, err) + + db, err := c.DB(ctx, dbAndTableName) + require.NoError(t, err) + table, err := db.GetTable(dbAndTableName) + require.NoError(t, err) + + // Simulate starting a write against the active block, this will block + // persistence until this write is finished. + block, finish, err := table.ActiveWriteBlock() + require.NoError(t, err) + + // Rotate the block to create a new active block. + require.NoError(t, table.RotateBlock(ctx, block)) + + // Issue writes. + const nWrites = 5 + expectedTimestamps := make(map[int64]struct{}, nWrites) + for i := 0; i < nWrites; i++ { + samples := dynparquet.NewTestSamples() + timestamp := rand.Int63() + for j := range samples { + samples[j].Timestamp = timestamp + } + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + expectedTimestamps[timestamp] = struct{}{} + } + + // Finalize the block persistence and close the DB. 
+ finish() + + require.NoError(t, c.Close()) + c, err = New( + WithLogger(newTestLogger(t)), + WithStoragePath(dir), + WithWAL(), + ) + require.NoError(t, err) + defer c.Close() + + db, err = c.DB(ctx, dbAndTableName) + require.NoError(t, err) + + require.NoError( + t, + query.NewEngine( + memory.DefaultAllocator, db.TableProvider(), + ).ScanTable(dbAndTableName).Execute(ctx, func(_ context.Context, r arrow.Record) error { + idxs := r.Schema().FieldIndices("timestamp") + require.Len(t, idxs, 1) + tCol := r.Column(idxs[0]).(*array.Int64) + for i := 0; i < tCol.Len(); i++ { + delete(expectedTimestamps, tCol.Value(i)) + } + return nil + }), + ) + require.Len(t, expectedTimestamps, 0, "expected to see all timestamps on recovery, but could not find %v", expectedTimestamps) + }) + + // This is a regression test for a bug found by DST that causes duplicate + // writes on recovery due to an off-by-one error in WAL truncation after + // a snapshot (WAL includes a write that is also in the snapshot). + t.Run("NoDuplicateWrites", func(t *testing.T) { + dir := setup(t, false) + c, err := New( + WithLogger(newTestLogger(t)), + WithStoragePath(dir), + WithWAL(), + ) + require.NoError(t, err) + defer c.Close() + + db, err := c.DB(ctx, dbAndTableName) + require.NoError(t, err) + + // This is deduced based on the fact that `setup` inserts NewTestSamples + // numInserts times. 
+ expectedRowsPerTimestamp := len(dynparquet.NewTestSamples()) + + timestamps := make(map[int64]int, numInserts) + require.NoError( + t, + query.NewEngine( + memory.DefaultAllocator, + db.TableProvider(), + ).ScanTable(dbAndTableName).Execute(ctx, func(_ context.Context, r arrow.Record) error { + idxs := r.Schema().FieldIndices("timestamp") + require.Len(t, idxs, 1) + tCol := r.Column(idxs[0]).(*array.Int64) + for i := 0; i < tCol.Len(); i++ { + timestamps[tCol.Value(i)]++ + } + return nil + }), + ) + require.Len(t, timestamps, numInserts, "expected %d timestamps, but got %d", numInserts, len(timestamps)) + for ts, occurrences := range timestamps { + require.Equal(t, expectedRowsPerTimestamp, occurrences, "expected %d rows for timestamp %d, but got %d", expectedRowsPerTimestamp, ts, occurrences) + } + }) +} + +func Test_DB_WalReplayTableConfig(t *testing.T) { + config := NewTableConfig( + dynparquet.SampleDefinition(), + WithRowGroupSize(10), + ) + + logger := newTestLogger(t) + + dir := t.TempDir() + + c, err := New( + WithLogger(logger), + WithWAL(), + WithStoragePath(dir), + ) + require.NoError(t, err) + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) + table, err := db.Table("test", config) + require.NoError(t, err) + require.Equal(t, uint64(10), table.config.Load().RowGroupSize) + + samples := dynparquet.NewTestSamples() + + ctx := context.Background() + for i := 0; i < 100; i++ { + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + } + require.NoError(t, c.Close()) + + c, err = New( + WithLogger(logger), + WithWAL(), + WithStoragePath(dir), + ) + require.NoError(t, err) + defer c.Close() + + db, err = c.DB(ctx, "test") + require.NoError(t, err) + + table, err = db.GetTable("test") + require.NoError(t, err) + require.Equal(t, uint64(10), table.config.Load().RowGroupSize) +} + +func TestDBMinTXPersisted(t *testing.T) { + ctx := context.Background() + c, err := New() + 
require.NoError(t, err) + defer c.Close() + + db, err := c.DB(ctx, "test") + require.NoError(t, err) + + schema := dynparquet.SampleDefinition() + table, err := db.Table("test", NewTableConfig(schema)) + require.NoError(t, err) + + samples := dynparquet.NewTestSamples() + r, err := samples.ToRecord() + require.NoError(t, err) + writeTx, err := table.InsertRecord(ctx, r) + require.NoError(t, err) + + require.NoError(t, table.RotateBlock(ctx, table.ActiveBlock())) + // Writing the block is asynchronous, so wait for both the new table block + // txn and the block persistence txn. + db.Wait(writeTx + 2) + + require.Equal(t, uint64(1), db.getMinTXPersisted()) + + _, err = db.Table("other", NewTableConfig(schema)) + require.NoError(t, err) + + require.Equal(t, uint64(0), db.getMinTXPersisted()) +} + +// TestReplayBackwardsCompatibility is a test that verifies that new versions of +// the code gracefully handle old versions of the WAL. If this test fails, it +// is likely that production code will break unless old WAL files are cleaned +// up. +// If it is expected that this test will fail, update testdata/oldwal with the +// new WAL files but make sure to delete old WAL files in production before +// deploying new code. 
+func TestReplayBackwardsCompatibility(t *testing.T) { + const storagePath = "testdata/oldwal" + c, err := New(WithWAL(), WithStoragePath(storagePath)) + require.NoError(t, err) + defer c.Close() +} + +func Test_DB_Limiter(t *testing.T) { + config := NewTableConfig( + dynparquet.SampleDefinition(), + ) + + c, err := New( + WithLogger(newTestLogger(t)), + ) + defer func() { + _ = c.Close() + }() + require.NoError(t, err) + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) + table, err := db.Table("test", config) + require.NoError(t, err) + + samples := dynparquet.NewTestSamples() + + ctx := context.Background() + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + + for i := 0; i < 1024; i++ { + t.Run(fmt.Sprintf("limit-%v", i), func(t *testing.T) { + debug := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer debug.AssertSize(t, 0) + pool := query.NewLimitAllocator(int64(i), debug) + engine := query.NewEngine(pool, db.TableProvider()) + err = engine.ScanTable("test"). + Filter( + logicalplan.And( + logicalplan.Col("labels.namespace").Eq(logicalplan.Literal("default")), + ), + ). + Aggregate( + []*logicalplan.AggregationFunction{logicalplan.Sum(logicalplan.Col("value"))}, + []logicalplan.Expr{logicalplan.Col("labels.namespace")}, + ). + Execute(context.Background(), func(_ context.Context, _ arrow.Record) error { + return nil + }) + }) + } +} + +// DropStorage ensures that a database can continue on after drop storage is called. +func Test_DB_DropStorage(t *testing.T) { + logger := newTestLogger(t) + ctx := context.Background() + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(t, 0) + + config := NewTableConfig( + dynparquet.SampleDefinition(), + ) + + dir := t.TempDir() + + // Use an actual prometheus registry to test duplicate metrics + // registration. 
+ c, err := New( + WithLogger(logger), + WithRegistry(prometheus.NewRegistry()), + WithWAL(), + WithStoragePath(dir), + WithActiveMemorySize(1*MiB), + ) + defer func() { + _ = c.Close() + }() + require.NoError(t, err) + const dbAndTableName = "test" + db, err := c.DB(ctx, dbAndTableName) + require.NoError(t, err) + table, err := db.Table(dbAndTableName, config) + require.NoError(t, err) + + samples := dynparquet.NewTestSamples() + + for i := 0; i < 100; i++ { + r, err := samples.ToRecord() + require.NoError(t, err) + defer r.Release() + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + } + countRows := func(expected int) { + rows := 0 + engine := query.NewEngine(mem, db.TableProvider()) + err = engine.ScanTable(dbAndTableName). + Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + rows += int(r.NumRows()) + return nil + }) + require.NoError(t, err) + require.Equal(t, expected, rows) + } + countRows(300) + + level.Debug(logger).Log("msg", "dropping storage") + require.NoError(t, c.DropDB(dbAndTableName)) + + // Getting without creating a DB should return an error. + _, err = c.GetDB(dbAndTableName) + require.Error(t, err) + + // Opening a new DB with the same name should be fine. + db, err = c.DB(ctx, dbAndTableName) + require.NoError(t, err) + // A table as well. + table, err = db.Table(dbAndTableName, config) + require.NoError(t, err) + + r, err := samples.ToRecord() + require.NoError(t, err) + defer r.Release() + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + // Expect a three rows in the table. + countRows(3) + + // Dropping twice should be valid as well. + require.NoError(t, c.DropDB(dbAndTableName)) + + // Open a new store against the dropped storage, and expect empty db. 
+ c, err = New( + WithLogger(logger), + WithWAL(), + WithStoragePath(dir), + WithActiveMemorySize(1*MiB), + ) + defer func() { + _ = c.Close() + }() + require.NoError(t, err) + level.Debug(logger).Log("msg", "opening new db") + db, err = c.DB(ctx, dbAndTableName) + require.NoError(t, err) + _, err = db.Table(dbAndTableName, config) + require.NoError(t, err) + countRows(0) +} + +func Test_DB_EngineInMemory(t *testing.T) { + config := NewTableConfig( + dynparquet.SampleDefinition(), + ) + + logger := newTestLogger(t) + + dir := t.TempDir() + bucket := objstore.NewInMemBucket() + + sinksource := NewDefaultObjstoreBucket(bucket) + + c, err := New( + WithLogger(logger), + WithStoragePath(dir), + WithReadWriteStorage(sinksource), + WithActiveMemorySize(100*1024), + ) + require.NoError(t, err) + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) + table, err := db.Table("test", config) + require.NoError(t, err) + + samples := dynparquet.NewTestSamples() + + ctx := context.Background() + for i := 0; i < 100; i++ { + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + } + require.NoError(t, c.Close()) + + c, err = New( + WithLogger(logger), + WithStoragePath(dir), + WithReadWriteStorage(sinksource), + WithActiveMemorySize(100*1024), + ) + require.NoError(t, err) + defer c.Close() + + db, err = c.DB(context.Background(), "test") + require.NoError(t, err) + + _, err = db.Table("test", config) + require.NoError(t, err) + + pool := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer pool.AssertSize(t, 0) + engine := query.NewEngine(pool, db.TableProvider(), query.WithPhysicalplanOptions(physicalplan.WithReadMode(logicalplan.ReadModeInMemoryOnly))) + err = engine.ScanTable("test"). + Aggregate( + []*logicalplan.AggregationFunction{logicalplan.Sum(logicalplan.Col("value"))}, + []logicalplan.Expr{logicalplan.Col("labels.namespace")}, + ). 
+ Execute(context.Background(), func(_ context.Context, _ arrow.Record) error { + t.FailNow() // should not be called + return nil + }) + require.NoError(t, err) + + engine = query.NewEngine(pool, db.TableProvider()) + err = engine.ScanTable("test"). + Aggregate( + []*logicalplan.AggregationFunction{logicalplan.Sum(logicalplan.Col("value"))}, + []logicalplan.Expr{logicalplan.Col("labels.namespace")}, + ). + Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + require.Equal(t, int64(2), r.NumRows()) + return nil + }) + require.NoError(t, err) +} + +func Test_DB_SnapshotOnClose(t *testing.T) { + config := NewTableConfig( + dynparquet.SampleDefinition(), + ) + + logger := newTestLogger(t) + dir := t.TempDir() + + c, err := New( + WithLogger(logger), + WithStoragePath(dir), + WithWAL(), + WithActiveMemorySize(1*GiB), + WithSnapshotTriggerSize(1*GiB), + WithManualBlockRotation(), + ) + require.NoError(t, err) + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) + table, err := db.Table("test", config) + require.NoError(t, err) + + samples := dynparquet.NewTestSamples() + + ctx := context.Background() + for i := 0; i < 100; i++ { + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + } + require.NoError(t, c.Close()) + + // Check that we have a snapshot + found := false + require.NoError(t, filepath.WalkDir(dir, func(path string, _ fs.DirEntry, _ error) error { + if filepath.Ext(path) == ".fdbs" { + found = true + } + return nil + })) + require.True(t, found) +} + +func Test_DB_All(t *testing.T) { + config := NewTableConfig( + dynparquet.SampleDefinition(), + ) + + logger := newTestLogger(t) + + c, err := New( + WithLogger(logger), + ) + require.NoError(t, err) + defer c.Close() + + db, err := c.DB(context.Background(), t.Name()) + require.NoError(t, err) + defer os.RemoveAll(t.Name()) + table, err := db.Table(t.Name(), config) + require.NoError(t, err) + + 
samples := dynparquet.Samples{{ + ExampleType: "test", + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: 1, + Value: 1, + }, { + ExampleType: "test", + Labels: map[string]string{ + "label1": "value2", + "label2": "value2", + "label3": "value3", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: 2, + Value: 2, + }, { + ExampleType: "test", + Labels: map[string]string{ + "label1": "value3", + "label2": "value2", + "label4": "value4", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: 3, + Value: 3, + }} + + r, err := samples.ToRecord() + require.NoError(t, err) + + ctx := context.Background() + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + + pool := memory.NewGoAllocator() + engine := query.NewEngine(pool, db.TableProvider()) + err = engine.ScanTable(t.Name()). + Project(logicalplan.All()). + Filter(logicalplan.Col("timestamp").GtEq(logicalplan.Literal(2))). 
+ Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + require.Equal(t, int64(2), r.NumRows()) + require.Equal(t, int64(8), r.NumCols()) + return nil + }) + require.NoError(t, err) +} + +func Test_DB_PrehashedStorage(t *testing.T) { + config := NewTableConfig( + dynparquet.PrehashedSampleDefinition(), + ) + + bucket := objstore.NewInMemBucket() + sinksource := NewDefaultObjstoreBucket(bucket) + logger := newTestLogger(t) + + c, err := New( + WithLogger(logger), + WithReadWriteStorage(sinksource), + ) + require.NoError(t, err) + + db, err := c.DB(context.Background(), t.Name()) + require.NoError(t, err) + defer os.RemoveAll(t.Name()) + table, err := db.Table(t.Name(), config) + require.NoError(t, err) + + samples := dynparquet.Samples{{ + ExampleType: "test", + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: 1, + Value: 1, + }, { + ExampleType: "test", + Labels: map[string]string{ + "label1": "value2", + "label2": "value2", + "label3": "value3", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: 2, + Value: 2, + }, { + ExampleType: "test", + Labels: map[string]string{ + "label1": "value3", + "label2": "value2", + "label4": "value4", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: 3, + Value: 3, + }} + + r, err := samples.ToRecord() + require.NoError(t, err) + + ctx := context.Background() + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + + // Gracefully 
close the db to persist blocks + c.Close() + + c, err = New( + WithLogger(logger), + WithReadWriteStorage(sinksource), + ) + require.NoError(t, err) + defer c.Close() + + db, err = c.DB(context.Background(), t.Name()) + require.NoError(t, err) + table, err = db.Table(t.Name(), config) + require.NoError(t, err) + + // Read the raw data back and expect prehashed columns to be returned + allocator := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer allocator.AssertSize(t, 0) + err = table.View(ctx, func(ctx context.Context, tx uint64) error { + return table.Iterator( + ctx, + tx, + allocator, + []logicalplan.Callback{func(_ context.Context, ar arrow.Record) error { + require.Equal(t, int64(3), ar.NumRows()) + require.Equal(t, int64(13), ar.NumCols()) + return nil + }}, + ) + }) + require.NoError(t, err) +} + +// TestDBConcurrentOpen verifies that concurrent calls to open a DB do not +// result in a panic (most likely due to duplicate metrics registration). +func TestDBConcurrentOpen(t *testing.T) { + const ( + concurrency = 16 + dbName = "test" + ) + + bucket := objstore.NewInMemBucket() + sinksource := NewDefaultObjstoreBucket(bucket) + logger := newTestLogger(t) + tempDir := t.TempDir() + + c, err := New( + WithLogger(logger), + WithReadWriteStorage(sinksource), + WithWAL(), + WithStoragePath(tempDir), + ) + require.NoError(t, err) + defer c.Close() + + var errg errgroup.Group + for i := 0; i < concurrency; i++ { + errg.Go(func() error { + return recovery.Do(func() error { + _, err := c.DB(context.Background(), dbName) + return err + })() + }) + } + require.NoError(t, errg.Wait()) +} + +func Test_DB_WithParquetDiskCompaction(t *testing.T) { + config := NewTableConfig( + dynparquet.SampleDefinition(), + ) + + logger := newTestLogger(t) + + cfg := DefaultIndexConfig() + cfg[0].Type = index.CompactionTypeParquetDisk // Create disk compaction + cfg[1].Type = index.CompactionTypeParquetDisk + c, err := New( + WithLogger(logger), + WithIndexConfig(cfg), + 
WithWAL(), + WithStoragePath(t.TempDir()), + ) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, c.Close()) + }) + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) + table, err := db.Table("test", config) + require.NoError(t, err) + + samples := dynparquet.NewTestSamples() + + ctx := context.Background() + for i := 0; i < 100; i++ { + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + } + require.NoError(t, table.EnsureCompaction()) + + // Ensure that disk compacted data can be recovered + pool := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer pool.AssertSize(t, 0) + rows := int64(0) + err = table.View(ctx, func(ctx context.Context, tx uint64) error { + return table.Iterator( + ctx, + tx, + pool, + []logicalplan.Callback{func(_ context.Context, ar arrow.Record) error { + rows += ar.NumRows() + return nil + }}, + ) + }) + require.NoError(t, err) + require.Equal(t, int64(300), rows) +} + +func Test_DB_PersistentDiskCompaction(t *testing.T) { + t.Parallel() + config := NewTableConfig( + dynparquet.SampleDefinition(), + ) + logger := newTestLogger(t) + + type write struct { + n int + ensureCompaction bool + op func(table *Table, dir string) + } + + defaultWrites := []write{ + { + n: 100, + ensureCompaction: true, }, { - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, - {Name: "label3", Value: "value3"}, + n: 100, + ensureCompaction: true, + }, + { + n: 100, + ensureCompaction: true, + }, + { + n: 100, + }, + } + + tests := map[string]struct { + beforeReplay func(table *Table, dir string) + beforeClose func(table *Table, dir string) + lvl2Parts int + lvl1Parts int + finalRows int64 + writes []write + }{ + "happy path": { + beforeReplay: func(_ *Table, _ string) {}, + beforeClose: func(_ *Table, _ string) {}, + lvl2Parts: 3, + lvl1Parts: 1, + finalRows: 1200, + writes: 
defaultWrites, + }, + "corrupted L1": { + beforeClose: func(_ *Table, _ string) {}, + lvl2Parts: 3, + lvl1Parts: 1, + finalRows: 1200, + writes: defaultWrites, + beforeReplay: func(table *Table, _ string) { + // Corrupt the LSM file; this should trigger a recovery from the WAL + l1Dir := filepath.Join(table.db.indexDir(), "test", table.active.ulid.String(), "L1") + levelFile := filepath.Join(l1Dir, fmt.Sprintf("00000000000000000000%s", index.IndexFileExtension)) + info, err := os.Stat(levelFile) + require.NoError(t, err) + require.NoError(t, os.Truncate(levelFile, info.Size()-1)) // truncate the last byte to pretend the write didn't finish }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + "corrupted L2": { + beforeClose: func(_ *Table, _ string) {}, + lvl2Parts: 4, + lvl1Parts: 0, + finalRows: 1200, + beforeReplay: func(table *Table, _ string) { + // Corrupt the LSM file; this should trigger a recovery from the WAL + l2Dir := filepath.Join(table.db.indexDir(), "test", table.active.ulid.String(), "L2") + levelFile := filepath.Join(l2Dir, fmt.Sprintf("00000000000000000000%s", index.IndexFileExtension)) + info, err := os.Stat(levelFile) + require.NoError(t, err) + require.NoError(t, os.Truncate(levelFile, info.Size()-1)) // truncate the last byte to pretend the write didn't finish + }, + writes: []write{ + { + n: 100, + ensureCompaction: true, + }, + { + n: 100, + ensureCompaction: true, + }, + { + n: 100, + ensureCompaction: true, + }, + { + n: 100, + ensureCompaction: true, + }, }, - Timestamp: 11, - Value: 2, }, - { - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + "L1 cleanup failure with corruption": { // Compaction from L1->L2 happens, but L1 is not cleaned up and L2 gets corrupted + beforeClose: func(_ *Table, _ string) {}, + 
lvl2Parts: 1, + lvl1Parts: 0, + finalRows: 300, + beforeReplay: func(table *Table, _ string) { + // Copy the L2 file back to L1 to simulate that it was not cleaned up + l2Dir := filepath.Join(table.db.indexDir(), "test", table.active.ulid.String(), "L2") + l2File := filepath.Join(l2Dir, fmt.Sprintf("00000000000000000000%s", index.IndexFileExtension)) + l2, err := os.Open(l2File) + require.NoError(t, err) + + l1Dir := filepath.Join(table.db.indexDir(), "test", table.active.ulid.String(), "L1") + l1File := filepath.Join(l1Dir, fmt.Sprintf("00000000000000000000%s", index.IndexFileExtension)) + l1, err := os.Create(l1File) + require.NoError(t, err) + + _, err = io.Copy(l1, l2) + require.NoError(t, err) + require.NoError(t, l2.Close()) + require.NoError(t, l1.Close()) + + // Corrupt the L2 file + info, err := os.Stat(l2File) + require.NoError(t, err) + require.NoError(t, os.Truncate(l2File, info.Size()-1)) // truncate the last byte to pretend the write didn't finish }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + writes: []write{ + { + n: 100, + ensureCompaction: true, + }, + }, + }, + "L1 cleanup failure": { // Compaction from L1->L2 happens, but L1 is not cleaned up + beforeClose: func(_ *Table, _ string) {}, + lvl2Parts: 1, + lvl1Parts: 0, + finalRows: 300, + beforeReplay: func(table *Table, _ string) { + // Copy the L2 file back to L1 to simulate that it was not cleaned up + l2Dir := filepath.Join(table.db.indexDir(), "test", table.active.ulid.String(), "L2") + l2File := filepath.Join(l2Dir, fmt.Sprintf("00000000000000000000%s", index.IndexFileExtension)) + l2, err := os.Open(l2File) + require.NoError(t, err) + + l1Dir := filepath.Join(table.db.indexDir(), "test", table.active.ulid.String(), "L1") + l1File := filepath.Join(l1Dir, fmt.Sprintf("00000000000000000000%s", index.IndexFileExtension)) + l1, err := 
os.Create(l1File) + require.NoError(t, err) + + _, err = io.Copy(l1, l2) + require.NoError(t, err) + require.NoError(t, l2.Close()) + require.NoError(t, l1.Close()) + }, + writes: []write{ + { + n: 100, + ensureCompaction: true, + }, + }, + }, + "L1 cleanup failure with corruption after snapshot": { // Snapshot happens after L1 compaction; Then compaction happens from L1->L2, but L1 is not cleaned up and L2 gets corrupted + beforeClose: func(_ *Table, _ string) {}, + lvl2Parts: 1, + lvl1Parts: 0, + finalRows: 303, + beforeReplay: func(table *Table, _ string) { + // Move the save files back into L1 to simulate an unssuccessful cleanup after L1 compaction + l1Dir := filepath.Join(table.db.indexDir(), "test", table.active.ulid.String(), "L1") + saveDir := filepath.Join(table.db.indexDir(), "save") + require.NoError(t, filepath.WalkDir(saveDir, func(path string, _ fs.DirEntry, _ error) error { + if filepath.Ext(path) == index.IndexFileExtension { + // Move the save file back to the original file + sv, err := os.Open(path) + require.NoError(t, err) + defer sv.Close() + + lvl, err := os.Create(filepath.Join(l1Dir, filepath.Base(path))) + require.NoError(t, err) + defer lvl.Close() + + _, err = io.Copy(lvl, sv) + require.NoError(t, err) + } + return nil + })) + + // Corrupt the L2 file to simulate that the failure occurred when writing the L2 file + l2Dir := filepath.Join(table.db.indexDir(), "test", table.active.ulid.String(), "L2") + l2File := filepath.Join(l2Dir, fmt.Sprintf("00000000000000000000%s", index.IndexFileExtension)) + info, err := os.Stat(l2File) + require.NoError(t, err) + require.NoError(t, os.Truncate(l2File, info.Size()-1)) // truncate the last byte to pretend the write didn't finish + }, + writes: []write{ + { // Get to L1 compaction + n: 100, + op: func(table *Table, _ string) { + require.Eventually(t, func() bool { + return table.active.index.LevelSize(index.L1) != 0 + }, time.Second, time.Millisecond*10) + }, + }, + { // trigger a snapshot (leaving 
the only valid data in L1) + op: func(table *Table, _ string) { + tx := table.db.highWatermark.Load() + require.NoError(t, table.db.snapshotAtTX(context.Background(), tx, table.db.snapshotWriter(tx))) + }, + }, + { + op: func(table *Table, dir string) { // save the L1 files to restore them + l1Dir := filepath.Join(table.db.indexDir(), "test", table.active.ulid.String(), "L1") + saveDir := filepath.Join(dir, "save") + require.NoError(t, os.MkdirAll(saveDir, 0o755)) // Create a directory to save the files + require.NoError(t, filepath.WalkDir(l1Dir, func(path string, _ fs.DirEntry, _ error) error { + if filepath.Ext(path) == index.IndexFileExtension { + lvl, err := os.Open(path) + require.NoError(t, err) + defer lvl.Close() + + sv, err := os.Create(filepath.Join(saveDir, filepath.Base(path))) + require.NoError(t, err) + defer sv.Close() + + _, err = io.Copy(sv, lvl) + require.NoError(t, err) + } + return nil + })) + }, + }, + { // Compact L1->L2 + n: 1, // New write to "trigger" the compaction + ensureCompaction: true, + }, + }, + }, + "snapshot": { + beforeReplay: func(_ *Table, _ string) {}, + beforeClose: func(table *Table, _ string) { + // trigger a snapshot + success := false + table.db.snapshot(context.Background(), false, func() { + success = true + }) + require.True(t, success) + + // Write more to the table and trigger a compaction + samples := dynparquet.NewTestSamples() + for j := 0; j < 100; j++ { + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(context.Background(), r) + require.NoError(t, err) + } + require.Eventually(t, func() bool { + return table.active.index.LevelSize(index.L1) != 0 + }, time.Second, time.Millisecond*10) + }, + lvl2Parts: 3, + lvl1Parts: 2, + finalRows: 1500, + writes: defaultWrites, + }, + "corruption after snapshot": { + beforeReplay: func(table *Table, _ string) { + l1Dir := filepath.Join(table.db.indexDir(), "test", table.active.ulid.String(), "L1") + levelFile := filepath.Join(l1Dir, 
fmt.Sprintf("00000000000000000001%v", index.IndexFileExtension)) + info, err := os.Stat(levelFile) + require.NoError(t, err) + require.NoError(t, os.Truncate(levelFile, info.Size()-1)) // truncate the last byte to pretend the write didn't finish + }, + beforeClose: func(table *Table, _ string) { + // trigger a snapshot + tx := table.db.highWatermark.Load() + require.NoError(t, table.db.snapshotAtTX(context.Background(), tx, table.db.snapshotWriter(tx))) + + // Write more to the table and trigger a compaction + samples := dynparquet.NewTestSamples() + for j := 0; j < 100; j++ { + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(context.Background(), r) + require.NoError(t, err) + } + require.Eventually(t, func() bool { + return table.active.index.LevelSize(index.L1) != 0 + }, time.Second, time.Millisecond*10) + }, + lvl2Parts: 3, + lvl1Parts: 2, + finalRows: 1500, + writes: defaultWrites, + }, + } + + for name, test := range tests { + test := test + t.Run(name, func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + cfg := []*index.LevelConfig{ + {Level: index.L0, MaxSize: 25700, Type: index.CompactionTypeParquetDisk}, + {Level: index.L1, MaxSize: 1024 * 1024 * 128, Type: index.CompactionTypeParquetDisk}, + {Level: index.L2, MaxSize: 1024 * 1024 * 512}, + } + c, err := New( + WithLogger(logger), + WithIndexConfig(cfg), + WithStoragePath(dir), + WithWAL(), + ) + t.Cleanup(func() { + require.NoError(t, c.Close()) + }) + require.NoError(t, err) + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) + table, err := db.Table("test", config) + require.NoError(t, err) + + samples := dynparquet.NewTestSamples() + + ctx := context.Background() + insertedRows := int64(0) + for _, w := range test.writes { + for j := 0; j < w.n; j++ { + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + insertedRows += r.NumRows() + require.NoError(t, err) + } + if w.ensureCompaction { + 
require.NoError(t, table.EnsureCompaction()) + } + if w.op != nil { + w.op(table, db.indexDir()) + } + } + + if test.lvl1Parts > 0 { + require.Eventually(t, func() bool { + return table.active.index.LevelSize(index.L1) != 0 + }, 30*time.Second, time.Millisecond*10) + } + + rows := func(db *DB) int64 { + pool := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer pool.AssertSize(t, 0) + rows := int64(0) + engine := query.NewEngine(pool, db.TableProvider()) + err = engine.ScanTable("test"). + Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + rows += r.NumRows() + return nil + }) + require.NoError(t, err) + return rows + } + + // Ensure that disk compacted data can be recovered + require.Equal(t, insertedRows, rows(db)) + + test.beforeClose(table, dir) + + // Close the database + require.NoError(t, c.Close()) + + // Run the beforeReplay hook + test.beforeReplay(table, dir) + + // Reopen database; expect it to recover from the LSM files and WAL + c, err = New( + WithLogger(logger), + WithIndexConfig(cfg), + WithStoragePath(dir), + WithWAL(), + ) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, c.Close()) + }) + db, err = c.DB(context.Background(), "test") + require.NoError(t, err) + table, err = db.Table("test", config) + require.NoError(t, err) + + // Check the final row count + require.Equal(t, test.finalRows, rows(db)) + + // Now write more data after a recovery into the levels and ensure that it can be read back. 
+ + // L1 writes + l1Before := table.active.index.LevelSize(index.L1) + for j := 0; j < 100; j++ { + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + } + require.Eventually(t, func() bool { + return table.active.index.LevelSize(index.L1) != l1Before + }, 30*time.Second, time.Millisecond*10) + require.Equal(t, test.finalRows+300, rows(db)) + + // Move that data to L2 + require.NoError(t, table.EnsureCompaction()) + require.Equal(t, test.finalRows+300, rows(db)) + }) + } +} + +// Ensure data integrity when a block is rotated. +func Test_DB_PersistentDiskCompaction_BlockRotation(t *testing.T) { + t.Parallel() + config := NewTableConfig( + dynparquet.SampleDefinition(), + ) + logger := newTestLogger(t) + bucket := objstore.NewInMemBucket() + sinksource := NewDefaultObjstoreBucket(bucket) + + dir := t.TempDir() + cfg := []*index.LevelConfig{ + {Level: index.L0, MaxSize: 25700, Type: index.CompactionTypeParquetDisk}, + {Level: index.L1, MaxSize: 1024 * 1024 * 128, Type: index.CompactionTypeParquetDisk}, + {Level: index.L2, MaxSize: 1024 * 1024 * 512}, + } + c, err := New( + WithLogger(logger), + WithIndexConfig(cfg), + WithStoragePath(dir), + WithWAL(), + WithManualBlockRotation(), + WithReadWriteStorage(sinksource), + ) + t.Cleanup(func() { + require.NoError(t, c.Close()) + }) + require.NoError(t, err) + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) + table, err := db.Table("test", config) + require.NoError(t, err) + + samples := dynparquet.NewTestSamples() + + ctx := context.Background() + compactions := 3 // 3 compacted parts in L2 file + for i := 0; i < compactions; i++ { + for j := 0; j < 100; j++ { + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + } + require.NoError(t, table.EnsureCompaction()) + } + + // Get data in L1 + for j := 0; j < 100; j++ { + r, err := samples.ToRecord() + 
require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + } + require.Eventually(t, func() bool { + return table.active.index.LevelSize(index.L1) != 0 + }, time.Second, time.Millisecond*10) + + validateRows := func(expected int64) { + pool := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer pool.AssertSize(t, 0) + rows := int64(0) + engine := query.NewEngine(pool, db.TableProvider()) + err = engine.ScanTable("test"). + Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + rows += r.NumRows() + return nil + }) + require.NoError(t, err) + require.Equal(t, expected, rows) + } + + validateRows(1200) + + // Rotate block + require.NoError(t, table.RotateBlock(context.Background(), table.ActiveBlock())) + + validateRows(1200) + + for i := 0; i < compactions; i++ { + for j := 0; j < 100; j++ { + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + } + require.NoError(t, table.EnsureCompaction()) + } + + // Get data in L1 + for j := 0; j < 100; j++ { + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + } + require.Eventually(t, func() bool { + return table.active.index.LevelSize(index.L1) != 0 + }, time.Second, time.Millisecond*10) + + validateRows(2400) +} + +func Test_DB_PersistentDiskCompaction_NonOverlappingCompaction(t *testing.T) { + t.Parallel() + config := NewTableConfig( + dynparquet.SampleDefinition(), + ) + logger := newTestLogger(t) + + dir := t.TempDir() + cfg := []*index.LevelConfig{ + {Level: index.L0, MaxSize: 336, Type: index.CompactionTypeParquetDisk}, + {Level: index.L1, MaxSize: 1024 * 1024 * 128, Type: index.CompactionTypeParquetDisk}, + {Level: index.L2, MaxSize: 1024 * 1024 * 512}, + } + c, err := New( + WithLogger(logger), + WithIndexConfig(cfg), + WithStoragePath(dir), + WithWAL(), + WithManualBlockRotation(), + ) + t.Cleanup(func() { + 
require.NoError(t, c.Close()) + }) + require.NoError(t, err) + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) + table, err := db.Table("test", config) + require.NoError(t, err) + + ctx := context.Background() + for i := 0; i < 3; i++ { // Create non-overlapping samples for compaction + samples := dynparquet.GenerateTestSamples(1) + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + } + require.Eventually(t, func() bool { + return table.active.index.LevelSize(index.L1) != 0 + }, time.Second, time.Millisecond*10) + + // Snapshot the file + success := false + table.db.snapshot(context.Background(), false, func() { + success = true + }) + require.True(t, success) + + // Close the database + require.NoError(t, c.Close()) + + // Recover from the snapshot + c, err = New( + WithLogger(logger), + WithIndexConfig(cfg), + WithStoragePath(dir), + WithWAL(), + ) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, c.Close()) + }) + db, err = c.DB(context.Background(), "test") + require.NoError(t, err) + table, err = db.Table("test", config) + require.NoError(t, err) + + validateRows := func(expected int64) { + pool := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer pool.AssertSize(t, 0) + rows := int64(0) + engine := query.NewEngine(pool, db.TableProvider()) + err = engine.ScanTable("test"). 
+ Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + rows += r.NumRows() + return nil + }) + require.NoError(t, err) + require.Equal(t, expected, rows) + } + + validateRows(3) +} + +func TestDBWatermarkBubbling(t *testing.T) { + c, err := New() + require.NoError(t, err) + defer c.Close() + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) + + const nTxns = 100 + var wg sync.WaitGroup + wg.Add(nTxns) + for i := 0; i < nTxns; i++ { + _, _, commit := db.begin() + go func() { + defer wg.Done() + commit() + }() + } + wg.Wait() + + require.Eventually(t, func() bool { + return db.HighWatermark() == nTxns + }, time.Second, 10*time.Millisecond) +} + +// Test_DB_SnapshotNewerData verifies that newer data that is compacted doesn't cause duplicate or loss of data on replay. +func Test_DB_SnapshotNewerData(t *testing.T) { + t.Parallel() + + logger := newTestLogger(t) + tests := map[string]struct { + create func(dir string) *ColumnStore + }{ + "memory": { + create: func(dir string) *ColumnStore { + c, err := New( + WithLogger(logger), + WithIndexConfig(DefaultIndexConfig()), + WithStoragePath(dir), + WithWAL(), + WithManualBlockRotation(), + ) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, c.Close()) + }) + return c + }, + }, + "disk": { + create: func(dir string) *ColumnStore { + cfg := DefaultIndexConfig() + cfg[0].Type = index.CompactionTypeParquetDisk + cfg[1].Type = index.CompactionTypeParquetDisk + c, err := New( + WithLogger(logger), + WithIndexConfig(cfg), + WithStoragePath(dir), + WithWAL(), + WithManualBlockRotation(), + ) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, c.Close()) + }) + return c }, - Timestamp: 12, - Value: 3, }, } - ps, err := table.Schema().DynamicParquetSchema(map[string][]string{ - "labels": {"label1", "label2", "label3"}, + for name, test := range tests { + t.Run(name, func(t *testing.T) { + config := NewTableConfig( + dynparquet.SampleDefinition(), + ) + 
+ dir := t.TempDir() + c := test.create(dir) + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) + table, err := db.Table("test", config) + require.NoError(t, err) + + // Insert record to increase tx + ctx := context.Background() + for i := 0; i < 3; i++ { + samples := dynparquet.GenerateTestSamples(3) + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + } + + // Get a snapshot tx + tx, _, commit := db.begin() + + // Insert another write, and force compaction + samples := dynparquet.GenerateTestSamples(3) + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + require.NoError(t, table.EnsureCompaction()) + + // Now perform the snapshot + db.Wait(tx - 1) + err = db.wal.Log( + tx, + &walpb.Record{ + Entry: &walpb.Entry{ + EntryType: &walpb.Entry_Snapshot_{Snapshot: &walpb.Entry_Snapshot{Tx: tx}}, + }, + }, + ) + require.NoError(t, err) + require.NoError(t, db.snapshotAtTX(ctx, tx, db.offlineSnapshotWriter(tx))) + commit() + require.NoError(t, db.wal.Truncate(tx)) + time.Sleep(1 * time.Second) // wal flushes every 50ms + + // Close the database + require.NoError(t, c.Close()) + + // Recover from the snapshot + c = test.create(dir) + t.Cleanup(func() { + require.NoError(t, c.Close()) + }) + db, err = c.DB(context.Background(), "test") + require.NoError(t, err) + + validateRows := func(expected int64) { + pool := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer pool.AssertSize(t, 0) + rows := int64(0) + engine := query.NewEngine(pool, db.TableProvider()) + err = engine.ScanTable("test"). 
+ Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + rows += r.NumRows() + return nil + }) + require.NoError(t, err) + require.Equal(t, expected, rows) + } + + validateRows(12) + }) + } +} + +// Test_DB_SnapshotDuplicate verifies that if we attempt to take a snapshot again at the same tx that it will abort if the current snapshot is valid. +func Test_DB_SnapshotDuplicate(t *testing.T) { + t.Parallel() + config := NewTableConfig( + dynparquet.SampleDefinition(), + ) + logger := newTestLogger(t) + + cfg := []*index.LevelConfig{ + {Level: index.L0, MaxSize: 336, Type: index.CompactionTypeParquetDisk}, + {Level: index.L1, MaxSize: 1024 * 1024 * 128, Type: index.CompactionTypeParquetDisk}, + {Level: index.L2, MaxSize: 1024 * 1024 * 512}, + } + dir := t.TempDir() + c, err := New( + WithLogger(logger), + WithIndexConfig(cfg), + WithStoragePath(dir), + WithWAL(), + WithManualBlockRotation(), + WithSnapshotTriggerSize(1024*1024*1024), // we just need a non-zero value; but don't want the writes to unexpectedly trigger a snapshot + ) + t.Cleanup(func() { + require.NoError(t, c.Close()) }) require.NoError(t, err) + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) + table, err := db.Table("test", config) + require.NoError(t, err) + + ctx := context.Background() + samples := dynparquet.GenerateTestSamples(10) + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + + require.NoError(t, table.EnsureCompaction()) + + // Snapshot the file + success := false + table.db.snapshot(context.Background(), false, func() { + success = true + }) + require.True(t, success) + + require.NoError(t, db.wal.Truncate(2)) + time.Sleep(1 * time.Second) // wal flushes every 50ms + + // Close the database to trigger a snapshot on close + require.NoError(t, c.Close()) - sc, err := pqarrow.ParquetSchemaToArrowSchema(ctx, ps, logicalplan.IterOptions{}) + // Reopen the database and expect 
the data to be there + c, err = New( + WithLogger(logger), + WithIndexConfig(cfg), + WithStoragePath(dir), + WithWAL(), + WithManualBlockRotation(), + WithSnapshotTriggerSize(1024*1024*1024), // we just need a non-zero value; but don't want the writes to unexpectedly trigger a snapshot + ) + t.Cleanup(func() { + require.NoError(t, c.Close()) + }) require.NoError(t, err) + db, err = c.DB(context.Background(), "test") + require.NoError(t, err) + + validateRows := func(expected int64) { + pool := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer pool.AssertSize(t, 0) + rows := int64(0) + engine := query.NewEngine(pool, db.TableProvider()) + err = engine.ScanTable("test"). + Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + rows += r.NumRows() + return nil + }) + require.NoError(t, err) + require.Equal(t, expected, rows) + } + + validateRows(10) +} + +func Test_DB_SnapshotDuplicate_Corrupted(t *testing.T) { + t.Parallel() + config := NewTableConfig( + dynparquet.SampleDefinition(), + ) + logger := newTestLogger(t) - r, err := samples.ToRecord(sc) + cfg := []*index.LevelConfig{ + {Level: index.L0, MaxSize: 336, Type: index.CompactionTypeParquetDisk}, + {Level: index.L1, MaxSize: 1024 * 1024 * 128, Type: index.CompactionTypeParquetDisk}, + {Level: index.L2, MaxSize: 1024 * 1024 * 512}, + } + dir := t.TempDir() + c, err := New( + WithLogger(logger), + WithIndexConfig(cfg), + WithStoragePath(dir), + WithWAL(), + WithManualBlockRotation(), + WithSnapshotTriggerSize(1024*1024*1024), // we just need a non-zero value; but don't want the writes to unexpectedly trigger a snapshot + ) + t.Cleanup(func() { + require.NoError(t, c.Close()) + }) + require.NoError(t, err) + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) + table, err := db.Table("test", config) require.NoError(t, err) + ctx := context.Background() + samples := dynparquet.GenerateTestSamples(10) + r, err := samples.ToRecord() + require.NoError(t, err) _, 
err = table.InsertRecord(ctx, r) require.NoError(t, err) - engine := query.NewEngine( - memory.NewGoAllocator(), - db.TableProvider(), + require.NoError(t, table.EnsureCompaction()) + + // Snapshot the file + success := false + table.db.snapshot(context.Background(), false, func() { + success = true + }) + require.True(t, success) + + require.NoError(t, db.wal.Truncate(2)) + time.Sleep(1 * time.Second) // wal flushes every 50ms + + // Corrupt the snapshot file. + require.NoError(t, os.Truncate(filepath.Join(SnapshotDir(db, 3), snapshotFileName(3)), 100)) + + // Close the database to trigger a snapshot on close + require.NoError(t, c.Close()) + + // Reopen the database and expect the data to be there + c, err = New( + WithLogger(logger), + WithIndexConfig(cfg), + WithStoragePath(dir), + WithWAL(), + WithManualBlockRotation(), + WithSnapshotTriggerSize(1024*1024*1024), // we just need a non-zero value; but don't want the writes to unexpectedly trigger a snapshot ) + t.Cleanup(func() { + require.NoError(t, c.Close()) + }) + require.NoError(t, err) + db, err = c.DB(context.Background(), "test") + require.NoError(t, err) - tests := map[string]struct { - filter logicalplan.Expr - distinct logicalplan.Expr - rows int64 - cols int64 - }{ - "none": { - rows: 3, - cols: 7, - }, - "timestamp filter": { - filter: logicalplan.Col("timestamp").GtEq(logicalplan.Literal(12)), - rows: 1, - cols: 7, - }, - "distinct": { - distinct: logicalplan.DynCol("labels"), - rows: 2, - cols: 3, - }, + validateRows := func(expected int64) { + pool := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer pool.AssertSize(t, 0) + rows := int64(0) + engine := query.NewEngine(pool, db.TableProvider()) + err = engine.ScanTable("test"). 
+ Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + rows += r.NumRows() + return nil + }) + require.NoError(t, err) + require.Equal(t, expected, rows) } - for name, test := range tests { - t.Run(name, func(t *testing.T) { - bldr := engine.ScanTable("test") - if test.filter != nil { - bldr = bldr.Filter(test.filter) - } - if test.distinct != nil { - bldr = bldr.Distinct(test.distinct) - } - err = bldr.Execute(ctx, func(ctx context.Context, ar arrow.Record) error { - require.Equal(t, test.rows, ar.NumRows()) - require.Equal(t, test.cols, ar.NumCols()) + validateRows(10) +} + +func Test_Iceberg(t *testing.T) { + t.Parallel() + + config := NewTableConfig( + dynparquet.SampleDefinition(), + ) + logger := newTestLogger(t) + bucket := &TestBucket{Bucket: objstore.NewInMemBucket(), record: make(map[string]struct{})} + iceberg, err := storage.NewIceberg("/", catalog.NewHDFS("/", bucket), bucket, + storage.WithIcebergPartitionSpec( + iceberg.NewPartitionSpec( // Partition the table by timestamp. + iceberg.PartitionField{ + Name: "timestamp", + Transform: iceberg.IdentityTransform{}, + }, + ), + )) + require.NoError(t, err) + + dir := t.TempDir() + c, err := New( + WithLogger(logger), + WithStoragePath(dir), + WithWAL(), + WithManualBlockRotation(), + WithReadWriteStorage(iceberg), + ) + t.Cleanup(func() { + require.NoError(t, c.Close()) + }) + require.NoError(t, err) + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) + table, err := db.Table("test", config) + require.NoError(t, err) + + ctx := context.Background() + samples := dynparquet.GenerateTestSamples(10) + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + + validateRows := func(expected int64) { + pool := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer pool.AssertSize(t, 0) + rows := int64(0) + engine := query.NewEngine(pool, db.TableProvider()) + err = engine.ScanTable("test"). 
+ Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + rows += r.NumRows() return nil }) - require.NoError(t, err) + require.NoError(t, err) + require.Equal(t, expected, rows) + } + + validateRows(10) + + require.NoError(t, table.RotateBlock(ctx, table.ActiveBlock())) + require.Eventually(t, func() bool { + info, err := bucket.Attributes(ctx, filepath.Join("test", "test", "metadata", "v1.metadata.json")) + if err != nil { + return false + } + return info.Size > 0 + }, 3*time.Second, 10*time.Millisecond) + + validateRows(10) + + // Insert sampels with a different schema + samples = dynparquet.NewTestSamples() + r, err = samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + + validateRows(13) + + require.NoError(t, table.RotateBlock(ctx, table.ActiveBlock())) + require.Eventually(t, func() bool { + info, err := bucket.Attributes(ctx, filepath.Join("test", "test", "metadata", "v2.metadata.json")) + if err != nil { + return false + } + return info.Size > 0 + }, 3*time.Second, 10*time.Millisecond) + + validateRows(13) + + // Reset the test bucket + bucket.Reset() + + pool := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer pool.AssertSize(t, 0) + rows := int64(0) + engine := query.NewEngine(pool, db.TableProvider()) + err = engine.ScanTable("test").Filter( + logicalplan.Col("timestamp").Gt(logicalplan.Literal(int64(2))), + ). + Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + rows += r.NumRows() + return nil + }) + require.NoError(t, err) + require.Equal(t, int64(7), rows) + + // We expect the Iceberg table to only access a single Parquet file. 
+ require.Len(t, bucket.record, 1) + bucket.Reset() + + // query by a column that isn't part of the partition key + rows = 0 + err = engine.ScanTable("test").Filter( + logicalplan.And( + logicalplan.Col("timestamp").Gt(logicalplan.Literal(int64(2))), + logicalplan.Col("value").Gt(logicalplan.Literal(int64(5))), + ), + ). + Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + rows += r.NumRows() + return nil }) + require.NoError(t, err) + require.Len(t, bucket.record, 1) + require.Equal(t, int64(4), rows) +} + +type TestBucket struct { + sync.Mutex + record map[string]struct{} + objstore.Bucket +} + +func (b *TestBucket) Reset() { + b.record = make(map[string]struct{}) +} + +func (b *TestBucket) GetRange(ctx context.Context, path string, offset, length int64) (io.ReadCloser, error) { + b.Lock() + b.record[path] = struct{}{} + b.Unlock() + return b.Bucket.GetRange(ctx, path, offset, length) +} + +// Test_DB_EmptyPersist ensures that we don't write an empty block when the db is shutdown and there is no data. 
+func Test_DB_EmptyPersist(t *testing.T) { + config := NewTableConfig( + dynparquet.SampleDefinition(), + ) + + logger := newTestLogger(t) + assertBucket := &AssertBucket{ + Bucket: objstore.NewInMemBucket(), + uploadFunc: func(_ context.Context, _ string, _ io.Reader) error { + t.Fatal("unexpected upload") + return nil + }, + } + + sinksource := NewDefaultObjstoreBucket(assertBucket) + + c, err := New( + WithLogger(logger), + WithReadWriteStorage(sinksource), + ) + require.NoError(t, err) + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) + _, err = db.Table("test", config) + require.NoError(t, err) + + require.NoError(t, c.Close()) +} + +type AssertBucket struct { + objstore.Bucket + + uploadFunc func(ctx context.Context, path string, r io.Reader) error +} + +func (a *AssertBucket) Upload(ctx context.Context, path string, r io.Reader) error { + if a.uploadFunc != nil { + return a.uploadFunc(ctx, path, r) + } + return a.Bucket.Upload(ctx, path, r) +} + +func Test_DB_Sample(t *testing.T) { + t.Parallel() + config := NewTableConfig( + dynparquet.SampleDefinition(), + ) + logger := newTestLogger(t) + + c, err := New(WithLogger(logger)) + t.Cleanup(func() { + require.NoError(t, c.Close()) + }) + require.NoError(t, err) + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) + table, err := db.Table("test", config) + require.NoError(t, err) + + ctx := context.Background() + for i := 0; i < 500; i++ { + samples := dynparquet.GenerateTestSamples(10) + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) } + + pool := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer pool.AssertSize(t, 0) + lock := &sync.Mutex{} + rows := int64(0) + sampleSize := int64(13) + engine := query.NewEngine(pool, db.TableProvider()) + err = engine.ScanTable("test"). + Sample(sampleSize, 1024*1024). 
// Sample 13 rows, materialize the reservoir at 1MB + Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + lock.Lock() + defer lock.Unlock() + rows += r.NumRows() + return nil + }) + require.NoError(t, err) + require.Equal(t, sampleSize, rows) } diff --git a/design/index_files.md b/design/index_files.md new file mode 100644 index 000000000..b3bfa449f --- /dev/null +++ b/design/index_files.md @@ -0,0 +1,63 @@ +# On-Disk persistent index files + +Authored: Jan 22, 2024 +Author: Thor Hansen (@thorfour) + +## Abstract + +FrostDB currently uses temporary index files to store Parquet data. These are lost on shutdown, and need to be recreated from snapshots and WAL. +Furthermore snapshots are less efficient than necessary because snapshotting copies Parquet data that is already on disk, and writes it to a different file (snapshot file) +when we could just reference the index file instead. + +## Proposal + +Instead FrostDB should use persistent index files that don't require snapshots to copy data but instead hard link the files in the snapshot directory for recovery. + +Index files have the format of: +| | +| ----------------- | +| `` | +| `` | +| `` | +| `` | +| `` | +| `` | + +Where Parquet files are appended together with a uint64 that represents the number of bytes in the previous ParquetFile. +Each Parquet file represents a single Part in the LSM index. The Part's transaction id is stored in the Parquet file with a metadata key of 'compaction_tx' + +Index files will be stored in a directory with the prefix of: +databases//index//// + +With the filename format of 000000000001.parquet with a 20 byte representation of the ordering of the index file. + +### Index File Lifecycle + +There are 4 events that influence the lifecycle of an index file. +- Writes +- Snapshots +- Compaction +- Block Rotation + +Writes: + These are compactions from a lower level that write into the index file.
These writes shall write into the currently active index file in the level, + appending the newly compacted Parquet file into the index file with the uint64 at the end of the number of bytes written. + This file shall be fsync'd after a completed write. + +Snapshots: + When a snapshot occurs the index files in every level of the index shall be rotated. This means a new index file is created, the old one is fsync'd + and all future writes to the level are written into that new file leaving the previous file immutable. The old files are then hard linked into the snapshot directory, + such that when the index deletes those files they can still be recovered from the snapshot. + +Compaction: + This is when a level is compacted into a new Parquet file and written to the next level. All the files in that compacted level shall be deleted, as they are now + covered by the latest write into the next level. + +Rotation: + When an entire table block is rotated out, after rotation is completed the block directory that holds all levels of index files can be deleted. + + +### Replay + +When a table block is created it shall walk through the table blocks directory. Upon discovering index files it will open and read all parts of that file. +Returning a list of parts that were recovered when the block was created. The Table shall then replay those recovered parts into the LSM. diff --git a/design/lsm.md b/design/lsm.md new file mode 100644 index 000000000..866115dd7 --- /dev/null +++ b/design/lsm.md @@ -0,0 +1,54 @@ +# Log-structured merge-tree (LSM) style index + +Authored: Sept 21, 2023 +Author: Thor Hansen (@thorfour) + +## Abstract + +FrostDB was built using a BTree as its index. This decision was made based on early assumptions and before the query engine +had taken shape. Now with a better understanding of how FrostDB works in practice it is my recommendation that we replace the Btree index with +an LSM-like index.
+ +## Problem + +Insertion spends the largest portion of CPU cycles on sorting the individual rows of records into their corresponding granules. However due to the multi-key primary key +schemas coupled with dynamic columns, the query engine does not leverage the sorted nature of rows during query time. The scan layer simply passes the records into the query engine. +Which means the only benefit from sorting the rows is that during compaction a granule already contains the rows that will likely offer the best compaction benefits. + +However this means that we're slowing down insertion times for potential gains only during compaction time. We've witnessed things like replay and recovery become dominated by the +insert path. + +## Proposal + +Instead of using a Btree where we sort the individual rows of each record into granules, we instead use an LSM-style index where the entire record is simply appended to the first level +of the LSM index. Because of the immutable nature of the data in FrostDB we do not require all the features of an LSM tree. But are instead lifting the ideas of having different +levels of compacted data. With the first level being a raw list of arrow records that have been written, and subsequent levels being compacted records. + + +|---------| |---------| |---------| +| | | | | | +| Record |--->| Record |--->| Record | L0: Raw written Arrow records (memtable in most LSM definitions) +| | | | | | +|_________| |_________| |_________| + +|---------| |---------| |---------| +| | | | | | +| Parquet |--->| Parquet |--->| Parquet | L1: Compacted L0 in Parquet files. +| | | | | | +|_________| |_________| |_________| + + Fig 1. + +Note that this is NOT a traditional LSM tree that needs to perform a merging of data at different levels due to the fact that neither deletes nor updates are performed.
Due to the +nature of querying in FrostDB where each file is scanned we need not sort the inserted records into a tree at all but instead keep all data in a single linked-list. +Therefore each subsequent level of the index is compacted and sorted solely for the purpose of reducing the memory footprint of the previous level of unsorted uncompacted files. + + +### Compaction + +Compaction would work much like it does today except instead of compacting at the Granule level it takes all of L0 and compacts those records into a Parquet file (potentially in the future another larger arrow record). +This compacted file would get added to the L1 list. For brevity there's only 2 levels in the diagram but there's no reason, should workloads require it, to only have two levels. Further levels could be added +that compact the level above it, and even potentially use disk or remote storage to retain one or more levels. + +Because the data is immutable and there is no support for updates or deletions during compaction the compactor need not try and search for conflicting rows of updates or deletes but +can simply compact all the data into a combined record/file. diff --git a/design/snapshots.md b/design/snapshots.md new file mode 100644 index 000000000..1ad2b0533 --- /dev/null +++ b/design/snapshots.md @@ -0,0 +1,68 @@ +# Snapshots + +Authored: Mar 8, 2023 +Author: Alfonso Subiotto (@asubiotto) + +## Abstract + +Up until now, FrostDB has offered crash recovery solely with a write ahead log (WAL), where database operations are written one-by-one to an on-disk log. This document proposes complementing the WAL with point-in-time snapshots to reduce recovery time, WAL disk space, and WAL corruption issues. + +## Problem + +Running FrostDB with a WAL in production we've noticed three problems: +- Running FrostDB with multiple small tables implies that the WAL is rarely, if ever, truncated.
This is because the WAL is shared amongst tables and when persisting a table, the WAL can only be truncated at the minimum persisted transaction across all tables. This causes unnecessarily large WALs. +- WAL replay times upon recovery can be unreasonably long (on the order of many minutes). This is a problem because database requests cannot be served until the previous state has been recovered. The cause of this performance issue is that writes are replayed one at a time. +- WAL files are sometimes corrupted on preemptible VMs even though the library we use uses fsync. It's unclear whether non-atomic truncation is also an issue. + +## Proposal + +This document proposes to complement the WAL with point-in-time snapshots. Snapshots should help us with the mentioned problems in the following ways: +- Snapshots are performed at database granularity. This means that regardless of how many tables the database has or their sizes, the WAL can be safely truncated at the transaction a snapshot occurred, reducing WAL size. +- Snapshot big-o load times are a function of the number of parts in the database, rather than the number of inserts. Additionally, since the index structure is stored in a snapshot file, snapshots avoid having to perform an index search per insert which is a huge contributing factor to slow WAL replay performance. +- Relying on snapshots implies that the WAL only needs to be replayed from the transaction of the last snapshot, adding redundancy for recovery. It's unclear whether this will solve our corruption problems, but reducing our reliance on the WAL should help us in theory. + +### Format + +Refer to [the snapshot .proto file](https://github.com/polarsignals/frostdb/blob/main/proto/frostdb/snapshot/v1alpha1/snapshot.proto) for information on snapshot metadata. Note that only metadata is stored in protobufs. 
This metadata is stored in the footer of a snapshot file and includes offsets to look up actual table data in the rest of the file. Refer to the [snapshot.go file](https://github.com/polarsignals/frostdb/blob/974038eeb6072e915f8e28eea5a18609f4f7ac74/snapshot.go#L28) for an in-depth explanation of the format of the full snapshot file. + +### Interval + +An option can be provided when opening a column store to define a "trigger size" in bytes. This is the number of bytes a table block has to grow by since the last snapshot to trigger a new snapshot. Future work should change this check to use the database size (sum of all tables). Additionally, snapshots will be triggered on block rotation (see [block rotation section as to why](#block-rotation)). + +### Recovery + +When recovering a database, there will be both a `wal/` and a `snapshots/` folder. The snapshot file names will be encoded with the transaction the snapshot occurred at, similar to how WAL file names contain the first index of that file. Therefore, recovery will conceptually reverse iterate over the lexicographically-sorted snapshot files and load the latest (in tx order) possible snapshot. On successful load, the WAL will be replayed from this snapshot transaction, which has a 1:1 relationship to a WAL index. + +### Truncation + +The safest time to delete old snapshots is when a snapshot at txn `n` has been successfully loaded. This implies that all snapshots up to txn `n-1` can be deleted. We can be more aggressive with our truncation, but leave that to future implementations. + +With respect to WAL truncation, the same can be done. Once a snapshot at txn `n` has been successfully loaded, the WAL can be truncated up to txn `n`. + +### Block Rotation + +Snapshot recovery in the presence of block rotations can be subtle. The situation we want to avoid is recovering from a snapshot that has data from a table block that has been rotated (i.e. persisted).
If the aforementioned [snapshot intervals](#interval) and [recovery](#recovery) algorithms are followed without triggering a snapshot on block rotation, it is possible to load a snapshot at txn `n` when a block rotation happened at txn `n+k`. When serving queries, FrostDB could return duplicate data for writes with txn ids `<= n` since the same row could be both in-memory (originating from the snapshot) and in block storage (previously persisted). Triggering snapshots on block rotation will not include the rotated block. When the snapshot is loaded during recovery, persisted data will not be reflected in the snapshot. + +However, there might be cases where the latest snapshot loaded during recovery is at a txn less than the block rotation. For example, the VM might be preempted after persisting a block, but before a snapshot can be written to disk. To handle these cases, the WAL replay code will have to take into account that the replay is happening against a non-empty database loaded from a snapshot at the same txn the WAL replay started at. Given a table block persistence WAL entry indicates successful persistence of the table block, it is safe to ignore the snapshot data for that table, so the active table block index will be reset, deleting all data in the in-memory table and inserting only the WAL records starting from the snapshot txn. + +## Future work + +### Database size trigger + +For a first version, the easiest snapshot trigger was to look at the block size on insert, similar to how block rotation is triggered. Since snapshots are done at the database granularity, it would be more correct to trigger a snapshot based on the sum of all the table sizes. + +### Snapshot compression + +Currently, snapshot data regions are not compressed. It's unclear whether file sizes will be reduced much given existing parquet data compression, but it is something to be explored if file sizes become too large.
+ +## Alternatives + +### Improving the WAL + +Another approach to solving the [problems](#problem) outlined at the start of this document is to: +- Persist blocks based on time as well as byte size. This should solve the WAL size problem by increasing the minimum persisted transaction across all tables. +- Improve insert performance to reduce WAL replay time. + +Time-based block persistence was discarded given that snapshots solved this problem as well as the performance problem while not introducing more complexity to block persistence. + +Improving insert performance is something we should do regardless of WAL replay times. However, WAL replay times are fundamentally a function of the number of inserts performed against the database, while snapshot load times are a function of the number of parts independent of the number of inserts and avoid having to perform index searches on each insert. This is something that we can't get around with inserts given that we need to support inserting arbitrary data. Thus, snapshots still seemed like the better solution. \ No newline at end of file diff --git a/dst/.gitignore b/dst/.gitignore new file mode 100644 index 000000000..19e1bced9 --- /dev/null +++ b/dst/.gitignore @@ -0,0 +1 @@ +*.wasm diff --git a/dst/README.MD b/dst/README.MD new file mode 100644 index 000000000..3db5b0347 --- /dev/null +++ b/dst/README.MD @@ -0,0 +1,31 @@ +# DST + +This folder is where FrostDB's deterministic simulation test workload (see `dst_test.go`) and runtime are stored. + +## Architecture + +Read the [blog post on DST in FrostDB](https://www.polarsignals.com/blog/posts/2024/05/28/mostly-dst-in-go) for a general background. In short, the deterministic simulation test is run as a workload against frostdb in `TestDST`. Determinism is achieved by using a custom go runtime to compile the test and subsequently using `wazero` with a special config (see the `runtime` folder) seeded with the `GORANDSEED` variable. 
+ +## How to run + +### Prerequisites + +- [github.com/polarsignals/go](github.com/polarsignals/go): download the repository and run `./make.bash` in the `src` directory to build the custom go runtime. + +### Compile the test to a WASM module + +Compile the test with the custom go runtime. First set the `GOROOT` environment variable to the path of the custom go runtime and then compile (without running) the test binary to wasm. + +```bash +export GOROOT=/path/to/polarsignals/go +GOOS=wasip1 GOARCH=wasm $GOROOT/bin/go test -tags=faketime -c -o dst/dst.wasm ./dst +``` + +### Run the test using the pre-configured wazero runtime + +See `runtime/run.go` for the runtime configuration. The `GORANDSEED` environment variable is used to seed the custom go runtime so must be provided for tests. Note the `-test.v` flag is used to run the test in verbose mode. Any go testing flags can be added here at your convenience. + +```bash +go build ./dst/runtime +GORANDSEED= ./runtime --module=./dst/dst.wasm -test.v +``` diff --git a/dst/dst_test.go b/dst/dst_test.go new file mode 100644 index 000000000..a496fd52c --- /dev/null +++ b/dst/dst_test.go @@ -0,0 +1,578 @@ +package dst + +import ( + "bytes" + "context" + "errors" + "fmt" + "io" + "math" + "math/rand" + "os" + "path/filepath" + "strings" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/ipc" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/google/uuid" + internalWal "github.com/polarsignals/wal" + "github.com/polarsignals/wal/types" + "github.com/stretchr/testify/require" + "github.com/thanos-io/objstore" + "go.uber.org/goleak" + 
"golang.org/x/sync/errgroup" + + "github.com/polarsignals/frostdb" + walpb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/wal/v1alpha1" + "github.com/polarsignals/frostdb/index" + "github.com/polarsignals/frostdb/query" + "github.com/polarsignals/frostdb/samples" + "github.com/polarsignals/frostdb/wal" +) + +const ( + randomSeedKey = "GORANDSEED" + numCommands = 512 + dbName = "test" + tableName = "test" +) + +type command int + +const ( + insert command = iota + compact + snapshot + rotate + restart +) + +func (c command) String() string { + switch c { + case insert: + return "insert" + case compact: + return "compact" + case snapshot: + return "snapshot" + case rotate: + return "rotate" + case restart: + return "restart" + default: + return "" + } +} + +var commands = []command{insert, compact, snapshot, rotate, restart} + +// probabilities are command probabilities. It is not strictly necessary that +// these sum to 1. +var probabilities = map[command]float64{ + insert: 0.75, + compact: 0.25, + snapshot: 0.1, + rotate: 0.05, + restart: 0.01, +} + +var cumulativeProbabilities []float64 + +func init() { + var sum float64 + for _, p := range probabilities { + sum += p + cumulativeProbabilities = append(cumulativeProbabilities, sum) + } +} + +func genCommand() command { + f := rand.Float64() + // Normalize f so it falls within a range. + f *= cumulativeProbabilities[len(cumulativeProbabilities)-1] + for i, p := range cumulativeProbabilities { + if f < p { + return commands[i] + } + } + // Should never reach here unless rounding error, but return an insert. 
+ return insert +} + +type int64checksum struct { + sum int64 + count int64 + timestamps []int64 +} + +func (c *int64checksum) add(v int64) { + c.sum ^= v + c.count++ + c.timestamps = append(c.timestamps, v) +} + +type tableProvider struct { + table atomic.Pointer[frostdb.Table] +} + +func (t *tableProvider) GetTable() *frostdb.Table { + return t.table.Load() +} + +func (t *tableProvider) Update(table *frostdb.Table) { + t.table.Store(table) +} + +// walTickProbability is the probability that a WAL tick will occur after a +// write, this simulates the passing of real time between WAL batch writes. +const walTickProbability = 0.25 + +// writerHelper is used concurrently to write to a table. +type writerHelper struct { + logger log.Logger + tp *tableProvider + walTicker *fakeTicker + timestampSum struct { + sync.Mutex + int64checksum + } +} + +func genRandomLabels() map[string]string { + var ( + labelKeys = []string{"label1", "label2", "label3"} + labelVals = []string{"val1", "val2", "val3"} + ) + labels := make(map[string]string) + for i := 0; i < rand.Intn(len(labelKeys)); i++ { + labels[labelKeys[i]] = labelVals[rand.Intn(len(labelVals))] + } + return labels +} + +type fakeTicker struct { + c chan time.Time +} + +func (t *fakeTicker) C() <-chan time.Time { + return t.c +} + +func (t *fakeTicker) Stop() {} + +func (w *writerHelper) write(ctx context.Context) (uint64, error) { + types := []string{"cpu", "memory", "block", "mutex"} + + // Note: UUID is not really interesting since it takes the current time, + // which we control via the -faketime flag. 
+ genUUID, err := uuid.NewUUID() + if err != nil { + return 0, err + } + + smpls := make([]samples.Sample, rand.Intn(16)+1) + for i := range smpls { + smpls[i] = samples.Sample{ + ExampleType: types[rand.Intn(len(types))], + Labels: genRandomLabels(), + Stacktrace: []uuid.UUID{genUUID}, + Timestamp: rand.Int63(), + Value: rand.Int63(), + } + } + + record, err := samples.Samples(smpls).ToRecord() + if err != nil { + return 0, err + } + + tx, err := w.tp.GetTable().InsertRecord(ctx, record) + if err != nil { + return 0, err + } + + timestamps := make([]int64, 0, len(smpls)) + for i := range smpls { + timestamps = append(timestamps, smpls[i].Timestamp) + w.timestampSum.add(smpls[i].Timestamp) + } + + if rand.Float64() < walTickProbability { + // Nonblocking write to channel. + select { + case w.walTicker.c <- time.Now(): + default: + } + } + level.Info(w.logger).Log("msg", "write complete", "txn", tx, "rows", len(smpls), "timestamps", fmt.Sprintf("%v", timestamps)) + return tx, nil +} + +// testLogStore wraps a LogStore that the WAL uses to write records. The main +// purpose of this struct is to keep track of what data might be missing at the +// end of a test run. For example, if we fail to commit log entries due to an +// fs error, we expect this write to be lost even though no error was returned +// to the user (WAL writes are async). The DST can also set a flag on the log +// store to track writes that are successfully committed in special cases (e.g. +// when a hard shutdown copies the WAL's directory from under it). +type testLogStore struct { + internalWal.LogStore + // accOnNoError is set to true if the LogStore should accumulate logs even + // on successful commit. + accOnNoError atomic.Bool + // acc does not need to be protected by a mutex since StoreLogs is called + // synchronously. 
+ acc int64checksum +} + +func (s *testLogStore) accumulateLogs(logs []types.LogEntry) error { + for _, log := range logs { + walRec := &walpb.Record{} + if err := walRec.UnmarshalVT(log.Data); err != nil { + return err + } + writeInfo, ok := walRec.Entry.EntryType.(*walpb.Entry_Write_) + if !ok { + continue + } + r, err := ipc.NewReader(bytes.NewReader(writeInfo.Write.Data)) + if err != nil { + panic(fmt.Errorf("should not have error when decoding log entry: %w", err)) + } + for r.Next() { + rec := r.Record() + idxSlice := rec.Schema().FieldIndices("timestamp") + timestamps := rec.Column(idxSlice[0]).(*array.Int64) + for i := 0; i < timestamps.Len(); i++ { + s.acc.add(timestamps.Value(i)) + } + rec.Release() + } + r.Release() + } + return nil +} + +// droppedLogsCallback is called by the WAL when logs are dropped on WAL close +// timeout. The timeout usually happens when there is a truncation error due to +// a hard shutdown, so nextTx is never updated. +func (s *testLogStore) droppedLogsCallback(logs []types.LogEntry) { + _ = s.accumulateLogs(logs) +} + +func (s *testLogStore) StoreLogs(logs []types.LogEntry) error { + if err := s.LogStore.StoreLogs(logs); err != nil { + _ = s.accumulateLogs(logs) + return err + } + // Successful commit. + if s.accOnNoError.Load() { + return s.accumulateLogs(logs) + } + return nil +} + +// canIgnoreError returns whether the given error can be ignored. Specifically, +// errors returned by operations when the database is closing are not a problem. 
+func canIgnoreError(err error) bool { + errMsg := strings.ToLower(err.Error()) + return strings.Contains(errMsg, "closed") || + strings.Contains(errMsg, "closing") +} + +func newStore( + storageDir string, + logger log.Logger, + objectStorage *frostdb.DefaultObjstoreBucket, + newLogStoreWrapper func(internalWal.LogStore) internalWal.LogStore, + droppedLogsCallback func([]types.LogEntry), + fakeTicker wal.Ticker, +) (*frostdb.ColumnStore, error) { + return frostdb.New( + frostdb.WithStoragePath(storageDir), + frostdb.WithLogger(logger), + frostdb.WithWAL(), + frostdb.WithManualBlockRotation(), + frostdb.WithReadWriteStorage(objectStorage), + frostdb.WithIndexConfig( + []*index.LevelConfig{ + {Level: index.L0, MaxSize: 1024 * 1024 * 15, Type: index.CompactionTypeParquetDisk}, + {Level: index.L1, MaxSize: 1024 * 1024 * 128, Type: index.CompactionTypeParquetDisk}, + {Level: index.L2, MaxSize: 1024 * 1024 * 512}, + }, + ), + // SnapshotTriggerSize enables snapshots. However, the size in question + // is set to be very large in order to trigger snapshots manually. + frostdb.WithSnapshotTriggerSize(math.MaxInt64), + frostdb.WithTestingOptions( + frostdb.WithTestingWalOptions( + wal.WithTestingLogStoreWrapper(newLogStoreWrapper), + wal.WithTestingLoopTicker(fakeTicker), + wal.WithTestingCallbackWithDroppedLogsOnClose(droppedLogsCallback), + ), + ), + ) +} + +type testOutput struct { + t testing.TB +} + +func (l *testOutput) Write(p []byte) (n int, err error) { + l.t.Helper() + l.t.Log(string(p)) + return len(p), nil +} + +func newTestLogger(t testing.TB) log.Logger { + t.Helper() + logger := log.NewLogfmtLogger(log.NewSyncWriter(&testOutput{t: t})) + logger = level.NewFilter(logger, level.AllowDebug()) + return logger +} + +// Remove unused warnings for now. +var ( + _ = vfsShutdown + _ = vfsRestart +) + +// TestDST runs deterministic simulation tests against FrostDB. 
For true +// determinism and reproducibility, this test needs to be run with +// GORANDSEED set, the modified go runtime found at github.com/polarsignals/go, +// and GOOS=wasip1 GOARCH=wasm. +func TestDST(t *testing.T) { + if os.Getenv(randomSeedKey) == "" { + t.Skipf("%s not set, skipping deterministic simulation tests", randomSeedKey) + } + + t.Log("Running DST using random seed:", os.Getenv(randomSeedKey)) + logger := newTestLogger(t) + objectStorage := frostdb.NewDefaultObjstoreBucket( + objstore.NewInMemBucket(), frostdb.StorageWithLogger(log.WithPrefix(logger, "component", "objstore")), + ) + storageDir := t.TempDir() + logStoreWrapper := &testLogStore{} + walTicker := &fakeTicker{c: make(chan time.Time, 1)} + storeID := 0 + c, err := newStore( + storageDir, log.WithPrefix(logger, "storeID", storeID), objectStorage, func(logStore internalWal.LogStore) internalWal.LogStore { + logStoreWrapper.LogStore = logStore + return logStoreWrapper + }, logStoreWrapper.droppedLogsCallback, walTicker, + ) + require.NoError(t, err) + + ctx := context.Background() + var db atomic.Pointer[frostdb.DB] + { + // Separate scope to avoid table pointer misuse. + newDB, err := c.DB(ctx, dbName) + require.NoError(t, err) + db.Store(newDB) + } + + tp := &tableProvider{} + tableConfig := frostdb.NewTableConfig(samples.SampleDefinition()) + { + // Separate scope to avoid table pointer misuse. 
+ table, err := db.Load().Table(tableName, tableConfig) + require.NoError(t, err) + tp.Update(table) + } + + t.Log("DB initialized, starting commands") + w := &writerHelper{logger: logger, tp: tp, walTicker: walTicker} + writeAndWait := func() error { + tx, err := w.write(ctx) + if err != nil { + return err + } + db.Load().Wait(tx) + return nil + } + + errg := &errgroup.Group{} + errg.SetLimit(32) + commandDistribution := make(map[command]int) + + ignoreGoroutinesAtStartOfTest := goleak.IgnoreCurrent() + for i := 0; i < numCommands; i++ { + cmd := genCommand() + commandDistribution[cmd]++ + switch cmd { + case insert: + errg.Go(func() error { + // This is a hack to ensure some randomized goroutine + // scheduling. + // TODO(asubiotto): Figure out if we still need this. + time.Sleep(1 * time.Millisecond) + if _, err := w.write(ctx); err != nil && !canIgnoreError(err) { + return fmt.Errorf("insert error: %s", err) + } + return nil + }) + case compact: + errg.Go(func() error { + // TODO(asubiotto): Maybe we should offer a finer-grained way to + // trigger leveled compaction. + if err := tp.GetTable().EnsureCompaction(); err != nil && !errors.Is(err, io.EOF) { + return fmt.Errorf("compaction error: %w", err) + } + return nil + }) + case rotate: + errg.Go(func() error { + table := tp.GetTable() + if err := table.RotateBlock(ctx, table.ActiveBlock()); err != nil { + return fmt.Errorf("rotate error: %s", err) + } + return nil + }) + case snapshot: + errg.Go(func() error { + if err := db.Load().Snapshot(ctx); err != nil { + return fmt.Errorf("snapshot err: %w", err) + } + return nil + }) + case restart: + // This is a hack to ensure some goroutines are scheduled before + // this restart. + // TODO(asubiotto): Figure out if we still need this. + time.Sleep(1 * time.Millisecond) + // Graceful shutdown. + require.NoError(t, c.Close()) + _ = errg.Wait() + + // Unfortunately frostdb doesn't have goroutine lifecycle management + // and adding it could lead to subtle issues (e.g. 
on Close with + // many DBs). Instead, this test simply verifies all goroutines + // spawned up until this restart eventually exit after n retries. + const maxRetries = 10 + for i := 0; i < maxRetries; i++ { + if err := goleak.Find(ignoreGoroutinesAtStartOfTest); err == nil { + break + } else if i == maxRetries-1 { + t.Fatalf("leaked goroutines found on Close: %v", err) + } else { + time.Sleep(1 * time.Millisecond) + } + } + + storeID++ + c, err = newStore( + storageDir, + log.WithPrefix(logger, "storeID", storeID), objectStorage, func(logStore internalWal.LogStore) internalWal.LogStore { + logStoreWrapper.LogStore = logStore + return logStoreWrapper + }, logStoreWrapper.droppedLogsCallback, walTicker, + ) + require.NoError(t, err) + newDB, err := c.DB(ctx, dbName) + require.NoError(t, err) + table, err := newDB.Table(tableName, tableConfig) + require.NoError(t, err) + db.Store(newDB) + tp.Update(table) + _, err = w.write(ctx) + // This write should succeed. + require.NoError(t, err) + } + } + + // Wait for all requests to complete. + require.NoError(t, errg.Wait()) + + t.Logf("All commands completed. Command distribution:\n%v\nIssuing write and waiting for high watermark.", commandDistribution) + + // Perform a write and wait for the high watermark to ensure all writes have + // been committed. + require.NoError(t, writeAndWait()) + + t.Log("Verifying data integrity.") + + listFiles := func(dir string) string { + de, err := os.ReadDir(filepath.Join(storageDir, "databases", dbName, dir)) + require.NoError(t, err) + var files []string + for _, e := range de { + files = append(files, e.Name()) + } + return strings.Join(files, " ") + } + t.Log("Index files:", listFiles(filepath.Join("index", tableName))) + t.Log("snapshot files:", listFiles("snapshots")) + t.Log("WAL files:", listFiles("wal")) + + // Defer a close here. This is not done at the start of the test because + // the test run itself may close the store. 
+ defer c.Close() + + timestampSum := &int64checksum{} + readTimestamps := make(map[int64]int) + expectedTimestamps := make(map[int64]struct{}) + for _, v := range w.timestampSum.timestamps { + expectedTimestamps[v] = struct{}{} + } + require.NoError( + t, + query.NewEngine( + memory.DefaultAllocator, + db.Load().TableProvider(), + ).ScanTable(tableName).Execute(ctx, func(_ context.Context, r arrow.Record) error { + idxSlice := r.Schema().FieldIndices("timestamp") + require.Equal(t, 1, len(idxSlice)) + timestamps := r.Column(idxSlice[0]).(*array.Int64) + for i := 0; i < timestamps.Len(); i++ { + require.False(t, timestamps.IsNull(i)) + timestampSum.add(timestamps.Value(i)) + readTimestamps[timestamps.Value(i)]++ + delete(expectedTimestamps, timestamps.Value(i)) + } + return nil + }), + ) + + nonUnique := make([]int64, 0) + for k, v := range readTimestamps { + if v > 1 { + nonUnique = append(nonUnique, k) + } + } + + if w.timestampSum.count != timestampSum.count { + if w.timestampSum.count < timestampSum.count { + // Duplicate data found. + t.Fatalf( + "too many rows read. wrote %d and found %d. timestamps found more than once: %v", + w.timestampSum.count, timestampSum.count, nonUnique, + ) + } + + // Drain currently pending WAL entries by closing. + require.NoError(t, c.Close()) + + // Delete the timestamps that were not committed over the lifetime of + // the test. These were lost. + for _, v := range logStoreWrapper.acc.timestamps { + delete(expectedTimestamps, v) + } + + require.Zero( + t, + len(expectedTimestamps), + "number of rows mismatch, wrote %d, and read %d. 
%d could not be committed to WAL\n"+ + "timestamps that were expected and not found: %v\n", + w.timestampSum.count, timestampSum.count, logStoreWrapper.acc.count, expectedTimestamps, + ) + } +} diff --git a/dst/runtime/.gitignore b/dst/runtime/.gitignore new file mode 100644 index 000000000..e4d05d91a --- /dev/null +++ b/dst/runtime/.gitignore @@ -0,0 +1 @@ +runtime diff --git a/dst/runtime/main.go b/dst/runtime/main.go new file mode 100644 index 000000000..fa1d769d4 --- /dev/null +++ b/dst/runtime/main.go @@ -0,0 +1,47 @@ +package main + +import ( + "fmt" + "os" + "strings" + + "github.com/spf13/cobra" +) + +const ( + modulePathFlagName = "module" +) + +var rootCmd = &cobra.Command{ + Use: "runtime --module= [module_args]", + RunE: func(cmd *cobra.Command, _ []string) error { + modulePath, err := cmd.Flags().GetString(modulePathFlagName) + if err != nil { + return err + } + strippedArgs := make([]string, 0, len(os.Args)) + for _, arg := range os.Args { + if strings.HasPrefix(arg, "--"+modulePathFlagName+"=") { + continue + } + strippedArgs = append(strippedArgs, arg) + } + os.Args = strippedArgs + return run(modulePath) + }, +} + +func init() { + rootCmd.Flags().String(modulePathFlagName, "", "path to the module (.wasm file) to run") + if err := rootCmd.MarkFlagRequired(modulePathFlagName); err != nil { + panic(err) + } +} + +// main is the entry point to run dst_test.go with a wazero runtime. 
+func main() { + if err := rootCmd.Execute(); err != nil { + fmt.Println(fmt.Errorf("error running wasm: %w", err)) + os.Exit(1) + } +} diff --git a/dst/runtime/run.go b/dst/runtime/run.go new file mode 100644 index 000000000..6738a1118 --- /dev/null +++ b/dst/runtime/run.go @@ -0,0 +1,75 @@ +package main + +import ( + "context" + "fmt" + "os" + + "github.com/tetratelabs/wazero" + "github.com/tetratelabs/wazero/experimental/sysfs" + "github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1" + + "github.com/polarsignals/frostdb/dst/vfs" +) + +const ( + randSeedEnvVar = "GORANDSEED" + compilationCacheDir = "/tmp" +) + +func run(modulePath string) error { + randSeed := os.Getenv(randSeedEnvVar) + if randSeed == "" { + return fmt.Errorf("%s is not set", randSeedEnvVar) + } + + cc, err := wazero.NewCompilationCacheWithDir(compilationCacheDir) + if err != nil { + return fmt.Errorf("creating compilation cache: %w", err) + } + + runtimeConfig := wazero.NewRuntimeConfig(). + // Enable debug info for better stack traces. + WithDebugInfoEnabled(true). + // Cache compilations to speed up subsequent runs. + WithCompilationCache(cc) + + ctx := context.Background() + r := wazero.NewRuntimeWithConfig(ctx, runtimeConfig) + defer r.Close(ctx) + + config := wazero.NewModuleConfig(). + WithEnv(randSeedEnvVar, randSeed). + WithStdin(os.Stdin). + WithStdout(os.Stdout). + WithStderr(os.Stderr). + // Mount filesystem. This is taken from wazero's CLI implementation. + WithFSConfig(wazero.NewFSConfig().(sysfs.FSConfig).WithSysFSMount(vfs.New("/"), "/")). + // All these time-related configuration options are to allow the module + // to access "real" time on the host. We could use this as a source of + // determinisme, but we currently compile the module with -faketime + // which allows us to virtually speed up time with sleeping goroutines. + // We could eventually revisit this, but this is fine for now. + WithSysNanosleep(). 
+ WithSysNanotime(). + WithSysWalltime(). + WithArgs(os.Args...) + + vfs.MustInstantiate(ctx, r) + + moduleBytes, err := os.ReadFile(modulePath) + if err != nil { + return fmt.Errorf("reading module: %w", err) + } + + compiledModule, err := r.CompileModule(ctx, moduleBytes) + if err != nil { + return fmt.Errorf("compiling module: %w", err) + } + + wasi_snapshot_preview1.MustInstantiate(ctx, r) + if _, err := r.InstantiateModule(ctx, compiledModule, config); err != nil { + return fmt.Errorf("instantiating module: %w", err) + } + return nil +} diff --git a/dst/vfs/file.go b/dst/vfs/file.go new file mode 100644 index 000000000..a9a457b4f --- /dev/null +++ b/dst/vfs/file.go @@ -0,0 +1,135 @@ +package vfs + +import ( + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/sys" +) + +type file struct { + // internal is the underlying file system to delegate to. This is + // purposefully not embedded so that any new methods need to be explicitly + // added. 
+ internal experimentalsys.File +} + +var _ experimentalsys.File = (*file)(nil) + +func newFile(f experimentalsys.File) *file { + return &file{internal: f} +} + +func (f *file) Dev() (uint64, experimentalsys.Errno) { + if isShutdown { + return 0, experimentalsys.EIO + } + return f.internal.Dev() +} + +func (f *file) Ino() (sys.Inode, experimentalsys.Errno) { + if isShutdown { + return 0, experimentalsys.EIO + } + return f.internal.Ino() +} + +func (f *file) IsDir() (bool, experimentalsys.Errno) { + if isShutdown { + return false, experimentalsys.EIO + } + return f.internal.IsDir() +} + +func (f *file) IsAppend() bool { + return f.internal.IsAppend() +} + +func (f *file) SetAppend(enable bool) experimentalsys.Errno { + if isShutdown { + return experimentalsys.EIO + } + return f.internal.SetAppend(enable) +} + +func (f *file) Stat() (sys.Stat_t, experimentalsys.Errno) { + if isShutdown { + return sys.Stat_t{}, experimentalsys.EIO + } + return f.internal.Stat() +} + +func (f *file) Read(buf []byte) (n int, errno experimentalsys.Errno) { + if isShutdown { + return 0, experimentalsys.EIO + } + return f.internal.Read(buf) +} + +func (f *file) Pread(buf []byte, off int64) (n int, errno experimentalsys.Errno) { + if isShutdown { + return 0, experimentalsys.EIO + } + return f.internal.Pread(buf, off) +} + +func (f *file) Seek(offset int64, whence int) (newOffset int64, errno experimentalsys.Errno) { + if isShutdown { + return 0, experimentalsys.EIO + } + return f.internal.Seek(offset, whence) +} + +func (f *file) Readdir(n int) (dirents []experimentalsys.Dirent, errno experimentalsys.Errno) { + if isShutdown { + return nil, experimentalsys.EIO + } + return f.internal.Readdir(n) +} + +func (f *file) Write(buf []byte) (n int, errno experimentalsys.Errno) { + if isShutdown { + return 0, experimentalsys.EIO + } + return f.internal.Write(buf) +} + +func (f *file) Pwrite(buf []byte, off int64) (n int, errno experimentalsys.Errno) { + if isShutdown { + return 0, 
experimentalsys.EIO + } + return f.internal.Pwrite(buf, off) +} + +func (f *file) Truncate(size int64) experimentalsys.Errno { + if isShutdown { + return experimentalsys.EIO + } + return f.internal.Truncate(size) +} + +func (f *file) Sync() experimentalsys.Errno { + if isShutdown { + return experimentalsys.EIO + } + return f.internal.Sync() +} + +func (f *file) Datasync() experimentalsys.Errno { + if isShutdown { + return experimentalsys.EIO + } + return f.internal.Datasync() +} + +func (f *file) Utimens(atim, mtim int64) experimentalsys.Errno { + if isShutdown { + return experimentalsys.EIO + } + return f.internal.Utimens(atim, mtim) +} + +func (f *file) Close() experimentalsys.Errno { + if isShutdown { + return experimentalsys.EIO + } + return f.internal.Close() +} diff --git a/dst/vfs/fs.go b/dst/vfs/fs.go new file mode 100644 index 000000000..299f802aa --- /dev/null +++ b/dst/vfs/fs.go @@ -0,0 +1,111 @@ +package vfs + +import ( + "io/fs" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/experimental/sysfs" + "github.com/tetratelabs/wazero/sys" +) + +type dstfs struct { + // internal is the underlying file system to delegate to. This is + // purposefully not embedded so that any new methods need to be explicitly + // added. 
+ internal experimentalsys.FS +} + +var _ experimentalsys.FS = (*dstfs)(nil) + +func New(dir string) experimentalsys.FS { + return &dstfs{internal: sysfs.DirFS(dir)} +} + +func (d *dstfs) OpenFile(path string, flag experimentalsys.Oflag, perm fs.FileMode) (experimentalsys.File, experimentalsys.Errno) { + if isShutdown { + return nil, experimentalsys.EIO + } + + f, err := d.internal.OpenFile(path, flag, perm) + if err != 0 { + return nil, err + } + return newFile(f), 0 +} + +func (d *dstfs) Lstat(path string) (sys.Stat_t, experimentalsys.Errno) { + if isShutdown { + return sys.Stat_t{}, experimentalsys.EIO + } + return d.internal.Lstat(path) +} + +func (d *dstfs) Stat(path string) (sys.Stat_t, experimentalsys.Errno) { + if isShutdown { + return sys.Stat_t{}, experimentalsys.EIO + } + return d.internal.Stat(path) +} + +func (d *dstfs) Mkdir(path string, perm fs.FileMode) experimentalsys.Errno { + if isShutdown { + return experimentalsys.EIO + } + return d.internal.Mkdir(path, perm) +} + +func (d *dstfs) Chmod(path string, perm fs.FileMode) experimentalsys.Errno { + if isShutdown { + return experimentalsys.EIO + } + return d.internal.Chmod(path, perm) +} + +func (d *dstfs) Rename(from, to string) experimentalsys.Errno { + if isShutdown { + return experimentalsys.EIO + } + return d.internal.Rename(from, to) +} + +func (d *dstfs) Rmdir(path string) experimentalsys.Errno { + if isShutdown { + return experimentalsys.EIO + } + return d.internal.Rmdir(path) +} + +func (d *dstfs) Unlink(path string) experimentalsys.Errno { + if isShutdown { + return experimentalsys.EIO + } + return d.internal.Unlink(path) +} + +func (d *dstfs) Link(oldPath, newPath string) experimentalsys.Errno { + if isShutdown { + return experimentalsys.EIO + } + return d.internal.Link(oldPath, newPath) +} + +func (d *dstfs) Symlink(oldPath, linkName string) experimentalsys.Errno { + if isShutdown { + return experimentalsys.EIO + } + return d.internal.Symlink(oldPath, linkName) +} + +func (d *dstfs) 
Readlink(path string) (string, experimentalsys.Errno) { + if isShutdown { + return "", experimentalsys.EIO + } + return d.internal.Readlink(path) +} + +func (d *dstfs) Utimens(path string, atim, mtim int64) experimentalsys.Errno { + if isShutdown { + return experimentalsys.EIO + } + return d.internal.Utimens(path, atim, mtim) +} diff --git a/dst/vfs/wasm.go b/dst/vfs/wasm.go new file mode 100644 index 000000000..410f537aa --- /dev/null +++ b/dst/vfs/wasm.go @@ -0,0 +1,28 @@ +package vfs + +import ( + "context" + + "github.com/tetratelabs/wazero" +) + +const wasmModuleName = "vfs" + +var isShutdown = false + +func shutdown() { + isShutdown = true +} + +func restart() { + isShutdown = false +} + +func MustInstantiate(ctx context.Context, r wazero.Runtime) { + if _, err := r.NewHostModuleBuilder(wasmModuleName). + NewFunctionBuilder().WithFunc(shutdown).Export("shutdown"). + NewFunctionBuilder().WithFunc(restart).Export("restart"). + Instantiate(ctx); err != nil { + panic(err) + } +} diff --git a/dst/vfs_hooks_nowasm.go b/dst/vfs_hooks_nowasm.go new file mode 100644 index 000000000..a6a1fba91 --- /dev/null +++ b/dst/vfs_hooks_nowasm.go @@ -0,0 +1,7 @@ +//go:build !wasm + +package dst + +func vfsShutdown() {} + +func vfsRestart() {} diff --git a/dst/vfs_hooks_wasm.go b/dst/vfs_hooks_wasm.go new file mode 100644 index 000000000..177c5d32b --- /dev/null +++ b/dst/vfs_hooks_wasm.go @@ -0,0 +1,9 @@ +//go:build wasm + +package dst + +//go:wasmimport vfs shutdown +func vfsShutdown() + +//go:wasmimport vfs restart +func vfsRestart() diff --git a/dynparquet/concat.go b/dynparquet/concat.go index b3af5ee03..784dc1471 100644 --- a/dynparquet/concat.go +++ b/dynparquet/concat.go @@ -1,6 +1,6 @@ package dynparquet -import "github.com/segmentio/parquet-go" +import "github.com/parquet-go/parquet-go" type concatenatedDynamicRowGroup struct { parquet.RowGroup diff --git a/dynparquet/dynamiccolumns.go b/dynparquet/dynamiccolumns.go index 
fb2425517..db3bb069a 100644 --- a/dynparquet/dynamiccolumns.go +++ b/dynparquet/dynamiccolumns.go @@ -10,41 +10,58 @@ var ErrMalformedDynamicColumns = errors.New("malformed dynamic columns string") func serializeDynamicColumns(dynamicColumns map[string][]string) string { names := make([]string, 0, len(dynamicColumns)) - for name := range dynamicColumns { + var size int + for name, cols := range dynamicColumns { names = append(names, name) + size += len(name) + + 2 // separators + for i := range cols { + size += len(cols[i]) + 1 + } } sort.Strings(names) - - str := "" + var str strings.Builder + str.Grow(size) for i, name := range names { if i != 0 { - str += ";" + str.WriteByte(';') + } + str.WriteString(name) + str.WriteByte(':') + for j := range dynamicColumns[name] { + if j != 0 { + str.WriteByte(',') + } + str.WriteString(dynamicColumns[name][j]) } - str += name + ":" + strings.Join(dynamicColumns[name], ",") } - - return str + return str.String() } -func deserializeDynamicColumns(dynColString string) (map[string][]string, error) { +func deserializeDynamicColumns(columns string) (map[string][]string, error) { dynCols := map[string][]string{} // handle case where the schema has no dynamic columnns - if len(dynColString) == 0 { + if len(columns) == 0 { return dynCols, nil } - - for _, dynString := range strings.Split(dynColString, ";") { - split := strings.Split(dynString, ":") - if len(split) != 2 { + var column string + for { + if columns == "" { + return dynCols, nil + } + column, columns, _ = strings.Cut(columns, ";") + name, labels, ok := strings.Cut(column, ":") + if !ok { return nil, ErrMalformedDynamicColumns } - labelValues := strings.Split(split[1], ",") - if len(labelValues) == 1 && labelValues[0] == "" { - labelValues = []string{} + values := make([]string, 0, strings.Count(labels, ",")) + + var label string + for labels != "" { + label, labels, _ = strings.Cut(labels, ",") + values = append(values, label) } - dynCols[split[0]] = labelValues + 
dynCols[name] = values } - - return dynCols, nil } diff --git a/dynparquet/dynamiccolumns_test.go b/dynparquet/dynamiccolumns_test.go index a5a9927eb..acbb2a787 100644 --- a/dynparquet/dynamiccolumns_test.go +++ b/dynparquet/dynamiccolumns_test.go @@ -52,3 +52,28 @@ func TestDynamicColumnsDeserialization_NoDynamicColumns(t *testing.T) { expected := map[string][]string{} require.Equal(t, expected, output) } + +func BenchmarkDeserialization(b *testing.B) { + input := "labels:__name__;pprof_labels:;pprof_num_labels:bytes" + for i := 0; i < b.N; i++ { + _, _ = deserializeDynamicColumns(input) + } +} + +func BenchmarkSerialize(b *testing.B) { + input := map[string][]string{ + "labels": { + "container", + "namespace", + "node", + "pod", + }, + "pprof_labels": { + "profile_id", + }, + } + + for i := 0; i < b.N; i++ { + _ = serializeDynamicColumns(input) + } +} diff --git a/dynparquet/example.go b/dynparquet/example.go index 345621d8c..8f6df4b8b 100644 --- a/dynparquet/example.go +++ b/dynparquet/example.go @@ -1,37 +1,27 @@ package dynparquet import ( - "fmt" - "sort" - "testing" + "github.com/parquet-go/parquet-go" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/memory" - "github.com/google/uuid" - "github.com/segmentio/parquet-go" - "github.com/stretchr/testify/require" - - schemapb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha1" - schemav2pb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha2" + "github.com/polarsignals/frostdb/samples" ) -type Label struct { - Name string - Value string -} - -type Sample struct { - ExampleType string - Labels []Label - Stacktrace []uuid.UUID - Timestamp int64 - Value int64 -} +type ( + Samples = samples.Samples + Sample = samples.Sample +) -type Samples []Sample +var ( + 
SampleDefinition = samples.SampleDefinition + NewTestSamples = samples.NewTestSamples + PrehashedSampleDefinition = samples.PrehashedSampleDefinition + SampleDefinitionWithFloat = samples.SampleDefinitionWithFloat + NewNestedSampleSchema = samples.NewNestedSampleSchema + LabelColumn = samples.LabelColumn + GenerateTestSamples = samples.GenerateTestSamples +) -func (s Samples) ToBuffer(schema *Schema) (*Buffer, error) { +func ToBuffer(s Samples, schema *Schema) (*Buffer, error) { names := s.SampleLabelNames() pb, err := schema.NewBuffer(map[string][]string{ @@ -54,160 +44,6 @@ func (s Samples) ToBuffer(schema *Schema) (*Buffer, error) { return pb, nil } -func (s Samples) ToRecord(schema *arrow.Schema) (arrow.Record, error) { - bld := array.NewRecordBuilder(memory.NewGoAllocator(), schema) - defer bld.Release() - - numLabels := len(schema.Fields()) - 4 - - for _, sample := range s { - if err := bld.Field(0).(*array.BinaryDictionaryBuilder).Append([]byte(sample.ExampleType)); err != nil { - return nil, fmt.Errorf("failed to append example type: %v", err) - } - for i := 0; i < numLabels; i++ { - found := false - for _, lbl := range sample.Labels { - if "labels."+lbl.Name == schema.Field(i+1).Name { - if err := bld.Field(i + 1).(*array.BinaryDictionaryBuilder).Append([]byte(lbl.Value)); err != nil { - return nil, fmt.Errorf("failed to append value: %v", err) - } - found = true - break - } - } - - if !found { - bld.Field(i + 1).AppendNull() - } - } - if err := bld.Field(1 + numLabels).(*array.BinaryDictionaryBuilder).Append(ExtractLocationIDs(sample.Stacktrace)); err != nil { - return nil, fmt.Errorf("failed to append stacktrace: %v", err) - } - bld.Field(2 + numLabels).(*array.Int64Builder).Append(sample.Timestamp) - bld.Field(3 + numLabels).(*array.Int64Builder).Append(sample.Value) - } - - return bld.NewRecord(), nil -} - -func (s Samples) SampleLabelNames() []string { - names := []string{} - seen := map[string]struct{}{} - - for _, sample := range s { - for _, label 
:= range sample.Labels { - if _, ok := seen[label.Name]; !ok { - names = append(names, label.Name) - seen[label.Name] = struct{}{} - } - } - } - sort.Strings(names) - - return names -} - -func (s Sample) ToParquetRow(labelNames []string) parquet.Row { - // The order of these appends is important. Parquet values must be in the - // order of the schema and the schema orders columns by their names. - - nameNumber := len(labelNames) - labelLen := len(s.Labels) - row := make([]parquet.Value, 0, nameNumber+3) - - row = append(row, parquet.ValueOf(s.ExampleType).Level(0, 0, 0)) - - i, j := 0, 0 - for i < nameNumber { - if labelNames[i] == s.Labels[j].Name { - row = append(row, parquet.ValueOf(s.Labels[j].Value).Level(0, 1, i+1)) - i++ - j++ - - if j >= labelLen { - for ; i < nameNumber; i++ { - row = append(row, parquet.ValueOf(nil).Level(0, 0, i+1)) - } - break - } - } else { - row = append(row, parquet.ValueOf(nil).Level(0, 0, i+1)) - i++ - } - } - row = append(row, parquet.ValueOf(ExtractLocationIDs(s.Stacktrace)).Level(0, 0, nameNumber+1)) - row = append(row, parquet.ValueOf(s.Timestamp).Level(0, 0, nameNumber+2)) - row = append(row, parquet.ValueOf(s.Value).Level(0, 0, nameNumber+3)) - - return row -} - -func ExtractLocationIDs(locs []uuid.UUID) []byte { - b := make([]byte, len(locs)*16) // UUID are 16 bytes thus multiply by 16 - index := 0 - for i := len(locs) - 1; i >= 0; i-- { - copy(b[index:index+16], locs[i][:]) - index += 16 - } - return b -} - -func SampleDefinition() *schemapb.Schema { - return &schemapb.Schema{ - Name: "test", - Columns: []*schemapb.Column{{ - Name: "example_type", - StorageLayout: &schemapb.StorageLayout{ - Type: schemapb.StorageLayout_TYPE_STRING, - Encoding: schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, - }, - Dynamic: false, - }, { - Name: "labels", - StorageLayout: &schemapb.StorageLayout{ - Type: schemapb.StorageLayout_TYPE_STRING, - Nullable: true, - Encoding: schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, - }, - Dynamic: true, - 
}, { - Name: "stacktrace", - StorageLayout: &schemapb.StorageLayout{ - Type: schemapb.StorageLayout_TYPE_STRING, - Encoding: schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, - }, - Dynamic: false, - }, { - Name: "timestamp", - StorageLayout: &schemapb.StorageLayout{ - Type: schemapb.StorageLayout_TYPE_INT64, - }, - Dynamic: false, - }, { - Name: "value", - StorageLayout: &schemapb.StorageLayout{ - Type: schemapb.StorageLayout_TYPE_INT64, - }, - Dynamic: false, - }}, - SortingColumns: []*schemapb.SortingColumn{{ - Name: "example_type", - Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, - }, { - Name: "labels", - Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, - NullsFirst: true, - }, { - Name: "timestamp", - Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, - }, { - Name: "stacktrace", - Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, - NullsFirst: true, - }}, - } -} - func NewSampleSchema() *Schema { s, err := SchemaFromDefinition(SampleDefinition()) if err != nil { @@ -215,152 +51,3 @@ func NewSampleSchema() *Schema { } return s } - -func NewTestSamples() Samples { - return Samples{ - { - ExampleType: "cpu", - Labels: []Label{{ - Name: "node", - Value: "test3", - }}, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 2, - Value: 5, - }, { - ExampleType: "cpu", - Labels: []Label{{ - Name: "namespace", - Value: "default", - }, { - Name: "pod", - Value: "test1", - }}, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 2, - Value: 3, - }, { - ExampleType: "cpu", - Labels: []Label{{ - Name: "container", - Value: "test2", - }, { - Name: "namespace", - Value: "default", - }}, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 2, - Value: 3, - }, - } -} - -func NestedListDef(name string, layout *schemav2pb.StorageLayout) *schemav2pb.Node_Group { - return &schemav2pb.Node_Group{ - Group: &schemav2pb.Group{ - Name: name, - Nodes: []*schemav2pb.Node{ // NOTE that this nested group structure for a list is for backwards compatability: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists - { - Type: &schemav2pb.Node_Group{ - Group: &schemav2pb.Group{ - Name: "list", - Repeated: true, - Nodes: []*schemav2pb.Node{ - { - Type: &schemav2pb.Node_Leaf{ - Leaf: &schemav2pb.Leaf{ - Name: "element", - StorageLayout: layout, - }, - }, - }, - }, - }, - }, - }, - }, - }, - } -} - -func LabelColumn(name string) *schemav2pb.Node { - return &schemav2pb.Node{ - Type: &schemav2pb.Node_Group{ - Group: &schemav2pb.Group{ - Name: "labels", - Nodes: []*schemav2pb.Node{ - { - Type: &schemav2pb.Node_Leaf{ - Leaf: &schemav2pb.Leaf{ - Name: name, - StorageLayout: &schemav2pb.StorageLayout{ - Type: schemav2pb.StorageLayout_TYPE_STRING, - Nullable: true, - Encoding: schemav2pb.StorageLayout_ENCODING_RLE_DICTIONARY, - }, - }, - }, - }, - }, - }, - }, - } -} - -func NewNestedSampleSchema(t testing.TB) *Schema { - t.Helper() - def := &schemav2pb.Schema{ - Root: &schemav2pb.Group{ - Name: "nested", - Nodes: []*schemav2pb.Node{ - { - Type: &schemav2pb.Node_Group{ - Group: &schemav2pb.Group{ - Name: "labels", - Nodes: []*schemav2pb.Node{}, - }, - }, - }, - { - Type: NestedListDef("timestamps", &schemav2pb.StorageLayout{ - Type: schemav2pb.StorageLayout_TYPE_INT64, - Nullable: true, - Encoding: schemav2pb.StorageLayout_ENCODING_RLE_DICTIONARY, - }), - }, - { - Type: NestedListDef("values", &schemav2pb.StorageLayout{ - Type: schemav2pb.StorageLayout_TYPE_INT64, - Nullable: true, - Encoding: 
schemav2pb.StorageLayout_ENCODING_RLE_DICTIONARY, - }), - }, - }, - }, - SortingColumns: []*schemav2pb.SortingColumn{ - { - Path: "labels", - Direction: schemav2pb.SortingColumn_DIRECTION_ASCENDING, - NullsFirst: true, - }, - { - Path: "timestamp", - Direction: schemav2pb.SortingColumn_DIRECTION_ASCENDING, - }, - }, - } - - schema, err := SchemaFromDefinition(def) - require.NoError(t, err) - - return schema -} diff --git a/dynparquet/hashed.go b/dynparquet/hashed.go new file mode 100644 index 000000000..87fd00129 --- /dev/null +++ b/dynparquet/hashed.go @@ -0,0 +1,286 @@ +package dynparquet + +import ( + "encoding/binary" + "fmt" + "math" + "strings" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/cespare/xxhash/v2" + "github.com/dgryski/go-metro" +) + +const prehashedPrefix = "hashed" + +func HashedColumnName(col string) string { + return prehashedPrefix + "." + col +} + +func IsHashedColumn(col string) bool { + return strings.HasPrefix(col, prehashedPrefix) +} + +// findHashedColumn finds the index of the column in the given fields that have been prehashed. +func FindHashedColumn(col string, fields []arrow.Field) int { + for j, f := range fields { + if HashedColumnName(col) == f.Name { + return j + } + } + + return -1 +} + +// prehashColumns prehashes the columns in the given record that have been marked as prehashed in the given schema. 
+func PrehashColumns(schema *Schema, r arrow.Record) arrow.Record { + bldr := array.NewInt64Builder(memory.DefaultAllocator) // TODO pass in allocator + defer bldr.Release() + + fields := r.Schema().Fields() + additionalFields := make([]arrow.Field, 0, len(fields)) + additionalColumns := make([]arrow.Array, 0, len(fields)) + defer func() { + for _, col := range additionalColumns { + col.Release() + } + }() + + for _, col := range schema.Columns() { + if !col.PreHash { + continue + } + + for i, f := range fields { + if col.Name == f.Name || (col.Dynamic && strings.HasPrefix(f.Name, col.Name)) { + additionalFields = append(additionalFields, arrow.Field{ + Name: HashedColumnName(f.Name), + Type: arrow.PrimitiveTypes.Int64, + }) + + // Hash the column + hashed := HashArray(r.Column(i)) + + // Build the new column + bldr.Reserve(len(hashed)) + for _, v := range hashed { + bldr.UnsafeAppend(int64(v)) + } + + additionalColumns = append(additionalColumns, bldr.NewArray()) + } + } + } + + if len(additionalColumns) == 0 { + r.Retain() // NOTE: we retain here because we expect the caller to release the record that we're returning + return r + } + + sch := arrow.NewSchema(append(fields, additionalFields...), nil) + return array.NewRecord(sch, append(r.Columns(), additionalColumns...), r.NumRows()) +} + +func HashArray(arr arrow.Array) []uint64 { + switch ar := arr.(type) { + case *array.String: + return hashStringArray(ar) + case *array.Binary: + return hashBinaryArray(ar) + case *array.Int64: + return hashInt64Array(ar) + case *array.Uint64: + return hashUint64Array(ar) + case *array.Boolean: + return hashBooleanArray(ar) + case *array.Dictionary: + return hashDictionaryArray(ar) + case *array.List: + return hashListArray(ar) + default: + panic("unsupported array type " + fmt.Sprintf("%T", arr)) + } +} + +func hashListArray(arr *array.List) (res []uint64) { + res = make([]uint64, arr.Len()) + digest := xxhash.New() + switch e := arr.ListValues().(type) { + case *array.Int64: 
+ var buf [8]byte + for i := 0; i < arr.Len(); i++ { + start, end := arr.ValueOffsets(i) + for j := start; j < end; j++ { + _, _ = digest.Write(binary.BigEndian.AppendUint64(buf[:0], + uint64(e.Value(int(j))))) + } + res[i] = digest.Sum64() + digest.Reset() + } + return + case *array.Float64: + var buf [8]byte + for i := 0; i < arr.Len(); i++ { + start, end := arr.ValueOffsets(i) + for j := start; j < end; j++ { + _, _ = digest.Write(binary.BigEndian.AppendUint64(buf[:0], + math.Float64bits(e.Value(int(j))))) + } + res[i] = digest.Sum64() + digest.Reset() + } + return + case *array.Boolean: + var buf [1]byte + for i := 0; i < arr.Len(); i++ { + start, end := arr.ValueOffsets(i) + for j := start; j < end; j++ { + if e.Value(int(j)) { + buf[0] = 2 + } else { + buf[0] = 1 + } + _, _ = digest.Write(buf[:]) + } + res[i] = digest.Sum64() + digest.Reset() + } + return + case *array.Binary: + for i := 0; i < arr.Len(); i++ { + start, end := arr.ValueOffsets(i) + for j := start; j < end; j++ { + _, _ = digest.Write(e.Value(int(j))) + } + res[i] = digest.Sum64() + digest.Reset() + } + return + case *array.String: + for i := 0; i < arr.Len(); i++ { + start, end := arr.ValueOffsets(i) + for j := start; j < end; j++ { + _, _ = digest.WriteString(e.Value(int(j))) + } + res[i] = digest.Sum64() + digest.Reset() + } + return + case *array.Dictionary: + switch dict := e.Dictionary().(type) { + case *array.Binary: + for i := 0; i < arr.Len(); i++ { + start, end := arr.ValueOffsets(i) + for j := start; j < end; j++ { + _, _ = digest.Write(dict.Value(e.GetValueIndex(int(j)))) + } + res[i] = digest.Sum64() + digest.Reset() + } + return + case *array.String: + for i := 0; i < arr.Len(); i++ { + start, end := arr.ValueOffsets(i) + for j := start; j < end; j++ { + _, _ = digest.WriteString(dict.Value(e.GetValueIndex(int(j)))) + } + res[i] = digest.Sum64() + digest.Reset() + } + return + default: + panic(fmt.Sprintf("list dictionary not of expected type: %T", dict)) + } + default: + 
panic(fmt.Sprintf("list not of expected type: %T", e)) + } +} + +func hashDictionaryArray(arr *array.Dictionary) []uint64 { + res := make([]uint64, arr.Len()) + for i := 0; i < arr.Len(); i++ { + if !arr.IsNull(i) { + switch dict := arr.Dictionary().(type) { + case *array.Binary: + res[i] = metro.Hash64(dict.Value(arr.GetValueIndex(i)), 0) + case *array.String: + res[i] = metro.Hash64([]byte(dict.Value(arr.GetValueIndex(i))), 0) + default: + panic("unsupported dictionary type " + fmt.Sprintf("%T", dict)) + } + } + } + return res +} + +func hashBinaryArray(arr *array.Binary) []uint64 { + res := make([]uint64, arr.Len()) + for i := 0; i < arr.Len(); i++ { + if !arr.IsNull(i) { + res[i] = metro.Hash64(arr.Value(i), 0) + } + } + return res +} + +func hashBooleanArray(arr *array.Boolean) []uint64 { + res := make([]uint64, arr.Len()) + for i := 0; i < arr.Len(); i++ { + if arr.IsNull(i) { + res[i] = 0 + continue + } + if arr.Value(i) { + res[i] = 2 + } else { + res[i] = 1 + } + } + return res +} + +func hashStringArray(arr *array.String) []uint64 { + res := make([]uint64, arr.Len()) + for i := 0; i < arr.Len(); i++ { + if !arr.IsNull(i) { + res[i] = metro.Hash64([]byte(arr.Value(i)), 0) + } + } + return res +} + +func hashInt64Array(arr *array.Int64) []uint64 { + res := make([]uint64, arr.Len()) + for i := 0; i < arr.Len(); i++ { + if !arr.IsNull(i) { + res[i] = uint64(arr.Value(i)) + } + } + return res +} + +func hashUint64Array(arr *array.Uint64) []uint64 { + res := make([]uint64, arr.Len()) + for i := 0; i < arr.Len(); i++ { + if !arr.IsNull(i) { + res[i] = arr.Value(i) + } + } + return res +} + +// RemoveHashedColumns removes the hashed columns from the record. 
+func RemoveHashedColumns(r arrow.Record) arrow.Record { + cols := make([]arrow.Array, 0, r.Schema().NumFields()) + fields := make([]arrow.Field, 0, r.Schema().NumFields()) + for i := 0; i < r.Schema().NumFields(); i++ { + if !IsHashedColumn(r.Schema().Field(i).Name) { + cols = append(cols, r.Column(i)) + fields = append(fields, r.Schema().Field(i)) + } + } + + return array.NewRecord(arrow.NewSchema(fields, nil), cols, r.NumRows()) +} diff --git a/dynparquet/nil_chunk.go b/dynparquet/nil_chunk.go index 82c6f0717..0f33fc8af 100644 --- a/dynparquet/nil_chunk.go +++ b/dynparquet/nil_chunk.go @@ -3,8 +3,8 @@ package dynparquet import ( "io" - "github.com/segmentio/parquet-go" - "github.com/segmentio/parquet-go/encoding" + "github.com/parquet-go/parquet-go" + "github.com/parquet-go/parquet-go/encoding" ) // NilColumnChunk is a column chunk that contains a single page with all null @@ -136,7 +136,7 @@ func (p *nilPage) NumNulls() int64 { // Bounds returns the minimum and maximum values of the page, since all values // in the page are null, both the minimum and maximum values are null. -func (p *nilPage) Bounds() (min, max parquet.Value, ok bool) { +func (p *nilPage) Bounds() (_, _ parquet.Value, _ bool) { return parquet.ValueOf(nil).Level(0, 0, p.columnIndex), parquet.ValueOf(nil).Level(0, 0, p.columnIndex), true } @@ -169,14 +169,6 @@ type nilValueReader struct { read int } -// min determines the minimum of two integers and returns the minimum. -func min(a, b int) int { - if a < b { - return a - } - return b -} - // ReadValues reads the next n values from the page and returns the amount of // values read. It attempts to write the number of values that the `values` // parameter can hold. 
If less values are left to be read than there is space @@ -217,7 +209,7 @@ func (p *nilPage) Data() encoding.Values { panic("not implemented") } -func (p *nilPage) Slice(i, j int64) parquet.Page { +func (p *nilPage) Slice(_, _ int64) parquet.Page { return &nilPage{ numValues: p.numValues, columnIndex: p.columnIndex, @@ -243,15 +235,15 @@ func (p *nilPages) SeekToRow(row int64) error { // ColumnIndex returns the column index of the column chunk. Since the // NilColumnChunk is a virtual column chunk only for in-memory purposes, it // returns nil. Implements the parquet.ColumnChunk interface. -func (c *NilColumnChunk) ColumnIndex() parquet.ColumnIndex { - return nil +func (c *NilColumnChunk) ColumnIndex() (parquet.ColumnIndex, error) { + return nil, nil } // OffsetIndex returns the offset index of the column chunk. Since the // NilColumnChunk is a virtual column chunk only for in-memory purposes, it // returns nil. Implements the parquet.ColumnChunk interface. -func (c *NilColumnChunk) OffsetIndex() parquet.OffsetIndex { - return nil +func (c *NilColumnChunk) OffsetIndex() (parquet.OffsetIndex, error) { + return nil, nil } // BloomFilter returns the bloomfilter of the column chunk. 
Since the diff --git a/dynparquet/nil_chunk_test.go b/dynparquet/nil_chunk_test.go index 3cfc57d5a..b32dfd12b 100644 --- a/dynparquet/nil_chunk_test.go +++ b/dynparquet/nil_chunk_test.go @@ -4,7 +4,7 @@ import ( "io" "testing" - "github.com/segmentio/parquet-go" + "github.com/parquet-go/parquet-go" "github.com/stretchr/testify/require" ) diff --git a/dynparquet/reader.go b/dynparquet/reader.go index 304675893..29b0f215e 100644 --- a/dynparquet/reader.go +++ b/dynparquet/reader.go @@ -5,7 +5,7 @@ import ( "errors" "fmt" - "github.com/segmentio/parquet-go" + "github.com/parquet-go/parquet-go" ) const ( @@ -23,7 +23,7 @@ type SerializedBuffer struct { func ReaderFromBytes(buf []byte) (*SerializedBuffer, error) { f, err := parquet.OpenFile(bytes.NewReader(buf), int64(len(buf))) if err != nil { - return nil, err + return nil, fmt.Errorf("error opening file from buffer: %w", err) } return NewSerializedBuffer(f) @@ -76,9 +76,9 @@ func (b *SerializedBuffer) String() string { numRowGroups := b.NumRowGroups() numRows := b.NumRows() w := newPrettyWriter() - _, _ = w.Write([]byte(fmt.Sprintf("row groups: %d\ttotal rows: %d\n", numRowGroups, numRows))) + _, _ = fmt.Fprintf(w, "num row groups: %d\tnum rows: %d\n", numRowGroups, numRows) for i := 0; i < numRowGroups; i++ { - _, _ = w.Write([]byte("---\n")) + _, _ = fmt.Fprint(w, "---\n") w.writePrettyRowGroup(b.DynamicRowGroup(i)) } _ = w.Flush() diff --git a/dynparquet/reader_test.go b/dynparquet/reader_test.go index f71ce61f2..41552b3a4 100644 --- a/dynparquet/reader_test.go +++ b/dynparquet/reader_test.go @@ -4,20 +4,20 @@ import ( "bytes" "testing" - "github.com/segmentio/parquet-go" + "github.com/parquet-go/parquet-go" "github.com/stretchr/testify/require" ) func TestReader(t *testing.T) { schema := NewSampleSchema() samples := NewTestSamples() - buf, err := samples.ToBuffer(schema) + buf, err := ToBuffer(samples, 
schema) require.NoError(t, err) b := bytes.NewBuffer(nil) w, err := schema.NewWriter(b, map[string][]string{ "labels": samples.SampleLabelNames(), - }) + }, false) require.NoError(t, err) _, err = parquet.CopyRows(w, buf.Rows()) @@ -33,7 +33,7 @@ func TestReader(t *testing.T) { func TestSerializedReader(t *testing.T) { schema := NewSampleSchema() samples := NewTestSamples() - buf, err := samples.ToBuffer(schema) + buf, err := ToBuffer(samples, schema) require.NoError(t, err) b := bytes.NewBuffer(nil) diff --git a/dynparquet/row.go b/dynparquet/row.go index 2370c0e8e..6de4a78ff 100644 --- a/dynparquet/row.go +++ b/dynparquet/row.go @@ -3,7 +3,7 @@ package dynparquet import ( "fmt" - "github.com/segmentio/parquet-go" + "github.com/parquet-go/parquet-go" ) type DynamicRows struct { @@ -77,12 +77,14 @@ func (s *Schema) RowLessThan(a, b *DynamicRow) bool { } func (s *Schema) Cmp(a, b *DynamicRow) int { - dynamicColumns := mergeDynamicColumnSets([]map[string][]string{a.DynamicColumns, b.DynamicColumns}) - cols := s.parquetSortingColumns(dynamicColumns) - sortingSchema, err := s.parquetSortingSchema(dynamicColumns) + dynamicColumns := MergeDynamicColumnSets([]map[string][]string{a.DynamicColumns, b.DynamicColumns}) + cols := s.ParquetSortingColumns(dynamicColumns) + pooledSchema, err := s.GetParquetSortingSchema(dynamicColumns) if err != nil { panic(fmt.Sprintf("unexpected schema state: %v", err)) } + sortingSchema := pooledSchema.Schema + defer s.PutPooledParquetSchema(pooledSchema) // Iterate over all the schema columns to prepare the rows for comparison. 
// The main reason we can't directly pass in {a,b}.Row is that they might diff --git a/dynparquet/row_test.go b/dynparquet/row_test.go index b7ee5cbbf..a715bef93 100644 --- a/dynparquet/row_test.go +++ b/dynparquet/row_test.go @@ -4,7 +4,7 @@ import ( "testing" "github.com/google/uuid" - "github.com/segmentio/parquet-go" + "github.com/parquet-go/parquet-go" "github.com/stretchr/testify/require" schemapb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha1" @@ -101,7 +101,7 @@ func TestLess(t *testing.T) { rowGroups := []DynamicRowGroup{} for _, sample := range samples { s := Samples{sample} - rg, err := s.ToBuffer(schema) + rg, err := ToBuffer(s, schema) require.NoError(t, err) rowGroups = append(rowGroups, rg) } @@ -161,7 +161,7 @@ func TestLess(t *testing.T) { modifiedSample := samples[0] modifiedSample.Timestamp++ - rg, err := (Samples{modifiedSample}).ToBuffer(schema) + rg, err := ToBuffer(Samples{modifiedSample}, schema) require.NoError(t, err) row4 := &DynamicRows{ Schema: rg.Schema(), @@ -187,10 +187,7 @@ func TestLess(t *testing.T) { schema := NewSampleSchema() sample := Sample{ ExampleType: "cpu", - Labels: []Label{{ - Name: "node", - Value: "test3", - }}, + Labels: map[string]string{"node": "test3"}, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, @@ -198,7 +195,7 @@ func TestLess(t *testing.T) { Timestamp: 2, Value: 5, } - rg, err := Samples{sample}.ToBuffer(schema) + rg, err := ToBuffer(Samples{sample}, schema) require.NoError(t, err) row1 := &DynamicRows{ Schema: rg.Schema(), @@ -212,7 +209,7 @@ func TestLess(t *testing.T) { // Add 1 to the last stacktrace byte. 
sample.Stacktrace[1][15] = 0x3 - rg, err = Samples{sample}.ToBuffer(schema) + rg, err = ToBuffer(Samples{sample}, schema) require.NoError(t, err) row2 := &DynamicRows{ Schema: rg.Schema(), @@ -231,8 +228,8 @@ func TestLess(t *testing.T) { func TestLessWithDynamicSchemas(t *testing.T) { schema := NewSampleSchema() samples := Samples{{ - Labels: []Label{ - {Name: "label12", Value: "value12"}, + Labels: map[string]string{ + "label12": "value12", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -241,8 +238,8 @@ func TestLessWithDynamicSchemas(t *testing.T) { Timestamp: 2, Value: 2, }, { - Labels: []Label{ - {Name: "label14", Value: "value14"}, + Labels: map[string]string{ + "label14": "value14", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -255,7 +252,7 @@ func TestLessWithDynamicSchemas(t *testing.T) { rowGroups := []DynamicRowGroup{} for _, sample := range samples { s := Samples{sample} - rg, err := s.ToBuffer(schema) + rg, err := ToBuffer(s, schema) require.NoError(t, err) rowGroups = append(rowGroups, rg) } diff --git a/dynparquet/schema.go b/dynparquet/schema.go index 0acb255c7..b7c748ad3 100644 --- a/dynparquet/schema.go +++ b/dynparquet/schema.go @@ -5,15 +5,17 @@ import ( "errors" "fmt" "io" + "regexp" + "slices" "sort" "strings" "sync" "text/tabwriter" - "github.com/segmentio/parquet-go" - "github.com/segmentio/parquet-go/compress" - "github.com/segmentio/parquet-go/encoding" - "github.com/segmentio/parquet-go/format" + "github.com/parquet-go/parquet-go" + "github.com/parquet-go/parquet-go/compress" + "github.com/parquet-go/parquet-go/encoding" + "github.com/parquet-go/parquet-go/format" "google.golang.org/protobuf/proto" schemapb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha1" @@ 
-30,6 +32,7 @@ type ColumnDefinition struct { Name string StorageLayout parquet.Node Dynamic bool + PreHash bool } // SortingColumn describes a column to sort by in a dynamic parquet schema. @@ -39,28 +42,34 @@ type SortingColumn interface { } // Ascending constructs a SortingColumn value which dictates to sort by the column in ascending order. -func Ascending(column string) SortingColumn { return ascending(column) } +func Ascending(column string) SortingColumn { return ascending{name: column, path: []string{column}} } // Descending constructs a SortingColumn value which dictates to sort by the column in descending order. -func Descending(column string) SortingColumn { return descending(column) } +func Descending(column string) SortingColumn { return descending{name: column, path: []string{column}} } // NullsFirst wraps the SortingColumn passed as argument so that it instructs // the row group to place null values first in the column. func NullsFirst(sortingColumn SortingColumn) SortingColumn { return nullsFirst{sortingColumn} } -type ascending string +type ascending struct { + name string + path []string +} -func (asc ascending) String() string { return fmt.Sprintf("ascending(%s)", string(asc)) } -func (asc ascending) ColumnName() string { return string(asc) } -func (asc ascending) Path() []string { return []string{string(asc)} } +func (asc ascending) String() string { return "ascending(" + asc.name + ")" } +func (asc ascending) ColumnName() string { return asc.name } +func (asc ascending) Path() []string { return asc.path } func (asc ascending) Descending() bool { return false } func (asc ascending) NullsFirst() bool { return false } -type descending string +type descending struct { + name string + path []string +} -func (desc descending) String() string { return fmt.Sprintf("descending(%s)", string(desc)) } -func (desc descending) ColumnName() string { return string(desc) } -func (desc descending) Path() []string { return []string{string(desc)} } +func (desc 
descending) String() string { return "descending(" + desc.name + ")" } +func (desc descending) ColumnName() string { return desc.name } +func (desc descending) Path() []string { return desc.path } func (desc descending) Descending() bool { return true } func (desc descending) NullsFirst() bool { return false } @@ -70,10 +79,12 @@ func (nf nullsFirst) String() string { return fmt.Sprintf("nulls_first+%s", nf func (nf nullsFirst) NullsFirst() bool { return true } func makeDynamicSortingColumn(dynamicColumnName string, sortingColumn SortingColumn) SortingColumn { + fullName := sortingColumn.ColumnName() + "." + dynamicColumnName return dynamicSortingColumn{ SortingColumn: sortingColumn, dynamicColumnName: dynamicColumnName, - fullName: sortingColumn.ColumnName() + "." + dynamicColumnName, + fullName: fullName, + path: []string{fullName}, } } @@ -82,6 +93,7 @@ type dynamicSortingColumn struct { SortingColumn dynamicColumnName string fullName string + path []string } func (dyn dynamicSortingColumn) String() string { @@ -92,7 +104,7 @@ func (dyn dynamicSortingColumn) ColumnName() string { return dyn.fullName } -func (dyn dynamicSortingColumn) Path() []string { return []string{dyn.ColumnName()} } +func (dyn dynamicSortingColumn) Path() []string { return dyn.path } // Schema is a dynamic parquet schema. It extends a parquet schema with the // ability that any column definition that is dynamic will have columns @@ -104,13 +116,68 @@ type Schema struct { sortingColumns []SortingColumn dynamicColumns []int - writers *sync.Map - buffers *sync.Map + UniquePrimaryIndex bool + + writers *sync.Map + buffers *sync.Map + sortingSchemas *sync.Map + parquetSchemas *sync.Map } -// IsDynamicColumn returns true if the passed in column is a dynamic column. -func (s *Schema) IsDynamicColumn(col string) bool { - return s.columns[s.columnIndexes[col]].Dynamic +// FindDynamicColumnForConcreteColumn returns a column definition for the +// column passed. 
So "labels.label1" would return the column definition for the +// dynamic column "labels" if it exists. +func (s *Schema) FindDynamicColumnForConcreteColumn(column string) (ColumnDefinition, bool) { + periodPosition := 0 + foundPeriod := false + for i, c := range column { + if c != '.' { + continue + } + if foundPeriod { + // Can't have more than one period. + return ColumnDefinition{}, false + } + foundPeriod = true + periodPosition = i + } + if !foundPeriod { + return ColumnDefinition{}, false + } + + return s.FindDynamicColumn(column[:periodPosition]) +} + +// FindDynamicColumn returns a dynamic column definition for the column passed. +func (s *Schema) FindDynamicColumn(dynamicColumnName string) (ColumnDefinition, bool) { + idx, ok := s.columnIndexes[dynamicColumnName] + if !ok { + return ColumnDefinition{}, false + } + + colDef := s.columns[idx] + // Note: This is different from the FindColumn function. + if !colDef.Dynamic { + return ColumnDefinition{}, false + } + + return colDef, true +} + +// FindColumn returns a column definition for the column passed. +func (s *Schema) FindColumn(column string) (ColumnDefinition, bool) { + idx, ok := s.columnIndexes[column] + if !ok { + return ColumnDefinition{}, false + } + + colDef := s.columns[idx] + // Note: This is different from the FindDynamicColumn function. 
+ if colDef.Dynamic { + return ColumnDefinition{}, false + } + + return colDef, true } func findLeavesFromNode(node *schemav2pb.Node) []ColumnDefinition { @@ -190,8 +257,11 @@ func nameFromNodeDef(node *schemav2pb.Node) string { } func SchemaFromDefinition(msg proto.Message) (*Schema, error) { - var columns []ColumnDefinition - var sortingColumns []SortingColumn + var ( + columns []ColumnDefinition + sortingColumns []SortingColumn + uniquePrimaryIndex bool + ) switch def := msg.(type) { case *schemapb.Schema: columns = make([]ColumnDefinition, 0, len(def.Columns)) @@ -204,6 +274,7 @@ func SchemaFromDefinition(msg proto.Message) (*Schema, error) { Name: col.Name, StorageLayout: layout, Dynamic: col.Dynamic, + PreHash: col.Prehash, }) } @@ -223,6 +294,7 @@ func SchemaFromDefinition(msg proto.Message) (*Schema, error) { } sortingColumns = append(sortingColumns, sortingColumn) } + uniquePrimaryIndex = def.UniquePrimaryIndex case *schemav2pb.Schema: columns = []ColumnDefinition{} for _, node := range def.Root.Nodes { @@ -245,13 +317,10 @@ func SchemaFromDefinition(msg proto.Message) (*Schema, error) { } sortingColumns = append(sortingColumns, sortingColumn) } + uniquePrimaryIndex = def.UniquePrimaryIndex } - return newSchema( - msg, - columns, - sortingColumns, - ), nil + return newSchema(msg, columns, sortingColumns, uniquePrimaryIndex), nil } // DefinitionFromParquetFile converts a parquet file into a schemapb.Schema. 
@@ -401,19 +470,19 @@ type v1storageLayoutWrapper struct { } func (s *v1storageLayoutWrapper) GetRepeated() bool { - return false + return s.Repeated } func (s *v1storageLayoutWrapper) GetTypeInt32() int32 { - return int32(s.StorageLayout.GetType()) + return int32(s.GetType()) } func (s *v1storageLayoutWrapper) GetEncodingInt32() int32 { - return int32(s.StorageLayout.GetEncoding()) + return int32(s.GetEncoding()) } func (s *v1storageLayoutWrapper) GetCompressionInt32() int32 { - return int32(s.StorageLayout.GetCompression()) + return int32(s.GetCompression()) } type v2storageLayoutWrapper struct { @@ -421,18 +490,18 @@ type v2storageLayoutWrapper struct { } func (s *v2storageLayoutWrapper) GetTypeInt32() int32 { - return int32(s.StorageLayout.GetType()) + return int32(s.GetType()) } func (s *v2storageLayoutWrapper) GetEncodingInt32() int32 { - return int32(s.StorageLayout.GetEncoding()) + return int32(s.GetEncoding()) } func (s *v2storageLayoutWrapper) GetCompressionInt32() int32 { - return int32(s.StorageLayout.GetCompression()) + return int32(s.GetCompression()) } -func StorageLayoutWrapper(layout *schemav2pb.StorageLayout) StorageLayout { +func StorageLayoutWrapper(_ *schemav2pb.StorageLayout) StorageLayout { return nil } @@ -447,6 +516,10 @@ func storageLayoutToParquetNode(l StorageLayout) (parquet.Node, error) { node = parquet.Leaf(parquet.DoubleType) case int32(schemapb.StorageLayout_TYPE_BOOL): node = parquet.Leaf(parquet.BooleanType) + case int32(schemapb.StorageLayout_TYPE_INT32): + node = parquet.Int(32) + case int32(schemapb.StorageLayout_TYPE_UINT64): + node = parquet.Uint(64) default: return nil, fmt.Errorf("unknown storage layout type: %v", l.GetTypeInt32()) } @@ -518,6 +591,7 @@ func newSchema( def proto.Message, columns []ColumnDefinition, sortingColumns []SortingColumn, + uniquePrimaryIndex bool, ) *Schema { sort.Slice(columns, func(i, j int) bool { return columns[i].Name < columns[j].Name @@ -529,12 +603,15 @@ func newSchema( } s := &Schema{ - 
def: def, - columns: columns, - sortingColumns: sortingColumns, - columnIndexes: columnIndexes, - writers: &sync.Map{}, - buffers: &sync.Map{}, + def: def, + columns: columns, + sortingColumns: sortingColumns, + columnIndexes: columnIndexes, + writers: &sync.Map{}, + buffers: &sync.Map{}, + sortingSchemas: &sync.Map{}, + parquetSchemas: &sync.Map{}, + UniquePrimaryIndex: uniquePrimaryIndex, } for i, col := range columns { @@ -573,7 +650,13 @@ func (s *Schema) Columns() []ColumnDefinition { return s.columns } -func (s *Schema) SortingColumns() []ColumnDefinition { +func (s *Schema) SortingColumns() []SortingColumn { + sCols := make([]SortingColumn, len(s.sortingColumns)) + copy(sCols, s.sortingColumns) + return sCols +} + +func (s *Schema) ColumnDefinitionsForSortingColumns() []ColumnDefinition { sCols := make([]ColumnDefinition, len(s.sortingColumns)) for i, col := range s.sortingColumns { sCols[i] = s.columns[s.columnIndexes[col.ColumnName()]] @@ -596,14 +679,9 @@ func (s *Schema) ParquetSchema() *parquet.Schema { } } -// DynamicParquetSchema returns the parquet schema for the dynamic schema with the +// dynamicParquetSchema returns the parquet schema for the dynamic schema with the // concrete dynamic column names given in the argument. -func (s Schema) DynamicParquetSchema( - dynamicColumns map[string][]string, -) ( - *parquet.Schema, - error, -) { +func (s Schema) dynamicParquetSchema(dynamicColumns map[string][]string) (*parquet.Schema, error) { switch def := s.def.(type) { case *schemav2pb.Schema: return ParquetSchemaFromV2Definition(def), nil @@ -614,10 +692,16 @@ func (s Schema) DynamicParquetSchema( dyn := dynamicColumnsFor(col.Name, dynamicColumns) for _, name := range dyn { g[col.Name+"."+name] = col.StorageLayout + if col.PreHash { + g[HashedColumnName(col.Name+"."+name)] = parquet.Int(64) // TODO(thor): Do we need compression etc. here? 
+ } } continue } g[col.Name] = col.StorageLayout + if col.PreHash { + g[HashedColumnName(col.Name)] = parquet.Int(64) // TODO(thor): Do we need compression etc. here? + } } return parquet.NewSchema(s.Name(), g), nil @@ -651,10 +735,10 @@ func (s Schema) parquetSortingSchema( return parquet.NewSchema(s.Name(), g), nil } -// parquetSortingColumns returns the parquet sorting columns for the dynamic +// ParquetSortingColumns returns the parquet sorting columns for the dynamic // sorting columns with the concrete dynamic column names given in the // argument. -func (s Schema) parquetSortingColumns( +func (s Schema) ParquetSortingColumns( dynamicColumns map[string][]string, ) []parquet.SortingColumn { cols := make([]parquet.SortingColumn, 0, len(s.sortingColumns)) @@ -690,6 +774,10 @@ func (b *Buffer) Reset() { b.buffer.Reset() } +func (b *Buffer) Size() int64 { + return b.buffer.Size() +} + func (b *Buffer) String() string { return prettyRowGroup(b) } @@ -931,24 +1019,36 @@ func (b *Buffer) DynamicRows() DynamicRowReader { return newDynamicRowGroupReader(b, b.fields) } +var ( + matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)") + matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])") +) + +func ToSnakeCase(str string) string { + snake := matchFirstCap.ReplaceAllString(str, "${1}_${2}") + snake = matchAllCap.ReplaceAllString(snake, "${1}_${2}") + return strings.ToLower(snake) +} + // NewBuffer returns a new buffer with a concrete parquet schema generated // using the given concrete dynamic column names. 
func (s *Schema) NewBuffer(dynamicColumns map[string][]string) (*Buffer, error) { - ps, err := s.DynamicParquetSchema(dynamicColumns) + ps, err := s.GetDynamicParquetSchema(dynamicColumns) if err != nil { return nil, fmt.Errorf("create parquet schema for buffer: %w", err) } + defer s.PutPooledParquetSchema(ps) - cols := s.parquetSortingColumns(dynamicColumns) + cols := s.ParquetSortingColumns(dynamicColumns) return &Buffer{ dynamicColumns: dynamicColumns, buffer: parquet.NewBuffer( - ps, + ps.Schema, parquet.SortingRowGroupConfig( parquet.SortingColumns(cols...), ), ), - fields: ps.Fields(), + fields: ps.Schema.Fields(), }, nil } @@ -972,7 +1072,7 @@ func (s *Schema) NewBufferV2(dynamicColumns ...*schemav2pb.Node) (*Buffer, error } ps := ParquetSchemaFromV2Definition(schema) - cols := s.parquetSortingColumns(map[string][]string{}) + cols := s.ParquetSortingColumns(map[string][]string{}) return &Buffer{ dynamicColumns: map[string][]string{}, // unused for v2 buffer: parquet.NewBuffer( @@ -986,7 +1086,7 @@ func (s *Schema) NewBufferV2(dynamicColumns ...*schemav2pb.Node) (*Buffer, error } func (s *Schema) SerializeBuffer(w io.Writer, buffer *Buffer) error { - pw, err := s.GetWriter(w, buffer.DynamicColumns()) + pw, err := s.GetWriter(w, buffer.DynamicColumns(), false) if err != nil { return fmt.Errorf("create writer: %w", err) } @@ -1012,13 +1112,14 @@ const bloomFilterBitsPerValue = 10 // NewWriter returns a new parquet writer with a concrete parquet schema // generated using the given concrete dynamic column names. 
-func (s *Schema) NewWriter(w io.Writer, dynamicColumns map[string][]string) (*parquet.GenericWriter[any], error) { - ps, err := s.DynamicParquetSchema(dynamicColumns) +func (s *Schema) NewWriter(w io.Writer, dynamicColumns map[string][]string, sorting bool, options ...parquet.WriterOption) (ParquetWriter, error) { + ps, err := s.GetDynamicParquetSchema(dynamicColumns) if err != nil { return nil, err } + defer s.PutPooledParquetSchema(ps) - cols := s.parquetSortingColumns(dynamicColumns) + cols := s.ParquetSortingColumns(dynamicColumns) bloomFilterColumns := make([]parquet.BloomFilterColumn, 0, len(cols)) for _, col := range cols { // Don't add bloom filters to boolean columns @@ -1036,8 +1137,8 @@ func (s *Schema) NewWriter(w io.Writer, dynamicColumns map[string][]string) (*pa ) } - return parquet.NewGenericWriter[any](w, - ps, + writerOptions := []parquet.WriterOption{ + ps.Schema, parquet.ColumnIndexSizeLimit(ColumnIndexSize), parquet.BloomFilters(bloomFilterColumns...), parquet.KeyValueMetadata( @@ -1047,41 +1148,105 @@ func (s *Schema) NewWriter(w io.Writer, dynamicColumns map[string][]string) (*pa parquet.SortingWriterConfig( parquet.SortingColumns(cols...), ), - ), nil + } + writerOptions = append(writerOptions, options...) 
+ if sorting { + return parquet.NewSortingWriter[any](w, 32*1024, writerOptions...), nil + } + return parquet.NewGenericWriter[any](w, writerOptions...), nil } -type PooledWriter struct { - pool *sync.Pool - *parquet.GenericWriter[any] +type ParquetWriter interface { + Schema() *parquet.Schema + Write(rows []any) (int, error) + WriteRows(rows []parquet.Row) (int, error) + Flush() error + Close() error + Reset(writer io.Writer) } -func (p PooledWriter) ParquetWriter() *parquet.GenericWriter[any] { - return p.GenericWriter +type PooledWriter struct { + pool *sync.Pool + ParquetWriter } -func (s *Schema) GetWriter(w io.Writer, dynamicColumns map[string][]string) (*PooledWriter, error) { +func (s *Schema) GetWriter(w io.Writer, dynamicColumns map[string][]string, sorting bool) (*PooledWriter, error) { key := serializeDynamicColumns(dynamicColumns) - pool, _ := s.writers.LoadOrStore(key, &sync.Pool{}) + pool, _ := s.writers.LoadOrStore(fmt.Sprintf("%s,sorting=%t", key, sorting), &sync.Pool{}) pooled := pool.(*sync.Pool).Get() if pooled == nil { - new, err := s.NewWriter(w, dynamicColumns) + pw, err := s.NewWriter(w, dynamicColumns, sorting) if err != nil { return nil, err } return &PooledWriter{ pool: pool.(*sync.Pool), - GenericWriter: new, + ParquetWriter: pw, }, nil } - pooled.(*PooledWriter).GenericWriter.Reset(w) + pooled.(*PooledWriter).Reset(w) return pooled.(*PooledWriter), nil } +type PooledParquetSchema struct { + pool *sync.Pool + Schema *parquet.Schema +} + +// GetParquetSortingSchema returns a parquet schema of the sorting columns and +// the given dynamic columns. +// The difference with GetDynamicParquetSchema is that non-sorting columns are elided. 
+func (s *Schema) GetParquetSortingSchema(dynamicColumns map[string][]string) (*PooledParquetSchema, error) { + key := serializeDynamicColumns(dynamicColumns) + pool, _ := s.sortingSchemas.LoadOrStore(key, &sync.Pool{}) + pooled := pool.(*sync.Pool).Get() + if pooled == nil { + ps, err := s.parquetSortingSchema(dynamicColumns) + if err != nil { + return nil, err + } + return &PooledParquetSchema{ + pool: pool.(*sync.Pool), + Schema: ps, + }, nil + } + return pooled.(*PooledParquetSchema), nil +} + +// GetDynamicParquetSchema returns a parquet schema of all the columns and +// the given dynamic columns. +// The difference with GetParquetSortingSchema is that all columns are included +// in the parquet schema. +func (s *Schema) GetDynamicParquetSchema(dynamicColumns map[string][]string) (*PooledParquetSchema, error) { + key := serializeDynamicColumns(dynamicColumns) + pool, _ := s.parquetSchemas.LoadOrStore(key, &sync.Pool{}) + pooled := pool.(*sync.Pool).Get() + if pooled == nil { + ps, err := s.dynamicParquetSchema(dynamicColumns) + if err != nil { + return nil, err + } + return &PooledParquetSchema{ + pool: pool.(*sync.Pool), + Schema: ps, + }, nil + } + return pooled.(*PooledParquetSchema), nil +} + +func (s *Schema) PutPooledParquetSchema(ps *PooledParquetSchema) { + ps.pool.Put(ps) +} + func (s *Schema) PutWriter(w *PooledWriter) { - w.GenericWriter.Reset(bytes.NewBuffer(nil)) + w.Reset(bytes.NewBuffer(nil)) w.pool.Put(w) } +func (s *Schema) ResetWriters() { + s.writers = &sync.Map{} +} + type PooledBuffer struct { pool *sync.Pool *Buffer @@ -1092,23 +1257,27 @@ func (s *Schema) GetBuffer(dynamicColumns map[string][]string) (*PooledBuffer, e pool, _ := s.buffers.LoadOrStore(key, &sync.Pool{}) pooled := pool.(*sync.Pool).Get() if pooled == nil { - new, err := s.NewBuffer(dynamicColumns) + pw, err := s.NewBuffer(dynamicColumns) if err != nil { return nil, err } return &PooledBuffer{ pool: pool.(*sync.Pool), - Buffer: new, + Buffer: pw, }, nil } return
pooled.(*PooledBuffer), nil } func (s *Schema) PutBuffer(b *PooledBuffer) { - b.Buffer.Reset() + b.Reset() b.pool.Put(b) } +func (s *Schema) ResetBuffers() { + s.buffers = &sync.Map{} +} + // MergedRowGroup allows wrapping any parquet.RowGroup to implement the // DynamicRowGroup interface by specifying the concrete dynamic column names // the RowGroup's schema contains. @@ -1135,38 +1304,74 @@ func (r *MergedRowGroup) DynamicRows() DynamicRowReader { return newDynamicRowGroupReader(r, r.fields) } +type mergeOption struct { + dynamicColumns map[string][]string + // alreadySorted indicates that the row groups are already sorted and + // non-overlapping. This results in a parquet.MultiRowGroup, which is just + // a wrapper without the full-scale merging infrastructure. + alreadySorted bool +} + +type MergeOption func(m *mergeOption) + +func WithDynamicCols(cols map[string][]string) MergeOption { + return func(m *mergeOption) { + m.dynamicColumns = cols + } +} + +func WithAlreadySorted() MergeOption { + return func(m *mergeOption) { + m.alreadySorted = true + } +} + // MergeDynamicRowGroups merges the given dynamic row groups into a single // dynamic row group. It merges the parquet schema in a non-conflicting way by // merging all the concrete dynamic column names and generating a superset // parquet schema that all given dynamic row groups are compatible with. 
-func (s *Schema) MergeDynamicRowGroups(rowGroups []DynamicRowGroup) (DynamicRowGroup, error) { +func (s *Schema) MergeDynamicRowGroups(rowGroups []DynamicRowGroup, options ...MergeOption) (DynamicRowGroup, error) { if len(rowGroups) == 1 { return rowGroups[0], nil } - dynamicColumns := mergeDynamicRowGroupDynamicColumns(rowGroups) - ps, err := s.DynamicParquetSchema(dynamicColumns) + // Apply options + m := &mergeOption{} + for _, option := range options { + option(m) + } + + dynamicColumns := m.dynamicColumns + if dynamicColumns == nil { + dynamicColumns = mergeDynamicRowGroupDynamicColumns(rowGroups) + } + ps, err := s.GetDynamicParquetSchema(dynamicColumns) if err != nil { return nil, fmt.Errorf("create merged parquet schema merging %d row groups: %w", len(rowGroups), err) } + defer s.PutPooledParquetSchema(ps) - cols := s.parquetSortingColumns(dynamicColumns) + cols := s.ParquetSortingColumns(dynamicColumns) adapters := make([]parquet.RowGroup, 0, len(rowGroups)) for _, rowGroup := range rowGroups { adapters = append(adapters, NewDynamicRowGroupMergeAdapter( - ps, + ps.Schema, cols, dynamicColumns, rowGroup, )) } + var opts []parquet.RowGroupOption + if !m.alreadySorted { + opts = append(opts, parquet.SortingRowGroupConfig( + parquet.SortingColumns(cols...), + )) + } merge, err := parquet.MergeRowGroups( adapters, - parquet.SortingRowGroupConfig( - parquet.SortingColumns(cols...), - ), + opts..., ) if err != nil { return nil, fmt.Errorf("create merge row groups: %w", err) @@ -1175,7 +1380,7 @@ func (s *Schema) MergeDynamicRowGroups(rowGroups []DynamicRowGroup) (DynamicRowG return &MergedRowGroup{ RowGroup: merge, DynCols: dynamicColumns, - fields: ps.Fields(), + fields: ps.Schema.Fields(), }, nil } @@ -1188,50 +1393,70 @@ func mergeDynamicRowGroupDynamicColumns(rowGroups []DynamicRowGroup) map[string] sets = append(sets, batch.DynamicColumns()) } - return mergeDynamicColumnSets(sets) + return MergeDynamicColumnSets(sets) } -func mergeDynamicColumnSets(sets 
[]map[string][]string) map[string][]string { - dynamicColumns := map[string][][]string{} - for _, set := range sets { - for k, v := range set { - _, seen := dynamicColumns[k] - if !seen { - dynamicColumns[k] = [][]string{} - } - dynamicColumns[k] = append(dynamicColumns[k], v) - } - } +func MergeDynamicColumnSets(sets []map[string][]string) map[string][]string { + m := newDynamicColumnSetMerger() + defer m.Release() + return m.Merge(sets) +} - resultDynamicColumns := map[string][]string{} - for name, dynCols := range dynamicColumns { - resultDynamicColumns[name] = mergeDynamicColumns(dynCols) - } +type dynColSet struct { + keys []string + seen map[string]struct{} + values []string +} - return resultDynamicColumns +func newDynamicColumnSetMerger() *dynColSet { + return mergeSetPool.Get().(*dynColSet) } -// mergeDynamicColumns merges the given concrete dynamic column names into a -// single superset. It assumes that the given DynamicColumns are all for the -// same dynamic column name. -func mergeDynamicColumns(dyn [][]string) []string { - return mergeStrings(dyn) +var mergeSetPool = &sync.Pool{New: func() any { + return &dynColSet{ + seen: make(map[string]struct{}), + keys: make([]string, 0, 16), // This is arbitrary we anticipate to be lower than values size + values: make([]string, 0, 64), // This is arbitrary + } +}} + +func (c *dynColSet) Release() { + c.keys = c.keys[:0] + clear(c.seen) + c.values = c.values[:0] + mergeSetPool.Put(c) } -// mergeStrings merges the given sorted string slices into a single sorted and -// deduplicated slice of strings. 
-func mergeStrings(str [][]string) []string { - result := []string{} - seen := map[string]struct{}{} - for _, s := range str { - for _, n := range s { - if _, ok := seen[n]; !ok { - result = append(result, n) - seen[n] = struct{}{} +func (c *dynColSet) Merge(sets []map[string][]string) (o map[string][]string) { + // TODO:(gernest) use k-way merge + o = make(map[string][]string) + for i := range sets { + for k := range sets[i] { + if _, ok := c.seen[k]; !ok { + c.keys = append(c.keys, k) + c.seen[k] = struct{}{} } } } - return MergeDeduplicatedDynCols(result) + for i := range c.keys { + clear(c.seen) + for j := range sets { + ls, ok := sets[j][c.keys[i]] + if !ok { + continue + } + for k := range ls { + if _, ok := c.seen[ls[k]]; !ok { + c.values = append(c.values, ls[k]) + c.seen[ls[k]] = struct{}{} + } + } + } + sort.Strings(c.values) + o[c.keys[i]] = slices.Clone(c.values) + c.values = c.values[:0] + } + return } // MergeDeduplicatedDynCols is a light wrapper over sorting the deduplicated @@ -1419,6 +1644,12 @@ type remappedPage struct { remappedIndex int } +type releasable interface { + Release() +} + +var _ releasable = (*remappedPage)(nil) + // Column returns the page's column index in the schema. It returns the // configured remapped index. Implements the parquet.Page interface. func (p *remappedPage) Column() int { @@ -1435,6 +1666,10 @@ func (p *remappedPage) Values() parquet.ValueReader { } } +func (p *remappedPage) Release() { + parquet.Release(p.Page) +} + // Values returns the page's values. It ensures that all values read will be // remapped to have the configured remapped index. Implements the // parquet.ValueReader interface. 
diff --git a/dynparquet/schema_test.go b/dynparquet/schema_test.go index 445af7b0d..0e7af16f8 100644 --- a/dynparquet/schema_test.go +++ b/dynparquet/schema_test.go @@ -6,11 +6,12 @@ import ( "io" "math/rand" "testing" - "time" "github.com/google/uuid" - "github.com/segmentio/parquet-go" + "github.com/parquet-go/parquet-go" "github.com/stretchr/testify/require" + + schemapb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha1" ) func TestMergeRowBatches(t *testing.T) { @@ -20,12 +21,11 @@ func TestMergeRowBatches(t *testing.T) { rowGroups := []DynamicRowGroup{} for _, sample := range samples { s := Samples{sample} - rg, err := s.ToBuffer(schema) + rg, err := ToBuffer(s, schema) require.NoError(t, err) rowGroups = append(rowGroups, rg) } - rand.Seed(time.Now().UnixNano()) rand.Shuffle(len(rowGroups), func(i, j int) { rowGroups[i], rowGroups[j] = rowGroups[j], rowGroups[i] }) merge, err := schema.MergeDynamicRowGroups(rowGroups) @@ -162,9 +162,9 @@ func TestMultipleIterations(t *testing.T) { schema := NewSampleSchema() samples := Samples{{ - Labels: []Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -173,10 +173,10 @@ func TestMultipleIterations(t *testing.T) { Timestamp: 1, Value: 1, }, { - Labels: []Label{ - {Name: "label1", Value: "value2"}, - {Name: "label2", Value: "value2"}, - {Name: "label3", Value: "value3"}, + Labels: map[string]string{ + "label1": "value2", + "label2": "value2", + "label3": "value3", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -185,10 +185,10 @@ func TestMultipleIterations(t *testing.T) { Timestamp: 2, Value: 2, }, { - Labels: []Label{ - {Name: "label1", Value: "value3"}, - 
{Name: "label2", Value: "value2"}, - {Name: "label4", Value: "value4"}, + Labels: map[string]string{ + "label1": "value3", + "label2": "value2", + "label4": "value4", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -198,7 +198,7 @@ func TestMultipleIterations(t *testing.T) { Value: 3, }} - dbuf, err := samples.ToBuffer(schema) + dbuf, err := ToBuffer(samples, schema) require.NoError(t, err) buf := dbuf.buffer @@ -235,9 +235,9 @@ func Test_SchemaFromParquetFile(t *testing.T) { schema := NewSampleSchema() samples := Samples{{ - Labels: []Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -246,10 +246,10 @@ func Test_SchemaFromParquetFile(t *testing.T) { Timestamp: 1, Value: 1, }, { - Labels: []Label{ - {Name: "label1", Value: "value2"}, - {Name: "label2", Value: "value2"}, - {Name: "label3", Value: "value3"}, + Labels: map[string]string{ + "label1": "value2", + "label2": "value2", + "label3": "value3", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -258,10 +258,10 @@ func Test_SchemaFromParquetFile(t *testing.T) { Timestamp: 2, Value: 2, }, { - Labels: []Label{ - {Name: "label1", Value: "value3"}, - {Name: "label2", Value: "value2"}, - {Name: "label4", Value: "value4"}, + Labels: map[string]string{ + "label1": "value3", + "label2": "value2", + "label4": "value4", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -271,7 +271,7 @@ func Test_SchemaFromParquetFile(t *testing.T) { Value: 3, }} - dbuf, err := samples.ToBuffer(schema) + dbuf, err := ToBuffer(samples, schema) require.NoError(t, err) b := bytes.NewBuffer(nil) @@ -284,3 +284,112 @@ func 
Test_SchemaFromParquetFile(t *testing.T) { require.NoError(t, err) require.Equal(t, SampleDefinition(), def) } + +func TestIsDynamicColumn(t *testing.T) { + for _, tc := range []struct { + input string + cName string + notDynamic bool + expected bool + }{ + { + input: "labels.label1", + cName: "labels", + expected: true, + }, + { + input: "labels.label1.cannothavetwoperiods", + cName: "labels", + expected: false, + }, + { + input: "columnnotfound.label1", + cName: "labels", + expected: false, + }, + { + input: "labels.columnnotdynamic", + cName: "labels", + notDynamic: true, + expected: false, + }, + { + input: "", + cName: "labels", + expected: false, + }, + } { + def := &schemapb.Schema{ + Name: "test_schema", + Columns: []*schemapb.Column{{ + Name: tc.cName, + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_INT64, + Encoding: schemapb.StorageLayout_ENCODING_PLAIN_UNSPECIFIED, + }, + Dynamic: !tc.notDynamic, + }}, + } + schema, err := SchemaFromDefinition(def) + require.NoError(t, err) + schema.FindDynamicColumnForConcreteColumn(tc.cName) + } +} + +func BenchmarkIsDynamicColumn(b *testing.B) { + def := &schemapb.Schema{ + Name: "test_schema", + Columns: []*schemapb.Column{{ + Name: "labels", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_INT64, + Encoding: schemapb.StorageLayout_ENCODING_PLAIN_UNSPECIFIED, + }, + Dynamic: true, + }}, + } + schema, err := SchemaFromDefinition(def) + require.NoError(b, err) + for i := 0; i < b.N; i++ { + schema.FindDynamicColumnForConcreteColumn("labels.label1") + } +} + +func TestMergeDynamicColumnSets(t *testing.T) { + sets := []map[string][]string{ + {"labels": {"label1", "label2"}}, + {"labels": {"label1", "label2"}}, + {"labels": {"label1", "label2", "label3"}}, + { + "labels": {"label1", "label2"}, + "foo": {"label1", "label2"}, + }, + { + "labels": {"label1", "label2", "label3"}, + "foo": {"label1", "label2", "label3"}, + }, + } + require.Equal(t, 
map[string][]string{ + "foo": {"label1", "label2", "label3"}, + "labels": {"label1", "label2", "label3"}, + }, MergeDynamicColumnSets(sets)) +} + +func BenchmarkMergeDynamicColumnSets(b *testing.B) { + sets := []map[string][]string{ + {"labels": {"label1", "label2"}}, + {"labels": {"label1", "label2"}}, + {"labels": {"label1", "label2", "label3"}}, + { + "labels": {"label1", "label2"}, + "foo": {"label1", "label2"}, + }, + { + "labels": {"label1", "label2", "label3"}, + "foo": {"label1", "label2", "label3"}, + }, + } + for i := 0; i < b.N; i++ { + MergeDynamicColumnSets(sets) + } +} diff --git a/examples/aggregations/aggregations.go b/examples/aggregations/aggregations.go new file mode 100644 index 000000000..34d9fb588 --- /dev/null +++ b/examples/aggregations/aggregations.go @@ -0,0 +1,108 @@ +package main + +import ( + "context" + "fmt" + "log" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/memory" + + "github.com/polarsignals/frostdb" + "github.com/polarsignals/frostdb/query" + "github.com/polarsignals/frostdb/query/logicalplan" +) + +// This example demonstrates how to use FrostDB's to aggregate data. +func main() { + // Create a new column store + columnstore, _ := frostdb.New() + defer columnstore.Close() + + // Open up a database in the column store + database, _ := columnstore.DB(context.Background(), "weather_db") + + // Create values to insert into the database. 
We support a dynamic structure for city to + // accommodate cities in different regions + type WeatherRecord struct { + City map[string]string `frostdb:",rle_dict,asc(0)"` + Day string `frostdb:",rle_dict,asc(1)"` + Snowfall float64 + } + + // Create a table named snowfall_table in our database + table, _ := frostdb.NewGenericTable[WeatherRecord]( + database, "snowfall_table", memory.DefaultAllocator, + ) + defer table.Release() + + montreal := map[string]string{ + "name": "Montreal", + "province": "Quebec", + } + toronto := map[string]string{ + "name": "Toronto", + "province": "Ontario", + } + minneapolis := map[string]string{ + "name": "Minneapolis", + "state": "Minnesota", + } + + _, _ = table.Write(context.Background(), + WeatherRecord{Day: "Mon", Snowfall: 20, City: montreal}, + WeatherRecord{Day: "Tue", Snowfall: 0o0, City: montreal}, + WeatherRecord{Day: "Wed", Snowfall: 30, City: montreal}, + WeatherRecord{Day: "Thu", Snowfall: 25.1, City: montreal}, + WeatherRecord{Day: "Fri", Snowfall: 10, City: montreal}, + WeatherRecord{Day: "Mon", Snowfall: 15, City: toronto}, + WeatherRecord{Day: "Tue", Snowfall: 25, City: toronto}, + WeatherRecord{Day: "Wed", Snowfall: 30, City: toronto}, + WeatherRecord{Day: "Thu", Snowfall: 0o0, City: toronto}, + WeatherRecord{Day: "Fri", Snowfall: 0o5, City: toronto}, + WeatherRecord{Day: "Mon", Snowfall: 40.8, City: minneapolis}, + WeatherRecord{Day: "Tue", Snowfall: 15, City: minneapolis}, + WeatherRecord{Day: "Wed", Snowfall: 32.3, City: minneapolis}, + WeatherRecord{Day: "Thu", Snowfall: 10, City: minneapolis}, + WeatherRecord{Day: "Fri", Snowfall: 12, City: minneapolis}, + ) + + // Create a new query engine to retrieve data + engine := query.NewEngine(memory.DefaultAllocator, database.TableProvider()) + + // snowfall statistics by city: + err := engine.ScanTable("snowfall_table"). 
+ Aggregate( + []*logicalplan.AggregationFunction{ + logicalplan.Max(logicalplan.Col("snowfall")), + logicalplan.Min(logicalplan.Col("snowfall")), + logicalplan.Avg(logicalplan.Col("snowfall")), + }, + []logicalplan.Expr{logicalplan.Col("city.name")}, + ). + Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + // print the results + fmt.Println(r) + return nil + }) + if err != nil { + log.Fatal("snowfall statistics by city:", err) + } + + // Total snowfall on each day of week: + err = engine.ScanTable("snowfall_table"). + Aggregate( + []*logicalplan.AggregationFunction{ + logicalplan.Sum(logicalplan.Col("snowfall")), + }, + []logicalplan.Expr{logicalplan.Col("day")}, + ). + Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + // print the results + fmt.Println(r) + return nil + }) + if err != nil { + log.Fatal("total snowfall on each day of week:", err) + } +} diff --git a/examples/simple.go b/examples/simple.go deleted file mode 100644 index e7ac49b05..000000000 --- a/examples/simple.go +++ /dev/null @@ -1,120 +0,0 @@ -package main - -import ( - "context" - "fmt" - - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/memory" - - "github.com/polarsignals/frostdb" - "github.com/polarsignals/frostdb/dynparquet" - schemapb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha1" - "github.com/polarsignals/frostdb/query" - "github.com/polarsignals/frostdb/query/logicalplan" -) - -// This example demonstrates how to create a simple FrostDB with a dynamic labels column that stores float values. 
-func main() { - - // Create a new column store - columnstore, _ := frostdb.New() - defer columnstore.Close() - - // Open up a database in the column store - database, _ := columnstore.DB(context.Background(), "simple_db") - - // Define our simple schema of labels and values - schema, _ := simpleSchema() - - // Create a table named simple in our database - table, _ := database.Table( - "simple_table", - frostdb.NewTableConfig(schema), - ) - - type FirstLast struct { - FirstName string - Surname string - } - - type Simple struct { - Names FirstLast - Value int64 - } - // Create values to insert into the database these first rows havel dynamic label names of 'firstname' and 'surname' - frederic := Simple{ - Names: FirstLast{ - FirstName: "Frederic", - Surname: "Brancz", - }, - Value: 100, - } - - thor := Simple{ - Names: FirstLast{ - FirstName: "Thor", - Surname: "Hansen", - }, - Value: 99, - } - _, _ = table.Write(context.Background(), frederic, thor) - - // Now we can insert rows that have middle names into our dynamic column - matthias := struct { - Names struct { - FirstName string - MiddleName string - Surname string - } - Value int64 - }{ - Names: struct { - FirstName string - MiddleName string - Surname string - }{ - FirstName: "Matthias", - MiddleName: "Oliver Rainer", - Surname: "Loibl", - }, - Value: 101, - } - _, _ = table.Write(context.Background(), matthias) - - // Create a new query engine to retrieve data and print the results - engine := query.NewEngine(memory.DefaultAllocator, database.TableProvider()) - _ = engine.ScanTable("simple_table"). - Project(logicalplan.DynCol("names")). // We don't know all dynamic columns at query time, but we want all of them to be returned. 
- Filter( - logicalplan.Col("names.first_name").Eq(logicalplan.Literal("Frederic")), - ).Execute(context.Background(), func(ctx context.Context, r arrow.Record) error { - fmt.Println(r) - return nil - }) -} - -func simpleSchema() (*dynparquet.Schema, error) { - return dynparquet.SchemaFromDefinition(&schemapb.Schema{ - Name: "simple_schema", - Columns: []*schemapb.Column{{ - Name: "names", - StorageLayout: &schemapb.StorageLayout{ - Type: schemapb.StorageLayout_TYPE_STRING, - Encoding: schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, - Nullable: true, - }, - Dynamic: true, - }, { - Name: "value", - StorageLayout: &schemapb.StorageLayout{ - Type: schemapb.StorageLayout_TYPE_INT64, - }, - Dynamic: false, - }}, - SortingColumns: []*schemapb.SortingColumn{{ - Name: "names", - Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, - }}, - }) -} diff --git a/examples/simple/simple.go b/examples/simple/simple.go new file mode 100644 index 000000000..78d624f90 --- /dev/null +++ b/examples/simple/simple.go @@ -0,0 +1,70 @@ +package main + +import ( + "context" + "fmt" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/memory" + + "github.com/polarsignals/frostdb" + "github.com/polarsignals/frostdb/query" + "github.com/polarsignals/frostdb/query/logicalplan" +) + +// This example demonstrates how to create a simple FrostDB with a dynamic labels column that stores float values. 
+func main() { + // Create a new column store + columnstore, _ := frostdb.New() + defer columnstore.Close() + + // Open up a database in the column store + database, _ := columnstore.DB(context.Background(), "simple_db") + + type Simple struct { + Names map[string]string `frostdb:",asc"` + Value int64 + } + table, _ := frostdb.NewGenericTable[Simple]( + database, "simple_table", memory.DefaultAllocator, + ) + // Create values to insert into the database these first rows havel dynamic label names of 'firstname' and 'surname' + frederic := Simple{ + Names: map[string]string{ + "first_name": "Frederic", + "surname": "Brancz", + }, + Value: 100, + } + + thor := Simple{ + Names: map[string]string{ + "first_name": "Thor", + "surname": "Hansen", + }, + Value: 99, + } + _, _ = table.Write(context.Background(), frederic, thor) + + // Now we can insert rows that have middle names into our dynamic column + matthias := Simple{ + Names: map[string]string{ + "first_name": "Matthias", + "middle_name": "Oliver Rainer", + "surname": "Loibl", + }, + Value: 101, + } + _, _ = table.Write(context.Background(), matthias) + + // Create a new query engine to retrieve data and print the results + engine := query.NewEngine(memory.DefaultAllocator, database.TableProvider()) + _ = engine.ScanTable("simple_table"). + Project(logicalplan.DynCol("names")). // We don't know all dynamic columns at query time, but we want all of them to be returned. 
+ Filter( + logicalplan.Col("names.first_name").Eq(logicalplan.Literal("Frederic")), + ).Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + fmt.Println(r) + return nil + }) +} diff --git a/filter.go b/filter.go deleted file mode 100644 index d0b89ee26..000000000 --- a/filter.go +++ /dev/null @@ -1,182 +0,0 @@ -package frostdb - -import ( - "errors" - "fmt" - - "github.com/segmentio/parquet-go" - - "github.com/polarsignals/frostdb/pqarrow" - "github.com/polarsignals/frostdb/query/logicalplan" -) - -type PreExprVisitorFunc func(expr logicalplan.Expr) bool - -func (f PreExprVisitorFunc) PreVisit(expr logicalplan.Expr) bool { - return f(expr) -} - -func (f PreExprVisitorFunc) PostVisit(expr logicalplan.Expr) bool { - return false -} - -// Particulate is an abstraction of something that can be filtered. -// A parquet.RowGroup is a particulate that is able to be filtered, and wrapping a parquet.File with -// ParquetFileParticulate allows a file to be filtered. 
-type Particulate interface { - Schema() *parquet.Schema - ColumnChunks() []parquet.ColumnChunk -} - -type TrueNegativeFilter interface { - Eval(Particulate) (bool, error) -} - -type AlwaysTrueFilter struct{} - -func (f *AlwaysTrueFilter) Eval(p Particulate) (bool, error) { - return true, nil -} - -func binaryBooleanExpr(expr *logicalplan.BinaryExpr) (TrueNegativeFilter, error) { - switch expr.Op { - case logicalplan.OpNotEq: - fallthrough - case logicalplan.OpLt: - fallthrough - case logicalplan.OpLtEq: - fallthrough - case logicalplan.OpGt: - fallthrough - case logicalplan.OpGtEq: - fallthrough - case logicalplan.OpEq: //, logicalplan.OpNotEq, logicalplan.OpLt, logicalplan.OpLtEq, logicalplan.OpGt, logicalplan.OpGtEq, logicalplan.OpRegexMatch, logicalplan.RegexNotMatch: - var leftColumnRef *ColumnRef - expr.Left.Accept(PreExprVisitorFunc(func(expr logicalplan.Expr) bool { - switch e := expr.(type) { - case *logicalplan.Column: - leftColumnRef = &ColumnRef{ - ColumnName: e.ColumnName, - } - return false - } - return true - })) - if leftColumnRef == nil { - return nil, errors.New("left side of binary expression must be a column") - } - - var ( - rightValue parquet.Value - err error - ) - expr.Right.Accept(PreExprVisitorFunc(func(expr logicalplan.Expr) bool { - switch e := expr.(type) { - case *logicalplan.LiteralExpr: - rightValue, err = pqarrow.ArrowScalarToParquetValue(e.Value) - return false - } - return true - })) - - if err != nil { - return nil, err - } - - return &BinaryScalarExpr{ - Left: leftColumnRef, - Op: expr.Op, - Right: rightValue, - }, nil - case logicalplan.OpAnd: - left, err := BooleanExpr(expr.Left) - if err != nil { - return nil, err - } - - right, err := BooleanExpr(expr.Right) - if err != nil { - return nil, err - } - - return &AndExpr{ - Left: left, - Right: right, - }, nil - case logicalplan.OpOr: - left, err := BooleanExpr(expr.Left) - if err != nil { - return nil, err - } - - right, err := BooleanExpr(expr.Right) - if err != nil { - return 
nil, err - } - - return &OrExpr{ - Left: left, - Right: right, - }, nil - default: - return &AlwaysTrueFilter{}, nil - } -} - -type AndExpr struct { - Left TrueNegativeFilter - Right TrueNegativeFilter -} - -func (a *AndExpr) Eval(p Particulate) (bool, error) { - left, err := a.Left.Eval(p) - if err != nil { - return false, err - } - if !left { - return false, nil - } - - right, err := a.Right.Eval(p) - if err != nil { - return false, err - } - - // This stores the result in place to avoid allocations. - return left && right, nil -} - -type OrExpr struct { - Left TrueNegativeFilter - Right TrueNegativeFilter -} - -func (a *OrExpr) Eval(p Particulate) (bool, error) { - left, err := a.Left.Eval(p) - if err != nil { - return false, err - } - if left { - return true, nil - } - - right, err := a.Right.Eval(p) - if err != nil { - return false, err - } - - return right, nil -} - -func BooleanExpr(expr logicalplan.Expr) (TrueNegativeFilter, error) { - if expr == nil { - return &AlwaysTrueFilter{}, nil - } - - switch e := expr.(type) { - case *logicalplan.BinaryExpr: - return binaryBooleanExpr(e) - default: - return nil, fmt.Errorf("unsupported boolean expression %T", e) - } -} diff --git a/gen/proto/go/frostdb/schema/v1alpha1/schema.pb.go b/gen/proto/go/frostdb/schema/v1alpha1/schema.pb.go index bd09557f2..6c70671f3 100644 --- a/gen/proto/go/frostdb/schema/v1alpha1/schema.pb.go +++ b/gen/proto/go/frostdb/schema/v1alpha1/schema.pb.go @@ -1,6 +1,6 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.28.0-devel +// protoc-gen-go v1.34.2 // protoc (unknown) // source: frostdb/schema/v1alpha1/schema.proto @@ -34,6 +34,10 @@ const ( StorageLayout_TYPE_DOUBLE StorageLayout_Type = 3 // Represents a boolean type. StorageLayout_TYPE_BOOL StorageLayout_Type = 4 + // Represents a int32 type. + StorageLayout_TYPE_INT32 StorageLayout_Type = 5 + // Represents a uint64 type. 
+ StorageLayout_TYPE_UINT64 StorageLayout_Type = 6 ) // Enum value maps for StorageLayout_Type. @@ -44,6 +48,8 @@ var ( 2: "TYPE_INT64", 3: "TYPE_DOUBLE", 4: "TYPE_BOOL", + 5: "TYPE_INT32", + 6: "TYPE_UINT64", } StorageLayout_Type_value = map[string]int32{ "TYPE_UNKNOWN_UNSPECIFIED": 0, @@ -51,6 +57,8 @@ var ( "TYPE_INT64": 2, "TYPE_DOUBLE": 3, "TYPE_BOOL": 4, + "TYPE_INT32": 5, + "TYPE_UINT64": 6, } ) @@ -272,6 +280,9 @@ type Schema struct { Columns []*Column `protobuf:"bytes,2,rep,name=columns,proto3" json:"columns,omitempty"` // Columns to sort by in the schema. SortingColumns []*SortingColumn `protobuf:"bytes,3,rep,name=sorting_columns,json=sortingColumns,proto3" json:"sorting_columns,omitempty"` + // UniquePrimaryIndex defines whether the primary index is unique. Duplicate + // (according to the sorting column) rows will be dropped on compaction. + UniquePrimaryIndex bool `protobuf:"varint,4,opt,name=unique_primary_index,json=uniquePrimaryIndex,proto3" json:"unique_primary_index,omitempty"` } func (x *Schema) Reset() { @@ -327,6 +338,13 @@ func (x *Schema) GetSortingColumns() []*SortingColumn { return nil } +func (x *Schema) GetUniquePrimaryIndex() bool { + if x != nil { + return x.UniquePrimaryIndex + } + return false +} + // Column definition. type Column struct { state protoimpl.MessageState @@ -339,6 +357,9 @@ type Column struct { StorageLayout *StorageLayout `protobuf:"bytes,2,opt,name=storage_layout,json=storageLayout,proto3" json:"storage_layout,omitempty"` // Whether the column can dynamically expand. Dynamic bool `protobuf:"varint,3,opt,name=dynamic,proto3" json:"dynamic,omitempty"` + // Prehash the column before storing it. This is an optimization to speed up aggregation queries when this column is often aggregated. + // This will create a separate non-dynamic column with the same name and the prefix "hashed." that contains the prehashed values. 
+ Prehash bool `protobuf:"varint,4,opt,name=prehash,proto3" json:"prehash,omitempty"` } func (x *Column) Reset() { @@ -394,6 +415,13 @@ func (x *Column) GetDynamic() bool { return false } +func (x *Column) GetPrehash() bool { + if x != nil { + return x.Prehash + } + return false +} + // Storage layout describes the physical storage properties of a column. type StorageLayout struct { state protoimpl.MessageState @@ -408,6 +436,8 @@ type StorageLayout struct { Compression StorageLayout_Compression `protobuf:"varint,3,opt,name=compression,proto3,enum=frostdb.schema.v1alpha1.StorageLayout_Compression" json:"compression,omitempty"` // Wether values in the column are allowed to be null. Nullable bool `protobuf:"varint,4,opt,name=nullable,proto3" json:"nullable,omitempty"` + // Whether the column is repeated. + Repeated bool `protobuf:"varint,5,opt,name=repeated,proto3" json:"repeated,omitempty"` } func (x *StorageLayout) Reset() { @@ -470,6 +500,13 @@ func (x *StorageLayout) GetNullable() bool { return false } +func (x *StorageLayout) GetRepeated() bool { + if x != nil { + return x.Repeated + } + return false +} + // SortingColumn definition. 
type SortingColumn struct { state protoimpl.MessageState @@ -544,7 +581,7 @@ var file_frostdb_schema_v1alpha1_schema_proto_rawDesc = []byte{ 0x2f, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x17, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x22, - 0xa8, 0x01, 0x0a, 0x06, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, + 0xda, 0x01, 0x0a, 0x06, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x39, 0x0a, 0x07, 0x63, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, @@ -554,93 +591,101 @@ var file_frostdb_schema_v1alpha1_schema_proto_rawDesc = []byte{ 0x28, 0x0b, 0x32, 0x26, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x53, 0x6f, 0x72, 0x74, 0x69, 0x6e, 0x67, 0x43, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x52, 0x0e, 0x73, 0x6f, 0x72, 0x74, - 0x69, 0x6e, 0x67, 0x43, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x73, 0x22, 0x85, 0x01, 0x0a, 0x06, 0x43, - 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x4d, 0x0a, 0x0e, 0x73, 0x74, 0x6f, - 0x72, 0x61, 0x67, 0x65, 0x5f, 0x6c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x0b, 0x32, 0x26, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, - 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x53, 0x74, 0x6f, 0x72, - 0x61, 0x67, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x52, 0x0d, 0x73, 0x74, 0x6f, 0x72, 0x61, - 0x67, 0x65, 0x4c, 0x61, 
0x79, 0x6f, 0x75, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x64, 0x79, 0x6e, 0x61, - 0x6d, 0x69, 0x63, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x64, 0x79, 0x6e, 0x61, 0x6d, - 0x69, 0x63, 0x22, 0xce, 0x05, 0x0a, 0x0d, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4c, 0x61, - 0x79, 0x6f, 0x75, 0x74, 0x12, 0x3f, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x0e, 0x32, 0x2b, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, - 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x53, 0x74, 0x6f, - 0x72, 0x61, 0x67, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x2e, 0x54, 0x79, 0x70, 0x65, 0x52, - 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x4b, 0x0a, 0x08, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, - 0x67, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2f, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, - 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, - 0x31, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x2e, - 0x45, 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x08, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x69, - 0x6e, 0x67, 0x12, 0x54, 0x0a, 0x0b, 0x63, 0x6f, 0x6d, 0x70, 0x72, 0x65, 0x73, 0x73, 0x69, 0x6f, - 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x32, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, - 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, - 0x31, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x2e, - 0x43, 0x6f, 0x6d, 0x70, 0x72, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x0b, 0x63, 0x6f, 0x6d, - 0x70, 0x72, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x0a, 0x08, 0x6e, 0x75, 0x6c, 0x6c, - 0x61, 0x62, 0x6c, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x6e, 0x75, 0x6c, 0x6c, - 0x61, 0x62, 0x6c, 0x65, 0x22, 0x65, 0x0a, 0x04, 0x54, 0x79, 0x70, 0x65, 0x12, 0x1c, 0x0a, 0x18, - 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 
0x57, 0x4e, 0x5f, 0x55, 0x4e, 0x53, - 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x54, 0x59, - 0x50, 0x45, 0x5f, 0x53, 0x54, 0x52, 0x49, 0x4e, 0x47, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x54, - 0x59, 0x50, 0x45, 0x5f, 0x49, 0x4e, 0x54, 0x36, 0x34, 0x10, 0x02, 0x12, 0x0f, 0x0a, 0x0b, 0x54, - 0x59, 0x50, 0x45, 0x5f, 0x44, 0x4f, 0x55, 0x42, 0x4c, 0x45, 0x10, 0x03, 0x12, 0x0d, 0x0a, 0x09, - 0x54, 0x59, 0x50, 0x45, 0x5f, 0x42, 0x4f, 0x4f, 0x4c, 0x10, 0x04, 0x22, 0xae, 0x01, 0x0a, 0x08, - 0x45, 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x1e, 0x0a, 0x1a, 0x45, 0x4e, 0x43, 0x4f, - 0x44, 0x49, 0x4e, 0x47, 0x5f, 0x50, 0x4c, 0x41, 0x49, 0x4e, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, - 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x1b, 0x0a, 0x17, 0x45, 0x4e, 0x43, 0x4f, - 0x44, 0x49, 0x4e, 0x47, 0x5f, 0x52, 0x4c, 0x45, 0x5f, 0x44, 0x49, 0x43, 0x54, 0x49, 0x4f, 0x4e, - 0x41, 0x52, 0x59, 0x10, 0x01, 0x12, 0x20, 0x0a, 0x1c, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, - 0x47, 0x5f, 0x44, 0x45, 0x4c, 0x54, 0x41, 0x5f, 0x42, 0x49, 0x4e, 0x41, 0x52, 0x59, 0x5f, 0x50, - 0x41, 0x43, 0x4b, 0x45, 0x44, 0x10, 0x02, 0x12, 0x1d, 0x0a, 0x19, 0x45, 0x4e, 0x43, 0x4f, 0x44, - 0x49, 0x4e, 0x47, 0x5f, 0x44, 0x45, 0x4c, 0x54, 0x41, 0x5f, 0x42, 0x59, 0x54, 0x45, 0x5f, 0x41, - 0x52, 0x52, 0x41, 0x59, 0x10, 0x03, 0x12, 0x24, 0x0a, 0x20, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, - 0x4e, 0x47, 0x5f, 0x44, 0x45, 0x4c, 0x54, 0x41, 0x5f, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48, 0x5f, - 0x42, 0x59, 0x54, 0x45, 0x5f, 0x41, 0x52, 0x52, 0x41, 0x59, 0x10, 0x04, 0x22, 0xa4, 0x01, 0x0a, - 0x0b, 0x43, 0x6f, 0x6d, 0x70, 0x72, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x20, 0x0a, 0x1c, - 0x43, 0x4f, 0x4d, 0x50, 0x52, 0x45, 0x53, 0x53, 0x49, 0x4f, 0x4e, 0x5f, 0x4e, 0x4f, 0x4e, 0x45, - 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x16, - 0x0a, 0x12, 0x43, 0x4f, 0x4d, 0x50, 0x52, 0x45, 0x53, 0x53, 0x49, 0x4f, 0x4e, 0x5f, 0x53, 0x4e, - 
0x41, 0x50, 0x50, 0x59, 0x10, 0x01, 0x12, 0x14, 0x0a, 0x10, 0x43, 0x4f, 0x4d, 0x50, 0x52, 0x45, - 0x53, 0x53, 0x49, 0x4f, 0x4e, 0x5f, 0x47, 0x5a, 0x49, 0x50, 0x10, 0x02, 0x12, 0x16, 0x0a, 0x12, - 0x43, 0x4f, 0x4d, 0x50, 0x52, 0x45, 0x53, 0x53, 0x49, 0x4f, 0x4e, 0x5f, 0x42, 0x52, 0x4f, 0x54, - 0x4c, 0x49, 0x10, 0x03, 0x12, 0x17, 0x0a, 0x13, 0x43, 0x4f, 0x4d, 0x50, 0x52, 0x45, 0x53, 0x53, - 0x49, 0x4f, 0x4e, 0x5f, 0x4c, 0x5a, 0x34, 0x5f, 0x52, 0x41, 0x57, 0x10, 0x04, 0x12, 0x14, 0x0a, - 0x10, 0x43, 0x4f, 0x4d, 0x50, 0x52, 0x45, 0x53, 0x53, 0x49, 0x4f, 0x4e, 0x5f, 0x5a, 0x53, 0x54, - 0x44, 0x10, 0x05, 0x22, 0xf7, 0x01, 0x0a, 0x0d, 0x53, 0x6f, 0x72, 0x74, 0x69, 0x6e, 0x67, 0x43, - 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x4e, 0x0a, 0x09, 0x64, 0x69, 0x72, - 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x30, 0x2e, 0x66, - 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, - 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x53, 0x6f, 0x72, 0x74, 0x69, 0x6e, 0x67, 0x43, 0x6f, - 0x6c, 0x75, 0x6d, 0x6e, 0x2e, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x09, - 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x6e, 0x75, 0x6c, - 0x6c, 0x73, 0x5f, 0x66, 0x69, 0x72, 0x73, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, - 0x6e, 0x75, 0x6c, 0x6c, 0x73, 0x46, 0x69, 0x72, 0x73, 0x74, 0x22, 0x61, 0x0a, 0x09, 0x44, 0x69, - 0x72, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x21, 0x0a, 0x1d, 0x44, 0x49, 0x52, 0x45, 0x43, - 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x5f, 0x55, 0x4e, 0x53, - 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x17, 0x0a, 0x13, 0x44, 0x49, - 0x52, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x41, 0x53, 0x43, 0x45, 0x4e, 0x44, 0x49, 0x4e, - 0x47, 0x10, 0x01, 0x12, 0x18, 0x0a, 
0x14, 0x44, 0x49, 0x52, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e, - 0x5f, 0x44, 0x45, 0x53, 0x43, 0x45, 0x4e, 0x44, 0x49, 0x4e, 0x47, 0x10, 0x02, 0x42, 0xfd, 0x01, - 0x0a, 0x1b, 0x63, 0x6f, 0x6d, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, - 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x42, 0x0b, 0x53, - 0x63, 0x68, 0x65, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x53, 0x67, 0x69, - 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x70, 0x6f, 0x6c, 0x61, 0x72, 0x73, 0x69, - 0x67, 0x6e, 0x61, 0x6c, 0x73, 0x2f, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x67, 0x65, - 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x67, 0x6f, 0x2f, 0x66, 0x72, 0x6f, 0x73, 0x74, - 0x64, 0x62, 0x2f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2f, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, - 0x61, 0x31, 0x3b, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, - 0x31, 0xa2, 0x02, 0x03, 0x46, 0x53, 0x58, 0xaa, 0x02, 0x17, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, - 0x62, 0x2e, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, - 0x31, 0xca, 0x02, 0x17, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x5c, 0x53, 0x63, 0x68, 0x65, - 0x6d, 0x61, 0x5c, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0xe2, 0x02, 0x23, 0x46, 0x72, + 0x69, 0x6e, 0x67, 0x43, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x73, 0x12, 0x30, 0x0a, 0x14, 0x75, 0x6e, + 0x69, 0x71, 0x75, 0x65, 0x5f, 0x70, 0x72, 0x69, 0x6d, 0x61, 0x72, 0x79, 0x5f, 0x69, 0x6e, 0x64, + 0x65, 0x78, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x12, 0x75, 0x6e, 0x69, 0x71, 0x75, 0x65, + 0x50, 0x72, 0x69, 0x6d, 0x61, 0x72, 0x79, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x22, 0x9f, 0x01, 0x0a, + 0x06, 0x43, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x4d, 0x0a, 0x0e, 0x73, + 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x5f, 0x6c, 0x61, 0x79, 0x6f, 0x75, 
0x74, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x26, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, + 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x53, 0x74, + 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x52, 0x0d, 0x73, 0x74, 0x6f, + 0x72, 0x61, 0x67, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x64, 0x79, + 0x6e, 0x61, 0x6d, 0x69, 0x63, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x64, 0x79, 0x6e, + 0x61, 0x6d, 0x69, 0x63, 0x12, 0x18, 0x0a, 0x07, 0x70, 0x72, 0x65, 0x68, 0x61, 0x73, 0x68, 0x18, + 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x70, 0x72, 0x65, 0x68, 0x61, 0x73, 0x68, 0x22, 0x8c, + 0x06, 0x0a, 0x0d, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, + 0x12, 0x3f, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2b, + 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, + 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, + 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x2e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, + 0x65, 0x12, 0x4b, 0x0a, 0x08, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x0e, 0x32, 0x2f, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, + 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x53, 0x74, + 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x2e, 0x45, 0x6e, 0x63, 0x6f, + 0x64, 0x69, 0x6e, 0x67, 0x52, 0x08, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x54, + 0x0a, 0x0b, 0x63, 0x6f, 0x6d, 0x70, 0x72, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x0e, 0x32, 0x32, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, + 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x53, 0x74, + 0x6f, 0x72, 
0x61, 0x67, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x2e, 0x43, 0x6f, 0x6d, 0x70, + 0x72, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x0b, 0x63, 0x6f, 0x6d, 0x70, 0x72, 0x65, 0x73, + 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x0a, 0x08, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x62, 0x6c, 0x65, + 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x62, 0x6c, 0x65, + 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, 0x18, 0x05, 0x20, 0x01, + 0x28, 0x08, 0x52, 0x08, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, 0x22, 0x86, 0x01, 0x0a, + 0x04, 0x54, 0x79, 0x70, 0x65, 0x12, 0x1c, 0x0a, 0x18, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, 0x4e, + 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, + 0x44, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x53, 0x54, 0x52, 0x49, + 0x4e, 0x47, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x49, 0x4e, 0x54, + 0x36, 0x34, 0x10, 0x02, 0x12, 0x0f, 0x0a, 0x0b, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x44, 0x4f, 0x55, + 0x42, 0x4c, 0x45, 0x10, 0x03, 0x12, 0x0d, 0x0a, 0x09, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x42, 0x4f, + 0x4f, 0x4c, 0x10, 0x04, 0x12, 0x0e, 0x0a, 0x0a, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x49, 0x4e, 0x54, + 0x33, 0x32, 0x10, 0x05, 0x12, 0x0f, 0x0a, 0x0b, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, 0x49, 0x4e, + 0x54, 0x36, 0x34, 0x10, 0x06, 0x22, 0xae, 0x01, 0x0a, 0x08, 0x45, 0x6e, 0x63, 0x6f, 0x64, 0x69, + 0x6e, 0x67, 0x12, 0x1e, 0x0a, 0x1a, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47, 0x5f, 0x50, + 0x4c, 0x41, 0x49, 0x4e, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, + 0x10, 0x00, 0x12, 0x1b, 0x0a, 0x17, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47, 0x5f, 0x52, + 0x4c, 0x45, 0x5f, 0x44, 0x49, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x41, 0x52, 0x59, 0x10, 0x01, 0x12, + 0x20, 0x0a, 0x1c, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47, 0x5f, 0x44, 0x45, 0x4c, 0x54, + 0x41, 0x5f, 0x42, 0x49, 0x4e, 0x41, 0x52, 0x59, 
0x5f, 0x50, 0x41, 0x43, 0x4b, 0x45, 0x44, 0x10, + 0x02, 0x12, 0x1d, 0x0a, 0x19, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47, 0x5f, 0x44, 0x45, + 0x4c, 0x54, 0x41, 0x5f, 0x42, 0x59, 0x54, 0x45, 0x5f, 0x41, 0x52, 0x52, 0x41, 0x59, 0x10, 0x03, + 0x12, 0x24, 0x0a, 0x20, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47, 0x5f, 0x44, 0x45, 0x4c, + 0x54, 0x41, 0x5f, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48, 0x5f, 0x42, 0x59, 0x54, 0x45, 0x5f, 0x41, + 0x52, 0x52, 0x41, 0x59, 0x10, 0x04, 0x22, 0xa4, 0x01, 0x0a, 0x0b, 0x43, 0x6f, 0x6d, 0x70, 0x72, + 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x20, 0x0a, 0x1c, 0x43, 0x4f, 0x4d, 0x50, 0x52, 0x45, + 0x53, 0x53, 0x49, 0x4f, 0x4e, 0x5f, 0x4e, 0x4f, 0x4e, 0x45, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, + 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x16, 0x0a, 0x12, 0x43, 0x4f, 0x4d, 0x50, + 0x52, 0x45, 0x53, 0x53, 0x49, 0x4f, 0x4e, 0x5f, 0x53, 0x4e, 0x41, 0x50, 0x50, 0x59, 0x10, 0x01, + 0x12, 0x14, 0x0a, 0x10, 0x43, 0x4f, 0x4d, 0x50, 0x52, 0x45, 0x53, 0x53, 0x49, 0x4f, 0x4e, 0x5f, + 0x47, 0x5a, 0x49, 0x50, 0x10, 0x02, 0x12, 0x16, 0x0a, 0x12, 0x43, 0x4f, 0x4d, 0x50, 0x52, 0x45, + 0x53, 0x53, 0x49, 0x4f, 0x4e, 0x5f, 0x42, 0x52, 0x4f, 0x54, 0x4c, 0x49, 0x10, 0x03, 0x12, 0x17, + 0x0a, 0x13, 0x43, 0x4f, 0x4d, 0x50, 0x52, 0x45, 0x53, 0x53, 0x49, 0x4f, 0x4e, 0x5f, 0x4c, 0x5a, + 0x34, 0x5f, 0x52, 0x41, 0x57, 0x10, 0x04, 0x12, 0x14, 0x0a, 0x10, 0x43, 0x4f, 0x4d, 0x50, 0x52, + 0x45, 0x53, 0x53, 0x49, 0x4f, 0x4e, 0x5f, 0x5a, 0x53, 0x54, 0x44, 0x10, 0x05, 0x22, 0xf7, 0x01, + 0x0a, 0x0d, 0x53, 0x6f, 0x72, 0x74, 0x69, 0x6e, 0x67, 0x43, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x12, + 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, + 0x61, 0x6d, 0x65, 0x12, 0x4e, 0x0a, 0x09, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x30, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, + 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 
0x61, 0x31, + 0x2e, 0x53, 0x6f, 0x72, 0x74, 0x69, 0x6e, 0x67, 0x43, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x2e, 0x44, + 0x69, 0x72, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x09, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, + 0x69, 0x6f, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x6e, 0x75, 0x6c, 0x6c, 0x73, 0x5f, 0x66, 0x69, 0x72, + 0x73, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x6e, 0x75, 0x6c, 0x6c, 0x73, 0x46, + 0x69, 0x72, 0x73, 0x74, 0x22, 0x61, 0x0a, 0x09, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x69, 0x6f, + 0x6e, 0x12, 0x21, 0x0a, 0x1d, 0x44, 0x49, 0x52, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x55, + 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, + 0x45, 0x44, 0x10, 0x00, 0x12, 0x17, 0x0a, 0x13, 0x44, 0x49, 0x52, 0x45, 0x43, 0x54, 0x49, 0x4f, + 0x4e, 0x5f, 0x41, 0x53, 0x43, 0x45, 0x4e, 0x44, 0x49, 0x4e, 0x47, 0x10, 0x01, 0x12, 0x18, 0x0a, + 0x14, 0x44, 0x49, 0x52, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x44, 0x45, 0x53, 0x43, 0x45, + 0x4e, 0x44, 0x49, 0x4e, 0x47, 0x10, 0x02, 0x42, 0xfd, 0x01, 0x0a, 0x1b, 0x63, 0x6f, 0x6d, 0x2e, + 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, + 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x42, 0x0b, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x50, + 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x53, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, + 0x6f, 0x6d, 0x2f, 0x70, 0x6f, 0x6c, 0x61, 0x72, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x6c, 0x73, 0x2f, + 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x2f, 0x67, 0x6f, 0x2f, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x73, 0x63, 0x68, + 0x65, 0x6d, 0x61, 0x2f, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x3b, 0x73, 0x63, 0x68, + 0x65, 0x6d, 0x61, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0xa2, 0x02, 0x03, 0x46, 0x53, + 0x58, 0xaa, 0x02, 0x17, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x53, 0x63, 0x68, 0x65, + 0x6d, 0x61, 0x2e, 0x56, 
0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0xca, 0x02, 0x17, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x5c, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5c, 0x56, 0x31, 0x61, - 0x6c, 0x70, 0x68, 0x61, 0x31, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0xea, 0x02, 0x19, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x3a, 0x3a, 0x53, 0x63, 0x68, - 0x65, 0x6d, 0x61, 0x3a, 0x3a, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x62, 0x06, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x6c, 0x70, 0x68, 0x61, 0x31, 0xe2, 0x02, 0x23, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x5c, + 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5c, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x5c, + 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, 0x02, 0x19, 0x46, 0x72, + 0x6f, 0x73, 0x74, 0x64, 0x62, 0x3a, 0x3a, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x3a, 0x3a, 0x56, + 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -657,7 +702,7 @@ func file_frostdb_schema_v1alpha1_schema_proto_rawDescGZIP() []byte { var file_frostdb_schema_v1alpha1_schema_proto_enumTypes = make([]protoimpl.EnumInfo, 4) var file_frostdb_schema_v1alpha1_schema_proto_msgTypes = make([]protoimpl.MessageInfo, 4) -var file_frostdb_schema_v1alpha1_schema_proto_goTypes = []interface{}{ +var file_frostdb_schema_v1alpha1_schema_proto_goTypes = []any{ (StorageLayout_Type)(0), // 0: frostdb.schema.v1alpha1.StorageLayout.Type (StorageLayout_Encoding)(0), // 1: frostdb.schema.v1alpha1.StorageLayout.Encoding (StorageLayout_Compression)(0), // 2: frostdb.schema.v1alpha1.StorageLayout.Compression @@ -688,7 +733,7 @@ func file_frostdb_schema_v1alpha1_schema_proto_init() { return } if !protoimpl.UnsafeEnabled { - file_frostdb_schema_v1alpha1_schema_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + file_frostdb_schema_v1alpha1_schema_proto_msgTypes[0].Exporter = func(v any, i int) any { switch v := v.(*Schema); i { case 0: return 
&v.state @@ -700,7 +745,7 @@ func file_frostdb_schema_v1alpha1_schema_proto_init() { return nil } } - file_frostdb_schema_v1alpha1_schema_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + file_frostdb_schema_v1alpha1_schema_proto_msgTypes[1].Exporter = func(v any, i int) any { switch v := v.(*Column); i { case 0: return &v.state @@ -712,7 +757,7 @@ func file_frostdb_schema_v1alpha1_schema_proto_init() { return nil } } - file_frostdb_schema_v1alpha1_schema_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + file_frostdb_schema_v1alpha1_schema_proto_msgTypes[2].Exporter = func(v any, i int) any { switch v := v.(*StorageLayout); i { case 0: return &v.state @@ -724,7 +769,7 @@ func file_frostdb_schema_v1alpha1_schema_proto_init() { return nil } } - file_frostdb_schema_v1alpha1_schema_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + file_frostdb_schema_v1alpha1_schema_proto_msgTypes[3].Exporter = func(v any, i int) any { switch v := v.(*SortingColumn); i { case 0: return &v.state diff --git a/gen/proto/go/frostdb/schema/v1alpha1/schema_vtproto.pb.go b/gen/proto/go/frostdb/schema/v1alpha1/schema_vtproto.pb.go index ab292173c..fe6c990f3 100644 --- a/gen/proto/go/frostdb/schema/v1alpha1/schema_vtproto.pb.go +++ b/gen/proto/go/frostdb/schema/v1alpha1/schema_vtproto.pb.go @@ -1,14 +1,14 @@ // Code generated by protoc-gen-go-vtproto. DO NOT EDIT. 
-// protoc-gen-go-vtproto version: v0.3.0 +// protoc-gen-go-vtproto version: v0.6.0 // source: frostdb/schema/v1alpha1/schema.proto package schemav1alpha1 import ( fmt "fmt" + protohelpers "github.com/planetscale/vtprotobuf/protohelpers" protoimpl "google.golang.org/protobuf/runtime/protoimpl" io "io" - bits "math/bits" ) const ( @@ -48,6 +48,16 @@ func (m *Schema) MarshalToSizedBufferVT(dAtA []byte) (int, error) { i -= len(m.unknownFields) copy(dAtA[i:], m.unknownFields) } + if m.UniquePrimaryIndex { + i-- + if m.UniquePrimaryIndex { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i-- + dAtA[i] = 0x20 + } if len(m.SortingColumns) > 0 { for iNdEx := len(m.SortingColumns) - 1; iNdEx >= 0; iNdEx-- { size, err := m.SortingColumns[iNdEx].MarshalToSizedBufferVT(dAtA[:i]) @@ -55,7 +65,7 @@ func (m *Schema) MarshalToSizedBufferVT(dAtA []byte) (int, error) { return 0, err } i -= size - i = encodeVarint(dAtA, i, uint64(size)) + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) i-- dAtA[i] = 0x1a } @@ -67,7 +77,7 @@ func (m *Schema) MarshalToSizedBufferVT(dAtA []byte) (int, error) { return 0, err } i -= size - i = encodeVarint(dAtA, i, uint64(size)) + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) i-- dAtA[i] = 0x12 } @@ -75,7 +85,7 @@ func (m *Schema) MarshalToSizedBufferVT(dAtA []byte) (int, error) { if len(m.Name) > 0 { i -= len(m.Name) copy(dAtA[i:], m.Name) - i = encodeVarint(dAtA, i, uint64(len(m.Name))) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.Name))) i-- dAtA[i] = 0xa } @@ -112,6 +122,16 @@ func (m *Column) MarshalToSizedBufferVT(dAtA []byte) (int, error) { i -= len(m.unknownFields) copy(dAtA[i:], m.unknownFields) } + if m.Prehash { + i-- + if m.Prehash { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i-- + dAtA[i] = 0x20 + } if m.Dynamic { i-- if m.Dynamic { @@ -128,14 +148,14 @@ func (m *Column) MarshalToSizedBufferVT(dAtA []byte) (int, error) { return 0, err } i -= size - i = encodeVarint(dAtA, i, uint64(size)) + i = 
protohelpers.EncodeVarint(dAtA, i, uint64(size)) i-- dAtA[i] = 0x12 } if len(m.Name) > 0 { i -= len(m.Name) copy(dAtA[i:], m.Name) - i = encodeVarint(dAtA, i, uint64(len(m.Name))) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.Name))) i-- dAtA[i] = 0xa } @@ -172,6 +192,16 @@ func (m *StorageLayout) MarshalToSizedBufferVT(dAtA []byte) (int, error) { i -= len(m.unknownFields) copy(dAtA[i:], m.unknownFields) } + if m.Repeated { + i-- + if m.Repeated { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i-- + dAtA[i] = 0x28 + } if m.Nullable { i-- if m.Nullable { @@ -183,17 +213,17 @@ func (m *StorageLayout) MarshalToSizedBufferVT(dAtA []byte) (int, error) { dAtA[i] = 0x20 } if m.Compression != 0 { - i = encodeVarint(dAtA, i, uint64(m.Compression)) + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Compression)) i-- dAtA[i] = 0x18 } if m.Encoding != 0 { - i = encodeVarint(dAtA, i, uint64(m.Encoding)) + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Encoding)) i-- dAtA[i] = 0x10 } if m.Type != 0 { - i = encodeVarint(dAtA, i, uint64(m.Type)) + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Type)) i-- dAtA[i] = 0x8 } @@ -241,31 +271,20 @@ func (m *SortingColumn) MarshalToSizedBufferVT(dAtA []byte) (int, error) { dAtA[i] = 0x18 } if m.Direction != 0 { - i = encodeVarint(dAtA, i, uint64(m.Direction)) + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Direction)) i-- dAtA[i] = 0x10 } if len(m.Name) > 0 { i -= len(m.Name) copy(dAtA[i:], m.Name) - i = encodeVarint(dAtA, i, uint64(len(m.Name))) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.Name))) i-- dAtA[i] = 0xa } return len(dAtA) - i, nil } -func encodeVarint(dAtA []byte, offset int, v uint64) int { - offset -= sov(v) - base := offset - for v >= 1<<7 { - dAtA[offset] = uint8(v&0x7f | 0x80) - v >>= 7 - offset++ - } - dAtA[offset] = uint8(v) - return base -} func (m *Schema) SizeVT() (n int) { if m == nil { return 0 @@ -274,23 +293,24 @@ func (m *Schema) SizeVT() (n int) { _ = l l = len(m.Name) if l > 0 { - n += 1 + l 
+ sov(uint64(l)) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } if len(m.Columns) > 0 { for _, e := range m.Columns { l = e.SizeVT() - n += 1 + l + sov(uint64(l)) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } } if len(m.SortingColumns) > 0 { for _, e := range m.SortingColumns { l = e.SizeVT() - n += 1 + l + sov(uint64(l)) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } } - if m.unknownFields != nil { - n += len(m.unknownFields) + if m.UniquePrimaryIndex { + n += 2 } + n += len(m.unknownFields) return n } @@ -302,18 +322,19 @@ func (m *Column) SizeVT() (n int) { _ = l l = len(m.Name) if l > 0 { - n += 1 + l + sov(uint64(l)) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } if m.StorageLayout != nil { l = m.StorageLayout.SizeVT() - n += 1 + l + sov(uint64(l)) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } if m.Dynamic { n += 2 } - if m.unknownFields != nil { - n += len(m.unknownFields) + if m.Prehash { + n += 2 } + n += len(m.unknownFields) return n } @@ -324,20 +345,21 @@ func (m *StorageLayout) SizeVT() (n int) { var l int _ = l if m.Type != 0 { - n += 1 + sov(uint64(m.Type)) + n += 1 + protohelpers.SizeOfVarint(uint64(m.Type)) } if m.Encoding != 0 { - n += 1 + sov(uint64(m.Encoding)) + n += 1 + protohelpers.SizeOfVarint(uint64(m.Encoding)) } if m.Compression != 0 { - n += 1 + sov(uint64(m.Compression)) + n += 1 + protohelpers.SizeOfVarint(uint64(m.Compression)) } if m.Nullable { n += 2 } - if m.unknownFields != nil { - n += len(m.unknownFields) + if m.Repeated { + n += 2 } + n += len(m.unknownFields) return n } @@ -349,26 +371,18 @@ func (m *SortingColumn) SizeVT() (n int) { _ = l l = len(m.Name) if l > 0 { - n += 1 + l + sov(uint64(l)) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } if m.Direction != 0 { - n += 1 + sov(uint64(m.Direction)) + n += 1 + protohelpers.SizeOfVarint(uint64(m.Direction)) } if m.NullsFirst { n += 2 } - if m.unknownFields != nil { - n += len(m.unknownFields) - } + n += len(m.unknownFields) return n } 
-func sov(x uint64) (n int) { - return (bits.Len64(x|1) + 6) / 7 -} -func soz(x uint64) (n int) { - return sov(uint64((x << 1) ^ uint64((int64(x) >> 63)))) -} func (m *Schema) UnmarshalVT(dAtA []byte) error { l := len(dAtA) iNdEx := 0 @@ -377,7 +391,7 @@ func (m *Schema) UnmarshalVT(dAtA []byte) error { var wire uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -405,7 +419,7 @@ func (m *Schema) UnmarshalVT(dAtA []byte) error { var stringLen uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -419,11 +433,11 @@ func (m *Schema) UnmarshalVT(dAtA []byte) error { } intStringLen := int(stringLen) if intStringLen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + intStringLen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -437,7 +451,7 @@ func (m *Schema) UnmarshalVT(dAtA []byte) error { var msglen int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -450,11 +464,11 @@ func (m *Schema) UnmarshalVT(dAtA []byte) error { } } if msglen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + msglen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -471,7 +485,7 @@ func (m *Schema) UnmarshalVT(dAtA []byte) error { var msglen int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -484,11 +498,11 @@ func (m *Schema) UnmarshalVT(dAtA []byte) error { } } if msglen < 0 { - 
return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + msglen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -498,14 +512,34 @@ func (m *Schema) UnmarshalVT(dAtA []byte) error { return err } iNdEx = postIndex + case 4: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field UniquePrimaryIndex", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + m.UniquePrimaryIndex = bool(v != 0) default: iNdEx = preIndex - skippy, err := skip(dAtA[iNdEx:]) + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) if err != nil { return err } if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if (iNdEx + skippy) > l { return io.ErrUnexpectedEOF @@ -528,7 +562,7 @@ func (m *Column) UnmarshalVT(dAtA []byte) error { var wire uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -556,7 +590,7 @@ func (m *Column) UnmarshalVT(dAtA []byte) error { var stringLen uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -570,11 +604,11 @@ func (m *Column) UnmarshalVT(dAtA []byte) error { } intStringLen := int(stringLen) if intStringLen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + intStringLen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -588,7 +622,7 @@ func (m *Column) UnmarshalVT(dAtA []byte) error { var msglen int for shift := 
uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -601,11 +635,11 @@ func (m *Column) UnmarshalVT(dAtA []byte) error { } } if msglen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + msglen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -624,7 +658,7 @@ func (m *Column) UnmarshalVT(dAtA []byte) error { var v int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -637,14 +671,34 @@ func (m *Column) UnmarshalVT(dAtA []byte) error { } } m.Dynamic = bool(v != 0) + case 4: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Prehash", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + m.Prehash = bool(v != 0) default: iNdEx = preIndex - skippy, err := skip(dAtA[iNdEx:]) + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) if err != nil { return err } if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if (iNdEx + skippy) > l { return io.ErrUnexpectedEOF @@ -667,7 +721,7 @@ func (m *StorageLayout) UnmarshalVT(dAtA []byte) error { var wire uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -695,7 +749,7 @@ func (m *StorageLayout) UnmarshalVT(dAtA []byte) error { m.Type = 0 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF 
@@ -714,7 +768,7 @@ func (m *StorageLayout) UnmarshalVT(dAtA []byte) error { m.Encoding = 0 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -733,7 +787,7 @@ func (m *StorageLayout) UnmarshalVT(dAtA []byte) error { m.Compression = 0 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -752,7 +806,7 @@ func (m *StorageLayout) UnmarshalVT(dAtA []byte) error { var v int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -765,14 +819,34 @@ func (m *StorageLayout) UnmarshalVT(dAtA []byte) error { } } m.Nullable = bool(v != 0) + case 5: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Repeated", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + m.Repeated = bool(v != 0) default: iNdEx = preIndex - skippy, err := skip(dAtA[iNdEx:]) + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) if err != nil { return err } if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if (iNdEx + skippy) > l { return io.ErrUnexpectedEOF @@ -795,7 +869,7 @@ func (m *SortingColumn) UnmarshalVT(dAtA []byte) error { var wire uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -823,7 +897,7 @@ func (m *SortingColumn) UnmarshalVT(dAtA []byte) error { var stringLen uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return 
protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -837,11 +911,11 @@ func (m *SortingColumn) UnmarshalVT(dAtA []byte) error { } intStringLen := int(stringLen) if intStringLen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + intStringLen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -855,7 +929,7 @@ func (m *SortingColumn) UnmarshalVT(dAtA []byte) error { m.Direction = 0 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -874,7 +948,7 @@ func (m *SortingColumn) UnmarshalVT(dAtA []byte) error { var v int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -889,12 +963,12 @@ func (m *SortingColumn) UnmarshalVT(dAtA []byte) error { m.NullsFirst = bool(v != 0) default: iNdEx = preIndex - skippy, err := skip(dAtA[iNdEx:]) + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) if err != nil { return err } if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if (iNdEx + skippy) > l { return io.ErrUnexpectedEOF @@ -909,87 +983,3 @@ func (m *SortingColumn) UnmarshalVT(dAtA []byte) error { } return nil } -func skip(dAtA []byte) (n int, err error) { - l := len(dAtA) - iNdEx := 0 - depth := 0 - for iNdEx < l { - var wire uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflow - } - if iNdEx >= l { - return 0, io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - wire |= (uint64(b) & 0x7F) << shift - if b < 0x80 { - break - } - } - wireType := int(wire & 0x7) - switch wireType { - case 0: - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflow - } - if iNdEx >= l { - return 0, 
io.ErrUnexpectedEOF - } - iNdEx++ - if dAtA[iNdEx-1] < 0x80 { - break - } - } - case 1: - iNdEx += 8 - case 2: - var length int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflow - } - if iNdEx >= l { - return 0, io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - length |= (int(b) & 0x7F) << shift - if b < 0x80 { - break - } - } - if length < 0 { - return 0, ErrInvalidLength - } - iNdEx += length - case 3: - depth++ - case 4: - if depth == 0 { - return 0, ErrUnexpectedEndOfGroup - } - depth-- - case 5: - iNdEx += 4 - default: - return 0, fmt.Errorf("proto: illegal wireType %d", wireType) - } - if iNdEx < 0 { - return 0, ErrInvalidLength - } - if depth == 0 { - return iNdEx, nil - } - } - return 0, io.ErrUnexpectedEOF -} - -var ( - ErrInvalidLength = fmt.Errorf("proto: negative length found during unmarshaling") - ErrIntOverflow = fmt.Errorf("proto: integer overflow") - ErrUnexpectedEndOfGroup = fmt.Errorf("proto: unexpected end of group") -) diff --git a/gen/proto/go/frostdb/schema/v1alpha2/schema.pb.go b/gen/proto/go/frostdb/schema/v1alpha2/schema.pb.go index 501481150..cdb2551b1 100644 --- a/gen/proto/go/frostdb/schema/v1alpha2/schema.pb.go +++ b/gen/proto/go/frostdb/schema/v1alpha2/schema.pb.go @@ -1,6 +1,6 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.28.0-devel +// protoc-gen-go v1.34.2 // protoc (unknown) // source: frostdb/schema/v1alpha2/schema.proto @@ -34,6 +34,10 @@ const ( StorageLayout_TYPE_DOUBLE StorageLayout_Type = 3 // Represents a boolean type. StorageLayout_TYPE_BOOL StorageLayout_Type = 4 + // Represents a int32 type. + StorageLayout_TYPE_INT32 StorageLayout_Type = 5 + // Represents a uint64 type. + StorageLayout_TYPE_UINT64 StorageLayout_Type = 6 ) // Enum value maps for StorageLayout_Type. 
@@ -44,6 +48,8 @@ var ( 2: "TYPE_INT64", 3: "TYPE_DOUBLE", 4: "TYPE_BOOL", + 5: "TYPE_INT32", + 6: "TYPE_UINT64", } StorageLayout_Type_value = map[string]int32{ "TYPE_UNKNOWN_UNSPECIFIED": 0, @@ -51,6 +57,8 @@ var ( "TYPE_INT64": 2, "TYPE_DOUBLE": 3, "TYPE_BOOL": 4, + "TYPE_INT32": 5, + "TYPE_UINT64": 6, } ) @@ -270,6 +278,9 @@ type Schema struct { Root *Group `protobuf:"bytes,2,opt,name=root,proto3" json:"root,omitempty"` // Columns to sort by in the schema. SortingColumns []*SortingColumn `protobuf:"bytes,3,rep,name=sorting_columns,json=sortingColumns,proto3" json:"sorting_columns,omitempty"` + // UniquePrimaryIndex defines whether the primary index is unique. Duplicate + // (according to the sorting column) rows will be dropped on compaction. + UniquePrimaryIndex bool `protobuf:"varint,4,opt,name=unique_primary_index,json=uniquePrimaryIndex,proto3" json:"unique_primary_index,omitempty"` } func (x *Schema) Reset() { @@ -318,13 +329,23 @@ func (x *Schema) GetSortingColumns() []*SortingColumn { return nil } +func (x *Schema) GetUniquePrimaryIndex() bool { + if x != nil { + return x.UniquePrimaryIndex + } + return false +} + // Node is a node in a schema tree. type Node struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields + // type can either be a leaf or a group. 
+ // // Types that are assignable to Type: + // // *Node_Leaf // *Node_Group Type isNode_Type `protobuf_oneof:"type"` @@ -694,7 +715,7 @@ var file_frostdb_schema_v1alpha2_schema_proto_rawDesc = []byte{ 0x2f, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0x2f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x17, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0x22, - 0x8d, 0x01, 0x0a, 0x06, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x12, 0x32, 0x0a, 0x04, 0x72, 0x6f, + 0xbf, 0x01, 0x0a, 0x06, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x12, 0x32, 0x0a, 0x04, 0x72, 0x6f, 0x6f, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0x2e, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x52, 0x04, 0x72, 0x6f, 0x6f, 0x74, 0x12, 0x4f, @@ -702,110 +723,115 @@ var file_frostdb_schema_v1alpha2_schema_proto_rawDesc = []byte{ 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x26, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0x2e, 0x53, 0x6f, 0x72, 0x74, 0x69, 0x6e, 0x67, 0x43, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x52, - 0x0e, 0x73, 0x6f, 0x72, 0x74, 0x69, 0x6e, 0x67, 0x43, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x73, 0x22, - 0x7b, 0x0a, 0x04, 0x4e, 0x6f, 0x64, 0x65, 0x12, 0x33, 0x0a, 0x04, 0x6c, 0x65, 0x61, 0x66, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, - 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0x2e, - 0x4c, 0x65, 0x61, 0x66, 0x48, 0x00, 0x52, 0x04, 0x6c, 0x65, 0x61, 0x66, 0x12, 0x36, 0x0a, 0x05, - 0x67, 0x72, 0x6f, 0x75, 0x70, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x66, 0x72, - 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 
0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, - 0x6c, 0x70, 0x68, 0x61, 0x32, 0x2e, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x48, 0x00, 0x52, 0x05, 0x67, - 0x72, 0x6f, 0x75, 0x70, 0x42, 0x06, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x22, 0x69, 0x0a, 0x04, - 0x4c, 0x65, 0x61, 0x66, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x4d, 0x0a, 0x0e, 0x73, 0x74, 0x6f, 0x72, - 0x61, 0x67, 0x65, 0x5f, 0x6c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, - 0x32, 0x26, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, - 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, - 0x67, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x52, 0x0d, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, - 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x22, 0x88, 0x01, 0x0a, 0x05, 0x47, 0x72, 0x6f, 0x75, - 0x70, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x62, 0x6c, - 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x62, 0x6c, - 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, 0x18, 0x03, 0x20, - 0x01, 0x28, 0x08, 0x52, 0x08, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, 0x12, 0x33, 0x0a, - 0x05, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x66, - 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, - 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0x2e, 0x4e, 0x6f, 0x64, 0x65, 0x52, 0x05, 0x6e, 0x6f, 0x64, - 0x65, 0x73, 0x22, 0xea, 0x05, 0x0a, 0x0d, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4c, 0x61, - 0x79, 0x6f, 0x75, 0x74, 0x12, 0x3f, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x0e, 0x32, 0x2b, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, - 
0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0x2e, 0x53, 0x74, 0x6f, - 0x72, 0x61, 0x67, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x2e, 0x54, 0x79, 0x70, 0x65, 0x52, - 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x4b, 0x0a, 0x08, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, - 0x67, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2f, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, - 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, - 0x32, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x2e, - 0x45, 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x08, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x69, - 0x6e, 0x67, 0x12, 0x54, 0x0a, 0x0b, 0x63, 0x6f, 0x6d, 0x70, 0x72, 0x65, 0x73, 0x73, 0x69, 0x6f, - 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x32, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, + 0x0e, 0x73, 0x6f, 0x72, 0x74, 0x69, 0x6e, 0x67, 0x43, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x73, 0x12, + 0x30, 0x0a, 0x14, 0x75, 0x6e, 0x69, 0x71, 0x75, 0x65, 0x5f, 0x70, 0x72, 0x69, 0x6d, 0x61, 0x72, + 0x79, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x12, 0x75, + 0x6e, 0x69, 0x71, 0x75, 0x65, 0x50, 0x72, 0x69, 0x6d, 0x61, 0x72, 0x79, 0x49, 0x6e, 0x64, 0x65, + 0x78, 0x22, 0x7b, 0x0a, 0x04, 0x4e, 0x6f, 0x64, 0x65, 0x12, 0x33, 0x0a, 0x04, 0x6c, 0x65, 0x61, + 0x66, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, - 0x32, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x2e, - 0x43, 0x6f, 0x6d, 0x70, 0x72, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x0b, 0x63, 0x6f, 0x6d, - 0x70, 0x72, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x0a, 0x08, 0x6e, 0x75, 0x6c, 0x6c, - 0x61, 0x62, 0x6c, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x6e, 0x75, 0x6c, 0x6c, - 0x61, 0x62, 0x6c, 0x65, 0x12, 0x1a, 0x0a, 
0x08, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, - 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, - 0x22, 0x65, 0x0a, 0x04, 0x54, 0x79, 0x70, 0x65, 0x12, 0x1c, 0x0a, 0x18, 0x54, 0x59, 0x50, 0x45, - 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, - 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x53, - 0x54, 0x52, 0x49, 0x4e, 0x47, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x54, 0x59, 0x50, 0x45, 0x5f, - 0x49, 0x4e, 0x54, 0x36, 0x34, 0x10, 0x02, 0x12, 0x0f, 0x0a, 0x0b, 0x54, 0x59, 0x50, 0x45, 0x5f, - 0x44, 0x4f, 0x55, 0x42, 0x4c, 0x45, 0x10, 0x03, 0x12, 0x0d, 0x0a, 0x09, 0x54, 0x59, 0x50, 0x45, - 0x5f, 0x42, 0x4f, 0x4f, 0x4c, 0x10, 0x04, 0x22, 0xae, 0x01, 0x0a, 0x08, 0x45, 0x6e, 0x63, 0x6f, - 0x64, 0x69, 0x6e, 0x67, 0x12, 0x1e, 0x0a, 0x1a, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47, - 0x5f, 0x50, 0x4c, 0x41, 0x49, 0x4e, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, - 0x45, 0x44, 0x10, 0x00, 0x12, 0x1b, 0x0a, 0x17, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47, - 0x5f, 0x52, 0x4c, 0x45, 0x5f, 0x44, 0x49, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x41, 0x52, 0x59, 0x10, - 0x01, 0x12, 0x20, 0x0a, 0x1c, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47, 0x5f, 0x44, 0x45, - 0x4c, 0x54, 0x41, 0x5f, 0x42, 0x49, 0x4e, 0x41, 0x52, 0x59, 0x5f, 0x50, 0x41, 0x43, 0x4b, 0x45, - 0x44, 0x10, 0x02, 0x12, 0x1d, 0x0a, 0x19, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47, 0x5f, - 0x44, 0x45, 0x4c, 0x54, 0x41, 0x5f, 0x42, 0x59, 0x54, 0x45, 0x5f, 0x41, 0x52, 0x52, 0x41, 0x59, - 0x10, 0x03, 0x12, 0x24, 0x0a, 0x20, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47, 0x5f, 0x44, - 0x45, 0x4c, 0x54, 0x41, 0x5f, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48, 0x5f, 0x42, 0x59, 0x54, 0x45, - 0x5f, 0x41, 0x52, 0x52, 0x41, 0x59, 0x10, 0x04, 0x22, 0xa4, 0x01, 0x0a, 0x0b, 0x43, 0x6f, 0x6d, - 0x70, 0x72, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x20, 0x0a, 0x1c, 0x43, 
0x4f, 0x4d, 0x50, - 0x52, 0x45, 0x53, 0x53, 0x49, 0x4f, 0x4e, 0x5f, 0x4e, 0x4f, 0x4e, 0x45, 0x5f, 0x55, 0x4e, 0x53, - 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x16, 0x0a, 0x12, 0x43, 0x4f, - 0x4d, 0x50, 0x52, 0x45, 0x53, 0x53, 0x49, 0x4f, 0x4e, 0x5f, 0x53, 0x4e, 0x41, 0x50, 0x50, 0x59, - 0x10, 0x01, 0x12, 0x14, 0x0a, 0x10, 0x43, 0x4f, 0x4d, 0x50, 0x52, 0x45, 0x53, 0x53, 0x49, 0x4f, - 0x4e, 0x5f, 0x47, 0x5a, 0x49, 0x50, 0x10, 0x02, 0x12, 0x16, 0x0a, 0x12, 0x43, 0x4f, 0x4d, 0x50, - 0x52, 0x45, 0x53, 0x53, 0x49, 0x4f, 0x4e, 0x5f, 0x42, 0x52, 0x4f, 0x54, 0x4c, 0x49, 0x10, 0x03, - 0x12, 0x17, 0x0a, 0x13, 0x43, 0x4f, 0x4d, 0x50, 0x52, 0x45, 0x53, 0x53, 0x49, 0x4f, 0x4e, 0x5f, - 0x4c, 0x5a, 0x34, 0x5f, 0x52, 0x41, 0x57, 0x10, 0x04, 0x12, 0x14, 0x0a, 0x10, 0x43, 0x4f, 0x4d, - 0x50, 0x52, 0x45, 0x53, 0x53, 0x49, 0x4f, 0x4e, 0x5f, 0x5a, 0x53, 0x54, 0x44, 0x10, 0x05, 0x22, - 0xf7, 0x01, 0x0a, 0x0d, 0x53, 0x6f, 0x72, 0x74, 0x69, 0x6e, 0x67, 0x43, 0x6f, 0x6c, 0x75, 0x6d, - 0x6e, 0x12, 0x12, 0x0a, 0x04, 0x70, 0x61, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x04, 0x70, 0x61, 0x74, 0x68, 0x12, 0x4e, 0x0a, 0x09, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x69, - 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x30, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, - 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, - 0x61, 0x32, 0x2e, 0x53, 0x6f, 0x72, 0x74, 0x69, 0x6e, 0x67, 0x43, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, - 0x2e, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x09, 0x64, 0x69, 0x72, 0x65, - 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x6e, 0x75, 0x6c, 0x6c, 0x73, 0x5f, 0x66, - 0x69, 0x72, 0x73, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x6e, 0x75, 0x6c, 0x6c, - 0x73, 0x46, 0x69, 0x72, 0x73, 0x74, 0x22, 0x61, 0x0a, 0x09, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, - 0x69, 0x6f, 0x6e, 0x12, 0x21, 0x0a, 0x1d, 0x44, 0x49, 0x52, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e, - 0x5f, 0x55, 0x4e, 
0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, - 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x17, 0x0a, 0x13, 0x44, 0x49, 0x52, 0x45, 0x43, 0x54, - 0x49, 0x4f, 0x4e, 0x5f, 0x41, 0x53, 0x43, 0x45, 0x4e, 0x44, 0x49, 0x4e, 0x47, 0x10, 0x01, 0x12, - 0x18, 0x0a, 0x14, 0x44, 0x49, 0x52, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x44, 0x45, 0x53, - 0x43, 0x45, 0x4e, 0x44, 0x49, 0x4e, 0x47, 0x10, 0x02, 0x42, 0xfd, 0x01, 0x0a, 0x1b, 0x63, 0x6f, - 0x6d, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, - 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0x42, 0x0b, 0x53, 0x63, 0x68, 0x65, 0x6d, - 0x61, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x53, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, - 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x70, 0x6f, 0x6c, 0x61, 0x72, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x6c, - 0x73, 0x2f, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x2f, 0x67, 0x6f, 0x2f, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x73, - 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2f, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0x3b, 0x73, - 0x63, 0x68, 0x65, 0x6d, 0x61, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0xa2, 0x02, 0x03, - 0x46, 0x53, 0x58, 0xaa, 0x02, 0x17, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x53, 0x63, - 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0xca, 0x02, 0x17, - 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x5c, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5c, 0x56, - 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0xe2, 0x02, 0x23, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, - 0x62, 0x5c, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5c, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, - 0x32, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, 0x02, 0x19, - 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x3a, 0x3a, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x3a, - 0x3a, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 
0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x33, + 0x32, 0x2e, 0x4c, 0x65, 0x61, 0x66, 0x48, 0x00, 0x52, 0x04, 0x6c, 0x65, 0x61, 0x66, 0x12, 0x36, + 0x0a, 0x05, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, + 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, + 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0x2e, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x48, 0x00, 0x52, + 0x05, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x42, 0x06, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x22, 0x69, + 0x0a, 0x04, 0x4c, 0x65, 0x61, 0x66, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x4d, 0x0a, 0x0e, 0x73, 0x74, + 0x6f, 0x72, 0x61, 0x67, 0x65, 0x5f, 0x6c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x26, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, + 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0x2e, 0x53, 0x74, 0x6f, + 0x72, 0x61, 0x67, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x52, 0x0d, 0x73, 0x74, 0x6f, 0x72, + 0x61, 0x67, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x22, 0x88, 0x01, 0x0a, 0x05, 0x47, 0x72, + 0x6f, 0x75, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x6e, 0x75, 0x6c, 0x6c, 0x61, + 0x62, 0x6c, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x6e, 0x75, 0x6c, 0x6c, 0x61, + 0x62, 0x6c, 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, 0x18, + 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, 0x12, + 0x33, 0x0a, 0x05, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1d, + 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, + 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0x2e, 0x4e, 0x6f, 0x64, 0x65, 0x52, 
0x05, 0x6e, + 0x6f, 0x64, 0x65, 0x73, 0x22, 0x8c, 0x06, 0x0a, 0x0d, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, + 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x12, 0x3f, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2b, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, + 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0x2e, 0x53, + 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x2e, 0x54, 0x79, 0x70, + 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x4b, 0x0a, 0x08, 0x65, 0x6e, 0x63, 0x6f, 0x64, + 0x69, 0x6e, 0x67, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2f, 0x2e, 0x66, 0x72, 0x6f, 0x73, + 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, + 0x68, 0x61, 0x32, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, + 0x74, 0x2e, 0x45, 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x08, 0x65, 0x6e, 0x63, 0x6f, + 0x64, 0x69, 0x6e, 0x67, 0x12, 0x54, 0x0a, 0x0b, 0x63, 0x6f, 0x6d, 0x70, 0x72, 0x65, 0x73, 0x73, + 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x32, 0x2e, 0x66, 0x72, 0x6f, 0x73, + 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, + 0x68, 0x61, 0x32, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, + 0x74, 0x2e, 0x43, 0x6f, 0x6d, 0x70, 0x72, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x0b, 0x63, + 0x6f, 0x6d, 0x70, 0x72, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x0a, 0x08, 0x6e, 0x75, + 0x6c, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x6e, 0x75, + 0x6c, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, + 0x65, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, + 0x65, 0x64, 0x22, 0x86, 0x01, 0x0a, 0x04, 0x54, 0x79, 0x70, 0x65, 0x12, 0x1c, 0x0a, 0x18, 0x54, + 0x59, 0x50, 0x45, 0x5f, 
0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x5f, 0x55, 0x4e, 0x53, 0x50, + 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x54, 0x59, 0x50, + 0x45, 0x5f, 0x53, 0x54, 0x52, 0x49, 0x4e, 0x47, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x54, 0x59, + 0x50, 0x45, 0x5f, 0x49, 0x4e, 0x54, 0x36, 0x34, 0x10, 0x02, 0x12, 0x0f, 0x0a, 0x0b, 0x54, 0x59, + 0x50, 0x45, 0x5f, 0x44, 0x4f, 0x55, 0x42, 0x4c, 0x45, 0x10, 0x03, 0x12, 0x0d, 0x0a, 0x09, 0x54, + 0x59, 0x50, 0x45, 0x5f, 0x42, 0x4f, 0x4f, 0x4c, 0x10, 0x04, 0x12, 0x0e, 0x0a, 0x0a, 0x54, 0x59, + 0x50, 0x45, 0x5f, 0x49, 0x4e, 0x54, 0x33, 0x32, 0x10, 0x05, 0x12, 0x0f, 0x0a, 0x0b, 0x54, 0x59, + 0x50, 0x45, 0x5f, 0x55, 0x49, 0x4e, 0x54, 0x36, 0x34, 0x10, 0x06, 0x22, 0xae, 0x01, 0x0a, 0x08, + 0x45, 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x1e, 0x0a, 0x1a, 0x45, 0x4e, 0x43, 0x4f, + 0x44, 0x49, 0x4e, 0x47, 0x5f, 0x50, 0x4c, 0x41, 0x49, 0x4e, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, + 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x1b, 0x0a, 0x17, 0x45, 0x4e, 0x43, 0x4f, + 0x44, 0x49, 0x4e, 0x47, 0x5f, 0x52, 0x4c, 0x45, 0x5f, 0x44, 0x49, 0x43, 0x54, 0x49, 0x4f, 0x4e, + 0x41, 0x52, 0x59, 0x10, 0x01, 0x12, 0x20, 0x0a, 0x1c, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, + 0x47, 0x5f, 0x44, 0x45, 0x4c, 0x54, 0x41, 0x5f, 0x42, 0x49, 0x4e, 0x41, 0x52, 0x59, 0x5f, 0x50, + 0x41, 0x43, 0x4b, 0x45, 0x44, 0x10, 0x02, 0x12, 0x1d, 0x0a, 0x19, 0x45, 0x4e, 0x43, 0x4f, 0x44, + 0x49, 0x4e, 0x47, 0x5f, 0x44, 0x45, 0x4c, 0x54, 0x41, 0x5f, 0x42, 0x59, 0x54, 0x45, 0x5f, 0x41, + 0x52, 0x52, 0x41, 0x59, 0x10, 0x03, 0x12, 0x24, 0x0a, 0x20, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, + 0x4e, 0x47, 0x5f, 0x44, 0x45, 0x4c, 0x54, 0x41, 0x5f, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48, 0x5f, + 0x42, 0x59, 0x54, 0x45, 0x5f, 0x41, 0x52, 0x52, 0x41, 0x59, 0x10, 0x04, 0x22, 0xa4, 0x01, 0x0a, + 0x0b, 0x43, 0x6f, 0x6d, 0x70, 0x72, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x20, 0x0a, 0x1c, + 0x43, 0x4f, 0x4d, 0x50, 0x52, 0x45, 0x53, 0x53, 0x49, 0x4f, 
0x4e, 0x5f, 0x4e, 0x4f, 0x4e, 0x45, + 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x16, + 0x0a, 0x12, 0x43, 0x4f, 0x4d, 0x50, 0x52, 0x45, 0x53, 0x53, 0x49, 0x4f, 0x4e, 0x5f, 0x53, 0x4e, + 0x41, 0x50, 0x50, 0x59, 0x10, 0x01, 0x12, 0x14, 0x0a, 0x10, 0x43, 0x4f, 0x4d, 0x50, 0x52, 0x45, + 0x53, 0x53, 0x49, 0x4f, 0x4e, 0x5f, 0x47, 0x5a, 0x49, 0x50, 0x10, 0x02, 0x12, 0x16, 0x0a, 0x12, + 0x43, 0x4f, 0x4d, 0x50, 0x52, 0x45, 0x53, 0x53, 0x49, 0x4f, 0x4e, 0x5f, 0x42, 0x52, 0x4f, 0x54, + 0x4c, 0x49, 0x10, 0x03, 0x12, 0x17, 0x0a, 0x13, 0x43, 0x4f, 0x4d, 0x50, 0x52, 0x45, 0x53, 0x53, + 0x49, 0x4f, 0x4e, 0x5f, 0x4c, 0x5a, 0x34, 0x5f, 0x52, 0x41, 0x57, 0x10, 0x04, 0x12, 0x14, 0x0a, + 0x10, 0x43, 0x4f, 0x4d, 0x50, 0x52, 0x45, 0x53, 0x53, 0x49, 0x4f, 0x4e, 0x5f, 0x5a, 0x53, 0x54, + 0x44, 0x10, 0x05, 0x22, 0xf7, 0x01, 0x0a, 0x0d, 0x53, 0x6f, 0x72, 0x74, 0x69, 0x6e, 0x67, 0x43, + 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x12, 0x12, 0x0a, 0x04, 0x70, 0x61, 0x74, 0x68, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x04, 0x70, 0x61, 0x74, 0x68, 0x12, 0x4e, 0x0a, 0x09, 0x64, 0x69, 0x72, + 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x30, 0x2e, 0x66, + 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, + 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0x2e, 0x53, 0x6f, 0x72, 0x74, 0x69, 0x6e, 0x67, 0x43, 0x6f, + 0x6c, 0x75, 0x6d, 0x6e, 0x2e, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x09, + 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x6e, 0x75, 0x6c, + 0x6c, 0x73, 0x5f, 0x66, 0x69, 0x72, 0x73, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, + 0x6e, 0x75, 0x6c, 0x6c, 0x73, 0x46, 0x69, 0x72, 0x73, 0x74, 0x22, 0x61, 0x0a, 0x09, 0x44, 0x69, + 0x72, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x21, 0x0a, 0x1d, 0x44, 0x49, 0x52, 0x45, 0x43, + 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x5f, 0x55, 0x4e, 0x53, + 
0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x17, 0x0a, 0x13, 0x44, 0x49, + 0x52, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x41, 0x53, 0x43, 0x45, 0x4e, 0x44, 0x49, 0x4e, + 0x47, 0x10, 0x01, 0x12, 0x18, 0x0a, 0x14, 0x44, 0x49, 0x52, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e, + 0x5f, 0x44, 0x45, 0x53, 0x43, 0x45, 0x4e, 0x44, 0x49, 0x4e, 0x47, 0x10, 0x02, 0x42, 0xfd, 0x01, + 0x0a, 0x1b, 0x63, 0x6f, 0x6d, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, + 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0x42, 0x0b, 0x53, + 0x63, 0x68, 0x65, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x53, 0x67, 0x69, + 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x70, 0x6f, 0x6c, 0x61, 0x72, 0x73, 0x69, + 0x67, 0x6e, 0x61, 0x6c, 0x73, 0x2f, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x67, 0x65, + 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x67, 0x6f, 0x2f, 0x66, 0x72, 0x6f, 0x73, 0x74, + 0x64, 0x62, 0x2f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2f, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, + 0x61, 0x32, 0x3b, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, + 0x32, 0xa2, 0x02, 0x03, 0x46, 0x53, 0x58, 0xaa, 0x02, 0x17, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, + 0x62, 0x2e, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, + 0x32, 0xca, 0x02, 0x17, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x5c, 0x53, 0x63, 0x68, 0x65, + 0x6d, 0x61, 0x5c, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0xe2, 0x02, 0x23, 0x46, 0x72, + 0x6f, 0x73, 0x74, 0x64, 0x62, 0x5c, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5c, 0x56, 0x31, 0x61, + 0x6c, 0x70, 0x68, 0x61, 0x32, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0xea, 0x02, 0x19, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x3a, 0x3a, 0x53, 0x63, 0x68, + 0x65, 0x6d, 0x61, 0x3a, 0x3a, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0x62, 0x06, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( 
@@ -822,7 +848,7 @@ func file_frostdb_schema_v1alpha2_schema_proto_rawDescGZIP() []byte { var file_frostdb_schema_v1alpha2_schema_proto_enumTypes = make([]protoimpl.EnumInfo, 4) var file_frostdb_schema_v1alpha2_schema_proto_msgTypes = make([]protoimpl.MessageInfo, 6) -var file_frostdb_schema_v1alpha2_schema_proto_goTypes = []interface{}{ +var file_frostdb_schema_v1alpha2_schema_proto_goTypes = []any{ (StorageLayout_Type)(0), // 0: frostdb.schema.v1alpha2.StorageLayout.Type (StorageLayout_Encoding)(0), // 1: frostdb.schema.v1alpha2.StorageLayout.Encoding (StorageLayout_Compression)(0), // 2: frostdb.schema.v1alpha2.StorageLayout.Compression @@ -858,7 +884,7 @@ func file_frostdb_schema_v1alpha2_schema_proto_init() { return } if !protoimpl.UnsafeEnabled { - file_frostdb_schema_v1alpha2_schema_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + file_frostdb_schema_v1alpha2_schema_proto_msgTypes[0].Exporter = func(v any, i int) any { switch v := v.(*Schema); i { case 0: return &v.state @@ -870,7 +896,7 @@ func file_frostdb_schema_v1alpha2_schema_proto_init() { return nil } } - file_frostdb_schema_v1alpha2_schema_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + file_frostdb_schema_v1alpha2_schema_proto_msgTypes[1].Exporter = func(v any, i int) any { switch v := v.(*Node); i { case 0: return &v.state @@ -882,7 +908,7 @@ func file_frostdb_schema_v1alpha2_schema_proto_init() { return nil } } - file_frostdb_schema_v1alpha2_schema_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + file_frostdb_schema_v1alpha2_schema_proto_msgTypes[2].Exporter = func(v any, i int) any { switch v := v.(*Leaf); i { case 0: return &v.state @@ -894,7 +920,7 @@ func file_frostdb_schema_v1alpha2_schema_proto_init() { return nil } } - file_frostdb_schema_v1alpha2_schema_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + file_frostdb_schema_v1alpha2_schema_proto_msgTypes[3].Exporter = func(v any, i int) any { switch v 
:= v.(*Group); i { case 0: return &v.state @@ -906,7 +932,7 @@ func file_frostdb_schema_v1alpha2_schema_proto_init() { return nil } } - file_frostdb_schema_v1alpha2_schema_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { + file_frostdb_schema_v1alpha2_schema_proto_msgTypes[4].Exporter = func(v any, i int) any { switch v := v.(*StorageLayout); i { case 0: return &v.state @@ -918,7 +944,7 @@ func file_frostdb_schema_v1alpha2_schema_proto_init() { return nil } } - file_frostdb_schema_v1alpha2_schema_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} { + file_frostdb_schema_v1alpha2_schema_proto_msgTypes[5].Exporter = func(v any, i int) any { switch v := v.(*SortingColumn); i { case 0: return &v.state @@ -931,7 +957,7 @@ func file_frostdb_schema_v1alpha2_schema_proto_init() { } } } - file_frostdb_schema_v1alpha2_schema_proto_msgTypes[1].OneofWrappers = []interface{}{ + file_frostdb_schema_v1alpha2_schema_proto_msgTypes[1].OneofWrappers = []any{ (*Node_Leaf)(nil), (*Node_Group)(nil), } diff --git a/gen/proto/go/frostdb/schema/v1alpha2/schema_vtproto.pb.go b/gen/proto/go/frostdb/schema/v1alpha2/schema_vtproto.pb.go index bfe940823..efd8249e3 100644 --- a/gen/proto/go/frostdb/schema/v1alpha2/schema_vtproto.pb.go +++ b/gen/proto/go/frostdb/schema/v1alpha2/schema_vtproto.pb.go @@ -1,14 +1,14 @@ // Code generated by protoc-gen-go-vtproto. DO NOT EDIT. 
-// protoc-gen-go-vtproto version: v0.3.0 +// protoc-gen-go-vtproto version: v0.6.0 // source: frostdb/schema/v1alpha2/schema.proto package schemav1alpha2 import ( fmt "fmt" + protohelpers "github.com/planetscale/vtprotobuf/protohelpers" protoimpl "google.golang.org/protobuf/runtime/protoimpl" io "io" - bits "math/bits" ) const ( @@ -48,6 +48,16 @@ func (m *Schema) MarshalToSizedBufferVT(dAtA []byte) (int, error) { i -= len(m.unknownFields) copy(dAtA[i:], m.unknownFields) } + if m.UniquePrimaryIndex { + i-- + if m.UniquePrimaryIndex { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i-- + dAtA[i] = 0x20 + } if len(m.SortingColumns) > 0 { for iNdEx := len(m.SortingColumns) - 1; iNdEx >= 0; iNdEx-- { size, err := m.SortingColumns[iNdEx].MarshalToSizedBufferVT(dAtA[:i]) @@ -55,7 +65,7 @@ func (m *Schema) MarshalToSizedBufferVT(dAtA []byte) (int, error) { return 0, err } i -= size - i = encodeVarint(dAtA, i, uint64(size)) + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) i-- dAtA[i] = 0x1a } @@ -66,7 +76,7 @@ func (m *Schema) MarshalToSizedBufferVT(dAtA []byte) (int, error) { return 0, err } i -= size - i = encodeVarint(dAtA, i, uint64(size)) + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) i-- dAtA[i] = 0x12 } @@ -104,16 +114,13 @@ func (m *Node) MarshalToSizedBufferVT(dAtA []byte) (int, error) { copy(dAtA[i:], m.unknownFields) } if vtmsg, ok := m.Type.(interface { - MarshalToVT([]byte) (int, error) - SizeVT() int + MarshalToSizedBufferVT([]byte) (int, error) }); ok { - { - size := vtmsg.SizeVT() - i -= size - if _, err := vtmsg.MarshalToVT(dAtA[i:]); err != nil { - return 0, err - } + size, err := vtmsg.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err } + i -= size } return len(dAtA) - i, nil } @@ -131,7 +138,7 @@ func (m *Node_Leaf) MarshalToSizedBufferVT(dAtA []byte) (int, error) { return 0, err } i -= size - i = encodeVarint(dAtA, i, uint64(size)) + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) i-- dAtA[i] = 0xa } @@ 
-150,7 +157,7 @@ func (m *Node_Group) MarshalToSizedBufferVT(dAtA []byte) (int, error) { return 0, err } i -= size - i = encodeVarint(dAtA, i, uint64(size)) + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) i-- dAtA[i] = 0x12 } @@ -192,14 +199,14 @@ func (m *Leaf) MarshalToSizedBufferVT(dAtA []byte) (int, error) { return 0, err } i -= size - i = encodeVarint(dAtA, i, uint64(size)) + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) i-- dAtA[i] = 0x12 } if len(m.Name) > 0 { i -= len(m.Name) copy(dAtA[i:], m.Name) - i = encodeVarint(dAtA, i, uint64(len(m.Name))) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.Name))) i-- dAtA[i] = 0xa } @@ -243,7 +250,7 @@ func (m *Group) MarshalToSizedBufferVT(dAtA []byte) (int, error) { return 0, err } i -= size - i = encodeVarint(dAtA, i, uint64(size)) + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) i-- dAtA[i] = 0x22 } @@ -271,7 +278,7 @@ func (m *Group) MarshalToSizedBufferVT(dAtA []byte) (int, error) { if len(m.Name) > 0 { i -= len(m.Name) copy(dAtA[i:], m.Name) - i = encodeVarint(dAtA, i, uint64(len(m.Name))) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.Name))) i-- dAtA[i] = 0xa } @@ -329,17 +336,17 @@ func (m *StorageLayout) MarshalToSizedBufferVT(dAtA []byte) (int, error) { dAtA[i] = 0x20 } if m.Compression != 0 { - i = encodeVarint(dAtA, i, uint64(m.Compression)) + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Compression)) i-- dAtA[i] = 0x18 } if m.Encoding != 0 { - i = encodeVarint(dAtA, i, uint64(m.Encoding)) + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Encoding)) i-- dAtA[i] = 0x10 } if m.Type != 0 { - i = encodeVarint(dAtA, i, uint64(m.Type)) + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Type)) i-- dAtA[i] = 0x8 } @@ -387,31 +394,20 @@ func (m *SortingColumn) MarshalToSizedBufferVT(dAtA []byte) (int, error) { dAtA[i] = 0x18 } if m.Direction != 0 { - i = encodeVarint(dAtA, i, uint64(m.Direction)) + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Direction)) i-- dAtA[i] = 0x10 } 
if len(m.Path) > 0 { i -= len(m.Path) copy(dAtA[i:], m.Path) - i = encodeVarint(dAtA, i, uint64(len(m.Path))) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.Path))) i-- dAtA[i] = 0xa } return len(dAtA) - i, nil } -func encodeVarint(dAtA []byte, offset int, v uint64) int { - offset -= sov(v) - base := offset - for v >= 1<<7 { - dAtA[offset] = uint8(v&0x7f | 0x80) - v >>= 7 - offset++ - } - dAtA[offset] = uint8(v) - return base -} func (m *Schema) SizeVT() (n int) { if m == nil { return 0 @@ -420,17 +416,18 @@ func (m *Schema) SizeVT() (n int) { _ = l if m.Root != nil { l = m.Root.SizeVT() - n += 1 + l + sov(uint64(l)) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } if len(m.SortingColumns) > 0 { for _, e := range m.SortingColumns { l = e.SizeVT() - n += 1 + l + sov(uint64(l)) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } } - if m.unknownFields != nil { - n += len(m.unknownFields) + if m.UniquePrimaryIndex { + n += 2 } + n += len(m.unknownFields) return n } @@ -443,9 +440,7 @@ func (m *Node) SizeVT() (n int) { if vtmsg, ok := m.Type.(interface{ SizeVT() int }); ok { n += vtmsg.SizeVT() } - if m.unknownFields != nil { - n += len(m.unknownFields) - } + n += len(m.unknownFields) return n } @@ -457,7 +452,7 @@ func (m *Node_Leaf) SizeVT() (n int) { _ = l if m.Leaf != nil { l = m.Leaf.SizeVT() - n += 1 + l + sov(uint64(l)) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } return n } @@ -469,7 +464,7 @@ func (m *Node_Group) SizeVT() (n int) { _ = l if m.Group != nil { l = m.Group.SizeVT() - n += 1 + l + sov(uint64(l)) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } return n } @@ -481,15 +476,13 @@ func (m *Leaf) SizeVT() (n int) { _ = l l = len(m.Name) if l > 0 { - n += 1 + l + sov(uint64(l)) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } if m.StorageLayout != nil { l = m.StorageLayout.SizeVT() - n += 1 + l + sov(uint64(l)) - } - if m.unknownFields != nil { - n += len(m.unknownFields) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) 
} + n += len(m.unknownFields) return n } @@ -501,7 +494,7 @@ func (m *Group) SizeVT() (n int) { _ = l l = len(m.Name) if l > 0 { - n += 1 + l + sov(uint64(l)) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } if m.Nullable { n += 2 @@ -512,12 +505,10 @@ func (m *Group) SizeVT() (n int) { if len(m.Nodes) > 0 { for _, e := range m.Nodes { l = e.SizeVT() - n += 1 + l + sov(uint64(l)) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } } - if m.unknownFields != nil { - n += len(m.unknownFields) - } + n += len(m.unknownFields) return n } @@ -528,13 +519,13 @@ func (m *StorageLayout) SizeVT() (n int) { var l int _ = l if m.Type != 0 { - n += 1 + sov(uint64(m.Type)) + n += 1 + protohelpers.SizeOfVarint(uint64(m.Type)) } if m.Encoding != 0 { - n += 1 + sov(uint64(m.Encoding)) + n += 1 + protohelpers.SizeOfVarint(uint64(m.Encoding)) } if m.Compression != 0 { - n += 1 + sov(uint64(m.Compression)) + n += 1 + protohelpers.SizeOfVarint(uint64(m.Compression)) } if m.Nullable { n += 2 @@ -542,9 +533,7 @@ func (m *StorageLayout) SizeVT() (n int) { if m.Repeated { n += 2 } - if m.unknownFields != nil { - n += len(m.unknownFields) - } + n += len(m.unknownFields) return n } @@ -556,26 +545,18 @@ func (m *SortingColumn) SizeVT() (n int) { _ = l l = len(m.Path) if l > 0 { - n += 1 + l + sov(uint64(l)) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } if m.Direction != 0 { - n += 1 + sov(uint64(m.Direction)) + n += 1 + protohelpers.SizeOfVarint(uint64(m.Direction)) } if m.NullsFirst { n += 2 } - if m.unknownFields != nil { - n += len(m.unknownFields) - } + n += len(m.unknownFields) return n } -func sov(x uint64) (n int) { - return (bits.Len64(x|1) + 6) / 7 -} -func soz(x uint64) (n int) { - return sov(uint64((x << 1) ^ uint64((int64(x) >> 63)))) -} func (m *Schema) UnmarshalVT(dAtA []byte) error { l := len(dAtA) iNdEx := 0 @@ -584,7 +565,7 @@ func (m *Schema) UnmarshalVT(dAtA []byte) error { var wire uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return 
ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -612,7 +593,7 @@ func (m *Schema) UnmarshalVT(dAtA []byte) error { var msglen int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -625,11 +606,11 @@ func (m *Schema) UnmarshalVT(dAtA []byte) error { } } if msglen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + msglen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -648,7 +629,7 @@ func (m *Schema) UnmarshalVT(dAtA []byte) error { var msglen int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -661,11 +642,11 @@ func (m *Schema) UnmarshalVT(dAtA []byte) error { } } if msglen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + msglen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -675,14 +656,34 @@ func (m *Schema) UnmarshalVT(dAtA []byte) error { return err } iNdEx = postIndex + case 4: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field UniquePrimaryIndex", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + m.UniquePrimaryIndex = bool(v != 0) default: iNdEx = preIndex - skippy, err := skip(dAtA[iNdEx:]) + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) if err != nil { return err } if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if (iNdEx + 
skippy) > l { return io.ErrUnexpectedEOF @@ -705,7 +706,7 @@ func (m *Node) UnmarshalVT(dAtA []byte) error { var wire uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -733,7 +734,7 @@ func (m *Node) UnmarshalVT(dAtA []byte) error { var msglen int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -746,11 +747,11 @@ func (m *Node) UnmarshalVT(dAtA []byte) error { } } if msglen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + msglen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -764,7 +765,7 @@ func (m *Node) UnmarshalVT(dAtA []byte) error { if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { return err } - m.Type = &Node_Leaf{v} + m.Type = &Node_Leaf{Leaf: v} } iNdEx = postIndex case 2: @@ -774,7 +775,7 @@ func (m *Node) UnmarshalVT(dAtA []byte) error { var msglen int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -787,11 +788,11 @@ func (m *Node) UnmarshalVT(dAtA []byte) error { } } if msglen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + msglen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -805,17 +806,17 @@ func (m *Node) UnmarshalVT(dAtA []byte) error { if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { return err } - m.Type = &Node_Group{v} + m.Type = &Node_Group{Group: v} } iNdEx = postIndex default: iNdEx = preIndex - skippy, err := skip(dAtA[iNdEx:]) + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) if err != nil { return err } if 
(skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if (iNdEx + skippy) > l { return io.ErrUnexpectedEOF @@ -838,7 +839,7 @@ func (m *Leaf) UnmarshalVT(dAtA []byte) error { var wire uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -866,7 +867,7 @@ func (m *Leaf) UnmarshalVT(dAtA []byte) error { var stringLen uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -880,11 +881,11 @@ func (m *Leaf) UnmarshalVT(dAtA []byte) error { } intStringLen := int(stringLen) if intStringLen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + intStringLen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -898,7 +899,7 @@ func (m *Leaf) UnmarshalVT(dAtA []byte) error { var msglen int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -911,11 +912,11 @@ func (m *Leaf) UnmarshalVT(dAtA []byte) error { } } if msglen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + msglen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -929,12 +930,12 @@ func (m *Leaf) UnmarshalVT(dAtA []byte) error { iNdEx = postIndex default: iNdEx = preIndex - skippy, err := skip(dAtA[iNdEx:]) + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) if err != nil { return err } if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if (iNdEx + skippy) > l { return io.ErrUnexpectedEOF @@ -957,7 +958,7 @@ func (m *Group) 
UnmarshalVT(dAtA []byte) error { var wire uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -985,7 +986,7 @@ func (m *Group) UnmarshalVT(dAtA []byte) error { var stringLen uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -999,11 +1000,11 @@ func (m *Group) UnmarshalVT(dAtA []byte) error { } intStringLen := int(stringLen) if intStringLen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + intStringLen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -1017,7 +1018,7 @@ func (m *Group) UnmarshalVT(dAtA []byte) error { var v int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -1037,7 +1038,7 @@ func (m *Group) UnmarshalVT(dAtA []byte) error { var v int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -1057,7 +1058,7 @@ func (m *Group) UnmarshalVT(dAtA []byte) error { var msglen int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -1070,11 +1071,11 @@ func (m *Group) UnmarshalVT(dAtA []byte) error { } } if msglen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + msglen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -1086,12 +1087,12 @@ func (m *Group) UnmarshalVT(dAtA []byte) error { iNdEx = postIndex default: iNdEx = preIndex - skippy, err 
:= skip(dAtA[iNdEx:]) + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) if err != nil { return err } if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if (iNdEx + skippy) > l { return io.ErrUnexpectedEOF @@ -1114,7 +1115,7 @@ func (m *StorageLayout) UnmarshalVT(dAtA []byte) error { var wire uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -1142,7 +1143,7 @@ func (m *StorageLayout) UnmarshalVT(dAtA []byte) error { m.Type = 0 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -1161,7 +1162,7 @@ func (m *StorageLayout) UnmarshalVT(dAtA []byte) error { m.Encoding = 0 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -1180,7 +1181,7 @@ func (m *StorageLayout) UnmarshalVT(dAtA []byte) error { m.Compression = 0 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -1199,7 +1200,7 @@ func (m *StorageLayout) UnmarshalVT(dAtA []byte) error { var v int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -1219,7 +1220,7 @@ func (m *StorageLayout) UnmarshalVT(dAtA []byte) error { var v int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -1234,12 +1235,12 @@ func (m *StorageLayout) UnmarshalVT(dAtA []byte) error { m.Repeated = bool(v != 0) default: iNdEx = preIndex - skippy, err := skip(dAtA[iNdEx:]) + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) if err 
!= nil { return err } if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if (iNdEx + skippy) > l { return io.ErrUnexpectedEOF @@ -1262,7 +1263,7 @@ func (m *SortingColumn) UnmarshalVT(dAtA []byte) error { var wire uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -1290,7 +1291,7 @@ func (m *SortingColumn) UnmarshalVT(dAtA []byte) error { var stringLen uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -1304,11 +1305,11 @@ func (m *SortingColumn) UnmarshalVT(dAtA []byte) error { } intStringLen := int(stringLen) if intStringLen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + intStringLen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -1322,7 +1323,7 @@ func (m *SortingColumn) UnmarshalVT(dAtA []byte) error { m.Direction = 0 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -1341,7 +1342,7 @@ func (m *SortingColumn) UnmarshalVT(dAtA []byte) error { var v int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -1356,12 +1357,12 @@ func (m *SortingColumn) UnmarshalVT(dAtA []byte) error { m.NullsFirst = bool(v != 0) default: iNdEx = preIndex - skippy, err := skip(dAtA[iNdEx:]) + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) if err != nil { return err } if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if (iNdEx + skippy) > l { return io.ErrUnexpectedEOF @@ -1376,87 +1377,3 
@@ func (m *SortingColumn) UnmarshalVT(dAtA []byte) error { } return nil } -func skip(dAtA []byte) (n int, err error) { - l := len(dAtA) - iNdEx := 0 - depth := 0 - for iNdEx < l { - var wire uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflow - } - if iNdEx >= l { - return 0, io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - wire |= (uint64(b) & 0x7F) << shift - if b < 0x80 { - break - } - } - wireType := int(wire & 0x7) - switch wireType { - case 0: - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflow - } - if iNdEx >= l { - return 0, io.ErrUnexpectedEOF - } - iNdEx++ - if dAtA[iNdEx-1] < 0x80 { - break - } - } - case 1: - iNdEx += 8 - case 2: - var length int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflow - } - if iNdEx >= l { - return 0, io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - length |= (int(b) & 0x7F) << shift - if b < 0x80 { - break - } - } - if length < 0 { - return 0, ErrInvalidLength - } - iNdEx += length - case 3: - depth++ - case 4: - if depth == 0 { - return 0, ErrUnexpectedEndOfGroup - } - depth-- - case 5: - iNdEx += 4 - default: - return 0, fmt.Errorf("proto: illegal wireType %d", wireType) - } - if iNdEx < 0 { - return 0, ErrInvalidLength - } - if depth == 0 { - return iNdEx, nil - } - } - return 0, io.ErrUnexpectedEOF -} - -var ( - ErrInvalidLength = fmt.Errorf("proto: negative length found during unmarshaling") - ErrIntOverflow = fmt.Errorf("proto: integer overflow") - ErrUnexpectedEndOfGroup = fmt.Errorf("proto: unexpected end of group") -) diff --git a/gen/proto/go/frostdb/snapshot/v1alpha1/snapshot.pb.go b/gen/proto/go/frostdb/snapshot/v1alpha1/snapshot.pb.go new file mode 100644 index 000000000..f424f0e6b --- /dev/null +++ b/gen/proto/go/frostdb/snapshot/v1alpha1/snapshot.pb.go @@ -0,0 +1,616 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. 
+// versions: +// protoc-gen-go v1.34.2 +// protoc (unknown) +// source: frostdb/snapshot/v1alpha1/snapshot.proto + +package snapshotv1alpha1 + +import ( + v1alpha1 "github.com/polarsignals/frostdb/gen/proto/go/frostdb/table/v1alpha1" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// Encoding is a enum of possible encodings of the part. +type Part_Encoding int32 + +const ( + // ENCODING_UNSPECIFIED is an unknown encoding. + Part_ENCODING_UNSPECIFIED Part_Encoding = 0 + // ENCODING_PARQUET is a parquet encoding. + Part_ENCODING_PARQUET Part_Encoding = 1 + // ENCODING_ARROW is an arrow encoding. + Part_ENCODING_ARROW Part_Encoding = 2 +) + +// Enum value maps for Part_Encoding. +var ( + Part_Encoding_name = map[int32]string{ + 0: "ENCODING_UNSPECIFIED", + 1: "ENCODING_PARQUET", + 2: "ENCODING_ARROW", + } + Part_Encoding_value = map[string]int32{ + "ENCODING_UNSPECIFIED": 0, + "ENCODING_PARQUET": 1, + "ENCODING_ARROW": 2, + } +) + +func (x Part_Encoding) Enum() *Part_Encoding { + p := new(Part_Encoding) + *p = x + return p +} + +func (x Part_Encoding) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (Part_Encoding) Descriptor() protoreflect.EnumDescriptor { + return file_frostdb_snapshot_v1alpha1_snapshot_proto_enumTypes[0].Descriptor() +} + +func (Part_Encoding) Type() protoreflect.EnumType { + return &file_frostdb_snapshot_v1alpha1_snapshot_proto_enumTypes[0] +} + +func (x Part_Encoding) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use Part_Encoding.Descriptor instead. 
+func (Part_Encoding) EnumDescriptor() ([]byte, []int) { + return file_frostdb_snapshot_v1alpha1_snapshot_proto_rawDescGZIP(), []int{3, 0} +} + +// FooterData is a message stored in the footer of a snapshot file that encodes +// data about the rest of the file. +type FooterData struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // table_metadata is a list of all tables in the snapshot. + TableMetadata []*Table `protobuf:"bytes,1,rep,name=table_metadata,json=tableMetadata,proto3" json:"table_metadata,omitempty"` +} + +func (x *FooterData) Reset() { + *x = FooterData{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_snapshot_v1alpha1_snapshot_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *FooterData) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FooterData) ProtoMessage() {} + +func (x *FooterData) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_snapshot_v1alpha1_snapshot_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FooterData.ProtoReflect.Descriptor instead. +func (*FooterData) Descriptor() ([]byte, []int) { + return file_frostdb_snapshot_v1alpha1_snapshot_proto_rawDescGZIP(), []int{0} +} + +func (x *FooterData) GetTableMetadata() []*Table { + if x != nil { + return x.TableMetadata + } + return nil +} + +// Table encodes metadata about a table in a snapshot. +type Table struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // name of the table. + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + // config is the configuration of the table. 
+ Config *v1alpha1.TableConfig `protobuf:"bytes,2,opt,name=config,proto3" json:"config,omitempty"` + // active_block is the active block of the table. + ActiveBlock *Table_TableBlock `protobuf:"bytes,3,opt,name=active_block,json=activeBlock,proto3" json:"active_block,omitempty"` + // granule_metadata is a list of all granules in the table. + GranuleMetadata []*Granule `protobuf:"bytes,4,rep,name=granule_metadata,json=granuleMetadata,proto3" json:"granule_metadata,omitempty"` +} + +func (x *Table) Reset() { + *x = Table{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_snapshot_v1alpha1_snapshot_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Table) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Table) ProtoMessage() {} + +func (x *Table) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_snapshot_v1alpha1_snapshot_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Table.ProtoReflect.Descriptor instead. +func (*Table) Descriptor() ([]byte, []int) { + return file_frostdb_snapshot_v1alpha1_snapshot_proto_rawDescGZIP(), []int{1} +} + +func (x *Table) GetName() string { + if x != nil { + return x.Name + } + return "" +} + +func (x *Table) GetConfig() *v1alpha1.TableConfig { + if x != nil { + return x.Config + } + return nil +} + +func (x *Table) GetActiveBlock() *Table_TableBlock { + if x != nil { + return x.ActiveBlock + } + return nil +} + +func (x *Table) GetGranuleMetadata() []*Granule { + if x != nil { + return x.GranuleMetadata + } + return nil +} + +// Granule encodes metadata about a granule in a table. 
+type Granule struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // part_metadata is a list of all parts in the granule. + PartMetadata []*Part `protobuf:"bytes,1,rep,name=part_metadata,json=partMetadata,proto3" json:"part_metadata,omitempty"` +} + +func (x *Granule) Reset() { + *x = Granule{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_snapshot_v1alpha1_snapshot_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Granule) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Granule) ProtoMessage() {} + +func (x *Granule) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_snapshot_v1alpha1_snapshot_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Granule.ProtoReflect.Descriptor instead. +func (*Granule) Descriptor() ([]byte, []int) { + return file_frostdb_snapshot_v1alpha1_snapshot_proto_rawDescGZIP(), []int{2} +} + +func (x *Granule) GetPartMetadata() []*Part { + if x != nil { + return x.PartMetadata + } + return nil +} + +// Part encodes metadata about a part in a granule. +type Part struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // start_offset is the start offset of the part. + StartOffset int64 `protobuf:"varint,1,opt,name=start_offset,json=startOffset,proto3" json:"start_offset,omitempty"` + // end_offset is the end offset of the part. + EndOffset int64 `protobuf:"varint,2,opt,name=end_offset,json=endOffset,proto3" json:"end_offset,omitempty"` + // tx is the transaction id of the part. + Tx uint64 `protobuf:"varint,3,opt,name=tx,proto3" json:"tx,omitempty"` + // compaction_level is the compaction level of the part. 
+ CompactionLevel uint64 `protobuf:"varint,4,opt,name=compaction_level,json=compactionLevel,proto3" json:"compaction_level,omitempty"` + // encoding is the actual encoding of the part. + Encoding Part_Encoding `protobuf:"varint,5,opt,name=encoding,proto3,enum=frostdb.snapshot.v1alpha1.Part_Encoding" json:"encoding,omitempty"` +} + +func (x *Part) Reset() { + *x = Part{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_snapshot_v1alpha1_snapshot_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Part) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Part) ProtoMessage() {} + +func (x *Part) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_snapshot_v1alpha1_snapshot_proto_msgTypes[3] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Part.ProtoReflect.Descriptor instead. +func (*Part) Descriptor() ([]byte, []int) { + return file_frostdb_snapshot_v1alpha1_snapshot_proto_rawDescGZIP(), []int{3} +} + +func (x *Part) GetStartOffset() int64 { + if x != nil { + return x.StartOffset + } + return 0 +} + +func (x *Part) GetEndOffset() int64 { + if x != nil { + return x.EndOffset + } + return 0 +} + +func (x *Part) GetTx() uint64 { + if x != nil { + return x.Tx + } + return 0 +} + +func (x *Part) GetCompactionLevel() uint64 { + if x != nil { + return x.CompactionLevel + } + return 0 +} + +func (x *Part) GetEncoding() Part_Encoding { + if x != nil { + return x.Encoding + } + return Part_ENCODING_UNSPECIFIED +} + +// TableBlock +type Table_TableBlock struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // ulid is the unique identifier of the block. 
+ Ulid []byte `protobuf:"bytes,1,opt,name=ulid,proto3" json:"ulid,omitempty"` + // size is the size of the block in bytes. + Size int64 `protobuf:"varint,2,opt,name=size,proto3" json:"size,omitempty"` + // min_tx is the minimum transaction id in the block. + MinTx uint64 `protobuf:"varint,3,opt,name=min_tx,json=minTx,proto3" json:"min_tx,omitempty"` + // prev_tx is the transaction id of the previous block. + PrevTx uint64 `protobuf:"varint,4,opt,name=prev_tx,json=prevTx,proto3" json:"prev_tx,omitempty"` +} + +func (x *Table_TableBlock) Reset() { + *x = Table_TableBlock{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_snapshot_v1alpha1_snapshot_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Table_TableBlock) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Table_TableBlock) ProtoMessage() {} + +func (x *Table_TableBlock) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_snapshot_v1alpha1_snapshot_proto_msgTypes[4] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Table_TableBlock.ProtoReflect.Descriptor instead. 
+func (*Table_TableBlock) Descriptor() ([]byte, []int) { + return file_frostdb_snapshot_v1alpha1_snapshot_proto_rawDescGZIP(), []int{1, 0} +} + +func (x *Table_TableBlock) GetUlid() []byte { + if x != nil { + return x.Ulid + } + return nil +} + +func (x *Table_TableBlock) GetSize() int64 { + if x != nil { + return x.Size + } + return 0 +} + +func (x *Table_TableBlock) GetMinTx() uint64 { + if x != nil { + return x.MinTx + } + return 0 +} + +func (x *Table_TableBlock) GetPrevTx() uint64 { + if x != nil { + return x.PrevTx + } + return 0 +} + +var File_frostdb_snapshot_v1alpha1_snapshot_proto protoreflect.FileDescriptor + +var file_frostdb_snapshot_v1alpha1_snapshot_proto_rawDesc = []byte{ + 0x0a, 0x28, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x73, 0x6e, 0x61, 0x70, 0x73, 0x68, + 0x6f, 0x74, 0x2f, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2f, 0x73, 0x6e, 0x61, 0x70, + 0x73, 0x68, 0x6f, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x19, 0x66, 0x72, 0x6f, 0x73, + 0x74, 0x64, 0x62, 0x2e, 0x73, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x2e, 0x76, 0x31, 0x61, + 0x6c, 0x70, 0x68, 0x61, 0x31, 0x1a, 0x23, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x74, + 0x61, 0x62, 0x6c, 0x65, 0x2f, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2f, 0x63, 0x6f, + 0x6e, 0x66, 0x69, 0x67, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x5b, 0x0a, 0x0a, 0x46, 0x6f, + 0x6f, 0x74, 0x65, 0x72, 0x44, 0x61, 0x74, 0x61, 0x12, 0x47, 0x0a, 0x0e, 0x74, 0x61, 0x62, 0x6c, + 0x65, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x20, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x6e, 0x61, 0x70, 0x73, + 0x68, 0x6f, 0x74, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x54, 0x61, 0x62, + 0x6c, 0x65, 0x52, 0x0d, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x4a, 0x04, 0x08, 0x02, 0x10, 0x03, 0x22, 0xdd, 0x02, 0x0a, 0x05, 0x54, 0x61, 0x62, 0x6c, + 0x65, 0x12, 0x12, 0x0a, 
0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x3b, 0x0a, 0x06, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x23, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, + 0x74, 0x61, 0x62, 0x6c, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x54, + 0x61, 0x62, 0x6c, 0x65, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x52, 0x06, 0x63, 0x6f, 0x6e, 0x66, + 0x69, 0x67, 0x12, 0x4e, 0x0a, 0x0c, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x2b, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, + 0x64, 0x62, 0x2e, 0x73, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x2e, 0x76, 0x31, 0x61, 0x6c, + 0x70, 0x68, 0x61, 0x31, 0x2e, 0x54, 0x61, 0x62, 0x6c, 0x65, 0x2e, 0x54, 0x61, 0x62, 0x6c, 0x65, + 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x52, 0x0b, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x42, 0x6c, 0x6f, + 0x63, 0x6b, 0x12, 0x4d, 0x0a, 0x10, 0x67, 0x72, 0x61, 0x6e, 0x75, 0x6c, 0x65, 0x5f, 0x6d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x22, 0x2e, 0x66, + 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x2e, + 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x47, 0x72, 0x61, 0x6e, 0x75, 0x6c, 0x65, + 0x52, 0x0f, 0x67, 0x72, 0x61, 0x6e, 0x75, 0x6c, 0x65, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x1a, 0x64, 0x0a, 0x0a, 0x54, 0x61, 0x62, 0x6c, 0x65, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x12, + 0x12, 0x0a, 0x04, 0x75, 0x6c, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x75, + 0x6c, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x03, 0x52, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x12, 0x15, 0x0a, 0x06, 0x6d, 0x69, 0x6e, 0x5f, 0x74, + 0x78, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x6d, 0x69, 0x6e, 0x54, 0x78, 0x12, 0x17, + 0x0a, 0x07, 0x70, 0x72, 0x65, 0x76, 0x5f, 0x74, 0x78, 0x18, 
0x04, 0x20, 0x01, 0x28, 0x04, 0x52, + 0x06, 0x70, 0x72, 0x65, 0x76, 0x54, 0x78, 0x22, 0x4f, 0x0a, 0x07, 0x47, 0x72, 0x61, 0x6e, 0x75, + 0x6c, 0x65, 0x12, 0x44, 0x0a, 0x0d, 0x70, 0x61, 0x72, 0x74, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x66, 0x72, 0x6f, 0x73, + 0x74, 0x64, 0x62, 0x2e, 0x73, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x2e, 0x76, 0x31, 0x61, + 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x50, 0x61, 0x72, 0x74, 0x52, 0x0c, 0x70, 0x61, 0x72, 0x74, + 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x99, 0x02, 0x0a, 0x04, 0x50, 0x61, 0x72, + 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x6f, 0x66, 0x66, 0x73, 0x65, + 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0b, 0x73, 0x74, 0x61, 0x72, 0x74, 0x4f, 0x66, + 0x66, 0x73, 0x65, 0x74, 0x12, 0x1d, 0x0a, 0x0a, 0x65, 0x6e, 0x64, 0x5f, 0x6f, 0x66, 0x66, 0x73, + 0x65, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x65, 0x6e, 0x64, 0x4f, 0x66, 0x66, + 0x73, 0x65, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x74, 0x78, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, + 0x02, 0x74, 0x78, 0x12, 0x29, 0x0a, 0x10, 0x63, 0x6f, 0x6d, 0x70, 0x61, 0x63, 0x74, 0x69, 0x6f, + 0x6e, 0x5f, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0f, 0x63, + 0x6f, 0x6d, 0x70, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x12, 0x44, + 0x0a, 0x08, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0e, + 0x32, 0x28, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x6e, 0x61, 0x70, 0x73, + 0x68, 0x6f, 0x74, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x50, 0x61, 0x72, + 0x74, 0x2e, 0x45, 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x08, 0x65, 0x6e, 0x63, 0x6f, + 0x64, 0x69, 0x6e, 0x67, 0x22, 0x4e, 0x0a, 0x08, 0x45, 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, + 0x12, 0x18, 0x0a, 0x14, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47, 0x5f, 0x55, 0x4e, 0x53, + 
0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x14, 0x0a, 0x10, 0x45, 0x4e, + 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47, 0x5f, 0x50, 0x41, 0x52, 0x51, 0x55, 0x45, 0x54, 0x10, 0x01, + 0x12, 0x12, 0x0a, 0x0e, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47, 0x5f, 0x41, 0x52, 0x52, + 0x4f, 0x57, 0x10, 0x02, 0x42, 0x8d, 0x02, 0x0a, 0x1d, 0x63, 0x6f, 0x6d, 0x2e, 0x66, 0x72, 0x6f, + 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x2e, 0x76, 0x31, + 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x42, 0x0d, 0x53, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, + 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x57, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, + 0x63, 0x6f, 0x6d, 0x2f, 0x70, 0x6f, 0x6c, 0x61, 0x72, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x6c, 0x73, + 0x2f, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x2f, 0x67, 0x6f, 0x2f, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x73, 0x6e, + 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x2f, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x3b, + 0x73, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, + 0xa2, 0x02, 0x03, 0x46, 0x53, 0x58, 0xaa, 0x02, 0x19, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, + 0x2e, 0x53, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x2e, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, + 0x61, 0x31, 0xca, 0x02, 0x19, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x5c, 0x53, 0x6e, 0x61, + 0x70, 0x73, 0x68, 0x6f, 0x74, 0x5c, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0xe2, 0x02, + 0x25, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x5c, 0x53, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, + 0x74, 0x5c, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, 0x02, 0x1b, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, + 0x3a, 0x3a, 0x53, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x3a, 0x3a, 0x56, 0x31, 0x61, 0x6c, + 0x70, 0x68, 0x61, 0x31, 0x62, 0x06, 
0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_frostdb_snapshot_v1alpha1_snapshot_proto_rawDescOnce sync.Once + file_frostdb_snapshot_v1alpha1_snapshot_proto_rawDescData = file_frostdb_snapshot_v1alpha1_snapshot_proto_rawDesc +) + +func file_frostdb_snapshot_v1alpha1_snapshot_proto_rawDescGZIP() []byte { + file_frostdb_snapshot_v1alpha1_snapshot_proto_rawDescOnce.Do(func() { + file_frostdb_snapshot_v1alpha1_snapshot_proto_rawDescData = protoimpl.X.CompressGZIP(file_frostdb_snapshot_v1alpha1_snapshot_proto_rawDescData) + }) + return file_frostdb_snapshot_v1alpha1_snapshot_proto_rawDescData +} + +var file_frostdb_snapshot_v1alpha1_snapshot_proto_enumTypes = make([]protoimpl.EnumInfo, 1) +var file_frostdb_snapshot_v1alpha1_snapshot_proto_msgTypes = make([]protoimpl.MessageInfo, 5) +var file_frostdb_snapshot_v1alpha1_snapshot_proto_goTypes = []any{ + (Part_Encoding)(0), // 0: frostdb.snapshot.v1alpha1.Part.Encoding + (*FooterData)(nil), // 1: frostdb.snapshot.v1alpha1.FooterData + (*Table)(nil), // 2: frostdb.snapshot.v1alpha1.Table + (*Granule)(nil), // 3: frostdb.snapshot.v1alpha1.Granule + (*Part)(nil), // 4: frostdb.snapshot.v1alpha1.Part + (*Table_TableBlock)(nil), // 5: frostdb.snapshot.v1alpha1.Table.TableBlock + (*v1alpha1.TableConfig)(nil), // 6: frostdb.table.v1alpha1.TableConfig +} +var file_frostdb_snapshot_v1alpha1_snapshot_proto_depIdxs = []int32{ + 2, // 0: frostdb.snapshot.v1alpha1.FooterData.table_metadata:type_name -> frostdb.snapshot.v1alpha1.Table + 6, // 1: frostdb.snapshot.v1alpha1.Table.config:type_name -> frostdb.table.v1alpha1.TableConfig + 5, // 2: frostdb.snapshot.v1alpha1.Table.active_block:type_name -> frostdb.snapshot.v1alpha1.Table.TableBlock + 3, // 3: frostdb.snapshot.v1alpha1.Table.granule_metadata:type_name -> frostdb.snapshot.v1alpha1.Granule + 4, // 4: frostdb.snapshot.v1alpha1.Granule.part_metadata:type_name -> frostdb.snapshot.v1alpha1.Part + 0, // 5: frostdb.snapshot.v1alpha1.Part.encoding:type_name -> 
frostdb.snapshot.v1alpha1.Part.Encoding + 6, // [6:6] is the sub-list for method output_type + 6, // [6:6] is the sub-list for method input_type + 6, // [6:6] is the sub-list for extension type_name + 6, // [6:6] is the sub-list for extension extendee + 0, // [0:6] is the sub-list for field type_name +} + +func init() { file_frostdb_snapshot_v1alpha1_snapshot_proto_init() } +func file_frostdb_snapshot_v1alpha1_snapshot_proto_init() { + if File_frostdb_snapshot_v1alpha1_snapshot_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_frostdb_snapshot_v1alpha1_snapshot_proto_msgTypes[0].Exporter = func(v any, i int) any { + switch v := v.(*FooterData); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_snapshot_v1alpha1_snapshot_proto_msgTypes[1].Exporter = func(v any, i int) any { + switch v := v.(*Table); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_snapshot_v1alpha1_snapshot_proto_msgTypes[2].Exporter = func(v any, i int) any { + switch v := v.(*Granule); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_snapshot_v1alpha1_snapshot_proto_msgTypes[3].Exporter = func(v any, i int) any { + switch v := v.(*Part); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_snapshot_v1alpha1_snapshot_proto_msgTypes[4].Exporter = func(v any, i int) any { + switch v := v.(*Table_TableBlock); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: 
file_frostdb_snapshot_v1alpha1_snapshot_proto_rawDesc, + NumEnums: 1, + NumMessages: 5, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_frostdb_snapshot_v1alpha1_snapshot_proto_goTypes, + DependencyIndexes: file_frostdb_snapshot_v1alpha1_snapshot_proto_depIdxs, + EnumInfos: file_frostdb_snapshot_v1alpha1_snapshot_proto_enumTypes, + MessageInfos: file_frostdb_snapshot_v1alpha1_snapshot_proto_msgTypes, + }.Build() + File_frostdb_snapshot_v1alpha1_snapshot_proto = out.File + file_frostdb_snapshot_v1alpha1_snapshot_proto_rawDesc = nil + file_frostdb_snapshot_v1alpha1_snapshot_proto_goTypes = nil + file_frostdb_snapshot_v1alpha1_snapshot_proto_depIdxs = nil +} diff --git a/gen/proto/go/frostdb/snapshot/v1alpha1/snapshot_vtproto.pb.go b/gen/proto/go/frostdb/snapshot/v1alpha1/snapshot_vtproto.pb.go new file mode 100644 index 000000000..38d7ff005 --- /dev/null +++ b/gen/proto/go/frostdb/snapshot/v1alpha1/snapshot_vtproto.pb.go @@ -0,0 +1,1051 @@ +// Code generated by protoc-gen-go-vtproto. DO NOT EDIT. +// protoc-gen-go-vtproto version: v0.6.0 +// source: frostdb/snapshot/v1alpha1/snapshot.proto + +package snapshotv1alpha1 + +import ( + fmt "fmt" + protohelpers "github.com/planetscale/vtprotobuf/protohelpers" + v1alpha1 "github.com/polarsignals/frostdb/gen/proto/go/frostdb/table/v1alpha1" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + io "io" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. 
+ _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +func (m *FooterData) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *FooterData) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *FooterData) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if len(m.TableMetadata) > 0 { + for iNdEx := len(m.TableMetadata) - 1; iNdEx >= 0; iNdEx-- { + size, err := m.TableMetadata[iNdEx].MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0xa + } + } + return len(dAtA) - i, nil +} + +func (m *Table_TableBlock) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Table_TableBlock) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *Table_TableBlock) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if m.PrevTx != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.PrevTx)) + i-- + dAtA[i] = 0x20 + } + if m.MinTx != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.MinTx)) + i-- + dAtA[i] = 0x18 + } + if m.Size != 0 { + i = protohelpers.EncodeVarint(dAtA, i, 
uint64(m.Size)) + i-- + dAtA[i] = 0x10 + } + if len(m.Ulid) > 0 { + i -= len(m.Ulid) + copy(dAtA[i:], m.Ulid) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.Ulid))) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *Table) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Table) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *Table) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if len(m.GranuleMetadata) > 0 { + for iNdEx := len(m.GranuleMetadata) - 1; iNdEx >= 0; iNdEx-- { + size, err := m.GranuleMetadata[iNdEx].MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x22 + } + } + if m.ActiveBlock != nil { + size, err := m.ActiveBlock.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x1a + } + if m.Config != nil { + size, err := m.Config.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x12 + } + if len(m.Name) > 0 { + i -= len(m.Name) + copy(dAtA[i:], m.Name) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.Name))) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *Granule) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if 
err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Granule) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *Granule) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if len(m.PartMetadata) > 0 { + for iNdEx := len(m.PartMetadata) - 1; iNdEx >= 0; iNdEx-- { + size, err := m.PartMetadata[iNdEx].MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0xa + } + } + return len(dAtA) - i, nil +} + +func (m *Part) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Part) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *Part) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if m.Encoding != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Encoding)) + i-- + dAtA[i] = 0x28 + } + if m.CompactionLevel != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.CompactionLevel)) + i-- + dAtA[i] = 0x20 + } + if m.Tx != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Tx)) + i-- + dAtA[i] = 0x18 + } + if m.EndOffset != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.EndOffset)) + i-- + dAtA[i] = 0x10 + } + if m.StartOffset != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.StartOffset)) + i-- + dAtA[i] = 0x8 + } + return len(dAtA) - i, nil 
+} + +func (m *FooterData) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if len(m.TableMetadata) > 0 { + for _, e := range m.TableMetadata { + l = e.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + } + n += len(m.unknownFields) + return n +} + +func (m *Table_TableBlock) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + l = len(m.Ulid) + if l > 0 { + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + if m.Size != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.Size)) + } + if m.MinTx != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.MinTx)) + } + if m.PrevTx != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.PrevTx)) + } + n += len(m.unknownFields) + return n +} + +func (m *Table) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + l = len(m.Name) + if l > 0 { + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + if m.Config != nil { + l = m.Config.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + if m.ActiveBlock != nil { + l = m.ActiveBlock.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + if len(m.GranuleMetadata) > 0 { + for _, e := range m.GranuleMetadata { + l = e.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + } + n += len(m.unknownFields) + return n +} + +func (m *Granule) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if len(m.PartMetadata) > 0 { + for _, e := range m.PartMetadata { + l = e.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + } + n += len(m.unknownFields) + return n +} + +func (m *Part) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.StartOffset != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.StartOffset)) + } + if m.EndOffset != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.EndOffset)) + } + if m.Tx != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.Tx)) + } + if m.CompactionLevel != 0 { + n += 1 + 
protohelpers.SizeOfVarint(uint64(m.CompactionLevel)) + } + if m.Encoding != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.Encoding)) + } + n += len(m.unknownFields) + return n +} + +func (m *FooterData) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: FooterData: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: FooterData: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field TableMetadata", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.TableMetadata = append(m.TableMetadata, &Table{}) + if err := m.TableMetadata[len(m.TableMetadata)-1].UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) 
+ iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *Table_TableBlock) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Table_TableBlock: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Table_TableBlock: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Ulid", wireType) + } + var byteLen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + byteLen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if byteLen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + byteLen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Ulid = append(m.Ulid[:0], dAtA[iNdEx:postIndex]...) 
+ if m.Ulid == nil { + m.Ulid = []byte{} + } + iNdEx = postIndex + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Size", wireType) + } + m.Size = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Size |= int64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field MinTx", wireType) + } + m.MinTx = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.MinTx |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 4: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field PrevTx", wireType) + } + m.PrevTx = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.PrevTx |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) 
+ iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *Table) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Table: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Table: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Name", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Name = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Config", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex 
> l { + return io.ErrUnexpectedEOF + } + if m.Config == nil { + m.Config = &v1alpha1.TableConfig{} + } + if err := m.Config.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 3: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ActiveBlock", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.ActiveBlock == nil { + m.ActiveBlock = &Table_TableBlock{} + } + if err := m.ActiveBlock.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 4: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field GranuleMetadata", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.GranuleMetadata = append(m.GranuleMetadata, &Granule{}) + if err := m.GranuleMetadata[len(m.GranuleMetadata)-1].UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { 
+ return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *Granule) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Granule: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Granule: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field PartMetadata", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.PartMetadata = append(m.PartMetadata, &Part{}) + if err := m.PartMetadata[len(m.PartMetadata)-1].UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) 
+ iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *Part) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Part: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Part: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field StartOffset", wireType) + } + m.StartOffset = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.StartOffset |= int64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field EndOffset", wireType) + } + m.EndOffset = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.EndOffset |= int64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Tx", wireType) + } + m.Tx = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Tx |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 4: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field 
CompactionLevel", wireType) + } + m.CompactionLevel = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.CompactionLevel |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 5: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Encoding", wireType) + } + m.Encoding = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Encoding |= Part_Encoding(b&0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} diff --git a/gen/proto/go/frostdb/storage/v1alpha1/storage.pb.go b/gen/proto/go/frostdb/storage/v1alpha1/storage.pb.go new file mode 100644 index 000000000..09f24204f --- /dev/null +++ b/gen/proto/go/frostdb/storage/v1alpha1/storage.pb.go @@ -0,0 +1,2713 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.34.2 +// protoc (unknown) +// source: frostdb/storage/v1alpha1/storage.proto + +package storagev1alpha1 + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. 
+ _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// Op describes a binary operator. +type Op int32 + +const ( + // OP_UNKNOWN_UNSPECIFIED is the default value. It should not be used. + Op_OP_UNKNOWN_UNSPECIFIED Op = 0 + // OP_EQ is the equality operator (`==`). + Op_OP_EQ Op = 1 + // OP_NOT_EQ is the not-equality operator (`!=`). + Op_OP_NOT_EQ Op = 2 + // OP_LT is the less-than operator (`<`). + Op_OP_LT Op = 3 + // OP_LT_EQ is the less-than-or-equal operator (`<=`). + Op_OP_LT_EQ Op = 4 + // OP_GT is the greater-than operator (`>`). + Op_OP_GT Op = 5 + // OP_GT_EQ is the greater-than-or-equal operator (`>=`). + Op_OP_GT_EQ Op = 6 + // OP_REGEX_MATCH is the regular expression match operator (`=~`). + Op_OP_REGEX_MATCH Op = 7 + // OP_REGEX_NOT_MATCH is the regular expression not-match operator (`!~`). + Op_OP_REGEX_NOT_MATCH Op = 8 + // OP_AND is the logical and operator (`&&`). + Op_OP_AND Op = 9 + // OP_OR is the logical or operator (`||`). + Op_OP_OR Op = 10 + // OP_ADD is the arithmetric addition (`+`). + Op_OP_ADD Op = 11 + // OP_SUB is the arithmetric subtraction (`-`). + Op_OP_SUB Op = 12 + // OP_MUL is the arithmetric multiplication (`*`). + Op_OP_MUL Op = 13 + // OP_DIV is the arithmetric division (`/`). + Op_OP_DIV Op = 14 + // OP_CONTAINS performs substring matches. + Op_OP_CONTAINS Op = 15 + // OP_NOT_CONTAINS performs substring matches. + Op_OP_NOT_CONTAINS Op = 16 +) + +// Enum value maps for Op. 
+var ( + Op_name = map[int32]string{ + 0: "OP_UNKNOWN_UNSPECIFIED", + 1: "OP_EQ", + 2: "OP_NOT_EQ", + 3: "OP_LT", + 4: "OP_LT_EQ", + 5: "OP_GT", + 6: "OP_GT_EQ", + 7: "OP_REGEX_MATCH", + 8: "OP_REGEX_NOT_MATCH", + 9: "OP_AND", + 10: "OP_OR", + 11: "OP_ADD", + 12: "OP_SUB", + 13: "OP_MUL", + 14: "OP_DIV", + 15: "OP_CONTAINS", + 16: "OP_NOT_CONTAINS", + } + Op_value = map[string]int32{ + "OP_UNKNOWN_UNSPECIFIED": 0, + "OP_EQ": 1, + "OP_NOT_EQ": 2, + "OP_LT": 3, + "OP_LT_EQ": 4, + "OP_GT": 5, + "OP_GT_EQ": 6, + "OP_REGEX_MATCH": 7, + "OP_REGEX_NOT_MATCH": 8, + "OP_AND": 9, + "OP_OR": 10, + "OP_ADD": 11, + "OP_SUB": 12, + "OP_MUL": 13, + "OP_DIV": 14, + "OP_CONTAINS": 15, + "OP_NOT_CONTAINS": 16, + } +) + +func (x Op) Enum() *Op { + p := new(Op) + *p = x + return p +} + +func (x Op) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (Op) Descriptor() protoreflect.EnumDescriptor { + return file_frostdb_storage_v1alpha1_storage_proto_enumTypes[0].Descriptor() +} + +func (Op) Type() protoreflect.EnumType { + return &file_frostdb_storage_v1alpha1_storage_proto_enumTypes[0] +} + +func (x Op) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use Op.Descriptor instead. +func (Op) EnumDescriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{0} +} + +// Apache Arrow type. +type Type int32 + +const ( + // The default value, this must not occur. + Type_TYPE_UNKNOWN_UNSPECIFIED Type = 0 + // Float64 + Type_TYPE_FLOAT64 Type = 1 +) + +// Enum value maps for Type. 
+var ( + Type_name = map[int32]string{ + 0: "TYPE_UNKNOWN_UNSPECIFIED", + 1: "TYPE_FLOAT64", + } + Type_value = map[string]int32{ + "TYPE_UNKNOWN_UNSPECIFIED": 0, + "TYPE_FLOAT64": 1, + } +) + +func (x Type) Enum() *Type { + p := new(Type) + *p = x + return p +} + +func (x Type) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (Type) Descriptor() protoreflect.EnumDescriptor { + return file_frostdb_storage_v1alpha1_storage_proto_enumTypes[1].Descriptor() +} + +func (Type) Type() protoreflect.EnumType { + return &file_frostdb_storage_v1alpha1_storage_proto_enumTypes[1] +} + +func (x Type) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use Type.Descriptor instead. +func (Type) EnumDescriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{1} +} + +// Type is the type of aggregation function. +type AggregationFunction_Type int32 + +const ( + // UNKNOWN_UNSPECIFIED is the default value. It should not be used. + AggregationFunction_TYPE_UNKNOWN_UNSPECIFIED AggregationFunction_Type = 0 + // SUM is the sum aggregation function. + AggregationFunction_TYPE_SUM AggregationFunction_Type = 1 + // MIN is the min aggregation function. + AggregationFunction_TYPE_MIN AggregationFunction_Type = 2 + // MAX is the max aggregation function. + AggregationFunction_TYPE_MAX AggregationFunction_Type = 3 + // COUNT is the count aggregation function. + AggregationFunction_TYPE_COUNT AggregationFunction_Type = 4 + // AVG is the avg aggregation function. + AggregationFunction_TYPE_AVG AggregationFunction_Type = 5 + // UNIQUE is the unique aggregation function. + AggregationFunction_TYPE_UNIQUE AggregationFunction_Type = 6 + // AND is the and aggregation function. + AggregationFunction_TYPE_AND AggregationFunction_Type = 7 +) + +// Enum value maps for AggregationFunction_Type. 
+var ( + AggregationFunction_Type_name = map[int32]string{ + 0: "TYPE_UNKNOWN_UNSPECIFIED", + 1: "TYPE_SUM", + 2: "TYPE_MIN", + 3: "TYPE_MAX", + 4: "TYPE_COUNT", + 5: "TYPE_AVG", + 6: "TYPE_UNIQUE", + 7: "TYPE_AND", + } + AggregationFunction_Type_value = map[string]int32{ + "TYPE_UNKNOWN_UNSPECIFIED": 0, + "TYPE_SUM": 1, + "TYPE_MIN": 2, + "TYPE_MAX": 3, + "TYPE_COUNT": 4, + "TYPE_AVG": 5, + "TYPE_UNIQUE": 6, + "TYPE_AND": 7, + } +) + +func (x AggregationFunction_Type) Enum() *AggregationFunction_Type { + p := new(AggregationFunction_Type) + *p = x + return p +} + +func (x AggregationFunction_Type) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (AggregationFunction_Type) Descriptor() protoreflect.EnumDescriptor { + return file_frostdb_storage_v1alpha1_storage_proto_enumTypes[2].Descriptor() +} + +func (AggregationFunction_Type) Type() protoreflect.EnumType { + return &file_frostdb_storage_v1alpha1_storage_proto_enumTypes[2] +} + +func (x AggregationFunction_Type) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use AggregationFunction_Type.Descriptor instead. +func (AggregationFunction_Type) EnumDescriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{23, 0} +} + +// QueryRequest is the message sent to the Query gRPC endpoint. +type QueryRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // PlanRoot is the root of the query plan (i.e. a scan node). + PlanRoot *PlanNode `protobuf:"bytes,1,opt,name=plan_root,json=planRoot,proto3" json:"plan_root,omitempty"` + // description describes the query. 
+ Description string `protobuf:"bytes,2,opt,name=description,proto3" json:"description,omitempty"` +} + +func (x *QueryRequest) Reset() { + *x = QueryRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *QueryRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*QueryRequest) ProtoMessage() {} + +func (x *QueryRequest) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use QueryRequest.ProtoReflect.Descriptor instead. +func (*QueryRequest) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{0} +} + +func (x *QueryRequest) GetPlanRoot() *PlanNode { + if x != nil { + return x.PlanRoot + } + return nil +} + +func (x *QueryRequest) GetDescription() string { + if x != nil { + return x.Description + } + return "" +} + +// QueryResponse is the message received from the Query gRPC endpoint. +type QueryResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The resulting arrow record as bytes. 
+ Record []byte `protobuf:"bytes,1,opt,name=record,proto3" json:"record,omitempty"` +} + +func (x *QueryResponse) Reset() { + *x = QueryResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *QueryResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*QueryResponse) ProtoMessage() {} + +func (x *QueryResponse) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use QueryResponse.ProtoReflect.Descriptor instead. +func (*QueryResponse) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{1} +} + +func (x *QueryResponse) GetRecord() []byte { + if x != nil { + return x.Record + } + return nil +} + +// PlanNode describes a query plan. +type PlanNode struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Next is the next PlanNode in the plan, i.e. the node that this node should + // push data to. + Next *PlanNode `protobuf:"bytes,1,opt,name=next,proto3" json:"next,omitempty"` + // Spec is this PlanNode's spec. 
+ Spec *PlanNodeSpec `protobuf:"bytes,2,opt,name=spec,proto3" json:"spec,omitempty"` +} + +func (x *PlanNode) Reset() { + *x = PlanNode{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *PlanNode) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PlanNode) ProtoMessage() {} + +func (x *PlanNode) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PlanNode.ProtoReflect.Descriptor instead. +func (*PlanNode) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{2} +} + +func (x *PlanNode) GetNext() *PlanNode { + if x != nil { + return x.Next + } + return nil +} + +func (x *PlanNode) GetSpec() *PlanNodeSpec { + if x != nil { + return x.Spec + } + return nil +} + +// PlanNodeSpec is a PlanNode's information. This is a message that wraps oneof +// to ensure forward compatibility: +// https://developers.google.com/protocol-buffers/docs/proto3#backwards-compatibility_issues +type PlanNodeSpec struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Spec is this PlanNode's information. 
+ // + // Types that are assignable to Spec: + // + // *PlanNodeSpec_TableScan + // *PlanNodeSpec_SchemaScan + // *PlanNodeSpec_Filter + // *PlanNodeSpec_Projection + // *PlanNodeSpec_Distinct + // *PlanNodeSpec_Aggregation + // *PlanNodeSpec_Limit + Spec isPlanNodeSpec_Spec `protobuf_oneof:"spec"` +} + +func (x *PlanNodeSpec) Reset() { + *x = PlanNodeSpec{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *PlanNodeSpec) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PlanNodeSpec) ProtoMessage() {} + +func (x *PlanNodeSpec) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[3] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PlanNodeSpec.ProtoReflect.Descriptor instead. 
+func (*PlanNodeSpec) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{3} +} + +func (m *PlanNodeSpec) GetSpec() isPlanNodeSpec_Spec { + if m != nil { + return m.Spec + } + return nil +} + +func (x *PlanNodeSpec) GetTableScan() *TableScan { + if x, ok := x.GetSpec().(*PlanNodeSpec_TableScan); ok { + return x.TableScan + } + return nil +} + +func (x *PlanNodeSpec) GetSchemaScan() *SchemaScan { + if x, ok := x.GetSpec().(*PlanNodeSpec_SchemaScan); ok { + return x.SchemaScan + } + return nil +} + +func (x *PlanNodeSpec) GetFilter() *Filter { + if x, ok := x.GetSpec().(*PlanNodeSpec_Filter); ok { + return x.Filter + } + return nil +} + +func (x *PlanNodeSpec) GetProjection() *Projection { + if x, ok := x.GetSpec().(*PlanNodeSpec_Projection); ok { + return x.Projection + } + return nil +} + +func (x *PlanNodeSpec) GetDistinct() *Distinct { + if x, ok := x.GetSpec().(*PlanNodeSpec_Distinct); ok { + return x.Distinct + } + return nil +} + +func (x *PlanNodeSpec) GetAggregation() *Aggregation { + if x, ok := x.GetSpec().(*PlanNodeSpec_Aggregation); ok { + return x.Aggregation + } + return nil +} + +func (x *PlanNodeSpec) GetLimit() *Limit { + if x, ok := x.GetSpec().(*PlanNodeSpec_Limit); ok { + return x.Limit + } + return nil +} + +type isPlanNodeSpec_Spec interface { + isPlanNodeSpec_Spec() +} + +type PlanNodeSpec_TableScan struct { + // TableScan is specified if this PlanNode represents a table scan. + TableScan *TableScan `protobuf:"bytes,1,opt,name=table_scan,json=tableScan,proto3,oneof"` +} + +type PlanNodeSpec_SchemaScan struct { + // SchemaScan is specified if this PlanNode represents a table scan. + SchemaScan *SchemaScan `protobuf:"bytes,2,opt,name=schema_scan,json=schemaScan,proto3,oneof"` +} + +type PlanNodeSpec_Filter struct { + // Filter is specified if this PlanNode represents a filter. 
+ Filter *Filter `protobuf:"bytes,3,opt,name=filter,proto3,oneof"` +} + +type PlanNodeSpec_Projection struct { + // Projections is specified if this PlanNode represents a projection. + Projection *Projection `protobuf:"bytes,4,opt,name=projection,proto3,oneof"` +} + +type PlanNodeSpec_Distinct struct { + // Distinct is specified if this PlanNode represents a distinct. + Distinct *Distinct `protobuf:"bytes,5,opt,name=distinct,proto3,oneof"` +} + +type PlanNodeSpec_Aggregation struct { + // Aggregation is specified if this PlanNode represents an aggregation. + Aggregation *Aggregation `protobuf:"bytes,6,opt,name=aggregation,proto3,oneof"` +} + +type PlanNodeSpec_Limit struct { + // Limit is specified if this PlanNode represents a limit. + Limit *Limit `protobuf:"bytes,7,opt,name=limit,proto3,oneof"` +} + +func (*PlanNodeSpec_TableScan) isPlanNodeSpec_Spec() {} + +func (*PlanNodeSpec_SchemaScan) isPlanNodeSpec_Spec() {} + +func (*PlanNodeSpec_Filter) isPlanNodeSpec_Spec() {} + +func (*PlanNodeSpec_Projection) isPlanNodeSpec_Spec() {} + +func (*PlanNodeSpec_Distinct) isPlanNodeSpec_Spec() {} + +func (*PlanNodeSpec_Aggregation) isPlanNodeSpec_Spec() {} + +func (*PlanNodeSpec_Limit) isPlanNodeSpec_Spec() {} + +// TableScan describes scanning a table to obtain rows. +type TableScan struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Base specifies the fields shared with SchemaScan. 
+ Base *ScanBase `protobuf:"bytes,1,opt,name=base,proto3" json:"base,omitempty"` +} + +func (x *TableScan) Reset() { + *x = TableScan{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *TableScan) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*TableScan) ProtoMessage() {} + +func (x *TableScan) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[4] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use TableScan.ProtoReflect.Descriptor instead. +func (*TableScan) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{4} +} + +func (x *TableScan) GetBase() *ScanBase { + if x != nil { + return x.Base + } + return nil +} + +// SchemaScan describes scanning a table to obtain the schema. +type SchemaScan struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Base specifies the fields shared with TableScan. 
+ Base *ScanBase `protobuf:"bytes,1,opt,name=base,proto3" json:"base,omitempty"` +} + +func (x *SchemaScan) Reset() { + *x = SchemaScan{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *SchemaScan) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*SchemaScan) ProtoMessage() {} + +func (x *SchemaScan) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[5] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use SchemaScan.ProtoReflect.Descriptor instead. +func (*SchemaScan) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{5} +} + +func (x *SchemaScan) GetBase() *ScanBase { + if x != nil { + return x.Base + } + return nil +} + +// ScanBase is a collection of fields shared by scans. +type ScanBase struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Database is the name of the database to scan. + Database string `protobuf:"bytes,1,opt,name=database,proto3" json:"database,omitempty"` + // Table is the name of the table to scan. 
+ Table string `protobuf:"bytes,2,opt,name=table,proto3" json:"table,omitempty"` +} + +func (x *ScanBase) Reset() { + *x = ScanBase{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ScanBase) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ScanBase) ProtoMessage() {} + +func (x *ScanBase) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[6] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ScanBase.ProtoReflect.Descriptor instead. +func (*ScanBase) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{6} +} + +func (x *ScanBase) GetDatabase() string { + if x != nil { + return x.Database + } + return "" +} + +func (x *ScanBase) GetTable() string { + if x != nil { + return x.Table + } + return "" +} + +// Filter describes a filter. +type Filter struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Expr is the filter expression. 
+ Expr *Expr `protobuf:"bytes,1,opt,name=expr,proto3" json:"expr,omitempty"` +} + +func (x *Filter) Reset() { + *x = Filter{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Filter) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Filter) ProtoMessage() {} + +func (x *Filter) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[7] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Filter.ProtoReflect.Descriptor instead. +func (*Filter) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{7} +} + +func (x *Filter) GetExpr() *Expr { + if x != nil { + return x.Expr + } + return nil +} + +// Distinct describes a distinct node. +type Distinct struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Exprs are the expressions to distinct by. 
+ Exprs []*Expr `protobuf:"bytes,1,rep,name=exprs,proto3" json:"exprs,omitempty"` +} + +func (x *Distinct) Reset() { + *x = Distinct{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[8] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Distinct) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Distinct) ProtoMessage() {} + +func (x *Distinct) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[8] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Distinct.ProtoReflect.Descriptor instead. +func (*Distinct) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{8} +} + +func (x *Distinct) GetExprs() []*Expr { + if x != nil { + return x.Exprs + } + return nil +} + +// Projection describes a projection node. +type Projection struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Exprs are the expressions to project. 
+ Exprs []*Expr `protobuf:"bytes,1,rep,name=exprs,proto3" json:"exprs,omitempty"` +} + +func (x *Projection) Reset() { + *x = Projection{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[9] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Projection) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Projection) ProtoMessage() {} + +func (x *Projection) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[9] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Projection.ProtoReflect.Descriptor instead. +func (*Projection) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{9} +} + +func (x *Projection) GetExprs() []*Expr { + if x != nil { + return x.Exprs + } + return nil +} + +// Limit describes a limit node. +type Limit struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Expr is the expression to limit by. 
+ Expr *Expr `protobuf:"bytes,1,opt,name=expr,proto3" json:"expr,omitempty"` +} + +func (x *Limit) Reset() { + *x = Limit{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[10] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Limit) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Limit) ProtoMessage() {} + +func (x *Limit) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[10] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Limit.ProtoReflect.Descriptor instead. +func (*Limit) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{10} +} + +func (x *Limit) GetExpr() *Expr { + if x != nil { + return x.Expr + } + return nil +} + +// Aggregation describes an aggregation node. +type Aggregation struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // GroupExprs are the expressions to group by. + GroupExprs []*Expr `protobuf:"bytes,1,rep,name=group_exprs,json=groupExprs,proto3" json:"group_exprs,omitempty"` + // AggExprs are the aggregation functions applied to values of each group. 
+ AggExprs []*Expr `protobuf:"bytes,2,rep,name=agg_exprs,json=aggExprs,proto3" json:"agg_exprs,omitempty"` +} + +func (x *Aggregation) Reset() { + *x = Aggregation{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[11] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Aggregation) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Aggregation) ProtoMessage() {} + +func (x *Aggregation) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[11] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Aggregation.ProtoReflect.Descriptor instead. +func (*Aggregation) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{11} +} + +func (x *Aggregation) GetGroupExprs() []*Expr { + if x != nil { + return x.GroupExprs + } + return nil +} + +func (x *Aggregation) GetAggExprs() []*Expr { + if x != nil { + return x.AggExprs + } + return nil +} + +// Expr is the base type for all expressions. +type Expr struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // def is the definition of the expression. 
+ Def *ExprDef `protobuf:"bytes,1,opt,name=def,proto3" json:"def,omitempty"` +} + +func (x *Expr) Reset() { + *x = Expr{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[12] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Expr) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Expr) ProtoMessage() {} + +func (x *Expr) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[12] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Expr.ProtoReflect.Descriptor instead. +func (*Expr) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{12} +} + +func (x *Expr) GetDef() *ExprDef { + if x != nil { + return x.Def + } + return nil +} + +// ExprDefinition is a definition of an expression. It is intentionally only a +// single field that is a oneof to maintain forward compatibility when changing +// fields in each of the oneofs. This message should never have any other +// fields for forward compatibility. +type ExprDef struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // content is the content of the expression definition. 
+ // + // Types that are assignable to Content: + // + // *ExprDef_BinaryExpr + // *ExprDef_Column + // *ExprDef_Literal + // *ExprDef_DynamicColumn + // *ExprDef_AggregationFunction + // *ExprDef_Alias + // *ExprDef_Duration + // *ExprDef_Convert + // *ExprDef_If + Content isExprDef_Content `protobuf_oneof:"content"` +} + +func (x *ExprDef) Reset() { + *x = ExprDef{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[13] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ExprDef) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ExprDef) ProtoMessage() {} + +func (x *ExprDef) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[13] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ExprDef.ProtoReflect.Descriptor instead. 
+func (*ExprDef) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{13} +} + +func (m *ExprDef) GetContent() isExprDef_Content { + if m != nil { + return m.Content + } + return nil +} + +func (x *ExprDef) GetBinaryExpr() *BinaryExpr { + if x, ok := x.GetContent().(*ExprDef_BinaryExpr); ok { + return x.BinaryExpr + } + return nil +} + +func (x *ExprDef) GetColumn() *Column { + if x, ok := x.GetContent().(*ExprDef_Column); ok { + return x.Column + } + return nil +} + +func (x *ExprDef) GetLiteral() *Literal { + if x, ok := x.GetContent().(*ExprDef_Literal); ok { + return x.Literal + } + return nil +} + +func (x *ExprDef) GetDynamicColumn() *DynamicColumn { + if x, ok := x.GetContent().(*ExprDef_DynamicColumn); ok { + return x.DynamicColumn + } + return nil +} + +func (x *ExprDef) GetAggregationFunction() *AggregationFunction { + if x, ok := x.GetContent().(*ExprDef_AggregationFunction); ok { + return x.AggregationFunction + } + return nil +} + +func (x *ExprDef) GetAlias() *Alias { + if x, ok := x.GetContent().(*ExprDef_Alias); ok { + return x.Alias + } + return nil +} + +func (x *ExprDef) GetDuration() *DurationExpr { + if x, ok := x.GetContent().(*ExprDef_Duration); ok { + return x.Duration + } + return nil +} + +func (x *ExprDef) GetConvert() *ConvertExpr { + if x, ok := x.GetContent().(*ExprDef_Convert); ok { + return x.Convert + } + return nil +} + +func (x *ExprDef) GetIf() *IfExpr { + if x, ok := x.GetContent().(*ExprDef_If); ok { + return x.If + } + return nil +} + +type isExprDef_Content interface { + isExprDef_Content() +} + +type ExprDef_BinaryExpr struct { + // BinaryExpr is a binary expression. + BinaryExpr *BinaryExpr `protobuf:"bytes,1,opt,name=binary_expr,json=binaryExpr,proto3,oneof"` +} + +type ExprDef_Column struct { + // Column is a column expression. + Column *Column `protobuf:"bytes,2,opt,name=column,proto3,oneof"` +} + +type ExprDef_Literal struct { + // Literal is a literal expression. 
+ Literal *Literal `protobuf:"bytes,3,opt,name=literal,proto3,oneof"` +} + +type ExprDef_DynamicColumn struct { + // DynamicColumn is a dynamic column expression. + DynamicColumn *DynamicColumn `protobuf:"bytes,4,opt,name=dynamic_column,json=dynamicColumn,proto3,oneof"` +} + +type ExprDef_AggregationFunction struct { + // AggregationFunction is an aggregation function expression. + AggregationFunction *AggregationFunction `protobuf:"bytes,5,opt,name=aggregation_function,json=aggregationFunction,proto3,oneof"` +} + +type ExprDef_Alias struct { + // Alias is an alias expression. + Alias *Alias `protobuf:"bytes,6,opt,name=alias,proto3,oneof"` +} + +type ExprDef_Duration struct { + // DurationExpr is a duration expression to group by. + Duration *DurationExpr `protobuf:"bytes,7,opt,name=duration,proto3,oneof"` +} + +type ExprDef_Convert struct { + // ConvertExpr is an expression to convert a column from one type to another. + Convert *ConvertExpr `protobuf:"bytes,8,opt,name=convert,proto3,oneof"` +} + +type ExprDef_If struct { + // IfExpr is an if expression. + If *IfExpr `protobuf:"bytes,9,opt,name=if,proto3,oneof"` +} + +func (*ExprDef_BinaryExpr) isExprDef_Content() {} + +func (*ExprDef_Column) isExprDef_Content() {} + +func (*ExprDef_Literal) isExprDef_Content() {} + +func (*ExprDef_DynamicColumn) isExprDef_Content() {} + +func (*ExprDef_AggregationFunction) isExprDef_Content() {} + +func (*ExprDef_Alias) isExprDef_Content() {} + +func (*ExprDef_Duration) isExprDef_Content() {} + +func (*ExprDef_Convert) isExprDef_Content() {} + +func (*ExprDef_If) isExprDef_Content() {} + +// BinaryExpression is a binary expression. +type BinaryExpr struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // left is the left operand. + Left *Expr `protobuf:"bytes,1,opt,name=left,proto3" json:"left,omitempty"` + // right is the right operand. 
+ Right *Expr `protobuf:"bytes,2,opt,name=right,proto3" json:"right,omitempty"` + // op is the operator. + Op Op `protobuf:"varint,3,opt,name=op,proto3,enum=frostdb.storage.v1alpha1.Op" json:"op,omitempty"` +} + +func (x *BinaryExpr) Reset() { + *x = BinaryExpr{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[14] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *BinaryExpr) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*BinaryExpr) ProtoMessage() {} + +func (x *BinaryExpr) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[14] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use BinaryExpr.ProtoReflect.Descriptor instead. +func (*BinaryExpr) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{14} +} + +func (x *BinaryExpr) GetLeft() *Expr { + if x != nil { + return x.Left + } + return nil +} + +func (x *BinaryExpr) GetRight() *Expr { + if x != nil { + return x.Right + } + return nil +} + +func (x *BinaryExpr) GetOp() Op { + if x != nil { + return x.Op + } + return Op_OP_UNKNOWN_UNSPECIFIED +} + +// IfExpr represents an if expression. 
+type IfExpr struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // the condition + Condition *Expr `protobuf:"bytes,1,opt,name=condition,proto3" json:"condition,omitempty"` + // the true branch + Then *Expr `protobuf:"bytes,2,opt,name=then,proto3" json:"then,omitempty"` + // the false branch + Else *Expr `protobuf:"bytes,3,opt,name=else,proto3" json:"else,omitempty"` +} + +func (x *IfExpr) Reset() { + *x = IfExpr{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[15] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *IfExpr) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*IfExpr) ProtoMessage() {} + +func (x *IfExpr) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[15] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use IfExpr.ProtoReflect.Descriptor instead. +func (*IfExpr) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{15} +} + +func (x *IfExpr) GetCondition() *Expr { + if x != nil { + return x.Condition + } + return nil +} + +func (x *IfExpr) GetThen() *Expr { + if x != nil { + return x.Then + } + return nil +} + +func (x *IfExpr) GetElse() *Expr { + if x != nil { + return x.Else + } + return nil +} + +// ConvertExpr is an expression to convert an expression to another type. 
+type ConvertExpr struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // the expression to convert + Expr *Expr `protobuf:"bytes,1,opt,name=expr,proto3" json:"expr,omitempty"` + // the type to convert to + Type Type `protobuf:"varint,2,opt,name=type,proto3,enum=frostdb.storage.v1alpha1.Type" json:"type,omitempty"` +} + +func (x *ConvertExpr) Reset() { + *x = ConvertExpr{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[16] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ConvertExpr) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ConvertExpr) ProtoMessage() {} + +func (x *ConvertExpr) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[16] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ConvertExpr.ProtoReflect.Descriptor instead. +func (*ConvertExpr) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{16} +} + +func (x *ConvertExpr) GetExpr() *Expr { + if x != nil { + return x.Expr + } + return nil +} + +func (x *ConvertExpr) GetType() Type { + if x != nil { + return x.Type + } + return Type_TYPE_UNKNOWN_UNSPECIFIED +} + +// Column is an explicit column in a table. +type Column struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // name is the name of the column. 
+ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` +} + +func (x *Column) Reset() { + *x = Column{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[17] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Column) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Column) ProtoMessage() {} + +func (x *Column) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[17] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Column.ProtoReflect.Descriptor instead. +func (*Column) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{17} +} + +func (x *Column) GetName() string { + if x != nil { + return x.Name + } + return "" +} + +// Literal is a literal value. +type Literal struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // content is the content of the literal. 
+ Content *LiteralContent `protobuf:"bytes,1,opt,name=content,proto3" json:"content,omitempty"` +} + +func (x *Literal) Reset() { + *x = Literal{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[18] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Literal) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Literal) ProtoMessage() {} + +func (x *Literal) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[18] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Literal.ProtoReflect.Descriptor instead. +func (*Literal) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{18} +} + +func (x *Literal) GetContent() *LiteralContent { + if x != nil { + return x.Content + } + return nil +} + +// LiteralContent is the content of a literal. +type LiteralContent struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // value is the value of the literal. 
+ // + // Types that are assignable to Value: + // + // *LiteralContent_NullValue + // *LiteralContent_BoolValue + // *LiteralContent_Int32Value + // *LiteralContent_Uint32Value + // *LiteralContent_Int64Value + // *LiteralContent_Uint64Value + // *LiteralContent_FloatValue + // *LiteralContent_DoubleValue + // *LiteralContent_BinaryValue + // *LiteralContent_StringValue + Value isLiteralContent_Value `protobuf_oneof:"value"` +} + +func (x *LiteralContent) Reset() { + *x = LiteralContent{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[19] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *LiteralContent) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*LiteralContent) ProtoMessage() {} + +func (x *LiteralContent) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[19] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use LiteralContent.ProtoReflect.Descriptor instead. 
+func (*LiteralContent) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{19} +} + +func (m *LiteralContent) GetValue() isLiteralContent_Value { + if m != nil { + return m.Value + } + return nil +} + +func (x *LiteralContent) GetNullValue() *Null { + if x, ok := x.GetValue().(*LiteralContent_NullValue); ok { + return x.NullValue + } + return nil +} + +func (x *LiteralContent) GetBoolValue() bool { + if x, ok := x.GetValue().(*LiteralContent_BoolValue); ok { + return x.BoolValue + } + return false +} + +func (x *LiteralContent) GetInt32Value() int32 { + if x, ok := x.GetValue().(*LiteralContent_Int32Value); ok { + return x.Int32Value + } + return 0 +} + +func (x *LiteralContent) GetUint32Value() uint32 { + if x, ok := x.GetValue().(*LiteralContent_Uint32Value); ok { + return x.Uint32Value + } + return 0 +} + +func (x *LiteralContent) GetInt64Value() int64 { + if x, ok := x.GetValue().(*LiteralContent_Int64Value); ok { + return x.Int64Value + } + return 0 +} + +func (x *LiteralContent) GetUint64Value() uint64 { + if x, ok := x.GetValue().(*LiteralContent_Uint64Value); ok { + return x.Uint64Value + } + return 0 +} + +func (x *LiteralContent) GetFloatValue() float32 { + if x, ok := x.GetValue().(*LiteralContent_FloatValue); ok { + return x.FloatValue + } + return 0 +} + +func (x *LiteralContent) GetDoubleValue() float64 { + if x, ok := x.GetValue().(*LiteralContent_DoubleValue); ok { + return x.DoubleValue + } + return 0 +} + +func (x *LiteralContent) GetBinaryValue() []byte { + if x, ok := x.GetValue().(*LiteralContent_BinaryValue); ok { + return x.BinaryValue + } + return nil +} + +func (x *LiteralContent) GetStringValue() string { + if x, ok := x.GetValue().(*LiteralContent_StringValue); ok { + return x.StringValue + } + return "" +} + +type isLiteralContent_Value interface { + isLiteralContent_Value() +} + +type LiteralContent_NullValue struct { + // null whether the value is the null value. 
+ NullValue *Null `protobuf:"bytes,1,opt,name=null_value,json=nullValue,proto3,oneof"` +} + +type LiteralContent_BoolValue struct { + // bool_value is the bool value. + BoolValue bool `protobuf:"varint,2,opt,name=bool_value,json=boolValue,proto3,oneof"` +} + +type LiteralContent_Int32Value struct { + // int32_value is the int32 value. + Int32Value int32 `protobuf:"varint,3,opt,name=int32_value,json=int32Value,proto3,oneof"` +} + +type LiteralContent_Uint32Value struct { + // uint32_value is the uint32 value. + Uint32Value uint32 `protobuf:"varint,4,opt,name=uint32_value,json=uint32Value,proto3,oneof"` +} + +type LiteralContent_Int64Value struct { + // int64_value is the int64 value. + Int64Value int64 `protobuf:"varint,5,opt,name=int64_value,json=int64Value,proto3,oneof"` +} + +type LiteralContent_Uint64Value struct { + // uint64_value is the uint64 value. + Uint64Value uint64 `protobuf:"varint,6,opt,name=uint64_value,json=uint64Value,proto3,oneof"` +} + +type LiteralContent_FloatValue struct { + // float_value is the float value. + FloatValue float32 `protobuf:"fixed32,7,opt,name=float_value,json=floatValue,proto3,oneof"` +} + +type LiteralContent_DoubleValue struct { + // double_value is the double value. + DoubleValue float64 `protobuf:"fixed64,8,opt,name=double_value,json=doubleValue,proto3,oneof"` +} + +type LiteralContent_BinaryValue struct { + // binary_value is the binary value. + BinaryValue []byte `protobuf:"bytes,9,opt,name=binary_value,json=binaryValue,proto3,oneof"` +} + +type LiteralContent_StringValue struct { + // string_value is the string value. 
+ StringValue string `protobuf:"bytes,10,opt,name=string_value,json=stringValue,proto3,oneof"` +} + +func (*LiteralContent_NullValue) isLiteralContent_Value() {} + +func (*LiteralContent_BoolValue) isLiteralContent_Value() {} + +func (*LiteralContent_Int32Value) isLiteralContent_Value() {} + +func (*LiteralContent_Uint32Value) isLiteralContent_Value() {} + +func (*LiteralContent_Int64Value) isLiteralContent_Value() {} + +func (*LiteralContent_Uint64Value) isLiteralContent_Value() {} + +func (*LiteralContent_FloatValue) isLiteralContent_Value() {} + +func (*LiteralContent_DoubleValue) isLiteralContent_Value() {} + +func (*LiteralContent_BinaryValue) isLiteralContent_Value() {} + +func (*LiteralContent_StringValue) isLiteralContent_Value() {} + +// Null is the null value. +type Null struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields +} + +func (x *Null) Reset() { + *x = Null{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[20] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Null) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Null) ProtoMessage() {} + +func (x *Null) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[20] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Null.ProtoReflect.Descriptor instead. +func (*Null) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{20} +} + +// Alias is an alias for an expression. +type Alias struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // name is the name of the alias. 
+ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + // expr is the expression to alias. + Expr *Expr `protobuf:"bytes,2,opt,name=expr,proto3" json:"expr,omitempty"` +} + +func (x *Alias) Reset() { + *x = Alias{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[21] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Alias) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Alias) ProtoMessage() {} + +func (x *Alias) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[21] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Alias.ProtoReflect.Descriptor instead. +func (*Alias) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{21} +} + +func (x *Alias) GetName() string { + if x != nil { + return x.Name + } + return "" +} + +func (x *Alias) GetExpr() *Expr { + if x != nil { + return x.Expr + } + return nil +} + +// DynamicColumn is a dynamic column. +type DynamicColumn struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // name is the name of the dynamic column. 
+ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` +} + +func (x *DynamicColumn) Reset() { + *x = DynamicColumn{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[22] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *DynamicColumn) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DynamicColumn) ProtoMessage() {} + +func (x *DynamicColumn) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[22] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DynamicColumn.ProtoReflect.Descriptor instead. +func (*DynamicColumn) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{22} +} + +func (x *DynamicColumn) GetName() string { + if x != nil { + return x.Name + } + return "" +} + +// AggregationFunction is an aggregation function. +type AggregationFunction struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // type is the type of aggregation function. + Type AggregationFunction_Type `protobuf:"varint,1,opt,name=type,proto3,enum=frostdb.storage.v1alpha1.AggregationFunction_Type" json:"type,omitempty"` + // expr is the expression to aggregate. 
+ Expr *Expr `protobuf:"bytes,2,opt,name=expr,proto3" json:"expr,omitempty"` +} + +func (x *AggregationFunction) Reset() { + *x = AggregationFunction{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[23] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *AggregationFunction) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*AggregationFunction) ProtoMessage() {} + +func (x *AggregationFunction) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[23] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use AggregationFunction.ProtoReflect.Descriptor instead. +func (*AggregationFunction) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{23} +} + +func (x *AggregationFunction) GetType() AggregationFunction_Type { + if x != nil { + return x.Type + } + return AggregationFunction_TYPE_UNKNOWN_UNSPECIFIED +} + +func (x *AggregationFunction) GetExpr() *Expr { + if x != nil { + return x.Expr + } + return nil +} + +// DurationExpr is a duration expressed in milliseconds. +type DurationExpr struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // milliseconds is the duration in milliseconds. 
+ Milliseconds int64 `protobuf:"varint,1,opt,name=milliseconds,proto3" json:"milliseconds,omitempty"` +} + +func (x *DurationExpr) Reset() { + *x = DurationExpr{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[24] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *DurationExpr) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DurationExpr) ProtoMessage() {} + +func (x *DurationExpr) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_storage_v1alpha1_storage_proto_msgTypes[24] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DurationExpr.ProtoReflect.Descriptor instead. +func (*DurationExpr) Descriptor() ([]byte, []int) { + return file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP(), []int{24} +} + +func (x *DurationExpr) GetMilliseconds() int64 { + if x != nil { + return x.Milliseconds + } + return 0 +} + +var File_frostdb_storage_v1alpha1_storage_proto protoreflect.FileDescriptor + +var file_frostdb_storage_v1alpha1_storage_proto_rawDesc = []byte{ + 0x0a, 0x26, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, + 0x65, 0x2f, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2f, 0x73, 0x74, 0x6f, 0x72, 0x61, + 0x67, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x18, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, + 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, + 0x61, 0x31, 0x22, 0x71, 0x0a, 0x0c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x12, 0x3f, 0x0a, 0x09, 0x70, 0x6c, 0x61, 0x6e, 0x5f, 0x72, 0x6f, 0x6f, 0x74, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x22, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, + 0x73, 0x74, 0x6f, 0x72, 0x61, 
0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, + 0x2e, 0x50, 0x6c, 0x61, 0x6e, 0x4e, 0x6f, 0x64, 0x65, 0x52, 0x08, 0x70, 0x6c, 0x61, 0x6e, 0x52, + 0x6f, 0x6f, 0x74, 0x12, 0x20, 0x0a, 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x27, 0x0a, 0x0d, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, + 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x22, 0x7e, + 0x0a, 0x08, 0x50, 0x6c, 0x61, 0x6e, 0x4e, 0x6f, 0x64, 0x65, 0x12, 0x36, 0x0a, 0x04, 0x6e, 0x65, + 0x78, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x22, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, + 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, + 0x68, 0x61, 0x31, 0x2e, 0x50, 0x6c, 0x61, 0x6e, 0x4e, 0x6f, 0x64, 0x65, 0x52, 0x04, 0x6e, 0x65, + 0x78, 0x74, 0x12, 0x3a, 0x0a, 0x04, 0x73, 0x70, 0x65, 0x63, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x26, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, + 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x50, 0x6c, 0x61, 0x6e, + 0x4e, 0x6f, 0x64, 0x65, 0x53, 0x70, 0x65, 0x63, 0x52, 0x04, 0x73, 0x70, 0x65, 0x63, 0x22, 0xef, + 0x03, 0x0a, 0x0c, 0x50, 0x6c, 0x61, 0x6e, 0x4e, 0x6f, 0x64, 0x65, 0x53, 0x70, 0x65, 0x63, 0x12, + 0x44, 0x0a, 0x0a, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x73, 0x63, 0x61, 0x6e, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x23, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, + 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x54, + 0x61, 0x62, 0x6c, 0x65, 0x53, 0x63, 0x61, 0x6e, 0x48, 0x00, 0x52, 0x09, 0x74, 0x61, 0x62, 0x6c, + 0x65, 0x53, 0x63, 0x61, 0x6e, 0x12, 0x47, 0x0a, 0x0b, 0x73, 0x63, 
0x68, 0x65, 0x6d, 0x61, 0x5f, + 0x73, 0x63, 0x61, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x66, 0x72, 0x6f, + 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, + 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x53, 0x63, 0x61, 0x6e, + 0x48, 0x00, 0x52, 0x0a, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x53, 0x63, 0x61, 0x6e, 0x12, 0x3a, + 0x0a, 0x06, 0x66, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x20, + 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, + 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x46, 0x69, 0x6c, 0x74, 0x65, 0x72, + 0x48, 0x00, 0x52, 0x06, 0x66, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x12, 0x46, 0x0a, 0x0a, 0x70, 0x72, + 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x24, + 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, + 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x50, 0x72, 0x6f, 0x6a, 0x65, 0x63, + 0x74, 0x69, 0x6f, 0x6e, 0x48, 0x00, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x69, + 0x6f, 0x6e, 0x12, 0x40, 0x0a, 0x08, 0x64, 0x69, 0x73, 0x74, 0x69, 0x6e, 0x63, 0x74, 0x18, 0x05, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x22, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, + 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, + 0x44, 0x69, 0x73, 0x74, 0x69, 0x6e, 0x63, 0x74, 0x48, 0x00, 0x52, 0x08, 0x64, 0x69, 0x73, 0x74, + 0x69, 0x6e, 0x63, 0x74, 0x12, 0x49, 0x0a, 0x0b, 0x61, 0x67, 0x67, 0x72, 0x65, 0x67, 0x61, 0x74, + 0x69, 0x6f, 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x66, 0x72, 0x6f, 0x73, + 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, + 0x70, 0x68, 0x61, 0x31, 0x2e, 0x41, 0x67, 0x67, 0x72, 0x65, 0x67, 0x61, 0x74, 0x69, 0x6f, 0x6e, + 0x48, 
0x00, 0x52, 0x0b, 0x61, 0x67, 0x67, 0x72, 0x65, 0x67, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, + 0x37, 0x0a, 0x05, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, + 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, + 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x48, + 0x00, 0x52, 0x05, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x42, 0x06, 0x0a, 0x04, 0x73, 0x70, 0x65, 0x63, + 0x22, 0x43, 0x0a, 0x09, 0x54, 0x61, 0x62, 0x6c, 0x65, 0x53, 0x63, 0x61, 0x6e, 0x12, 0x36, 0x0a, + 0x04, 0x62, 0x61, 0x73, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x22, 0x2e, 0x66, 0x72, + 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, + 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x53, 0x63, 0x61, 0x6e, 0x42, 0x61, 0x73, 0x65, 0x52, + 0x04, 0x62, 0x61, 0x73, 0x65, 0x22, 0x44, 0x0a, 0x0a, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x53, + 0x63, 0x61, 0x6e, 0x12, 0x36, 0x0a, 0x04, 0x62, 0x61, 0x73, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x22, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, + 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x53, 0x63, 0x61, + 0x6e, 0x42, 0x61, 0x73, 0x65, 0x52, 0x04, 0x62, 0x61, 0x73, 0x65, 0x22, 0x3c, 0x0a, 0x08, 0x53, + 0x63, 0x61, 0x6e, 0x42, 0x61, 0x73, 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x64, 0x61, 0x74, 0x61, 0x62, + 0x61, 0x73, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x64, 0x61, 0x74, 0x61, 0x62, + 0x61, 0x73, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x05, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x22, 0x3c, 0x0a, 0x06, 0x46, 0x69, 0x6c, + 0x74, 0x65, 0x72, 0x12, 0x32, 0x0a, 0x04, 0x65, 0x78, 0x70, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x1e, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, + 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 
0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x45, 0x78, 0x70, + 0x72, 0x52, 0x04, 0x65, 0x78, 0x70, 0x72, 0x22, 0x40, 0x0a, 0x08, 0x44, 0x69, 0x73, 0x74, 0x69, + 0x6e, 0x63, 0x74, 0x12, 0x34, 0x0a, 0x05, 0x65, 0x78, 0x70, 0x72, 0x73, 0x18, 0x01, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, + 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x45, 0x78, + 0x70, 0x72, 0x52, 0x05, 0x65, 0x78, 0x70, 0x72, 0x73, 0x22, 0x42, 0x0a, 0x0a, 0x50, 0x72, 0x6f, + 0x6a, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x34, 0x0a, 0x05, 0x65, 0x78, 0x70, 0x72, 0x73, + 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, + 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, + 0x31, 0x2e, 0x45, 0x78, 0x70, 0x72, 0x52, 0x05, 0x65, 0x78, 0x70, 0x72, 0x73, 0x22, 0x3b, 0x0a, + 0x05, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x12, 0x32, 0x0a, 0x04, 0x65, 0x78, 0x70, 0x72, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, + 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, + 0x45, 0x78, 0x70, 0x72, 0x52, 0x04, 0x65, 0x78, 0x70, 0x72, 0x22, 0x8b, 0x01, 0x0a, 0x0b, 0x41, + 0x67, 0x67, 0x72, 0x65, 0x67, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x3f, 0x0a, 0x0b, 0x67, 0x72, + 0x6f, 0x75, 0x70, 0x5f, 0x65, 0x78, 0x70, 0x72, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x1e, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, + 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x45, 0x78, 0x70, 0x72, 0x52, + 0x0a, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x45, 0x78, 0x70, 0x72, 0x73, 0x12, 0x3b, 0x0a, 0x09, 0x61, + 0x67, 0x67, 0x5f, 0x65, 0x78, 0x70, 0x72, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1e, + 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 
0x61, 0x67, 0x65, + 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x45, 0x78, 0x70, 0x72, 0x52, 0x08, + 0x61, 0x67, 0x67, 0x45, 0x78, 0x70, 0x72, 0x73, 0x22, 0x3b, 0x0a, 0x04, 0x45, 0x78, 0x70, 0x72, + 0x12, 0x33, 0x0a, 0x03, 0x64, 0x65, 0x66, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, + 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, + 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x45, 0x78, 0x70, 0x72, 0x44, 0x65, 0x66, + 0x52, 0x03, 0x64, 0x65, 0x66, 0x22, 0x84, 0x05, 0x0a, 0x07, 0x45, 0x78, 0x70, 0x72, 0x44, 0x65, + 0x66, 0x12, 0x47, 0x0a, 0x0b, 0x62, 0x69, 0x6e, 0x61, 0x72, 0x79, 0x5f, 0x65, 0x78, 0x70, 0x72, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, + 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, + 0x31, 0x2e, 0x42, 0x69, 0x6e, 0x61, 0x72, 0x79, 0x45, 0x78, 0x70, 0x72, 0x48, 0x00, 0x52, 0x0a, + 0x62, 0x69, 0x6e, 0x61, 0x72, 0x79, 0x45, 0x78, 0x70, 0x72, 0x12, 0x3a, 0x0a, 0x06, 0x63, 0x6f, + 0x6c, 0x75, 0x6d, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x20, 0x2e, 0x66, 0x72, 0x6f, + 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, + 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x43, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x48, 0x00, 0x52, 0x06, + 0x63, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x12, 0x3d, 0x0a, 0x07, 0x6c, 0x69, 0x74, 0x65, 0x72, 0x61, + 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, + 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, + 0x61, 0x31, 0x2e, 0x4c, 0x69, 0x74, 0x65, 0x72, 0x61, 0x6c, 0x48, 0x00, 0x52, 0x07, 0x6c, 0x69, + 0x74, 0x65, 0x72, 0x61, 0x6c, 0x12, 0x50, 0x0a, 0x0e, 0x64, 0x79, 0x6e, 0x61, 0x6d, 0x69, 0x63, + 0x5f, 0x63, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x27, 0x2e, + 0x66, 0x72, 0x6f, 
0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, + 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x44, 0x79, 0x6e, 0x61, 0x6d, 0x69, 0x63, + 0x43, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x48, 0x00, 0x52, 0x0d, 0x64, 0x79, 0x6e, 0x61, 0x6d, 0x69, + 0x63, 0x43, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x12, 0x62, 0x0a, 0x14, 0x61, 0x67, 0x67, 0x72, 0x65, + 0x67, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x66, 0x75, 0x6e, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x18, + 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x2d, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, + 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, + 0x2e, 0x41, 0x67, 0x67, 0x72, 0x65, 0x67, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x46, 0x75, 0x6e, 0x63, + 0x74, 0x69, 0x6f, 0x6e, 0x48, 0x00, 0x52, 0x13, 0x61, 0x67, 0x67, 0x72, 0x65, 0x67, 0x61, 0x74, + 0x69, 0x6f, 0x6e, 0x46, 0x75, 0x6e, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x37, 0x0a, 0x05, 0x61, + 0x6c, 0x69, 0x61, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x66, 0x72, 0x6f, + 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, + 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x41, 0x6c, 0x69, 0x61, 0x73, 0x48, 0x00, 0x52, 0x05, 0x61, + 0x6c, 0x69, 0x61, 0x73, 0x12, 0x44, 0x0a, 0x08, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, + 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x26, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, + 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, + 0x31, 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x45, 0x78, 0x70, 0x72, 0x48, 0x00, + 0x52, 0x08, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x41, 0x0a, 0x07, 0x63, 0x6f, + 0x6e, 0x76, 0x65, 0x72, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x66, 0x72, + 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, + 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x43, 0x6f, 
0x6e, 0x76, 0x65, 0x72, 0x74, 0x45, 0x78, + 0x70, 0x72, 0x48, 0x00, 0x52, 0x07, 0x63, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74, 0x12, 0x32, 0x0a, + 0x02, 0x69, 0x66, 0x18, 0x09, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x20, 0x2e, 0x66, 0x72, 0x6f, 0x73, + 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, + 0x70, 0x68, 0x61, 0x31, 0x2e, 0x49, 0x66, 0x45, 0x78, 0x70, 0x72, 0x48, 0x00, 0x52, 0x02, 0x69, + 0x66, 0x42, 0x09, 0x0a, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x22, 0xa4, 0x01, 0x0a, + 0x0a, 0x42, 0x69, 0x6e, 0x61, 0x72, 0x79, 0x45, 0x78, 0x70, 0x72, 0x12, 0x32, 0x0a, 0x04, 0x6c, + 0x65, 0x66, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x66, 0x72, 0x6f, 0x73, + 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, + 0x70, 0x68, 0x61, 0x31, 0x2e, 0x45, 0x78, 0x70, 0x72, 0x52, 0x04, 0x6c, 0x65, 0x66, 0x74, 0x12, + 0x34, 0x0a, 0x05, 0x72, 0x69, 0x67, 0x68, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1e, + 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, + 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x45, 0x78, 0x70, 0x72, 0x52, 0x05, + 0x72, 0x69, 0x67, 0x68, 0x74, 0x12, 0x2c, 0x0a, 0x02, 0x6f, 0x70, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x0e, 0x32, 0x1c, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, + 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x4f, 0x70, 0x52, + 0x02, 0x6f, 0x70, 0x22, 0xae, 0x01, 0x0a, 0x06, 0x49, 0x66, 0x45, 0x78, 0x70, 0x72, 0x12, 0x3c, + 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x1e, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, + 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x45, 0x78, 0x70, + 0x72, 0x52, 0x09, 0x63, 0x6f, 0x6e, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x32, 0x0a, 
0x04, + 0x74, 0x68, 0x65, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x66, 0x72, 0x6f, + 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, + 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x45, 0x78, 0x70, 0x72, 0x52, 0x04, 0x74, 0x68, 0x65, 0x6e, + 0x12, 0x32, 0x0a, 0x04, 0x65, 0x6c, 0x73, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1e, + 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, + 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x45, 0x78, 0x70, 0x72, 0x52, 0x04, + 0x65, 0x6c, 0x73, 0x65, 0x22, 0x75, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74, 0x45, + 0x78, 0x70, 0x72, 0x12, 0x32, 0x0a, 0x04, 0x65, 0x78, 0x70, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x1e, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, + 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x45, 0x78, 0x70, + 0x72, 0x52, 0x04, 0x65, 0x78, 0x70, 0x72, 0x12, 0x32, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1e, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, + 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, + 0x2e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x22, 0x1c, 0x0a, 0x06, 0x43, + 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x22, 0x4d, 0x0a, 0x07, 0x4c, 0x69, 0x74, + 0x65, 0x72, 0x61, 0x6c, 0x12, 0x42, 0x0a, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, + 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, + 0x2e, 0x4c, 0x69, 0x74, 0x65, 0x72, 0x61, 0x6c, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x52, + 0x07, 0x63, 0x6f, 0x6e, 0x74, 
0x65, 0x6e, 0x74, 0x22, 0x9d, 0x03, 0x0a, 0x0e, 0x4c, 0x69, 0x74, + 0x65, 0x72, 0x61, 0x6c, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x12, 0x3f, 0x0a, 0x0a, 0x6e, + 0x75, 0x6c, 0x6c, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x1e, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, + 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x4e, 0x75, 0x6c, 0x6c, 0x48, + 0x00, 0x52, 0x09, 0x6e, 0x75, 0x6c, 0x6c, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x1f, 0x0a, 0x0a, + 0x62, 0x6f, 0x6f, 0x6c, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, + 0x48, 0x00, 0x52, 0x09, 0x62, 0x6f, 0x6f, 0x6c, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x21, 0x0a, + 0x0b, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x05, 0x48, 0x00, 0x52, 0x0a, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x56, 0x61, 0x6c, 0x75, 0x65, + 0x12, 0x23, 0x0a, 0x0c, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, + 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x48, 0x00, 0x52, 0x0b, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, + 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x21, 0x0a, 0x0b, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x76, + 0x61, 0x6c, 0x75, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x48, 0x00, 0x52, 0x0a, 0x69, 0x6e, + 0x74, 0x36, 0x34, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x23, 0x0a, 0x0c, 0x75, 0x69, 0x6e, 0x74, + 0x36, 0x34, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x04, 0x48, 0x00, + 0x52, 0x0b, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x21, 0x0a, + 0x0b, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x07, 0x20, 0x01, + 0x28, 0x02, 0x48, 0x00, 0x52, 0x0a, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x56, 0x61, 0x6c, 0x75, 0x65, + 0x12, 0x23, 0x0a, 0x0c, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, + 0x18, 0x08, 0x20, 0x01, 0x28, 0x01, 0x48, 0x00, 0x52, 0x0b, 0x64, 
0x6f, 0x75, 0x62, 0x6c, 0x65, + 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x23, 0x0a, 0x0c, 0x62, 0x69, 0x6e, 0x61, 0x72, 0x79, 0x5f, + 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x09, 0x20, 0x01, 0x28, 0x0c, 0x48, 0x00, 0x52, 0x0b, 0x62, + 0x69, 0x6e, 0x61, 0x72, 0x79, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x23, 0x0a, 0x0c, 0x73, 0x74, + 0x72, 0x69, 0x6e, 0x67, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, + 0x48, 0x00, 0x52, 0x0b, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x42, + 0x07, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x22, 0x06, 0x0a, 0x04, 0x4e, 0x75, 0x6c, 0x6c, + 0x22, 0x4f, 0x0a, 0x05, 0x41, 0x6c, 0x69, 0x61, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, + 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x32, 0x0a, + 0x04, 0x65, 0x78, 0x70, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x66, 0x72, + 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, + 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x45, 0x78, 0x70, 0x72, 0x52, 0x04, 0x65, 0x78, 0x70, + 0x72, 0x22, 0x23, 0x0a, 0x0d, 0x44, 0x79, 0x6e, 0x61, 0x6d, 0x69, 0x63, 0x43, 0x6f, 0x6c, 0x75, + 0x6d, 0x6e, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x22, 0x9f, 0x02, 0x0a, 0x13, 0x41, 0x67, 0x67, 0x72, 0x65, + 0x67, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x46, 0x75, 0x6e, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x46, + 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x32, 0x2e, 0x66, + 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, + 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x41, 0x67, 0x67, 0x72, 0x65, 0x67, 0x61, 0x74, + 0x69, 0x6f, 0x6e, 0x46, 0x75, 0x6e, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x54, 0x79, 0x70, 0x65, + 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x32, 0x0a, 0x04, 0x65, 0x78, 0x70, 0x72, 0x18, 0x02, + 0x20, 
0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, + 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, + 0x45, 0x78, 0x70, 0x72, 0x52, 0x04, 0x65, 0x78, 0x70, 0x72, 0x22, 0x8b, 0x01, 0x0a, 0x04, 0x54, + 0x79, 0x70, 0x65, 0x12, 0x1c, 0x0a, 0x18, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, + 0x4f, 0x57, 0x4e, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, + 0x00, 0x12, 0x0c, 0x0a, 0x08, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x53, 0x55, 0x4d, 0x10, 0x01, 0x12, + 0x0c, 0x0a, 0x08, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x4d, 0x49, 0x4e, 0x10, 0x02, 0x12, 0x0c, 0x0a, + 0x08, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x4d, 0x41, 0x58, 0x10, 0x03, 0x12, 0x0e, 0x0a, 0x0a, 0x54, + 0x59, 0x50, 0x45, 0x5f, 0x43, 0x4f, 0x55, 0x4e, 0x54, 0x10, 0x04, 0x12, 0x0c, 0x0a, 0x08, 0x54, + 0x59, 0x50, 0x45, 0x5f, 0x41, 0x56, 0x47, 0x10, 0x05, 0x12, 0x0f, 0x0a, 0x0b, 0x54, 0x59, 0x50, + 0x45, 0x5f, 0x55, 0x4e, 0x49, 0x51, 0x55, 0x45, 0x10, 0x06, 0x12, 0x0c, 0x0a, 0x08, 0x54, 0x59, + 0x50, 0x45, 0x5f, 0x41, 0x4e, 0x44, 0x10, 0x07, 0x22, 0x32, 0x0a, 0x0c, 0x44, 0x75, 0x72, 0x61, + 0x74, 0x69, 0x6f, 0x6e, 0x45, 0x78, 0x70, 0x72, 0x12, 0x22, 0x0a, 0x0c, 0x6d, 0x69, 0x6c, 0x6c, + 0x69, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0c, + 0x6d, 0x69, 0x6c, 0x6c, 0x69, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x73, 0x2a, 0x85, 0x02, 0x0a, + 0x02, 0x4f, 0x70, 0x12, 0x1a, 0x0a, 0x16, 0x4f, 0x50, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, + 0x4e, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, + 0x09, 0x0a, 0x05, 0x4f, 0x50, 0x5f, 0x45, 0x51, 0x10, 0x01, 0x12, 0x0d, 0x0a, 0x09, 0x4f, 0x50, + 0x5f, 0x4e, 0x4f, 0x54, 0x5f, 0x45, 0x51, 0x10, 0x02, 0x12, 0x09, 0x0a, 0x05, 0x4f, 0x50, 0x5f, + 0x4c, 0x54, 0x10, 0x03, 0x12, 0x0c, 0x0a, 0x08, 0x4f, 0x50, 0x5f, 0x4c, 0x54, 0x5f, 0x45, 0x51, + 0x10, 0x04, 0x12, 0x09, 0x0a, 0x05, 0x4f, 
0x50, 0x5f, 0x47, 0x54, 0x10, 0x05, 0x12, 0x0c, 0x0a, + 0x08, 0x4f, 0x50, 0x5f, 0x47, 0x54, 0x5f, 0x45, 0x51, 0x10, 0x06, 0x12, 0x12, 0x0a, 0x0e, 0x4f, + 0x50, 0x5f, 0x52, 0x45, 0x47, 0x45, 0x58, 0x5f, 0x4d, 0x41, 0x54, 0x43, 0x48, 0x10, 0x07, 0x12, + 0x16, 0x0a, 0x12, 0x4f, 0x50, 0x5f, 0x52, 0x45, 0x47, 0x45, 0x58, 0x5f, 0x4e, 0x4f, 0x54, 0x5f, + 0x4d, 0x41, 0x54, 0x43, 0x48, 0x10, 0x08, 0x12, 0x0a, 0x0a, 0x06, 0x4f, 0x50, 0x5f, 0x41, 0x4e, + 0x44, 0x10, 0x09, 0x12, 0x09, 0x0a, 0x05, 0x4f, 0x50, 0x5f, 0x4f, 0x52, 0x10, 0x0a, 0x12, 0x0a, + 0x0a, 0x06, 0x4f, 0x50, 0x5f, 0x41, 0x44, 0x44, 0x10, 0x0b, 0x12, 0x0a, 0x0a, 0x06, 0x4f, 0x50, + 0x5f, 0x53, 0x55, 0x42, 0x10, 0x0c, 0x12, 0x0a, 0x0a, 0x06, 0x4f, 0x50, 0x5f, 0x4d, 0x55, 0x4c, + 0x10, 0x0d, 0x12, 0x0a, 0x0a, 0x06, 0x4f, 0x50, 0x5f, 0x44, 0x49, 0x56, 0x10, 0x0e, 0x12, 0x0f, + 0x0a, 0x0b, 0x4f, 0x50, 0x5f, 0x43, 0x4f, 0x4e, 0x54, 0x41, 0x49, 0x4e, 0x53, 0x10, 0x0f, 0x12, + 0x13, 0x0a, 0x0f, 0x4f, 0x50, 0x5f, 0x4e, 0x4f, 0x54, 0x5f, 0x43, 0x4f, 0x4e, 0x54, 0x41, 0x49, + 0x4e, 0x53, 0x10, 0x10, 0x2a, 0x36, 0x0a, 0x04, 0x54, 0x79, 0x70, 0x65, 0x12, 0x1c, 0x0a, 0x18, + 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x5f, 0x55, 0x4e, 0x53, + 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x10, 0x0a, 0x0c, 0x54, 0x59, + 0x50, 0x45, 0x5f, 0x46, 0x4c, 0x4f, 0x41, 0x54, 0x36, 0x34, 0x10, 0x01, 0x32, 0x6e, 0x0a, 0x0e, + 0x46, 0x72, 0x6f, 0x73, 0x74, 0x44, 0x42, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x5c, + 0x0a, 0x05, 0x51, 0x75, 0x65, 0x72, 0x79, 0x12, 0x26, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, + 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, + 0x61, 0x31, 0x2e, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, + 0x27, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, + 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x51, 0x75, 
0x65, 0x72, 0x79, + 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x30, 0x01, 0x42, 0x85, 0x02, 0x0a, + 0x1c, 0x63, 0x6f, 0x6d, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x74, 0x6f, + 0x72, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x42, 0x0c, 0x53, + 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x55, 0x67, + 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x70, 0x6f, 0x6c, 0x61, 0x72, 0x73, + 0x69, 0x67, 0x6e, 0x61, 0x6c, 0x73, 0x2f, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x67, + 0x65, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x67, 0x6f, 0x2f, 0x66, 0x72, 0x6f, 0x73, + 0x74, 0x64, 0x62, 0x2f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2f, 0x76, 0x31, 0x61, 0x6c, + 0x70, 0x68, 0x61, 0x31, 0x3b, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x76, 0x31, 0x61, 0x6c, + 0x70, 0x68, 0x61, 0x31, 0xa2, 0x02, 0x03, 0x46, 0x53, 0x58, 0xaa, 0x02, 0x18, 0x46, 0x72, 0x6f, + 0x73, 0x74, 0x64, 0x62, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x56, 0x31, 0x61, + 0x6c, 0x70, 0x68, 0x61, 0x31, 0xca, 0x02, 0x18, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x5c, + 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x5c, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, + 0xe2, 0x02, 0x24, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x5c, 0x53, 0x74, 0x6f, 0x72, 0x61, + 0x67, 0x65, 0x5c, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x5c, 0x47, 0x50, 0x42, 0x4d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, 0x02, 0x1a, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, + 0x62, 0x3a, 0x3a, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x3a, 0x3a, 0x56, 0x31, 0x61, 0x6c, + 0x70, 0x68, 0x61, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_frostdb_storage_v1alpha1_storage_proto_rawDescOnce sync.Once + file_frostdb_storage_v1alpha1_storage_proto_rawDescData = file_frostdb_storage_v1alpha1_storage_proto_rawDesc +) + +func 
file_frostdb_storage_v1alpha1_storage_proto_rawDescGZIP() []byte { + file_frostdb_storage_v1alpha1_storage_proto_rawDescOnce.Do(func() { + file_frostdb_storage_v1alpha1_storage_proto_rawDescData = protoimpl.X.CompressGZIP(file_frostdb_storage_v1alpha1_storage_proto_rawDescData) + }) + return file_frostdb_storage_v1alpha1_storage_proto_rawDescData +} + +var file_frostdb_storage_v1alpha1_storage_proto_enumTypes = make([]protoimpl.EnumInfo, 3) +var file_frostdb_storage_v1alpha1_storage_proto_msgTypes = make([]protoimpl.MessageInfo, 25) +var file_frostdb_storage_v1alpha1_storage_proto_goTypes = []any{ + (Op)(0), // 0: frostdb.storage.v1alpha1.Op + (Type)(0), // 1: frostdb.storage.v1alpha1.Type + (AggregationFunction_Type)(0), // 2: frostdb.storage.v1alpha1.AggregationFunction.Type + (*QueryRequest)(nil), // 3: frostdb.storage.v1alpha1.QueryRequest + (*QueryResponse)(nil), // 4: frostdb.storage.v1alpha1.QueryResponse + (*PlanNode)(nil), // 5: frostdb.storage.v1alpha1.PlanNode + (*PlanNodeSpec)(nil), // 6: frostdb.storage.v1alpha1.PlanNodeSpec + (*TableScan)(nil), // 7: frostdb.storage.v1alpha1.TableScan + (*SchemaScan)(nil), // 8: frostdb.storage.v1alpha1.SchemaScan + (*ScanBase)(nil), // 9: frostdb.storage.v1alpha1.ScanBase + (*Filter)(nil), // 10: frostdb.storage.v1alpha1.Filter + (*Distinct)(nil), // 11: frostdb.storage.v1alpha1.Distinct + (*Projection)(nil), // 12: frostdb.storage.v1alpha1.Projection + (*Limit)(nil), // 13: frostdb.storage.v1alpha1.Limit + (*Aggregation)(nil), // 14: frostdb.storage.v1alpha1.Aggregation + (*Expr)(nil), // 15: frostdb.storage.v1alpha1.Expr + (*ExprDef)(nil), // 16: frostdb.storage.v1alpha1.ExprDef + (*BinaryExpr)(nil), // 17: frostdb.storage.v1alpha1.BinaryExpr + (*IfExpr)(nil), // 18: frostdb.storage.v1alpha1.IfExpr + (*ConvertExpr)(nil), // 19: frostdb.storage.v1alpha1.ConvertExpr + (*Column)(nil), // 20: frostdb.storage.v1alpha1.Column + (*Literal)(nil), // 21: frostdb.storage.v1alpha1.Literal + (*LiteralContent)(nil), // 22: 
frostdb.storage.v1alpha1.LiteralContent + (*Null)(nil), // 23: frostdb.storage.v1alpha1.Null + (*Alias)(nil), // 24: frostdb.storage.v1alpha1.Alias + (*DynamicColumn)(nil), // 25: frostdb.storage.v1alpha1.DynamicColumn + (*AggregationFunction)(nil), // 26: frostdb.storage.v1alpha1.AggregationFunction + (*DurationExpr)(nil), // 27: frostdb.storage.v1alpha1.DurationExpr +} +var file_frostdb_storage_v1alpha1_storage_proto_depIdxs = []int32{ + 5, // 0: frostdb.storage.v1alpha1.QueryRequest.plan_root:type_name -> frostdb.storage.v1alpha1.PlanNode + 5, // 1: frostdb.storage.v1alpha1.PlanNode.next:type_name -> frostdb.storage.v1alpha1.PlanNode + 6, // 2: frostdb.storage.v1alpha1.PlanNode.spec:type_name -> frostdb.storage.v1alpha1.PlanNodeSpec + 7, // 3: frostdb.storage.v1alpha1.PlanNodeSpec.table_scan:type_name -> frostdb.storage.v1alpha1.TableScan + 8, // 4: frostdb.storage.v1alpha1.PlanNodeSpec.schema_scan:type_name -> frostdb.storage.v1alpha1.SchemaScan + 10, // 5: frostdb.storage.v1alpha1.PlanNodeSpec.filter:type_name -> frostdb.storage.v1alpha1.Filter + 12, // 6: frostdb.storage.v1alpha1.PlanNodeSpec.projection:type_name -> frostdb.storage.v1alpha1.Projection + 11, // 7: frostdb.storage.v1alpha1.PlanNodeSpec.distinct:type_name -> frostdb.storage.v1alpha1.Distinct + 14, // 8: frostdb.storage.v1alpha1.PlanNodeSpec.aggregation:type_name -> frostdb.storage.v1alpha1.Aggregation + 13, // 9: frostdb.storage.v1alpha1.PlanNodeSpec.limit:type_name -> frostdb.storage.v1alpha1.Limit + 9, // 10: frostdb.storage.v1alpha1.TableScan.base:type_name -> frostdb.storage.v1alpha1.ScanBase + 9, // 11: frostdb.storage.v1alpha1.SchemaScan.base:type_name -> frostdb.storage.v1alpha1.ScanBase + 15, // 12: frostdb.storage.v1alpha1.Filter.expr:type_name -> frostdb.storage.v1alpha1.Expr + 15, // 13: frostdb.storage.v1alpha1.Distinct.exprs:type_name -> frostdb.storage.v1alpha1.Expr + 15, // 14: frostdb.storage.v1alpha1.Projection.exprs:type_name -> frostdb.storage.v1alpha1.Expr + 15, // 15: 
frostdb.storage.v1alpha1.Limit.expr:type_name -> frostdb.storage.v1alpha1.Expr + 15, // 16: frostdb.storage.v1alpha1.Aggregation.group_exprs:type_name -> frostdb.storage.v1alpha1.Expr + 15, // 17: frostdb.storage.v1alpha1.Aggregation.agg_exprs:type_name -> frostdb.storage.v1alpha1.Expr + 16, // 18: frostdb.storage.v1alpha1.Expr.def:type_name -> frostdb.storage.v1alpha1.ExprDef + 17, // 19: frostdb.storage.v1alpha1.ExprDef.binary_expr:type_name -> frostdb.storage.v1alpha1.BinaryExpr + 20, // 20: frostdb.storage.v1alpha1.ExprDef.column:type_name -> frostdb.storage.v1alpha1.Column + 21, // 21: frostdb.storage.v1alpha1.ExprDef.literal:type_name -> frostdb.storage.v1alpha1.Literal + 25, // 22: frostdb.storage.v1alpha1.ExprDef.dynamic_column:type_name -> frostdb.storage.v1alpha1.DynamicColumn + 26, // 23: frostdb.storage.v1alpha1.ExprDef.aggregation_function:type_name -> frostdb.storage.v1alpha1.AggregationFunction + 24, // 24: frostdb.storage.v1alpha1.ExprDef.alias:type_name -> frostdb.storage.v1alpha1.Alias + 27, // 25: frostdb.storage.v1alpha1.ExprDef.duration:type_name -> frostdb.storage.v1alpha1.DurationExpr + 19, // 26: frostdb.storage.v1alpha1.ExprDef.convert:type_name -> frostdb.storage.v1alpha1.ConvertExpr + 18, // 27: frostdb.storage.v1alpha1.ExprDef.if:type_name -> frostdb.storage.v1alpha1.IfExpr + 15, // 28: frostdb.storage.v1alpha1.BinaryExpr.left:type_name -> frostdb.storage.v1alpha1.Expr + 15, // 29: frostdb.storage.v1alpha1.BinaryExpr.right:type_name -> frostdb.storage.v1alpha1.Expr + 0, // 30: frostdb.storage.v1alpha1.BinaryExpr.op:type_name -> frostdb.storage.v1alpha1.Op + 15, // 31: frostdb.storage.v1alpha1.IfExpr.condition:type_name -> frostdb.storage.v1alpha1.Expr + 15, // 32: frostdb.storage.v1alpha1.IfExpr.then:type_name -> frostdb.storage.v1alpha1.Expr + 15, // 33: frostdb.storage.v1alpha1.IfExpr.else:type_name -> frostdb.storage.v1alpha1.Expr + 15, // 34: frostdb.storage.v1alpha1.ConvertExpr.expr:type_name -> frostdb.storage.v1alpha1.Expr + 1, // 
35: frostdb.storage.v1alpha1.ConvertExpr.type:type_name -> frostdb.storage.v1alpha1.Type + 22, // 36: frostdb.storage.v1alpha1.Literal.content:type_name -> frostdb.storage.v1alpha1.LiteralContent + 23, // 37: frostdb.storage.v1alpha1.LiteralContent.null_value:type_name -> frostdb.storage.v1alpha1.Null + 15, // 38: frostdb.storage.v1alpha1.Alias.expr:type_name -> frostdb.storage.v1alpha1.Expr + 2, // 39: frostdb.storage.v1alpha1.AggregationFunction.type:type_name -> frostdb.storage.v1alpha1.AggregationFunction.Type + 15, // 40: frostdb.storage.v1alpha1.AggregationFunction.expr:type_name -> frostdb.storage.v1alpha1.Expr + 3, // 41: frostdb.storage.v1alpha1.FrostDBService.Query:input_type -> frostdb.storage.v1alpha1.QueryRequest + 4, // 42: frostdb.storage.v1alpha1.FrostDBService.Query:output_type -> frostdb.storage.v1alpha1.QueryResponse + 42, // [42:43] is the sub-list for method output_type + 41, // [41:42] is the sub-list for method input_type + 41, // [41:41] is the sub-list for extension type_name + 41, // [41:41] is the sub-list for extension extendee + 0, // [0:41] is the sub-list for field type_name +} + +func init() { file_frostdb_storage_v1alpha1_storage_proto_init() } +func file_frostdb_storage_v1alpha1_storage_proto_init() { + if File_frostdb_storage_v1alpha1_storage_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[0].Exporter = func(v any, i int) any { + switch v := v.(*QueryRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[1].Exporter = func(v any, i int) any { + switch v := v.(*QueryResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[2].Exporter = func(v any, i int) any { + switch v := v.(*PlanNode); 
i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[3].Exporter = func(v any, i int) any { + switch v := v.(*PlanNodeSpec); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[4].Exporter = func(v any, i int) any { + switch v := v.(*TableScan); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[5].Exporter = func(v any, i int) any { + switch v := v.(*SchemaScan); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[6].Exporter = func(v any, i int) any { + switch v := v.(*ScanBase); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[7].Exporter = func(v any, i int) any { + switch v := v.(*Filter); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[8].Exporter = func(v any, i int) any { + switch v := v.(*Distinct); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[9].Exporter = func(v any, i int) any { + switch v := v.(*Projection); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[10].Exporter = 
func(v any, i int) any { + switch v := v.(*Limit); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[11].Exporter = func(v any, i int) any { + switch v := v.(*Aggregation); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[12].Exporter = func(v any, i int) any { + switch v := v.(*Expr); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[13].Exporter = func(v any, i int) any { + switch v := v.(*ExprDef); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[14].Exporter = func(v any, i int) any { + switch v := v.(*BinaryExpr); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[15].Exporter = func(v any, i int) any { + switch v := v.(*IfExpr); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[16].Exporter = func(v any, i int) any { + switch v := v.(*ConvertExpr); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[17].Exporter = func(v any, i int) any { + switch v := v.(*Column); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + 
file_frostdb_storage_v1alpha1_storage_proto_msgTypes[18].Exporter = func(v any, i int) any { + switch v := v.(*Literal); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[19].Exporter = func(v any, i int) any { + switch v := v.(*LiteralContent); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[20].Exporter = func(v any, i int) any { + switch v := v.(*Null); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[21].Exporter = func(v any, i int) any { + switch v := v.(*Alias); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[22].Exporter = func(v any, i int) any { + switch v := v.(*DynamicColumn); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[23].Exporter = func(v any, i int) any { + switch v := v.(*AggregationFunction); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[24].Exporter = func(v any, i int) any { + switch v := v.(*DurationExpr); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[3].OneofWrappers = []any{ + (*PlanNodeSpec_TableScan)(nil), + (*PlanNodeSpec_SchemaScan)(nil), + (*PlanNodeSpec_Filter)(nil), + 
(*PlanNodeSpec_Projection)(nil), + (*PlanNodeSpec_Distinct)(nil), + (*PlanNodeSpec_Aggregation)(nil), + (*PlanNodeSpec_Limit)(nil), + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[13].OneofWrappers = []any{ + (*ExprDef_BinaryExpr)(nil), + (*ExprDef_Column)(nil), + (*ExprDef_Literal)(nil), + (*ExprDef_DynamicColumn)(nil), + (*ExprDef_AggregationFunction)(nil), + (*ExprDef_Alias)(nil), + (*ExprDef_Duration)(nil), + (*ExprDef_Convert)(nil), + (*ExprDef_If)(nil), + } + file_frostdb_storage_v1alpha1_storage_proto_msgTypes[19].OneofWrappers = []any{ + (*LiteralContent_NullValue)(nil), + (*LiteralContent_BoolValue)(nil), + (*LiteralContent_Int32Value)(nil), + (*LiteralContent_Uint32Value)(nil), + (*LiteralContent_Int64Value)(nil), + (*LiteralContent_Uint64Value)(nil), + (*LiteralContent_FloatValue)(nil), + (*LiteralContent_DoubleValue)(nil), + (*LiteralContent_BinaryValue)(nil), + (*LiteralContent_StringValue)(nil), + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_frostdb_storage_v1alpha1_storage_proto_rawDesc, + NumEnums: 3, + NumMessages: 25, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_frostdb_storage_v1alpha1_storage_proto_goTypes, + DependencyIndexes: file_frostdb_storage_v1alpha1_storage_proto_depIdxs, + EnumInfos: file_frostdb_storage_v1alpha1_storage_proto_enumTypes, + MessageInfos: file_frostdb_storage_v1alpha1_storage_proto_msgTypes, + }.Build() + File_frostdb_storage_v1alpha1_storage_proto = out.File + file_frostdb_storage_v1alpha1_storage_proto_rawDesc = nil + file_frostdb_storage_v1alpha1_storage_proto_goTypes = nil + file_frostdb_storage_v1alpha1_storage_proto_depIdxs = nil +} diff --git a/gen/proto/go/frostdb/storage/v1alpha1/storage_vtproto.pb.go b/gen/proto/go/frostdb/storage/v1alpha1/storage_vtproto.pb.go new file mode 100644 index 000000000..29c15c46e --- /dev/null +++ 
b/gen/proto/go/frostdb/storage/v1alpha1/storage_vtproto.pb.go @@ -0,0 +1,5617 @@ +// Code generated by protoc-gen-go-vtproto. DO NOT EDIT. +// protoc-gen-go-vtproto version: v0.6.0 +// source: frostdb/storage/v1alpha1/storage.proto + +package storagev1alpha1 + +import ( + context "context" + binary "encoding/binary" + fmt "fmt" + protohelpers "github.com/planetscale/vtprotobuf/protohelpers" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + io "io" + math "math" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.32.0 or later. +const _ = grpc.SupportPackageIsVersion7 + +// FrostDBServiceClient is the client API for FrostDBService service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +type FrostDBServiceClient interface { + // Query executes the query plan contained in the request and returns back + // the results as arrow records in IPC format. 
+ Query(ctx context.Context, in *QueryRequest, opts ...grpc.CallOption) (FrostDBService_QueryClient, error) +} + +type frostDBServiceClient struct { + cc grpc.ClientConnInterface +} + +func NewFrostDBServiceClient(cc grpc.ClientConnInterface) FrostDBServiceClient { + return &frostDBServiceClient{cc} +} + +func (c *frostDBServiceClient) Query(ctx context.Context, in *QueryRequest, opts ...grpc.CallOption) (FrostDBService_QueryClient, error) { + stream, err := c.cc.NewStream(ctx, &FrostDBService_ServiceDesc.Streams[0], "/frostdb.storage.v1alpha1.FrostDBService/Query", opts...) + if err != nil { + return nil, err + } + x := &frostDBServiceQueryClient{stream} + if err := x.ClientStream.SendMsg(in); err != nil { + return nil, err + } + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + return x, nil +} + +type FrostDBService_QueryClient interface { + Recv() (*QueryResponse, error) + grpc.ClientStream +} + +type frostDBServiceQueryClient struct { + grpc.ClientStream +} + +func (x *frostDBServiceQueryClient) Recv() (*QueryResponse, error) { + m := new(QueryResponse) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +// FrostDBServiceServer is the server API for FrostDBService service. +// All implementations must embed UnimplementedFrostDBServiceServer +// for forward compatibility +type FrostDBServiceServer interface { + // Query executes the query plan contained in the request and returns back + // the results as arrow records in IPC format. + Query(*QueryRequest, FrostDBService_QueryServer) error + mustEmbedUnimplementedFrostDBServiceServer() +} + +// UnimplementedFrostDBServiceServer must be embedded to have forward compatible implementations. 
+type UnimplementedFrostDBServiceServer struct { +} + +func (UnimplementedFrostDBServiceServer) Query(*QueryRequest, FrostDBService_QueryServer) error { + return status.Errorf(codes.Unimplemented, "method Query not implemented") +} +func (UnimplementedFrostDBServiceServer) mustEmbedUnimplementedFrostDBServiceServer() {} + +// UnsafeFrostDBServiceServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to FrostDBServiceServer will +// result in compilation errors. +type UnsafeFrostDBServiceServer interface { + mustEmbedUnimplementedFrostDBServiceServer() +} + +func RegisterFrostDBServiceServer(s grpc.ServiceRegistrar, srv FrostDBServiceServer) { + s.RegisterService(&FrostDBService_ServiceDesc, srv) +} + +func _FrostDBService_Query_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(QueryRequest) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(FrostDBServiceServer).Query(m, &frostDBServiceQueryServer{stream}) +} + +type FrostDBService_QueryServer interface { + Send(*QueryResponse) error + grpc.ServerStream +} + +type frostDBServiceQueryServer struct { + grpc.ServerStream +} + +func (x *frostDBServiceQueryServer) Send(m *QueryResponse) error { + return x.ServerStream.SendMsg(m) +} + +// FrostDBService_ServiceDesc is the grpc.ServiceDesc for FrostDBService service. 
+// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var FrostDBService_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "frostdb.storage.v1alpha1.FrostDBService", + HandlerType: (*FrostDBServiceServer)(nil), + Methods: []grpc.MethodDesc{}, + Streams: []grpc.StreamDesc{ + { + StreamName: "Query", + Handler: _FrostDBService_Query_Handler, + ServerStreams: true, + }, + }, + Metadata: "frostdb/storage/v1alpha1/storage.proto", +} + +func (m *QueryRequest) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *QueryRequest) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *QueryRequest) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if len(m.Description) > 0 { + i -= len(m.Description) + copy(dAtA[i:], m.Description) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.Description))) + i-- + dAtA[i] = 0x12 + } + if m.PlanRoot != nil { + size, err := m.PlanRoot.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *QueryResponse) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *QueryResponse) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + 
+func (m *QueryResponse) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if len(m.Record) > 0 { + i -= len(m.Record) + copy(dAtA[i:], m.Record) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.Record))) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *PlanNode) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *PlanNode) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *PlanNode) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if m.Spec != nil { + size, err := m.Spec.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x12 + } + if m.Next != nil { + size, err := m.Next.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *PlanNodeSpec) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *PlanNodeSpec) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *PlanNodeSpec) MarshalToSizedBufferVT(dAtA 
[]byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if vtmsg, ok := m.Spec.(interface { + MarshalToSizedBufferVT([]byte) (int, error) + }); ok { + size, err := vtmsg.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + } + return len(dAtA) - i, nil +} + +func (m *PlanNodeSpec_TableScan) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *PlanNodeSpec_TableScan) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + if m.TableScan != nil { + size, err := m.TableScan.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} +func (m *PlanNodeSpec_SchemaScan) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *PlanNodeSpec_SchemaScan) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + if m.SchemaScan != nil { + size, err := m.SchemaScan.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x12 + } + return len(dAtA) - i, nil +} +func (m *PlanNodeSpec_Filter) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *PlanNodeSpec_Filter) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + if m.Filter != nil { + size, err := m.Filter.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x1a + } + return len(dAtA) - i, nil +} +func (m *PlanNodeSpec_Projection) MarshalToVT(dAtA []byte) (int, error) { + 
size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *PlanNodeSpec_Projection) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + if m.Projection != nil { + size, err := m.Projection.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x22 + } + return len(dAtA) - i, nil +} +func (m *PlanNodeSpec_Distinct) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *PlanNodeSpec_Distinct) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + if m.Distinct != nil { + size, err := m.Distinct.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x2a + } + return len(dAtA) - i, nil +} +func (m *PlanNodeSpec_Aggregation) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *PlanNodeSpec_Aggregation) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + if m.Aggregation != nil { + size, err := m.Aggregation.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x32 + } + return len(dAtA) - i, nil +} +func (m *PlanNodeSpec_Limit) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *PlanNodeSpec_Limit) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + if m.Limit != nil { + size, err := m.Limit.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x3a + } + return len(dAtA) - i, nil +} +func (m *TableScan) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, 
nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *TableScan) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *TableScan) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if m.Base != nil { + size, err := m.Base.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *SchemaScan) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *SchemaScan) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *SchemaScan) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if m.Base != nil { + size, err := m.Base.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *ScanBase) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *ScanBase) MarshalToVT(dAtA []byte) 
(int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *ScanBase) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if len(m.Table) > 0 { + i -= len(m.Table) + copy(dAtA[i:], m.Table) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.Table))) + i-- + dAtA[i] = 0x12 + } + if len(m.Database) > 0 { + i -= len(m.Database) + copy(dAtA[i:], m.Database) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.Database))) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *Filter) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Filter) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *Filter) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if m.Expr != nil { + size, err := m.Expr.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *Distinct) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Distinct) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *Distinct) 
MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if len(m.Exprs) > 0 { + for iNdEx := len(m.Exprs) - 1; iNdEx >= 0; iNdEx-- { + size, err := m.Exprs[iNdEx].MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0xa + } + } + return len(dAtA) - i, nil +} + +func (m *Projection) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Projection) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *Projection) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if len(m.Exprs) > 0 { + for iNdEx := len(m.Exprs) - 1; iNdEx >= 0; iNdEx-- { + size, err := m.Exprs[iNdEx].MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0xa + } + } + return len(dAtA) - i, nil +} + +func (m *Limit) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Limit) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *Limit) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i 
:= len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if m.Expr != nil { + size, err := m.Expr.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *Aggregation) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Aggregation) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *Aggregation) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if len(m.AggExprs) > 0 { + for iNdEx := len(m.AggExprs) - 1; iNdEx >= 0; iNdEx-- { + size, err := m.AggExprs[iNdEx].MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x12 + } + } + if len(m.GroupExprs) > 0 { + for iNdEx := len(m.GroupExprs) - 1; iNdEx >= 0; iNdEx-- { + size, err := m.GroupExprs[iNdEx].MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0xa + } + } + return len(dAtA) - i, nil +} + +func (m *Expr) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Expr) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return 
m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *Expr) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if m.Def != nil { + size, err := m.Def.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *ExprDef) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *ExprDef) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *ExprDef) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if vtmsg, ok := m.Content.(interface { + MarshalToSizedBufferVT([]byte) (int, error) + }); ok { + size, err := vtmsg.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + } + return len(dAtA) - i, nil +} + +func (m *ExprDef_BinaryExpr) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *ExprDef_BinaryExpr) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + if m.BinaryExpr != nil { + size, err := m.BinaryExpr.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} +func (m *ExprDef_Column) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return 
m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *ExprDef_Column) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + if m.Column != nil { + size, err := m.Column.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x12 + } + return len(dAtA) - i, nil +} +func (m *ExprDef_Literal) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *ExprDef_Literal) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + if m.Literal != nil { + size, err := m.Literal.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x1a + } + return len(dAtA) - i, nil +} +func (m *ExprDef_DynamicColumn) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *ExprDef_DynamicColumn) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + if m.DynamicColumn != nil { + size, err := m.DynamicColumn.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x22 + } + return len(dAtA) - i, nil +} +func (m *ExprDef_AggregationFunction) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *ExprDef_AggregationFunction) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + if m.AggregationFunction != nil { + size, err := m.AggregationFunction.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x2a + } + return len(dAtA) - i, nil +} +func (m *ExprDef_Alias) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return 
m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *ExprDef_Alias) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + if m.Alias != nil { + size, err := m.Alias.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x32 + } + return len(dAtA) - i, nil +} +func (m *ExprDef_Duration) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *ExprDef_Duration) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + if m.Duration != nil { + size, err := m.Duration.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x3a + } + return len(dAtA) - i, nil +} +func (m *ExprDef_Convert) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *ExprDef_Convert) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + if m.Convert != nil { + size, err := m.Convert.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x42 + } + return len(dAtA) - i, nil +} +func (m *ExprDef_If) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *ExprDef_If) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + if m.If != nil { + size, err := m.If.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x4a + } + return len(dAtA) - i, nil +} +func (m *BinaryExpr) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) 
+ if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *BinaryExpr) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *BinaryExpr) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if m.Op != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Op)) + i-- + dAtA[i] = 0x18 + } + if m.Right != nil { + size, err := m.Right.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x12 + } + if m.Left != nil { + size, err := m.Left.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *IfExpr) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *IfExpr) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *IfExpr) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if m.Else != nil { + size, err := m.Else.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x1a + } + if m.Then != nil { + size, err := m.Then.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = 
protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x12 + } + if m.Condition != nil { + size, err := m.Condition.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *ConvertExpr) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *ConvertExpr) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *ConvertExpr) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if m.Type != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Type)) + i-- + dAtA[i] = 0x10 + } + if m.Expr != nil { + size, err := m.Expr.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *Column) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Column) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *Column) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if len(m.Name) > 0 { + i -= 
len(m.Name) + copy(dAtA[i:], m.Name) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.Name))) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *Literal) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Literal) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *Literal) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if m.Content != nil { + size, err := m.Content.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *LiteralContent) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *LiteralContent) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *LiteralContent) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if vtmsg, ok := m.Value.(interface { + MarshalToSizedBufferVT([]byte) (int, error) + }); ok { + size, err := vtmsg.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + } + return len(dAtA) - i, nil +} + +func (m *LiteralContent_NullValue) MarshalToVT(dAtA 
[]byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *LiteralContent_NullValue) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + if m.NullValue != nil { + size, err := m.NullValue.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} +func (m *LiteralContent_BoolValue) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *LiteralContent_BoolValue) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + i-- + if m.BoolValue { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i-- + dAtA[i] = 0x10 + return len(dAtA) - i, nil +} +func (m *LiteralContent_Int32Value) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *LiteralContent_Int32Value) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Int32Value)) + i-- + dAtA[i] = 0x18 + return len(dAtA) - i, nil +} +func (m *LiteralContent_Uint32Value) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *LiteralContent_Uint32Value) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Uint32Value)) + i-- + dAtA[i] = 0x20 + return len(dAtA) - i, nil +} +func (m *LiteralContent_Int64Value) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *LiteralContent_Int64Value) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Int64Value)) + i-- + dAtA[i] = 0x28 + return len(dAtA) - i, nil +} +func (m *LiteralContent_Uint64Value) MarshalToVT(dAtA 
[]byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *LiteralContent_Uint64Value) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Uint64Value)) + i-- + dAtA[i] = 0x30 + return len(dAtA) - i, nil +} +func (m *LiteralContent_FloatValue) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *LiteralContent_FloatValue) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + i -= 4 + binary.LittleEndian.PutUint32(dAtA[i:], uint32(math.Float32bits(float32(m.FloatValue)))) + i-- + dAtA[i] = 0x3d + return len(dAtA) - i, nil +} +func (m *LiteralContent_DoubleValue) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *LiteralContent_DoubleValue) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + i -= 8 + binary.LittleEndian.PutUint64(dAtA[i:], uint64(math.Float64bits(float64(m.DoubleValue)))) + i-- + dAtA[i] = 0x41 + return len(dAtA) - i, nil +} +func (m *LiteralContent_BinaryValue) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *LiteralContent_BinaryValue) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + i -= len(m.BinaryValue) + copy(dAtA[i:], m.BinaryValue) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.BinaryValue))) + i-- + dAtA[i] = 0x4a + return len(dAtA) - i, nil +} +func (m *LiteralContent_StringValue) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *LiteralContent_StringValue) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + i -= len(m.StringValue) + copy(dAtA[i:], m.StringValue) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.StringValue))) + i-- + dAtA[i] = 0x52 + return 
len(dAtA) - i, nil +} +func (m *Null) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Null) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *Null) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + return len(dAtA) - i, nil +} + +func (m *Alias) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Alias) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *Alias) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if m.Expr != nil { + size, err := m.Expr.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x12 + } + if len(m.Name) > 0 { + i -= len(m.Name) + copy(dAtA[i:], m.Name) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.Name))) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *DynamicColumn) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m 
*DynamicColumn) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *DynamicColumn) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if len(m.Name) > 0 { + i -= len(m.Name) + copy(dAtA[i:], m.Name) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.Name))) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *AggregationFunction) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *AggregationFunction) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *AggregationFunction) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if m.Expr != nil { + size, err := m.Expr.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x12 + } + if m.Type != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Type)) + i-- + dAtA[i] = 0x8 + } + return len(dAtA) - i, nil +} + +func (m *DurationExpr) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *DurationExpr) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + 
+func (m *DurationExpr) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if m.Milliseconds != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Milliseconds)) + i-- + dAtA[i] = 0x8 + } + return len(dAtA) - i, nil +} + +func (m *QueryRequest) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.PlanRoot != nil { + l = m.PlanRoot.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + l = len(m.Description) + if l > 0 { + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + n += len(m.unknownFields) + return n +} + +func (m *QueryResponse) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + l = len(m.Record) + if l > 0 { + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + n += len(m.unknownFields) + return n +} + +func (m *PlanNode) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Next != nil { + l = m.Next.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + if m.Spec != nil { + l = m.Spec.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + n += len(m.unknownFields) + return n +} + +func (m *PlanNodeSpec) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if vtmsg, ok := m.Spec.(interface{ SizeVT() int }); ok { + n += vtmsg.SizeVT() + } + n += len(m.unknownFields) + return n +} + +func (m *PlanNodeSpec_TableScan) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.TableScan != nil { + l = m.TableScan.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + return n +} +func (m *PlanNodeSpec_SchemaScan) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.SchemaScan != nil { + l = m.SchemaScan.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + return n +} +func (m 
*PlanNodeSpec_Filter) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Filter != nil { + l = m.Filter.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + return n +} +func (m *PlanNodeSpec_Projection) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Projection != nil { + l = m.Projection.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + return n +} +func (m *PlanNodeSpec_Distinct) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Distinct != nil { + l = m.Distinct.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + return n +} +func (m *PlanNodeSpec_Aggregation) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Aggregation != nil { + l = m.Aggregation.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + return n +} +func (m *PlanNodeSpec_Limit) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Limit != nil { + l = m.Limit.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + return n +} +func (m *TableScan) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Base != nil { + l = m.Base.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + n += len(m.unknownFields) + return n +} + +func (m *SchemaScan) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Base != nil { + l = m.Base.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + n += len(m.unknownFields) + return n +} + +func (m *ScanBase) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + l = len(m.Database) + if l > 0 { + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + l = len(m.Table) + if l > 0 { + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + n += len(m.unknownFields) + return n +} + +func (m *Filter) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Expr != nil 
{ + l = m.Expr.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + n += len(m.unknownFields) + return n +} + +func (m *Distinct) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if len(m.Exprs) > 0 { + for _, e := range m.Exprs { + l = e.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + } + n += len(m.unknownFields) + return n +} + +func (m *Projection) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if len(m.Exprs) > 0 { + for _, e := range m.Exprs { + l = e.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + } + n += len(m.unknownFields) + return n +} + +func (m *Limit) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Expr != nil { + l = m.Expr.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + n += len(m.unknownFields) + return n +} + +func (m *Aggregation) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if len(m.GroupExprs) > 0 { + for _, e := range m.GroupExprs { + l = e.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + } + if len(m.AggExprs) > 0 { + for _, e := range m.AggExprs { + l = e.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + } + n += len(m.unknownFields) + return n +} + +func (m *Expr) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Def != nil { + l = m.Def.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + n += len(m.unknownFields) + return n +} + +func (m *ExprDef) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if vtmsg, ok := m.Content.(interface{ SizeVT() int }); ok { + n += vtmsg.SizeVT() + } + n += len(m.unknownFields) + return n +} + +func (m *ExprDef_BinaryExpr) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.BinaryExpr != nil { + l = m.BinaryExpr.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + return n +} +func (m *ExprDef_Column) 
SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Column != nil { + l = m.Column.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + return n +} +func (m *ExprDef_Literal) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Literal != nil { + l = m.Literal.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + return n +} +func (m *ExprDef_DynamicColumn) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.DynamicColumn != nil { + l = m.DynamicColumn.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + return n +} +func (m *ExprDef_AggregationFunction) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.AggregationFunction != nil { + l = m.AggregationFunction.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + return n +} +func (m *ExprDef_Alias) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Alias != nil { + l = m.Alias.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + return n +} +func (m *ExprDef_Duration) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Duration != nil { + l = m.Duration.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + return n +} +func (m *ExprDef_Convert) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Convert != nil { + l = m.Convert.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + return n +} +func (m *ExprDef_If) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.If != nil { + l = m.If.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + return n +} +func (m *BinaryExpr) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Left != nil { + l = m.Left.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + if m.Right != nil { + l = m.Right.SizeVT() + n += 1 + l + 
protohelpers.SizeOfVarint(uint64(l)) + } + if m.Op != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.Op)) + } + n += len(m.unknownFields) + return n +} + +func (m *IfExpr) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Condition != nil { + l = m.Condition.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + if m.Then != nil { + l = m.Then.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + if m.Else != nil { + l = m.Else.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + n += len(m.unknownFields) + return n +} + +func (m *ConvertExpr) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Expr != nil { + l = m.Expr.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + if m.Type != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.Type)) + } + n += len(m.unknownFields) + return n +} + +func (m *Column) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + l = len(m.Name) + if l > 0 { + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + n += len(m.unknownFields) + return n +} + +func (m *Literal) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Content != nil { + l = m.Content.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + n += len(m.unknownFields) + return n +} + +func (m *LiteralContent) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if vtmsg, ok := m.Value.(interface{ SizeVT() int }); ok { + n += vtmsg.SizeVT() + } + n += len(m.unknownFields) + return n +} + +func (m *LiteralContent_NullValue) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.NullValue != nil { + l = m.NullValue.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + return n +} +func (m *LiteralContent_BoolValue) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + n += 2 + return n +} +func (m *LiteralContent_Int32Value) SizeVT() (n 
int) { + if m == nil { + return 0 + } + var l int + _ = l + n += 1 + protohelpers.SizeOfVarint(uint64(m.Int32Value)) + return n +} +func (m *LiteralContent_Uint32Value) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + n += 1 + protohelpers.SizeOfVarint(uint64(m.Uint32Value)) + return n +} +func (m *LiteralContent_Int64Value) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + n += 1 + protohelpers.SizeOfVarint(uint64(m.Int64Value)) + return n +} +func (m *LiteralContent_Uint64Value) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + n += 1 + protohelpers.SizeOfVarint(uint64(m.Uint64Value)) + return n +} +func (m *LiteralContent_FloatValue) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + n += 5 + return n +} +func (m *LiteralContent_DoubleValue) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + n += 9 + return n +} +func (m *LiteralContent_BinaryValue) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + l = len(m.BinaryValue) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + return n +} +func (m *LiteralContent_StringValue) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + l = len(m.StringValue) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + return n +} +func (m *Null) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + n += len(m.unknownFields) + return n +} + +func (m *Alias) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + l = len(m.Name) + if l > 0 { + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + if m.Expr != nil { + l = m.Expr.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + n += len(m.unknownFields) + return n +} + +func (m *DynamicColumn) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + l = len(m.Name) + if l > 0 { + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + n += len(m.unknownFields) + return 
n +} + +func (m *AggregationFunction) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Type != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.Type)) + } + if m.Expr != nil { + l = m.Expr.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + n += len(m.unknownFields) + return n +} + +func (m *DurationExpr) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Milliseconds != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.Milliseconds)) + } + n += len(m.unknownFields) + return n +} + +func (m *QueryRequest) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: QueryRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: QueryRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field PlanRoot", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.PlanRoot == nil { + m.PlanRoot = &PlanNode{} + } + if err := m.PlanRoot.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 
2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Description", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Description = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) 
+ iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *QueryResponse) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: QueryResponse: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: QueryResponse: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Record", wireType) + } + var byteLen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + byteLen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if byteLen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + byteLen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Record = append(m.Record[:0], dAtA[iNdEx:postIndex]...) + if m.Record == nil { + m.Record = []byte{} + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) 
+ iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *PlanNode) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: PlanNode: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: PlanNode: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Next", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Next == nil { + m.Next = &PlanNode{} + } + if err := m.Next.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Spec", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return 
protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Spec == nil { + m.Spec = &PlanNodeSpec{} + } + if err := m.Spec.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *PlanNodeSpec) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: PlanNodeSpec: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: PlanNodeSpec: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field TableScan", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if oneof, ok := m.Spec.(*PlanNodeSpec_TableScan); ok { 
+ if err := oneof.TableScan.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + } else { + v := &TableScan{} + if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + m.Spec = &PlanNodeSpec_TableScan{TableScan: v} + } + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field SchemaScan", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if oneof, ok := m.Spec.(*PlanNodeSpec_SchemaScan); ok { + if err := oneof.SchemaScan.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + } else { + v := &SchemaScan{} + if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + m.Spec = &PlanNodeSpec_SchemaScan{SchemaScan: v} + } + iNdEx = postIndex + case 3: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Filter", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if oneof, ok := m.Spec.(*PlanNodeSpec_Filter); ok { + if err := oneof.Filter.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + } else { + v := &Filter{} + if err := 
v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + m.Spec = &PlanNodeSpec_Filter{Filter: v} + } + iNdEx = postIndex + case 4: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Projection", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if oneof, ok := m.Spec.(*PlanNodeSpec_Projection); ok { + if err := oneof.Projection.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + } else { + v := &Projection{} + if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + m.Spec = &PlanNodeSpec_Projection{Projection: v} + } + iNdEx = postIndex + case 5: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Distinct", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if oneof, ok := m.Spec.(*PlanNodeSpec_Distinct); ok { + if err := oneof.Distinct.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + } else { + v := &Distinct{} + if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + m.Spec = &PlanNodeSpec_Distinct{Distinct: v} + } + iNdEx = postIndex + case 6: + if wireType 
!= 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Aggregation", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if oneof, ok := m.Spec.(*PlanNodeSpec_Aggregation); ok { + if err := oneof.Aggregation.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + } else { + v := &Aggregation{} + if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + m.Spec = &PlanNodeSpec_Aggregation{Aggregation: v} + } + iNdEx = postIndex + case 7: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Limit", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if oneof, ok := m.Spec.(*PlanNodeSpec_Limit); ok { + if err := oneof.Limit.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + } else { + v := &Limit{} + if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + m.Spec = &PlanNodeSpec_Limit{Limit: v} + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength 
+ } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *TableScan) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: TableScan: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: TableScan: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Base", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Base == nil { + m.Base = &ScanBase{} + } + if err := m.Base.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) 
+ iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *SchemaScan) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: SchemaScan: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: SchemaScan: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Base", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Base == nil { + m.Base = &ScanBase{} + } + if err := m.Base.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) 
+ iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *ScanBase) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: ScanBase: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: ScanBase: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Database", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Database = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Table", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + intStringLen + if 
postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Table = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *Filter) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Filter: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Filter: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Expr", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Expr == nil { + m.Expr = &Expr{} + } + if err := m.Expr.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex 
+ default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *Distinct) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Distinct: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Distinct: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Exprs", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Exprs = append(m.Exprs, &Expr{}) + if err := m.Exprs[len(m.Exprs)-1].UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + 
return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *Projection) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Projection: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Projection: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Exprs", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Exprs = append(m.Exprs, &Expr{}) + if err := m.Exprs[len(m.Exprs)-1].UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, 
dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *Limit) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Limit: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Limit: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Expr", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Expr == nil { + m.Expr = &Expr{} + } + if err := m.Expr.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) 
+ iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *Aggregation) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Aggregation: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Aggregation: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field GroupExprs", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.GroupExprs = append(m.GroupExprs, &Expr{}) + if err := m.GroupExprs[len(m.GroupExprs)-1].UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field AggExprs", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx 
+ msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.AggExprs = append(m.AggExprs, &Expr{}) + if err := m.AggExprs[len(m.AggExprs)-1].UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *Expr) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Expr: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Expr: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Def", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Def == nil { + m.Def = 
&ExprDef{} + } + if err := m.Def.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *ExprDef) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: ExprDef: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: ExprDef: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field BinaryExpr", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if oneof, ok := m.Content.(*ExprDef_BinaryExpr); ok { + if err := oneof.BinaryExpr.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + } else { + v := &BinaryExpr{} + if err := 
v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + m.Content = &ExprDef_BinaryExpr{BinaryExpr: v} + } + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Column", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if oneof, ok := m.Content.(*ExprDef_Column); ok { + if err := oneof.Column.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + } else { + v := &Column{} + if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + m.Content = &ExprDef_Column{Column: v} + } + iNdEx = postIndex + case 3: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Literal", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if oneof, ok := m.Content.(*ExprDef_Literal); ok { + if err := oneof.Literal.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + } else { + v := &Literal{} + if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + m.Content = &ExprDef_Literal{Literal: v} + } + iNdEx = postIndex + case 4: + if wireType != 2 { + return 
fmt.Errorf("proto: wrong wireType = %d for field DynamicColumn", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if oneof, ok := m.Content.(*ExprDef_DynamicColumn); ok { + if err := oneof.DynamicColumn.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + } else { + v := &DynamicColumn{} + if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + m.Content = &ExprDef_DynamicColumn{DynamicColumn: v} + } + iNdEx = postIndex + case 5: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field AggregationFunction", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if oneof, ok := m.Content.(*ExprDef_AggregationFunction); ok { + if err := oneof.AggregationFunction.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + } else { + v := &AggregationFunction{} + if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + m.Content = &ExprDef_AggregationFunction{AggregationFunction: v} + } + iNdEx = postIndex + case 6: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Alias", wireType) + } + var msglen 
int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if oneof, ok := m.Content.(*ExprDef_Alias); ok { + if err := oneof.Alias.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + } else { + v := &Alias{} + if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + m.Content = &ExprDef_Alias{Alias: v} + } + iNdEx = postIndex + case 7: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Duration", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if oneof, ok := m.Content.(*ExprDef_Duration); ok { + if err := oneof.Duration.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + } else { + v := &DurationExpr{} + if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + m.Content = &ExprDef_Duration{Duration: v} + } + iNdEx = postIndex + case 8: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Convert", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen 
|= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if oneof, ok := m.Content.(*ExprDef_Convert); ok { + if err := oneof.Convert.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + } else { + v := &ConvertExpr{} + if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + m.Content = &ExprDef_Convert{Convert: v} + } + iNdEx = postIndex + case 9: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field If", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if oneof, ok := m.Content.(*ExprDef_If); ok { + if err := oneof.If.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + } else { + v := &IfExpr{} + if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + m.Content = &ExprDef_If{If: v} + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) 
+ iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *BinaryExpr) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: BinaryExpr: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: BinaryExpr: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Left", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Left == nil { + m.Left = &Expr{} + } + if err := m.Left.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Right", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return 
protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Right == nil { + m.Right = &Expr{} + } + if err := m.Right.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Op", wireType) + } + m.Op = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Op |= Op(b&0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *IfExpr) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: IfExpr: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: IfExpr: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Condition", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + 
iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Condition == nil { + m.Condition = &Expr{} + } + if err := m.Condition.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Then", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Then == nil { + m.Then = &Expr{} + } + if err := m.Then.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 3: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Else", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Else == nil { + m.Else = &Expr{} + } + if err := m.Else.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil 
{ + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *ConvertExpr) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: ConvertExpr: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: ConvertExpr: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Expr", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Expr == nil { + m.Expr = &Expr{} + } + if err := m.Expr.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Type", wireType) + } + m.Type = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return 
io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Type |= Type(b&0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *Column) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Column: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Column: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Name", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Name = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err 
!= nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *Literal) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Literal: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Literal: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Content", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Content == nil { + m.Content = &LiteralContent{} + } + if err := m.Content.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return 
io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *LiteralContent) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: LiteralContent: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: LiteralContent: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field NullValue", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if oneof, ok := m.Value.(*LiteralContent_NullValue); ok { + if err := oneof.NullValue.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + } else { + v := &Null{} + if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + m.Value = &LiteralContent_NullValue{NullValue: v} + } + iNdEx = postIndex + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field BoolValue", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 
protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + b := bool(v != 0) + m.Value = &LiteralContent_BoolValue{BoolValue: b} + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Int32Value", wireType) + } + var v int32 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= int32(b&0x7F) << shift + if b < 0x80 { + break + } + } + m.Value = &LiteralContent_Int32Value{Int32Value: v} + case 4: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Uint32Value", wireType) + } + var v uint32 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= uint32(b&0x7F) << shift + if b < 0x80 { + break + } + } + m.Value = &LiteralContent_Uint32Value{Uint32Value: v} + case 5: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Int64Value", wireType) + } + var v int64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= int64(b&0x7F) << shift + if b < 0x80 { + break + } + } + m.Value = &LiteralContent_Int64Value{Int64Value: v} + case 6: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Uint64Value", wireType) + } + var v uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + m.Value = &LiteralContent_Uint64Value{Uint64Value: v} + case 7: + if wireType != 5 { + return 
fmt.Errorf("proto: wrong wireType = %d for field FloatValue", wireType) + } + var v uint32 + if (iNdEx + 4) > l { + return io.ErrUnexpectedEOF + } + v = uint32(binary.LittleEndian.Uint32(dAtA[iNdEx:])) + iNdEx += 4 + m.Value = &LiteralContent_FloatValue{FloatValue: float32(math.Float32frombits(v))} + case 8: + if wireType != 1 { + return fmt.Errorf("proto: wrong wireType = %d for field DoubleValue", wireType) + } + var v uint64 + if (iNdEx + 8) > l { + return io.ErrUnexpectedEOF + } + v = uint64(binary.LittleEndian.Uint64(dAtA[iNdEx:])) + iNdEx += 8 + m.Value = &LiteralContent_DoubleValue{DoubleValue: float64(math.Float64frombits(v))} + case 9: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field BinaryValue", wireType) + } + var byteLen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + byteLen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if byteLen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + byteLen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + v := make([]byte, postIndex-iNdEx) + copy(v, dAtA[iNdEx:postIndex]) + m.Value = &LiteralContent_BinaryValue{BinaryValue: v} + iNdEx = postIndex + case 10: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field StringValue", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + 
return io.ErrUnexpectedEOF + } + m.Value = &LiteralContent_StringValue{StringValue: string(dAtA[iNdEx:postIndex])} + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *Null) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Null: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Null: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) 
+ iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *Alias) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Alias: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Alias: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Name", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Name = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Expr", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > 
l { + return io.ErrUnexpectedEOF + } + if m.Expr == nil { + m.Expr = &Expr{} + } + if err := m.Expr.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *DynamicColumn) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: DynamicColumn: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: DynamicColumn: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Name", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Name = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex 
+ default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *AggregationFunction) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: AggregationFunction: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: AggregationFunction: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Type", wireType) + } + m.Type = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Type |= AggregationFunction_Type(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Expr", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex 
< 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Expr == nil { + m.Expr = &Expr{} + } + if err := m.Expr.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *DurationExpr) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: DurationExpr: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: DurationExpr: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Milliseconds", wireType) + } + m.Milliseconds = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Milliseconds |= int64(b&0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return 
io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} diff --git a/gen/proto/go/frostdb/table/v1alpha1/config.pb.go b/gen/proto/go/frostdb/table/v1alpha1/config.pb.go new file mode 100644 index 000000000..7e9139bd8 --- /dev/null +++ b/gen/proto/go/frostdb/table/v1alpha1/config.pb.go @@ -0,0 +1,255 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.34.2 +// protoc (unknown) +// source: frostdb/table/v1alpha1/config.proto + +package tablev1alpha1 + +import ( + v1alpha1 "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha1" + v1alpha2 "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha2" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// TableConfig is the configuration information for a table. +type TableConfig struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Schema of the table. + // + // Types that are assignable to Schema: + // + // *TableConfig_DeprecatedSchema + // *TableConfig_SchemaV2 + Schema isTableConfig_Schema `protobuf_oneof:"schema"` + // RowGroupSize is the size in rows of row groups that are written to Parquet files. + RowGroupSize uint64 `protobuf:"varint,3,opt,name=row_group_size,json=rowGroupSize,proto3" json:"row_group_size,omitempty"` + // BlockReaderLimit is the concurrency limit of the number of Go routines spawned when reading storage blocks. 
+ BlockReaderLimit uint64 `protobuf:"varint,4,opt,name=block_reader_limit,json=blockReaderLimit,proto3" json:"block_reader_limit,omitempty"` + // DisableWal disables the write ahead log for this table. + DisableWal bool `protobuf:"varint,5,opt,name=disable_wal,json=disableWal,proto3" json:"disable_wal,omitempty"` +} + +func (x *TableConfig) Reset() { + *x = TableConfig{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_table_v1alpha1_config_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *TableConfig) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*TableConfig) ProtoMessage() {} + +func (x *TableConfig) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_table_v1alpha1_config_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use TableConfig.ProtoReflect.Descriptor instead. 
+func (*TableConfig) Descriptor() ([]byte, []int) { + return file_frostdb_table_v1alpha1_config_proto_rawDescGZIP(), []int{0} +} + +func (m *TableConfig) GetSchema() isTableConfig_Schema { + if m != nil { + return m.Schema + } + return nil +} + +func (x *TableConfig) GetDeprecatedSchema() *v1alpha1.Schema { + if x, ok := x.GetSchema().(*TableConfig_DeprecatedSchema); ok { + return x.DeprecatedSchema + } + return nil +} + +func (x *TableConfig) GetSchemaV2() *v1alpha2.Schema { + if x, ok := x.GetSchema().(*TableConfig_SchemaV2); ok { + return x.SchemaV2 + } + return nil +} + +func (x *TableConfig) GetRowGroupSize() uint64 { + if x != nil { + return x.RowGroupSize + } + return 0 +} + +func (x *TableConfig) GetBlockReaderLimit() uint64 { + if x != nil { + return x.BlockReaderLimit + } + return 0 +} + +func (x *TableConfig) GetDisableWal() bool { + if x != nil { + return x.DisableWal + } + return false +} + +type isTableConfig_Schema interface { + isTableConfig_Schema() +} + +type TableConfig_DeprecatedSchema struct { + // Deprecated schema definition. Used for backwards compatibility. + DeprecatedSchema *v1alpha1.Schema `protobuf:"bytes,1,opt,name=deprecated_schema,json=deprecatedSchema,proto3,oneof"` +} + +type TableConfig_SchemaV2 struct { + // Schema of the table. Use this field. 
+ SchemaV2 *v1alpha2.Schema `protobuf:"bytes,2,opt,name=schema_v2,json=schemaV2,proto3,oneof"` +} + +func (*TableConfig_DeprecatedSchema) isTableConfig_Schema() {} + +func (*TableConfig_SchemaV2) isTableConfig_Schema() {} + +var File_frostdb_table_v1alpha1_config_proto protoreflect.FileDescriptor + +var file_frostdb_table_v1alpha1_config_proto_rawDesc = []byte{ + 0x0a, 0x23, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x2f, + 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2f, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2e, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x16, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x74, + 0x61, 0x62, 0x6c, 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x1a, 0x24, 0x66, + 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2f, 0x76, 0x31, + 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x1a, 0x24, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x73, 0x63, 0x68, + 0x65, 0x6d, 0x61, 0x2f, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0x2f, 0x73, 0x63, 0x68, + 0x65, 0x6d, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x9c, 0x02, 0x0a, 0x0b, 0x54, 0x61, + 0x62, 0x6c, 0x65, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x4e, 0x0a, 0x11, 0x64, 0x65, 0x70, + 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, + 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x53, + 0x63, 0x68, 0x65, 0x6d, 0x61, 0x48, 0x00, 0x52, 0x10, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, + 0x74, 0x65, 0x64, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x12, 0x3e, 0x0a, 0x09, 0x73, 0x63, 0x68, + 0x65, 0x6d, 0x61, 0x5f, 0x76, 0x32, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x66, + 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 
0x6d, 0x61, 0x2e, 0x76, 0x31, + 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0x2e, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x48, 0x00, 0x52, + 0x08, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x56, 0x32, 0x12, 0x24, 0x0a, 0x0e, 0x72, 0x6f, 0x77, + 0x5f, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x0c, 0x72, 0x6f, 0x77, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x53, 0x69, 0x7a, 0x65, 0x12, + 0x2c, 0x0a, 0x12, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x72, 0x65, 0x61, 0x64, 0x65, 0x72, 0x5f, + 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x10, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x52, 0x65, 0x61, 0x64, 0x65, 0x72, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x12, 0x1f, 0x0a, + 0x0b, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x77, 0x61, 0x6c, 0x18, 0x05, 0x20, 0x01, + 0x28, 0x08, 0x52, 0x0a, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x57, 0x61, 0x6c, 0x42, 0x08, + 0x0a, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x42, 0xf6, 0x01, 0x0a, 0x1a, 0x63, 0x6f, 0x6d, + 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x2e, 0x76, + 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x42, 0x0b, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x50, + 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x51, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, + 0x6f, 0x6d, 0x2f, 0x70, 0x6f, 0x6c, 0x61, 0x72, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x6c, 0x73, 0x2f, + 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x2f, 0x67, 0x6f, 0x2f, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x74, 0x61, 0x62, + 0x6c, 0x65, 0x2f, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x3b, 0x74, 0x61, 0x62, 0x6c, + 0x65, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0xa2, 0x02, 0x03, 0x46, 0x54, 0x58, 0xaa, + 0x02, 0x16, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x54, 0x61, 0x62, 0x6c, 0x65, 0x2e, + 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0xca, 0x02, 0x16, 0x46, 0x72, 0x6f, 0x73, 0x74, + 0x64, 
0x62, 0x5c, 0x54, 0x61, 0x62, 0x6c, 0x65, 0x5c, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, + 0x31, 0xe2, 0x02, 0x22, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x5c, 0x54, 0x61, 0x62, 0x6c, + 0x65, 0x5c, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, 0x02, 0x18, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, + 0x3a, 0x3a, 0x54, 0x61, 0x62, 0x6c, 0x65, 0x3a, 0x3a, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, + 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_frostdb_table_v1alpha1_config_proto_rawDescOnce sync.Once + file_frostdb_table_v1alpha1_config_proto_rawDescData = file_frostdb_table_v1alpha1_config_proto_rawDesc +) + +func file_frostdb_table_v1alpha1_config_proto_rawDescGZIP() []byte { + file_frostdb_table_v1alpha1_config_proto_rawDescOnce.Do(func() { + file_frostdb_table_v1alpha1_config_proto_rawDescData = protoimpl.X.CompressGZIP(file_frostdb_table_v1alpha1_config_proto_rawDescData) + }) + return file_frostdb_table_v1alpha1_config_proto_rawDescData +} + +var file_frostdb_table_v1alpha1_config_proto_msgTypes = make([]protoimpl.MessageInfo, 1) +var file_frostdb_table_v1alpha1_config_proto_goTypes = []any{ + (*TableConfig)(nil), // 0: frostdb.table.v1alpha1.TableConfig + (*v1alpha1.Schema)(nil), // 1: frostdb.schema.v1alpha1.Schema + (*v1alpha2.Schema)(nil), // 2: frostdb.schema.v1alpha2.Schema +} +var file_frostdb_table_v1alpha1_config_proto_depIdxs = []int32{ + 1, // 0: frostdb.table.v1alpha1.TableConfig.deprecated_schema:type_name -> frostdb.schema.v1alpha1.Schema + 2, // 1: frostdb.table.v1alpha1.TableConfig.schema_v2:type_name -> frostdb.schema.v1alpha2.Schema + 2, // [2:2] is the sub-list for method output_type + 2, // [2:2] is the sub-list for method input_type + 2, // [2:2] is the sub-list for extension type_name + 2, // [2:2] is the sub-list for extension extendee + 0, // [0:2] is the sub-list for field type_name +} + +func init() { 
file_frostdb_table_v1alpha1_config_proto_init() } +func file_frostdb_table_v1alpha1_config_proto_init() { + if File_frostdb_table_v1alpha1_config_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_frostdb_table_v1alpha1_config_proto_msgTypes[0].Exporter = func(v any, i int) any { + switch v := v.(*TableConfig); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + file_frostdb_table_v1alpha1_config_proto_msgTypes[0].OneofWrappers = []any{ + (*TableConfig_DeprecatedSchema)(nil), + (*TableConfig_SchemaV2)(nil), + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_frostdb_table_v1alpha1_config_proto_rawDesc, + NumEnums: 0, + NumMessages: 1, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_frostdb_table_v1alpha1_config_proto_goTypes, + DependencyIndexes: file_frostdb_table_v1alpha1_config_proto_depIdxs, + MessageInfos: file_frostdb_table_v1alpha1_config_proto_msgTypes, + }.Build() + File_frostdb_table_v1alpha1_config_proto = out.File + file_frostdb_table_v1alpha1_config_proto_rawDesc = nil + file_frostdb_table_v1alpha1_config_proto_goTypes = nil + file_frostdb_table_v1alpha1_config_proto_depIdxs = nil +} diff --git a/gen/proto/go/frostdb/table/v1alpha1/config_vtproto.pb.go b/gen/proto/go/frostdb/table/v1alpha1/config_vtproto.pb.go new file mode 100644 index 000000000..3fd91e325 --- /dev/null +++ b/gen/proto/go/frostdb/table/v1alpha1/config_vtproto.pb.go @@ -0,0 +1,359 @@ +// Code generated by protoc-gen-go-vtproto. DO NOT EDIT. 
+// protoc-gen-go-vtproto version: v0.6.0 +// source: frostdb/table/v1alpha1/config.proto + +package tablev1alpha1 + +import ( + fmt "fmt" + protohelpers "github.com/planetscale/vtprotobuf/protohelpers" + v1alpha1 "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha1" + v1alpha2 "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha2" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + io "io" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +func (m *TableConfig) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *TableConfig) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *TableConfig) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if vtmsg, ok := m.Schema.(interface { + MarshalToSizedBufferVT([]byte) (int, error) + }); ok { + size, err := vtmsg.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + } + if m.DisableWal { + i-- + if m.DisableWal { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i-- + dAtA[i] = 0x28 + } + if m.BlockReaderLimit != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.BlockReaderLimit)) + i-- + dAtA[i] = 0x20 + } + if m.RowGroupSize != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.RowGroupSize)) + i-- + dAtA[i] = 0x18 + } + return len(dAtA) - i, nil +} + 
+func (m *TableConfig_DeprecatedSchema) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *TableConfig_DeprecatedSchema) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + if m.DeprecatedSchema != nil { + size, err := m.DeprecatedSchema.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} +func (m *TableConfig_SchemaV2) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *TableConfig_SchemaV2) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + if m.SchemaV2 != nil { + size, err := m.SchemaV2.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x12 + } + return len(dAtA) - i, nil +} +func (m *TableConfig) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if vtmsg, ok := m.Schema.(interface{ SizeVT() int }); ok { + n += vtmsg.SizeVT() + } + if m.RowGroupSize != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.RowGroupSize)) + } + if m.BlockReaderLimit != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.BlockReaderLimit)) + } + if m.DisableWal { + n += 2 + } + n += len(m.unknownFields) + return n +} + +func (m *TableConfig_DeprecatedSchema) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.DeprecatedSchema != nil { + l = m.DeprecatedSchema.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + return n +} +func (m *TableConfig_SchemaV2) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.SchemaV2 != nil { + l = m.SchemaV2.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + return n +} +func (m *TableConfig) UnmarshalVT(dAtA []byte) error { + l := 
len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: TableConfig: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: TableConfig: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field DeprecatedSchema", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if oneof, ok := m.Schema.(*TableConfig_DeprecatedSchema); ok { + if err := oneof.DeprecatedSchema.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + } else { + v := &v1alpha1.Schema{} + if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + m.Schema = &TableConfig_DeprecatedSchema{DeprecatedSchema: v} + } + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field SchemaV2", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return 
protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if oneof, ok := m.Schema.(*TableConfig_SchemaV2); ok { + if err := oneof.SchemaV2.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + } else { + v := &v1alpha2.Schema{} + if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + m.Schema = &TableConfig_SchemaV2{SchemaV2: v} + } + iNdEx = postIndex + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field RowGroupSize", wireType) + } + m.RowGroupSize = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.RowGroupSize |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 4: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field BlockReaderLimit", wireType) + } + m.BlockReaderLimit = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.BlockReaderLimit |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 5: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field DisableWal", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + m.DisableWal = bool(v != 0) + default: + iNdEx = preIndex + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = 
append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} diff --git a/gen/proto/go/frostdb/wal/v1alpha1/wal.pb.go b/gen/proto/go/frostdb/wal/v1alpha1/wal.pb.go index 07f9b6db1..9dedf1445 100644 --- a/gen/proto/go/frostdb/wal/v1alpha1/wal.pb.go +++ b/gen/proto/go/frostdb/wal/v1alpha1/wal.pb.go @@ -1,14 +1,13 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.28.0-devel +// protoc-gen-go v1.34.2 // protoc (unknown) // source: frostdb/wal/v1alpha1/wal.proto package walv1alpha1 import ( - v1alpha1 "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha1" - v1alpha2 "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha2" + v1alpha1 "github.com/polarsignals/frostdb/gen/proto/go/frostdb/table/v1alpha1" protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" reflect "reflect" @@ -82,9 +81,11 @@ type Entry struct { // The new-table entry. // // Types that are assignable to EntryType: + // // *Entry_Write_ // *Entry_NewTableBlock_ // *Entry_TableBlockPersisted_ + // *Entry_Snapshot_ EntryType isEntry_EntryType `protobuf_oneof:"entry_type"` } @@ -148,6 +149,13 @@ func (x *Entry) GetTableBlockPersisted() *Entry_TableBlockPersisted { return nil } +func (x *Entry) GetSnapshot() *Entry_Snapshot { + if x, ok := x.GetEntryType().(*Entry_Snapshot_); ok { + return x.Snapshot + } + return nil +} + type isEntry_EntryType interface { isEntry_EntryType() } @@ -167,12 +175,19 @@ type Entry_TableBlockPersisted_ struct { TableBlockPersisted *Entry_TableBlockPersisted `protobuf:"bytes,3,opt,name=table_block_persisted,json=tableBlockPersisted,proto3,oneof"` } +type Entry_Snapshot_ struct { + // Snapshot is set if the entry describes a snapshot. 
+ Snapshot *Entry_Snapshot `protobuf:"bytes,4,opt,name=snapshot,proto3,oneof"` +} + func (*Entry_Write_) isEntry_EntryType() {} func (*Entry_NewTableBlock_) isEntry_EntryType() {} func (*Entry_TableBlockPersisted_) isEntry_EntryType() {} +func (*Entry_Snapshot_) isEntry_EntryType() {} + // The write-type entry. type Entry_Write struct { state protoimpl.MessageState @@ -250,12 +265,8 @@ type Entry_NewTableBlock struct { TableName string `protobuf:"bytes,1,opt,name=table_name,json=tableName,proto3" json:"table_name,omitempty"` // Block ID of the new-table-block. BlockId []byte `protobuf:"bytes,2,opt,name=block_id,json=blockId,proto3" json:"block_id,omitempty"` - // Schema of the new-table-block. - // - // Types that are assignable to Schema: - // *Entry_NewTableBlock_DeprecatedSchema - // *Entry_NewTableBlock_SchemaV2 - Schema isEntry_NewTableBlock_Schema `protobuf_oneof:"schema"` + // Config of the new-table-block. + Config *v1alpha1.TableConfig `protobuf:"bytes,5,opt,name=config,proto3" json:"config,omitempty"` } func (x *Entry_NewTableBlock) Reset() { @@ -304,46 +315,13 @@ func (x *Entry_NewTableBlock) GetBlockId() []byte { return nil } -func (m *Entry_NewTableBlock) GetSchema() isEntry_NewTableBlock_Schema { - if m != nil { - return m.Schema - } - return nil -} - -func (x *Entry_NewTableBlock) GetDeprecatedSchema() *v1alpha1.Schema { - if x, ok := x.GetSchema().(*Entry_NewTableBlock_DeprecatedSchema); ok { - return x.DeprecatedSchema - } - return nil -} - -func (x *Entry_NewTableBlock) GetSchemaV2() *v1alpha2.Schema { - if x, ok := x.GetSchema().(*Entry_NewTableBlock_SchemaV2); ok { - return x.SchemaV2 +func (x *Entry_NewTableBlock) GetConfig() *v1alpha1.TableConfig { + if x != nil { + return x.Config } return nil } -type isEntry_NewTableBlock_Schema interface { - isEntry_NewTableBlock_Schema() -} - -type Entry_NewTableBlock_DeprecatedSchema struct { - // Deprecated schema of the new-table-block. Only used for reading for - // backwards compatibility. 
- DeprecatedSchema *v1alpha1.Schema `protobuf:"bytes,3,opt,name=deprecated_schema,json=deprecatedSchema,proto3,oneof"` -} - -type Entry_NewTableBlock_SchemaV2 struct { - // Schema of the new-table-block. Use this field. - SchemaV2 *v1alpha2.Schema `protobuf:"bytes,4,opt,name=schema_v2,json=schemaV2,proto3,oneof"` -} - -func (*Entry_NewTableBlock_DeprecatedSchema) isEntry_NewTableBlock_Schema() {} - -func (*Entry_NewTableBlock_SchemaV2) isEntry_NewTableBlock_Schema() {} - // The table-block persisted entry. type Entry_TableBlockPersisted struct { state protoimpl.MessageState @@ -354,6 +332,11 @@ type Entry_TableBlockPersisted struct { TableName string `protobuf:"bytes,1,opt,name=table_name,json=tableName,proto3" json:"table_name,omitempty"` // Block ID of the new-table-block. BlockId []byte `protobuf:"bytes,2,opt,name=block_id,json=blockId,proto3" json:"block_id,omitempty"` + // NextTx is the next non-persisted transaction at the time of block + // persistence. If the block has been persisted, any txn id < next_tx is + // considered persisted or not relevant to this table (i.e. it can be a + // non-persisted txn from another table). + NextTx uint64 `protobuf:"varint,3,opt,name=next_tx,json=nextTx,proto3" json:"next_tx,omitempty"` } func (x *Entry_TableBlockPersisted) Reset() { @@ -402,78 +385,134 @@ func (x *Entry_TableBlockPersisted) GetBlockId() []byte { return nil } +func (x *Entry_TableBlockPersisted) GetNextTx() uint64 { + if x != nil { + return x.NextTx + } + return 0 +} + +// The snapshot entry. +type Entry_Snapshot struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The tx the snapshot was taken at. 
+ Tx uint64 `protobuf:"varint,1,opt,name=tx,proto3" json:"tx,omitempty"` +} + +func (x *Entry_Snapshot) Reset() { + *x = Entry_Snapshot{} + if protoimpl.UnsafeEnabled { + mi := &file_frostdb_wal_v1alpha1_wal_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Entry_Snapshot) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Entry_Snapshot) ProtoMessage() {} + +func (x *Entry_Snapshot) ProtoReflect() protoreflect.Message { + mi := &file_frostdb_wal_v1alpha1_wal_proto_msgTypes[5] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Entry_Snapshot.ProtoReflect.Descriptor instead. +func (*Entry_Snapshot) Descriptor() ([]byte, []int) { + return file_frostdb_wal_v1alpha1_wal_proto_rawDescGZIP(), []int{1, 3} +} + +func (x *Entry_Snapshot) GetTx() uint64 { + if x != nil { + return x.Tx + } + return 0 +} + var File_frostdb_wal_v1alpha1_wal_proto protoreflect.FileDescriptor var file_frostdb_wal_v1alpha1_wal_proto_rawDesc = []byte{ 0x0a, 0x1e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x77, 0x61, 0x6c, 0x2f, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2f, 0x77, 0x61, 0x6c, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x14, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x77, 0x61, 0x6c, 0x2e, 0x76, 0x31, - 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x1a, 0x24, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, - 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2f, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2f, - 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x24, 0x66, 0x72, - 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2f, 0x76, 0x31, 0x61, - 0x6c, 0x70, 0x68, 0x61, 0x32, 0x2f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x22, 
0x3b, 0x0a, 0x06, 0x52, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x12, 0x31, 0x0a, 0x05, - 0x65, 0x6e, 0x74, 0x72, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x66, 0x72, - 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x77, 0x61, 0x6c, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, - 0x61, 0x31, 0x2e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x05, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x22, - 0x95, 0x05, 0x0a, 0x05, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x39, 0x0a, 0x05, 0x77, 0x72, 0x69, - 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, - 0x64, 0x62, 0x2e, 0x77, 0x61, 0x6c, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, - 0x45, 0x6e, 0x74, 0x72, 0x79, 0x2e, 0x57, 0x72, 0x69, 0x74, 0x65, 0x48, 0x00, 0x52, 0x05, 0x77, - 0x72, 0x69, 0x74, 0x65, 0x12, 0x53, 0x0a, 0x0f, 0x6e, 0x65, 0x77, 0x5f, 0x74, 0x61, 0x62, 0x6c, - 0x65, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x29, 0x2e, - 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x77, 0x61, 0x6c, 0x2e, 0x76, 0x31, 0x61, 0x6c, - 0x70, 0x68, 0x61, 0x31, 0x2e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x2e, 0x4e, 0x65, 0x77, 0x54, 0x61, - 0x62, 0x6c, 0x65, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x48, 0x00, 0x52, 0x0d, 0x6e, 0x65, 0x77, 0x54, - 0x61, 0x62, 0x6c, 0x65, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x12, 0x65, 0x0a, 0x15, 0x74, 0x61, 0x62, - 0x6c, 0x65, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x65, 0x72, 0x73, 0x69, 0x73, 0x74, - 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x2f, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, - 0x64, 0x62, 0x2e, 0x77, 0x61, 0x6c, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, - 0x45, 0x6e, 0x74, 0x72, 0x79, 0x2e, 0x54, 0x61, 0x62, 0x6c, 0x65, 0x42, 0x6c, 0x6f, 0x63, 0x6b, - 0x50, 0x65, 0x72, 0x73, 0x69, 0x73, 0x74, 0x65, 0x64, 0x48, 0x00, 0x52, 0x13, 0x74, 0x61, 0x62, - 0x6c, 0x65, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x50, 0x65, 0x72, 0x73, 0x69, 0x73, 0x74, 0x65, 0x64, - 0x1a, 0x50, 0x0a, 0x05, 0x57, 0x72, 0x69, 0x74, 0x65, 
0x12, 0x1d, 0x0a, 0x0a, 0x74, 0x61, 0x62, - 0x6c, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x74, - 0x61, 0x62, 0x6c, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x64, 0x61, 0x74, 0x61, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x64, 0x61, 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, - 0x61, 0x72, 0x72, 0x6f, 0x77, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x61, 0x72, 0x72, - 0x6f, 0x77, 0x1a, 0xe3, 0x01, 0x0a, 0x0d, 0x4e, 0x65, 0x77, 0x54, 0x61, 0x62, 0x6c, 0x65, 0x42, - 0x6c, 0x6f, 0x63, 0x6b, 0x12, 0x1d, 0x0a, 0x0a, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x6e, 0x61, - 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x4e, - 0x61, 0x6d, 0x65, 0x12, 0x19, 0x0a, 0x08, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x64, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x49, 0x64, 0x12, 0x4e, - 0x0a, 0x11, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x73, 0x63, 0x68, - 0x65, 0x6d, 0x61, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x66, 0x72, 0x6f, 0x73, - 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, - 0x68, 0x61, 0x31, 0x2e, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x48, 0x00, 0x52, 0x10, 0x64, 0x65, - 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x12, 0x3e, - 0x0a, 0x09, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5f, 0x76, 0x32, 0x18, 0x04, 0x20, 0x01, 0x28, - 0x0b, 0x32, 0x1f, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x73, 0x63, 0x68, 0x65, - 0x6d, 0x61, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x32, 0x2e, 0x53, 0x63, 0x68, 0x65, - 0x6d, 0x61, 0x48, 0x00, 0x52, 0x08, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x56, 0x32, 0x42, 0x08, - 0x0a, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x1a, 0x4f, 0x0a, 0x13, 0x54, 0x61, 0x62, 0x6c, + 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x1a, 0x23, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 
0x2f, + 0x74, 0x61, 0x62, 0x6c, 0x65, 0x2f, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2f, 0x63, + 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x41, 0x0a, 0x06, 0x52, + 0x65, 0x63, 0x6f, 0x72, 0x64, 0x12, 0x31, 0x0a, 0x05, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x77, + 0x61, 0x6c, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x45, 0x6e, 0x74, 0x72, + 0x79, 0x52, 0x05, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x4a, 0x04, 0x08, 0x02, 0x10, 0x03, 0x22, 0xbd, + 0x05, 0x0a, 0x05, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x39, 0x0a, 0x05, 0x77, 0x72, 0x69, 0x74, + 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, + 0x62, 0x2e, 0x77, 0x61, 0x6c, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x45, + 0x6e, 0x74, 0x72, 0x79, 0x2e, 0x57, 0x72, 0x69, 0x74, 0x65, 0x48, 0x00, 0x52, 0x05, 0x77, 0x72, + 0x69, 0x74, 0x65, 0x12, 0x53, 0x0a, 0x0f, 0x6e, 0x65, 0x77, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x29, 0x2e, 0x66, + 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x77, 0x61, 0x6c, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, + 0x68, 0x61, 0x31, 0x2e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x2e, 0x4e, 0x65, 0x77, 0x54, 0x61, 0x62, + 0x6c, 0x65, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x48, 0x00, 0x52, 0x0d, 0x6e, 0x65, 0x77, 0x54, 0x61, + 0x62, 0x6c, 0x65, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x12, 0x65, 0x0a, 0x15, 0x74, 0x61, 0x62, 0x6c, + 0x65, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x65, 0x72, 0x73, 0x69, 0x73, 0x74, 0x65, + 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x2f, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, + 0x62, 0x2e, 0x77, 0x61, 0x6c, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x45, + 0x6e, 0x74, 0x72, 0x79, 0x2e, 0x54, 0x61, 0x62, 0x6c, 0x65, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x50, + 0x65, 0x72, 0x73, 0x69, 0x73, 
0x74, 0x65, 0x64, 0x48, 0x00, 0x52, 0x13, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x50, 0x65, 0x72, 0x73, 0x69, 0x73, 0x74, 0x65, 0x64, 0x12, - 0x1d, 0x0a, 0x0a, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x09, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x19, - 0x0a, 0x08, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, - 0x52, 0x07, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x49, 0x64, 0x42, 0x0c, 0x0a, 0x0a, 0x65, 0x6e, 0x74, - 0x72, 0x79, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x42, 0xe5, 0x01, 0x0a, 0x18, 0x63, 0x6f, 0x6d, 0x2e, - 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x77, 0x61, 0x6c, 0x2e, 0x76, 0x31, 0x61, 0x6c, - 0x70, 0x68, 0x61, 0x31, 0x42, 0x08, 0x57, 0x61, 0x6c, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, - 0x5a, 0x4d, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x70, 0x6f, 0x6c, - 0x61, 0x72, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x6c, 0x73, 0x2f, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, - 0x62, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x67, 0x6f, 0x2f, 0x66, - 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x77, 0x61, 0x6c, 0x2f, 0x76, 0x31, 0x61, 0x6c, 0x70, - 0x68, 0x61, 0x31, 0x3b, 0x77, 0x61, 0x6c, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0xa2, - 0x02, 0x03, 0x46, 0x57, 0x58, 0xaa, 0x02, 0x14, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, - 0x57, 0x61, 0x6c, 0x2e, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0xca, 0x02, 0x14, 0x46, - 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x5c, 0x57, 0x61, 0x6c, 0x5c, 0x56, 0x31, 0x61, 0x6c, 0x70, - 0x68, 0x61, 0x31, 0xe2, 0x02, 0x20, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x5c, 0x57, 0x61, - 0x6c, 0x5c, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, 0x02, 0x16, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, - 0x3a, 0x3a, 0x57, 0x61, 0x6c, 0x3a, 0x3a, 0x56, 0x31, 0x61, 0x6c, 0x70, 
0x68, 0x61, 0x31, 0x62, - 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x42, 0x0a, 0x08, 0x73, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x24, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x77, 0x61, 0x6c, 0x2e, + 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x2e, 0x53, + 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x48, 0x00, 0x52, 0x08, 0x73, 0x6e, 0x61, 0x70, 0x73, + 0x68, 0x6f, 0x74, 0x1a, 0x50, 0x0a, 0x05, 0x57, 0x72, 0x69, 0x74, 0x65, 0x12, 0x1d, 0x0a, 0x0a, + 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x09, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x64, + 0x61, 0x74, 0x61, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x64, 0x61, 0x74, 0x61, 0x12, + 0x14, 0x0a, 0x05, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, + 0x61, 0x72, 0x72, 0x6f, 0x77, 0x1a, 0x92, 0x01, 0x0a, 0x0d, 0x4e, 0x65, 0x77, 0x54, 0x61, 0x62, + 0x6c, 0x65, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x12, 0x1d, 0x0a, 0x0a, 0x74, 0x61, 0x62, 0x6c, 0x65, + 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x74, 0x61, 0x62, + 0x6c, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x19, 0x0a, 0x08, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x49, + 0x64, 0x12, 0x3b, 0x0a, 0x06, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x18, 0x05, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x23, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x74, 0x61, 0x62, 0x6c, + 0x65, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x54, 0x61, 0x62, 0x6c, 0x65, + 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x52, 0x06, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x4a, 0x04, + 0x08, 0x03, 0x10, 0x04, 0x4a, 0x04, 0x08, 0x04, 0x10, 0x05, 0x1a, 0x68, 0x0a, 0x13, 0x54, 0x61, + 0x62, 0x6c, 0x65, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x50, 0x65, 0x72, 
0x73, 0x69, 0x73, 0x74, 0x65, + 0x64, 0x12, 0x1d, 0x0a, 0x0a, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x4e, 0x61, 0x6d, 0x65, + 0x12, 0x19, 0x0a, 0x08, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x0c, 0x52, 0x07, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x49, 0x64, 0x12, 0x17, 0x0a, 0x07, 0x6e, + 0x65, 0x78, 0x74, 0x5f, 0x74, 0x78, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x06, 0x6e, 0x65, + 0x78, 0x74, 0x54, 0x78, 0x1a, 0x1a, 0x0a, 0x08, 0x53, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, + 0x12, 0x0e, 0x0a, 0x02, 0x74, 0x78, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x02, 0x74, 0x78, + 0x42, 0x0c, 0x0a, 0x0a, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x42, 0xe5, + 0x01, 0x0a, 0x18, 0x63, 0x6f, 0x6d, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x77, + 0x61, 0x6c, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x42, 0x08, 0x57, 0x61, 0x6c, + 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x4d, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, + 0x63, 0x6f, 0x6d, 0x2f, 0x70, 0x6f, 0x6c, 0x61, 0x72, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x6c, 0x73, + 0x2f, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x2f, 0x67, 0x6f, 0x2f, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2f, 0x77, 0x61, + 0x6c, 0x2f, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x3b, 0x77, 0x61, 0x6c, 0x76, 0x31, + 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0xa2, 0x02, 0x03, 0x46, 0x57, 0x58, 0xaa, 0x02, 0x14, 0x46, + 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x2e, 0x57, 0x61, 0x6c, 0x2e, 0x56, 0x31, 0x61, 0x6c, 0x70, + 0x68, 0x61, 0x31, 0xca, 0x02, 0x14, 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x5c, 0x57, 0x61, + 0x6c, 0x5c, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0xe2, 0x02, 0x20, 0x46, 0x72, 0x6f, + 0x73, 0x74, 0x64, 0x62, 0x5c, 0x57, 0x61, 0x6c, 0x5c, 0x56, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, + 0x31, 
0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, 0x02, 0x16, + 0x46, 0x72, 0x6f, 0x73, 0x74, 0x64, 0x62, 0x3a, 0x3a, 0x57, 0x61, 0x6c, 0x3a, 0x3a, 0x56, 0x31, + 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -488,23 +527,23 @@ func file_frostdb_wal_v1alpha1_wal_proto_rawDescGZIP() []byte { return file_frostdb_wal_v1alpha1_wal_proto_rawDescData } -var file_frostdb_wal_v1alpha1_wal_proto_msgTypes = make([]protoimpl.MessageInfo, 5) -var file_frostdb_wal_v1alpha1_wal_proto_goTypes = []interface{}{ +var file_frostdb_wal_v1alpha1_wal_proto_msgTypes = make([]protoimpl.MessageInfo, 6) +var file_frostdb_wal_v1alpha1_wal_proto_goTypes = []any{ (*Record)(nil), // 0: frostdb.wal.v1alpha1.Record (*Entry)(nil), // 1: frostdb.wal.v1alpha1.Entry (*Entry_Write)(nil), // 2: frostdb.wal.v1alpha1.Entry.Write (*Entry_NewTableBlock)(nil), // 3: frostdb.wal.v1alpha1.Entry.NewTableBlock (*Entry_TableBlockPersisted)(nil), // 4: frostdb.wal.v1alpha1.Entry.TableBlockPersisted - (*v1alpha1.Schema)(nil), // 5: frostdb.schema.v1alpha1.Schema - (*v1alpha2.Schema)(nil), // 6: frostdb.schema.v1alpha2.Schema + (*Entry_Snapshot)(nil), // 5: frostdb.wal.v1alpha1.Entry.Snapshot + (*v1alpha1.TableConfig)(nil), // 6: frostdb.table.v1alpha1.TableConfig } var file_frostdb_wal_v1alpha1_wal_proto_depIdxs = []int32{ 1, // 0: frostdb.wal.v1alpha1.Record.entry:type_name -> frostdb.wal.v1alpha1.Entry 2, // 1: frostdb.wal.v1alpha1.Entry.write:type_name -> frostdb.wal.v1alpha1.Entry.Write 3, // 2: frostdb.wal.v1alpha1.Entry.new_table_block:type_name -> frostdb.wal.v1alpha1.Entry.NewTableBlock 4, // 3: frostdb.wal.v1alpha1.Entry.table_block_persisted:type_name -> frostdb.wal.v1alpha1.Entry.TableBlockPersisted - 5, // 4: frostdb.wal.v1alpha1.Entry.NewTableBlock.deprecated_schema:type_name -> frostdb.schema.v1alpha1.Schema - 6, // 5: frostdb.wal.v1alpha1.Entry.NewTableBlock.schema_v2:type_name -> frostdb.schema.v1alpha2.Schema + 5, // 4: 
frostdb.wal.v1alpha1.Entry.snapshot:type_name -> frostdb.wal.v1alpha1.Entry.Snapshot + 6, // 5: frostdb.wal.v1alpha1.Entry.NewTableBlock.config:type_name -> frostdb.table.v1alpha1.TableConfig 6, // [6:6] is the sub-list for method output_type 6, // [6:6] is the sub-list for method input_type 6, // [6:6] is the sub-list for extension type_name @@ -518,7 +557,7 @@ func file_frostdb_wal_v1alpha1_wal_proto_init() { return } if !protoimpl.UnsafeEnabled { - file_frostdb_wal_v1alpha1_wal_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + file_frostdb_wal_v1alpha1_wal_proto_msgTypes[0].Exporter = func(v any, i int) any { switch v := v.(*Record); i { case 0: return &v.state @@ -530,7 +569,7 @@ func file_frostdb_wal_v1alpha1_wal_proto_init() { return nil } } - file_frostdb_wal_v1alpha1_wal_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + file_frostdb_wal_v1alpha1_wal_proto_msgTypes[1].Exporter = func(v any, i int) any { switch v := v.(*Entry); i { case 0: return &v.state @@ -542,7 +581,7 @@ func file_frostdb_wal_v1alpha1_wal_proto_init() { return nil } } - file_frostdb_wal_v1alpha1_wal_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + file_frostdb_wal_v1alpha1_wal_proto_msgTypes[2].Exporter = func(v any, i int) any { switch v := v.(*Entry_Write); i { case 0: return &v.state @@ -554,7 +593,7 @@ func file_frostdb_wal_v1alpha1_wal_proto_init() { return nil } } - file_frostdb_wal_v1alpha1_wal_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + file_frostdb_wal_v1alpha1_wal_proto_msgTypes[3].Exporter = func(v any, i int) any { switch v := v.(*Entry_NewTableBlock); i { case 0: return &v.state @@ -566,7 +605,7 @@ func file_frostdb_wal_v1alpha1_wal_proto_init() { return nil } } - file_frostdb_wal_v1alpha1_wal_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { + file_frostdb_wal_v1alpha1_wal_proto_msgTypes[4].Exporter = func(v any, i int) any { switch v := 
v.(*Entry_TableBlockPersisted); i { case 0: return &v.state @@ -578,15 +617,24 @@ func file_frostdb_wal_v1alpha1_wal_proto_init() { return nil } } + file_frostdb_wal_v1alpha1_wal_proto_msgTypes[5].Exporter = func(v any, i int) any { + switch v := v.(*Entry_Snapshot); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } } - file_frostdb_wal_v1alpha1_wal_proto_msgTypes[1].OneofWrappers = []interface{}{ + file_frostdb_wal_v1alpha1_wal_proto_msgTypes[1].OneofWrappers = []any{ (*Entry_Write_)(nil), (*Entry_NewTableBlock_)(nil), (*Entry_TableBlockPersisted_)(nil), - } - file_frostdb_wal_v1alpha1_wal_proto_msgTypes[3].OneofWrappers = []interface{}{ - (*Entry_NewTableBlock_DeprecatedSchema)(nil), - (*Entry_NewTableBlock_SchemaV2)(nil), + (*Entry_Snapshot_)(nil), } type x struct{} out := protoimpl.TypeBuilder{ @@ -594,7 +642,7 @@ func file_frostdb_wal_v1alpha1_wal_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_frostdb_wal_v1alpha1_wal_proto_rawDesc, NumEnums: 0, - NumMessages: 5, + NumMessages: 6, NumExtensions: 0, NumServices: 0, }, diff --git a/gen/proto/go/frostdb/wal/v1alpha1/wal_vtproto.pb.go b/gen/proto/go/frostdb/wal/v1alpha1/wal_vtproto.pb.go index 752ddf4c3..0eca2caec 100644 --- a/gen/proto/go/frostdb/wal/v1alpha1/wal_vtproto.pb.go +++ b/gen/proto/go/frostdb/wal/v1alpha1/wal_vtproto.pb.go @@ -1,16 +1,15 @@ // Code generated by protoc-gen-go-vtproto. DO NOT EDIT. 
-// protoc-gen-go-vtproto version: v0.3.0 +// protoc-gen-go-vtproto version: v0.6.0 // source: frostdb/wal/v1alpha1/wal.proto package walv1alpha1 import ( fmt "fmt" - v1alpha1 "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha1" - v1alpha2 "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha2" + protohelpers "github.com/planetscale/vtprotobuf/protohelpers" + v1alpha1 "github.com/polarsignals/frostdb/gen/proto/go/frostdb/table/v1alpha1" protoimpl "google.golang.org/protobuf/runtime/protoimpl" io "io" - bits "math/bits" ) const ( @@ -56,7 +55,7 @@ func (m *Record) MarshalToSizedBufferVT(dAtA []byte) (int, error) { return 0, err } i -= size - i = encodeVarint(dAtA, i, uint64(size)) + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) i-- dAtA[i] = 0xa } @@ -106,14 +105,14 @@ func (m *Entry_Write) MarshalToSizedBufferVT(dAtA []byte) (int, error) { if len(m.Data) > 0 { i -= len(m.Data) copy(dAtA[i:], m.Data) - i = encodeVarint(dAtA, i, uint64(len(m.Data))) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.Data))) i-- dAtA[i] = 0x12 } if len(m.TableName) > 0 { i -= len(m.TableName) copy(dAtA[i:], m.TableName) - i = encodeVarint(dAtA, i, uint64(len(m.TableName))) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.TableName))) i-- dAtA[i] = 0xa } @@ -150,73 +149,33 @@ func (m *Entry_NewTableBlock) MarshalToSizedBufferVT(dAtA []byte) (int, error) { i -= len(m.unknownFields) copy(dAtA[i:], m.unknownFields) } - if vtmsg, ok := m.Schema.(interface { - MarshalToVT([]byte) (int, error) - SizeVT() int - }); ok { - { - size := vtmsg.SizeVT() - i -= size - if _, err := vtmsg.MarshalToVT(dAtA[i:]); err != nil { - return 0, err - } + if m.Config != nil { + size, err := m.Config.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x2a } if len(m.BlockId) > 0 { i -= len(m.BlockId) 
copy(dAtA[i:], m.BlockId) - i = encodeVarint(dAtA, i, uint64(len(m.BlockId))) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.BlockId))) i-- dAtA[i] = 0x12 } if len(m.TableName) > 0 { i -= len(m.TableName) copy(dAtA[i:], m.TableName) - i = encodeVarint(dAtA, i, uint64(len(m.TableName))) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.TableName))) i-- dAtA[i] = 0xa } return len(dAtA) - i, nil } -func (m *Entry_NewTableBlock_DeprecatedSchema) MarshalToVT(dAtA []byte) (int, error) { - size := m.SizeVT() - return m.MarshalToSizedBufferVT(dAtA[:size]) -} - -func (m *Entry_NewTableBlock_DeprecatedSchema) MarshalToSizedBufferVT(dAtA []byte) (int, error) { - i := len(dAtA) - if m.DeprecatedSchema != nil { - size, err := m.DeprecatedSchema.MarshalToSizedBufferVT(dAtA[:i]) - if err != nil { - return 0, err - } - i -= size - i = encodeVarint(dAtA, i, uint64(size)) - i-- - dAtA[i] = 0x1a - } - return len(dAtA) - i, nil -} -func (m *Entry_NewTableBlock_SchemaV2) MarshalToVT(dAtA []byte) (int, error) { - size := m.SizeVT() - return m.MarshalToSizedBufferVT(dAtA[:size]) -} - -func (m *Entry_NewTableBlock_SchemaV2) MarshalToSizedBufferVT(dAtA []byte) (int, error) { - i := len(dAtA) - if m.SchemaV2 != nil { - size, err := m.SchemaV2.MarshalToSizedBufferVT(dAtA[:i]) - if err != nil { - return 0, err - } - i -= size - i = encodeVarint(dAtA, i, uint64(size)) - i-- - dAtA[i] = 0x22 - } - return len(dAtA) - i, nil -} func (m *Entry_TableBlockPersisted) MarshalVT() (dAtA []byte, err error) { if m == nil { return nil, nil @@ -247,23 +206,66 @@ func (m *Entry_TableBlockPersisted) MarshalToSizedBufferVT(dAtA []byte) (int, er i -= len(m.unknownFields) copy(dAtA[i:], m.unknownFields) } + if m.NextTx != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.NextTx)) + i-- + dAtA[i] = 0x18 + } if len(m.BlockId) > 0 { i -= len(m.BlockId) copy(dAtA[i:], m.BlockId) - i = encodeVarint(dAtA, i, uint64(len(m.BlockId))) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.BlockId))) i-- 
dAtA[i] = 0x12 } if len(m.TableName) > 0 { i -= len(m.TableName) copy(dAtA[i:], m.TableName) - i = encodeVarint(dAtA, i, uint64(len(m.TableName))) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.TableName))) i-- dAtA[i] = 0xa } return len(dAtA) - i, nil } +func (m *Entry_Snapshot) MarshalVT() (dAtA []byte, err error) { + if m == nil { + return nil, nil + } + size := m.SizeVT() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBufferVT(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Entry_Snapshot) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *Entry_Snapshot) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + if m == nil { + return 0, nil + } + i := len(dAtA) + _ = i + var l int + _ = l + if m.unknownFields != nil { + i -= len(m.unknownFields) + copy(dAtA[i:], m.unknownFields) + } + if m.Tx != 0 { + i = protohelpers.EncodeVarint(dAtA, i, uint64(m.Tx)) + i-- + dAtA[i] = 0x8 + } + return len(dAtA) - i, nil +} + func (m *Entry) MarshalVT() (dAtA []byte, err error) { if m == nil { return nil, nil @@ -295,16 +297,13 @@ func (m *Entry) MarshalToSizedBufferVT(dAtA []byte) (int, error) { copy(dAtA[i:], m.unknownFields) } if vtmsg, ok := m.EntryType.(interface { - MarshalToVT([]byte) (int, error) - SizeVT() int + MarshalToSizedBufferVT([]byte) (int, error) }); ok { - { - size := vtmsg.SizeVT() - i -= size - if _, err := vtmsg.MarshalToVT(dAtA[i:]); err != nil { - return 0, err - } + size, err := vtmsg.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err } + i -= size } return len(dAtA) - i, nil } @@ -322,7 +321,7 @@ func (m *Entry_Write_) MarshalToSizedBufferVT(dAtA []byte) (int, error) { return 0, err } i -= size - i = encodeVarint(dAtA, i, uint64(size)) + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) i-- dAtA[i] = 0xa } @@ -341,7 +340,7 @@ func (m *Entry_NewTableBlock_) MarshalToSizedBufferVT(dAtA []byte) (int, error) 
return 0, err } i -= size - i = encodeVarint(dAtA, i, uint64(size)) + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) i-- dAtA[i] = 0x12 } @@ -360,22 +359,30 @@ func (m *Entry_TableBlockPersisted_) MarshalToSizedBufferVT(dAtA []byte) (int, e return 0, err } i -= size - i = encodeVarint(dAtA, i, uint64(size)) + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) i-- dAtA[i] = 0x1a } return len(dAtA) - i, nil } -func encodeVarint(dAtA []byte, offset int, v uint64) int { - offset -= sov(v) - base := offset - for v >= 1<<7 { - dAtA[offset] = uint8(v&0x7f | 0x80) - v >>= 7 - offset++ - } - dAtA[offset] = uint8(v) - return base +func (m *Entry_Snapshot_) MarshalToVT(dAtA []byte) (int, error) { + size := m.SizeVT() + return m.MarshalToSizedBufferVT(dAtA[:size]) +} + +func (m *Entry_Snapshot_) MarshalToSizedBufferVT(dAtA []byte) (int, error) { + i := len(dAtA) + if m.Snapshot != nil { + size, err := m.Snapshot.MarshalToSizedBufferVT(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) + i-- + dAtA[i] = 0x22 + } + return len(dAtA) - i, nil } func (m *Record) SizeVT() (n int) { if m == nil { @@ -385,11 +392,9 @@ func (m *Record) SizeVT() (n int) { _ = l if m.Entry != nil { l = m.Entry.SizeVT() - n += 1 + l + sov(uint64(l)) - } - if m.unknownFields != nil { - n += len(m.unknownFields) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } + n += len(m.unknownFields) return n } @@ -401,18 +406,16 @@ func (m *Entry_Write) SizeVT() (n int) { _ = l l = len(m.TableName) if l > 0 { - n += 1 + l + sov(uint64(l)) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } l = len(m.Data) if l > 0 { - n += 1 + l + sov(uint64(l)) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } if m.Arrow { n += 2 } - if m.unknownFields != nil { - n += len(m.unknownFields) - } + n += len(m.unknownFields) return n } @@ -424,62 +427,51 @@ func (m *Entry_NewTableBlock) SizeVT() (n int) { _ = l l = len(m.TableName) if l > 0 { - n += 1 + l + 
sov(uint64(l)) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } l = len(m.BlockId) if l > 0 { - n += 1 + l + sov(uint64(l)) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } - if vtmsg, ok := m.Schema.(interface{ SizeVT() int }); ok { - n += vtmsg.SizeVT() - } - if m.unknownFields != nil { - n += len(m.unknownFields) + if m.Config != nil { + l = m.Config.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } + n += len(m.unknownFields) return n } -func (m *Entry_NewTableBlock_DeprecatedSchema) SizeVT() (n int) { +func (m *Entry_TableBlockPersisted) SizeVT() (n int) { if m == nil { return 0 } var l int _ = l - if m.DeprecatedSchema != nil { - l = m.DeprecatedSchema.SizeVT() - n += 1 + l + sov(uint64(l)) + l = len(m.TableName) + if l > 0 { + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } - return n -} -func (m *Entry_NewTableBlock_SchemaV2) SizeVT() (n int) { - if m == nil { - return 0 + l = len(m.BlockId) + if l > 0 { + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } - var l int - _ = l - if m.SchemaV2 != nil { - l = m.SchemaV2.SizeVT() - n += 1 + l + sov(uint64(l)) + if m.NextTx != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.NextTx)) } + n += len(m.unknownFields) return n } -func (m *Entry_TableBlockPersisted) SizeVT() (n int) { + +func (m *Entry_Snapshot) SizeVT() (n int) { if m == nil { return 0 } var l int _ = l - l = len(m.TableName) - if l > 0 { - n += 1 + l + sov(uint64(l)) - } - l = len(m.BlockId) - if l > 0 { - n += 1 + l + sov(uint64(l)) - } - if m.unknownFields != nil { - n += len(m.unknownFields) + if m.Tx != 0 { + n += 1 + protohelpers.SizeOfVarint(uint64(m.Tx)) } + n += len(m.unknownFields) return n } @@ -492,9 +484,7 @@ func (m *Entry) SizeVT() (n int) { if vtmsg, ok := m.EntryType.(interface{ SizeVT() int }); ok { n += vtmsg.SizeVT() } - if m.unknownFields != nil { - n += len(m.unknownFields) - } + n += len(m.unknownFields) return n } @@ -506,7 +496,7 @@ func (m *Entry_Write_) SizeVT() (n int) { _ = l if m.Write != nil 
{ l = m.Write.SizeVT() - n += 1 + l + sov(uint64(l)) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } return n } @@ -518,7 +508,7 @@ func (m *Entry_NewTableBlock_) SizeVT() (n int) { _ = l if m.NewTableBlock != nil { l = m.NewTableBlock.SizeVT() - n += 1 + l + sov(uint64(l)) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } return n } @@ -530,16 +520,21 @@ func (m *Entry_TableBlockPersisted_) SizeVT() (n int) { _ = l if m.TableBlockPersisted != nil { l = m.TableBlockPersisted.SizeVT() - n += 1 + l + sov(uint64(l)) + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } return n } - -func sov(x uint64) (n int) { - return (bits.Len64(x|1) + 6) / 7 -} -func soz(x uint64) (n int) { - return sov(uint64((x << 1) ^ uint64((int64(x) >> 63)))) +func (m *Entry_Snapshot_) SizeVT() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Snapshot != nil { + l = m.Snapshot.SizeVT() + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + return n } func (m *Record) UnmarshalVT(dAtA []byte) error { l := len(dAtA) @@ -549,7 +544,7 @@ func (m *Record) UnmarshalVT(dAtA []byte) error { var wire uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -577,7 +572,7 @@ func (m *Record) UnmarshalVT(dAtA []byte) error { var msglen int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -590,11 +585,11 @@ func (m *Record) UnmarshalVT(dAtA []byte) error { } } if msglen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + msglen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -608,12 +603,12 @@ func (m *Record) UnmarshalVT(dAtA []byte) error { iNdEx = postIndex default: iNdEx = preIndex - skippy, err := skip(dAtA[iNdEx:]) + 
skippy, err := protohelpers.Skip(dAtA[iNdEx:]) if err != nil { return err } if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if (iNdEx + skippy) > l { return io.ErrUnexpectedEOF @@ -636,7 +631,7 @@ func (m *Entry_Write) UnmarshalVT(dAtA []byte) error { var wire uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -664,7 +659,7 @@ func (m *Entry_Write) UnmarshalVT(dAtA []byte) error { var stringLen uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -678,11 +673,11 @@ func (m *Entry_Write) UnmarshalVT(dAtA []byte) error { } intStringLen := int(stringLen) if intStringLen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + intStringLen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -696,7 +691,7 @@ func (m *Entry_Write) UnmarshalVT(dAtA []byte) error { var byteLen int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -709,11 +704,11 @@ func (m *Entry_Write) UnmarshalVT(dAtA []byte) error { } } if byteLen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + byteLen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -730,7 +725,7 @@ func (m *Entry_Write) UnmarshalVT(dAtA []byte) error { var v int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -745,12 +740,12 @@ func (m *Entry_Write) UnmarshalVT(dAtA []byte) 
error { m.Arrow = bool(v != 0) default: iNdEx = preIndex - skippy, err := skip(dAtA[iNdEx:]) + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) if err != nil { return err } if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if (iNdEx + skippy) > l { return io.ErrUnexpectedEOF @@ -773,7 +768,7 @@ func (m *Entry_NewTableBlock) UnmarshalVT(dAtA []byte) error { var wire uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -801,7 +796,7 @@ func (m *Entry_NewTableBlock) UnmarshalVT(dAtA []byte) error { var stringLen uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -815,11 +810,11 @@ func (m *Entry_NewTableBlock) UnmarshalVT(dAtA []byte) error { } intStringLen := int(stringLen) if intStringLen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + intStringLen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -833,7 +828,7 @@ func (m *Entry_NewTableBlock) UnmarshalVT(dAtA []byte) error { var byteLen int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -846,11 +841,11 @@ func (m *Entry_NewTableBlock) UnmarshalVT(dAtA []byte) error { } } if byteLen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + byteLen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -860,14 +855,14 @@ func (m *Entry_NewTableBlock) UnmarshalVT(dAtA []byte) error { m.BlockId = []byte{} } iNdEx = postIndex - case 3: + case 5: if wireType != 2 { - return 
fmt.Errorf("proto: wrong wireType = %d for field DeprecatedSchema", wireType) + return fmt.Errorf("proto: wrong wireType = %d for field Config", wireType) } var msglen int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -880,76 +875,30 @@ func (m *Entry_NewTableBlock) UnmarshalVT(dAtA []byte) error { } } if msglen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + msglen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF } - if oneof, ok := m.Schema.(*Entry_NewTableBlock_DeprecatedSchema); ok { - if err := oneof.DeprecatedSchema.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { - return err - } - } else { - v := &v1alpha1.Schema{} - if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { - return err - } - m.Schema = &Entry_NewTableBlock_DeprecatedSchema{v} - } - iNdEx = postIndex - case 4: - if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field SchemaV2", wireType) - } - var msglen int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflow - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - msglen |= int(b&0x7F) << shift - if b < 0x80 { - break - } - } - if msglen < 0 { - return ErrInvalidLength - } - postIndex := iNdEx + msglen - if postIndex < 0 { - return ErrInvalidLength - } - if postIndex > l { - return io.ErrUnexpectedEOF + if m.Config == nil { + m.Config = &v1alpha1.TableConfig{} } - if oneof, ok := m.Schema.(*Entry_NewTableBlock_SchemaV2); ok { - if err := oneof.SchemaV2.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { - return err - } - } else { - v := &v1alpha2.Schema{} - if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { - return err - } - m.Schema = &Entry_NewTableBlock_SchemaV2{v} + if err := 
m.Config.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err } iNdEx = postIndex default: iNdEx = preIndex - skippy, err := skip(dAtA[iNdEx:]) + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) if err != nil { return err } if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if (iNdEx + skippy) > l { return io.ErrUnexpectedEOF @@ -972,7 +921,7 @@ func (m *Entry_TableBlockPersisted) UnmarshalVT(dAtA []byte) error { var wire uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -1000,7 +949,7 @@ func (m *Entry_TableBlockPersisted) UnmarshalVT(dAtA []byte) error { var stringLen uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -1014,11 +963,11 @@ func (m *Entry_TableBlockPersisted) UnmarshalVT(dAtA []byte) error { } intStringLen := int(stringLen) if intStringLen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + intStringLen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -1032,7 +981,7 @@ func (m *Entry_TableBlockPersisted) UnmarshalVT(dAtA []byte) error { var byteLen int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -1045,11 +994,11 @@ func (m *Entry_TableBlockPersisted) UnmarshalVT(dAtA []byte) error { } } if byteLen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + byteLen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -1059,14 +1008,103 @@ func (m *Entry_TableBlockPersisted) UnmarshalVT(dAtA []byte) 
error { m.BlockId = []byte{} } iNdEx = postIndex + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field NextTx", wireType) + } + m.NextTx = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.NextTx |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } default: iNdEx = preIndex - skippy, err := skip(dAtA[iNdEx:]) + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) if err != nil { return err } if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.unknownFields = append(m.unknownFields, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *Entry_Snapshot) UnmarshalVT(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Entry_Snapshot: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Entry_Snapshot: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Tx", wireType) + } + m.Tx = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Tx |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, 
err := protohelpers.Skip(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return protohelpers.ErrInvalidLength } if (iNdEx + skippy) > l { return io.ErrUnexpectedEOF @@ -1089,7 +1127,7 @@ func (m *Entry) UnmarshalVT(dAtA []byte) error { var wire uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -1117,7 +1155,7 @@ func (m *Entry) UnmarshalVT(dAtA []byte) error { var msglen int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -1130,11 +1168,11 @@ func (m *Entry) UnmarshalVT(dAtA []byte) error { } } if msglen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + msglen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -1148,7 +1186,7 @@ func (m *Entry) UnmarshalVT(dAtA []byte) error { if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { return err } - m.EntryType = &Entry_Write_{v} + m.EntryType = &Entry_Write_{Write: v} } iNdEx = postIndex case 2: @@ -1158,7 +1196,7 @@ func (m *Entry) UnmarshalVT(dAtA []byte) error { var msglen int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -1171,11 +1209,11 @@ func (m *Entry) UnmarshalVT(dAtA []byte) error { } } if msglen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + msglen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -1189,7 +1227,7 @@ func (m *Entry) UnmarshalVT(dAtA []byte) error { if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { return err } - m.EntryType = 
&Entry_NewTableBlock_{v} + m.EntryType = &Entry_NewTableBlock_{NewTableBlock: v} } iNdEx = postIndex case 3: @@ -1199,7 +1237,7 @@ func (m *Entry) UnmarshalVT(dAtA []byte) error { var msglen int for shift := uint(0); ; shift += 7 { if shift >= 64 { - return ErrIntOverflow + return protohelpers.ErrIntOverflow } if iNdEx >= l { return io.ErrUnexpectedEOF @@ -1212,11 +1250,11 @@ func (m *Entry) UnmarshalVT(dAtA []byte) error { } } if msglen < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } postIndex := iNdEx + msglen if postIndex < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if postIndex > l { return io.ErrUnexpectedEOF @@ -1230,17 +1268,58 @@ func (m *Entry) UnmarshalVT(dAtA []byte) error { if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { return err } - m.EntryType = &Entry_TableBlockPersisted_{v} + m.EntryType = &Entry_TableBlockPersisted_{TableBlockPersisted: v} + } + iNdEx = postIndex + case 4: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Snapshot", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if oneof, ok := m.EntryType.(*Entry_Snapshot_); ok { + if err := oneof.Snapshot.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + } else { + v := &Entry_Snapshot{} + if err := v.UnmarshalVT(dAtA[iNdEx:postIndex]); err != nil { + return err + } + m.EntryType = &Entry_Snapshot_{Snapshot: v} } iNdEx = postIndex default: iNdEx = preIndex - skippy, err := skip(dAtA[iNdEx:]) + skippy, err := protohelpers.Skip(dAtA[iNdEx:]) if err != nil 
{ return err } if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLength + return protohelpers.ErrInvalidLength } if (iNdEx + skippy) > l { return io.ErrUnexpectedEOF @@ -1255,87 +1334,3 @@ func (m *Entry) UnmarshalVT(dAtA []byte) error { } return nil } -func skip(dAtA []byte) (n int, err error) { - l := len(dAtA) - iNdEx := 0 - depth := 0 - for iNdEx < l { - var wire uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflow - } - if iNdEx >= l { - return 0, io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - wire |= (uint64(b) & 0x7F) << shift - if b < 0x80 { - break - } - } - wireType := int(wire & 0x7) - switch wireType { - case 0: - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflow - } - if iNdEx >= l { - return 0, io.ErrUnexpectedEOF - } - iNdEx++ - if dAtA[iNdEx-1] < 0x80 { - break - } - } - case 1: - iNdEx += 8 - case 2: - var length int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflow - } - if iNdEx >= l { - return 0, io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - length |= (int(b) & 0x7F) << shift - if b < 0x80 { - break - } - } - if length < 0 { - return 0, ErrInvalidLength - } - iNdEx += length - case 3: - depth++ - case 4: - if depth == 0 { - return 0, ErrUnexpectedEndOfGroup - } - depth-- - case 5: - iNdEx += 4 - default: - return 0, fmt.Errorf("proto: illegal wireType %d", wireType) - } - if iNdEx < 0 { - return 0, ErrInvalidLength - } - if depth == 0 { - return iNdEx, nil - } - } - return 0, io.ErrUnexpectedEOF -} - -var ( - ErrInvalidLength = fmt.Errorf("proto: negative length found during unmarshaling") - ErrIntOverflow = fmt.Errorf("proto: integer overflow") - ErrUnexpectedEndOfGroup = fmt.Errorf("proto: unexpected end of group") -) diff --git a/go.mod b/go.mod index e11df3e83..94ccb7bcd 100644 --- a/go.mod +++ b/go.mod @@ -1,79 +1,94 @@ module github.com/polarsignals/frostdb -go 1.19 +go 1.24.1 require ( - 
github.com/RoaringBitmap/roaring v0.9.4 - github.com/apache/arrow/go/v10 v10.0.1 + github.com/RoaringBitmap/roaring v1.9.4 + github.com/apache/arrow-go/v18 v18.2.0 + github.com/cespare/xxhash/v2 v2.3.0 github.com/cockroachdb/datadriven v1.0.2 - github.com/dgryski/go-metro v0.0.0-20211217172704-adc40b04c140 - github.com/dustin/go-humanize v1.0.0 + github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33 + github.com/dustin/go-humanize v1.0.1 github.com/go-kit/log v0.2.1 - github.com/google/btree v1.0.1 - github.com/google/uuid v1.3.0 + github.com/google/uuid v1.6.0 github.com/oklog/ulid v1.3.1 - github.com/olekukonko/tablewriter v0.0.5 - github.com/pingcap/tidb/parser v0.0.0-20220921115303-5aab87679fde - github.com/prometheus/client_golang v1.12.2 - github.com/segmentio/parquet-go v0.0.0-20230209224803-1d85e8136681 - github.com/stretchr/testify v1.8.0 - github.com/thanos-io/objstore v0.0.0-20220715165016-ce338803bc1e - github.com/tidwall/wal v1.1.7 - go.opentelemetry.io/otel v0.20.0 - go.opentelemetry.io/otel/trace v0.20.0 - go.uber.org/goleak v1.1.12 - golang.org/x/exp v0.0.0-20220827204233-334a2380cb91 - golang.org/x/sync v0.0.0-20220819030929-7fc1605a5dde - google.golang.org/protobuf v1.28.1 + github.com/oklog/ulid/v2 v2.1.0 + github.com/parquet-go/parquet-go v0.24.0 + github.com/pingcap/tidb/parser v0.0.0-20231013125129-93a834a6bf8d + github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 + github.com/polarsignals/iceberg-go v0.0.0-20240502213135-2ee70b71e76b + github.com/polarsignals/wal v0.0.0-20240619104840-9da940027f9c + 
github.com/prometheus/client_golang v1.20.5 + github.com/spf13/cobra v1.8.0 + github.com/stretchr/testify v1.10.0 + github.com/tetratelabs/wazero v1.7.3 + github.com/thanos-io/objstore v0.0.0-20240818203309-0363dadfdfb1 + go.opentelemetry.io/otel v1.34.0 + go.opentelemetry.io/otel/trace v1.34.0 + go.uber.org/goleak v1.3.0 + golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c + golang.org/x/sync v0.11.0 + google.golang.org/grpc v1.71.0 + google.golang.org/protobuf v1.36.5 ) require ( - github.com/andybalholm/brotli v1.0.4 // indirect - github.com/apache/thrift v0.16.0 // indirect + github.com/andybalholm/brotli v1.1.1 // indirect + github.com/benbjohnson/clock v1.3.5 // indirect + github.com/benbjohnson/immutable v0.4.0 // indirect github.com/beorn7/perks v1.0.1 // indirect - github.com/bits-and-blooms/bitset v1.2.0 // indirect - github.com/cespare/xxhash/v2 v2.1.2 // indirect + github.com/bits-and-blooms/bitset v1.12.0 // indirect + github.com/coreos/etcd v3.3.27+incompatible // indirect + github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf // indirect + github.com/coreos/pkg v0.0.0-20220810130054-c7d1c02cb6cf // indirect github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 // indirect github.com/davecgh/go-spew v1.1.1 // indirect - github.com/efficientgo/tools/core v0.0.0-20220225185207-fe763185946b // indirect - github.com/go-logfmt/logfmt v0.5.1 // indirect - github.com/goccy/go-json v0.9.11 // indirect - github.com/golang/protobuf v1.5.2 // indirect + github.com/efficientgo/core v1.0.0-rc.2 // indirect + github.com/go-logfmt/logfmt v0.6.0 // indirect + github.com/goccy/go-json v0.10.5 
// indirect github.com/golang/snappy v0.0.4 // indirect - github.com/google/flatbuffers v2.0.8+incompatible // indirect - github.com/klauspost/asmfmt v1.3.2 // indirect - github.com/klauspost/compress v1.15.9 // indirect - github.com/klauspost/cpuid/v2 v2.0.9 // indirect - github.com/mattn/go-runewidth v0.0.9 // indirect - github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect - github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect - github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect + github.com/google/flatbuffers v25.2.10+incompatible // indirect + github.com/google/go-cmp v0.6.0 // indirect + github.com/hamba/avro/v2 v2.28.0 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/compress v1.18.0 // indirect + github.com/klauspost/cpuid/v2 v2.2.10 // indirect + github.com/mattn/go-runewidth v0.0.16 // indirect + github.com/mitchellh/mapstructure v1.5.0 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect github.com/mschoch/smat v0.2.0 // indirect - github.com/opentracing/opentracing-go v1.2.0 // indirect - github.com/pierrec/lz4/v4 v4.1.15 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/olekukonko/tablewriter v0.0.5 // indirect + github.com/pierrec/lz4/v4 v4.1.22 // indirect github.com/pingcap/errors v0.11.5-0.20210425183316-da1aaba5fb63 // indirect - 
github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7 // indirect + github.com/pingcap/failpoint v0.0.0-20220801062533-2eaa32854a6c // indirect + github.com/pingcap/log v1.1.0 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/prometheus/client_model v0.2.0 // indirect - github.com/prometheus/common v0.37.0 // indirect - github.com/prometheus/procfs v0.7.3 // indirect - github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 // indirect - github.com/segmentio/encoding v0.3.5 // indirect - github.com/tidwall/gjson v1.10.2 // indirect - github.com/tidwall/match v1.1.1 // indirect - github.com/tidwall/pretty v1.2.0 // indirect - github.com/tidwall/tinylru v1.1.0 // indirect + github.com/prometheus/client_model v0.6.1 // indirect + github.com/prometheus/common v0.55.0 // indirect + github.com/prometheus/procfs v0.15.1 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect + github.com/rivo/uniseg v0.4.7 // indirect + github.com/spf13/pflag v1.0.5 // indirect + github.com/stoewer/go-strcase v1.3.0 // indirect github.com/zeebo/xxh3 v1.0.2 // indirect - go.uber.org/atomic v1.9.0 // indirect - go.uber.org/multierr v1.6.0 // indirect - go.uber.org/zap v1.18.1 // indirect - golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect - golang.org/x/sys v0.0.0-20220829200755-d48e67d00261 // indirect - golang.org/x/text v0.3.7 // indirect - golang.org/x/tools v0.1.12 // indirect - golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f // indirect - gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect - gopkg.in/yaml.v2 v2.4.0 // indirect + 
go.etcd.io/bbolt v1.3.6 // indirect + go.uber.org/atomic v1.11.0 // indirect + go.uber.org/multierr v1.11.0 // indirect + go.uber.org/zap v1.25.0 // indirect + golang.org/x/mod v0.23.0 // indirect + golang.org/x/net v0.35.0 // indirect + golang.org/x/sys v0.31.0 // indirect + golang.org/x/text v0.22.0 // indirect + golang.org/x/tools v0.30.0 // indirect + golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f // indirect + gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) + +// Avro has a regression in parsing map[string]any in v2.20.0 and higher. Issue to track this regression: https://github.com/hamba/avro/issues/386 +replace github.com/hamba/avro/v2 => github.com/hamba/avro/v2 v2.19.0 diff --git a/go.sum b/go.sum index b6967f8ed..26d439dde 100644 --- a/go.sum +++ b/go.sum @@ -1,630 +1,262 @@ -cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= -cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= -cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= -cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= -cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= -cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= -cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4= -cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M= -cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc= -cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk= 
-cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= -cloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOYc= -cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= -cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= -cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= -cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= -cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= -cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= -cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= -cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= -cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= -cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= -cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= -cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= -cloud.google.com/go/pubsub v1.3.1/go.mod h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU= -cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= -cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= -cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= -cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= -cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= -dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= -github.com/BurntSushi/toml v0.3.1 
h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= -github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvKIROun94nF7v2cua9qP+thov/7M50KEoeSU= -github.com/RoaringBitmap/roaring v0.9.4 h1:ckvZSX5gwCRaJYBNe7syNawCU5oruY9gQmjXlp4riwo= -github.com/RoaringBitmap/roaring v0.9.4/go.mod h1:icnadbWcNyfEHlYdr+tDlOTih1Bf/h+rzPpv4sbomAA= -github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= -github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= -github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= -github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= -github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= -github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= -github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= -github.com/apache/arrow/go/v10 v10.0.1 h1:n9dERvixoC/1JjDmBcs9FPaEryoANa2sCgVFo6ez9cI= -github.com/apache/arrow/go/v10 v10.0.1/go.mod h1:YvhnlEePVnBS4+0z3fhPfUy7W1Ikj0Ih0vcRo/gZ1M0= -github.com/apache/thrift v0.16.0 h1:qEy6UW60iVOlUy+b9ZR0d5WzUWYGOo4HfopoyBaNmoY= -github.com/apache/thrift v0.16.0/go.mod h1:PHK3hniurgQaNMZYaCLEqXKsYK8upmhPbmdP2FXSqgU= -github.com/benbjohnson/clock v1.1.0 
h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= +github.com/RoaringBitmap/roaring v1.9.4 h1:yhEIoH4YezLYT04s1nHehNO64EKFTop/wBhxv2QzDdQ= +github.com/RoaringBitmap/roaring v1.9.4/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhvejWj5rqITANK90= +github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= +github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= +github.com/apache/arrow-go/v18 v18.2.0 h1:QhWqpgZMKfWOniGPhbUxrHohWnooGURqL2R2Gg4SO1Q= +github.com/apache/arrow-go/v18 v18.2.0/go.mod h1:Ic/01WSwGJWRrdAZcxjBZ5hbApNJ28K96jGYaxzzGUc= +github.com/apache/thrift v0.21.0 h1:tdPmh/ptjE1IJnhbhrcl2++TauVjy242rkV/UzJChnE= +github.com/apache/thrift v0.21.0/go.mod h1:W1H8aR/QRtYNvrPeFXBtobyRkd0/YVhTc6i07XIAgDw= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= -github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= -github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= +github.com/benbjohnson/clock v1.3.5 h1:VvXlSJBzZpA/zum6Sj74hxwYI2DIxRWuNIoXAzHZz5o= +github.com/benbjohnson/clock v1.3.5/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= +github.com/benbjohnson/immutable v0.4.0 h1:CTqXbEerYso8YzVPxmWxh2gnoRQbbB9X1quUC8+vGZA= +github.com/benbjohnson/immutable v0.4.0/go.mod h1:iAr8OjJGLnLmVUr9MZ/rz4PWUy6Ouc2JLYuMArmvAJM= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA= -github.com/bits-and-blooms/bitset v1.2.0/go.mod 
h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= -github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/cespare/xxhash/v2 v2.1.2 h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cbYE= -github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= -github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= -github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= -github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/bits-and-blooms/bitset v1.12.0 h1:U/q1fAF7xXRhFCrhROzIfffYnu+dlS38vCZtmFVPHmA= +github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cockroachdb/datadriven v1.0.2 h1:H9MtNqVoVhvd9nCBwOyDjUEdZCREqbIdCJD93PBm/jA= github.com/cockroachdb/datadriven v1.0.2/go.mod h1:a9RdTaap04u637JoCzcUoIcDmvwSUtcUFtT/C3kJlTU= +github.com/coreos/etcd v3.3.27+incompatible h1:QIudLb9KeBsE5zyYxd1mjzRSkzLg9Wf9QlRwFgd6oTA= +github.com/coreos/etcd v3.3.27+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= +github.com/coreos/go-systemd 
v0.0.0-20191104093116-d3cd4ed1dbcf h1:iW4rZ826su+pqaw19uhpSCzhj44qo35pNgKFGqzDKkU= +github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/coreos/pkg v0.0.0-20220810130054-c7d1c02cb6cf h1:GOPo6vn/vTN+3IwZBvXX0y5doJfSC7My0cdzelyOCsQ= +github.com/coreos/pkg v0.0.0-20220810130054-c7d1c02cb6cf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= +github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 h1:iwZdTE0PVqJCos1vaoKsclOGD3ADKpshg3SRtYBbwso= github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548/go.mod h1:e6NPNENfs9mPDVNRekM7lKScauxd5kXTr1Mfyig6TDM= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dgryski/go-metro v0.0.0-20211217172704-adc40b04c140 h1:y7y0Oa6UawqTFPCDw9JG6pdKt4F9pAhHv0B7FMGaGD0= -github.com/dgryski/go-metro v0.0.0-20211217172704-adc40b04c140/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw= -github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= -github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= -github.com/efficientgo/tools/core v0.0.0-20220225185207-fe763185946b h1:ZHiD4/yE4idlbqvAO6iYCOYRzOMRpxkW+FKasRA3tsQ= -github.com/efficientgo/tools/core v0.0.0-20220225185207-fe763185946b/go.mod h1:OmVcnJopJL8d3X3sSXTiypGoUSgFq1aDGmlrdi9dn/M= -github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= 
-github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= -github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= -github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= -github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= -github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= -github.com/go-kit/log v0.2.0/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0= +github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33 h1:ucRHb6/lvW/+mTEIGbvhcYU3S8+uSNkuMjx/qZFfhtM= +github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/efficientgo/core v1.0.0-rc.2 h1:7j62qHLnrZqO3V3UA0AqOGd5d5aXV3AX6m/NZBHp78I= +github.com/efficientgo/core v1.0.0-rc.2/go.mod h1:FfGdkzWarkuzOlY04VY+bGfb1lWrjaL6x/GLcQ4vJps= github.com/go-kit/log v0.2.1 h1:MRVx0/zhvdseW+Gza6N9rVzU/IVzaeE1SFI4raAhmBU= github.com/go-kit/log v0.2.1/go.mod 
h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0= -github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= -github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= -github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= -github.com/go-logfmt/logfmt v0.5.1 h1:otpy5pqBCBZ1ng9RQ0dPu4PN7ba75Y/aA+UpowDyNVA= -github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= -github.com/go-sql-driver/mysql v1.6.0 h1:BCTh4TKNUYmOmMUcQ3IipzF5prigylS7XXjEkfCHuOE= -github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= -github.com/goccy/go-json v0.9.11 h1:/pAaQDLHEoCq/5FFmSKBswWmK6H0e8g4159Kc/X/nqk= -github.com/goccy/go-json v0.9.11/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= -github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= -github.com/golang/mock v1.4.0/go.mod 
h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= -github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= -github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= -github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4= -github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8= -github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= -github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= -github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= -github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= -github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= -github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= -github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= -github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= -github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= -github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= 
-github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= -github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4= +github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI= +github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= +github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= +github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= -github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= -github.com/google/btree v1.0.1 
h1:gK4Kx5IaGY9CD5sPJ36FHiBJ6ZXl0kilRiiCj+jdYp4= -github.com/google/btree v1.0.1/go.mod h1:xXMiIv4Fb/0kKde4SpL7qlzvu5cMJDRkFDxJfI9uaxA= -github.com/google/flatbuffers v2.0.8+incompatible h1:ivUb1cGomAB101ZM1T0nOiWz9pSrTMoa9+EiY7igmkM= -github.com/google/flatbuffers v2.0.8+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= -github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= -github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= +github.com/google/flatbuffers v25.2.10+incompatible h1:F3vclr7C3HpB1k9mxCGRMXq6FdUalZ6H/pNX4FP1v0Q= +github.com/google/flatbuffers v25.2.10+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/martian v2.1.0+incompatible/go.mod 
h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= -github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= -github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= -github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= -github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= -github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= -github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= -github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= -github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= -github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/hamba/avro/v2 v2.19.0 h1:jITwvb03UMLfTFHFKdvaMyU/G96iVWS5EiMsqo3flfE= +github.com/hamba/avro/v2 v2.19.0/go.mod h1:72DkWmMmAyZA+qHoI89u4RMCQ3X54vpEb1ap80iCIBg= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= -github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= -github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= -github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= -github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= -github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= -github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= -github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= -github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= 
-github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= -github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= -github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY= -github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= -github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= -github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/klauspost/cpuid/v2 v2.2.10 h1:tBs3QSyvjDyFTq3uoc/9xFpCuOsJQFNPiAhYdw2skhE= +github.com/klauspost/cpuid/v2 v2.2.10/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod 
h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= -github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= -github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 h1:I0XW9+e1XWDxdcEniV4rQAIOPUGDq67JSCiRCgGCZLI= -github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= +github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= +github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= github.com/minio/c2goasm 
v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= +github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= +github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= -github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM= github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw= -github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= -github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4= 
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= +github.com/oklog/ulid/v2 v2.1.0 h1:+9lhoxAP56we25tyYETBBY1YLA2SaoLvUFgrP2miPJU= +github.com/oklog/ulid/v2 v2.1.0/go.mod h1:rcEKHmBBKfef9DhnvX7y1HZBYxjXb0cP5ExxNsTT1QQ= github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= -github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= -github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= -github.com/pierrec/lz4/v4 v4.1.15 h1:MO0/ucJhngq7299dKLwIMtgTfbkoSPF6AoMYDd8Q4q0= -github.com/pierrec/lz4/v4 v4.1.15/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/parquet-go/parquet-go v0.24.0 h1:VrsifmLPDnas8zpoHmYiWDZ1YHzLmc7NmNwPGkI2JM4= +github.com/parquet-go/parquet-go v0.24.0/go.mod h1:OqBBRGBl7+llplCvDMql8dEKaDqjaFA/VAPw+OJiNiw= +github.com/pborman/getopt v0.0.0-20170112200414-7148bc3a4c30/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o= +github.com/pierrec/lz4/v4 v4.1.22 h1:cKFw6uJDK+/gfw5BcDL0JL5aBsAFdsIT18eRtLj7VIU= +github.com/pierrec/lz4/v4 v4.1.22/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pingcap/errors v0.11.0/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= +github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= github.com/pingcap/errors v0.11.5-0.20210425183316-da1aaba5fb63 h1:+FZIDR/D97YOPik4N4lPDaUcLDF/EQPogxtlHB2ZZRM= github.com/pingcap/errors v0.11.5-0.20210425183316-da1aaba5fb63/go.mod h1:X2r9ueLEUZgtx2cIogM0v4Zj5uvvzhuuiu7Pn8HzMPg= -github.com/pingcap/log 
v0.0.0-20210625125904-98ed8e2eb1c7 h1:k2BbABz9+TNpYRwsCCFS8pEEnFVOdbgEjL/kTlLuzZQ= -github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= -github.com/pingcap/tidb/parser v0.0.0-20220921115303-5aab87679fde h1:8IiT59cBZ6KFchSuOUjHUhQWUpQ2lay6uKJLqsanfWs= -github.com/pingcap/tidb/parser v0.0.0-20220921115303-5aab87679fde/go.mod h1:wjvp+T3/T9XYt0nKqGX3Kc1AKuyUcfno6LTc6b2A6ew= -github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pingcap/failpoint v0.0.0-20220801062533-2eaa32854a6c h1:CgbKAHto5CQgWM9fSBIvaxsJHuGP0uM74HXtv3MyyGQ= +github.com/pingcap/failpoint v0.0.0-20220801062533-2eaa32854a6c/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= +github.com/pingcap/log v1.1.0 h1:ELiPxACz7vdo1qAvvaWJg1NrYFoY6gqAh/+Uo6aXdD8= +github.com/pingcap/log v1.1.0/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= +github.com/pingcap/tidb/parser v0.0.0-20231013125129-93a834a6bf8d h1:EHXDxa7eq8vWc2T8cwstlr3A48dx4TvMsCh5Y7z2VZ8= +github.com/pingcap/tidb/parser v0.0.0-20231013125129-93a834a6bf8d/go.mod h1:cwq4bKUlftpWuznB+rqNwbN0xy6/i5SL/nYvEKeJn4s= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib 
v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= -github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= -github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= -github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= -github.com/prometheus/client_golang v1.12.1/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrbd179Rt297l4aS6nDY= -github.com/prometheus/client_golang v1.12.2 h1:51L9cDoUHVrXx4zWYlcLQIZ+d+VXHgqnYKkIuq4g/34= -github.com/prometheus/client_golang v1.12.2/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrbd179Rt297l4aS6nDY= -github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.2.0 h1:uq5h0d+GuxiXLJLNABMgp2qUWDPiLvgCzz2dUR+/W/M= -github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= -github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= -github.com/prometheus/common v0.32.1/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= -github.com/prometheus/common v0.37.0 
h1:ccBbHCgIiT9uSoFY0vX8H3zsNR5eLt17/RQLUvn8pXE= -github.com/prometheus/common v0.37.0/go.mod h1:phzohg0JFMnBEFGxTDbfu3QyL5GI8gTQJFhYO5B3mfA= -github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= -github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= -github.com/prometheus/procfs v0.7.3 h1:4jVXhlkAyzOScmCkXBTOLRLTz8EeU+eyjrwB/EPq0VU= -github.com/prometheus/procfs v0.7.3/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= -github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 h1:OdAsTTz6OkFY5QxjkYwrChwuRruF69c169dPK26NUlk= -github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= -github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= -github.com/segmentio/asm v1.1.3/go.mod h1:Ld3L4ZXGNcSLRg4JBsZ3//1+f/TjYl0Mzen/DQy1EJg= -github.com/segmentio/encoding v0.3.5 h1:UZEiaZ55nlXGDL92scoVuw00RmiRCazIEmvPSbSvt8Y= -github.com/segmentio/encoding v0.3.5/go.mod h1:n0JeuIqEQrQoPDGsjo8UNd1iA0U8d8+oHAA4E3G3OxM= -github.com/segmentio/parquet-go v0.0.0-20230209224803-1d85e8136681 h1:wjC8jWN4Kt/Per2HczpJzs5xSS0SYBaFa6O9Bc4SkQ8= -github.com/segmentio/parquet-go v0.0.0-20230209224803-1d85e8136681/go.mod h1:SclLlCfB7c7CH0YerV+OtYmZExyK5rhVOd6UT90erVw= -github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= 
-github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= -github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= +github.com/polarsignals/iceberg-go v0.0.0-20240502213135-2ee70b71e76b h1:Dbm5itapR0uYIMujR8OntWpDJ/nm5OM6JiaKauLcZ4Y= +github.com/polarsignals/iceberg-go v0.0.0-20240502213135-2ee70b71e76b/go.mod h1:5T9ChEZjRNhAGGLwH1cqzDA7wXB84SmU+WkXQr/ZAjo= +github.com/polarsignals/wal v0.0.0-20240619104840-9da940027f9c h1:ReFgEXqZ9/y+/9ZdNHOa1L62wqt8mWqoqrWutWj2x+A= +github.com/polarsignals/wal v0.0.0-20240619104840-9da940027f9c/go.mod h1:EVDHAAe+7GQ33A1/x+/gE+sBPN4toQ0XG5RoLD49xr8= +github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= +github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= +github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rivo/uniseg 
v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= +github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= +github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= +github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs= +github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/thanos-io/objstore v0.0.0-20220715165016-ce338803bc1e h1:IhC7gP1u/uA+yf9RYwhRVBq+2+HV1xRGcrY/C6WBaPY= -github.com/thanos-io/objstore v0.0.0-20220715165016-ce338803bc1e/go.mod h1:Fp62HaCG8R+5ak2g6+foU/Jag9JhtmpftVpubyS3S5s= -github.com/tidwall/gjson v1.10.2 h1:APbLGOM0rrEkd8WBw9C24nllro4ajFuJu0Sc9hRz8Bo= -github.com/tidwall/gjson v1.10.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= -github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= -github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= -github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs= -github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= -github.com/tidwall/tinylru v1.1.0 h1:XY6IUfzVTU9rpwdhKUF6nQdChgCdGjkMfLzbWyiau6I= -github.com/tidwall/tinylru v1.1.0/go.mod h1:3+bX+TJ2baOLMWTnlyNWHh4QMnFyARg2TLTQ6OFbzw8= -github.com/tidwall/wal v1.1.7 h1:emc1TRjIVsdKKSnpwGBAcsAGg0767SvUk8+ygx7Bb+4= -github.com/tidwall/wal v1.1.7/go.mod h1:r6lR1j27W9EPalgHiB7zLJDYu3mzW5BQP5KrzBpYY/E= -github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= 
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/tetratelabs/wazero v1.7.3 h1:PBH5KVahrt3S2AHgEjKu4u+LlDbbk+nsGE3KLucy6Rw= +github.com/tetratelabs/wazero v1.7.3/go.mod h1:ytl6Zuh20R/eROuyDaGPkp82O9C/DJfXAwJfQ3X6/7Y= +github.com/thanos-io/objstore v0.0.0-20240818203309-0363dadfdfb1 h1:z0v9BB/p7s4J6R//+0a5M3wCld8KzNjrGRLIwXfrAZk= +github.com/thanos-io/objstore v0.0.0-20240818203309-0363dadfdfb1/go.mod h1:3ukSkG4rIRUGkKM4oIz+BSuUx2e3RlQVVv3Cc3W+Tv4= +github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= +github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= +github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= -go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= -go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= -go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.opentelemetry.io/otel v0.20.0 h1:eaP0Fqu7SXHwvjiqDq83zImeehOHX8doTvU9AwXON8g= -go.opentelemetry.io/otel v0.20.0/go.mod h1:Y3ugLH2oa81t5QO+Lty+zXf8zC9L26ax4Nzoxm/dooo= -go.opentelemetry.io/otel/metric v0.20.0/go.mod 
h1:598I5tYlH1vzBjn+BTuhzTCSb/9debfNp6R3s7Pr1eU= -go.opentelemetry.io/otel/oteltest v0.20.0/go.mod h1:L7bgKf9ZB7qCwT9Up7i9/pn0PWIa9FqQ2IQ8LoxiGnw= -go.opentelemetry.io/otel/trace v0.20.0 h1:1DL6EXUdcg95gukhuRRvLDO/4X5THh/5dIV52lqtnbw= -go.opentelemetry.io/otel/trace v0.20.0/go.mod h1:6GjCW8zgDjwGHGa6GkyeB8+/5vjT16gUEi0Nf1iBdgw= -go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= +go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU= +go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY= +go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI= +go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ= +go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE= +go.opentelemetry.io/otel/sdk v1.34.0 h1:95zS4k/2GOy069d321O8jWgYsW3MzVV+KuSPKp7Wr1A= +go.opentelemetry.io/otel/sdk v1.34.0/go.mod h1:0e/pNiaMAqaykJGKbi+tSjWfNNHMTxoC9qANsCzbyxU= +go.opentelemetry.io/otel/sdk/metric v1.34.0 h1:5CeK9ujjbFVL5c1PhLuStg1wxA7vQv7ce1EK0Gyvahk= +go.opentelemetry.io/otel/sdk/metric v1.34.0/go.mod h1:jQ/r8Ze28zRKoNRdkjCZxfs6YvBTG1+YIqyFVFYec5w= +go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k= +go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE= go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= -go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/atomic v1.11.0 
h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= +go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= -go.uber.org/goleak v1.1.12 h1:gZAh5/EyT/HQwlpkCy6wTpqfH9H8Lz8zbm3dZh+OyzA= -go.uber.org/goleak v1.1.12/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= -go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= -go.uber.org/multierr v1.6.0 h1:y6IPFStTAIT5Ytl7/XYmHvzXQ7S3g/IeZW9hyZ5thw4= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= -go.uber.org/zap v1.9.1/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= -go.uber.org/zap v1.18.1 h1:CSUJ2mjFszzEWt4CdKISEuChVIXGBn3lAPwkRGyVrc4= -go.uber.org/zap v1.18.1/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= -golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +go.uber.org/multierr v1.7.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.19.0/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= +go.uber.org/zap v1.25.0 h1:4Hvk6GtkucQ790dqmj7l1eEnRdKm3k3ZUrUMS2d5+5c= +go.uber.org/zap v1.25.0/go.mod h1:JIAUzQIH94IC4fOJQm7gMmBJP5k7wQfdcnYdPoEXJYk= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod 
h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= -golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= -golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= -golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= -golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= -golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= -golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= -golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/exp v0.0.0-20220827204233-334a2380cb91 h1:tnebWN09GYg9OLPss1KXj8txwZc6X6uMr6VFdcGNbHw= -golang.org/x/exp v0.0.0-20220827204233-334a2380cb91/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE= -golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= -golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= -golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= -golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= 
-golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c h1:KL/ZBHXgKGVmuZBZ01Lt57yE5ws8ZPSkkihmEyq7FXc= +golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c/go.mod h1:tujkw807nyEEAamNbDrEGzRav+ilXA7PCRAd6xsmwiU= golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= -golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= -golang.org/x/lint v0.0.0-20200302205851-738671d3881b h1:Wh+f8QHJXR411sJR8/vRBTZ7YapZaRvUcLFFJhusH0k= -golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= -golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= -golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= -golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= -golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= -golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= -golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= -golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod 
v0.6.0-dev.0.20220419223038-86c51ed26bb4 h1:6zppjxzCulZykYSLyVDYbneBfbaBIQPYMevg0bEwv2s= -golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/mod v0.23.0 h1:Zb7khfcRGKk+kqfxFaP5tZqCnDZMjC5VtUBs87Hr6QM= +golang.org/x/mod v0.23.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net 
v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= -golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= -golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= -golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= -golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod 
h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20220223155221-ee480838109b/go.mod h1:DAh4E804XQdzx2j+YRIaUnCqCV2RuMz24cGBJ5QYIrc= -golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8= +golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20220819030929-7fc1605a5dde h1:ejfdSekXMDxDLbRrJMwUk6KnSLZ2McaUCVcIKM+N6jc= -golang.org/x/sync 
v0.0.0-20220819030929-7fc1605a5dde/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= +golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys 
v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211110154304-99a53858aa08/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220829200755-d48e67d00261 h1:v6hYoSR9T5oet+pMXwUWkbiVqx/63mlHjefrHmxwfeY= -golang.org/x/sys v0.0.0-20220829200755-d48e67d00261/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= +golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.7 
h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= -golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= +golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod 
h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod 
h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200227222343-706bc42d1f0d/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= -golang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= -golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8= -golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= -golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= -golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= -golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.1.12 h1:VveCTK38A2rkS8ZqFY25HIDFscX5X9OoEhJd3quQmXU= -golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.30.0 h1:BgcpHewrV5AUp2G9MebG4XPFI1E2W41zU1SaqVA9vJY= +golang.org/x/tools v0.30.0/go.mod h1:c347cR/OJfw5TI+GfX7RUPNMdDRRbjvYTS0jPyvsVtY= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 
-golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f h1:uF6paiQQebLeSXkrTqHqz0MXhXXS1KgF41eUdBNvxK0= -golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= -gonum.org/v1/gonum v0.11.0 h1:f1IJhK4Km5tBJmaiJXtk/PkL4cdVX6J+tGiM187uT5E= -google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= -google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= -google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= -google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= -google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.19.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= -google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= -google.golang.org/api v0.29.0/go.mod h1:Lcubydp8VUV7KeIHD9z2Bys/sm/vGKnG1UHuDBSrHWM= -google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz5138Fc= -google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine 
v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= -google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= -google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20200115191322-ca5a22157cba/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= 
-google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA= -google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U= -google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= -google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= -google.golang.org/grpc v1.21.1/go.mod 
h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= -google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= -google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60= -google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= -google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= -google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= -google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= -google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= -google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= -google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= -google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= -google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= -google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= 
-google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w= -google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= +golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY= +golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= +gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0= +gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f h1:OxYkA3wjPsZyBylwymxSHa7ViiW1Sml4ToBrncvFehI= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f/go.mod h1:+2Yz8+CLJbIfL9z73EW45avw8Lmge3xVElCP9zEKi50= +google.golang.org/grpc v1.71.0 h1:kF77BGdPTQ4/JZWMlb9VpJ5pa25aqvVqogsxNHHdeBg= +google.golang.org/grpc v1.71.0/go.mod h1:H0GRtasmQOh9LkFoCPDu3ZrwUtD1YGE+b2vYBYd/8Ec= +google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM= +google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= -gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= -gopkg.in/natefinch/lumberjack.v2 v2.0.0 h1:1Lc07Kr7qY4U2YPouBjpCLxpiyxIVoxqXgkXLknAOE8= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod 
h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= -gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc= +gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= -gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= -honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -honnef.co/go/tools v0.0.1-2020.1.4/go.mod 
h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= -rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= -rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= diff --git a/granule.go b/granule.go deleted file mode 100644 index c8bc0d016..000000000 --- a/granule.go +++ /dev/null @@ -1,171 +0,0 @@ -package frostdb - -import ( - "context" - "fmt" - "sync/atomic" - - "github.com/apache/arrow/go/v10/arrow" - "github.com/google/btree" - - "github.com/polarsignals/frostdb/dynparquet" - "github.com/polarsignals/frostdb/parts" -) - -type Granule struct { - metadata GranuleMetadata - - parts *parts.List - tableConfig *TableConfig - - // newGranules are the granules that were created after a split - newGranules []*Granule -} - -// GranuleMetadata is the metadata for a granule. -type GranuleMetadata struct { - // least is the row that exists within the Granule that is the least. - // This is used for quick insertion into the btree, without requiring an - // iterator. - least *dynparquet.DynamicRow - - // size is the raw commited, and uncommited size of the granule. 
It is used as a suggestion for potential compaction - size atomic.Uint64 - - // pruned indicates if this Granule is longer found in the index - pruned atomic.Uint64 -} - -func NewGranule(tableConfig *TableConfig, prts ...*parts.Part) (*Granule, error) { - g := &Granule{ - parts: parts.NewList(&atomic.Pointer[parts.Node]{}, parts.None), - tableConfig: tableConfig, - metadata: GranuleMetadata{}, - } - - for _, p := range prts { - if err := g.addPart(p); err != nil { - return nil, err - } - } - - return g, nil -} - -func (g *Granule) addPart(p *parts.Part) error { - if p.NumRows() == 0 { - return nil - } - - r, err := p.Least() - if err != nil { - return err - } - - _ = g.parts.Prepend(p) - g.metadata.size.Add(uint64(p.Size())) - - if g.metadata.least == nil || g.tableConfig.schema.RowLessThan(r, g.metadata.least) { - g.metadata.least = r - } - - return nil -} - -// Append adds a part into the Granule. It returns the new size of the Granule. -func (g *Granule) Append(p *parts.Part) (uint64, error) { - node := g.parts.Prepend(p) - newSize := g.metadata.size.Add(uint64(p.Size())) - - // If the prepend returned that we're adding to the compacted list; then we - // need to propagate the Part to the new granules. - if node.Compacted() { - err := addPartToGranule(g.newGranules, p) - if err != nil { - return 0, err - } - } - - return newSize, nil -} - -// PartsForTx returns the parts for the given transaction constraints. -func (g *Granule) PartsForTx(watermark uint64, iterator func(*parts.Part) bool) { - g.parts.Iterate(func(p *parts.Part) bool { - // Don't iterate over parts from an uncompleted transaction - if p.TX() > watermark { - return true - } - - return iterator(p) - }) -} - -// Less implements the btree.Item interface. 
-func (g *Granule) Less(than btree.Item) bool { - var otherRow *dynparquet.DynamicRow - switch v := than.(type) { - case *Granule: - otherRow = v.Least() - case btreeComparableDynamicRow: - otherRow = v.DynamicRow - default: - panic(fmt.Sprintf("cannot compare against %T", v)) - } - return g.tableConfig.schema.RowLessThan(g.Least(), otherRow) -} - -// Least returns the least row in a Granule. -func (g *Granule) Least() *dynparquet.DynamicRow { - return g.metadata.least -} - -// Collect will filter row groups or arrow records into the collector. Arrow records passed to the collector must be Released(). -func (g *Granule) Collect(ctx context.Context, tx uint64, filter TrueNegativeFilter, collector chan<- any) { - records := []arrow.Record{} - g.PartsForTx(tx, func(p *parts.Part) bool { - if r := p.Record(); r != nil { - r.Retain() - records = append(records, r) - return true - } - - var buf *dynparquet.SerializedBuffer - var err error - buf, err = p.AsSerializedBuffer(g.tableConfig.schema) - if err != nil { - return false - } - f := buf.ParquetFile() - for i := range f.RowGroups() { - rg := buf.DynamicRowGroup(i) - var mayContainUsefulData bool - mayContainUsefulData, err = filter.Eval(rg) - if err != nil { - return false - } - if mayContainUsefulData { - select { - case <-ctx.Done(): - return false - case collector <- rg: - } - } - } - - return true - }) - - if len(g.newGranules) != 0 && len(records) != 0 { // This granule was pruned while we were retaining Records; it's not safe to use them anymore - for _, r := range records { - r.Release() - } - for _, newGran := range g.newGranules { - newGran.Collect(ctx, tx, filter, collector) - } - } else { - for _, r := range records { - collector <- r - } - } -} diff --git a/index/levels.go b/index/levels.go new file mode 100644 index 000000000..99aaab62e --- /dev/null +++ b/index/levels.go @@ -0,0 +1,333 @@ +package index + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "os" + "path/filepath" + "strconv" + "sync" 
+ + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/parquet-go/parquet-go" + + "github.com/polarsignals/frostdb/dynparquet" + "github.com/polarsignals/frostdb/parts" +) + +const ( + IndexFileExtension = ".idx" + ParquetCompactionTXKey = "compaction_tx" + dirPerms = os.FileMode(0o755) + filePerms = os.FileMode(0o640) +) + +type Compaction func(w io.Writer, compact []parts.Part, options ...parquet.WriterOption) (int64, error) + +type FileCompaction struct { + // settings + dir string + compact Compaction + maxSize int64 + + // internal data + indexFiles []*os.File + offset int64 // Writing offsets into the file + parts sync.WaitGroup // Wait group for parts that are currently reference in this level. + + // Options + logger log.Logger +} + +func NewFileCompaction(dir string, maxSize int64, compact Compaction, logger log.Logger) (*FileCompaction, error) { + f := &FileCompaction{ + dir: dir, + compact: compact, + maxSize: maxSize, + logger: logger, + } + + if err := os.MkdirAll(dir, dirPerms); err != nil { + return nil, err + } + + return f, nil +} + +func (f *FileCompaction) MaxSize() int64 { return f.maxSize } + +// Snapshot takes a snapshot of the current level. It ignores the parts and just hard links the files into the snapshot directory. +// It will rotate the active file if it has data in it rendering all snapshotted files as immutable. +func (f *FileCompaction) Snapshot(_ []parts.Part, _ func(parts.Part) error, dir string) error { + if err := os.MkdirAll(dir, dirPerms); err != nil { + return err + } + + for i, file := range f.indexFiles { + if i == len(f.indexFiles)-1 { + // Sync the last file if it has data in it. + if f.offset > 0 { + if err := f.Sync(); err != nil { + return err + } + } else { + return nil // Skip empty file. + } + } + + // Hard link the file into the snapshot directory. 
+ if err := os.Link(file.Name(), filepath.Join(dir, filepath.Base(file.Name()))); err != nil { + return err + } + } + + // Rotate the active file if it has data in it. + _, err := f.createIndexFile(len(f.indexFiles)) + return err +} + +func (f *FileCompaction) createIndexFile(id int) (*os.File, error) { + file, err := os.OpenFile(filepath.Join(f.dir, fmt.Sprintf("%020d%s", id, IndexFileExtension)), os.O_CREATE|os.O_RDWR, filePerms) + if err != nil { + return nil, err + } + + f.offset = 0 + f.indexFiles = append(f.indexFiles, file) + return file, nil +} + +func (f *FileCompaction) openIndexFile(path string) (*os.File, error) { + file, err := os.Open(path) + if err != nil { + return nil, err + } + + f.indexFiles = append(f.indexFiles, file) + return file, nil +} + +// file returns the currently active index file. +func (f *FileCompaction) file() *os.File { + return f.indexFiles[len(f.indexFiles)-1] +} + +// accountingWriter is a writer that accounts for the number of bytes written. +type accountingWriter struct { + w io.Writer + n int64 +} + +func (a *accountingWriter) Write(p []byte) (int, error) { + n, err := a.w.Write(p) + a.n += int64(n) + return n, err +} + +// Compact will compact the given parts into a Parquet file written to the next level file. +func (f *FileCompaction) Compact(compact []parts.Part, options ...parts.Option) ([]parts.Part, int64, int64, error) { + if len(compact) == 0 { + return nil, 0, 0, fmt.Errorf("no parts to compact") + } + + accountant := &accountingWriter{w: f.file()} + preCompactionSize, err := f.compact(accountant, compact, + parquet.KeyValueMetadata( + ParquetCompactionTXKey, // Compacting up through this transaction. + fmt.Sprintf("%v", compact[0].TX()), + ), + ) // compact into the next level + if err != nil { + return nil, 0, 0, err + } + + // Record the writing offset into the file. + prevOffset := f.offset + + // Record the file size for recovery. 
+ size := make([]byte, 8) + binary.LittleEndian.PutUint64(size, uint64(accountant.n)) + if n, err := f.file().Write(size); n != 8 { + return nil, 0, 0, fmt.Errorf("failed to write size to file: %v", err) + } + f.offset += accountant.n + 8 + + // Sync file after writing. + if err := f.Sync(); err != nil { + return nil, 0, 0, fmt.Errorf("failed to sync file: %v", err) + } + + pf, err := parquet.OpenFile(io.NewSectionReader(f.file(), prevOffset, accountant.n), accountant.n) + if err != nil { + return nil, 0, 0, fmt.Errorf("failed to open file after compaction: %w", err) + } + + buf, err := dynparquet.NewSerializedBuffer(pf) + if err != nil { + return nil, 0, 0, err + } + + f.parts.Add(1) + return []parts.Part{parts.NewParquetPart(compact[0].TX(), buf, append(options, parts.WithRelease(f.parts.Done))...)}, preCompactionSize, accountant.n, nil +} + +// Reset is called when the level no longer has active parts in it at the end of a compaction. +func (f *FileCompaction) Reset() { + f.parts.Wait() // Wait for all parts to be released. + for _, file := range f.indexFiles { + if err := file.Close(); err != nil { + level.Error(f.logger).Log("msg", "failed to close level file", "err", err) + } + } + + // Delete all the files in the directory level. And open a new file. + if err := os.RemoveAll(f.dir); err != nil { + level.Error(f.logger).Log("msg", "failed to remove level directory", "err", err) + } + + if err := os.MkdirAll(f.dir, dirPerms); err != nil { + level.Error(f.logger).Log("msg", "failed to create level directory", "err", err) + } + + f.indexFiles = nil + _, err := f.createIndexFile(len(f.indexFiles)) + if err != nil { + level.Error(f.logger).Log("msg", "failed to create new level file", "err", err) + } +} + +// recovery the level from the given directory. 
+func (f *FileCompaction) recover(options ...parts.Option) ([]parts.Part, error) { + defer func() { + _, err := f.createIndexFile(len(f.indexFiles)) + if err != nil { + level.Error(f.logger).Log("msg", "failed to create new level file", "err", err) + } + }() + recovered := []parts.Part{} + err := filepath.WalkDir(f.dir, func(path string, d os.DirEntry, err error) error { + if err != nil { + return err + } + + if filepath.Ext(path) != IndexFileExtension { + return nil + } + + info, err := d.Info() + if err != nil { + return fmt.Errorf("failed to get file info: %v", err) + } + + if info.Size() == 0 { // file empty, nothing to recover. + return nil + } + + file, err := f.openIndexFile(path) + if err != nil { + return fmt.Errorf("failed to open file: %v", err) + } + + // Recover all parts from file. + fileParts := []parts.Part{} + if err := func() error { + for offset := info.Size(); offset > 0; { + offset -= 8 + size := make([]byte, 8) + if n, err := file.ReadAt(size, offset); n != 8 { + return fmt.Errorf("failed to read size from file: %v", err) + } + parquetSize := int64(binary.LittleEndian.Uint64(size)) + offset -= parquetSize + + pf, err := parquet.OpenFile(io.NewSectionReader(file, offset, parquetSize), parquetSize) + if err != nil { + return err + } + + buf, err := dynparquet.NewSerializedBuffer(pf) + if err != nil { + return err + } + + var tx int + txstr, ok := buf.ParquetFile().Lookup(ParquetCompactionTXKey) + if !ok { + level.Warn(f.logger).Log("msg", "failed to find compaction_tx metadata", "file", file.Name()) + tx = 0 // Downgrade the compaction tx so that all future reads will be able to read this part. + } else { + tx, err = strconv.Atoi(txstr) + if err != nil { + level.Warn(f.logger).Log("msg", "failed to parse compaction_tx metadata", "file", file.Name(), "err", err) + tx = 0 // Downgrade the compaction tx so that all future reads will be able to read this part. 
+ } + } + + f.parts.Add(1) + fileParts = append(fileParts, parts.NewParquetPart(uint64(tx), buf, append(options, parts.WithRelease(f.parts.Done))...)) + } + + return nil + }(); err != nil { + for _, part := range fileParts { + part.Release() + } + + // If we failed to recover the file, remove it. + if err := f.file().Close(); err != nil { + level.Error(f.logger).Log("msg", "failed to close level file after failed recovery", "err", err) + } + f.indexFiles = f.indexFiles[:len(f.indexFiles)-1] // Remove the file from the list of files. + return err + } + + recovered = append(recovered, fileParts...) + return nil + }) + if err != nil { + return nil, err + } + + return recovered, nil +} + +// Sync calls Sync on the underlying file. +func (f *FileCompaction) Sync() error { return f.file().Sync() } + +type inMemoryLevel struct { + compact Compaction + maxSize int64 +} + +func (l *inMemoryLevel) MaxSize() int64 { return l.maxSize } +func (l *inMemoryLevel) Snapshot(snapshot []parts.Part, writer func(parts.Part) error, _ string) error { + for _, part := range snapshot { + if err := writer(part); err != nil { + return err + } + } + return nil +} +func (l *inMemoryLevel) Reset() {} +func (l *inMemoryLevel) Compact(toCompact []parts.Part, options ...parts.Option) ([]parts.Part, int64, int64, error) { + if len(toCompact) == 0 { + return nil, 0, 0, fmt.Errorf("no parts to compact") + } + + var b bytes.Buffer + preCompactionSize, err := l.compact(&b, toCompact) + if err != nil { + return nil, 0, 0, err + } + + buf, err := dynparquet.ReaderFromBytes(b.Bytes()) + if err != nil { + return nil, 0, 0, err + } + + postCompactionSize := int64(b.Len()) + return []parts.Part{parts.NewParquetPart(toCompact[0].TX(), buf, options...)}, preCompactionSize, postCompactionSize, nil +} diff --git a/index/lsm.go b/index/lsm.go new file mode 100644 index 000000000..c603c0c31 --- /dev/null +++ b/index/lsm.go @@ -0,0 +1,669 @@ +package index + +import ( + "context" + "fmt" + "os" + "path/filepath" + 
"sync" + "sync/atomic" + "time" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/util" + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/polarsignals/frostdb/dynparquet" + "github.com/polarsignals/frostdb/parts" + "github.com/polarsignals/frostdb/query/expr" + "github.com/polarsignals/frostdb/query/logicalplan" +) + +type CompactionType int + +const ( + CompactionTypeUnknown CompactionType = iota + + // CompactionTypeParquetDisk is a compaction type that will compact the parts into a Parquet file on disk. + CompactionTypeParquetDisk + + // CompactionTypeParquetMemory is a compaction type that will compact the parts into a Parquet file in memory. + CompactionTypeParquetMemory +) + +// LSM is a log-structured merge-tree like index. It is implemented as a single linked list of parts. +// +// Arrow records are always added to the L0 list. When a list reaches it's configured max size it is compacted +// calling the levels Compact function and is then added as a new part to the next level. +// +// [L0]->[record]->[record]->[L1]->[record/parquet]->[record/parquet] etc. +type LSM struct { + sync.RWMutex + compacting sync.Mutex + compactionWg sync.WaitGroup + + schema *dynparquet.Schema + + dir string + maxTXRecoverd []uint64 + levels []Level + partList *Node + sizes []atomic.Int64 + + // Options + logger log.Logger + metrics *LSMMetrics + watermark func() uint64 +} + +// LSMMetrics are the metrics for an LSM index. +type LSMMetrics struct { + Compactions *prometheus.CounterVec + LevelSize *prometheus.GaugeVec + CompactionDuration prometheus.Observer +} + +// LevelConfig is the configuration for a level in the LSM. 
+// The MaxSize is the maximum size of the level in bytes before it triggers a compaction into the next level. +type LevelConfig struct { + Level SentinelType + MaxSize int64 + Type CompactionType + Compact Compaction +} + +type Level interface { + Compact(parts []parts.Part, options ...parts.Option) ([]parts.Part, int64, int64, error) + Snapshot(parts []parts.Part, writer func(parts.Part) error, dir string) error + MaxSize() int64 + Reset() +} + +type LSMOption func(*LSM) + +func LSMWithLogger(logger log.Logger) LSMOption { + return func(l *LSM) { + l.logger = logger + } +} + +func LSMWithMetrics(metrics *LSMMetrics) LSMOption { + return func(l *LSM) { + l.metrics = metrics + } +} + +func NewLSMMetrics(reg prometheus.Registerer) *LSMMetrics { + return &LSMMetrics{ + Compactions: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "frostdb_lsm_compactions_total", + Help: "The total number of compactions that have occurred.", + }, []string{"level"}), + + LevelSize: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "frostdb_lsm_level_size_bytes", + Help: "The size of the level in bytes.", + }, []string{"level"}), + + CompactionDuration: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ + Name: "frostdb_lsm_compaction_total_duration_seconds", + Help: "Total compaction duration", + NativeHistogramBucketFactor: 1.1, + }), + } +} + +// NewLSM returns an LSM-like index of len(levels) levels. +// wait is a function that will block until the given transaction has been committed; this is used only during compaction to ensure +// that all the tx in the level up to the compaction tx have been committed before compacting. 
func NewLSM(dir string, schema *dynparquet.Schema, levels []*LevelConfig, watermark func() uint64, options ...LSMOption) (*LSM, error) {
	if err := validateLevels(levels); err != nil {
		return nil, err
	}

	lsm := &LSM{
		schema:        schema,
		dir:           dir,
		maxTXRecoverd: make([]uint64, len(levels)),
		partList:      NewList(L0),
		sizes:         make([]atomic.Int64, len(levels)),
		compacting:    sync.Mutex{},
		logger:        log.NewNopLogger(),
		watermark:     watermark,
	}

	for _, opt := range options {
		opt(lsm)
	}

	// Configure the LSM levels.
	settings, recovered, err := configureLSMLevels(dir, levels, lsm.logger)
	if err != nil {
		return nil, err
	}
	// InsertPart retains every part it keeps, so the recovery references can
	// always be dropped once replay below is complete.
	defer func() {
		for _, part := range recovered {
			part.Release()
		}
	}()
	lsm.levels = settings

	// Reverse iterate (due to prepend) to create the chain of sentinel nodes.
	for i := len(levels) - 1; i > 0; i-- {
		lsm.partList.Sentinel(levels[i].Level)
	}

	if lsm.metrics == nil {
		lsm.metrics = NewLSMMetrics(prometheus.NewRegistry())
	} else {
		// Caller-supplied metrics may carry stale values; zero each level gauge.
		for _, lvl := range levels {
			lsm.metrics.LevelSize.WithLabelValues(lvl.Level.String()).Set(0)
		}
	}

	// Replay the recovered parts
	for _, part := range recovered {
		lsm.InsertPart(part)
	}

	return lsm, nil
}

// Close releases all parts, resets every level and removes the index
// directory. The LSM must not be used afterwards.
func (l *LSM) Close() error {
	l.compacting.Lock()
	defer l.compacting.Unlock()
	l.Lock()
	defer l.Unlock()

	// Release all the parts to free up any underlying resources.
	l.partList.Iterate(func(node *Node) bool {
		if node.part != nil {
			node.part.Release()
		}
		return true
	})

	// Reset the levels to ensure that none of the parts are still being referenced.
	for i := range l.levels {
		if l.levels[i] != nil {
			l.levels[i].Reset()
		}
	}

	// Remove the index directory
	if err := os.RemoveAll(l.dir); err != nil {
		return fmt.Errorf("remove lsm dir: %w", err)
	}

	return nil
}

// configureLSMLevels will configure the LSM levels. It will recover the levels from disk and return the recovered parts.
+func configureLSMLevels(dir string, levels []*LevelConfig, logger log.Logger) ([]Level, []parts.Part, error) { + settings := make([]Level, len(levels)) + recovered := []parts.Part{} + + // Recover in reverse order so that the highest level is recovered first. + // This allows us to throw away parts that were compacted into a higher level but for some reason weren't successfully removed. + for i := len(levels) - 1; i >= 0; i-- { + lvl := levels[i] + switch lvl.Type { + case CompactionTypeParquetMemory: + settings[i] = &inMemoryLevel{ + maxSize: lvl.MaxSize, + compact: lvl.Compact, + } + case CompactionTypeParquetDisk: + fileCompaction, err := NewFileCompaction(filepath.Join(dir, fmt.Sprintf("L%v", i+1)), lvl.MaxSize, lvl.Compact, logger) // TODO: it would be nice to not need to inject the compact function here. + if err != nil { + return nil, nil, err + } + parts, err := fileCompaction.recover(parts.WithCompactionLevel(i + 1)) + if err != nil { + return nil, nil, fmt.Errorf("failed to recover level %v parts: %w", i+1, err) + } + recovered = append(recovered, parts...) + settings[i] = fileCompaction + default: + if i != len(levels)-1 { // Compaction type should not be set for last level + panic(fmt.Sprintf("unknown compaction type: %v", lvl.Type)) + } + } + } + + return settings, recovered, nil +} + +// Size returns the total size of the index in bytes. +func (l *LSM) Size() int64 { + var size int64 + for i := range l.sizes { + size += l.sizes[i].Load() + } + return size +} + +// LevelSize returns the size of a specific level in bytes. +func (l *LSM) LevelSize(t SentinelType) int64 { + return l.sizes[t].Load() +} + +// Snapshot creates a snapshot of the index at the given transaction. It will call the writer function with the parts in the index that are in-memory. 
func (l *LSM) Snapshot(tx uint64, writer func(parts.Part) error, dir string) error {
	// Hold the compaction lock so the level boundaries are stable while we walk
	// the list.
	l.compacting.Lock()
	defer l.compacting.Unlock()

	var (
		snapshotList []parts.Part
		iterError    error
	)
	var snapshotLevel SentinelType
	// Walk the list, accumulating parts per level and flushing the accumulated
	// batch whenever the next sentinel (level boundary) is reached.
	l.partList.Iterate(func(node *Node) bool {
		if node.part == nil {
			if node.sentinel == L0 { // First node in the list will be L0
				snapshotLevel = L0
				return true
			}

			switch snapshotLevel {
			case L0: // L0 is always in-memory
				for _, part := range snapshotList {
					if err := writer(part); err != nil {
						iterError = err
						return false
					}
				}
			default:
				lvl := l.levels[snapshotLevel-1]
				lvldir := filepath.Join(dir, fmt.Sprintf("%v", snapshotLevel))
				if err := lvl.Snapshot(snapshotList, writer, lvldir); err != nil {
					iterError = err
					return false
				}
			}

			snapshotLevel = node.sentinel
			snapshotList = nil
			return true
		}

		// Only include parts at or below the snapshot transaction.
		if node.part.TX() <= tx {
			snapshotList = append(snapshotList, node.part)
		}
		return true
	})
	if iterError != nil {
		return iterError
	}

	// Flush the final level's accumulated parts.
	lvl := l.levels[snapshotLevel-1]
	lvldir := filepath.Join(dir, fmt.Sprintf("%v", snapshotLevel))
	return lvl.Snapshot(snapshotList, writer, lvldir)
}

// validateLevels checks that the level configs are in order and that exactly
// the non-final levels have a Compact function.
func validateLevels(levels []*LevelConfig) error {
	for i, l := range levels {
		if int(l.Level) != i {
			return fmt.Errorf("level %d is not in order", l.Level)
		}

		switch i {
		case len(levels) - 1:
			if l.Compact != nil {
				return fmt.Errorf("level %d is the last level and should not have a compact function", l.Level)
			}
		default:
			if l.Compact == nil {
				return fmt.Errorf("level %d is not the last level and should have a compact function", l.Level)
			}
		}
	}

	return nil
}

// MaxLevel returns the highest configured level of the LSM.
func (l *LSM) MaxLevel() SentinelType {
	return SentinelType(len(l.levels) - 1)
}

// Add inserts the record into L0 at transaction tx and triggers an
// asynchronous compaction if L0 exceeded its max size and no compaction is
// already running.
func (l *LSM) Add(tx uint64, record arrow.Record) {
	record.Retain()
	size := util.TotalRecordSize(record)
	l.partList.Insert(parts.NewArrowPart(tx, record, uint64(size), l.schema, parts.WithCompactionLevel(int(L0))))
	l0 := l.sizes[L0].Add(int64(size))
	l.metrics.LevelSize.WithLabelValues(L0.String()).Set(float64(l0))
	if l0 >= l.levels[L0].MaxSize() {
		// TryLock: if a compaction is already in progress, skip; the running
		// compaction will observe the new size.
		if l.compacting.TryLock() {
			l.compactionWg.Add(1)
			go func() {
				defer l.compacting.Unlock()
				defer l.compactionWg.Done()
				_ = l.compact(false)
			}()
		}
	}
}

// WaitForPendingCompactions blocks until all in-flight background compactions
// have finished.
func (l *LSM) WaitForPendingCompactions() {
	l.compactionWg.Wait()
}

// InsertPart inserts a part into the LSM tree. It will be inserted into the correct level. It does not check if the insert should cause a compaction.
// This should only be used during snapshot recovery. It will drop the insert on the floor if the part is older than a part in the next level of the LSM. This indicates
// that this part is already accounted for in the next level via compaction.
func (l *LSM) InsertPart(part parts.Part) {
	level := SentinelType(part.CompactionLevel())
	// Check the next levels if there is one to see if this part should be inserted.
	if level != l.MaxLevel() {
		for i := level + 1; i < l.MaxLevel()+1; i++ {
			if part.TX() <= l.maxTXRecoverd[i] {
				return
			}
		}
	}

	// Retain the part
	part.Retain()

	if tx := part.TX(); tx > l.maxTXRecoverd[level] {
		l.maxTXRecoverd[level] = tx
	}

	// Insert the part into the correct level, but do not do this if parts with newer TXs have already been inserted.
	l.findLevel(level).Insert(part)
	size := l.sizes[level].Add(int64(part.Size()))
	l.metrics.LevelSize.WithLabelValues(level.String()).Set(float64(size))
}

// String renders the per-level sizes followed by the part list, for debugging.
func (l *LSM) String() string {
	s := ""
	for i := range l.sizes {
		s += fmt.Sprintf("L%v: %d ", i, l.sizes[i].Load())
	}
	s += "\n"
	s += l.partList.String()
	return s
}

// Prefixes implements the data source interface; the LSM has no prefixes.
func (l *LSM) Prefixes(_ context.Context, _ string) ([]string, error) {
	return []string{}, nil
}

// Iterate walks every node of the part list under the read lock.
func (l *LSM) Iterate(iter func(node *Node) bool) {
	l.RLock()
	defer l.RUnlock()
	l.partList.Iterate(iter)
}

// Scan calls callback with every part visible at transaction tx that may
// satisfy filter. In-memory records are passed as arrow.Record (retained);
// serialized parts are passed row group by row group, each wrapped so the
// caller can release the underlying part reference.
func (l *LSM) Scan(ctx context.Context, _ string, _ *dynparquet.Schema, filter logicalplan.Expr, tx uint64, callback func(context.Context, any) error) error {
	l.RLock()
	defer l.RUnlock()

	booleanFilter, err := expr.BooleanExpr(filter)
	if err != nil {
		return fmt.Errorf("boolean expr: %w", err)
	}

	var iterError error
	l.partList.Iterate(func(node *Node) bool {
		if node.part == nil { // encountered a sentinel node; continue on
			return true
		}

		if node.part.TX() > tx { // skip parts that are newer than this transaction
			return true
		}

		if r := node.part.Record(); r != nil {
			r.Retain()
			if err := callback(ctx, r); err != nil {
				iterError = err
				return false
			}
			return true
		}

		buf, err := node.part.AsSerializedBuffer(nil)
		if err != nil {
			iterError = err
			return false
		}

		for i := 0; i < buf.NumRowGroups(); i++ {
			rg := buf.DynamicRowGroup(i)
			mayContainUsefulData, err := booleanFilter.Eval(rg, false)
			if err != nil {
				iterError = err
				return false
			}

			if mayContainUsefulData {
				node.part.Retain() // Create another reference to this part
				if err := callback(ctx, &releaseableRowGroup{DynamicRowGroup: rg, release: node.part.Release}); err != nil {
					iterError = err
					return false
				}
			}
		}
		return true
	})
	return iterError
}

// releaseableRowGroup couples a row group with the release of the part
// reference that keeps it alive.
type releaseableRowGroup struct {
	dynparquet.DynamicRowGroup
	release func()
}

// Release drops the part reference backing this row group.
func (r *releaseableRowGroup) Release() {
	r.release()
}

// ReleaseableRowGroup is a DynamicRowGroup the consumer must Release when done.
type ReleaseableRowGroup interface {
	dynparquet.DynamicRowGroup
	Release()
}

// TODO: this should be changed to just retain the sentinel nodes in the lsm struct to do an O(1) lookup.
func (l *LSM) findLevel(level SentinelType) *Node {
	var list *Node
	l.partList.Iterate(func(node *Node) bool {
		if node.part == nil && node.sentinel == level {
			list = node
			return false
		}
		return true
	})

	return list
}

// findNode returns the node that points to node.
func (l *LSM) findNode(node *Node) *Node {
	var list *Node
	l.partList.Iterate(func(n *Node) bool {
		if n.next.Load() == node {
			list = n
			return false
		}
		return true
	})

	return list
}

// EnsureCompaction forces a compaction of all levels, regardless of whether the
// levels are below the target size.
func (l *LSM) EnsureCompaction() error {
	l.compacting.Lock()
	defer l.compacting.Unlock()
	return l.compact(true /* ignoreSizes */)
}

// Rotate will write all parts in the LSM into the external writer. No changes are made to the LSM.
func (l *LSM) Rotate(externalWriter func([]parts.Part) (parts.Part, int64, int64, error)) error {
	l.compacting.Lock()
	defer l.compacting.Unlock()
	start := time.Now()
	defer func() {
		l.metrics.CompactionDuration.Observe(time.Since(start).Seconds())
	}()

	// Write all the parts to the external writer
	compact := []parts.Part{}
	l.partList.Iterate(func(node *Node) bool {
		if node.part == nil {
			return true
		}

		compact = append(compact, node.part)
		return true
	})

	_, _, _, err := externalWriter(compact)
	return err
}

// merge will merge the given level into the next level using the configured Compact function for the given level.
// The last level has no Compact function and cannot be merged.
+func (l *LSM) merge(level SentinelType) error { + if int(level) > len(l.levels) { + return fmt.Errorf("level %d does not exist", level) + } + if int(level) == len(l.levels)-1 { + return fmt.Errorf("cannot merge the last level") + } + l.metrics.Compactions.WithLabelValues(level.String()).Inc() + + compact := l.findLevel(level) + + // Find a transaction that is <= the current watermark. + // This ensures a contiguous sorted list of transactions. + if level == L0 { + compact = compact.next.Load() + if compact == nil || compact.part == nil { + return nil // nothing to compact + } + + // Find the first part that is <= the watermark and reset the compact list to that part. + wm := l.watermark() + compact.Iterate(func(node *Node) bool { + if node.part == nil && node.sentinel != L0 { + return false + } + if node.part.TX() <= wm { + compact = node + return false + } + return true + }) + } + + nodeList := []*Node{} + var next *Node + var iterErr error + compact.Iterate(func(node *Node) bool { + if node.part == nil { // sentinel encountered + switch node.sentinel { + case level: // the sentinel for the beginning of the list + return true + case level + 1: + next = node.next.Load() // skip the sentinel to combine the lists + default: + next = node + } + return false + } + + nodeList = append(nodeList, node) + return true + }) + if iterErr != nil { + return iterErr + } + + if len(nodeList) == 0 { + return nil + } + + var size int64 + var compactedSize int64 + var compacted []parts.Part + var err error + mergeList := make([]parts.Part, 0, len(nodeList)) + for _, node := range nodeList { + mergeList = append(mergeList, node.part) + } + s := &Node{ + sentinel: level + 1, + } + compacted, size, compactedSize, err = l.levels[level].Compact(mergeList, parts.WithCompactionLevel(int(level)+1)) + if err != nil { + return err + } + + // Create new list for the compacted parts. 
+ compactedList := &Node{ + part: compacted[0], + } + node := compactedList + for _, p := range compacted[1:] { + node.next.Store(&Node{ + part: p, + }) + node = node.next.Load() + } + s.next.Store(compactedList) + if next != nil { + node.next.Store(next) + } + l.sizes[level+1].Add(int64(compactedSize)) + l.metrics.LevelSize.WithLabelValues(SentinelType(level + 1).String()).Set(float64(l.sizes[level+1].Load())) + + // Replace the compacted list with the new list + // find the node that points to the first node in our compacted list. + node = l.findNode(nodeList[0]) + for !node.next.CompareAndSwap(nodeList[0], s) { + // This can happen at most once in the scenario where a new part is added to the L0 list while we are trying to replace it. + node = l.findNode(nodeList[0]) + } + l.sizes[level].Add(-int64(size)) + l.metrics.LevelSize.WithLabelValues(level.String()).Set(float64(l.sizes[level].Load())) + + // release the old parts + l.Lock() + for _, part := range mergeList { + part.Release() + } + l.Unlock() + + // Reset the level that was just compacted + if level != L0 { + l.levels[level-1].Reset() + } + + return nil +} + +// compact is a cascading compaction routine. It will start at the lowest level and compact until the next level is either the max level or the next level does not exceed the max size. +// compact can not be run concurrently. 
func (l *LSM) compact(ignoreSizes bool) error {
	start := time.Now()
	defer func() {
		l.metrics.CompactionDuration.Observe(time.Since(start).Seconds())
	}()

	// Walk levels bottom-up; merging level i may push level i+1 over its max
	// size, which the next iteration then picks up (the cascade).
	for i := 0; i < len(l.levels)-1; i++ {
		if ignoreSizes || l.sizes[i].Load() >= l.levels[i].MaxSize() {
			if err := l.merge(SentinelType(i)); err != nil {
				level.Error(l.logger).Log("msg", "failed to merge level", "level", i, "err", err)
				return err
			}
		}
	}

	return nil
}
diff --git a/index/lsm_list.go b/index/lsm_list.go
new file mode 100644
index 000000000..1c68bcbd2
--- /dev/null
+++ b/index/lsm_list.go
@@ -0,0 +1,131 @@
package index

import (
	"fmt"
	"runtime"
	"sync/atomic"

	"github.com/polarsignals/frostdb/parts"
)

// SentinelType identifies an LSM level; sentinel nodes in the part list carry
// it to mark level boundaries.
type SentinelType int

const (
	L0 SentinelType = iota
	L1
	L2
)

// String formats the level as "L<n>".
func (s SentinelType) String() string {
	return fmt.Sprintf("L%v", int(s))
}

// Node is a Part that is a part of a linked-list.
type Node struct {
	next atomic.Pointer[Node]
	part parts.Part

	sentinel SentinelType // sentinel nodes contain no parts, and are to indicate the start of a new sub list
}

// Part returns the part held by this node; nil for sentinel nodes.
func (n *Node) Part() parts.Part {
	return n.part
}

// String renders the list from this node onwards; each element shows either
// the sentinel level or the node's row count.
func (n *Node) String() string {
	if n.part == nil {
		if n.next.Load() == nil {
			return fmt.Sprintf("[%v]", n.sentinel)
		}
		return fmt.Sprintf("[%v]->%v", n.sentinel, n.next.Load().String())
	}

	if n.part.Record() != nil {
		if n.next.Load() == nil {
			return fmt.Sprintf("[%v]", n.part.Record().NumRows())
		}
		return fmt.Sprintf("[%v]->%v", n.part.Record().NumRows(), n.next.Load().String())
	}

	b, _ := n.part.AsSerializedBuffer(nil)
	if n.next.Load() == nil {
		return fmt.Sprintf("[%v]", b.NumRows())
	}
	return fmt.Sprintf("[%v]->%v", b.NumRows(), n.next.Load().String())
}

// NewList creates a new part list using atomic constructs.
func NewList(sentinel SentinelType) *Node {
	p := &Node{
		sentinel: sentinel,
	}
	return p
}

// Sentinel adds a new sentinel node to the list, and returns the sub list starting from that sentinel.
func (n *Node) Sentinel(s SentinelType) *Node {
	return n.prepend(&Node{
		sentinel: s,
	})
}

// Prepend a node onto the front of the list.
func (n *Node) Prepend(part parts.Part) *Node {
	return n.prepend(&Node{
		part: part,
	})
}

// Insert a Node into the list, in order by Tx.
// Parts are kept in descending TX order within the head segment: the scan
// stops at the first sentinel or at the first part with a smaller TX, and the
// whole attempt is retried (yielding the scheduler) if a concurrent insert
// wins the CAS race.
func (n *Node) Insert(part parts.Part) {
	node := &Node{
		part: part,
	}
	tx := node.part.TX()
	tryInsert := func() bool {
		prev := n
		next := n.next.Load()
		for {
			if next == nil {
				return prev.next.CompareAndSwap(next, node)
			}
			if next.part == nil || next.part.TX() < tx {
				node.next.Store(next)
				return prev.next.CompareAndSwap(next, node)
			}
			prev = next
			next = next.next.Load()
		}
	}
	for !tryInsert() {
		runtime.Gosched()
	}
}

// prepend links node directly after the receiver using a CAS retry loop.
func (n *Node) prepend(node *Node) *Node {
	for { // continue until a successful compare and swap occurs
		next := n.next.Load()
		node.next.Store(next)
		if n.next.CompareAndSwap(next, node) {
			return node
		}
	}
}

// Iterate accesses every node in the list.
+func (n *Node) Iterate(iterate func(*Node) bool) { + if !iterate(n) { + return + } + + node := n.next.Load() + for { + if node == nil { + return + } + if !iterate(node) { + return + } + node = node.next.Load() + } +} diff --git a/index/lsm_test.go b/index/lsm_test.go new file mode 100644 index 000000000..cad6c50a9 --- /dev/null +++ b/index/lsm_test.go @@ -0,0 +1,254 @@ +package index + +import ( + "context" + "errors" + "io" + "math" + "math/rand" + "slices" + "sync" + "testing" + "time" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/parquet-go/parquet-go" + "github.com/stretchr/testify/require" + + "github.com/polarsignals/frostdb/dynparquet" + "github.com/polarsignals/frostdb/parts" +) + +func compactParts(w io.Writer, compact []parts.Part, _ ...parquet.WriterOption) (int64, error) { + schema := dynparquet.NewSampleSchema() + bufs := []dynparquet.DynamicRowGroup{} + var size int64 + for _, part := range compact { + size += part.Size() + buf, err := part.AsSerializedBuffer(schema) + if err != nil { + return 0, err + } + bufs = append(bufs, buf.MultiDynamicRowGroup()) + } + merged, err := schema.MergeDynamicRowGroups(bufs) + if err != nil { + return 0, err + } + err = func() error { + writer, err := schema.GetWriter(w, merged.DynamicColumns(), false) + if err != nil { + return err + } + defer writer.Close() + + rows := merged.Rows() + defer rows.Close() + + buf := make([]parquet.Row, merged.NumRows()) + if _, err := rows.ReadRows(buf); err != nil && !errors.Is(err, io.EOF) { + return err + } + if _, err := writer.WriteRows(buf); err != nil && !errors.Is(err, io.EOF) { + return err + } + + return nil + }() + if err != nil { + return 0, err + } + + return size, nil +} + +func check(t *testing.T, lsm *LSM, records, buffers int) { + t.Helper() + seen := map[SentinelType]bool{} + lsm.partList.Iterate(func(node *Node) bool { + if node.part == nil { + if seen[node.sentinel] { + 
t.Fatal("duplicate sentinel") + } + seen[node.sentinel] = true + } + return true + }) + rec := 0 + buf := 0 + require.NoError(t, lsm.Scan(context.Background(), "", nil, nil, math.MaxUint64, func(_ context.Context, v any) error { + switch v.(type) { + case arrow.Record: + rec++ + case dynparquet.DynamicRowGroup: + buf++ + } + return nil + })) + require.Equal(t, records, rec) + require.Equal(t, buf, buffers) +} + +func Test_LSM_Basic(t *testing.T) { + t.Parallel() + lsm, err := NewLSM("test", nil, []*LevelConfig{ + {Level: L0, MaxSize: 1024 * 1024 * 1024, Type: CompactionTypeParquetMemory, Compact: compactParts}, + {Level: L1, MaxSize: 1024 * 1024 * 1024, Type: CompactionTypeParquetMemory, Compact: compactParts}, + {Level: L2, MaxSize: 1024 * 1024 * 1024}, + }, + func() uint64 { return math.MaxUint64 }, + ) + require.NoError(t, err) + + samples := dynparquet.NewTestSamples() + r, err := samples.ToRecord() + require.NoError(t, err) + + lsm.Add(1, r) + lsm.Add(2, r) + lsm.Add(3, r) + check(t, lsm, 3, 0) + require.NoError(t, lsm.merge(L0)) + check(t, lsm, 0, 1) + lsm.Add(4, r) + check(t, lsm, 1, 1) + lsm.Add(5, r) + check(t, lsm, 2, 1) + require.NoError(t, lsm.merge(L0)) + check(t, lsm, 0, 2) + lsm.Add(6, r) + check(t, lsm, 1, 2) + require.NoError(t, lsm.merge(L1)) + check(t, lsm, 1, 1) + require.NoError(t, lsm.merge(L0)) + check(t, lsm, 0, 2) +} + +func Test_LSM_DuplicateSentinel(t *testing.T) { + t.Parallel() + lsm, err := NewLSM("test", nil, []*LevelConfig{ + {Level: L0, MaxSize: 1024 * 1024 * 1024, Type: CompactionTypeParquetMemory, Compact: compactParts}, + {Level: L1, MaxSize: 1024 * 1024 * 1024, Type: CompactionTypeParquetMemory, Compact: compactParts}, + {Level: L2, MaxSize: 1024 * 1024 * 1024}, + }, + func() uint64 { return math.MaxUint64 }, + ) + require.NoError(t, err) + + samples := dynparquet.NewTestSamples() + r, err := samples.ToRecord() + require.NoError(t, err) + + lsm.Add(1, r) + lsm.Add(2, r) + lsm.Add(3, r) + check(t, lsm, 3, 0) + require.NoError(t, 
lsm.merge(L0)) + check(t, lsm, 0, 1) + require.NoError(t, lsm.merge(L0)) + check(t, lsm, 0, 1) +} + +func Test_LSM_Compaction(t *testing.T) { + t.Parallel() + lsm, err := NewLSM("test", nil, []*LevelConfig{ + {Level: L0, MaxSize: 1, Type: CompactionTypeParquetMemory, Compact: compactParts}, + {Level: L1, MaxSize: 1024 * 1024 * 1024}, + }, + func() uint64 { return math.MaxUint64 }, + ) + require.NoError(t, err) + + samples := dynparquet.NewTestSamples() + r, err := samples.ToRecord() + require.NoError(t, err) + + lsm.Add(1, r) + require.Eventually(t, func() bool { + return lsm.sizes[L0].Load() == 0 && lsm.sizes[L1].Load() != 0 + }, 30*time.Second, 10*time.Millisecond) +} + +func Test_LSM_CascadeCompaction(t *testing.T) { + t.Parallel() + lsm, err := NewLSM("test", nil, []*LevelConfig{ + {Level: L0, MaxSize: 257, Type: CompactionTypeParquetMemory, Compact: compactParts}, + {Level: L1, MaxSize: 2281, Type: CompactionTypeParquetMemory, Compact: compactParts}, + {Level: L2, MaxSize: 2281, Type: CompactionTypeParquetMemory, Compact: compactParts}, + {Level: 3, MaxSize: 2281, Type: CompactionTypeParquetMemory, Compact: compactParts}, + {Level: 4, MaxSize: 2281}, + }, + func() uint64 { return math.MaxUint64 }, + ) + require.NoError(t, err) + + samples := dynparquet.NewTestSamples() + r, err := samples.ToRecord() + require.NoError(t, err) + + lsm.Add(1, r) + require.Eventually(t, func() bool { + return lsm.sizes[L0].Load() == 0 && + lsm.sizes[L1].Load() != 0 && + lsm.sizes[L2].Load() == 0 && + lsm.sizes[3].Load() == 0 && + lsm.sizes[4].Load() == 0 + }, 3*time.Second, 10*time.Millisecond) + lsm.Add(2, r) + require.Eventually(t, func() bool { + return lsm.sizes[L0].Load() == 0 && + lsm.sizes[L1].Load() == 0 && + lsm.sizes[L2].Load() == 0 && + lsm.sizes[3].Load() == 0 && + lsm.sizes[4].Load() != 0 + }, 30*time.Second, 10*time.Millisecond) +} + +func Test_LSM_InOrderInsert(t *testing.T) { + t.Parallel() + lsm, err := NewLSM("test", nil, []*LevelConfig{ + {Level: L0, MaxSize: 
1024 * 1024 * 1024, Type: CompactionTypeParquetMemory, Compact: compactParts}, + {Level: L1, MaxSize: 1024 * 1024 * 1024, Type: CompactionTypeParquetMemory, Compact: compactParts}, + {Level: L2, MaxSize: 1024 * 1024 * 1024}, + }, + func() uint64 { return math.MaxUint64 }, + ) + require.NoError(t, err) + + samples := dynparquet.NewTestSamples() + r, err := samples.ToRecord() + require.NoError(t, err) + + wg := &sync.WaitGroup{} + workers := 100 + inserts := 100 + wg.Add(workers) + for i := 0; i < workers; i++ { + go func() { + defer wg.Done() + for j := 0; j < inserts; j++ { + lsm.Add(rand.Uint64(), r) + } + }() + } + wg.Wait() + + tx := make([]uint64, 0, workers*inserts) + lsm.Iterate(func(node *Node) bool { + if node.part != nil { + tx = append(tx, node.part.TX()) + } + return true + }) + + // check that the transactions are sorted in descending order + require.True(t, slices.IsSortedFunc[[]uint64, uint64](tx, func(i, j uint64) int { + if i < j { + return 1 + } else if i > j { + return -1 + } + + return 0 + })) +} diff --git a/internal/records/record_builder.go b/internal/records/record_builder.go new file mode 100644 index 000000000..fa610b846 --- /dev/null +++ b/internal/records/record_builder.go @@ -0,0 +1,898 @@ +package records + +import ( + "reflect" + "regexp" + "slices" + "sort" + "strconv" + "strings" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/google/uuid" + + schemapb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha1" + "github.com/polarsignals/frostdb/pqarrow/arrowutils" +) + +const ( + TagName = "frostdb" +) + +type Record struct { + arrow.Record + SortingColumns []arrowutils.SortingColumn +} + +// Build is a generic arrow.Record builder that ingests structs of type T. The +// generated record can be passed to (*Table).InsertRecord. 
+// +// Struct tag `frostdb` is used to pass options for the schema for T and use +// (*Build[T]).Schema to obtain schema v1alpha1. +// +// This api is opinionated. +// +// - Nested Columns are not supported +// +// # Tags +// +// Use `frostdb` to define tags that customizes field values. You can express +// everything needed to construct schema v1alpha1. +// +// Tags are defined as a comma separated list. The first item is the column +// name. Column name is optional, when omitted it is derived from the field name +// (snake_cased) +// +// Supported Tags +// +// delta_binary_packed | Delta binary packed encoding. +// brotli | Brotli compression. +// asc | Sorts in ascending order.Use asc(n) where n is an integer for sorting order. +// gzip | GZIP compression. +// snappy | Snappy compression. +// delta_length_byte_array | Delta Length Byte Array encoding. +// delta_byte_array | Delta Byte Array encoding. +// desc | Sorts in descending order.Use desc(n) where n is an integer for sorting order +// lz4_raw | LZ4_RAW compression. +// pre_hash | Prehash the column before storing it. +// null_first | When used wit asc nulls are smallest and with des nulls are largest. +// zstd | ZSTD compression. +// rle_dict | Dictionary run-length encoding. +// plain | Plain encoding. +// +// Example tagged Sample struct +// +// type Sample struct { +// ExampleType string `frostdb:"example_type,rle_dict,asc(0)"` +// Labels []Label `frostdb:"labels,rle_dict,null,dyn,asc(1),null_first"` +// Stacktrace []uuid.UUID `frostdb:"stacktrace,rle_dict,asc(3),null_first"` +// Timestamp int64 `frostdb:"timestamp,asc(2)"` +// Value int64 `frostdb:"value"` +// } +// +// # Dynamic columns +// +// Field of type map is a dynamic column by default. +// +// type Example struct { +// // Use supported tags to customize the column value +// Labels map[string]string `frostdb:"labels"` +// } +// +// # Repeated columns +// +// Fields of type []int64, []float64, []bool, and []string are supported. 
These +// are represented as arrow.LIST. +// +// Generated schema for the repeated columns applies all supported tags. By +// default repeated fields are nullable. You can safely pass nil slices for +// repeated columns. +type Build[T any] struct { + fields []*fieldRecord + buffer []arrow.Array + sort []*fieldRecord +} + +func NewBuild[T any](mem memory.Allocator) *Build[T] { + var a T + r := reflect.TypeOf(a) + for r.Kind() == reflect.Ptr { + r = r.Elem() + } + if r.Kind() != reflect.Struct { + panic("frostdb/dynschema: " + r.String() + " is not supported") + } + b := &Build[T]{} + for i := 0; i < r.NumField(); i++ { + f := r.Field(i) + var ( + typ arrow.DataType + dictionary bool + preHash bool + nullable bool + sortColumn bool + nullFirst bool + sortOrder int + direction schemapb.SortingColumn_Direction + + encoding schemapb.StorageLayout_Encoding + compression schemapb.StorageLayout_Compression + styp schemapb.StorageLayout_Type + ) + name, tag := fieldName(f) + if tag != "" { + walkTag(tag, func(key, value string) { + switch key { + case "null_first": + nullFirst = true + case "asc", "desc": + sortColumn = true + sortOrder, _ = strconv.Atoi(value) + if key == "asc" { + direction = schemapb.SortingColumn_DIRECTION_ASCENDING + } else { + direction = schemapb.SortingColumn_DIRECTION_DESCENDING + } + case "pre_hash": + preHash = true + case "plain": + encoding = schemapb.StorageLayout_ENCODING_PLAIN_UNSPECIFIED + case "rle_dict": + encoding = schemapb.StorageLayout_ENCODING_RLE_DICTIONARY + dictionary = true + case "delta_binary_packed": + encoding = schemapb.StorageLayout_ENCODING_DELTA_BINARY_PACKED + case "delta_byte_array": + encoding = schemapb.StorageLayout_ENCODING_DELTA_BINARY_PACKED + case "delta_length_byte_array": + encoding = schemapb.StorageLayout_ENCODING_DELTA_LENGTH_BYTE_ARRAY + case "snappy": + compression = schemapb.StorageLayout_COMPRESSION_SNAPPY + case "gzip": + compression = schemapb.StorageLayout_COMPRESSION_GZIP + case "brotli": + 
compression = schemapb.StorageLayout_COMPRESSION_BROTLI + case "lz4_raw": + compression = schemapb.StorageLayout_COMPRESSION_LZ4_RAW + case "zstd": + compression = schemapb.StorageLayout_COMPRESSION_ZSTD + } + }) + } + fr := &fieldRecord{ + name: name, + preHash: preHash, + sort: sortColumn, + sortOrder: sortOrder, + nullFirst: nullFirst, + direction: direction, + compression: compression, + encoding: encoding, + } + fty := f.Type + for fty.Kind() == reflect.Ptr { + nullable = true + fty = fty.Elem() + } + switch fty.Kind() { + case reflect.Map: + typ, styp = baseType(fty.Elem(), dictionary) + fr.typ = styp + fr.dynamic = true + fr.nullable = true + fr.build = newMapFieldBuilder(newFieldFunc(typ, mem, name, + // Pointer base types needs to be property handled even for dynamic columns + // so map[string]string and map[string]*string should all work the same. + fty.Elem().Kind() == reflect.Ptr), + newRowsBeforeFunc(i, b.numRowsBefore), + ) + case reflect.Slice: + switch { + case isUUIDSlice(fty): + fr.typ = schemapb.StorageLayout_TYPE_STRING + fr.build = newUUIDSliceField(mem, name) + default: + typ, styp = baseType(fty.Elem(), dictionary) + fr.typ = styp + fr.repeated = true + // Repeated columns are always nullable + fr.nullable = true + typ = arrow.ListOf(typ) + fr.build = newFieldBuild(typ, mem, name, true) + } + case reflect.Int64, reflect.Float64, reflect.Bool, reflect.String, reflect.Uint64: + typ, styp = baseType(fty, dictionary) + fr.typ = styp + fr.nullable = nullable + fr.build = newFieldBuild(typ, mem, name, nullable) + default: + panic("frostdb/dynschema: " + fty.String() + " is npt supported") + } + b.fields = append(b.fields, fr) + } + return b +} + +// For dynamic columns we need to know the state of row counts to adjust nulls to +// match the record row count. +// +// This handles the case where a series of T without any dynamic columns is +// followed by dynamic columns. 
+func (b *Build[T]) numRowsBefore(fieldIdx int) int { + for i := 0; i <= len(b.fields) && i != fieldIdx; i++ { + before := i < fieldIdx + f := b.fields[i] + if f.dynamic { + // If we have dynamic columns before/after fieldIdx. We can stop looking if + // the columns were appended to. + if size := f.build.Len(); size != 0 { + if before { + // The field has already been processed. Adjust the size because we care + // about rows count before current T appending + size-- + } + return size + } + continue + } + size := b.fields[i].build.Len() + if before { + // The field has already been processed. Adjust the size because we care + // about rows count before current T appending + size-- + } + return size + } + return 0 +} + +func (b *Build[T]) Append(values ...T) error { + for _, value := range values { + v := reflect.ValueOf(value) + for v.Kind() == reflect.Ptr { + v = v.Elem() + } + for i := 0; i < v.NumField(); i++ { + err := b.fields[i].build.Append(v.Field(i)) + if err != nil { + return err + } + } + } + return nil +} + +func (b *Build[T]) NewRecord() *Record { + fields := make([]arrow.Field, 0, len(b.fields)) + for _, f := range b.fields { + fs := f.build.Fields() + if f.sort { + if f.dynamic { + for j := 0; j < len(fs); j++ { + b.sort = append(b.sort, f) + } + } else { + b.sort = append(b.sort, f) + } + } + fields = append(fields, fs...) 
+ b.buffer = f.build.NewArray(b.buffer) + } + defer func() { + for i := range b.buffer { + b.buffer[i].Release() + } + b.buffer = b.buffer[:0] + b.sort = b.sort[:0] + }() + sort.Slice(b.sort, func(i, j int) bool { + return b.sort[i].sortOrder < b.sort[j].sortOrder + }) + sortingCols := make([]arrowutils.SortingColumn, 0, len(b.sort)) + for idx, f := range b.sort { + direction := arrowutils.Ascending + if f.direction == schemapb.SortingColumn_DIRECTION_DESCENDING { + direction = arrowutils.Descending + } + sortingCols = append(sortingCols, arrowutils.SortingColumn{ + Index: idx, + Direction: direction, + NullsFirst: f.nullFirst, + }) + } + return &Record{ + Record: array.NewRecord( + arrow.NewSchema(fields, nil), + b.buffer, + int64(b.buffer[0].Len()), + ), + SortingColumns: sortingCols, + } +} + +func (b Build[T]) Schema(name string) (s *schemapb.Schema) { + s = &schemapb.Schema{Name: name, Columns: make([]*schemapb.Column, 0, len(b.fields))} + var toSort []*fieldRecord + for _, f := range b.fields { + s.Columns = append(s.Columns, &schemapb.Column{ + Name: f.name, + Dynamic: f.dynamic, + Prehash: f.preHash, + StorageLayout: &schemapb.StorageLayout{ + Type: f.typ, + Encoding: f.encoding, + Compression: f.compression, + Nullable: f.nullable, + Repeated: f.repeated, + }, + }) + if f.sort { + toSort = append(toSort, f) + } + } + sort.Slice(toSort, func(i, j int) bool { + return toSort[i].sortOrder < toSort[j].sortOrder + }) + for _, f := range toSort { + s.SortingColumns = append(s.SortingColumns, &schemapb.SortingColumn{ + Name: f.name, + Direction: f.direction, + NullsFirst: f.nullFirst, + }) + } + return +} + +func (b *Build[T]) Release() { + for _, f := range b.fields { + f.build.Release() + } + b.buffer = b.buffer[:0] +} + +type fieldBuilder interface { + Fields() []arrow.Field + Len() int + AppendNull() + Append(reflect.Value) error + NewArray([]arrow.Array) []arrow.Array + Release() +} + +type mapFieldBuilder struct { + newField func(string) fieldBuilder + 
rowsBefore func() int + columns map[string]fieldBuilder + seen map[string]struct{} + keys []string +} + +func newFieldFunc(dt arrow.DataType, mem memory.Allocator, name string, nullable bool) func(string) fieldBuilder { + return func(s string) fieldBuilder { + return newFieldBuild(dt, mem, name+"."+s, nullable) + } +} + +func newRowsBeforeFunc(i int, f func(int) int) func() int { + return func() int { + return f(i) + } +} + +func newMapFieldBuilder(newField func(string) fieldBuilder, rowsBefore func() int) *mapFieldBuilder { + return &mapFieldBuilder{ + newField: newField, + rowsBefore: rowsBefore, + columns: make(map[string]fieldBuilder), + seen: make(map[string]struct{}), + } +} + +var _ fieldBuilder = (*mapFieldBuilder)(nil) + +func (m *mapFieldBuilder) Fields() (o []arrow.Field) { + if len(m.columns) == 0 { + return []arrow.Field{} + } + o = make([]arrow.Field, 0, len(m.columns)) + m.keys = slices.Grow(m.keys, len(m.columns)) + for k := range m.columns { + m.keys = append(m.keys, k) + } + sort.Strings(m.keys) + for _, key := range m.keys { + o = append(o, m.columns[key].Fields()...) 
+ } + return +} + +func (m *mapFieldBuilder) NewArray(a []arrow.Array) []arrow.Array { + if len(m.columns) == 0 { + return a + } + m.keys = m.keys[:0] + for k := range m.columns { + m.keys = append(m.keys, k) + } + sort.Strings(m.keys) + for _, key := range m.keys { + a = m.columns[key].NewArray(a) + } + for _, v := range m.columns { + v.Release() + } + clear(m.columns) + m.keys = m.keys[:0] + return a +} + +func (m *mapFieldBuilder) AppendNull() {} + +func (m *mapFieldBuilder) Release() { + for _, v := range m.columns { + v.Release() + } + clear(m.columns) + m.keys = m.keys[:0] +} + +func (m *mapFieldBuilder) Append(v reflect.Value) error { + if v.IsNil() || v.Len() == 0 { + for _, v := range m.columns { + v.AppendNull() + } + return nil + } + clear(m.seen) + keys := v.MapKeys() + size := m.Len() + if size == 0 { + // Maybe we never supplied dynamic columns before but other columns were + // appended. + size = m.rowsBefore() + } + for _, key := range keys { + name := key.Interface().(string) + m.seen[name] = struct{}{} + err := m.get(name, size).Append(v.MapIndex(key)) + if err != nil { + return err + } + } + for k, v := range m.columns { + _, ok := m.seen[k] + if !ok { + // All record columns must have the same length. 
Set columns not present in v + // to null + v.AppendNull() + } + } + return nil +} + +func (m *mapFieldBuilder) Len() int { + for _, v := range m.columns { + return v.Len() + } + return 0 +} + +func (m *mapFieldBuilder) get(name string, size int) fieldBuilder { + f, ok := m.columns[name] + if ok { + return f + } + f = m.newField(name) + for i := 0; i < size; i++ { + f.AppendNull() + } + + m.columns[name] = f + return f +} + +func baseType(fty reflect.Type, dictionary bool) (typ arrow.DataType, sty schemapb.StorageLayout_Type) { + for fty.Kind() == reflect.Ptr { + fty = fty.Elem() + } + switch fty.Kind() { + case reflect.Int64: + typ = arrow.PrimitiveTypes.Int64 + sty = schemapb.StorageLayout_TYPE_INT64 + case reflect.Float64: + typ = arrow.PrimitiveTypes.Float64 + sty = schemapb.StorageLayout_TYPE_DOUBLE + case reflect.Bool: + typ = arrow.FixedWidthTypes.Boolean + sty = schemapb.StorageLayout_TYPE_BOOL + case reflect.String: + typ = arrow.BinaryTypes.String + sty = schemapb.StorageLayout_TYPE_STRING + case reflect.Uint64: + typ = arrow.PrimitiveTypes.Uint64 + sty = schemapb.StorageLayout_TYPE_UINT64 + default: + panic("frostdb/dynschema: " + fty.String() + " is npt supported") + } + if dictionary { + typ = &arrow.DictionaryType{ + IndexType: &arrow.Uint32Type{}, + ValueType: typ, + } + } + return +} + +func fieldName(f reflect.StructField) (name, tag string) { + name, tag, _ = strings.Cut(f.Tag.Get(TagName), ",") + if name == "" { + name = ToSnakeCase(f.Name) + } + return +} + +func newFieldBuild(dt arrow.DataType, mem memory.Allocator, name string, nullable bool) (f *fieldBuilderFunc) { + b := array.NewBuilder(mem, dt) + f = &fieldBuilderFunc{ + col: arrow.Field{ + Name: name, + Type: dt, + Nullable: nullable, + }, + releaseFunc: b.Release, + nilFunc: b.AppendNull, + len: b.Len, + newArraysFunc: func(a []arrow.Array) []arrow.Array { + return append(a, b.NewArray()) + }, + } + switch e := b.(type) { + case *array.Int64Builder: + f.buildFunc = func(v reflect.Value) 
error { + if nullable { + if v.IsNil() { + e.AppendNull() + return nil + } + v = v.Elem() + } + e.Append(v.Int()) + return nil + } + case *array.Int64DictionaryBuilder: + f.buildFunc = func(v reflect.Value) error { + if nullable { + if v.IsNil() { + e.AppendNull() + return nil + } + v = v.Elem() + } + return e.Append(v.Int()) + } + case *array.Uint64Builder: + f.buildFunc = func(v reflect.Value) error { + if nullable { + if v.IsNil() { + e.AppendNull() + return nil + } + v = v.Elem() + } + e.Append(v.Uint()) + return nil + } + case *array.Uint64DictionaryBuilder: + f.buildFunc = func(v reflect.Value) error { + if nullable { + if v.IsNil() { + e.AppendNull() + return nil + } + v = v.Elem() + } + return e.Append(v.Uint()) + } + case *array.Float64Builder: + f.buildFunc = func(v reflect.Value) error { + if nullable { + if v.IsNil() { + e.AppendNull() + return nil + } + v = v.Elem() + } + e.Append(v.Float()) + return nil + } + case *array.Float64DictionaryBuilder: + f.buildFunc = func(v reflect.Value) error { + if nullable { + if v.IsNil() { + e.AppendNull() + return nil + } + v = v.Elem() + } + return e.Append(v.Float()) + } + case *array.BooleanBuilder: + f.buildFunc = func(v reflect.Value) error { + if nullable { + if v.IsNil() { + e.AppendNull() + return nil + } + v = v.Elem() + } + e.Append(v.Bool()) + return nil + } + case *array.StringBuilder: + f.buildFunc = func(v reflect.Value) error { + if nullable { + if v.IsNil() { + e.AppendNull() + return nil + } + v = v.Elem() + } + e.Append(v.Interface().(string)) + return nil + } + case *array.BinaryDictionaryBuilder: + f.buildFunc = func(v reflect.Value) error { + if nullable { + if v.IsNil() { + e.AppendNull() + return nil + } + v = v.Elem() + } + return e.AppendString(v.Interface().(string)) + } + case *array.ListBuilder: + switch build := e.ValueBuilder().(type) { + case *array.Int64Builder: + f.buildFunc = func(v reflect.Value) error { + if v.IsNil() { + e.AppendNull() + return nil + } + e.Append(true) + 
build.Reserve(v.Len()) + return applyInt(v, func(i int64) error { + build.Append(i) + return nil + }) + } + case *array.Int64DictionaryBuilder: + f.buildFunc = func(v reflect.Value) error { + if v.IsNil() { + e.AppendNull() + return nil + } + e.Append(true) + build.Reserve(v.Len()) + return applyInt(v, build.Append) + } + case *array.Uint64Builder: + f.buildFunc = func(v reflect.Value) error { + if v.IsNil() { + e.AppendNull() + return nil + } + e.Append(true) + build.Reserve(v.Len()) + return applyUInt(v, func(i uint64) error { + build.Append(i) + return nil + }) + } + case *array.Float64Builder: + f.buildFunc = func(v reflect.Value) error { + if v.IsNil() { + e.AppendNull() + return nil + } + e.Append(true) + build.Reserve(v.Len()) + return applyFloat64(v, func(i float64) error { + build.Append(i) + return nil + }) + } + case *array.Float64DictionaryBuilder: + f.buildFunc = func(v reflect.Value) error { + if v.IsNil() { + e.AppendNull() + return nil + } + e.Append(true) + build.Reserve(v.Len()) + return applyFloat64(v, build.Append) + } + + case *array.StringBuilder: + f.buildFunc = func(v reflect.Value) error { + if v.IsNil() { + e.AppendNull() + return nil + } + e.Append(true) + build.Reserve(v.Len()) + return applyString(v, func(i string) error { + build.Append(i) + return nil + }) + } + case *array.BinaryDictionaryBuilder: + f.buildFunc = func(v reflect.Value) error { + if v.Len() == 0 { + e.AppendNull() + return nil + } + e.Append(true) + build.Reserve(v.Len()) + return applyString(v, build.AppendString) + } + case *array.BooleanBuilder: + f.buildFunc = func(v reflect.Value) error { + if v.IsNil() { + e.AppendNull() + return nil + } + e.Append(true) + build.Reserve(v.Len()) + return applyBool(v, func(i bool) error { + build.Append(i) + return nil + }) + } + } + default: + panic("frostdb:dynschema: unsupported array builder " + b.Type().String()) + } + return +} + +func applyString(v reflect.Value, apply func(string) error) error { + return 
listApply[string](v, func(v reflect.Value) string { + return v.Interface().(string) + }, apply) +} + +func applyFloat64(v reflect.Value, apply func(float64) error) error { + return listApply[float64](v, func(v reflect.Value) float64 { + return v.Float() + }, apply) +} + +func applyBool(v reflect.Value, apply func(bool) error) error { + return listApply[bool](v, func(v reflect.Value) bool { + return v.Bool() + }, apply) +} + +func applyInt(v reflect.Value, apply func(int64) error) error { + return listApply[int64](v, func(v reflect.Value) int64 { + return v.Int() + }, apply) +} + +func applyUInt(v reflect.Value, apply func(uint64) error) error { + return listApply[uint64](v, func(v reflect.Value) uint64 { + return v.Uint() + }, apply) +} + +func listApply[T any](v reflect.Value, fn func(reflect.Value) T, apply func(T) error) error { + for i := 0; i < v.Len(); i++ { + err := apply(fn(v.Index(i))) + if err != nil { + return err + } + } + return nil +} + +func newUUIDSliceField(mem memory.Allocator, name string) (f *fieldBuilderFunc) { + dt := &arrow.DictionaryType{ + IndexType: &arrow.Int32Type{}, + ValueType: &arrow.BinaryType{}, + } + b := array.NewBuilder(mem, dt) + f = &fieldBuilderFunc{ + col: arrow.Field{ + Name: name, + Type: dt, + }, + releaseFunc: b.Release, + nilFunc: b.AppendNull, + len: b.Len, + newArraysFunc: func(a []arrow.Array) []arrow.Array { + return append(a, b.NewArray()) + }, + } + bd := b.(*array.BinaryDictionaryBuilder) + f.buildFunc = func(v reflect.Value) error { + return bd.Append(ExtractLocationIDs(v.Interface().([]uuid.UUID))) + } + return +} + +func ExtractLocationIDs(locs []uuid.UUID) []byte { + b := make([]byte, len(locs)*16) // UUID are 16 bytes thus multiply by 16 + index := 0 + for i := len(locs) - 1; i >= 0; i-- { + copy(b[index:index+16], locs[i][:]) + index += 16 + } + return b +} + +type fieldBuilderFunc struct { + len func() int + col arrow.Field + nilFunc func() + buildFunc func(reflect.Value) error + newArraysFunc 
func([]arrow.Array) []arrow.Array + releaseFunc func() +} + +var _ fieldBuilder = (*fieldBuilderFunc)(nil) + +func (f *fieldBuilderFunc) Fields() []arrow.Field { return []arrow.Field{f.col} } +func (f *fieldBuilderFunc) Len() int { return f.len() } +func (f *fieldBuilderFunc) AppendNull() { f.nilFunc() } +func (f *fieldBuilderFunc) Append(v reflect.Value) error { return f.buildFunc(v) } +func (f *fieldBuilderFunc) NewArray(a []arrow.Array) []arrow.Array { return f.newArraysFunc(a) } +func (f *fieldBuilderFunc) Release() { f.releaseFunc() } + +type fieldRecord struct { + name string + dynamic bool + preHash bool + nullable bool + repeated bool + sort bool + nullFirst bool + sortOrder int + direction schemapb.SortingColumn_Direction + encoding schemapb.StorageLayout_Encoding + compression schemapb.StorageLayout_Compression + typ schemapb.StorageLayout_Type + build fieldBuilder +} + +func walkTag(tag string, f func(key, value string)) { + if tag == "" { + return + } + value, tag, _ := strings.Cut(tag, ",") + if value != "" { + k, v, _ := strings.Cut(value, "(") + v, _, _ = strings.Cut(v, ")") + f(k, v) + } + walkTag(tag, f) +} + +var uuidSliceType = reflect.TypeOf([]uuid.UUID{}) + +func isUUIDSlice(typ reflect.Type) bool { + return typ.AssignableTo(uuidSliceType) +} + +var ( + matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)") + matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])") +) + +func ToSnakeCase(str string) string { + snake := matchFirstCap.ReplaceAllString(str, "${1}_${2}") + snake = matchAllCap.ReplaceAllString(snake, "${1}_${2}") + return strings.ToLower(snake) +} diff --git a/internal/records/record_builder_test.go b/internal/records/record_builder_test.go new file mode 100644 index 000000000..db53af285 --- /dev/null +++ b/internal/records/record_builder_test.go @@ -0,0 +1,202 @@ +package records_test + +import ( + "testing" + + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/stretchr/testify/require" + 
"google.golang.org/protobuf/encoding/protojson" + "google.golang.org/protobuf/proto" + + "github.com/polarsignals/frostdb/internal/records" + "github.com/polarsignals/frostdb/samples" +) + +func TestBuild(t *testing.T) { + t.Run("NewBuild", func(_ *testing.T) { + b := records.NewBuild[samples.Sample](memory.DefaultAllocator) + defer b.Release() + + ptr := records.NewBuild[*samples.Sample](memory.DefaultAllocator) + defer ptr.Release() + }) + + t.Run("Schema", func(t *testing.T) { + b := records.NewBuild[samples.Sample](memory.DefaultAllocator) + defer b.Release() + got := b.Schema("test") + want := samples.SampleDefinition() + require.True(t, proto.Equal(want, got)) + }) + + t.Run("NewRecord", func(t *testing.T) { + b := records.NewBuild[samples.Sample](memory.DefaultAllocator) + defer b.Release() + samples := samples.NewTestSamples() + err := b.Append(samples...) + require.Nil(t, err) + r := b.NewRecord() + require.Equal(t, int64(len(samples)), r.NumRows()) + want := `[{"example_type":"cpu","labels.container":null,"labels.namespace":null,"labels.node":"test3","labels.pod":null,"stacktrace":"AAAAAAAAAAAAAAAAAAAAAgAAAAAAAAAAAAAAAAAAAAE=","timestamp":2,"value":5} +,{"example_type":"cpu","labels.container":null,"labels.namespace":"default","labels.node":null,"labels.pod":"test1","stacktrace":"AAAAAAAAAAAAAAAAAAAAAgAAAAAAAAAAAAAAAAAAAAE=","timestamp":2,"value":3} +,{"example_type":"cpu","labels.container":"test2","labels.namespace":"default","labels.node":null,"labels.pod":null,"stacktrace":"AAAAAAAAAAAAAAAAAAAAAgAAAAAAAAAAAAAAAAAAAAE=","timestamp":2,"value":3} +]` + got, err := r.MarshalJSON() + require.Nil(t, err) + require.JSONEq(t, want, string(got)) + }) + + t.Run("Repeated", func(t *testing.T) { + type Repeated struct { + Int []int64 + Float []float64 + Bool []bool + String []string + StringDict []string `frostdb:",rle_dict"` + Uint64 []uint64 + } + b := records.NewBuild[Repeated](memory.DefaultAllocator) + defer b.Release() + + 
wantSchema := `{ + "name": "repeated", + "columns": [ + { + "name": "int", + "storageLayout": { + "type": "TYPE_INT64", + "nullable": true, + "repeated": true + } + }, + { + "name": "float", + "storageLayout": { + "type": "TYPE_DOUBLE", + "nullable": true, + "repeated": true + } + }, + { + "name": "bool", + "storageLayout": { + "type": "TYPE_BOOL", + "nullable": true, + "repeated": true + } + }, + { + "name": "string", + "storageLayout": { + "type": "TYPE_STRING", + "nullable": true, + "repeated": true + } + }, + { + "name": "string_dict", + "storageLayout": { + "type": "TYPE_STRING", + "encoding": "ENCODING_RLE_DICTIONARY", + "nullable": true, + "repeated": true + } + }, + { + "name": "uint64", + "storageLayout": { + "type": "TYPE_UINT64", + "nullable": true, + "repeated": true + } + } + ] +}` + m := protojson.MarshalOptions{Multiline: true} + d, _ := m.Marshal(b.Schema("repeated")) + require.JSONEq(t, wantSchema, string(d)) + + err := b.Append( + Repeated{}, // nulls + Repeated{ + Int: []int64{1, 2}, + Float: []float64{1, 2}, + Bool: []bool{true, true}, + String: []string{"a", "b"}, + StringDict: []string{"a", "b"}, + Uint64: []uint64{1, 2}, + }, + Repeated{ + Int: []int64{1, 2}, + Float: []float64{1, 2}, + Bool: []bool{true, true}, + String: []string{"a", "b"}, + StringDict: []string{"c", "d"}, + Uint64: []uint64{1, 2}, + }, + ) + require.Nil(t, err) + want := `[{"bool":null,"float":null,"int":null,"string":null,"string_dict":null, "uint64":null} +,{"bool":[true,true],"float":[1,2],"int":[1,2],"string":["a","b"],"string_dict":["a","b"],"uint64":[1, 2]} +,{"bool":[true,true],"float":[1,2],"int":[1,2],"string":["a","b"],"string_dict":["c","d"],"uint64":[1, 2]} +]` + r := b.NewRecord() + data, _ := r.MarshalJSON() + require.JSONEq(t, want, string(data)) + }) +} + +func TestBuild_pointer_base_types(t *testing.T) { + type PointerBase struct { + Int *int64 + Double *float64 + String *string + Dynamic map[string]*string + } + + b := 
records.NewBuild[PointerBase](memory.NewGoAllocator()) + defer b.Release() + + err := b.Append( + PointerBase{}, + PointerBase{ + Int: point[int64](1), + Double: point[float64](1), + String: point[string]("1"), + Dynamic: map[string]*string{ + "one": point[string]("1"), + }, + }, + ) + require.Nil(t, err) + r := b.NewRecord() + defer r.Release() + + want := `[{"double":null,"dynamic.one":null,"int":null,"string":null} +,{"double":1,"dynamic.one":"1","int":1,"string":"1"} +]` + + got, err := r.MarshalJSON() + require.Nil(t, err) + require.JSONEq(t, want, string(got)) +} + +func point[T any](t T) *T { + return &t +} + +func BenchmarkBuild_Append_Then_NewRecord(b *testing.B) { + // The way the record builder is used consist of calling Append followed by + // NewRecord + // + // They are separate methods because we can't ignore benefits of buffering. + build := records.NewBuild[samples.Sample](memory.DefaultAllocator) + defer build.Release() + samples := samples.NewTestSamples() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + _ = build.Append(samples...) 
+ r := build.NewRecord() + r.Release() + } +} diff --git a/internal/records/record_reader.go b/internal/records/record_reader.go new file mode 100644 index 000000000..de4cc47fa --- /dev/null +++ b/internal/records/record_reader.go @@ -0,0 +1,188 @@ +package records + +import ( + "reflect" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" +) + +type Reader[T any] struct { + records []arrow.Record +} + +func NewReader[T any](records ...arrow.Record) *Reader[T] { + var a T + r := reflect.TypeOf(a) + for r.Kind() == reflect.Ptr { + r = r.Elem() + } + if r.Kind() != reflect.Struct { + panic("frostdb/dynschema: " + r.String() + " is not supported") + } + + return &Reader[T]{records: records} +} + +func (r *Reader[T]) NumRows() int64 { + var rows int64 + for _, record := range r.records { + rows += record.NumRows() + } + return rows +} + +func (r *Reader[T]) Value(i int) T { + row := *new(T) + rowType := reflect.TypeOf(row) + + // find the record with the value + var record arrow.Record + var previousRows int64 + for _, rec := range r.records { + if i < int(previousRows+rec.NumRows()) { + record = rec + i = i - int(previousRows) + break + } + previousRows += rec.NumRows() + } + + for j := 0; j < rowType.NumField(); j++ { + f := rowType.Field(j) + name, _ := fieldName(f) + + indices := record.Schema().FieldIndices(name) + if len(indices) != 1 { + panic("field " + name + " not found or ambiguous") + } + + switch f.Type.Kind() { + case reflect.Bool: + arr, ok := record.Column(indices[0]).(*array.Boolean) + if !ok || arr.IsNull(i) { + continue + } + reflect.ValueOf(&row).Elem().Field(j).Set( + reflect.ValueOf( + arr.Value(i), + ), + ) + case reflect.Float32: + arr, ok := record.Column(indices[0]).(*array.Float32) + if !ok || arr.IsNull(i) { + continue + } + reflect.ValueOf(&row).Elem().Field(j).Set( + reflect.ValueOf( + arr.Value(i), + ), + ) + case reflect.Float64: + arr, ok := 
record.Column(indices[0]).(*array.Float64) + if !ok || arr.IsNull(i) { + continue + } + reflect.ValueOf(&row).Elem().Field(j).Set( + reflect.ValueOf( + arr.Value(i), + ), + ) + case reflect.Int8: + arr, ok := record.Column(indices[0]).(*array.Int8) + if !ok || arr.IsNull(i) { + continue + } + reflect.ValueOf(&row).Elem().Field(j).Set( + reflect.ValueOf( + arr.Value(i), + ), + ) + case reflect.Int16: + arr, ok := record.Column(indices[0]).(*array.Int16) + if !ok || arr.IsNull(i) { + continue + } + reflect.ValueOf(&row).Elem().Field(j).Set( + reflect.ValueOf( + arr.Value(i), + ), + ) + case reflect.Int32: + arr, ok := record.Column(indices[0]).(*array.Int32) + if !ok || arr.IsNull(i) { + continue + } + reflect.ValueOf(&row).Elem().Field(j).Set( + reflect.ValueOf( + arr.Value(i), + ), + ) + case reflect.Int64: + arr, ok := record.Column(indices[0]).(*array.Int64) + if !ok || arr.IsNull(i) { + continue + } + reflect.ValueOf(&row).Elem().Field(j).Set( + reflect.ValueOf( + arr.Value(i), + ), + ) + case reflect.Uint8: + arr, ok := record.Column(indices[0]).(*array.Uint8) + if !ok || arr.IsNull(i) { + continue + } + reflect.ValueOf(&row).Elem().Field(j).Set( + reflect.ValueOf( + arr.Value(i), + ), + ) + case reflect.Uint16: + arr, ok := record.Column(indices[0]).(*array.Uint16) + if !ok || arr.IsNull(i) { + continue + } + reflect.ValueOf(&row).Elem().Field(j).Set( + reflect.ValueOf( + arr.Value(i), + ), + ) + case reflect.Uint32: + arr, ok := record.Column(indices[0]).(*array.Uint32) + if !ok || arr.IsNull(i) { + continue + } + reflect.ValueOf(&row).Elem().Field(j).Set( + reflect.ValueOf( + arr.Value(i), + ), + ) + case reflect.Uint64: + arr, ok := record.Column(indices[0]).(*array.Uint64) + if !ok || arr.IsNull(i) { + continue + } + reflect.ValueOf(&row).Elem().Field(j).Set( + reflect.ValueOf( + arr.Value(i), + ), + ) + case reflect.String: + // We probably need to support array.Binary too + arr, ok := record.Column(indices[0]).(*array.String) + if !ok || arr.IsNull(i) { 
+ continue + } + reflect.ValueOf(&row).Elem().Field(j).Set( + reflect.ValueOf( + arr.Value(i), + ), + ) + default: + panic("unsupported type " + f.Type.String()) + } + } + + return row +} diff --git a/logictest/logic_test.go b/logictest/logic_test.go index 2417af57a..85c0db6a9 100644 --- a/logictest/logic_test.go +++ b/logictest/logic_test.go @@ -4,7 +4,7 @@ import ( "context" "testing" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow/memory" "github.com/cockroachdb/datadriven" "github.com/stretchr/testify/require" @@ -20,16 +20,17 @@ const testdataDirectory = "testdata" type frostDB struct { *frostdb.DB + allocator *memory.CheckedAllocator } -func (db frostDB) CreateTable(name string, schema *dynparquet.Schema) (Table, error) { - return db.DB.Table(name, frostdb.NewTableConfig(schema)) +func (db frostDB) CreateTable(name string, schema *schemapb.Schema) (Table, error) { + return db.Table(name, frostdb.NewTableConfig(schema)) } func (db frostDB) ScanTable(name string) query.Builder { queryEngine := query.NewEngine( - memory.NewGoAllocator(), - db.DB.TableProvider(), + db.allocator, + db.TableProvider(), query.WithPhysicalplanOptions( physicalplan.WithOrderedAggregations(), ), @@ -37,9 +38,9 @@ func (db frostDB) ScanTable(name string) query.Builder { return queryEngine.ScanTable(name) } -var schemas = map[string]*dynparquet.Schema{ - "default": dynparquet.NewSampleSchema(), - "simple_bool": SchemaMust(&schemapb.Schema{ +var schemas = map[string]*schemapb.Schema{ + "default": dynparquet.SampleDefinitionWithFloat(), + "simple_bool": { Name: "simple_bool", Columns: []*schemapb.Column{{ Name: "name", @@ -57,7 +58,92 @@ var schemas = map[string]*dynparquet.Schema{ Name: "found", Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, }}, - }), + }, + "prehashed": { + Name: "test", + Columns: []*schemapb.Column{{ + Name: "labels", + StorageLayout: &schemapb.StorageLayout{ + Type: 
schemapb.StorageLayout_TYPE_STRING, + Nullable: true, + Encoding: schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, + }, + Dynamic: true, + Prehash: true, + }, { + Name: "stacktrace", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_STRING, + Encoding: schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, + }, + Dynamic: false, + Prehash: true, + }, { + Name: "timestamp", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_INT64, + }, + Dynamic: false, + }, { + Name: "value", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_INT64, + }, + Dynamic: false, + }}, + SortingColumns: []*schemapb.SortingColumn{{ + Name: "example_type", + Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, + }, { + Name: "labels", + Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, + NullsFirst: true, + }, { + Name: "timestamp", + Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, + }, { + Name: "stacktrace", + Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, + NullsFirst: true, + }}, + }, + "bytes": { + Name: "test", + Columns: []*schemapb.Column{{ + Name: "labels", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_STRING, + Encoding: schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, + Nullable: true, + }, + Dynamic: true, + }, { + Name: "timestamp", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_UINT64, + }, + Dynamic: false, + }, { + Name: "value", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_STRING, + Encoding: schemapb.StorageLayout_ENCODING_DELTA_LENGTH_BYTE_ARRAY, + Compression: schemapb.StorageLayout_COMPRESSION_LZ4_RAW, + }, + Dynamic: false, + }}, + SortingColumns: []*schemapb.SortingColumn{{ + Name: "labels", + Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, + NullsFirst: true, + }, { + Name: "timestamp", + Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, + }, { + Name: "value", + 
Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, + }}, + }, } // TestLogic runs all the datadriven tests in the testdata directory. Refer to @@ -78,18 +164,16 @@ func TestLogic(t *testing.T) { defer columnStore.Close() db, err := columnStore.DB(ctx, "test") require.NoError(t, err) - r := NewRunner(frostDB{DB: db}, schemas) - datadriven.RunTest(t, path, func(t *testing.T, c *datadriven.TestData) string { + fdb := frostDB{ + DB: db, + allocator: memory.NewCheckedAllocator(memory.DefaultAllocator), + } + r := NewRunner(fdb, schemas) + datadriven.RunTest(t, path, func(_ *testing.T, c *datadriven.TestData) string { return r.RunCmd(ctx, c) }) + if path != "testdata/exec/aggregate/ordered_aggregate" { // NOTE: skip checking the limit for the ordered aggregator as it still leaks memory. + fdb.allocator.AssertSize(t, 0) + } }) } - -func SchemaMust(def *schemapb.Schema) *dynparquet.Schema { - schema, err := dynparquet.SchemaFromDefinition(def) - if err != nil { - panic(err.Error()) - } - - return schema -} diff --git a/logictest/runner.go b/logictest/runner.go index f49b6b1e8..491a3d6a8 100644 --- a/logictest/runner.go +++ b/logictest/runner.go @@ -9,14 +9,18 @@ import ( "strings" "text/tabwriter" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" "github.com/cockroachdb/datadriven" "github.com/google/uuid" - "github.com/segmentio/parquet-go" + "github.com/parquet-go/parquet-go" "github.com/polarsignals/frostdb/dynparquet" + schemapb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha1" + "github.com/polarsignals/frostdb/pqarrow" "github.com/polarsignals/frostdb/query" + 
"github.com/polarsignals/frostdb/query/logicalplan" "github.com/polarsignals/frostdb/sqlparse" ) @@ -52,26 +56,27 @@ const ( ) type DB interface { - CreateTable(name string, schema *dynparquet.Schema) (Table, error) + CreateTable(name string, schema *schemapb.Schema) (Table, error) // ScanTable returns a query.Builder prepared to scan the given table. ScanTable(name string) query.Builder } type Table interface { Schema() *dynparquet.Schema - InsertBuffer(context.Context, *dynparquet.Buffer) (uint64, error) + InsertRecord(context.Context, arrow.Record) (uint64, error) } type Runner struct { db DB - schemas map[string]*dynparquet.Schema + schemas map[string]*schemapb.Schema activeTable Table activeTableName string + activeTableParquetSchema *parquet.Schema activeTableDynamicColumns []string sqlParser *sqlparse.Parser } -func NewRunner(db DB, schemas map[string]*dynparquet.Schema) *Runner { +func NewRunner(db DB, schemas map[string]*schemapb.Schema) *Runner { return &Runner{ db: db, schemas: schemas, @@ -101,8 +106,8 @@ func (r *Runner) handleCmd(ctx context.Context, c *datadriven.TestData) (string, return "", fmt.Errorf("unknown command %s", c.Cmd) } -func (r *Runner) handleCreateTable(ctx context.Context, c *datadriven.TestData) (string, error) { - var schema *dynparquet.Schema +func (r *Runner) handleCreateTable(_ context.Context, c *datadriven.TestData) (string, error) { + var schema *schemapb.Schema for _, arg := range c.CmdArgs { if arg.Key == "schema" { if len(arg.Vals) != 1 { @@ -123,7 +128,8 @@ func (r *Runner) handleCreateTable(ctx context.Context, c *datadriven.TestData) } r.activeTable = table r.activeTableName = name - for _, c := range schema.Columns() { + r.activeTableParquetSchema = table.Schema().ParquetSchema() + for _, c := range table.Schema().Columns() { if c.Dynamic { r.activeTableDynamicColumns = append(r.activeTableDynamicColumns, c.Name) } @@ -217,7 +223,15 @@ func (r *Runner) handleInsert(ctx context.Context, c 
*datadriven.TestData) (stri if err != nil { return "", fmt.Errorf("insert: %w", err) } - rows[i] = append(rows[i], parquet.ValueOf(v).Level(0, 0, colIdx)) + if col.StorageLayout.Optional() { + if parquet.ValueOf(v).IsNull() { + rows[i] = append(rows[i], parquet.ValueOf(v).Level(0, 0, colIdx)) + } else { + rows[i] = append(rows[i], parquet.ValueOf(v).Level(0, 1, colIdx)) + } + } else { + rows[i] = append(rows[i], parquet.ValueOf(v).Level(0, 0, colIdx)) + } colIdx++ continue } @@ -227,7 +241,13 @@ func (r *Runner) handleInsert(ctx context.Context, c *datadriven.TestData) (stri if err != nil { return "", fmt.Errorf("insert: %w", err) } - rows[i] = append(rows[i], parquet.ValueOf(v).Level(0, 1, colIdx)) + parquetV := parquet.ValueOf(v) + if parquetV.IsNull() { + parquetV = parquetV.Level(0, 0, colIdx) + } else { + parquetV = parquetV.Level(0, 1, colIdx) + } + rows[i] = append(rows[i], parquetV) colIdx++ } } @@ -241,8 +261,20 @@ func (r *Runner) handleInsert(ctx context.Context, c *datadriven.TestData) (stri if _, err := buf.WriteRows(rows); err != nil { return "", fmt.Errorf("insert: %w", err) } + buf.Sort() + + // TODO: https://github.com/polarsignals/frostdb/issues/548 Should just build the arrow record directly. 
+ converter := pqarrow.NewParquetConverter(memory.NewGoAllocator(), logicalplan.IterOptions{}) + defer converter.Close() + + if err := converter.Convert(ctx, buf, schema); err != nil { + return "", err + } - if _, err := r.activeTable.InsertBuffer(ctx, buf); err != nil { + rec := converter.NewRecord() + defer rec.Release() + + if _, err := r.activeTable.InsertRecord(ctx, rec); err != nil { return "", fmt.Errorf("insert: %w", err) } @@ -263,6 +295,12 @@ func stringToValue(t parquet.Type, stringValue string) (any, error) { return nil, fmt.Errorf("unexpected error converting %s to int: %w", stringValue, err) } return intValue, nil + case parquet.Double: + floatValue, err := strconv.ParseFloat(stringValue, 64) + if err != nil { + return nil, fmt.Errorf("unexpected error converting %s to float: %w", stringValue, err) + } + return floatValue, nil case parquet.Boolean: switch stringValue { case "true": @@ -348,9 +386,14 @@ func (r *Runner) handleExec(ctx context.Context, c *datadriven.TestData) (string return b.String(), nil } -func (r *Runner) parseSQL(dynColNames []string, sql string) (sqlparse.ParseResult, error) { +func (r *Runner) parseSQL( + dynColNames []string, + sql string, +) (sqlparse.ParseResult, error) { res, err := r.sqlParser.ExperimentalParse( - r.db.ScanTable(r.activeTableName), dynColNames, sql, + r.db.ScanTable(r.activeTableName), + dynColNames, + sql, ) if err != nil { return sqlparse.ParseResult{}, err @@ -378,6 +421,22 @@ func arrayToStringVals(a arrow.Array) ([]string, error) { } result[i] = strconv.Itoa(int(col.Value(i))) } + case *array.Uint64: + for i := range result { + if col.IsNull(i) { + result[i] = nullString + continue + } + result[i] = strconv.FormatUint(col.Value(i), 10) + } + case *array.Float64: + for i := range result { + if col.IsNull(i) { + result[i] = nullString + continue + } + result[i] = fmt.Sprintf("%f", float64(col.Value(i))) + } case *array.Boolean: for i := range result { if col.IsNull(i) { diff --git 
a/logictest/testdata/exec/aggregate/aggregate b/logictest/testdata/exec/aggregate/aggregate index 33163e81e..a471ddc7e 100644 --- a/logictest/testdata/exec/aggregate/aggregate +++ b/logictest/testdata/exec/aggregate/aggregate @@ -1,61 +1,96 @@ createtable schema=default ---- -insert cols=(labels.label1, labels.label2, labels.label3, labels.label4, stacktrace, timestamp, value) -value1 value2 null null stack1 1 1 -value2 value2 value3 null stack1 2 2 -value3 value2 null value4 stack1 3 3 +insert cols=(labels.label1, labels.label2, labels.label3, labels.label4, stacktrace, timestamp, value, floatvalue) +value1 value2 null null stack1 1 1 1.1 +value2 value2 value3 null stack1 2 2 2.2 +value3 value2 null value4 stack1 3 3 3.3 ---- -insert cols=(labels.label1, labels.label2, labels.label3, labels.label4, stacktrace, timestamp, value) -value4 value2 null null stack1 4 4 -value5 value2 value3 null stack1 5 5 -value6 value2 null value4 stack1 6 6 +insert cols=(labels.label1, labels.label2, labels.label3, labels.label4, stacktrace, timestamp, value, floatvalue) +value4 value2 null null stack1 4 4 4.4 +value5 value2 value3 null stack1 5 5 5.5 +value6 value2 null value4 stack1 6 6 6.6 ---- exec -select sum(value) as value_sum group by labels.label2 +select sum(value) as value_sum, labels.label2 group by labels.label2 ---- -value2 21 +21 value2 exec -select max(value) as value_max group by labels.label2 +select labels.label2, sum(floatvalue) as float_value_sum group by labels.label2 +---- +value2 23.100000 + +exec +select labels.label2, max(value) as value_max group by labels.label2 ---- value2 6 exec -select count(value) as value_count group by labels.label2 +select labels.label2, max(floatvalue) as value_max group by labels.label2 +---- +value2 6.600000 + +exec +select labels.label2, min(floatvalue) as value_min group by labels.label2 +---- +value2 1.100000 + +exec +select labels.label2, count(value) as value_count group by labels.label2 ---- value2 6 exec -select avg(value) 
as value_avg group by labels.label2 +select labels.label2, avg(value) group by labels.label2 +---- +value2 3 + +exec +select labels.label2, avg(value) as value_avg group by labels.label2 ---- value2 3 exec -select avg(value) as value_avg group by labels.label4 +select labels.label2, avg(floatvalue) as value_avg group by labels.label2 +---- +value2 3.850000 + +exec +select labels.label4, avg(value) as value_avg group by labels.label4 ---- null 3 value4 4 exec -select sum(value), count(value) group by stacktrace +select stacktrace, sum(value), count(value) group by stacktrace ---- stack1 21 6 exec -select sum(value) as value_sum, count(value) as value_count group by stacktrace +select stacktrace, sum(floatvalue), count(floatvalue) group by stacktrace +---- +stack1 23.100000 6 + +exec +select stacktrace, sum(value) as value_sum, count(value) as value_count group by stacktrace ---- stack1 21 6 exec -select sum(value), count(value), min(value), max(value) group by labels.label2 +select labels.label2, sum(value), count(value), min(value), max(value) group by labels.label2 ---- value2 21 6 1 6 +exec +select labels.label2, sum(floatvalue), count(floatvalue), min(floatvalue), max(floatvalue) group by labels.label2 +---- +value2 23.100000 6 1.100000 6.600000 + exec unordered -select sum(value) as value_sum where timestamp >= 1 group by labels.label1 +select labels.label1, sum(value) as value_sum where timestamp >= 1 group by labels.label1 ---- value1 1 value2 2 @@ -63,3 +98,27 @@ value3 3 value4 4 value5 5 value6 6 + +exec unordered +select labels, sum(value) as value_sum group by labels +---- +value1 value2 null null 1 +value2 value2 value3 null 2 +value3 value2 null value4 3 +value4 value2 null null 4 +value5 value2 value3 null 5 +value6 value2 null value4 6 + +# Limit but there are fewer rows +exec +select sum(value) as value_sum, labels.label3 group by labels.label3 limit 3 +---- +14 null +7 value3 + +# Actually limit to 1 row +exec +select sum(value) as value_sum, 
labels.label3 group by labels.label3 limit 1 +---- +14 null + diff --git a/logictest/testdata/exec/aggregate/aggregate_nulls b/logictest/testdata/exec/aggregate/aggregate_nulls index ec0394f76..4f0d81f4d 100644 --- a/logictest/testdata/exec/aggregate/aggregate_nulls +++ b/logictest/testdata/exec/aggregate/aggregate_nulls @@ -10,25 +10,25 @@ null value2 stack1 3 3 # TODO query results should explicitly return null values # https://github.com/polarsignals/frostdb/issues/259 exec -select sum(value) as value_sum group by labels.label2 +select labels.label2, sum(value) as value_sum group by labels.label2 ---- value2 5 null 1 exec -select max(value) as value_max group by labels.label2 +select labels.label2, max(value) as value_max group by labels.label2 ---- value2 3 null 1 exec -select count(value) as value_count group by labels.label2 +select labels.label2, count(value) as value_count group by labels.label2 ---- value2 2 null 1 exec -select sum(value) as value_sum, count(value) as value_count group by labels.label2 +select labels.label2, sum(value) as value_sum, count(value) as value_count group by labels.label2 ---- value2 5 2 null 1 1 diff --git a/logictest/testdata/exec/aggregate/math b/logictest/testdata/exec/aggregate/math new file mode 100644 index 000000000..100e4f799 --- /dev/null +++ b/logictest/testdata/exec/aggregate/math @@ -0,0 +1,138 @@ +createtable schema=default +---- + +insert cols=(labels.label1, timestamp, value) +value1 1 2 +value1 3 4 +value1 5 6 +value1 11 0 +---- + +exec +select value * timestamp +---- +2 +12 +30 +0 + +exec +select value * 2 +---- +4 +8 +12 +0 + +exec +select 2 * 3 +---- +6 +6 +6 +6 + +exec +select value * (2 * 3) +---- +12 +24 +36 +0 + +exec +select value * (timestamp * 2) +---- +4 +24 +60 +0 + +exec +select value * timestamp * 2 +---- +4 +24 +60 +0 + +exec +select value * timestamp + 2 - 1 +---- +3 +13 +31 +1 + +exec +select value * timestamp * (2 - 1) +---- +2 +12 +30 +0 + +exec +select value, value * timestamp * 
(2 - 1) +---- +2 2 +4 12 +6 30 +0 0 + +exec +select timestamp, value * timestamp * (2 - 1) where value > 2 +---- +3 12 +5 30 + +exec +select value, timestamp * value * (2 - 1) where value > 2 +---- +4 12 +6 30 + +exec +select value , value * timestamp * (2 - 1) as result where value > 2 +---- +4 12 +6 30 + +exec +select value as result, value * timestamp * (2 - 1) where value > 2 +---- +4 12 +6 30 + +exec +select labels.label1, sum(value), count(value) group by labels.label1 +---- +value1 12 4 + +exec +select max(value) - min(value) , sum(value) / count(value) group by labels.label1 +---- +6 3 + +exec +select labels.label1, max(value) - min(value) , sum(value) / count(value) group by labels.label1 +---- +value1 6 3 + +exec +select labels.label1, (max(value) - min(value)) /2 , sum(value) / count(value) group by labels.label1 +---- +value1 3 3 + +exec +select labels.label1, (max(value) - min(value)) /2 , sum(value) / count(value) , (sum(value) / count(value)) * 2 group by labels.label1 +---- +value1 3 3 6 + +exec +select timestamp / value +---- +0 +0 +0 +null diff --git a/logictest/testdata/exec/aggregate/ordered_aggregate b/logictest/testdata/exec/aggregate/ordered_aggregate deleted file mode 100644 index 4e8818cf2..000000000 --- a/logictest/testdata/exec/aggregate/ordered_aggregate +++ /dev/null @@ -1,53 +0,0 @@ -createtable schema=default ----- - -insert cols=(example_type, labels.label1, timestamp, value) -type1 value1 1 1 -type1 value1 2 1 -type2 value1 1 1 ----- - -insert cols=(example_type, labels.label1, timestamp, value) -type1 value1 1 1 -type1 value1 2 1 -type2 value1 1 1 ----- - -insert cols=(example_type, labels.label2, timestamp, value) -type1 value2 1 1 -type1 value2 2 1 -type2 value2 1 1 ----- - -insert cols=(example_type, labels.label3, timestamp, value) -type1 value3 1 1 -type1 value3 2 1 -type2 value3 1 1 ----- - -exec -select sum(value) as value_sum group by (example_type, labels) ----- -type1 null null value3 2 -type1 null value2 null 2 -type1 
value1 null null 4 -type2 null null value3 1 -type2 null value2 null 1 -type2 value1 null null 2 - -exec -select sum(value) as value_sum where example_type = 'type1' group by (labels) ----- -null null value3 2 -null value2 null 2 -value1 null null 4 - -exec -select sum(value) as value_sum where example_type = 'type1' group by (labels, timestamp) ----- -null null value3 1 1 -null null value3 2 1 -null value2 null 1 1 -null value2 null 2 1 -value1 null null 1 2 -value1 null null 2 2 diff --git a/logictest/testdata/exec/aggregate/window b/logictest/testdata/exec/aggregate/window index 7838bf4e4..c4d444f0f 100644 --- a/logictest/testdata/exec/aggregate/window +++ b/logictest/testdata/exec/aggregate/window @@ -11,52 +11,53 @@ value4 stack1 123000 4 ---- exec -select sum(value) as value_sum group by second(1) +select sum(value) as value_sum, (timestamp/1000)*1000 as timestamp_bucket group by timestamp_bucket ---- -120000 1 -121000 2 -122000 3 -123000 4 +1 120000 +2 121000 +3 122000 +4 123000 exec -select sum(value) as value_sum group by second(2) +select sum(value) as value_sum, (timestamp/2000)*2000 as timestamp_bucket group by timestamp_bucket ---- -120000 3 -122000 7 +3 120000 +7 122000 exec -select sum(value) as value_sum group by second(3) +select sum(value) as value_sum, (timestamp/3000)*3000 as timestamp_bucket group by timestamp_bucket ---- -120000 6 -123000 4 +6 120000 +4 123000 exec -select sum(value) as value_sum, count(value) as value_count group by second(3) +select sum(value) as value_sum, count(value) as value_count, (timestamp/3000)*3000 as timestamp_bucket group by timestamp_bucket ---- -120000 6 3 -123000 4 1 +6 3 120000 +4 1 123000 exec -select sum(value) as value_sum group by second(4) +select sum(value) as value_sum, (timestamp/4000)*4000 as timestamp_bucket group by timestamp_bucket ---- -120000 10 +10 120000 exec -select sum(value) as value_sum group by labels.label1, second(5) +select labels.label1, (timestamp/5000)*5000 as timestamp_bucket, 
sum(value) as value_sum group by labels.label1, timestamp_bucket +# TODO: Not sure if this is strictly correct. Previously with "group by second(5)" we preserved the timestamp if there was only a single timestamp. ---- value1 120000 1 -value2 121000 2 -value3 122000 3 -value4 123000 4 +value2 120000 2 +value3 120000 3 +value4 120000 4 exec -select sum(value) as value_sum, count(timestamp) as timestamp_count group by second(2) +select (timestamp/2000)*2000 as timestamp_bucket, sum(value) as value_sum, count(timestamp) as timestamp_count group by timestamp_bucket ---- 120000 3 2 122000 7 2 exec -select count(timestamp) as timestamp_count group by second(3) +select (timestamp/3000)*3000 as timestamp_bucket, count(timestamp) as timestamp_count group by timestamp_bucket ---- 120000 3 123000 1 diff --git a/logictest/testdata/exec/filter/filter b/logictest/testdata/exec/filter/filter index 7feea3e91..3be201d37 100644 --- a/logictest/testdata/exec/filter/filter +++ b/logictest/testdata/exec/filter/filter @@ -7,6 +7,38 @@ value2 value2 value3 null stack1 2 2 value3 value2 null value4 stack1 3 3 ---- +exec +select labels, stacktrace, timestamp, value where timestamp = 2 +---- +value2 value2 value3 null stack1 2 2 + +exec +select labels, stacktrace, timestamp, value where timestamp != 2 +---- +value1 value2 null null stack1 1 1 +value3 value2 null value4 stack1 3 3 + +exec +select labels, stacktrace, timestamp, value where timestamp < 2 +---- +value1 value2 null null stack1 1 1 + +exec +select labels, stacktrace, timestamp, value where timestamp <= 2 +---- +value1 value2 null null stack1 1 1 +value2 value2 value3 null stack1 2 2 + +exec +select labels, stacktrace, timestamp, value where timestamp <= 2 limit 1 +---- +value1 value2 null null stack1 1 1 + +exec +select labels, stacktrace, timestamp, value where timestamp > 2 +---- +value3 value2 null value4 stack1 3 3 + exec select labels, stacktrace, timestamp, value where timestamp >= 2 ---- @@ -112,3 +144,48 @@ exec select 
labels, stacktrace, timestamp, value where labels.label4 != null ---- value3 value2 null value4 stack1 3 3 + +# filter column that doesn't exist +exec +select labels, timestamp, value where doesntexist > 4 +---- + +exec +select labels, timestamp, value where doesntexist < 4 +---- + +exec +select labels, timestamp, value where doesntexist >= 4 +---- + +exec +select labels, timestamp, value where doesntexist <= 4 +---- + +exec +select stacktrace, value where stacktrace like 'ack' +---- +stack1 1 +stack1 2 +stack1 3 + +exec +select stacktrace, value where stacktrace like 'ack2' +---- + +exec +select stacktrace, value where stacktrace not like 'ack' +---- + +exec +select stacktrace, value where stacktrace not like 'ack2' +---- +stack1 1 +stack1 2 +stack1 3 + +exec +select stacktrace, value where labels.label1 not like 'ue2' and stacktrace like 'ack' +---- +stack1 1 +stack1 3 diff --git a/logictest/testdata/exec/filter/filter_contains b/logictest/testdata/exec/filter/filter_contains new file mode 100644 index 000000000..39f9a9c9d --- /dev/null +++ b/logictest/testdata/exec/filter/filter_contains @@ -0,0 +1,24 @@ +createtable schema=bytes +---- + +insert cols=(labels.label1, labels.label2, labels.label3, labels.label4, timestamp, value) +value1 value2 null null 1 foo +value2 value2 value3 null 2 bar +value3 value2 null value4 3 baz +---- + +exec +select labels, timestamp, value where timestamp = 2 +---- +value2 value2 value3 null 2 bar + +exec +select labels, timestamp, value where value LIKE 'a' +---- +value2 value2 value3 null 2 bar +value3 value2 null value4 3 baz + +exec +select labels, timestamp, value where value NOT LIKE 'a' +---- +value1 value2 null null 1 foo diff --git a/logictest/testdata/exec/filter/filter_projection b/logictest/testdata/exec/filter/filter_projection index ced281d40..52bb0ab58 100644 --- a/logictest/testdata/exec/filter/filter_projection +++ b/logictest/testdata/exec/filter/filter_projection @@ -20,3 +20,20 @@ select timestamp where timestamp 
>= 2 ---- 2 3 + +# projection of null column +exec +select value where labels.label5 = null and labels.label3 != null +---- +2 + +# inverse projection of null column +exec +select value where labels.label5 != null and labels.label3 != null +---- + +# multi null column projection +exec +select value where (labels.label3 = 'value3' and labels.label5 = null) or (labels.label3 = null and labels.label5 = 'a') +---- +2 diff --git a/logictest/testdata/exec/limit/limit b/logictest/testdata/exec/limit/limit new file mode 100644 index 000000000..7919403ba --- /dev/null +++ b/logictest/testdata/exec/limit/limit @@ -0,0 +1,39 @@ +createtable schema=default +---- + +insert cols=(labels.label1, labels.label2, labels.label3, labels.label4, labels.label5) +value1 value1 null null value1 +value2 value2 value3 null value1 +value3 value1 null value4 value1 +---- + +exec +select labels.label3 limit 0 +---- + +exec +select labels.label1 limit 4 # no need to limit +---- +value1 +value2 +value3 + +exec +select labels.label2 limit 3 # no need to limit - exact 3 rows are the limit +---- +value1 +value2 +value1 + +exec +select labels.label3 limit 2 +---- +null +value3 + +exec +select labels.label1, labels.label2 limit 5 +---- +value1 value1 +value2 value2 +value3 value1 diff --git a/logictest/testdata/exec/projection/convert b/logictest/testdata/exec/projection/convert new file mode 100644 index 000000000..12edf0c4d --- /dev/null +++ b/logictest/testdata/exec/projection/convert @@ -0,0 +1,15 @@ +createtable schema=default +---- + +insert cols=(labels.label1, stacktrace, timestamp, value, floatvalue) +value1 stack1 1 2 1.1 +value1 stack1 3 4 1.1 +value1 stack2 5 6 1.1 +---- + +exec +select convert(value, float) * floatvalue +---- +2.200000 +4.400000 +6.600000 diff --git a/logictest/testdata/exec/projection/math_projection b/logictest/testdata/exec/projection/math_projection new file mode 100644 index 000000000..6238f30d1 --- /dev/null +++ b/logictest/testdata/exec/projection/math_projection 
@@ -0,0 +1,21 @@ +createtable schema=default +---- + +insert cols=(labels.label1, stacktrace, timestamp, value) +value1 stack1 1 2 +value1 stack1 3 4 +value1 stack2 5 6 +---- + +exec +select value * timestamp +---- +2 +12 +30 + +exec +select stacktrace, sum(value * timestamp) group by stacktrace +---- +stack1 14 +stack2 30 diff --git a/logictest/testdata/plan/aggregate/aggregate b/logictest/testdata/plan/aggregate/aggregate index c18ec750f..aad2bf603 100644 --- a/logictest/testdata/plan/aggregate/aggregate +++ b/logictest/testdata/plan/aggregate/aggregate @@ -7,48 +7,84 @@ createtable schema=default exec explain select sum(value) as value_sum group by (example_type, labels.label1) ---- -TableScan [concurrent] - HashAggregate (value_sum by example_type,labels.label1) - Synchronizer - HashAggregate (value_sum by example_type,labels.label1) +TableScan [concurrent] - Projection (value, example_type, labels.label1) - HashAggregate (sum(value) by example_type,labels.label1) - Synchronizer - HashAggregate (sum(value) by example_type,labels.label1) - Projection (sum(value) as value_sum) # An ordered aggregation should be planned in this case because the full set of dynamic columns is specified. exec explain select sum(value) as value_sum group by (example_type, labels) ---- -TableScan [concurrent] - OrderedAggregate (value by example_type,labels) - OrderedSynchronizer - OrderedAggregate (value by example_type,labels) +TableScan [concurrent] - Projection (value, example_type, labels) - HashAggregate (sum(value) by example_type,labels) - Synchronizer - HashAggregate (sum(value) by example_type,labels) - Projection (sum(value) as value_sum) # A hash aggregation is planned in the case that the group by columns are inverted. TODO(asubiotto): We could probably # plan an ordered aggregation in this case, but let's not do so unless necessary. 
exec explain select sum(value) as value_sum group by (labels, example_type) ---- -TableScan [concurrent] - HashAggregate (value_sum by labels,example_type) - Synchronizer - HashAggregate (value_sum by labels,example_type) +TableScan [concurrent] - Projection (value, labels, example_type) - HashAggregate (sum(value) by labels,example_type) - Synchronizer - HashAggregate (sum(value) by labels,example_type) - Projection (sum(value) as value_sum) # A hash aggregation should be planned in this case because we are not grouping by example_type, so the group by columns # are not a prefix of the sorting columns. exec explain select sum(value) as value_sum group by (labels) ---- -TableScan [concurrent] - HashAggregate (value_sum by labels) - Synchronizer - HashAggregate (value_sum by labels) +TableScan [concurrent] - Projection (value, labels) - HashAggregate (sum(value) by labels) - Synchronizer - HashAggregate (sum(value) by labels) - Projection (sum(value) as value_sum) # If however, the prefix that is not contained by the grouping columns is covered by an equality filter, an ordered # aggregation can be planned since we "fix" a prefix of the sorting columns to a given value. 
exec explain select sum(value) as value_sum where example_type = 'some_value' group by (labels) ---- -TableScan [concurrent] - PredicateFilter (example_type == some_value) - OrderedAggregate (value by labels) - OrderedSynchronizer - OrderedAggregate (value by labels) +TableScan [concurrent] - PredicateFilter (example_type == some_value) - Projection (value, labels) - HashAggregate (sum(value) by labels) - Synchronizer - HashAggregate (sum(value) by labels) - Projection (sum(value) as value_sum) exec explain select sum(value) as value_sum where example_type = 'some_value' group by (labels, timestamp) ---- -TableScan [concurrent] - PredicateFilter (example_type == some_value) - OrderedAggregate (value by labels,timestamp) - OrderedSynchronizer - OrderedAggregate (value by labels,timestamp) +TableScan [concurrent] - PredicateFilter (example_type == some_value) - Projection (value, labels, timestamp) - HashAggregate (sum(value) by labels,timestamp) - Synchronizer - HashAggregate (sum(value) by labels,timestamp) - Projection (sum(value) as value_sum) # The above only applies to equality filters of course. exec explain select sum(value) as value_sum where example_type > 'some_value' group by (labels) ---- -TableScan [concurrent] - PredicateFilter (example_type > some_value) - HashAggregate (value_sum by labels) - Synchronizer - HashAggregate (value_sum by labels) +TableScan [concurrent] - PredicateFilter (example_type > some_value) - Projection (value, labels) - HashAggregate (sum(value) by labels) - Synchronizer - HashAggregate (sum(value) by labels) - Projection (sum(value) as value_sum) # And here's a hash aggregation with a filter on another column. 
exec explain select sum(value) as value_sum where timestamp >= 1 group by labels.label1 ---- -TableScan [concurrent] - PredicateFilter (timestamp >= 1) - HashAggregate (value_sum by labels.label1) - Synchronizer - HashAggregate (value_sum by labels.label1) +TableScan [concurrent] - PredicateFilter (timestamp >= 1) - Projection (value, labels.label1) - HashAggregate (sum(value) by labels.label1) - Synchronizer - HashAggregate (sum(value) by labels.label1) - Projection (sum(value) as value_sum) + +exec +explain select labels.label2, sum(value) as value_sum, count(value) as value_count group by labels.label2 +---- +TableScan [concurrent] - Projection (labels.label2, value) - HashAggregate (sum(value),count(value) by labels.label2) - Synchronizer - HashAggregate (sum(value),count(value) by labels.label2) - Projection (labels.label2, sum(value) as value_sum, count(value) as value_count) + +exec +explain select example_type, labels, sum(value) as value_sum group by (example_type, labels) +---- +TableScan [concurrent] - Projection (example_type, labels, value) - HashAggregate (sum(value) by example_type,labels) - Synchronizer - HashAggregate (sum(value) by example_type,labels) - Projection (example_type, labels, sum(value) as value_sum) + +exec +explain select sum(value * timestamp) group by stacktrace +---- +TableScan [concurrent] - Projection (value * timestamp, stacktrace) - HashAggregate (sum(value * timestamp) by stacktrace) - Synchronizer - HashAggregate (sum(value * timestamp) by stacktrace) - Projection (sum(value * timestamp)) + +exec +explain select avg(value) group by stacktrace +---- +TableScan [concurrent] - Projection (value, stacktrace) - HashAggregate (sum(value),count(value) by stacktrace) - Synchronizer - HashAggregate (sum(value),count(value) by stacktrace) - Projection (stacktrace, sum(value) / count(value) as avg(value)) - Projection (avg(value)) + +exec +explain select max(value) - min(value), sum(value) / count(value) group by labels.label1 +---- 
+TableScan [concurrent] - Projection (value, labels.label1) - HashAggregate (max(value),min(value),sum(value),count(value) by labels.label1) - Synchronizer - HashAggregate (max(value),min(value),sum(value),count(value) by labels.label1) - Projection (max(value) - min(value), sum(value) / count(value)) + +exec +explain select labels.label1, (max(value) - min(value)) / 2, sum(value) / count(value), (sum(value) / count(value)) * 2 group by labels.label1 +---- +TableScan [concurrent] - Projection (labels.label1, value) - HashAggregate (max(value),min(value),sum(value),count(value),sum(value),count(value) by labels.label1) - Synchronizer - HashAggregate (max(value),min(value),sum(value),count(value),sum(value),count(value) by labels.label1) - Projection (labels.label1, max(value) - min(value) / 2, sum(value) / count(value), sum(value) / count(value) * 2) + +# Make sure that the limit happens after the aggregation is done. +exec +explain select sum(value) as value_sum group by (example_type, labels.label1) limit 2 +---- +TableScan [concurrent] - Projection (value, example_type, labels.label1) - HashAggregate (sum(value) by example_type,labels.label1) - Synchronizer - HashAggregate (sum(value) by example_type,labels.label1) - Projection (sum(value) as value_sum) - Limit(2) diff --git a/logictest/testdata/plan/aggregate/window b/logictest/testdata/plan/aggregate/window new file mode 100644 index 000000000..b6310ef70 --- /dev/null +++ b/logictest/testdata/plan/aggregate/window @@ -0,0 +1,9 @@ +createtable schema=default +---- + +# timestamps have to be in milliseconds for these tests + +exec +explain select sum(value) as value_sum, ((timestamp / 1000) * 1000) as timestamp_bucket group by timestamp_bucket +---- +TableScan [concurrent] - Projection (value, timestamp / 1000 * 1000 as timestamp_bucket) - HashAggregate (sum(value) by timestamp_bucket) - Synchronizer - HashAggregate (sum(value) by timestamp_bucket) - Projection (sum(value) as value_sum, timestamp_bucket) diff 
--git a/logictest/testdata/plan/filter/filter b/logictest/testdata/plan/filter/filter new file mode 100644 index 000000000..0e01035fa --- /dev/null +++ b/logictest/testdata/plan/filter/filter @@ -0,0 +1,22 @@ +createtable schema=default +---- + +exec +explain select stacktrace, value where stacktrace LIKE 'ack' +---- +TableScan [concurrent] - PredicateFilter (stacktrace contains ack) - Projection (stacktrace, value) - Synchronizer + +exec +explain select stacktrace, value where stacktrace NOT LIKE 'ack' +---- +TableScan [concurrent] - PredicateFilter (stacktrace not contains ack) - Projection (stacktrace, value) - Synchronizer + +exec +explain select stacktrace, value where labels.label1 not like 'ue2' and stacktrace like 'ack' +---- +TableScan [concurrent] - PredicateFilter ((labels.label1 not contains ue2 AND stacktrace contains ack)) - Projection (stacktrace, value) - Synchronizer + +exec +explain select stacktrace, value where stacktrace LIKE 'ack' limit 1 +---- +TableScan [concurrent] - PredicateFilter (stacktrace contains ack) - Projection (stacktrace, value) - Limit(1) - Synchronizer - Limit(1) diff --git a/logictest/testdata/plan/limit/limit b/logictest/testdata/plan/limit/limit new file mode 100644 index 000000000..39838ae1e --- /dev/null +++ b/logictest/testdata/plan/limit/limit @@ -0,0 +1,12 @@ +createtable schema=default +---- + +exec +explain select labels.label1 +---- +TableScan [concurrent] - Projection (labels.label1) - Synchronizer + +exec +explain select labels.label1 limit 1 +---- +TableScan [concurrent] - Projection (labels.label1) - Limit(1) - Synchronizer - Limit(1) diff --git a/metrics.go b/metrics.go new file mode 100644 index 000000000..802bf547d --- /dev/null +++ b/metrics.go @@ -0,0 +1,367 @@ +package frostdb + +import ( + "github.com/polarsignals/wal" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + + filewal 
"github.com/polarsignals/frostdb/wal" + + "github.com/polarsignals/frostdb/index" +) + +var ( + descTxHighWatermark = prometheus.NewDesc( + "frostdb_tx_high_watermark", + "The highest transaction number that has been released to be read", + []string{"db"}, nil, + ) + descActiveBlockSize = prometheus.NewDesc( + "frostdb_table_active_block_size", + "Size of the active table block in bytes.", + []string{"db", "table"}, nil, + ) +) + +// collector is a custom prometheus collector that exports metrics from live +// databases and tables. +type collector struct { + s *ColumnStore +} + +var _ prometheus.Collector = (*collector)(nil) + +func (c *collector) Describe(ch chan<- *prometheus.Desc) { + ch <- descTxHighWatermark + ch <- descActiveBlockSize +} + +func (c *collector) Collect(ch chan<- prometheus.Metric) { + for _, dbName := range c.s.DBs() { + db, err := c.s.GetDB(dbName) + if err != nil { + continue + } + ch <- prometheus.MustNewConstMetric(descTxHighWatermark, prometheus.GaugeValue, float64(db.HighWatermark()), dbName) + for _, tableName := range db.TableNames() { + table, err := db.GetTable(tableName) + if err != nil { + continue + } + activeBlock := table.ActiveBlock() + if activeBlock == nil { + continue + } + ch <- prometheus.MustNewConstMetric(descActiveBlockSize, prometheus.GaugeValue, float64(activeBlock.Size()), dbName, tableName) + } + } +} + +// globalMetrics defines the store-level metrics registered at instantiation. +// Most metrics are not directly accessed, and instead provided to components +// at instantiation time with preset labels. 
+type globalMetrics struct { + shutdownDuration prometheus.Histogram + shutdownStarted prometheus.Counter + shutdownCompleted prometheus.Counter + dbMetrics struct { + snapshotMetrics struct { + snapshotsTotal *prometheus.CounterVec + snapshotFileSizeBytes *prometheus.GaugeVec + snapshotDurationHistogram *prometheus.HistogramVec + } + walMetrics struct { + bytesWritten *prometheus.CounterVec + entriesWritten *prometheus.CounterVec + appends *prometheus.CounterVec + entryBytesRead *prometheus.CounterVec + entriesRead *prometheus.CounterVec + segmentRotations *prometheus.CounterVec + entriesTruncated *prometheus.CounterVec + truncations *prometheus.CounterVec + lastSegmentAgeSeconds *prometheus.GaugeVec + } + fileWalMetrics struct { + failedLogs *prometheus.CounterVec + lastTruncationAt *prometheus.GaugeVec + walRepairs *prometheus.CounterVec + walRepairsLostRecords *prometheus.CounterVec + walCloseTimeouts *prometheus.CounterVec + walQueueSize *prometheus.GaugeVec + } + } + tableMetrics struct { + blockPersisted *prometheus.CounterVec + blockRotated *prometheus.CounterVec + rowsInserted *prometheus.CounterVec + rowBytesInserted *prometheus.CounterVec + zeroRowsInserted *prometheus.CounterVec + rowInsertSize *prometheus.HistogramVec + lastCompletedBlockTx *prometheus.GaugeVec + numParts *prometheus.GaugeVec + indexMetrics struct { + compactions *prometheus.CounterVec + levelSize *prometheus.GaugeVec + compactionDuration *prometheus.HistogramVec + } + } +} + +func makeLabelsForDBMetric(extraLabels ...string) []string { + return append([]string{"db"}, extraLabels...) +} + +func makeLabelsForTablesMetrics(extraLabels ...string) []string { + return append([]string{"db", "table"}, extraLabels...) 
+} + +func makeAndRegisterGlobalMetrics(unwrappedReg prometheus.Registerer) globalMetrics { + m := globalMetrics{ + shutdownDuration: promauto.With(unwrappedReg).NewHistogram(prometheus.HistogramOpts{ + Name: "frostdb_shutdown_duration", + Help: "time it takes for the columnarstore to complete a full shutdown.", + }), + shutdownStarted: promauto.With(unwrappedReg).NewCounter(prometheus.CounterOpts{ + Name: "frostdb_shutdown_started", + Help: "Indicates a shutdown of the columnarstore has started.", + }), + shutdownCompleted: promauto.With(unwrappedReg).NewCounter(prometheus.CounterOpts{ + Name: "frostdb_shutdown_completed", + Help: "Indicates a shutdown of the columnarstore has completed.", + }), + } + + // DB metrics. + { + // Snapshot metrics. + { + reg := prometheus.WrapRegistererWithPrefix("frostdb_snapshot_", unwrappedReg) + m.dbMetrics.snapshotMetrics.snapshotsTotal = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "total", + Help: "Total number of snapshots", + }, makeLabelsForDBMetric("success")) + m.dbMetrics.snapshotMetrics.snapshotFileSizeBytes = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "file_size_bytes", + Help: "Size of snapshots in bytes", + }, makeLabelsForDBMetric()) + m.dbMetrics.snapshotMetrics.snapshotDurationHistogram = promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ + Name: "duration_seconds", + Help: "Duration of snapshots in seconds", + Buckets: prometheus.ExponentialBucketsRange(1, 60, 5), + }, makeLabelsForDBMetric()) + } + // WAL metrics. + { + reg := prometheus.WrapRegistererWithPrefix("frostdb_wal_", unwrappedReg) + m.dbMetrics.walMetrics.bytesWritten = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "entry_bytes_written", + Help: "entry_bytes_written counts the bytes of log entry after encoding." 
+ + " Actual bytes written to disk might be slightly higher as it" + + " includes headers and index entries.", + }, makeLabelsForDBMetric()) + m.dbMetrics.walMetrics.entriesWritten = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "entries_written", + Help: "entries_written counts the number of entries written.", + }, makeLabelsForDBMetric()) + m.dbMetrics.walMetrics.appends = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "appends", + Help: "appends counts the number of calls to StoreLog(s) i.e." + + " number of batches of entries appended.", + }, makeLabelsForDBMetric()) + m.dbMetrics.walMetrics.entryBytesRead = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "entry_bytes_read", + Help: "entry_bytes_read counts the bytes of log entry read from" + + " segments before decoding. actual bytes read from disk might be higher" + + " as it includes headers and index entries and possible secondary reads" + + " for large entries that don't fit in buffers.", + }, makeLabelsForDBMetric()) + m.dbMetrics.walMetrics.entriesRead = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "entries_read", + Help: "entries_read counts the number of calls to get_log.", + }, makeLabelsForDBMetric()) + m.dbMetrics.walMetrics.segmentRotations = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "segment_rotations", + Help: "segment_rotations counts how many times we move to a new segment file.", + }, makeLabelsForDBMetric()) + m.dbMetrics.walMetrics.entriesTruncated = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "entries_truncated_total", + Help: "entries_truncated counts how many log entries have been truncated" + + " from the front or back.", + }, makeLabelsForDBMetric("type")) + m.dbMetrics.walMetrics.truncations = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "truncations_total", + Help: "truncations is the number of truncate calls categorized by whether" + + " the call 
was successful or not.", + }, makeLabelsForDBMetric("type", "success")) + m.dbMetrics.walMetrics.lastSegmentAgeSeconds = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "last_segment_age_seconds", + Help: "last_segment_age_seconds is a gauge that is set each time we" + + " rotate a segment and describes the number of seconds between when" + + " that segment file was first created and when it was sealed. this" + + " gives a rough estimate how quickly writes are filling the disk.", + }, makeLabelsForDBMetric()) + } + // FileWAL metrics. + { + reg := prometheus.WrapRegistererWithPrefix("frostdb_wal_", unwrappedReg) + m.dbMetrics.fileWalMetrics.failedLogs = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "failed_logs_total", + Help: "Number of failed WAL logs", + }, makeLabelsForDBMetric()) + m.dbMetrics.fileWalMetrics.lastTruncationAt = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "last_truncation_at", + Help: "The last transaction the WAL was truncated to", + }, makeLabelsForDBMetric()) + m.dbMetrics.fileWalMetrics.walRepairs = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "repairs_total", + Help: "The number of times the WAL had to be repaired (truncated) due to corrupt records", + }, makeLabelsForDBMetric()) + m.dbMetrics.fileWalMetrics.walRepairsLostRecords = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "repairs_lost_records_total", + Help: "The number of WAL records lost due to WAL repairs (truncations)", + }, makeLabelsForDBMetric()) + m.dbMetrics.fileWalMetrics.walCloseTimeouts = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "close_timeouts_total", + Help: "The number of times the WAL failed to close due to a timeout", + }, makeLabelsForDBMetric()) + m.dbMetrics.fileWalMetrics.walQueueSize = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "queue_size", + Help: "The number of unprocessed requests in the WAL queue", + }, makeLabelsForDBMetric()) + 
} + } + + // Table metrics. + { + reg := prometheus.WrapRegistererWithPrefix("frostdb_table_", unwrappedReg) + m.tableMetrics.blockPersisted = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "blocks_persisted_total", + Help: "Number of table blocks that have been persisted.", + }, makeLabelsForTablesMetrics()) + m.tableMetrics.blockRotated = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "blocks_rotated_total", + Help: "Number of table blocks that have been rotated.", + }, makeLabelsForTablesMetrics()) + m.tableMetrics.rowsInserted = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "rows_inserted_total", + Help: "Number of rows inserted into table.", + }, makeLabelsForTablesMetrics()) + m.tableMetrics.rowBytesInserted = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "row_bytes_inserted_total", + Help: "Number of bytes inserted into table.", + }, makeLabelsForTablesMetrics()) + m.tableMetrics.zeroRowsInserted = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "zero_rows_inserted_total", + Help: "Number of times it was attempted to insert zero rows into the table.", + }, makeLabelsForTablesMetrics()) + m.tableMetrics.rowInsertSize = promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ + Name: "row_insert_size", + Help: "Size of batch inserts into table.", + Buckets: prometheus.ExponentialBuckets(1, 2, 10), + }, makeLabelsForTablesMetrics()) + m.tableMetrics.lastCompletedBlockTx = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "last_completed_block_tx", + Help: "Last completed block transaction.", + }, makeLabelsForTablesMetrics()) + m.tableMetrics.numParts = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "num_parts", + Help: "Number of parts currently active.", + }, makeLabelsForTablesMetrics()) + + // LSM metrics. 
+ { + reg := prometheus.WrapRegistererWithPrefix("frostdb_lsm_", unwrappedReg) + m.tableMetrics.indexMetrics.compactions = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "compactions_total", + Help: "The total number of compactions that have occurred.", + }, makeLabelsForTablesMetrics("level")) + + m.tableMetrics.indexMetrics.levelSize = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "level_size_bytes", + Help: "The size of the level in bytes.", + }, makeLabelsForTablesMetrics("level")) + + m.tableMetrics.indexMetrics.compactionDuration = promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ + Name: "compaction_total_duration_seconds", + Help: "Total compaction duration", + NativeHistogramBucketFactor: 1.1, + }, makeLabelsForTablesMetrics()) + } + } + return m +} + +type snapshotMetrics struct { + snapshotsTotal *prometheus.CounterVec + snapshotFileSizeBytes prometheus.Gauge + snapshotDurationHistogram prometheus.Observer +} + +func (m globalMetrics) snapshotMetricsForDB(dbName string) snapshotMetrics { + return snapshotMetrics{ + snapshotsTotal: m.dbMetrics.snapshotMetrics.snapshotsTotal.MustCurryWith(prometheus.Labels{"db": dbName}), + snapshotFileSizeBytes: m.dbMetrics.snapshotMetrics.snapshotFileSizeBytes.WithLabelValues(dbName), + snapshotDurationHistogram: m.dbMetrics.snapshotMetrics.snapshotDurationHistogram.WithLabelValues(dbName), + } +} + +type tableMetricsProvider struct { + dbName string + m globalMetrics +} + +type tableMetrics struct { + blockPersisted prometheus.Counter + blockRotated prometheus.Counter + rowsInserted prometheus.Counter + rowBytesInserted prometheus.Counter + zeroRowsInserted prometheus.Counter + rowInsertSize prometheus.Observer + lastCompletedBlockTx prometheus.Gauge + numParts prometheus.Gauge + + indexMetrics index.LSMMetrics +} + +func (p tableMetricsProvider) metricsForTable(tableName string) tableMetrics { + return tableMetrics{ + blockPersisted: 
p.m.tableMetrics.blockPersisted.WithLabelValues(p.dbName, tableName), + blockRotated: p.m.tableMetrics.blockRotated.WithLabelValues(p.dbName, tableName), + rowsInserted: p.m.tableMetrics.rowsInserted.WithLabelValues(p.dbName, tableName), + rowBytesInserted: p.m.tableMetrics.rowBytesInserted.WithLabelValues(p.dbName, tableName), + zeroRowsInserted: p.m.tableMetrics.zeroRowsInserted.WithLabelValues(p.dbName, tableName), + rowInsertSize: p.m.tableMetrics.rowInsertSize.WithLabelValues(p.dbName, tableName), + lastCompletedBlockTx: p.m.tableMetrics.lastCompletedBlockTx.WithLabelValues(p.dbName, tableName), + numParts: p.m.tableMetrics.numParts.WithLabelValues(p.dbName, tableName), + indexMetrics: index.LSMMetrics{ + Compactions: p.m.tableMetrics.indexMetrics.compactions.MustCurryWith(prometheus.Labels{"db": p.dbName, "table": tableName}), + LevelSize: p.m.tableMetrics.indexMetrics.levelSize.MustCurryWith(prometheus.Labels{"db": p.dbName, "table": tableName}), + CompactionDuration: p.m.tableMetrics.indexMetrics.compactionDuration.WithLabelValues(p.dbName, tableName), + }, + } +} + +func (m globalMetrics) metricsForWAL(dbName string) *wal.Metrics { + return &wal.Metrics{ + BytesWritten: m.dbMetrics.walMetrics.bytesWritten.WithLabelValues(dbName), + EntriesWritten: m.dbMetrics.walMetrics.entriesWritten.WithLabelValues(dbName), + Appends: m.dbMetrics.walMetrics.appends.WithLabelValues(dbName), + EntryBytesRead: m.dbMetrics.walMetrics.entryBytesRead.WithLabelValues(dbName), + EntriesRead: m.dbMetrics.walMetrics.entriesRead.WithLabelValues(dbName), + SegmentRotations: m.dbMetrics.walMetrics.segmentRotations.WithLabelValues(dbName), + EntriesTruncated: m.dbMetrics.walMetrics.entriesTruncated.MustCurryWith(prometheus.Labels{"db": dbName}), + Truncations: m.dbMetrics.walMetrics.truncations.MustCurryWith(prometheus.Labels{"db": dbName}), + LastSegmentAgeSeconds: m.dbMetrics.walMetrics.lastSegmentAgeSeconds.WithLabelValues(dbName), + } +} + +func (m globalMetrics) 
metricsForFileWAL(dbName string) *filewal.Metrics { + return &filewal.Metrics{ + FailedLogs: m.dbMetrics.fileWalMetrics.failedLogs.WithLabelValues(dbName), + LastTruncationAt: m.dbMetrics.fileWalMetrics.lastTruncationAt.WithLabelValues(dbName), + WalRepairs: m.dbMetrics.fileWalMetrics.walRepairs.WithLabelValues(dbName), + WalRepairsLostRecords: m.dbMetrics.fileWalMetrics.walRepairsLostRecords.WithLabelValues(dbName), + WalCloseTimeouts: m.dbMetrics.fileWalMetrics.walCloseTimeouts.WithLabelValues(dbName), + WalQueueSize: m.dbMetrics.fileWalMetrics.walQueueSize.WithLabelValues(dbName), + } +} diff --git a/parts/arrow.go b/parts/arrow.go new file mode 100644 index 000000000..1a2be1bc8 --- /dev/null +++ b/parts/arrow.go @@ -0,0 +1,131 @@ +package parts + +import ( + "io" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/ipc" + + "github.com/polarsignals/frostdb/dynparquet" + "github.com/polarsignals/frostdb/pqarrow" +) + +// arrow implments the Part interface backed by an Arrow record. +type arrowPart struct { + basePart + + schema *dynparquet.Schema + record arrow.Record + size uint64 +} + +// NewArrowPart returns a new Arrow part. 
+func NewArrowPart(tx uint64, record arrow.Record, size uint64, schema *dynparquet.Schema, options ...Option) Part { + p := &arrowPart{ + basePart: basePart{ + tx: tx, + }, + schema: schema, + record: record, + size: size, + } + + for _, option := range options { + option(&p.basePart) + } + + return p +} + +func (p *arrowPart) Write(w io.Writer) error { + recordWriter := ipc.NewWriter( + w, + ipc.WithSchema(p.record.Schema()), + ) + defer recordWriter.Close() + return recordWriter.Write(p.record) +} + +func (p *arrowPart) Retain() { p.record.Retain() } + +func (p *arrowPart) Record() arrow.Record { + return p.record +} + +func (p *arrowPart) Release() { p.record.Release() } + +func (p *arrowPart) SerializeBuffer(schema *dynparquet.Schema, w dynparquet.ParquetWriter) error { + return pqarrow.RecordToFile(schema, w, p.record) +} + +func (p *arrowPart) AsSerializedBuffer(schema *dynparquet.Schema) (*dynparquet.SerializedBuffer, error) { + return pqarrow.SerializeRecord(p.record, schema) +} + +func (p *arrowPart) NumRows() int64 { + return p.record.NumRows() +} + +func (p *arrowPart) Size() int64 { + return int64(p.size) +} + +// Least returns the least row in the part. 
+func (p *arrowPart) Least() (*dynparquet.DynamicRow, error) { + if p.minRow != nil { + return p.minRow, nil + } + + dynCols := pqarrow.RecordDynamicCols(p.record) + pooledSchema, err := p.schema.GetDynamicParquetSchema(dynCols) + if err != nil { + return nil, err + } + defer p.schema.PutPooledParquetSchema(pooledSchema) + p.minRow, err = pqarrow.RecordToDynamicRow(pooledSchema.Schema, p.record, dynCols, 0) + if err != nil { + return nil, err + } + + return p.minRow, nil +} + +func (p *arrowPart) Most() (*dynparquet.DynamicRow, error) { + if p.maxRow != nil { + return p.maxRow, nil + } + + dynCols := pqarrow.RecordDynamicCols(p.record) + pooledSchema, err := p.schema.GetDynamicParquetSchema(dynCols) + if err != nil { + return nil, err + } + defer p.schema.PutPooledParquetSchema(pooledSchema) + p.maxRow, err = pqarrow.RecordToDynamicRow(pooledSchema.Schema, p.record, dynCols, int(p.record.NumRows()-1)) + if err != nil { + return nil, err + } + + return p.maxRow, nil +} + +func (p *arrowPart) OverlapsWith(schema *dynparquet.Schema, otherPart Part) (bool, error) { + a, err := p.Least() + if err != nil { + return false, err + } + b, err := p.Most() + if err != nil { + return false, err + } + c, err := otherPart.Least() + if err != nil { + return false, err + } + d, err := otherPart.Most() + if err != nil { + return false, err + } + + return schema.Cmp(a, d) <= 0 && schema.Cmp(c, b) <= 0, nil +} diff --git a/parts/parquet.go b/parts/parquet.go new file mode 100644 index 000000000..3604811a6 --- /dev/null +++ b/parts/parquet.go @@ -0,0 +1,163 @@ +package parts + +import ( + "fmt" + "io" + "sync/atomic" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/parquet-go/parquet-go" + + "github.com/polarsignals/frostdb/dynparquet" +) + +// This file contains the implementation of the Part interface backed by a Parquet Buffer. 
+type parquetPart struct { + basePart + + ref *atomic.Int32 + buf *dynparquet.SerializedBuffer +} + +func (p *parquetPart) Record() arrow.Record { + return nil +} + +func (p *parquetPart) Retain() { p.ref.Add(1) } + +func (p *parquetPart) Release() { + ref := p.ref.Add(-1) + if ref <= 0 && p.release != nil { + p.release() + } +} + +func (p *parquetPart) Write(w io.Writer) error { + buf, err := p.AsSerializedBuffer(nil) + if err != nil { + return err + } + + f := buf.ParquetFile() + _, err = io.Copy(w, io.NewSectionReader(f, 0, f.Size())) + return err +} + +func (p *parquetPart) SerializeBuffer(_ *dynparquet.Schema, _ dynparquet.ParquetWriter) error { + return fmt.Errorf("not a record part") +} + +func (p *parquetPart) AsSerializedBuffer(_ *dynparquet.Schema) (*dynparquet.SerializedBuffer, error) { + return p.buf, nil +} + +func NewParquetPart(tx uint64, buf *dynparquet.SerializedBuffer, options ...Option) Part { + p := &parquetPart{ + basePart: basePart{ + tx: tx, + }, + ref: &atomic.Int32{}, + buf: buf, + } + + for _, opt := range options { + opt(&p.basePart) + } + + p.ref.Add(1) + return p +} + +func (p *parquetPart) NumRows() int64 { + return p.buf.NumRows() +} + +func (p *parquetPart) Size() int64 { + return p.buf.ParquetFile().Size() +} + +// Least returns the least row in the part. 
+func (p *parquetPart) Least() (*dynparquet.DynamicRow, error) { + if p.minRow != nil { + return p.minRow, nil + } + + minRow, err := minRow(p.buf) + if err != nil { + return nil, err + } + + p.minRow = minRow + return p.minRow, nil +} + +func (p *parquetPart) Most() (*dynparquet.DynamicRow, error) { + if p.maxRow != nil { + return p.maxRow, nil + } + + maxRow, err := maxRow(p.buf) + if err != nil { + return nil, err + } + p.maxRow = maxRow + return p.maxRow, nil +} + +func (p *parquetPart) OverlapsWith(schema *dynparquet.Schema, otherPart Part) (bool, error) { + a, err := p.Least() + if err != nil { + return false, err + } + b, err := p.Most() + if err != nil { + return false, err + } + c, err := otherPart.Least() + if err != nil { + return false, err + } + d, err := otherPart.Most() + if err != nil { + return false, err + } + + return schema.Cmp(a, d) <= 0 && schema.Cmp(c, b) <= 0, nil +} + +func maxRow(buf *dynparquet.SerializedBuffer) (*dynparquet.DynamicRow, error) { + rowBuf := &dynparquet.DynamicRows{Rows: make([]parquet.Row, 1)} + rg := buf.DynamicRowGroup(buf.NumRowGroups() - 1) + reader := rg.DynamicRows() + defer reader.Close() + + if err := reader.SeekToRow(rg.NumRows() - 1); err != nil { + return nil, fmt.Errorf("seek to last row of part: %w", err) + } + + if n, err := reader.ReadRows(rowBuf); err != nil { + return nil, fmt.Errorf("read last row of part: %w", err) + } else if n != 1 { + return nil, fmt.Errorf("expected to read exactly 1 row, but read %d", n) + } + + // Copy here so that this reference does not prevent the decompressed page + // from being GCed. 
+ return rowBuf.GetCopy(0), nil +} + +func minRow(buf *dynparquet.SerializedBuffer) (*dynparquet.DynamicRow, error) { + rowBuf := &dynparquet.DynamicRows{Rows: make([]parquet.Row, 1)} + reader := buf.DynamicRowGroup(0).DynamicRows() + defer reader.Close() + + if n, err := reader.ReadRows(rowBuf); err != nil { + return nil, fmt.Errorf("read first row of part: %w", err) + } else if n != 1 { + return nil, fmt.Errorf("expected to read exactly 1 row, but read %d", n) + } + + // Copy here so that this reference does not prevent the decompressed page + // from being GCed. + return rowBuf.GetCopy(0), nil +} diff --git a/parts/part.go b/parts/part.go index eb5eb0b77..6159c1b9c 100644 --- a/parts/part.go +++ b/parts/part.go @@ -1,268 +1,74 @@ package parts import ( - "bytes" - "fmt" - "math" + "io" + "sort" - "github.com/apache/arrow/go/v10/arrow" - "github.com/segmentio/parquet-go" + "github.com/apache/arrow-go/v18/arrow" "github.com/polarsignals/frostdb/dynparquet" - "github.com/polarsignals/frostdb/pqarrow" ) -type CompactionLevel uint8 - -const ( - // CompactionLevel0 is the default compaction level for new Parts. This - // means that the Part contains multiple variable-length row groups. - CompactionLevel0 CompactionLevel = iota - // CompactionLevel1 is the compaction level for Parts that are the result of - // a compaction. Parts with this compaction level contain multiple row - // groups with the row group size specified on table creation. - CompactionLevel1 -) - -type Part struct { - buf *dynparquet.SerializedBuffer - record arrow.Record - schema *dynparquet.Schema - - // tx is the id of the transaction that created this part. 
- tx uint64 - - compactionLevel CompactionLevel - - minRow *dynparquet.DynamicRow - maxRow *dynparquet.DynamicRow -} - -func (p *Part) Record() arrow.Record { - return p.record -} - -func (p *Part) SerializeBuffer(schema *dynparquet.Schema, w *parquet.GenericWriter[any]) error { - if p.record == nil { - return fmt.Errorf("not a record part") - } - - return pqarrow.RecordToFile(schema, w, p.record) -} - -func (p *Part) AsSerializedBuffer(schema *dynparquet.Schema) (*dynparquet.SerializedBuffer, error) { - if p.buf != nil { - return p.buf, nil - } - - // If this is a Arrow record part, convert the record into a serialized buffer - b := &bytes.Buffer{} - - w, err := schema.GetWriter(b, pqarrow.RecordDynamicCols(p.record)) - if err != nil { - return nil, err - } - defer schema.PutWriter(w) - if err := p.SerializeBuffer(schema, w.ParquetWriter()); err != nil { - return nil, err - } - - f, err := parquet.OpenFile(bytes.NewReader(b.Bytes()), int64(b.Len())) - if err != nil { - return nil, err - } - - buf, err := dynparquet.NewSerializedBuffer(f) - if err != nil { - return nil, err - } - - return buf, nil -} - -type Option func(*Part) - -func WithCompactionLevel(level CompactionLevel) Option { - return func(p *Part) { - p.compactionLevel = level - } -} - -// NewArrowPart returns a new Arrow part. 
-func NewArrowPart(tx uint64, record arrow.Record, schema *dynparquet.Schema, options ...Option) *Part { - p := &Part{ - tx: tx, - record: record, - schema: schema, - } - - for _, opt := range options { - opt(p) - } - - return p -} - -func NewPart(tx uint64, buf *dynparquet.SerializedBuffer, options ...Option) *Part { - p := &Part{ - tx: tx, - buf: buf, - } - - for _, opt := range options { - opt(p) - } - - return p -} - -func (p *Part) NumRows() int64 { - if p.buf != nil { - return p.buf.NumRows() - } - - return p.record.NumRows() -} - -func (p *Part) Size() int64 { - if p.buf != nil { - return p.buf.ParquetFile().Size() - } - - size := int64(0) - for _, col := range p.record.Columns() { - for _, buf := range col.Data().Buffers() { // NOTE: may need to get Children data instances for nested - if buf != nil { - size += int64(buf.Len()) - } - } - } - return size -} - -func (p *Part) CompactionLevel() CompactionLevel { +type Part interface { + // Record returns the Arrow record for the part. If the part is not an Arrow + // record part, nil is returned. + Record() arrow.Record + Release() + Retain() + SerializeBuffer(schema *dynparquet.Schema, w dynparquet.ParquetWriter) error + AsSerializedBuffer(schema *dynparquet.Schema) (*dynparquet.SerializedBuffer, error) + NumRows() int64 + Size() int64 + CompactionLevel() int + TX() uint64 + Least() (*dynparquet.DynamicRow, error) + Most() (*dynparquet.DynamicRow, error) + OverlapsWith(schema *dynparquet.Schema, otherPart Part) (bool, error) + Write(io.Writer) error +} + +type basePart struct { + tx uint64 + compactionLevel int + minRow *dynparquet.DynamicRow + maxRow *dynparquet.DynamicRow + release func() +} + +func (p *basePart) CompactionLevel() int { return p.compactionLevel } -// TX returns the transaction id for the part. -func (p *Part) TX() uint64 { return p.tx } - -// Least returns the least row in the part. 
-func (p *Part) Least() (*dynparquet.DynamicRow, error) { - if p.minRow != nil { - return p.minRow, nil - } - - if p.record != nil { - var err error - p.minRow, err = pqarrow.RecordToDynamicRow(p.schema, p.record, 0) - if err != nil { - return nil, err - } - - return p.minRow, nil - } - - rowBuf := &dynparquet.DynamicRows{Rows: make([]parquet.Row, 1)} - reader := p.buf.DynamicRowGroup(0).DynamicRows() - defer reader.Close() - - if n, err := reader.ReadRows(rowBuf); err != nil { - return nil, fmt.Errorf("read first row of part: %w", err) - } else if n != 1 { - return nil, fmt.Errorf("expected to read exactly 1 row, but read %d", n) - } - - // Copy here so that this reference does not prevent the decompressed page - // from being GCed. - p.minRow = rowBuf.GetCopy(0) - return p.minRow, nil -} - -func (p *Part) most() (*dynparquet.DynamicRow, error) { - if p.maxRow != nil { - return p.maxRow, nil - } - - if p.record != nil { - var err error - p.maxRow, err = pqarrow.RecordToDynamicRow(p.schema, p.record, int(p.record.NumRows()-1)) - if err != nil { - return nil, err - } - - return p.maxRow, nil - } - - rowBuf := &dynparquet.DynamicRows{Rows: make([]parquet.Row, 1)} - rg := p.buf.DynamicRowGroup(p.buf.NumRowGroups() - 1) - reader := rg.DynamicRows() - defer reader.Close() - - if err := reader.SeekToRow(rg.NumRows() - 1); err != nil { - return nil, fmt.Errorf("seek to last row of part: %w", err) - } +func (p *basePart) TX() uint64 { return p.tx } - if n, err := reader.ReadRows(rowBuf); err != nil { - return nil, fmt.Errorf("read last row of part: %w", err) - } else if n != 1 { - return nil, fmt.Errorf("expected to read exactly 1 row, but read %d", n) - } - - // Copy here so that this reference does not prevent the decompressed page - // from being GCed. 
- p.maxRow = rowBuf.GetCopy(0) - return p.maxRow, nil -} +type Option func(*basePart) -func (p Part) OverlapsWith(schema *dynparquet.Schema, otherPart *Part) (bool, error) { - a, err := p.Least() - if err != nil { - return false, err - } - b, err := p.most() - if err != nil { - return false, err - } - c, err := otherPart.Least() - if err != nil { - return false, err - } - d, err := otherPart.most() - if err != nil { - return false, err +func WithCompactionLevel(level int) Option { + return func(p *basePart) { + p.compactionLevel = level } - - return schema.Cmp(a, d) <= 0 && schema.Cmp(c, b) <= 0, nil } -// Tombstone marks all the parts with the max tx id to ensure they aren't -// included in reads. Tombstoned parts will be eventually be dropped from the -// database during compaction. -func Tombstone(parts []*Part) { - for _, part := range parts { - part.tx = math.MaxUint64 +func WithRelease(release func()) Option { + return func(p *basePart) { + p.release = release } } -func (p Part) HasTombstone() bool { - return p.tx == math.MaxUint64 -} - type PartSorter struct { schema *dynparquet.Schema - parts []*Part + parts []Part err error } -func NewPartSorter(schema *dynparquet.Schema, parts []*Part) *PartSorter { +func NewPartSorter(schema *dynparquet.Schema, parts []Part) *PartSorter { return &PartSorter{ schema: schema, parts: parts, } } -func (p PartSorter) Len() int { +func (p *PartSorter) Len() int { return len(p.parts) } @@ -287,3 +93,73 @@ func (p *PartSorter) Swap(i, j int) { func (p *PartSorter) Err() error { return p.err } + +// FindMaximumNonOverlappingSet removes the minimum number of parts from the +// given slice in order to return the maximum non-overlapping set of parts. +// The function returns the non-overlapping parts first and any overlapping +// parts second. The parts returned are in sorted order according to their Least +// row. 
+func FindMaximumNonOverlappingSet(schema *dynparquet.Schema, parts []Part) ([]Part, []Part, error) { + if len(parts) < 2 { + return parts, nil, nil + } + sorter := NewPartSorter(schema, parts) + sort.Sort(sorter) + if sorter.Err() != nil { + return nil, nil, sorter.Err() + } + + // Parts are now sorted according to their Least row. + prev := 0 + prevEnd, err := parts[0].Most() + if err != nil { + return nil, nil, err + } + nonOverlapping := make([]Part, 0, len(parts)) + overlapping := make([]Part, 0, len(parts)) + var missing Part + for i := 1; i < len(parts); i++ { + start, err := parts[i].Least() + if err != nil { + return nil, nil, err + } + curEnd, err := parts[i].Most() + if err != nil { + return nil, nil, err + } + if schema.Cmp(prevEnd, start) <= 0 { + // No overlap, append the previous part and update end for the next + // iteration. + nonOverlapping = append(nonOverlapping, parts[prev]) + prevEnd = curEnd + prev = i + continue + } + + // This part overlaps with the previous part. Remove the part with + // the highest end row. + if schema.Cmp(prevEnd, curEnd) >= 0 { + overlapping = append(overlapping, parts[prev]) + prevEnd = curEnd + prev = i + } else { + // The current part must be removed. Don't update prevEnd or prev, + // this will be used in the next iteration and must stay the same. + overlapping = append(overlapping, parts[i]) + + if i == len(parts)-1 { // This is the last iteration mark this one as missing + missing = parts[prev] + } + } + } + if len(overlapping) == 0 || overlapping[len(overlapping)-1] != parts[len(parts)-1] { + // The last part either did not overlap with its previous part, or + // overlapped but had a smaller end row than its previous part (so the + // previous part is in the overlapping slice). The last part must be + // appended to nonOverlapping. 
+ nonOverlapping = append(nonOverlapping, parts[len(parts)-1]) + } else if missing != nil { + overlapping = append(overlapping, missing) + } + return nonOverlapping, overlapping, nil +} diff --git a/parts/part_list.go b/parts/part_list.go deleted file mode 100644 index 76482655a..000000000 --- a/parts/part_list.go +++ /dev/null @@ -1,106 +0,0 @@ -package parts - -import ( - "sync/atomic" -) - -type SentinelType uint8 - -const ( - None SentinelType = iota - Compacting - Compacted -) - -// Node is a Part that is a part of a linked-list. -type Node struct { - next *atomic.Pointer[Node] - part *Part - - sentinel SentinelType // sentinel nodes contain no parts, and are to indicate the start of a new sub list -} - -func (n *Node) Compacted() bool { - return n.sentinel == Compacted -} - -type List struct { - next *atomic.Pointer[Node] - - // listType indicates the type of list this list is - listType SentinelType -} - -// NewList creates a new part list using atomic constructs. -func NewList(next *atomic.Pointer[Node], s SentinelType) *List { - p := &List{ - next: next, - listType: s, - } - return p -} - -// Sentinel adds a new sentinel node to the list, and returns the sub list starting from that sentinel. -func (l *List) Sentinel(s SentinelType) *List { - node := &Node{ - next: &atomic.Pointer[Node]{}, - sentinel: s, - } - for { // continue until a successful compare and swap occurs - next := l.next.Load() - node.next.Store(next) - if l.next.CompareAndSwap(next, node) { - return NewList(l.next, s) - } - } -} - -// Prepend a node onto the front of the list. 
-func (l *List) Prepend(part *Part) *Node { - node := &Node{ - next: &atomic.Pointer[Node]{}, - part: part, - } - for { // continue until a successful compare and swap occurs - next := l.next.Load() - node.next.Store(next) - if next != nil && next.sentinel == Compacted { // This list is apart of a compacted granule, propogate the compacted value so each subsequent Prepend can return the correct value - node.sentinel = Compacted - } - if l.next.CompareAndSwap(next, node) { - return node - } - } -} - -// Iterate accesses every node in the list. -func (l *List) Iterate(iterate func(*Part) bool) { - next := l.next.Load() - for { - node := (*Node)(next) - if node == nil { - return - } - switch node.part { - case nil: // sentinel node - if l.listType != None && node.sentinel != l.listType { // if we've encountererd a sentinel node from a different type of list we must exit - return - } - default: // normal node - if !iterate(node.part) { // if the part == nil then this is a sentinel node, and we can skip it - return - } - } - next = node.next.Load() - } -} - -func (l *List) Total() int { - count := 0 - l.Iterate(func(_ *Part) bool { - count++ - return true - }) - - return count -} diff --git a/parts/part_test.go b/parts/part_test.go new file mode 100644 index 000000000..49d27bf4e --- /dev/null +++ b/parts/part_test.go @@ -0,0 +1,122 @@ +package parts + +import ( + "testing" + + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/stretchr/testify/require" + + "github.com/polarsignals/frostdb/dynparquet" + schemapb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha1" + "github.com/polarsignals/frostdb/internal/records" + "github.com/polarsignals/frostdb/pqarrow" +) + +func TestFindMaximumNonOverlappingSet(t *testing.T) { + testSchema, err := dynparquet.SchemaFromDefinition(&schemapb.Schema{ + Name: "test_schema", + Columns: []*schemapb.Column{{ + Name: "ints", + 
StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_INT64, + Encoding: schemapb.StorageLayout_ENCODING_PLAIN_UNSPECIFIED, + }, + }}, + SortingColumns: []*schemapb.SortingColumn{{Name: "ints", Direction: schemapb.SortingColumn_DIRECTION_ASCENDING}}, + }) + require.NoError(t, err) + + type rng struct { + start int64 + end int64 + } + type dataModel struct { + Ints int64 `frostdb:",asc,plain"` + } + for _, tc := range []struct { + name string + ranges []rng + expectedNonOverlapping []rng + expectedOverlapping []rng + }{ + { + name: "SinglePart", + ranges: []rng{{1, 2}}, + expectedNonOverlapping: []rng{{1, 2}}, + }, + { + name: "RemoveFirst", + ranges: []rng{{1, 4}, {1, 2}}, + expectedNonOverlapping: []rng{{1, 2}}, + expectedOverlapping: []rng{{1, 4}}, + }, + { + name: "TwoNonOverlapping", + ranges: []rng{{1, 2}, {3, 4}}, + expectedNonOverlapping: []rng{{1, 2}, {3, 4}}, + }, + { + name: "OneOverlap", + ranges: []rng{{1, 2}, {4, 7}, {3, 8}}, + expectedNonOverlapping: []rng{{1, 2}, {4, 7}}, + expectedOverlapping: []rng{{3, 8}}, + }, + { + name: "ChooseMinimumNumber", + ranges: []rng{{1, 2}, {4, 10}, {4, 5}, {6, 7}}, + expectedNonOverlapping: []rng{{1, 2}, {4, 5}, {6, 7}}, + expectedOverlapping: []rng{{4, 10}}, + }, + { + // ReuseCursor makes sure that when dropping a range, its boundaries + // are not reused. This is a regression test (which is why it's so + // specific). 
+ name: "ReuseCursor", + ranges: []rng{{1, 3}, {2, 4}, {4, 5}, {6, 7}}, + expectedNonOverlapping: []rng{{1, 3}, {4, 5}, {6, 7}}, + expectedOverlapping: []rng{{2, 4}}, + }, + { + name: "OnlyTwoOverlap", + ranges: []rng{{1, 3}, {2, 4}}, + expectedNonOverlapping: []rng{}, + expectedOverlapping: []rng{{2, 4}, {1, 3}}, + }, + } { + t.Run(tc.name, func(t *testing.T) { + parts := make([]Part, len(tc.ranges)) + b := records.NewBuild[dataModel](memory.NewGoAllocator()) + schema, err := dynparquet.SchemaFromDefinition(b.Schema("test_schema")) + require.NoError(t, err) + defer b.Release() + for i := range parts { + start := dataModel{Ints: tc.ranges[i].start} + end := dataModel{Ints: tc.ranges[i].end} + require.Nil(t, b.Append(start, end)) + r := b.NewRecord() + serBuf, err := pqarrow.SerializeRecord(r, schema) + require.NoError(t, err) + r.Release() + require.NoError(t, err) + parts[i] = NewParquetPart(0, serBuf) + } + nonOverlapping, overlapping, err := FindMaximumNonOverlappingSet(testSchema, parts) + require.NoError(t, err) + + verify := func(t *testing.T, expected []rng, actual []Part) { + t.Helper() + require.Len(t, actual, len(expected)) + for i := range actual { + start, err := actual[i].Least() + require.NoError(t, err) + end, err := actual[i].Most() + require.NoError(t, err) + require.Equal(t, expected[i].start, start.Row[0].Int64()) + require.Equal(t, expected[i].end, end.Row[0].Int64()) + } + } + verify(t, tc.expectedNonOverlapping, nonOverlapping) + verify(t, tc.expectedOverlapping, overlapping) + }) + } +} diff --git a/pqarrow/arrow.go b/pqarrow/arrow.go index 163da362b..d1ec39603 100644 --- a/pqarrow/arrow.go +++ b/pqarrow/arrow.go @@ -2,15 +2,16 @@ package pqarrow import ( "context" + "errors" "fmt" "io" "sort" "sync" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/memory" - "github.com/segmentio/parquet-go" + 
"github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/parquet-go/parquet-go" "github.com/polarsignals/frostdb/dynparquet" "github.com/polarsignals/frostdb/pqarrow/builder" @@ -20,11 +21,11 @@ import ( ) // ParquetRowGroupToArrowSchema converts a parquet row group to an arrow schema. -func ParquetRowGroupToArrowSchema(ctx context.Context, rg parquet.RowGroup, options logicalplan.IterOptions) (*arrow.Schema, error) { - return ParquetSchemaToArrowSchema(ctx, rg.Schema(), options) +func ParquetRowGroupToArrowSchema(ctx context.Context, rg parquet.RowGroup, s *dynparquet.Schema, options logicalplan.IterOptions) (*arrow.Schema, error) { + return ParquetSchemaToArrowSchema(ctx, rg.Schema(), s, options) } -func ParquetSchemaToArrowSchema(ctx context.Context, schema *parquet.Schema, options logicalplan.IterOptions) (*arrow.Schema, error) { +func ParquetSchemaToArrowSchema(ctx context.Context, schema *parquet.Schema, s *dynparquet.Schema, options logicalplan.IterOptions) (*arrow.Schema, error) { parquetFields := schema.Fields() if len(options.DistinctColumns) == 1 && options.Filter == nil { @@ -59,23 +60,49 @@ func ParquetSchemaToArrowSchema(ctx context.Context, schema *parquet.Schema, opt } } - for _, distinctExpr := range options.DistinctColumns { - if distinctExpr.Computed() { - dataType, err := distinctExpr.DataType(schema) - if err != nil { - return nil, err + if len(options.DistinctColumns) > 0 { + for _, distinctExpr := range options.DistinctColumns { + if distinctExpr.Computed() { + // Usually we would pass the logical query plan as the data type + // finder, but we're here because of an intended layering + // violation, which is pushing distinct queries down to the scan + // layer. 
In this case there are no other possible physical types + // other than the actual schema, so we can just implement a + // simplified version of the type finder that doesn't need to + // traverse the logical plan, since this is already the physical + // scan layer execution. + dataType, err := distinctExpr.DataType(&exprTypeFinder{s: s}) + if err != nil { + return nil, err + } + fields = append(fields, arrow.Field{ + Name: distinctExpr.Name(), + Type: dataType, + Nullable: true, // TODO: This should be determined by the expression and underlying column(s). + }) } - fields = append(fields, arrow.Field{ - Name: distinctExpr.Name(), - Type: dataType, - Nullable: true, // TODO: This should be determined by the expression and underlying column(s). - }) } + + // Need to sort as the distinct columns are just appended, but we need + // the schema to be sorted. If we didn't sort them here, then + // subsequent schemas would be in a different order as + // `mergeArrowSchemas` sorts fields by name. 
+ sort.Slice(fields, func(i, j int) bool { + return fields[i].Name < fields[j].Name + }) } return arrow.NewSchema(fields, nil), nil } +type exprTypeFinder struct { + s *dynparquet.Schema +} + +func (e *exprTypeFinder) DataTypeForExpr(expr logicalplan.Expr) (arrow.DataType, error) { + return logicalplan.DataTypeForExprWithSchema(expr, e.s) +} + func parquetFieldToArrowField(prefix string, field parquet.Field, physicalProjections []logicalplan.Expr) (arrow.Field, error) { if includedProjection(physicalProjections, fullPath(prefix, field)) { af, err := convert.ParquetFieldToArrowField(field) @@ -234,13 +261,13 @@ func NewParquetConverter( return c } -func (c *ParquetConverter) Convert(ctx context.Context, rg parquet.RowGroup) error { - schema, err := ParquetRowGroupToArrowSchema(ctx, rg, c.iterOpts) +func (c *ParquetConverter) Convert(ctx context.Context, rg parquet.RowGroup, s *dynparquet.Schema) error { + schema, err := ParquetRowGroupToArrowSchema(ctx, rg, s, c.iterOpts) if err != nil { return err } // If the schema has no fields we simply ignore this RowGroup that has no data. 
- if len(schema.Fields()) == 0 { + if schema.NumFields() == 0 { return nil } @@ -270,7 +297,7 @@ func (c *ParquetConverter) Convert(ctx context.Context, rg parquet.RowGroup) err } if _, ok := rg.(*dynparquet.MergedRowGroup); ok { - return rowBasedParquetRowGroupToArrowRecord(ctx, c.pool, rg, c.outputSchema, c.builder) + return rowBasedParquetRowGroupToArrowRecord(ctx, rg, c.outputSchema, c.builder) } parquetSchema := rg.Schema() @@ -365,6 +392,12 @@ func (c *ParquetConverter) NewRecord() arrow.Record { return nil } +func (c *ParquetConverter) Reset() { + if c.builder != nil { + c.builder.Reset() + } +} + func (c *ParquetConverter) Close() { if c.builder != nil { c.builder.Release() @@ -547,7 +580,9 @@ func (c *ParquetConverter) writeDistinctAllColumns( } else if fieldLen < maxLen { repeatTimes := maxLen - fieldLen if ob, ok := field.(builder.OptimizedBuilder); ok { - ob.RepeatLastValue(repeatTimes) + if err := ob.RepeatLastValue(repeatTimes); err != nil { + return false, err + } continue } arr := field.NewArray() @@ -606,14 +641,13 @@ var rowBufPool = &sync.Pool{ // record row by row. The result is appended to b. func rowBasedParquetRowGroupToArrowRecord( ctx context.Context, - pool memory.Allocator, rg parquet.RowGroup, schema *arrow.Schema, builder *builder.RecordBuilder, ) error { parquetFields := rg.Schema().Fields() - if len(schema.Fields()) != len(parquetFields) { + if schema.NumFields() != len(parquetFields) { return fmt.Errorf("inconsistent schema between arrow and parquet") } @@ -688,7 +722,10 @@ func (c *ParquetConverter) writeColumnToArray( // the index values. // TODO(asubiotto): This optimization can be applied at a finer // granularity at the page level as well. 
- columnIndex := columnChunk.ColumnIndex() + columnIndex, err := columnChunk.ColumnIndex() + if err != nil { + return err + } columnType := columnChunk.Type() globalMinValue := columnIndex.MinValue(0) @@ -744,36 +781,42 @@ func (c *ParquetConverter) writeColumnToArray( } return fmt.Errorf("read page: %w", err) } - dict := p.Dictionary() - switch { - case !repeated && dictionaryOnly && dict != nil && p.NumNulls() == 0: - // If we are only writing the dictionary, we don't need to read - // the values. - if err := w.WritePage(dict.Page()); err != nil { - return fmt.Errorf("write dictionary page: %w", err) + dict := p.Dictionary() + if dict != nil && dictionaryOnly { + // We only want distinct values; write only the dictionary page. + if p.NumNulls() > 0 { + // Since dictionary pages do not represent nulls, write a null + // value if the non-dictionary page has at least one null. + w.Write([]parquet.Value{parquet.NullValue()}) } - case !repeated && p.NumNulls() == 0 && dict == nil: - // If the column has no nulls, we can read all values at once - // consecutively without worrying about null values. - if err := w.WritePage(p); err != nil { + p = dict.Page() + } + + if pw, ok := w.(writer.PageWriter); ok { + err := pw.WritePage(p) + if err == nil { + continue + } else if err != nil && !errors.Is(err, writer.ErrCannotWritePageDirectly) { return fmt.Errorf("write page: %w", err) } - default: - if n := p.NumValues(); int64(cap(c.scratchValues)) < n { - c.scratchValues = make([]parquet.Value, n) - } else { - c.scratchValues = c.scratchValues[:n] - } + // Could not write page directly, fall through to slow path. + } - // We're reading all values in the page so we always expect an io.EOF. - reader := p.Values() - if _, err := reader.ReadValues(c.scratchValues); err != nil && err != io.EOF { - return fmt.Errorf("read values: %w", err) - } + // Write values using the slow path. 
+ n := p.NumValues() + if int64(cap(c.scratchValues)) < n { + c.scratchValues = make([]parquet.Value, n) + } + c.scratchValues = c.scratchValues[:n] - w.Write(c.scratchValues) + // We're reading all values in the page so we always expect an io.EOF. + reader := p.Values() + if _, err := reader.ReadValues(c.scratchValues); err != nil && err != io.EOF { + return fmt.Errorf("read values: %w", err) } + + w.Write(c.scratchValues) } return nil @@ -816,7 +859,7 @@ func recordBuilderLength(rb *builder.RecordBuilder) (maxLength, maxLengthFields maxLengthFields++ } } - return maxLength, maxLengthFields, !(maxLengthFields == len(rb.Fields())) + return maxLength, maxLengthFields, maxLengthFields != len(rb.Fields()) } // parquetSchemaEqual returns whether the two input schemas are equal. For now, @@ -850,7 +893,11 @@ func (f PreExprVisitorFunc) PreVisit(expr logicalplan.Expr) bool { return f(expr) } -func (f PreExprVisitorFunc) PostVisit(expr logicalplan.Expr) bool { +func (f PreExprVisitorFunc) Visit(_ logicalplan.Expr) bool { + return false +} + +func (f PreExprVisitorFunc) PostVisit(_ logicalplan.Expr) bool { return false } @@ -885,7 +932,10 @@ func binaryDistinctExpr( value := *info.v switch expr.Op { case logicalplan.OpGt: - index := columnChunk.ColumnIndex() + index, err := columnChunk.ColumnIndex() + if err != nil { + return false, err + } allGreater, noneGreater := allOrNoneGreaterThan( typ, index, @@ -918,14 +968,14 @@ func allOrNoneGreaterThan( allTrue := true allFalse := true for i := 0; i < numPages; i++ { - min := index.MinValue(i) - max := index.MaxValue(i) + minValue := index.MinValue(i) + maxValue := index.MaxValue(i) - if typ.Compare(max, value) <= 0 { + if typ.Compare(maxValue, value) <= 0 { allTrue = false } - if typ.Compare(min, value) > 0 { + if typ.Compare(minValue, value) > 0 { allFalse = false } } @@ -976,6 +1026,17 @@ func copyArrToBuilder(builder builder.ColumnBuilder, arr arrow.Array, toCopy int b.Append(arr.Value(i)) } } + case *array.String: + b := 
builder.(*array.BinaryBuilder) + for i := 0; i < toCopy; i++ { + if arr.IsNull(i) { + // We cannot use unsafe appends with the binary builder + // because offsets won't be appended. + b.AppendNull() + } else { + b.AppendString(arr.Value(i)) + } + } case *array.Int64: b := builder.(*array.Int64Builder) for i := 0; i < toCopy; i++ { @@ -1016,6 +1077,16 @@ func copyArrToBuilder(builder builder.ColumnBuilder, arr arrow.Array, toCopy int } } } + case *array.String: + for i := 0; i < toCopy; i++ { + if arr.IsNull(i) { + b.AppendNull() + } else { + if err := b.AppendString(dict.Value(arr.GetValueIndex(i))); err != nil { + panic("failed to append to dictionary") + } + } + } default: panic(fmt.Sprintf("unsupported dictionary type: %T", dict)) } @@ -1145,7 +1216,8 @@ func mergeArrowSchemas(schemas []*arrow.Schema) *arrow.Schema { fieldsMap := make(map[string]arrow.Field) for _, schema := range schemas { - for _, f := range schema.Fields() { + for i := 0; i < schema.NumFields(); i++ { + f := schema.Field(i) if _, ok := fieldsMap[f.Name]; !ok { fieldNames = append(fieldNames, f.Name) fieldsMap[f.Name] = f @@ -1180,3 +1252,31 @@ func ColToWriter(col int, writers []MultiColumnWriter) writer.ValueWriter { return nil } + +// Project will project the record according to the given projections. +func Project(r arrow.Record, projections []logicalplan.Expr) arrow.Record { + if len(projections) == 0 { + r.Retain() // NOTE: we're creating another reference to this record, so retain it + return r + } + + cols := make([]arrow.Array, 0, r.Schema().NumFields()) + fields := make([]arrow.Field, 0, r.Schema().NumFields()) + for i := 0; i < r.Schema().NumFields(); i++ { + for _, projection := range projections { + if projection.MatchColumn(r.Schema().Field(i).Name) { + cols = append(cols, r.Column(i)) + fields = append(fields, r.Schema().Field(i)) + break + } + } + } + + // If the projection matches the entire record, return the record as is. 
+ if len(cols) == r.Schema().NumFields() { + r.Retain() // NOTE: we're creating another reference to this record, so retain it + return r + } + + return array.NewRecord(arrow.NewSchema(fields, nil), cols, r.NumRows()) +} diff --git a/pqarrow/arrow_test.go b/pqarrow/arrow_test.go index 6487f8a8f..e75fd32d7 100644 --- a/pqarrow/arrow_test.go +++ b/pqarrow/arrow_test.go @@ -5,14 +5,15 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" "github.com/google/uuid" - "github.com/segmentio/parquet-go" + "github.com/parquet-go/parquet-go" "github.com/stretchr/testify/require" "github.com/polarsignals/frostdb/dynparquet" + schemapb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha1" "github.com/polarsignals/frostdb/query/logicalplan" ) @@ -20,9 +21,9 @@ func TestDifferentSchemasToArrow(t *testing.T) { dynSchema := dynparquet.NewSampleSchema() samples := dynparquet.Samples{{ - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -32,14 +33,14 @@ func TestDifferentSchemasToArrow(t *testing.T) { Value: 1, }} - buf0, err := samples.ToBuffer(dynSchema) + buf0, err := dynparquet.ToBuffer(samples, dynSchema) require.NoError(t, err) samples = dynparquet.Samples{{ - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value2"}, - {Name: "label2", Value: "value2"}, - {Name: "label3", Value: "value3"}, + Labels: 
map[string]string{ + "label1": "value2", + "label2": "value2", + "label3": "value3", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -49,14 +50,14 @@ func TestDifferentSchemasToArrow(t *testing.T) { Value: 2, }} - buf1, err := samples.ToBuffer(dynSchema) + buf1, err := dynparquet.ToBuffer(samples, dynSchema) require.NoError(t, err) samples = dynparquet.Samples{{ - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value3"}, - {Name: "label2", Value: "value2"}, - {Name: "label4", Value: "value4"}, + Labels: map[string]string{ + "label1": "value3", + "label2": "value2", + "label4": "value4", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -66,13 +67,13 @@ func TestDifferentSchemasToArrow(t *testing.T) { Value: 3, }} - buf2, err := samples.ToBuffer(dynSchema) + buf2, err := dynparquet.ToBuffer(samples, dynSchema) require.NoError(t, err) samples = dynparquet.Samples{{ - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -82,14 +83,14 @@ func TestDifferentSchemasToArrow(t *testing.T) { Value: 2, }} - buf3, err := samples.ToBuffer(dynSchema) + buf3, err := dynparquet.ToBuffer(samples, dynSchema) require.NoError(t, err) samples = dynparquet.Samples{{ - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, - {Name: "label3", Value: "value3"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", + "label3": "value3", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -99,28 +100,31 @@ func TestDifferentSchemasToArrow(t *testing.T) { Value: 3, }} - buf4, err := 
samples.ToBuffer(dynSchema) + buf4, err := dynparquet.ToBuffer(samples, dynSchema) require.NoError(t, err) ctx := context.Background() - c := NewParquetConverter(memory.DefaultAllocator, logicalplan.IterOptions{}) + alloc := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer alloc.AssertSize(t, 0) + c := NewParquetConverter(alloc, logicalplan.IterOptions{}) defer c.Close() - require.NoError(t, c.Convert(ctx, buf0)) - require.NoError(t, c.Convert(ctx, buf1)) - require.NoError(t, c.Convert(ctx, buf2)) - require.NoError(t, c.Convert(ctx, buf3)) - require.NoError(t, c.Convert(ctx, buf4)) + require.NoError(t, c.Convert(ctx, buf0, dynSchema)) + require.NoError(t, c.Convert(ctx, buf1, dynSchema)) + require.NoError(t, c.Convert(ctx, buf2, dynSchema)) + require.NoError(t, c.Convert(ctx, buf3, dynSchema)) + require.NoError(t, c.Convert(ctx, buf4, dynSchema)) ar := c.NewRecord() + defer ar.Release() require.Equal(t, int64(8), ar.NumCols()) require.Equal(t, int64(5), ar.NumRows()) for j := 0; j < int(ar.NumCols()); j++ { switch j { case 0: - require.Equal(t, `{ dictionary: [] - indices: [(null) (null) (null) (null) (null)] }`, fmt.Sprintf("%v", ar.Column(j))) + require.Equal(t, `{ dictionary: [""] + indices: [0 0 0 0 0] }`, fmt.Sprintf("%v", ar.Column(j))) case 1: require.Equal(t, `{ dictionary: ["value1" "value2" "value3"] indices: [0 1 2 0 0] }`, fmt.Sprintf("%v", ar.Column(j))) @@ -145,9 +149,9 @@ func TestMergeToArrow(t *testing.T) { dynSchema := dynparquet.NewSampleSchema() samples := dynparquet.Samples{{ - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -156,10 +160,10 @@ func TestMergeToArrow(t *testing.T) { Timestamp: 1, Value: 1, }, { - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value2"}, - {Name: "label2", 
Value: "value2"}, - {Name: "label3", Value: "value3"}, + Labels: map[string]string{ + "label1": "value2", + "label2": "value2", + "label3": "value3", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -168,10 +172,10 @@ func TestMergeToArrow(t *testing.T) { Timestamp: 2, Value: 2, }, { - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value3"}, - {Name: "label2", Value: "value2"}, - {Name: "label4", Value: "value4"}, + Labels: map[string]string{ + "label1": "value3", + "label2": "value2", + "label4": "value4", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -181,13 +185,13 @@ func TestMergeToArrow(t *testing.T) { Value: 3, }} - buf1, err := samples.ToBuffer(dynSchema) + buf1, err := dynparquet.ToBuffer(samples, dynSchema) require.NoError(t, err) samples = dynparquet.Samples{{ - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -197,14 +201,14 @@ func TestMergeToArrow(t *testing.T) { Value: 2, }} - buf2, err := samples.ToBuffer(dynSchema) + buf2, err := dynparquet.ToBuffer(samples, dynSchema) require.NoError(t, err) samples = dynparquet.Samples{{ - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, - {Name: "label3", Value: "value3"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", + "label3": "value3", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -214,7 +218,7 @@ func TestMergeToArrow(t *testing.T) { Value: 3, }} - buf3, err := samples.ToBuffer(dynSchema) + buf3, err := dynparquet.ToBuffer(samples, dynSchema) require.NoError(t, err) merge, err := 
dynSchema.MergeDynamicRowGroups([]dynparquet.DynamicRowGroup{buf1, buf2, buf3}) @@ -222,31 +226,31 @@ func TestMergeToArrow(t *testing.T) { ctx := context.Background() - as, err := ParquetRowGroupToArrowSchema(ctx, merge, logicalplan.IterOptions{}) + as, err := ParquetRowGroupToArrowSchema(ctx, merge, dynSchema, logicalplan.IterOptions{}) require.NoError(t, err) require.Len(t, as.Fields(), 8) require.Equal(t, as.Field(0), arrow.Field{Name: "example_type", Type: &arrow.DictionaryType{ - IndexType: &arrow.Int16Type{}, + IndexType: &arrow.Uint32Type{}, ValueType: &arrow.BinaryType{}, }}) require.Equal(t, as.Field(1), arrow.Field{Name: "labels.label1", Type: &arrow.DictionaryType{ - IndexType: &arrow.Int16Type{}, + IndexType: &arrow.Uint32Type{}, ValueType: &arrow.BinaryType{}, }, Nullable: true}) require.Equal(t, as.Field(2), arrow.Field{Name: "labels.label2", Type: &arrow.DictionaryType{ - IndexType: &arrow.Int16Type{}, + IndexType: &arrow.Uint32Type{}, ValueType: &arrow.BinaryType{}, }, Nullable: true}) require.Equal(t, as.Field(3), arrow.Field{Name: "labels.label3", Type: &arrow.DictionaryType{ - IndexType: &arrow.Int16Type{}, + IndexType: &arrow.Uint32Type{}, ValueType: &arrow.BinaryType{}, }, Nullable: true}) require.Equal(t, as.Field(4), arrow.Field{Name: "labels.label4", Type: &arrow.DictionaryType{ - IndexType: &arrow.Int16Type{}, + IndexType: &arrow.Uint32Type{}, ValueType: &arrow.BinaryType{}, }, Nullable: true}) require.Equal(t, as.Field(5), arrow.Field{Name: "stacktrace", Type: &arrow.DictionaryType{ - IndexType: &arrow.Int16Type{}, + IndexType: &arrow.Uint32Type{}, ValueType: &arrow.BinaryType{}, }}) require.Equal(t, as.Field(6), arrow.Field{Name: "timestamp", Type: &arrow.Int64Type{}}) @@ -254,7 +258,7 @@ func TestMergeToArrow(t *testing.T) { c := NewParquetConverter(memory.DefaultAllocator, logicalplan.IterOptions{}) defer c.Close() - require.NoError(t, c.Convert(ctx, merge)) + require.NoError(t, c.Convert(ctx, merge, dynSchema)) ar := c.NewRecord() 
require.Equal(t, int64(5), ar.NumRows()) require.Equal(t, int64(8), ar.NumCols()) @@ -263,8 +267,10 @@ func TestMergeToArrow(t *testing.T) { func BenchmarkNestedParquetToArrow(b *testing.B) { dynSchema := dynparquet.NewNestedSampleSchema(b) + schema, err := dynparquet.SchemaFromDefinition(dynSchema) + require.NoError(b, err) - pb, err := dynSchema.NewBuffer(map[string][]string{}) + pb, err := schema.NewBuffer(map[string][]string{}) require.NoError(b, err) for i := 0; i < 1000; i++ { @@ -287,7 +293,7 @@ func BenchmarkNestedParquetToArrow(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - require.NoError(b, c.Convert(ctx, pb)) + require.NoError(b, c.Convert(ctx, pb, schema)) // Reset converter. _ = c.NewRecord() } @@ -299,9 +305,9 @@ func BenchmarkParquetToArrow(b *testing.B) { samples := make(dynparquet.Samples, 0, 1000) for i := 0; i < 1000; i++ { samples = append(samples, dynparquet.Sample{ - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -312,7 +318,7 @@ func BenchmarkParquetToArrow(b *testing.B) { }) } - buf, err := samples.ToBuffer(dynSchema) + buf, err := dynparquet.ToBuffer(samples, dynSchema) require.NoError(b, err) ctx := context.Background() @@ -323,7 +329,7 @@ func BenchmarkParquetToArrow(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - require.NoError(b, c.Convert(ctx, buf)) + require.NoError(b, c.Convert(ctx, buf, dynSchema)) // Reset converter. 
_ = c.NewRecord() } @@ -406,9 +412,9 @@ func TestDistinctBinaryExprOptimization(t *testing.T) { dynSchema := dynparquet.NewSampleSchema() samples := dynparquet.Samples{{ - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -417,9 +423,9 @@ func TestDistinctBinaryExprOptimization(t *testing.T) { Timestamp: 1, Value: 1, }, { - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -429,7 +435,7 @@ func TestDistinctBinaryExprOptimization(t *testing.T) { Value: 2, }} - buf, err := samples.ToBuffer(dynSchema) + buf, err := dynparquet.ToBuffer(samples, dynSchema) require.NoError(t, err) ctx := context.Background() @@ -441,6 +447,7 @@ func TestDistinctBinaryExprOptimization(t *testing.T) { as, err := ParquetRowGroupToArrowSchema( ctx, buf, + dynSchema, logicalplan.IterOptions{ PhysicalProjection: []logicalplan.Expr{ logicalplan.Col("example_type"), @@ -453,7 +460,7 @@ func TestDistinctBinaryExprOptimization(t *testing.T) { require.NoError(t, err) require.Len(t, as.Fields(), 3) require.Equal(t, as.Field(0), arrow.Field{Name: "example_type", Type: &arrow.DictionaryType{ - IndexType: &arrow.Int16Type{}, + IndexType: &arrow.Uint32Type{}, ValueType: &arrow.BinaryType{}, }}) require.Equal(t, as.Field(1), arrow.Field{Name: "timestamp", Type: &arrow.Int64Type{}}) @@ -469,7 +476,7 @@ func TestDistinctBinaryExprOptimization(t *testing.T) { DistinctColumns: distinctColumns, }) defer c.Close() - require.NoError(t, c.Convert(ctx, buf)) + require.NoError(t, c.Convert(ctx, buf, dynSchema)) ar := c.NewRecord() require.Equal(t, 
int64(1), ar.NumRows()) require.Equal(t, int64(3), ar.NumCols()) @@ -480,9 +487,9 @@ func TestDistinctBinaryExprOptimizationMixed(t *testing.T) { dynSchema := dynparquet.NewSampleSchema() samples := dynparquet.Samples{{ - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -491,9 +498,9 @@ func TestDistinctBinaryExprOptimizationMixed(t *testing.T) { Timestamp: 1, Value: 1, }, { - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -502,9 +509,9 @@ func TestDistinctBinaryExprOptimizationMixed(t *testing.T) { Timestamp: 2, Value: 2, }, { - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -513,9 +520,9 @@ func TestDistinctBinaryExprOptimizationMixed(t *testing.T) { Timestamp: 3, Value: 0, }, { - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -525,7 +532,7 @@ func TestDistinctBinaryExprOptimizationMixed(t *testing.T) { Value: 0, }} - buf, err := samples.ToBuffer(dynSchema) + buf, err := dynparquet.ToBuffer(samples, dynSchema) require.NoError(t, err) ctx := context.Background() @@ -537,6 +544,7 @@ func 
TestDistinctBinaryExprOptimizationMixed(t *testing.T) { as, err := ParquetRowGroupToArrowSchema( ctx, buf, + dynSchema, logicalplan.IterOptions{ PhysicalProjection: []logicalplan.Expr{ logicalplan.Col("example_type"), @@ -548,7 +556,7 @@ func TestDistinctBinaryExprOptimizationMixed(t *testing.T) { require.NoError(t, err) require.Len(t, as.Fields(), 3) require.Equal(t, as.Field(0), arrow.Field{Name: "example_type", Type: &arrow.DictionaryType{ - IndexType: &arrow.Int16Type{}, + IndexType: &arrow.Uint32Type{}, ValueType: &arrow.BinaryType{}, }}) require.Equal(t, as.Field(1), arrow.Field{Name: "value", Type: &arrow.Int64Type{}}) @@ -562,7 +570,7 @@ func TestDistinctBinaryExprOptimizationMixed(t *testing.T) { DistinctColumns: distinctColumns, }) defer c.Close() - require.NoError(t, c.Convert(ctx, buf)) + require.NoError(t, c.Convert(ctx, buf, dynSchema)) ar := c.NewRecord() require.Equal(t, int64(2), ar.NumRows()) require.Equal(t, int64(3), ar.NumCols()) @@ -582,7 +590,18 @@ func TestList(t *testing.T) { c := NewParquetConverter(memory.DefaultAllocator, logicalplan.IterOptions{}) defer c.Close() - require.NoError(t, c.Convert(ctx, buf)) + s, err := dynparquet.SchemaFromDefinition(&schemapb.Schema{ + Name: "test", + Columns: []*schemapb.Column{{ + Name: "data", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_INT64, + Repeated: true, + }, + }}, + }) + require.NoError(t, err) + require.NoError(t, c.Convert(ctx, buf, s)) record := c.NewRecord() t.Log(record) @@ -610,7 +629,9 @@ func TestList(t *testing.T) { } func Test_ParquetRowGroupToArrowSchema_Groups(t *testing.T) { - schema := dynparquet.NewNestedSampleSchema(t) + dynSchema := dynparquet.NewNestedSampleSchema(t) + schema, err := dynparquet.SchemaFromDefinition(dynSchema) + require.NoError(t, err) buf, err := schema.NewBufferV2( dynparquet.LabelColumn("label1"), dynparquet.LabelColumn("label2"), @@ -643,7 +664,7 @@ func Test_ParquetRowGroupToArrowSchema_Groups(t *testing.T) { { Name: 
"label1", Type: &arrow.DictionaryType{ - IndexType: &arrow.Int16Type{}, + IndexType: &arrow.Uint32Type{}, ValueType: &arrow.BinaryType{}, }, Nullable: true, @@ -651,7 +672,7 @@ func Test_ParquetRowGroupToArrowSchema_Groups(t *testing.T) { { Name: "label2", Type: &arrow.DictionaryType{ - IndexType: &arrow.Int16Type{}, + IndexType: &arrow.Uint32Type{}, ValueType: &arrow.BinaryType{}, }, Nullable: true, @@ -680,7 +701,7 @@ func Test_ParquetRowGroupToArrowSchema_Groups(t *testing.T) { { Name: "label1", Type: &arrow.DictionaryType{ - IndexType: &arrow.Int16Type{}, + IndexType: &arrow.Uint32Type{}, ValueType: &arrow.BinaryType{}, }, Nullable: true, @@ -705,7 +726,7 @@ func Test_ParquetRowGroupToArrowSchema_Groups(t *testing.T) { { Name: "label2", Type: &arrow.DictionaryType{ - IndexType: &arrow.Int16Type{}, + IndexType: &arrow.Uint32Type{}, ValueType: &arrow.BinaryType{}, }, Nullable: true, @@ -730,7 +751,7 @@ func Test_ParquetRowGroupToArrowSchema_Groups(t *testing.T) { { Name: "label1", Type: &arrow.DictionaryType{ - IndexType: &arrow.Int16Type{}, + IndexType: &arrow.Uint32Type{}, ValueType: &arrow.BinaryType{}, }, Nullable: true, @@ -738,7 +759,7 @@ func Test_ParquetRowGroupToArrowSchema_Groups(t *testing.T) { { Name: "label2", Type: &arrow.DictionaryType{ - IndexType: &arrow.Int16Type{}, + IndexType: &arrow.Uint32Type{}, ValueType: &arrow.BinaryType{}, }, Nullable: true, @@ -758,6 +779,7 @@ func Test_ParquetRowGroupToArrowSchema_Groups(t *testing.T) { as, err := ParquetRowGroupToArrowSchema( ctx, buf, + schema, logicalplan.IterOptions{ PhysicalProjection: test.physicalProjections, }, @@ -770,14 +792,18 @@ func Test_ParquetRowGroupToArrowSchema_Groups(t *testing.T) { func Test_ParquetToArrowV2(t *testing.T) { dynSchema := dynparquet.NewNestedSampleSchema(t) + schema, err := dynparquet.SchemaFromDefinition(dynSchema) + require.NoError(t, err) ctx := context.Background() - c := NewParquetConverter(memory.DefaultAllocator, logicalplan.IterOptions{}) + alloc := 
memory.NewCheckedAllocator(memory.DefaultAllocator) + defer alloc.AssertSize(t, 0) + c := NewParquetConverter(alloc, logicalplan.IterOptions{}) defer c.Close() n := 10 for i := 0; i < n; i++ { - pb, err := dynSchema.NewBufferV2( + pb, err := schema.NewBufferV2( dynparquet.LabelColumn("label1"), dynparquet.LabelColumn("label2"), ) @@ -793,10 +819,10 @@ func Test_ParquetToArrowV2(t *testing.T) { }, }) require.NoError(t, err) - require.NoError(t, c.Convert(ctx, pb)) + require.NoError(t, c.Convert(ctx, pb, schema)) } r := c.NewRecord() - fmt.Println(r) + defer r.Release() require.Equal(t, int64(n), r.NumRows()) } @@ -806,9 +832,9 @@ func Test_ParquetToArrow(t *testing.T) { samples := make(dynparquet.Samples, 0, 1000) for i := 0; i < 1000; i++ { samples = append(samples, dynparquet.Sample{ - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -819,15 +845,18 @@ func Test_ParquetToArrow(t *testing.T) { }) } - buf, err := samples.ToBuffer(dynSchema) + buf, err := dynparquet.ToBuffer(samples, dynSchema) require.NoError(t, err) ctx := context.Background() - c := NewParquetConverter(memory.DefaultAllocator, logicalplan.IterOptions{}) + alloc := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer alloc.AssertSize(t, 0) + c := NewParquetConverter(alloc, logicalplan.IterOptions{}) defer c.Close() - require.NoError(t, c.Convert(ctx, buf)) + require.NoError(t, c.Convert(ctx, buf, dynSchema)) r := c.NewRecord() + defer r.Release() require.Equal(t, int64(1000), r.NumRows()) } diff --git a/pqarrow/arrowutils/groupranges.go b/pqarrow/arrowutils/groupranges.go index 5d62e8d86..ccf9a3590 100644 --- a/pqarrow/arrowutils/groupranges.go +++ b/pqarrow/arrowutils/groupranges.go @@ -6,8 +6,8 @@ import ( "fmt" "strings" - 
"github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" ) // GetGroupsAndOrderedSetRanges returns a min-heap of group ranges and ordered @@ -68,10 +68,7 @@ func GetGroupsAndOrderedSetRanges( } // And update the current group. - v, err := GetValue(t, j) - if err != nil { - return err - } + v := t.GetOneForMarshal(j) switch concreteV := v.(type) { case []byte: // Safe copy, otherwise the value might get overwritten. @@ -178,6 +175,26 @@ func GetGroupsAndOrderedSetRanges( return nil, nil, nil, err } } + + case *array.String: + for j := 0; j < arr.Len(); j++ { + var curGroupValue *string + if curGroup[i] != nil { + g := curGroup[i].(string) + curGroupValue = &g + } + vIsNull := t.IsNull(j) + cmp, ok := nullComparison(curGroupValue == nil, vIsNull) + if !ok { + cmp = strings.Compare(*curGroupValue, + dict.Value(t.GetValueIndex(j)), + ) + } + if err := handleCmpResult(cmp, i, t, j); err != nil { + return nil, nil, nil, err + } + } + default: panic(fmt.Sprintf("unsupported dictionary type: %T", dict)) } @@ -195,7 +212,7 @@ func GetGroupsAndOrderedSetRanges( // If the returned boolean is false, the comparison should be disregarded. func nullComparison(leftNull, rightNull bool) (int, bool) { if !leftNull && !rightNull { - // Both are null, this implies that the null comparison should be + // Both are not null, this implies that the null comparison should be // disregarded. 
return 0, false } diff --git a/pqarrow/arrowutils/merge.go b/pqarrow/arrowutils/merge.go index e336a66ab..61a2f3466 100644 --- a/pqarrow/arrowutils/merge.go +++ b/pqarrow/arrowutils/merge.go @@ -4,21 +4,27 @@ import ( "bytes" "container/heap" "fmt" + "math" + "strings" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" "github.com/polarsignals/frostdb/pqarrow/builder" ) // MergeRecords merges the given records. The records must all have the same // schema. orderByCols is a slice of indexes into the columns that the records -// and resulting records are ordered by. Note that the given records should -// already be ordered by the given columns. +// and resulting records are ordered by. While ordering the limit is checked before appending more rows. +// If limit is 0, no limit is applied. +// Note that the given records should already be ordered by the given columns. // WARNING: Only ascending ordering is currently supported. func MergeRecords( - mem memory.Allocator, records []arrow.Record, orderByCols []int, + mem memory.Allocator, + records []arrow.Record, + orderByCols []SortingColumn, + limit uint64, ) (arrow.Record, error) { h := cursorHeap{ cursors: make([]cursor, len(records)), @@ -30,9 +36,15 @@ func MergeRecords( schema := records[0].Schema() recordBuilder := builder.NewRecordBuilder(mem, schema) + defer recordBuilder.Release() + + if limit == 0 { + limit = math.MaxInt64 + } + count := uint64(0) heap.Init(&h) - for h.Len() > 0 { + for h.Len() > 0 && count < limit { // Minimum cursor is always at index 0. 
r := h.cursors[0].r i := h.cursors[0].curIdx @@ -44,10 +56,12 @@ func MergeRecords( if int64(i+1) >= r.NumRows() { // Pop the cursor since it has no more data. _ = heap.Pop(&h) + count++ continue } h.cursors[0].curIdx++ heap.Fix(&h, 0) + count++ } return recordBuilder.NewRecord(), nil @@ -60,7 +74,7 @@ type cursor struct { type cursorHeap struct { cursors []cursor - orderByCols []int + orderByCols []SortingColumn } func (h cursorHeap) Len() int { @@ -68,58 +82,100 @@ func (h cursorHeap) Len() int { } func (h cursorHeap) Less(i, j int) bool { - c1 := h.cursors[i] - c2 := h.cursors[j] - for _, i := range h.orderByCols { - col1 := c1.r.Column(i) - col2 := c2.r.Column(i) + for idx := range h.orderByCols { + c1 := h.cursors[i] + c2 := h.cursors[j] + sc := h.orderByCols[idx] + col1 := c1.r.Column(sc.Index) + col2 := c2.r.Column(sc.Index) if cmp, ok := nullComparison(col1.IsNull(c1.curIdx), col2.IsNull(c2.curIdx)); ok { - if cmp == 0 { - continue + if h.orderByCols[idx].NullsFirst { + return cmp == -1 + } + if !h.orderByCols[idx].NullsFirst { + return cmp == 1 } - return cmp < 0 } - switch arr1 := c1.r.Column(i).(type) { + + cmp := h.compare(idx, i, j) + if cmp != 0 { + // Use direction to reorder the comparison. Direction determines if the list + // is in ascending or descending. + // + // For instance if comparison between i,j value is -1 and direction is -1 + // this will resolve to true hence the list will be in ascending order. Same + // principle applies for descending. 
+ return cmp == h.orderByCols[idx].Direction.comparison() + } + // Try comparing the next column + } + + return false +} + +func (h cursorHeap) compare(idx, i, j int) int { + c1 := h.cursors[i] + c2 := h.cursors[j] + sc := h.orderByCols[idx] + switch arr1 := c1.r.Column(sc.Index).(type) { + case *array.Binary: + arr2 := c2.r.Column(sc.Index).(*array.Binary) + return bytes.Compare(arr1.Value(c1.curIdx), arr2.Value(c2.curIdx)) + case *array.String: + arr2 := c2.r.Column(sc.Index).(*array.String) + return strings.Compare(arr1.Value(c1.curIdx), arr2.Value(c2.curIdx)) + case *array.Int64: + arr2 := c2.r.Column(sc.Index).(*array.Int64) + v1 := arr1.Value(c1.curIdx) + v2 := arr2.Value(c2.curIdx) + if v1 == v2 { + return 0 + } + if v1 < v2 { + return -1 + } + return 1 + case *array.Int32: + arr2 := c2.r.Column(sc.Index).(*array.Int32) + v1 := arr1.Value(c1.curIdx) + v2 := arr2.Value(c2.curIdx) + if v1 == v2 { + return 0 + } + if v1 < v2 { + return -1 + } + return 1 + case *array.Uint64: + arr2 := c2.r.Column(sc.Index).(*array.Uint64) + v1 := arr1.Value(c1.curIdx) + v2 := arr2.Value(c2.curIdx) + if v1 == v2 { + return 0 + } + if v1 < v2 { + return -1 + } + return 1 + case *array.Dictionary: + switch dict := arr1.Dictionary().(type) { case *array.Binary: - arr2 := c2.r.Column(i).(*array.Binary) - cmp := bytes.Compare(arr1.Value(c1.curIdx), arr2.Value(c2.curIdx)) - if cmp == 0 { - continue - } - return cmp < 0 - case *array.Int64: - arr2 := c2.r.Column(i).(*array.Int64) - v1 := arr1.Value(c1.curIdx) - v2 := arr2.Value(c2.curIdx) - if v1 == v2 { - continue - } - return v1 < v2 - case *array.Dictionary: - switch dict := arr1.Dictionary().(type) { - case *array.Binary: - arr2 := c2.r.Column(i).(*array.Dictionary) - dict2 := arr2.Dictionary().(*array.Binary) - cmp := bytes.Compare(dict.Value(arr1.GetValueIndex(c1.curIdx)), dict2.Value(arr2.GetValueIndex(c2.curIdx))) - if cmp == 0 { - continue - } - return cmp < 0 - default: - panic(fmt.Sprintf("unsupported dictionary type for 
record merging %T", dict)) - } + arr2 := c2.r.Column(sc.Index).(*array.Dictionary) + dict2 := arr2.Dictionary().(*array.Binary) + return bytes.Compare(dict.Value(arr1.GetValueIndex(c1.curIdx)), dict2.Value(arr2.GetValueIndex(c2.curIdx))) default: - panic(fmt.Sprintf("unsupported type for record merging %T", arr1)) + panic(fmt.Sprintf("unsupported dictionary type for record merging %T", dict)) } + default: + panic(fmt.Sprintf("unsupported type for record merging %T", arr1)) } - return false } func (h cursorHeap) Swap(i, j int) { h.cursors[i], h.cursors[j] = h.cursors[j], h.cursors[i] } -func (h cursorHeap) Push(x any) { +func (h cursorHeap) Push(_ any) { panic( "number of cursors are known at Init time, none should ever be pushed", ) diff --git a/pqarrow/arrowutils/merge_test.go b/pqarrow/arrowutils/merge_test.go index 0ab29ef1e..76121e2b9 100644 --- a/pqarrow/arrowutils/merge_test.go +++ b/pqarrow/arrowutils/merge_test.go @@ -1,50 +1,395 @@ package arrowutils_test import ( + "context" "testing" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/require" + "github.com/polarsignals/frostdb/internal/records" "github.com/polarsignals/frostdb/pqarrow/arrowutils" ) func TestMerge(t *testing.T) { - schema := arrow.NewSchema( - []arrow.Field{{Name: "test", Type: arrow.PrimitiveTypes.Int64}}, - nil, - ) - - b := array.NewInt64Builder(memory.DefaultAllocator) - b.Append(0) - b.Append(1) - b.Append(3) - b.Append(5) - a := b.NewArray() - record1 := array.NewRecord(schema, []arrow.Array{a}, int64(a.Len())) - b.Append(2) - b.Append(4) - b.Append(5) - b.Append(6) - a = b.NewArray() - record2 := 
array.NewRecord(schema, []arrow.Array{a}, int64(a.Len())) - b.AppendNull() - a = b.NewArray() - record3 := array.NewRecord(schema, []arrow.Array{a}, int64(a.Len())) - - res, err := arrowutils.MergeRecords( - memory.DefaultAllocator, []arrow.Record{record1, record2, record3}, []int{0}, - ) + type row struct { + Number *int64 `frostdb:",asc(0)"` + Text *string `frostdb:",asc(1)"` + } + int64Ptr := func(i int64) *int64 { return &i } + stringPtr := func(s string) *string { return &s } + + // mem := memory.NewGoAllocator() + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + t.Parallel() + + build := records.NewBuild[row](mem) + defer build.Release() + err := build.Append([]row{ + {Number: nil, Text: nil}, + {Number: int64Ptr(0), Text: stringPtr("a")}, + {Number: int64Ptr(1), Text: stringPtr("b")}, + {Number: int64Ptr(3), Text: stringPtr("d")}, + {Number: int64Ptr(5), Text: stringPtr("d")}, + }...) require.NoError(t, err) - require.Equal(t, int64(1), res.NumCols()) - col := res.Column(0).(*array.Int64) - require.Equal(t, 9, col.Len()) - // Nulls sort first. 
- require.True(t, col.IsNull(0)) - expected := []int64{0, 1, 2, 3, 4, 5, 5, 6} - for i := 1; i < col.Len(); i++ { - require.Equal(t, expected[i-1], col.Value(i)) + record1 := build.NewRecord() + defer record1.Release() + + for _, tc := range []struct { + name string + // inputs + records [][]row + orderBy []arrowutils.SortingColumn + limit uint64 + // expected output + expected []row + }{{ + name: "merge-ascending", + records: [][]row{{ + {Number: nil}, // 0 + {Number: int64Ptr(0)}, // 1 + {Number: int64Ptr(2)}, // 3 + {Number: int64Ptr(4)}, // 5 + {Number: int64Ptr(5)}, // 7 + }, { + {Number: int64Ptr(1)}, // 2 + {Number: int64Ptr(3)}, // 4 + {Number: int64Ptr(5)}, // 6 + {Number: int64Ptr(6)}, // 8 + {Number: int64Ptr(7)}, // 9 + }}, + orderBy: []arrowutils.SortingColumn{ + {Index: 0, Direction: arrowutils.Ascending, NullsFirst: true}, + }, + + expected: []row{ + {Number: nil, Text: nil}, + {Number: int64Ptr(0)}, + {Number: int64Ptr(1)}, + {Number: int64Ptr(2)}, + {Number: int64Ptr(3)}, + {Number: int64Ptr(4)}, + {Number: int64Ptr(5)}, + {Number: int64Ptr(5)}, + {Number: int64Ptr(6)}, + {Number: int64Ptr(7)}, + }, + }, { + name: "merge-ascending-limit", + records: [][]row{{ + {Number: nil}, // 0 + {Number: int64Ptr(0)}, // 1 + {Number: int64Ptr(2)}, // 3 + {Number: int64Ptr(4)}, // 5 + {Number: int64Ptr(5)}, // 7 + }, { + {Number: int64Ptr(1)}, // 2 + {Number: int64Ptr(3)}, // 4 + {Number: int64Ptr(5)}, // 6 + {Number: int64Ptr(6)}, // 8 + {Number: int64Ptr(7)}, // 9 + }}, + orderBy: []arrowutils.SortingColumn{ + {Index: 0, Direction: arrowutils.Ascending, NullsFirst: true}, + }, + limit: 3, + + expected: []row{ + {Number: nil, Text: nil}, + {Number: int64Ptr(0)}, + {Number: int64Ptr(1)}, + }, + }, { + name: "merge-descending", + records: [][]row{{ + {Number: nil}, // 0 + {Number: int64Ptr(5)}, // 3 + {Number: int64Ptr(3)}, // 6 + {Number: int64Ptr(1)}, // 7 + {Number: int64Ptr(0)}, // 8 + }, { + {Number: int64Ptr(7)}, // 1 + {Number: int64Ptr(6)}, // 2 + 
{Number: int64Ptr(5)}, // 4 + {Number: int64Ptr(4)}, // 5 + }}, + orderBy: []arrowutils.SortingColumn{ + {Index: 0, Direction: arrowutils.Descending, NullsFirst: true}, + }, + + expected: []row{ + {Number: nil}, + {Number: int64Ptr(7)}, + {Number: int64Ptr(6)}, + {Number: int64Ptr(5)}, + {Number: int64Ptr(5)}, + {Number: int64Ptr(4)}, + {Number: int64Ptr(3)}, + {Number: int64Ptr(1)}, + {Number: int64Ptr(0)}, + }, + }, { + name: "merge-descending-limit", + records: [][]row{{ + {Number: nil}, // 0 + {Number: int64Ptr(5)}, // 4 + {Number: int64Ptr(3)}, // limited + {Number: int64Ptr(1)}, // limited + {Number: int64Ptr(0)}, // limited + }, { + {Number: int64Ptr(7)}, // 1 + {Number: int64Ptr(6)}, // 2 + {Number: int64Ptr(5)}, // 3 + {Number: int64Ptr(4)}, // 5 + }}, + orderBy: []arrowutils.SortingColumn{ + {Index: 0, Direction: arrowutils.Descending, NullsFirst: true}, + }, + limit: 6, + + expected: []row{ + {Number: nil}, + {Number: int64Ptr(7)}, + {Number: int64Ptr(6)}, + {Number: int64Ptr(5)}, + {Number: int64Ptr(5)}, + {Number: int64Ptr(4)}, + }, + }, { + name: "multiple-ascending", + records: [][]row{{ + {Number: int64Ptr(0), Text: stringPtr("a")}, // 1 + {Number: int64Ptr(0), Text: stringPtr("c")}, // 3 + {Number: int64Ptr(1), Text: stringPtr("e")}, // 4 + }, { + {Number: nil, Text: nil}, // 0 + {Number: int64Ptr(0), Text: stringPtr("b")}, // 2 + {Number: int64Ptr(1), Text: stringPtr("d")}, // 3 + {Number: int64Ptr(2), Text: stringPtr("f")}, // 5 + }}, + orderBy: []arrowutils.SortingColumn{ + {Index: 0, Direction: arrowutils.Ascending, NullsFirst: true}, + {Index: 1, Direction: arrowutils.Ascending, NullsFirst: true}, + }, + + expected: []row{ + {Number: nil, Text: nil}, + {Number: int64Ptr(0), Text: stringPtr("a")}, + {Number: int64Ptr(0), Text: stringPtr("b")}, + {Number: int64Ptr(0), Text: stringPtr("c")}, + {Number: int64Ptr(1), Text: stringPtr("d")}, + {Number: int64Ptr(1), Text: stringPtr("e")}, + {Number: int64Ptr(2), Text: stringPtr("f")}, + }, + }, { + 
name: "multiple-descending", + records: [][]row{{ + {Number: int64Ptr(1), Text: stringPtr("e")}, + {Number: int64Ptr(0), Text: stringPtr("c")}, + {Number: int64Ptr(0), Text: stringPtr("a")}, + }, { + {Number: nil, Text: nil}, // 0 + {Number: int64Ptr(2), Text: stringPtr("f")}, // + {Number: int64Ptr(1), Text: stringPtr("d")}, + {Number: int64Ptr(0), Text: stringPtr("b")}, + }}, + orderBy: []arrowutils.SortingColumn{ + {Index: 0, Direction: arrowutils.Descending, NullsFirst: true}, + {Index: 1, Direction: arrowutils.Descending, NullsFirst: true}, + }, + + expected: []row{ + {Number: nil, Text: nil}, + {Number: int64Ptr(2), Text: stringPtr("f")}, + {Number: int64Ptr(1), Text: stringPtr("e")}, + {Number: int64Ptr(1), Text: stringPtr("d")}, + {Number: int64Ptr(0), Text: stringPtr("c")}, + {Number: int64Ptr(0), Text: stringPtr("b")}, + {Number: int64Ptr(0), Text: stringPtr("a")}, + }, + }, { + name: "multiple-mixed", + records: [][]row{{ + {Number: int64Ptr(1), Text: stringPtr("e")}, + {Number: int64Ptr(0), Text: stringPtr("a")}, + {Number: int64Ptr(0), Text: stringPtr("c")}, + }, { + {Number: nil, Text: nil}, + {Number: int64Ptr(2), Text: stringPtr("f")}, + {Number: int64Ptr(1), Text: stringPtr("d")}, + {Number: int64Ptr(0), Text: stringPtr("b")}, + }}, + orderBy: []arrowutils.SortingColumn{ + {Index: 0, Direction: arrowutils.Descending, NullsFirst: true}, + {Index: 1, Direction: arrowutils.Ascending, NullsFirst: true}, + }, + + expected: []row{ + {Number: nil, Text: nil}, + {Number: int64Ptr(2), Text: stringPtr("f")}, + {Number: int64Ptr(1), Text: stringPtr("d")}, + {Number: int64Ptr(1), Text: stringPtr("e")}, + {Number: int64Ptr(0), Text: stringPtr("a")}, + {Number: int64Ptr(0), Text: stringPtr("b")}, + {Number: int64Ptr(0), Text: stringPtr("c")}, + }, + }} { + t.Run(tc.name, func(t *testing.T) { + builder := records.NewBuild[row](mem) + defer builder.Release() + + records := make([]arrow.Record, 0, len(tc.records)) + defer func() { + for _, record := range records { 
+ record.Release() + } + }() + + for _, rows := range tc.records { + err := builder.Append(rows...) + require.NoError(t, err) + records = append(records, builder.NewRecord()) + } + + res, err := arrowutils.MergeRecords(mem, records, tc.orderBy, tc.limit) + require.NoError(t, err) + defer res.Release() + + numbers := res.Column(0).(*array.Int64) + texts := res.Column(1).(*array.String) + + // TODO: Create an equivalent to the records.NewBuild for reading back into the struct. + result := make([]row, res.NumRows()) + for i := 0; i < int(res.NumRows()); i++ { + if numbers.IsValid(i) { + result[i].Number = int64Ptr(numbers.Value(i)) + } + if texts.IsValid(i) { + result[i].Text = stringPtr(texts.Value(i)) + } + } + + require.Equal(t, tc.expected, result) + }) + } +} + +func TestMergeNestedListStruct(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + rb := array.NewRecordBuilder(mem, arrow.NewSchema([]arrow.Field{ + {Name: "int64", Type: arrow.PrimitiveTypes.Int64}, + {Name: "list", Type: arrow.ListOf(arrow.StructOf([]arrow.Field{ + {Name: "int32", Type: arrow.PrimitiveTypes.Int32}, + {Name: "uint64", Type: arrow.PrimitiveTypes.Uint64}, + }...))}, + }, nil)) + defer rb.Release() + + var recs []arrow.Record + defer func() { + for _, r := range recs { + r.Release() + } + }() + + int64Builder := rb.Field(0).(*array.Int64Builder) + listBuilder := rb.Field(1).(*array.ListBuilder) + listStructBuilder := listBuilder.ValueBuilder().(*array.StructBuilder) + listStructInt32Builder := listStructBuilder.FieldBuilder(0).(*array.Int32Builder) + listStructUint64Builder := listStructBuilder.FieldBuilder(1).(*array.Uint64Builder) + + int64Builder.Append(-123) + listBuilder.Append(true) + listStructBuilder.Append(true) + listStructInt32Builder.Append(123) + listStructUint64Builder.Append(123 * 2) + listStructBuilder.Append(true) + listStructInt32Builder.Append(123 * 3) + listStructUint64Builder.Append(123 * 4) + recs = 
append(recs, rb.NewRecord()) + + int64Builder.Append(-123 * 2) + listBuilder.Append(true) + listStructBuilder.Append(true) + listStructInt32Builder.Append(123 * 5) + listStructUint64Builder.Append(123 * 6) + listStructBuilder.Append(true) + listStructInt32Builder.Append(123 * 7) + listStructUint64Builder.Append(123 * 8) + listStructBuilder.Append(true) + listStructInt32Builder.Append(123 * 9) + listStructUint64Builder.Append(123 * 10) + recs = append(recs, rb.NewRecord()) + + mergeRecord, err := arrowutils.MergeRecords(mem, recs, []arrowutils.SortingColumn{ + {Index: 0, Direction: arrowutils.Ascending}, + }, 0) + require.NoError(t, err) + defer mergeRecord.Release() + + require.Equal(t, int64(2), mergeRecord.NumCols()) + require.Equal(t, int64(2), mergeRecord.NumRows()) + + require.Equal(t, `[-246 -123]`, mergeRecord.Column(0).String()) + require.Equal(t, `[{[615 861 1107] [738 984 1230]} {[123 369] [246 492]}]`, mergeRecord.Column(1).String()) +} + +func BenchmarkMergeRecords(b *testing.B) { + ctx := context.Background() + mem := memory.NewGoAllocator() + + var r1, r2 arrow.Record + { + build := records.NewBuild[Model](mem) + err := build.Append(makeModels(40)...) + require.NoError(b, err) + r := build.NewRecord() + defer r.Release() + + indices, err := arrowutils.SortRecord(r.Record, r.SortingColumns) + require.NoError(b, err) + defer indices.Release() + + r1, err = arrowutils.Take(ctx, r.Record, indices) + require.NoError(b, err) + defer r1.Release() + } + { + build := records.NewBuild[Model](mem) + err := build.Append(makeModels(60)...) 
+ require.NoError(b, err) + r := build.NewRecord() + defer r.Release() + + indices, err := arrowutils.SortRecord(r.Record, r.SortingColumns) + require.NoError(b, err) + defer indices.Release() + + r2, err = arrowutils.Take(ctx, r.Record, indices) + require.NoError(b, err) + defer r2.Release() + } + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + _, err := arrowutils.MergeRecords( + mem, + []arrow.Record{r1, r2}, + []arrowutils.SortingColumn{ + {Index: 0, Direction: arrowutils.Ascending, NullsFirst: true}, + }, + 0, + ) + if err != nil { + b.Fatal(err) + } } } diff --git a/pqarrow/arrowutils/nullarray.go b/pqarrow/arrowutils/nullarray.go index 6ecdbcdce..8953913e5 100644 --- a/pqarrow/arrowutils/nullarray.go +++ b/pqarrow/arrowutils/nullarray.go @@ -1,8 +1,8 @@ package arrowutils import ( - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/memory" "github.com/polarsignals/frostdb/pqarrow/builder" ) @@ -17,21 +17,22 @@ type VirtualNullArray struct { len int } -func MakeVirtualNullArray(dt arrow.DataType, len int) VirtualNullArray { +func MakeVirtualNullArray(dt arrow.DataType, length int) VirtualNullArray { return VirtualNullArray{ dt: dt, - len: len, + len: length, } } // MakeNullArray makes a physical arrow.Array full of NULLs of the given // DataType. -func MakeNullArray(mem memory.Allocator, dt arrow.DataType, len int) arrow.Array { +func MakeNullArray(mem memory.Allocator, dt arrow.DataType, length int) arrow.Array { // TODO(asubiotto): This can be improved by using the optimized builders' // AppendNulls. Not sure whether this should be part of the builder package. 
b := builder.NewBuilder(mem, dt) - b.Reserve(len) - for i := 0; i < len; i++ { + defer b.Release() + b.Reserve(length) + for i := 0; i < length; i++ { b.AppendNull() } return b.NewArray() @@ -74,3 +75,7 @@ func (n VirtualNullArray) Retain() {} func (n VirtualNullArray) Release() {} func (n VirtualNullArray) String() string { return "VirtualNullArray" } + +func (n VirtualNullArray) ValueStr(_ int) string { return "" } + +func (n VirtualNullArray) GetOneForMarshal(_ int) any { return nil } diff --git a/pqarrow/arrowutils/schema.go b/pqarrow/arrowutils/schema.go new file mode 100644 index 000000000..d5b7785ad --- /dev/null +++ b/pqarrow/arrowutils/schema.go @@ -0,0 +1,95 @@ +package arrowutils + +import ( + "fmt" + "sort" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" +) + +// EnsureSameSchema ensures that all the records have the same schema. In cases +// where the schema is not equal, virtual null columns are inserted in the +// records with the missing column. When we have static schemas in the execution +// engine, steps like these should be unnecessary. 
+func EnsureSameSchema(records []arrow.Record) ([]arrow.Record, error) { + if len(records) < 2 { + return records, nil + } + + lastSchema := records[0].Schema() + needSchemaRecalculation := false + for i := range records { + if !records[i].Schema().Equal(lastSchema) { + needSchemaRecalculation = true + break + } + } + if !needSchemaRecalculation { + return records, nil + } + + columns := make(map[string]arrow.Field) + for _, r := range records { + for j := 0; j < r.Schema().NumFields(); j++ { + field := r.Schema().Field(j) + if _, ok := columns[field.Name]; !ok { + columns[field.Name] = field + } + } + } + + columnNames := make([]string, 0, len(columns)) + for name := range columns { + columnNames = append(columnNames, name) + } + sort.Strings(columnNames) + + mergedFields := make([]arrow.Field, 0, len(columnNames)) + for _, name := range columnNames { + mergedFields = append(mergedFields, columns[name]) + } + mergedSchema := arrow.NewSchema(mergedFields, nil) + + mergedRecords := make([]arrow.Record, len(records)) + var replacedRecords []arrow.Record + + for i := range records { + recordSchema := records[i].Schema() + if mergedSchema.Equal(recordSchema) { + mergedRecords[i] = records[i] + continue + } + + mergedColumns := make([]arrow.Array, 0, len(mergedFields)) + recordNumRows := records[i].NumRows() + for j := 0; j < mergedSchema.NumFields(); j++ { + field := mergedSchema.Field(j) + if otherFields := recordSchema.FieldIndices(field.Name); otherFields != nil { + if len(otherFields) > 1 { + fieldsFound, _ := recordSchema.FieldsByName(field.Name) + return nil, fmt.Errorf( + "found multiple fields %v for name %s", + fieldsFound, + field.Name, + ) + } + mergedColumns = append(mergedColumns, records[i].Column(otherFields[0])) + } else { + // Note that this VirtualNullArray will be read from, but the + // merged output will be a physical null array, so there is no + // virtual->physical conversion necessary before we return data. 
+ mergedColumns = append(mergedColumns, MakeVirtualNullArray(field.Type, int(recordNumRows))) + } + } + + replacedRecords = append(replacedRecords, records[i]) + mergedRecords[i] = array.NewRecord(mergedSchema, mergedColumns, recordNumRows) + } + + for _, r := range replacedRecords { + r.Release() + } + + return mergedRecords, nil +} diff --git a/pqarrow/arrowutils/schema_test.go b/pqarrow/arrowutils/schema_test.go new file mode 100644 index 000000000..4ad6c41f0 --- /dev/null +++ b/pqarrow/arrowutils/schema_test.go @@ -0,0 +1,91 @@ +package arrowutils_test + +import ( + "testing" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/stretchr/testify/require" + + "github.com/polarsignals/frostdb/internal/records" + "github.com/polarsignals/frostdb/pqarrow/arrowutils" +) + +func TestEnsureSameSchema(t *testing.T) { + type struct1 struct { + Field1 int64 `frostdb:",asc(0)"` + Field2 int64 `frostdb:",asc(1)"` + } + type struct2 struct { + Field1 int64 `frostdb:",asc(0)"` + Field3 int64 `frostdb:",asc(1)"` + } + type struct3 struct { + Field1 int64 `frostdb:",asc(0)"` + Field2 int64 `frostdb:",asc(1)"` + Field3 int64 `frostdb:",asc(1)"` + } + + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + build1 := records.NewBuild[struct1](mem) + defer build1.Release() + err := build1.Append([]struct1{ + {Field1: 1, Field2: 2}, + {Field1: 1, Field2: 3}, + }...) + require.NoError(t, err) + + build2 := records.NewBuild[struct2](mem) + defer build2.Release() + err = build2.Append([]struct2{ + {Field1: 1, Field3: 2}, + {Field1: 1, Field3: 3}, + }...) + require.NoError(t, err) + + build3 := records.NewBuild[struct3](mem) + defer build3.Release() + err = build3.Append([]struct3{ + {Field1: 1, Field2: 1, Field3: 1}, + {Field1: 2, Field2: 2, Field3: 2}, + }...) 
+ require.NoError(t, err) + + record1 := build1.NewRecord() + record2 := build2.NewRecord() + record3 := build3.NewRecord() + + recs := []arrow.Record{record1, record2, record3} + defer func() { + for _, r := range recs { + r.Release() + } + }() + + recs, err = arrowutils.EnsureSameSchema(recs) + require.NoError(t, err) + + expected := []struct3{ + // record1 + {Field1: 1, Field2: 2, Field3: 0}, + {Field1: 1, Field2: 3, Field3: 0}, + // record2 + {Field1: 1, Field2: 0, Field3: 2}, + {Field1: 1, Field2: 0, Field3: 3}, + // record3 + {Field1: 1, Field2: 1, Field3: 1}, + {Field1: 2, Field2: 2, Field3: 2}, + } + + reader := records.NewReader[struct3](recs...) + rows := reader.NumRows() + require.Equal(t, int64(len(expected)), rows) + + actual := make([]struct3, rows) + for i := 0; i < int(rows); i++ { + actual[i] = reader.Value(i) + } + require.Equal(t, expected, actual) +} diff --git a/pqarrow/arrowutils/sort.go b/pqarrow/arrowutils/sort.go new file mode 100644 index 000000000..9a92e0610 --- /dev/null +++ b/pqarrow/arrowutils/sort.go @@ -0,0 +1,637 @@ +package arrowutils + +import ( + "bytes" + "cmp" + "context" + "errors" + "fmt" + "slices" + "sort" + "strconv" + "sync" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/compute" + "github.com/apache/arrow-go/v18/arrow/memory" + "golang.org/x/sync/errgroup" + + "github.com/polarsignals/frostdb/pqarrow/builder" +) + +type Direction uint + +const ( + Ascending Direction = iota + Descending +) + +func (d Direction) comparison() int { + switch d { + case Ascending: + return -1 + case Descending: + return 1 + default: + panic("unexpected direction value " + strconv.Itoa(int(d)) + " only -1 and 1 are allowed") + } +} + +// SortingColumn describes a sorting column on a arrow.Record. 
+type SortingColumn struct { + Index int + Direction Direction + NullsFirst bool +} + +// SortRecord sorts given arrow.Record by columns. Returns *array.Int32 of +// indices to sorted rows or record r. +// +// Comparison is made sequentially by each column. When rows are equal in the +// first column we compare the rows om the second column and so on and so forth +// until rows that are not equal are found. +func SortRecord(r arrow.Record, columns []SortingColumn) (*array.Int32, error) { + if len(columns) == 0 { + return nil, errors.New("pqarrow/arrowutils: at least one column is needed for sorting") + } + ms, err := newMultiColSorter(r, columns) + if err != nil { + return nil, err + } + defer ms.Release() + sort.Sort(ms) + return ms.indices.NewArray().(*array.Int32), nil +} + +// Take uses indices which is an array of row index and returns a new record +// that only contains rows specified in indices. +// +// Use compute.WithAllocator to pass a custom memory.Allocator. +func Take(ctx context.Context, r arrow.Record, indices *array.Int32) (arrow.Record, error) { + // compute.Take doesn't support dictionaries or lists. Use take on r when r + // does not have these columns. 
+ var customTake bool + for i := 0; i < int(r.NumCols()); i++ { + if r.Column(i).DataType().ID() == arrow.DICTIONARY || + r.Column(i).DataType().ID() == arrow.RUN_END_ENCODED || + r.Column(i).DataType().ID() == arrow.LIST || + r.Column(i).DataType().ID() == arrow.STRUCT { + customTake = true + break + } + } + if !customTake { + res, err := compute.Take( + ctx, + compute.TakeOptions{BoundsCheck: true}, + compute.NewDatumWithoutOwning(r), + compute.NewDatumWithoutOwning(indices), + ) + if err != nil { + return nil, err + } + return res.(*compute.RecordDatum).Value, nil + } + if r.NumCols() == 0 { + return r, nil + } + + resArr := make([]arrow.Array, r.NumCols()) + defer func() { + for _, a := range resArr { + if a != nil { + a.Release() + } + } + }() + var g errgroup.Group + for i := 0; i < int(r.NumCols()); i++ { + i := i + col := r.Column(i) + switch arr := r.Column(i).(type) { + case *array.Dictionary: + g.Go(func() error { return TakeDictColumn(ctx, arr, i, resArr, indices) }) + case *array.RunEndEncoded: + g.Go(func() error { return TakeRunEndEncodedColumn(ctx, arr, i, resArr, indices) }) + case *array.List: + g.Go(func() error { return TakeListColumn(ctx, arr, i, resArr, indices) }) + case *array.Struct: + g.Go(func() error { return TakeStructColumn(ctx, arr, i, resArr, indices) }) + default: + g.Go(func() error { return TakeColumn(ctx, col, i, resArr, indices) }) + } + } + if err := g.Wait(); err != nil { + return nil, err + } + + // We checked for at least one column at the beginning of the function. 
+ expectedLen := resArr[0].Len() + for _, a := range resArr { + if a.Len() != expectedLen { + return nil, fmt.Errorf( + "pqarrow/arrowutils: expected same length %d for all columns got %d for %s", expectedLen, a.Len(), a.DataType().Name(), + ) + } + } + return array.NewRecord(r.Schema(), resArr, int64(indices.Len())), nil +} + +func TakeColumn(ctx context.Context, a arrow.Array, idx int, arr []arrow.Array, indices *array.Int32) error { + r, err := compute.TakeArray(ctx, a, indices) + if err != nil { + return err + } + arr[idx] = r + return nil +} + +func TakeDictColumn(ctx context.Context, a *array.Dictionary, idx int, arr []arrow.Array, indices *array.Int32) error { + switch a.Dictionary().(type) { + case *array.String, *array.Binary: + r := array.NewDictionaryBuilderWithDict( + compute.GetAllocator(ctx), a.DataType().(*arrow.DictionaryType), a.Dictionary(), + ).(*array.BinaryDictionaryBuilder) + defer r.Release() + + r.Reserve(indices.Len()) + idxBuilder := r.IndexBuilder() + for _, i := range indices.Int32Values() { + if a.IsNull(int(i)) { + r.AppendNull() + continue + } + idxBuilder.Append(a.GetValueIndex(int(i))) + } + + arr[idx] = r.NewArray() + return nil + case *array.FixedSizeBinary: + r := array.NewDictionaryBuilderWithDict( + compute.GetAllocator(ctx), a.DataType().(*arrow.DictionaryType), a.Dictionary(), + ).(*array.FixedSizeBinaryDictionaryBuilder) + defer r.Release() + + r.Reserve(indices.Len()) + idxBuilder := r.IndexBuilder() + for _, i := range indices.Int32Values() { + if a.IsNull(int(i)) { + r.AppendNull() + continue + } + // TODO: Improve this by not copying actual values. 
+ idxBuilder.Append(a.GetValueIndex(int(i))) + } + + arr[idx] = r.NewArray() + return nil + } + + return nil +} + +func TakeRunEndEncodedColumn(ctx context.Context, a *array.RunEndEncoded, idx int, arr []arrow.Array, indices *array.Int32) error { + expandedIndexBuilder := array.NewInt32Builder(compute.GetAllocator(ctx)) + defer expandedIndexBuilder.Release() + + dict := a.Values().(*array.Dictionary) + for i := 0; i < a.Len(); i++ { + if dict.IsNull(a.GetPhysicalIndex(i)) { + expandedIndexBuilder.AppendNull() + } else { + expandedIndexBuilder.Append(int32(dict.GetValueIndex(a.GetPhysicalIndex(i)))) + } + } + expandedIndex := expandedIndexBuilder.NewInt32Array() + defer expandedIndex.Release() + + expandedReorderedArr := make([]arrow.Array, 1) + if err := TakeColumn(ctx, expandedIndex, 0, expandedReorderedArr, indices); err != nil { + return err + } + expandedReordered := expandedReorderedArr[0].(*array.Int32) + defer expandedReordered.Release() + + b := array.NewRunEndEncodedBuilder( + compute.GetAllocator(ctx), a.RunEndsArr().DataType(), a.Values().DataType(), + ) + defer b.Release() + b.Reserve(indices.Len()) + + dictValues := dict.Dictionary().(*array.String) + for i := 0; i < expandedReordered.Len(); i++ { + if expandedReordered.IsNull(i) { + b.AppendNull() + continue + } + reorderedIndex := expandedReordered.Value(i) + v := dictValues.Value(int(reorderedIndex)) + if err := b.AppendValueFromString(v); err != nil { + return err + } + } + + arr[idx] = b.NewRunEndEncodedArray() + return nil +} + +func TakeListColumn(ctx context.Context, a *array.List, idx int, arr []arrow.Array, indices *array.Int32) error { + mem := compute.GetAllocator(ctx) + r := array.NewBuilder(mem, a.DataType()).(*array.ListBuilder) + + switch valueBuilder := r.ValueBuilder().(type) { + case *array.BinaryDictionaryBuilder: + defer valueBuilder.Release() + + listValues := a.ListValues().(*array.Dictionary) + switch dictV := listValues.Dictionary().(type) { + case *array.String: + if err := 
valueBuilder.InsertStringDictValues(dictV); err != nil { + return err + } + case *array.Binary: + if err := valueBuilder.InsertDictValues(dictV); err != nil { + return err + } + } + idxBuilder := valueBuilder.IndexBuilder() + + r.Reserve(indices.Len()) + for _, i := range indices.Int32Values() { + if a.IsNull(int(i)) { + r.AppendNull() + continue + } + + r.Append(true) + start, end := a.ValueOffsets(int(i)) + for j := start; j < end; j++ { + idxBuilder.Append(listValues.GetValueIndex(int(j))) + } + // Resize is necessary here for the correct offsets to be appended to + // the list builder. Otherwise, length will remain at 0. + valueBuilder.Resize(idxBuilder.Len()) + } + + arr[idx] = r.NewArray() + return nil + case *array.StructBuilder: + defer valueBuilder.Release() + + structArray := a.ListValues().(*array.Struct) + + // expand the indices from the list to each row in the struct. + structIndicesBuilder := array.NewInt32Builder(mem) + structIndicesBuilder.Reserve(structArray.Len()) + defer structIndicesBuilder.Release() + + for _, i := range indices.Int32Values() { + start, end := a.ValueOffsets(int(i)) + for j := start; j < end; j++ { + structIndicesBuilder.Append(int32(j)) + } + } + structIndices := structIndicesBuilder.NewInt32Array() + defer structIndices.Release() + + arrays := []arrow.Array{structArray} + err := TakeStructColumn(ctx, structArray, 0, arrays, structIndices) + if err != nil { + return err + } + defer func() { + for _, a := range arrays { + a.Release() + } + }() + + newOffsetBuilder := array.NewInt32Builder(mem) + defer newOffsetBuilder.Release() + + newOffsetBuilder.Append(0) + newOffsetPrevious := int32(0) + for _, i := range indices.Int32Values() { + if a.IsNull(int(i)) { + newOffsetBuilder.AppendNull() + continue + } + + start, end := a.ValueOffsets(int(i)) + // calculate the length of the current list element and add it to the offsets + newOffsetPrevious += int32(end - start) + newOffsetBuilder.Append(newOffsetPrevious) + } + newOffsets := 
newOffsetBuilder.NewInt32Array() + defer newOffsets.Release() + + data := array.NewData( + arrow.ListOf(structArray.DataType()), + a.Len(), + []*memory.Buffer{nil, newOffsets.Data().Buffers()[1]}, + []arrow.ArrayData{arrays[0].Data()}, + newOffsets.NullN(), + 0, + ) + defer data.Release() + arr[idx] = array.NewListData(data) + + return nil + default: + return fmt.Errorf("unexpected value builder type %T for list column", r.ValueBuilder()) + } +} + +func TakeStructColumn(ctx context.Context, a *array.Struct, idx int, arr []arrow.Array, indices *array.Int32) error { + aType := a.Data().DataType().(*arrow.StructType) + + // Immediately, return this struct if it has no fields/columns + if a.NumField() == 0 { + // If the original record is released and this is released once more, + // as usually done, we want to retain it once more. + a.Retain() + arr[idx] = a + return nil + } + + cols := make([]arrow.Array, a.NumField()) + names := make([]string, a.NumField()) + defer func() { + for _, col := range cols { + if col != nil { + col.Release() + } + } + }() + + for i := 0; i < a.NumField(); i++ { + names[i] = aType.Field(i).Name + + switch f := a.Field(i).(type) { + case *array.RunEndEncoded: + if err := TakeRunEndEncodedColumn(ctx, f, i, cols, indices); err != nil { + return err + } + case *array.Dictionary: + if err := TakeDictColumn(ctx, f, i, cols, indices); err != nil { + return err + } + case *array.List: + if err := TakeListColumn(ctx, f, i, cols, indices); err != nil { + return err + } + default: + err := TakeColumn(ctx, f, i, cols, indices) + if err != nil { + return err + } + } + } + + takeStruct, err := array.NewStructArray(cols, names) + if err != nil { + return err + } + + arr[idx] = takeStruct + return nil +} + +type multiColSorter struct { + indices *builder.OptInt32Builder + comparisons []comparator + directions []int + nullsFirst []bool +} + +func newMultiColSorter( + r arrow.Record, + columns []SortingColumn, +) (*multiColSorter, error) { + ms := 
multiColSorterPool.Get().(*multiColSorter) + if r.NumRows() <= 1 { + if r.NumRows() == 1 { + ms.indices.Append(0) + } + return ms, nil + } + ms.Reserve(int(r.NumRows()), len(columns)) + for i := range columns { + ms.directions[i] = columns[i].Direction.comparison() + ms.nullsFirst[i] = columns[i].NullsFirst + } + for i, col := range columns { + switch e := r.Column(col.Index).(type) { + case *array.Int16: + ms.comparisons[i] = newOrderedSorter[int16](e, cmp.Compare) + case *array.Int32: + ms.comparisons[i] = newOrderedSorter[int32](e, cmp.Compare) + case *array.Int64: + ms.comparisons[i] = newOrderedSorter[int64](e, cmp.Compare) + case *array.Uint16: + ms.comparisons[i] = newOrderedSorter[uint16](e, cmp.Compare) + case *array.Uint32: + ms.comparisons[i] = newOrderedSorter[uint32](e, cmp.Compare) + case *array.Uint64: + ms.comparisons[i] = newOrderedSorter[uint64](e, cmp.Compare) + case *array.Float64: + ms.comparisons[i] = newOrderedSorter[float64](e, cmp.Compare) + case *array.String: + ms.comparisons[i] = newOrderedSorter[string](e, cmp.Compare) + case *array.Binary: + ms.comparisons[i] = newOrderedSorter[[]byte](e, bytes.Compare) + case *array.Timestamp: + ms.comparisons[i] = newOrderedSorter[arrow.Timestamp](e, cmp.Compare) + case *array.Dictionary: + switch elem := e.Dictionary().(type) { + case *array.String: + ms.comparisons[i] = newOrderedSorter[string]( + &stringDictionary{ + dict: e, + elem: elem, + }, + cmp.Compare, + ) + case *array.Binary: + ms.comparisons[i] = newOrderedSorter[[]byte]( + &binaryDictionary{ + dict: e, + elem: elem, + }, + bytes.Compare, + ) + case *array.FixedSizeBinary: + ms.comparisons[i] = newOrderedSorter[[]byte]( + &fixedSizeBinaryDictionary{ + dict: e, + elem: elem, + }, + bytes.Compare, + ) + default: + ms.Release() + return nil, fmt.Errorf("unsupported dictionary column type for sorting %T for column %s", e, r.Schema().Field(col.Index).Name) + } + default: + ms.Release() + return nil, fmt.Errorf("unsupported column type for 
sorting %T for column %s", e, r.Schema().Field(col.Index).Name) + } + } + return ms, nil +} + +func (m *multiColSorter) Reserve(rows, columns int) { + m.indices.Reserve(rows) + for i := 0; i < rows; i++ { + m.indices.Set(i, int32(i)) + } + m.comparisons = slices.Grow(m.comparisons, columns)[:columns] + m.directions = slices.Grow(m.directions, columns)[:columns] + m.nullsFirst = slices.Grow(m.nullsFirst, columns)[:columns] +} + +func (m *multiColSorter) Reset() { + m.indices.Reserve(0) + m.comparisons = m.comparisons[:0] + m.directions = m.directions[:0] + m.nullsFirst = m.nullsFirst[:0] +} + +func (m *multiColSorter) Release() { + m.Reset() + multiColSorterPool.Put(m) +} + +var multiColSorterPool = &sync.Pool{ + New: func() any { + return &multiColSorter{ + indices: builder.NewOptInt32Builder(arrow.PrimitiveTypes.Int32), + } + }, +} + +var _ sort.Interface = (*multiColSorter)(nil) + +func (m *multiColSorter) Len() int { return m.indices.Len() } + +func (m *multiColSorter) Less(i, j int) bool { + for idx := range m.comparisons { + cmp := m.compare(idx, int(m.indices.Value(i)), int(m.indices.Value(j))) + if cmp != 0 { + // Use direction to reorder the comparison. Direction determines if the list + // is in ascending or descending. + // + // For instance if comparison between i,j value is -1 and direction is -1 + // this will resolve to true hence the list will be in ascending order. Same + // principle applies for descending. 
+ return cmp == m.directions[idx] + } + // Try comparing the next column + } + return false +} + +func (m *multiColSorter) compare(idx, i, j int) int { + x := m.comparisons[idx] + if x.IsNull(i) { + if x.IsNull(j) { + return 0 + } + if m.directions[idx] == 1 { + if m.nullsFirst[idx] { + return 1 + } + return -1 + } + if m.nullsFirst[idx] { + return -1 + } + return 1 + } + if x.IsNull(j) { + if m.directions[idx] == 1 { + if m.nullsFirst[idx] { + return -1 + } + return 1 + } + if m.nullsFirst[idx] { + return 1 + } + return -1 + } + return x.Compare(i, j) +} + +func (m *multiColSorter) Swap(i, j int) { + m.indices.Swap(i, j) +} + +type comparator interface { + Compare(i, j int) int + IsNull(int) bool +} + +type orderedArray[T any] interface { + Value(int) T + IsNull(int) bool +} + +type orderedSorter[T any] struct { + array orderedArray[T] + compare func(T, T) int +} + +func newOrderedSorter[T any](a orderedArray[T], compare func(T, T) int) *orderedSorter[T] { + return &orderedSorter[T]{ + array: a, + compare: compare, + } +} + +func (s *orderedSorter[T]) IsNull(i int) bool { + return s.array.IsNull(i) +} + +func (s *orderedSorter[T]) Compare(i, j int) int { + return s.compare(s.array.Value(i), s.array.Value(j)) +} + +type stringDictionary struct { + dict *array.Dictionary + elem *array.String +} + +func (s *stringDictionary) IsNull(i int) bool { + return s.dict.IsNull(i) +} + +func (s *stringDictionary) Value(i int) string { + return s.elem.Value(s.dict.GetValueIndex(i)) +} + +type binaryDictionary struct { + dict *array.Dictionary + elem *array.Binary +} + +func (s *binaryDictionary) IsNull(i int) bool { + return s.dict.IsNull(i) +} + +func (s *binaryDictionary) Value(i int) []byte { + return s.elem.Value(s.dict.GetValueIndex(i)) +} + +type fixedSizeBinaryDictionary struct { + dict *array.Dictionary + elem *array.FixedSizeBinary +} + +func (s *fixedSizeBinaryDictionary) IsNull(i int) bool { + return s.dict.IsNull(i) +} + +func (s *fixedSizeBinaryDictionary) Value(i 
int) []byte { + return s.elem.Value(s.dict.GetValueIndex(i)) +} diff --git a/pqarrow/arrowutils/sort_benchmark_test.go b/pqarrow/arrowutils/sort_benchmark_test.go new file mode 100644 index 000000000..c91f9bb83 --- /dev/null +++ b/pqarrow/arrowutils/sort_benchmark_test.go @@ -0,0 +1,102 @@ +package arrowutils_test + +import ( + "math/rand" + "strings" + "testing" + + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/stretchr/testify/require" + + "github.com/polarsignals/frostdb/internal/records" + "github.com/polarsignals/frostdb/pqarrow/arrowutils" +) + +func BenchmarkSortRecord(b *testing.B) { + b.StopTimer() + build := records.NewBuild[Model](memory.NewGoAllocator()) + err := build.Append(makeModels(40)...) + require.NoError(b, err) + r := build.NewRecord() + b.StartTimer() + + for i := 0; i < b.N; i++ { + _, err := arrowutils.SortRecord(r.Record, r.SortingColumns) + if err != nil { + b.Fatal(err) + } + } +} + +type Model struct { + Timestamp int64 `frostdb:",asc(0)"` + Plain string `frostdb:",asc(1)"` + Dictionary string `frostdb:",asc(2)"` +} + +func makeModels(n int) []Model { + o := make([]Model, n) + plain := makeRandomStrings(n) + dict := makeRandomStrings(n) + ts := makeRandomInts(n) + + // Simulate when we need to touch multiple columns + + // case 1 + // first column is equal second column is not + span := 4 + limit := span + 2 + for i := span; i < n && i < limit; i++ { + ts[i] = ts[span] + } + + // case 2 + // both first and second column are equal + span = limit + 1 + limit = span + 4 + for i := span; i < n && i < limit; i++ { + ts[i] = ts[span] + plain[i] = plain[span] + } + // case 3 + // all three columns are equal + span = limit + 1 + limit = span + 4 + for i := span; i < n && i < limit; i++ { + ts[i] = ts[span] + plain[i] = plain[span] + dict[i] = dict[span] + } + for i := range o { + o[i] = Model{ + Timestamp: ts[i], + Plain: plain[i], + Dictionary: dict[i], + } + } + return o +} 
+
+// makeRandomInts returns n pseudo-random int64 values in [0, n) drawn from a
+// fixed-seed source so that benchmark inputs are reproducible across runs.
+func makeRandomInts(n int) []int64 {
+	r := rand.New(rand.NewSource(42))
+	ints := make([]int64, n)
+	for i := 0; i < n; i++ {
+		ints[i] = r.Int63n(int64(n))
+	}
+	return ints
+}
+
+// makeRandomStrings returns n pseudo-random letter strings of length 2 to 51
+// drawn from a fixed-seed source so that benchmark inputs are reproducible
+// across runs.
+func makeRandomStrings(n int) []string {
+	r := rand.New(rand.NewSource(42))
+	letters := []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
+	ss := make([]string, n)
+	for i := 0; i < n; i++ {
+		var sb strings.Builder
+		// Draw the length from the locally seeded source r, not the global
+		// rand; using the global source made the "seeded" data differ
+		// between runs despite the fixed seed above.
+		slen := 2 + r.Intn(50)
+		for j := 0; j < slen; j++ {
+			sb.WriteRune(letters[r.Intn(len(letters))])
+		}
+		ss[i] = sb.String()
+	}
+	return ss
+}
diff --git a/pqarrow/arrowutils/sort_test.go b/pqarrow/arrowutils/sort_test.go
new file mode 100644
index 000000000..7b1ca74b7
--- /dev/null
+++ b/pqarrow/arrowutils/sort_test.go
@@ -0,0 +1,857 @@
+package arrowutils
+
+import (
+	"context"
+	"fmt"
+	"sort"
+	"strings"
+	"testing"
+
+	"github.com/apache/arrow-go/v18/arrow"
+	"github.com/apache/arrow-go/v18/arrow/array"
+	"github.com/apache/arrow-go/v18/arrow/compute"
+	"github.com/apache/arrow-go/v18/arrow/memory"
+	"github.com/stretchr/testify/require"
+)
+
+func TestSortRecord(t *testing.T) {
+	null := func(v int64) *int64 {
+		return &v
+	}
+
+	cases := []SortCase{
+		{
+			Name: "must provide at least one column",
+			Samples: Samples{
+				{},
+			},
+			Error: "expected missing column error",
+		},
+
+		{
+			Name:    "No Rows",
+			Samples: Samples{},
+			Columns: []SortingColumn{{Index: 0}},
+		},
+		{
+			Name: "One Row",
+			Samples: Samples{
+				{},
+			},
+			Columns: []SortingColumn{
+				{
+					Index: 0,
+				},
+			},
+			Indices: []int32{0},
+		},
+		{
+			Name: "By Integer column ascending",
+			Samples: Samples{
+				{Int: 3},
+				{Int: 2},
+				{Int: 1},
+			},
+			Columns: []SortingColumn{
+				{Index: 0},
+			},
+			Indices: []int32{2, 1, 0},
+		},
+		{
+			Name: "By Integer column descending",
+			Samples: Samples{
+				{Int: 1},
+				{Int: 2},
+				{Int: 3},
+			},
+
+			Columns: []SortingColumn{
+				{Index: 0, Direction: Descending},
+			},
+			Indices: []int32{2,
1, 0}, + }, + { + Name: "By Double column ascending", + Samples: Samples{ + {Double: 3}, + {Double: 2}, + {Double: 1}, + }, + Columns: []SortingColumn{{Index: 1}}, + Indices: []int32{2, 1, 0}, + }, + { + Name: "By Double column descending", + Samples: Samples{ + {Double: 1}, + {Double: 2}, + {Double: 3}, + }, + Columns: []SortingColumn{{Index: 1, Direction: Descending}}, + Indices: []int32{2, 1, 0}, + }, + { + Name: "By String column ascending", + Samples: Samples{ + {String: "3"}, + {String: "2"}, + {String: "1"}, + }, + Columns: []SortingColumn{{Index: 2}}, + Indices: []int32{2, 1, 0}, + }, + { + Name: "By String column descending", + Samples: Samples{ + {String: "1"}, + {String: "2"}, + {String: "3"}, + }, + Columns: []SortingColumn{{Index: 2, Direction: Descending}}, + Indices: []int32{2, 1, 0}, + }, + { + Name: "By Timestamp column ascending", + Samples: Samples{ + {Timestamp: 3}, + {Timestamp: 2}, + {Timestamp: 1}, + }, + Columns: []SortingColumn{{Index: 6}}, + Indices: []int32{2, 1, 0}, + }, + { + Name: "By Timestamp column descending", + Samples: Samples{ + {Timestamp: 1}, + {Timestamp: 2}, + {Timestamp: 3}, + }, + Columns: []SortingColumn{{Index: 6, Direction: Descending}}, + Indices: []int32{2, 1, 0}, + }, + { + Name: "By Dict column ascending", + Samples: Samples{ + {Dict: "3"}, + {Dict: "2"}, + {Dict: "1"}, + }, + Columns: []SortingColumn{{Index: 3}}, + Indices: []int32{2, 1, 0}, + }, + { + Name: "By Dict column descending", + Samples: Samples{ + {Dict: "1"}, + {Dict: "2"}, + {Dict: "3"}, + }, + Columns: []SortingColumn{{Index: 3, Direction: Descending}}, + Indices: []int32{2, 1, 0}, + }, + { + Name: "By DictFixed column ascending", + Samples: Samples{ + {DictFixed: [2]byte{0, 3}}, + {DictFixed: [2]byte{0, 2}}, + {DictFixed: [2]byte{0, 1}}, + }, + Columns: []SortingColumn{{Index: 4}}, + Indices: []int32{2, 1, 0}, + }, + { + Name: "By DictFixed column descending", + Samples: Samples{ + {DictFixed: [2]byte{0, 1}}, + {DictFixed: [2]byte{0, 2}}, + 
{DictFixed: [2]byte{0, 3}}, + }, + Columns: []SortingColumn{{Index: 4, Direction: Descending}}, + Indices: []int32{2, 1, 0}, + }, + { + Name: "By Null column ascending", + Samples: Samples{ + {}, + {}, + {Nullable: null(1)}, + }, + Columns: []SortingColumn{{Index: 5}}, + Indices: []int32{2, 0, 1}, + }, + { + Name: "By Null column ascending nullsFirst", + Samples: Samples{ + {}, + {}, + {Nullable: null(1)}, + }, + Columns: []SortingColumn{{Index: 5, NullsFirst: true}}, + Indices: []int32{0, 1, 2}, + }, + { + Name: "By Null column descending", + Samples: Samples{ + {}, + {}, + {Nullable: null(1)}, + }, + Columns: []SortingColumn{{Index: 5, Direction: Descending}}, + Indices: []int32{2, 0, 1}, + }, + { + Name: "By Null column descending nullsFirst", + Samples: Samples{ + {}, + {}, + {Nullable: null(1)}, + }, + Columns: []SortingColumn{{Index: 5, Direction: Descending, NullsFirst: true}}, + Indices: []int32{0, 1, 2}, + }, + { + Name: "Multiple columns same direction", + Samples: Samples{ + {String: "1", Int: 3}, + {String: "2", Int: 2}, + {String: "3", Int: 2}, + {String: "4", Int: 1}, + }, + Columns: []SortingColumn{ + {Index: 0}, + {Index: 2}, + }, + Indices: []int32{3, 1, 2, 0}, + }, + { + Name: "Multiple columns different direction", + Samples: Samples{ + {String: "1", Int: 3}, + {String: "2", Int: 2}, + {String: "3", Int: 2}, + {String: "4", Int: 1}, + }, + Columns: []SortingColumn{ + {Index: 0, Direction: Ascending}, + {Index: 2, Direction: Descending}, + }, + Indices: []int32{3, 2, 1, 0}, + }, + } + + for _, kase := range cases { + t.Run(kase.Name, func(t *testing.T) { + sortAndCompare(t, kase) + }) + } +} + +func TestSortRecordBuilderReuse(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + + schema := arrow.NewSchema([]arrow.Field{{Name: "int64", Type: arrow.PrimitiveTypes.Int64}}, nil) + + b1 := array.NewInt64Builder(mem) + b1.AppendValues([]int64{3, 2, 1}, nil) + arr1 := b1.NewArray() + r1 := array.NewRecord(schema, 
[]arrow.Array{arr1}, 3) + + ms, err := newMultiColSorter(r1, []SortingColumn{{Index: 0}}) + require.Nil(t, err) + sort.Sort(ms) + sortedArr1 := ms.indices.NewArray().(*array.Int32) + require.Equal(t, []int32{2, 1, 0}, sortedArr1.Int32Values()) + ms.Release() // usually defer + + b2 := array.NewInt64Builder(mem) + b2.AppendValues([]int64{2, 1}, nil) + arr2 := b2.NewArray() + r2 := array.NewRecord(schema, []arrow.Array{arr2}, 2) + + ms, err = newMultiColSorter(r2, []SortingColumn{{Index: 0}}) + require.Nil(t, err) + sort.Sort(ms) + sortedArr2 := ms.indices.NewArray().(*array.Int32) + require.Equal(t, []int32{1, 0}, sortedArr2.Int32Values()) + ms.Release() // usually defer + + // This failed before the fix because the builder's data was reused. + require.Equal(t, []int32{2, 1, 0}, sortedArr1.Int32Values()) + require.Equal(t, []int32{1, 0}, sortedArr2.Int32Values()) +} + +func TestReorderRecord(t *testing.T) { + readRunEndEncodedDictionary := func(arr *array.RunEndEncoded) string { + arrDict := arr.Values().(*array.Dictionary) + arrDictValues := arrDict.Dictionary().(*array.String) + + values := make([]string, arr.Len()) + for i := 0; i < arr.Len(); i++ { + physicalIndex := arr.GetPhysicalIndex(i) + if arrDict.IsNull(physicalIndex) { + values[i] = array.NullValueStr + continue + } + valueIndex := arrDict.GetValueIndex(physicalIndex) + values[i] = arrDictValues.Value(valueIndex) + } + return "[" + strings.Join(values, " ") + "]" + } + + t.Run("Simple", func(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + b := array.NewRecordBuilder(mem, arrow.NewSchema( + []arrow.Field{ + { + Name: "int", + Type: arrow.PrimitiveTypes.Int64, + }, + }, nil, + )) + defer b.Release() + b.Field(0).(*array.Int64Builder).AppendValues([]int64{3, 2, 1}, nil) + r := b.NewRecord() + defer r.Release() + + indices := array.NewInt32Builder(mem) + indices.AppendValues([]int32{2, 1, 0}, nil) + by := indices.NewInt32Array() + defer by.Release() 
+ result, err := Take(compute.WithAllocator(context.Background(), mem), r, by) + require.Nil(t, err) + defer result.Release() + + want := []int64{1, 2, 3} + require.Equal(t, want, result.Column(0).(*array.Int64).Int64Values()) + }) + t.Run("WithStringDict", func(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + b := array.NewRecordBuilder(mem, arrow.NewSchema( + []arrow.Field{ + { + Name: "dict", + Type: &arrow.DictionaryType{ + IndexType: arrow.PrimitiveTypes.Int32, + ValueType: arrow.BinaryTypes.String, + }, + }, + }, nil, + )) + defer b.Release() + d := b.Field(0).(*array.BinaryDictionaryBuilder) + require.NoError(t, d.AppendString("3")) + require.NoError(t, d.AppendString("2")) + require.NoError(t, d.AppendString("1")) + d.AppendNull() + require.NoError(t, d.AppendString("3")) + r := b.NewRecord() + defer r.Release() + + indices := array.NewInt32Builder(mem) + indices.AppendValues([]int32{2, 1, 4, 0, 3}, nil) + by := indices.NewInt32Array() + defer by.Release() + result, err := Take(compute.WithAllocator(context.Background(), mem), r, by) + require.NoError(t, err) + defer result.Release() + + want := []string{"1", "2", "3", "3", ""} + got := result.Column(0).(*array.Dictionary) + require.Equal(t, len(want), got.Len()) + for i, v := range want { + if v == "" { + require.True(t, got.IsNull(i)) + continue + } + require.Equal(t, want[i], got.ValueStr(i)) + } + }) + t.Run("RunEndEncoded", func(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + b := array.NewRecordBuilder(mem, arrow.NewSchema( + []arrow.Field{ + { + Name: "ree", + Type: arrow.RunEndEncodedOf( + arrow.PrimitiveTypes.Int32, + &arrow.DictionaryType{ + IndexType: arrow.PrimitiveTypes.Uint32, + ValueType: arrow.BinaryTypes.String, + }), + }, + }, nil, + )) + defer b.Release() + + ree := b.Field(0).(*array.RunEndEncodedBuilder) + require.NoError(t, ree.AppendValueFromString("3")) + 
require.NoError(t, ree.AppendValueFromString("2")) + require.NoError(t, ree.AppendValueFromString("1")) + ree.AppendNull() + require.NoError(t, ree.AppendValueFromString("3")) + r := b.NewRecord() + defer r.Release() + + indices := array.NewInt32Builder(mem) + indices.AppendValues([]int32{2, 1, 4, 0, 3}, nil) + by := indices.NewInt32Array() + defer by.Release() + + // Reordering + + result, err := Take(compute.WithAllocator(context.Background(), mem), r, by) + require.NoError(t, err) + defer result.Release() + + // Testing + + sorted := result.Column(0).(*array.RunEndEncoded) + sortedEnds := sorted.RunEndsArr().(*array.Int32) + // notice how the index to 3 is runEndEncoded + require.Equal(t, "[1 2 4 5]", sortedEnds.String()) + require.Equal(t, "[1 2 3 3 (null)]", readRunEndEncodedDictionary(sorted)) + }) + t.Run("WithFixedSizeBinaryDict", func(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + b := array.NewRecordBuilder(mem, arrow.NewSchema( + []arrow.Field{ + { + Name: "dict", + Type: &arrow.DictionaryType{ + IndexType: arrow.PrimitiveTypes.Int32, + ValueType: &arrow.FixedSizeBinaryType{ByteWidth: 2}, + }, + }, + }, nil, + )) + defer b.Release() + d := b.Field(0).(*array.FixedSizeBinaryDictionaryBuilder) + require.NoError(t, d.Append([]byte{0, 3})) + require.NoError(t, d.Append([]byte{0, 2})) + require.NoError(t, d.Append([]byte{0, 1})) + d.AppendNull() + require.NoError(t, d.Append([]byte{0, 3})) + r := b.NewRecord() + defer r.Release() + + indices := array.NewInt32Builder(mem) + indices.AppendValues([]int32{2, 1, 4, 0, 3}, nil) + by := indices.NewInt32Array() + defer by.Release() + result, err := Take(compute.WithAllocator(context.Background(), mem), r, by) + require.NoError(t, err) + defer result.Release() + + want := [][]byte{{0, 1}, {0, 2}, {0, 3}, {0, 3}, {}} + got := result.Column(0).(*array.Dictionary) + require.Equal(t, len(want), got.Len()) + for i, v := range want { + if len(v) == 0 { + 
require.True(t, got.IsNull(i)) + continue + } + require.Equal(t, want[i], got.Dictionary().(*array.FixedSizeBinary).Value(got.GetValueIndex(i))) + } + }) + t.Run("List", func(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + b := array.NewRecordBuilder(mem, arrow.NewSchema( + []arrow.Field{ + { + Name: "list", + Type: arrow.ListOf(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: arrow.BinaryTypes.String}), + }, + }, nil, + )) + defer b.Release() + lb := b.Field(0).(*array.ListBuilder) + vb := lb.ValueBuilder().(*array.BinaryDictionaryBuilder) + lb.Append(true) + require.NoError(t, vb.AppendString("1")) + require.NoError(t, vb.AppendString("2")) + require.NoError(t, vb.AppendString("3")) + require.NoError(t, vb.AppendString("1")) + lb.Append(false) + lb.Append(true) + require.NoError(t, vb.AppendString("4")) + require.NoError(t, vb.AppendString("5")) + require.NoError(t, vb.AppendString("6")) + lb.Append(true) + require.NoError(t, vb.AppendString("3")) + require.NoError(t, vb.AppendString("3")) + require.NoError(t, vb.AppendString("3")) + require.NoError(t, vb.AppendString("4")) + r := b.NewRecord() + defer r.Release() + + indices := array.NewInt32Builder(mem) + indices.AppendValues([]int32{2, 1, 0, 3}, nil) + by := indices.NewInt32Array() + defer by.Release() + result, err := Take( + compute.WithAllocator(context.Background(), mem), r, by) + require.Nil(t, err) + defer result.Release() + + got := result.Column(0).(*array.List) + expected := []string{ + "[\"4\",\"5\",\"6\"]", + "", + "[\"1\",\"2\",\"3\",\"1\"]", + "[\"3\",\"3\",\"3\",\"4\"]", + } + require.Equal(t, len(expected), got.Len()) + for i, v := range expected { + if len(v) == 0 { + require.True(t, got.IsNull(i), "expected null at %d", i) + continue + } + require.Equal(t, expected[i], got.ValueStr(i), "unexpected value at %d", i) + } + }) + t.Run("Struct", func(t *testing.T) { + LabelArrowType := arrow.RunEndEncodedOf( + 
arrow.PrimitiveTypes.Int32, + &arrow.DictionaryType{ + IndexType: arrow.PrimitiveTypes.Uint32, + ValueType: arrow.BinaryTypes.String, + }, + ) + + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + b := array.NewRecordBuilder(mem, arrow.NewSchema( + []arrow.Field{ + { + Name: "struct", + Type: arrow.StructOf( + arrow.Field{Name: "first", Type: LabelArrowType, Nullable: true}, + arrow.Field{Name: "second", Type: LabelArrowType, Nullable: true}, + arrow.Field{Name: "third", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, + ), + }, + }, &arrow.Metadata{}, + )) + defer b.Release() + + sb := b.Field(0).(*array.StructBuilder) + firstFieldBuilder := sb.FieldBuilder(0).(*array.RunEndEncodedBuilder) + secondFieldBuilder := sb.FieldBuilder(1).(*array.RunEndEncodedBuilder) + thirdFieldBuilder := sb.FieldBuilder(2).(*array.Int64Builder) + + sb.Append(true) + require.NoError(t, firstFieldBuilder.AppendValueFromString("3")) + require.NoError(t, secondFieldBuilder.AppendValueFromString("1")) + thirdFieldBuilder.Append(1) + sb.Append(true) + require.NoError(t, firstFieldBuilder.AppendValueFromString("2")) + require.NoError(t, secondFieldBuilder.AppendValueFromString("2")) + thirdFieldBuilder.Append(2) + sb.Append(true) + require.NoError(t, firstFieldBuilder.AppendValueFromString("1")) + require.NoError(t, secondFieldBuilder.AppendValueFromString("3")) + thirdFieldBuilder.Append(3) + sb.Append(true) + firstFieldBuilder.AppendNull() + require.NoError(t, secondFieldBuilder.AppendValueFromString("4")) + thirdFieldBuilder.Append(4) + sb.Append(true) + require.NoError(t, firstFieldBuilder.AppendValueFromString("3")) + require.NoError(t, secondFieldBuilder.AppendValueFromString("5")) + thirdFieldBuilder.Append(5) + + r := b.NewRecord() + defer r.Release() + + indices := array.NewInt32Builder(mem) + indices.AppendValues([]int32{2, 1, 4, 0, 3}, nil) + by := indices.NewInt32Array() + defer by.Release() + result, err := 
Take(compute.WithAllocator(context.Background(), mem), r, by) + require.Nil(t, err) + defer result.Release() + resultStruct := result.Column(0).(*array.Struct) + + require.Equal(t, "[1 2 3 3 (null)]", readRunEndEncodedDictionary(resultStruct.Field(0).(*array.RunEndEncoded))) + require.Equal(t, "[3 2 5 1 4]", readRunEndEncodedDictionary(resultStruct.Field(1).(*array.RunEndEncoded))) + require.Equal(t, "[3 2 5 1 4]", resultStruct.Field(2).(*array.Int64).String()) + }) + t.Run("ListStruct", func(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + b := array.NewRecordBuilder(mem, arrow.NewSchema([]arrow.Field{ + {Name: "list", Type: arrow.ListOf(arrow.StructOf([]arrow.Field{ + {Name: "int64", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, + {Name: "uint64", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, + }...))}, + }, nil)) + defer b.Release() + + lb := b.Field(0).(*array.ListBuilder) + sb := lb.ValueBuilder().(*array.StructBuilder) + int64b := sb.FieldBuilder(0).(*array.Int64Builder) + uint64b := sb.FieldBuilder(1).(*array.Uint64Builder) + + lb.Append(true) + sb.Append(true) + int64b.Append(1) + uint64b.Append(2) + sb.Append(true) + int64b.Append(3) + uint64b.Append(4) + + lb.Append(true) + sb.Append(true) + int64b.Append(5) + uint64b.Append(6) + + lb.Append(true) + sb.Append(true) + int64b.Append(7) + uint64b.Append(8) + sb.Append(true) + int64b.Append(9) + uint64b.Append(10) + + r := b.NewRecord() + defer r.Release() + + indices := array.NewInt32Builder(mem) + indices.AppendValues([]int32{2, 1, 0}, nil) + defer indices.Release() + by := indices.NewInt32Array() + defer by.Release() + + result, err := Take(compute.WithAllocator(context.Background(), mem), r, by) + require.Nil(t, err) + defer result.Release() + + require.Equal(t, `[{[7 9] [8 10]} {[5] [6]} {[1 3] [2 4]}]`, result.Column(0).String()) + }) + t.Run("StructEmpty", func(t *testing.T) { + mem := 
memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + b := array.NewRecordBuilder(mem, arrow.NewSchema( + []arrow.Field{ + { + Name: "struct", + Type: arrow.StructOf(), + }, + }, &arrow.Metadata{}, + )) + defer b.Release() + b.Field(0).AppendNulls(5) + + r := b.NewRecord() + defer r.Release() + + indices := array.NewInt32Builder(mem) + indices.AppendValues([]int32{2, 1, 4, 0, 3}, nil) + by := indices.NewInt32Array() + defer by.Release() + + result, err := Take(compute.WithAllocator(context.Background(), mem), r, by) + require.Nil(t, err) + defer result.Release() + resultStruct := result.Column(0).(*array.Struct) + resultStruct.Len() + }) +} + +// Use all supported sort field. +type Sample struct { + Int int64 + Double float64 + String string + Dict string + DictFixed [2]byte + Nullable *int64 + Timestamp arrow.Timestamp +} + +type Samples []Sample + +func (s Samples) Record() arrow.Record { + b := array.NewRecordBuilder(memory.NewGoAllocator(), + arrow.NewSchema([]arrow.Field{ + { + Name: "int", + Type: arrow.PrimitiveTypes.Int64, + }, + { + Name: "double", + Type: arrow.PrimitiveTypes.Float64, + }, + { + Name: "string", + Type: arrow.BinaryTypes.String, + }, + { + Name: "dict", + Type: &arrow.DictionaryType{ + IndexType: arrow.PrimitiveTypes.Int32, + ValueType: arrow.BinaryTypes.String, + }, + }, + { + Name: "dictFixed", + Type: &arrow.DictionaryType{ + IndexType: arrow.PrimitiveTypes.Int32, + ValueType: &arrow.FixedSizeBinaryType{ByteWidth: 2}, + }, + }, + { + Name: "nullable", + Type: arrow.PrimitiveTypes.Int64, + Nullable: true, + }, + { + Name: "timestamp", + Type: &arrow.TimestampType{}, + Nullable: true, + }, + }, nil), + ) + + fInt := b.Field(0).(*array.Int64Builder) + fDouble := b.Field(1).(*array.Float64Builder) + fString := b.Field(2).(*array.StringBuilder) + fBinaryDict := b.Field(3).(*array.BinaryDictionaryBuilder) + fFixedDict := b.Field(4).(*array.FixedSizeBinaryDictionaryBuilder) + fNullable := 
b.Field(5).(*array.Int64Builder) + fTimestamp := b.Field(6).(*array.TimestampBuilder) + + for _, v := range s { + fInt.Append(v.Int) + fDouble.Append(v.Double) + fString.Append(v.String) + if v.Timestamp == 0 { + fTimestamp.AppendNull() + } else { + fTimestamp.Append(v.Timestamp) + } + _ = fBinaryDict.AppendString(v.Dict) + _ = fFixedDict.Append(v.DictFixed[:]) + if v.Nullable != nil { + fNullable.Append(*v.Nullable) + } else { + fNullable.AppendNull() + } + } + return b.NewRecord() +} + +type SortCase struct { + Name string + Samples Samples + Columns []SortingColumn + Indices []int32 + Error string +} + +func sortAndCompare(t *testing.T, kase SortCase) { + t.Helper() + + got, err := SortRecord(kase.Samples.Record(), kase.Columns) + if kase.Error != "" { + require.NotNil(t, err, kase.Error) + return + } + defer got.Release() + + require.Equal(t, kase.Indices, got.Int32Values()) +} + +func BenchmarkTake(b *testing.B) { + const ( + numRows = 1024 + numValsPerListElem = 4 + ) + mem := memory.NewGoAllocator() + b.Run("Dict", func(b *testing.B) { + rb := array.NewRecordBuilder(mem, arrow.NewSchema( + []arrow.Field{ + { + Name: "dict", + Type: &arrow.DictionaryType{ + IndexType: arrow.PrimitiveTypes.Int32, + ValueType: arrow.BinaryTypes.Binary, + }, + }, + }, nil, + )) + defer rb.Release() + d := rb.Field(0).(*array.BinaryDictionaryBuilder) + for i := 0; i < numRows; i++ { + // Interesting to benchmark with a string that appears every other row. + // i.e. only one entry in the dict. 
+ require.NoError(b, d.AppendString("appearseveryotherrow")) + require.NoError(b, d.AppendString(fmt.Sprintf("%d", i))) + } + r := rb.NewRecord() + indices := array.NewInt32Builder(mem) + for i := r.NumRows() - 1; i > 0; i-- { + indices.Append(int32(i)) + } + ctx := compute.WithAllocator(context.Background(), mem) + indArr := indices.NewInt32Array() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + if _, err := Take(ctx, r, indArr); err != nil { + b.Fatal(err) + } + } + }) + + b.Run("List", func(b *testing.B) { + listb := array.NewRecordBuilder(mem, arrow.NewSchema( + []arrow.Field{ + { + Name: "list", + Type: arrow.ListOf( + &arrow.DictionaryType{ + IndexType: arrow.PrimitiveTypes.Int32, ValueType: arrow.BinaryTypes.Binary, + }, + ), + }, + }, nil, + )) + defer listb.Release() + + l := listb.Field(0).(*array.ListBuilder) + vb := l.ValueBuilder().(*array.BinaryDictionaryBuilder) + for i := 0; i < numRows; i++ { + l.Append(true) + for j := 0; j < numValsPerListElem-1; j++ { + require.NoError(b, vb.AppendString(fmt.Sprintf("%d", i))) + } + require.NoError(b, vb.AppendString("appearseveryrow")) + } + + r := listb.NewRecord() + indices := array.NewInt32Builder(mem) + for i := numRows - 1; i > 0; i-- { + indices.Append(int32(i)) + } + ctx := compute.WithAllocator(context.Background(), mem) + indArr := indices.NewInt32Array() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + if _, err := Take(ctx, r, indArr); err != nil { + b.Fatal(err) + } + } + }) +} diff --git a/pqarrow/arrowutils/utils.go b/pqarrow/arrowutils/utils.go index b3b000493..53c644318 100644 --- a/pqarrow/arrowutils/utils.go +++ b/pqarrow/arrowutils/utils.go @@ -1,43 +1,11 @@ package arrowutils import ( - "fmt" - - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + 
"github.com/apache/arrow-go/v18/arrow/memory" ) -// GetValue returns the value at index i in arr. If the value is null, nil is -// returned. -func GetValue(arr arrow.Array, i int) (any, error) { - if arr.IsNull(i) { - return nil, nil - } - - switch a := arr.(type) { - case *array.Binary: - return a.Value(i), nil - case *array.FixedSizeBinary: - return a.Value(i), nil - case *array.String: - return a.Value(i), nil - case *array.Int64: - return a.Value(i), nil - case *array.Boolean: - return a.Value(i), nil - case *array.Dictionary: - switch dict := a.Dictionary().(type) { - case *array.Binary: - return dict.Value(a.GetValueIndex(i)), nil - default: - return nil, fmt.Errorf("unsupported dictionary type for GetValue %T", dict) - } - default: - return nil, fmt.Errorf("unsupported type for GetValue %T", a) - } -} - // ArrayConcatenator is an object that helps callers keep track of a slice of // arrays and concatenate them into a single one when needed. This is more // efficient and memory safe than using a builder. 
@@ -61,3 +29,10 @@ func (c *ArrayConcatenator) NewArray(mem memory.Allocator) (arrow.Array, error) func (c *ArrayConcatenator) Len() int { return len(c.arrs) } + +func (c *ArrayConcatenator) Release() { + for _, arr := range c.arrs { + arr.Release() + } + c.arrs = c.arrs[:0] +} diff --git a/pqarrow/builder/listbuilder.go b/pqarrow/builder/listbuilder.go index fe8334993..36617eaf0 100644 --- a/pqarrow/builder/listbuilder.go +++ b/pqarrow/builder/listbuilder.go @@ -21,10 +21,10 @@ package builder import ( "sync/atomic" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/bitutil" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/bitutil" + "github.com/apache/arrow-go/v18/arrow/memory" ) const ( @@ -150,13 +150,6 @@ func (b *builder) unsafeSetValid(length int) { b.length = newLength } -func min(a, b int) int { - if a < b { - return a - } - return b -} - // ListBuilder is a wrapper over an array.ListBuilder that uses ColumnBuilder as a values buffer. type ListBuilder struct { builder @@ -208,7 +201,7 @@ func (b *ListBuilder) AppendNull() { func (b *ListBuilder) AppendValues(offsets []int32, valid []bool) { b.Reserve(len(valid)) b.offsets.AppendValues(offsets, nil) - b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) + b.unsafeAppendBoolsToBitmap(valid, len(valid)) } func (b *ListBuilder) unsafeAppendBoolToBitmap(isValid bool) { @@ -228,7 +221,7 @@ func (b *ListBuilder) init(capacity int) { // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. 
func (b *ListBuilder) Reserve(n int) { - b.builder.reserve(n, b.resizeHelper) + b.reserve(n, b.resizeHelper) b.offsets.Reserve(n) } @@ -247,7 +240,7 @@ func (b *ListBuilder) resizeHelper(n int) { if b.capacity == 0 { b.init(n) } else { - b.builder.resize(n, b.builder.init) + b.resize(n, b.builder.init) } } diff --git a/pqarrow/builder/optbuilders.go b/pqarrow/builder/optbuilders.go index d9d71e0e4..7144bd328 100644 --- a/pqarrow/builder/optbuilders.go +++ b/pqarrow/builder/optbuilders.go @@ -1,15 +1,17 @@ package builder import ( - "reflect" + "fmt" + "math" + "slices" "sync/atomic" "unsafe" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/bitutil" - "github.com/apache/arrow/go/v10/arrow/memory" - "github.com/segmentio/parquet-go" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/bitutil" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/parquet-go/parquet-go" ) // ColumnBuilder is a subset of the array.Builder interface implemented by the @@ -28,7 +30,10 @@ type OptimizedBuilder interface { ColumnBuilder AppendNulls(int) ResetToLength(int) - RepeatLastValue(int) + RepeatLastValue(int) error + IsNull(i int) bool + IsValid(i int) bool + SetNull(i int) } type builderBase struct { @@ -72,6 +77,15 @@ func (b *builderBase) AppendNulls(n int) { b.length += n } +// SetNull is setting the value at the index i to null. +func (b *builderBase) SetNull(i int) { + bitutil.ClearBit(b.validityBitmap, i) +} + +func (b *builderBase) IsValid(n int) bool { + return bitutil.BitIsSet(b.validityBitmap, n) +} + // appendValid does the opposite of appendNulls. 
func (b *builderBase) appendValid(n int) { b.validityBitmap = resizeBitmap(b.validityBitmap, b.length+n) @@ -79,6 +93,10 @@ func (b *builderBase) appendValid(n int) { b.length += n } +func (b *builderBase) IsNull(n int) bool { + return bitutil.BitIsNotSet(b.validityBitmap, n) +} + func resizeBitmap(bitmap []byte, valuesToRepresent int) []byte { bytesNeeded := int(bitutil.BytesForBits(int64(valuesToRepresent))) if cap(bitmap) < bytesNeeded { @@ -93,6 +111,7 @@ var ( _ OptimizedBuilder = (*OptBinaryBuilder)(nil) _ OptimizedBuilder = (*OptInt64Builder)(nil) _ OptimizedBuilder = (*OptBooleanBuilder)(nil) + _ OptimizedBuilder = (*OptFloat64Builder)(nil) ) // OptBinaryBuilder is an optimized array.BinaryBuilder. @@ -132,6 +151,13 @@ func (b *OptBinaryBuilder) AppendNull() { b.builderBase.AppendNulls(1) } +// AppendEmptyValue adds a new empty byte slice to the array being built. +func (b *OptBinaryBuilder) AppendEmptyValue() { + b.offsets = append(b.offsets, uint32(len(b.data))) + // Don't append any data, just close the offset for an empty slice + b.appendValid(1) +} + // AppendNulls appends n null values to the array being built. This is specific // to distinct optimizations in FrostDB. func (b *OptBinaryBuilder) AppendNulls(n int) { @@ -146,14 +172,7 @@ func (b *OptBinaryBuilder) AppendNulls(n int) { // a new array. 
func (b *OptBinaryBuilder) NewArray() arrow.Array { b.offsets = append(b.offsets, uint32(len(b.data))) - var offsetsAsBytes []byte - - fromHeader := (*reflect.SliceHeader)(unsafe.Pointer(&b.offsets)) - toHeader := (*reflect.SliceHeader)(unsafe.Pointer(&offsetsAsBytes)) - toHeader.Data = fromHeader.Data - toHeader.Len = fromHeader.Len * arrow.Uint32SizeBytes - toHeader.Cap = fromHeader.Cap * arrow.Uint32SizeBytes - + offsetsAsBytes := unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(b.offsets))), len(b.offsets)*arrow.Uint32SizeBytes) data := array.NewData( b.dtype, b.length, @@ -168,13 +187,20 @@ func (b *OptBinaryBuilder) NewArray() arrow.Array { ) b.reset() b.offsets = b.offsets[:0] - b.data = b.data[:0] + b.data = nil + return array.NewBinaryData(data) } +var ErrMaxSizeReached = fmt.Errorf("max size reached") + // AppendData appends a flat slice of bytes to the builder, with an accompanying // slice of offsets. This data is considered to be non-null. -func (b *OptBinaryBuilder) AppendData(data []byte, offsets []uint32) { +func (b *OptBinaryBuilder) AppendData(data []byte, offsets []uint32) error { + if len(b.data)+len(data) > math.MaxInt32 { // NOTE: we check against a max int32 here (instead of the uint32 that we're using for offsets) because the arrow binary arrays use int32s. + return ErrMaxSizeReached + } + + // Trim the last offset since we want this last range to be "open". offsets = offsets[:len(offsets)-1] @@ -189,20 +215,33 @@ func (b *OptBinaryBuilder) AppendData(data []byte, offsets []uint32) { b.length += len(offsets) b.validityBitmap = resizeBitmap(b.validityBitmap, b.length) bitutil.SetBitsTo(b.validityBitmap, int64(startOffset), int64(len(offsets)), true) + return nil } -func (b *OptBinaryBuilder) Append(v []byte) { +func (b *OptBinaryBuilder) Append(v []byte) error { + if len(b.data)+len(v) > math.MaxInt32 { + return ErrMaxSizeReached + } b.offsets = append(b.offsets, uint32(len(b.data))) b.data = append(b.data, v...) 
b.length++ b.validityBitmap = resizeBitmap(b.validityBitmap, b.length) bitutil.SetBit(b.validityBitmap, b.length-1) + return nil } // AppendParquetValues appends the given parquet values to the builder. The // values may be null, but if it is known upfront that none of the values are // null, AppendData offers a more efficient way of appending values. -func (b *OptBinaryBuilder) AppendParquetValues(values []parquet.Value) { +func (b *OptBinaryBuilder) AppendParquetValues(values []parquet.Value) error { + size := 0 + for i := range values { + size += len(values[i].ByteArray()) + } + if len(b.data)+size > math.MaxInt32 { + return ErrMaxSizeReached + } + for i := range values { b.offsets = append(b.offsets, uint32(len(b.data))) b.data = append(b.data, values[i].ByteArray()...) @@ -215,22 +254,28 @@ func (b *OptBinaryBuilder) AppendParquetValues(values []parquet.Value) { for i := range values { bitutil.SetBitTo(b.validityBitmap, oldLength+i, !values[i].IsNull()) } + + return nil } // RepeatLastValue is specific to distinct optimizations in FrostDB. -func (b *OptBinaryBuilder) RepeatLastValue(n int) { +func (b *OptBinaryBuilder) RepeatLastValue(n int) error { if bitutil.BitIsNotSet(b.validityBitmap, b.length-1) { // Last value is null. b.AppendNulls(n) - return + return nil } lastValue := b.data[b.offsets[len(b.offsets)-1]:] + if len(b.data)+(len(lastValue)*n) > math.MaxInt32 { + return ErrMaxSizeReached + } for i := 0; i < n; i++ { b.offsets = append(b.offsets, uint32(len(b.data))) b.data = append(b.data, lastValue...) } b.appendValid(n) + return nil } // ResetToLength is specific to distinct optimizations in FrostDB. 
@@ -245,6 +290,13 @@ func (b *OptBinaryBuilder) ResetToLength(n int) { b.validityBitmap = resizeBitmap(b.validityBitmap, n) } +func (b *OptBinaryBuilder) Value(i int) []byte { + if i == b.length-1 { // last value + return b.data[b.offsets[i]:] + } + return b.data[b.offsets[i]:b.offsets[i+1]] +} + type OptInt64Builder struct { builderBase @@ -277,20 +329,18 @@ func (b *OptInt64Builder) AppendNull() { b.AppendNulls(1) } +// AppendEmptyValue adds a new zero value (0) to the array being built. +func (b *OptInt64Builder) AppendEmptyValue() { + b.Append(0) +} + func (b *OptInt64Builder) AppendNulls(n int) { b.resizeData(b.length + n) b.builderBase.AppendNulls(n) } func (b *OptInt64Builder) NewArray() arrow.Array { - var dataAsBytes []byte - - fromHeader := (*reflect.SliceHeader)(unsafe.Pointer(&b.data)) - toHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dataAsBytes)) - toHeader.Data = fromHeader.Data - toHeader.Len = fromHeader.Len * arrow.Int64SizeBytes - toHeader.Cap = fromHeader.Cap * arrow.Int64SizeBytes - + dataAsBytes := unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(b.data))), len(b.data)*arrow.Int64SizeBytes) data := array.NewData( b.dtype, b.length, @@ -303,7 +353,7 @@ func (b *OptInt64Builder) NewArray() arrow.Array { 0, ) b.reset() - b.data = b.data[:0] + b.data = nil return array.NewInt64Data(data) } @@ -324,6 +374,19 @@ func (b *OptInt64Builder) Append(v int64) { bitutil.SetBit(b.validityBitmap, b.length-1) } +func (b *OptInt64Builder) Set(i int, v int64) { + b.data[i] = v +} + +func (b *OptInt64Builder) Add(i int, v int64) { + b.data[i] += v +} + +// Value returns the ith value of the builder. 
+func (b *OptInt64Builder) Value(i int) int64 { + return b.data[i] +} + func (b *OptInt64Builder) AppendParquetValues(values []parquet.Value) { b.resizeData(b.length + len(values)) b.validityBitmap = resizeBitmap(b.validityBitmap, b.length+len(values)) @@ -336,10 +399,10 @@ func (b *OptInt64Builder) AppendParquetValues(values []parquet.Value) { b.length += len(values) } -func (b *OptInt64Builder) RepeatLastValue(n int) { +func (b *OptInt64Builder) RepeatLastValue(n int) error { if bitutil.BitIsNotSet(b.validityBitmap, b.length-1) { b.AppendNulls(n) - return + return nil } lastValue := b.data[b.length-1] @@ -348,6 +411,7 @@ func (b *OptInt64Builder) RepeatLastValue(n int) { b.data[i] = lastValue } b.appendValid(n) + return nil } // ResetToLength is specific to distinct optimizations in FrostDB. @@ -383,6 +447,11 @@ func (b *OptBooleanBuilder) AppendNull() { b.AppendNulls(1) } +// AppendEmptyValue adds a new zero value (false) to the array being built. +func (b *OptBooleanBuilder) AppendEmptyValue() { + b.AppendSingle(false) +} + func (b *OptBooleanBuilder) AppendNulls(n int) { v := b.length + n b.data = resizeBitmap(b.data, v) @@ -408,7 +477,7 @@ func (b *OptBooleanBuilder) NewArray() arrow.Array { 0, ) b.reset() - b.data = b.data[:0] + b.data = nil array := array.NewBooleanData(data) return array } @@ -426,7 +495,15 @@ func (b *OptBooleanBuilder) Append(data []byte, valid int) { } } -func (b *OptBooleanBuilder) AppendData(data []byte) { +func (b *OptBooleanBuilder) Set(i int, v bool) { + bitutil.SetBitTo(b.data, i, v) +} + +func (b *OptBooleanBuilder) Value(i int) bool { + return bitutil.BitIsSet(b.data, i) +} + +func (b *OptBooleanBuilder) AppendData(_ []byte) { panic("do not use AppendData for opt boolean builder, use Append instead") } @@ -450,16 +527,17 @@ func (b *OptBooleanBuilder) AppendSingle(v bool) { bitutil.SetBit(b.validityBitmap, b.length-1) } -func (b *OptBooleanBuilder) RepeatLastValue(n int) { +func (b *OptBooleanBuilder) RepeatLastValue(n int) 
error { if bitutil.BitIsNotSet(b.validityBitmap, b.length-1) { b.AppendNulls(n) - return + return nil } lastValue := bitutil.BitIsSet(b.data, b.length-1) b.data = resizeBitmap(b.data, b.length+n) bitutil.SetBitsTo(b.data, int64(b.length), int64(n), lastValue) b.appendValid(n) + return nil } // ResetToLength is specific to distinct optimizations in FrostDB. @@ -472,3 +550,272 @@ func (b *OptBooleanBuilder) ResetToLength(n int) { b.data = resizeBitmap(b.data, n) b.validityBitmap = resizeBitmap(b.validityBitmap, n) } + +type OptInt32Builder struct { + builderBase + + data []int32 +} + +func NewOptInt32Builder(dtype arrow.DataType) *OptInt32Builder { + b := &OptInt32Builder{} + b.dtype = dtype + return b +} + +func (b *OptInt32Builder) resizeData(neededLength int) { + if cap(b.data) < neededLength { + oldData := b.data + b.data = make([]int32, bitutil.NextPowerOf2(neededLength)) + copy(b.data, oldData) + } + b.data = b.data[:neededLength] +} + +func (b *OptInt32Builder) Release() { + if atomic.AddInt64(&b.refCount, -1) == 0 { + b.data = nil + b.releaseInternal() + } +} + +func (b *OptInt32Builder) AppendNull() { + b.AppendNulls(1) +} + +// AppendEmptyValue adds a new zero value (0) to the array being built. +func (b *OptInt32Builder) AppendEmptyValue() { + b.Append(0) +} + +func (b *OptInt32Builder) AppendNulls(n int) { + b.resizeData(b.length + n) + b.builderBase.AppendNulls(n) +} + +func (b *OptInt32Builder) NewArray() arrow.Array { + dataAsBytes := unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(b.data))), len(b.data)*arrow.Int32SizeBytes) + data := array.NewData( + b.dtype, + b.length, + []*memory.Buffer{ + memory.NewBufferBytes(b.validityBitmap), + memory.NewBufferBytes(dataAsBytes), + }, + nil, + b.length-bitutil.CountSetBits(b.validityBitmap, 0, b.length), + 0, + ) + b.reset() + b.data = nil + return array.NewInt32Data(data) +} + +// AppendData appends a slice of int32s to the builder. This data is considered +// to be non-null. 
+func (b *OptInt32Builder) AppendData(data []int32) { + oldLength := b.length + b.data = append(b.data, data...) + b.length += len(data) + b.validityBitmap = resizeBitmap(b.validityBitmap, b.length) + bitutil.SetBitsTo(b.validityBitmap, int64(oldLength), int64(len(data)), true) +} + +func (b *OptInt32Builder) Append(v int32) { + b.data = append(b.data, v) + b.length++ + b.validityBitmap = resizeBitmap(b.validityBitmap, b.length) + bitutil.SetBit(b.validityBitmap, b.length-1) +} + +// Set sets value v at index i. This will panic if i is out of bounds. Use this +// after calling Reserve. +func (b *OptInt32Builder) Set(i int, v int32) { + b.data[i] = v + bitutil.SetBit(b.validityBitmap, i) +} + +// Swap swaps the values at indices i and j. +func (b *OptInt32Builder) Swap(i, j int) { + b.data[i], b.data[j] = b.data[j], b.data[i] +} + +func (b *OptInt32Builder) Add(i int, v int32) { + b.data[i] += v +} + +func (b *OptInt32Builder) Value(i int) int32 { + return b.data[i] +} + +func (b *OptInt32Builder) AppendParquetValues(values []parquet.Value) { + b.resizeData(b.length + len(values)) + b.validityBitmap = resizeBitmap(b.validityBitmap, b.length+len(values)) + for i, j := b.length, 0; i < b.length+len(values) && j < len(values); { + b.data[i] = values[j].Int32() + bitutil.SetBitTo(b.validityBitmap, i, !values[j].IsNull()) + i++ + j++ + } + b.length += len(values) +} + +func (b *OptInt32Builder) RepeatLastValue(n int) error { + if bitutil.BitIsNotSet(b.validityBitmap, b.length-1) { + b.AppendNulls(n) + return nil + } + + lastValue := b.data[b.length-1] + b.resizeData(b.length + n) + for i := b.length; i < b.length+n; i++ { + b.data[i] = lastValue + } + b.appendValid(n) + return nil +} + +// ResetToLength is specific to distinct optimizations in FrostDB. 
+func (b *OptInt32Builder) ResetToLength(n int) { + if n == b.length { + return + } + + b.length = n + b.data = b.data[:n] + b.validityBitmap = resizeBitmap(b.validityBitmap, n) +} + +func (b *OptInt32Builder) Reserve(n int) { + b.length = n + b.data = slices.Grow(b.data, n)[:n] + b.validityBitmap = resizeBitmap(b.validityBitmap, n) +} + +type OptFloat64Builder struct { + builderBase + + data []float64 +} + +func NewOptFloat64Builder(dtype arrow.DataType) *OptFloat64Builder { + b := &OptFloat64Builder{} + b.dtype = dtype + return b +} + +func (b *OptFloat64Builder) resizeData(neededLength int) { + if cap(b.data) < neededLength { + oldData := b.data + b.data = make([]float64, bitutil.NextPowerOf2(neededLength)) + copy(b.data, oldData) + } + b.data = b.data[:neededLength] +} + +func (b *OptFloat64Builder) Release() { + if atomic.AddInt64(&b.refCount, -1) == 0 { + b.data = nil + b.releaseInternal() + } +} + +func (b *OptFloat64Builder) AppendNull() { + b.AppendNulls(1) +} + +// AppendEmptyValue adds a new zero value (0.0) to the array being built. +func (b *OptFloat64Builder) AppendEmptyValue() { + b.Append(0.0) +} + +func (b *OptFloat64Builder) AppendNulls(n int) { + b.resizeData(b.length + n) + b.builderBase.AppendNulls(n) +} + +func (b *OptFloat64Builder) NewArray() arrow.Array { + dataAsBytes := unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(b.data))), len(b.data)*arrow.Float64SizeBytes) + data := array.NewData( + b.dtype, + b.length, + []*memory.Buffer{ + memory.NewBufferBytes(b.validityBitmap), + memory.NewBufferBytes(dataAsBytes), + }, + nil, + b.length-bitutil.CountSetBits(b.validityBitmap, 0, b.length), + 0, + ) + b.reset() + b.data = nil + return array.NewFloat64Data(data) +} + +// AppendData appends a slice of float64s to the builder. +// This data is considered to be non-null. +func (b *OptFloat64Builder) AppendData(data []float64) { + oldLength := b.length + b.data = append(b.data, data...) 
+ b.length += len(data) + b.validityBitmap = resizeBitmap(b.validityBitmap, b.length) + bitutil.SetBitsTo(b.validityBitmap, int64(oldLength), int64(len(data)), true) +} + +func (b *OptFloat64Builder) Append(v float64) { + b.data = append(b.data, v) + b.length++ + b.validityBitmap = resizeBitmap(b.validityBitmap, b.length) + bitutil.SetBit(b.validityBitmap, b.length-1) +} + +func (b *OptFloat64Builder) Set(i int, v float64) { + b.data[i] = v +} + +func (b *OptFloat64Builder) Add(i int, v float64) { + b.data[i] += v +} + +// Value returns the ith value of the builder. +func (b *OptFloat64Builder) Value(i int) float64 { + return b.data[i] +} + +func (b *OptFloat64Builder) AppendParquetValues(values []parquet.Value) { + b.resizeData(b.length + len(values)) + b.validityBitmap = resizeBitmap(b.validityBitmap, b.length+len(values)) + for i, j := b.length, 0; i < b.length+len(values) && j < len(values); { + b.data[i] = values[j].Double() + bitutil.SetBitTo(b.validityBitmap, i, !values[j].IsNull()) + i++ + j++ + } + b.length += len(values) +} + +func (b *OptFloat64Builder) RepeatLastValue(n int) error { + if bitutil.BitIsNotSet(b.validityBitmap, b.length-1) { + b.AppendNulls(n) + return nil + } + + lastValue := b.data[b.length-1] + b.resizeData(b.length + n) + for i := b.length; i < b.length+n; i++ { + b.data[i] = lastValue + } + b.appendValid(n) + return nil +} + +// ResetToLength is specific to distinct optimizations in FrostDB. 
+func (b *OptFloat64Builder) ResetToLength(n int) { + if n == b.length { + return + } + + b.length = n + b.data = b.data[:n] + b.validityBitmap = resizeBitmap(b.validityBitmap, n) +} diff --git a/pqarrow/builder/optbuilders_test.go b/pqarrow/builder/optbuilders_test.go index ac8391d33..e636402ed 100644 --- a/pqarrow/builder/optbuilders_test.go +++ b/pqarrow/builder/optbuilders_test.go @@ -2,23 +2,70 @@ package builder_test import ( "fmt" + "math" + "math/rand" "testing" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/require" - "github.com/polarsignals/frostdb/pqarrow/arrowutils" "github.com/polarsignals/frostdb/pqarrow/builder" ) +func TestOptBuilders(t *testing.T) { + testCases := []struct { + b builder.OptimizedBuilder + v any + }{ + { + b: builder.NewOptBinaryBuilder(arrow.BinaryTypes.Binary), + v: []byte("hello"), + }, + { + b: builder.NewOptBooleanBuilder(arrow.FixedWidthTypes.Boolean), + v: true, + }, + { + b: builder.NewOptFloat64Builder(arrow.PrimitiveTypes.Float64), + v: 1.0, + }, + { + b: builder.NewOptInt32Builder(arrow.PrimitiveTypes.Int32), + v: int32(123), + }, + { + b: builder.NewOptInt64Builder(arrow.PrimitiveTypes.Int64), + v: int64(123), + }, + } + for _, tc := range testCases { + t.Run(fmt.Sprintf("%T", tc.b), func(t *testing.T) { + require.NoError(t, builder.AppendGoValue(tc.b, tc.v)) + require.NoError(t, builder.AppendGoValue(tc.b, tc.v)) + + require.Equal(t, tc.b.Len(), 2) + require.True(t, tc.b.IsValid(0)) + require.True(t, tc.b.IsValid(1)) + + tc.b.SetNull(1) // overwrite second value with NULL + require.True(t, tc.b.IsValid(0)) + require.True(t, tc.b.IsNull(1)) + + a 
:= tc.b.NewArray() + require.Equal(t, tc.v, a.GetOneForMarshal(0)) + require.Equal(t, nil, a.GetOneForMarshal(1)) + }) + } +} + // https://github.com/polarsignals/frostdb/issues/270 func TestIssue270(t *testing.T) { b := builder.NewOptBinaryBuilder(arrow.BinaryTypes.Binary) b.AppendNull() const expString = "hello" - b.Append([]byte(expString)) + require.NoError(t, b.Append([]byte(expString))) require.Equal(t, b.Len(), 2) a := b.NewArray().(*array.Binary) @@ -48,12 +95,11 @@ func TestRepeatLastValue(t *testing.T) { for _, tc := range testCases { require.NoError(t, builder.AppendGoValue(tc.b, tc.v)) require.Equal(t, tc.b.Len(), 1) - tc.b.RepeatLastValue(9) + require.NoError(t, tc.b.RepeatLastValue(9)) require.Equal(t, tc.b.Len(), 10) a := tc.b.NewArray() for i := 0; i < a.Len(); i++ { - v, err := arrowutils.GetValue(a, i) - require.NoError(t, err) + v := a.GetOneForMarshal(i) require.Equal(t, tc.v, v) } } @@ -74,3 +120,119 @@ func Test_ListBuilder(t *testing.T) { ar := lb.NewArray() require.Equal(t, "[[1 2 3] [4 5 6]]", fmt.Sprintf("%v", ar)) } + +// Test_BuildLargeArray is a test that builds a large array ( > MaxInt32) +// The reason for this test is that we've hit cases where the binary array builder had so many values appended onto +// it that it caused the uint32 that was being used to track value offsets to overflow. 
+func Test_BuildLargeArray(t *testing.T) { + if testing.Short() { + t.Skip("in short mode; skipping long test") + } + alloc := memory.NewGoAllocator() + bldr := builder.NewBuilder(alloc, &arrow.BinaryType{}) + + size := rand.Intn(1024) * 1024 // [1k,1MB) values + buf := make([]byte, size) + binbldr := array.NewBinaryBuilder(alloc, &arrow.BinaryType{}) + binbldr.Append(buf) + arr := binbldr.NewArray() + + n := (math.MaxInt32 / size) + 1 + for i := 0; i < n; i++ { + switch i { + case n - 1: + require.Error(t, builder.AppendValue(bldr, arr, 0)) + default: + require.NoError(t, builder.AppendValue(bldr, arr, 0)) + } + } + + newarr := bldr.NewArray() + + // Validate we can read all rows + for i := 0; i < n-1; i++ { + newarr.(*array.Binary).Value(i) + } + + // We expect fewer rows in the array + require.Equal(t, n-1, newarr.Data().Len()) +} + +func TestOptBinaryBuilder_Value(t *testing.T) { + b := builder.NewOptBinaryBuilder(arrow.BinaryTypes.Binary) + values := []string{"1", "2", "3"} + for _, v := range values { + require.NoError(t, b.Append([]byte(v))) + } + + for i, value := range values { + require.Equal(t, value, string(b.Value(i))) + } +} + +func TestAppendEmptyValue(t *testing.T) { + t.Run("OptBinaryBuilder", func(t *testing.T) { + b := builder.NewOptBinaryBuilder(arrow.BinaryTypes.Binary) + b.AppendEmptyValue() + require.Equal(t, 1, b.Len()) + require.True(t, b.IsValid(0)) + require.Len(t, b.Value(0), 0) // Empty value should have length 0 + + arr := b.NewArray().(*array.Binary) + require.Equal(t, 1, arr.Len()) + require.False(t, arr.IsNull(0)) + require.Len(t, arr.Value(0), 0) // Empty value should have length 0 + }) + + t.Run("OptInt64Builder", func(t *testing.T) { + b := builder.NewOptInt64Builder(arrow.PrimitiveTypes.Int64) + b.AppendEmptyValue() + require.Equal(t, 1, b.Len()) + require.True(t, b.IsValid(0)) + require.Equal(t, int64(0), b.Value(0)) + + arr := b.NewArray().(*array.Int64) + require.Equal(t, 1, arr.Len()) + require.False(t, arr.IsNull(0)) + 
require.Equal(t, int64(0), arr.Value(0)) + }) + + t.Run("OptInt32Builder", func(t *testing.T) { + b := builder.NewOptInt32Builder(arrow.PrimitiveTypes.Int32) + b.AppendEmptyValue() + require.Equal(t, 1, b.Len()) + require.True(t, b.IsValid(0)) + require.Equal(t, int32(0), b.Value(0)) + + arr := b.NewArray().(*array.Int32) + require.Equal(t, 1, arr.Len()) + require.False(t, arr.IsNull(0)) + require.Equal(t, int32(0), arr.Value(0)) + }) + + t.Run("OptFloat64Builder", func(t *testing.T) { + b := builder.NewOptFloat64Builder(arrow.PrimitiveTypes.Float64) + b.AppendEmptyValue() + require.Equal(t, 1, b.Len()) + require.True(t, b.IsValid(0)) + require.Equal(t, 0.0, b.Value(0)) + + arr := b.NewArray().(*array.Float64) + require.Equal(t, 1, arr.Len()) + require.False(t, arr.IsNull(0)) + require.Equal(t, 0.0, arr.Value(0)) + }) + + t.Run("OptBooleanBuilder", func(t *testing.T) { + b := builder.NewOptBooleanBuilder(arrow.FixedWidthTypes.Boolean) + b.AppendEmptyValue() + require.Equal(t, 1, b.Len()) + require.True(t, b.IsValid(0)) + require.Equal(t, false, b.Value(0)) + + arr := b.NewArray().(*array.Boolean) + require.Equal(t, 1, arr.Len()) + require.False(t, arr.IsNull(0)) + require.Equal(t, false, arr.Value(0)) + }) +} diff --git a/pqarrow/builder/recordbuilder.go b/pqarrow/builder/recordbuilder.go index 8f8daad1e..cddc3dab1 100644 --- a/pqarrow/builder/recordbuilder.go +++ b/pqarrow/builder/recordbuilder.go @@ -4,9 +4,9 @@ import ( "fmt" "sync/atomic" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" ) // The code in this file is based heavily on Apache arrow's array.RecordBuilder, @@ -28,11 +28,11 @@ func NewRecordBuilder(mem memory.Allocator, schema *arrow.Schema) 
*RecordBuilder refCount: 1, mem: mem, schema: schema, - fields: make([]ColumnBuilder, len(schema.Fields())), + fields: make([]ColumnBuilder, schema.NumFields()), } - for i, f := range schema.Fields() { - b.fields[i] = NewBuilder(mem, f.Type) + for i := 0; i < schema.NumFields(); i++ { + b.fields[i] = NewBuilder(mem, schema.Field(i).Type) } return b @@ -97,9 +97,11 @@ func (b *RecordBuilder) NewRecord() arrow.Record { // ExpandSchema expands the record builder schema by adding new fields. func (b *RecordBuilder) ExpandSchema(schema *arrow.Schema) { - for i, f := range schema.Fields() { + for i := 0; i < schema.NumFields(); i++ { + f := schema.Field(i) found := false - for _, old := range b.schema.Fields() { + for j := 0; j < b.schema.NumFields(); j++ { + old := b.schema.Field(j) if f.Equal(old) { found = true break @@ -115,3 +117,14 @@ func (b *RecordBuilder) ExpandSchema(schema *arrow.Schema) { b.schema = schema } + +// Reset will call ResetFull on any dictionary builders to prevent memo tables from growing unbounded. 
+func (b *RecordBuilder) Reset() { + for _, f := range b.fields { + if lb, ok := f.(*ListBuilder); ok { + if vb, ok := lb.ValueBuilder().(array.DictionaryBuilder); ok { + vb.ResetFull() + } + } + } +} diff --git a/pqarrow/builder/utils.go b/pqarrow/builder/utils.go index 3f1f55e25..fff5603c5 100644 --- a/pqarrow/builder/utils.go +++ b/pqarrow/builder/utils.go @@ -4,9 +4,9 @@ import ( "fmt" "unsafe" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" ) func NewBuilder(mem memory.Allocator, t arrow.DataType) ColumnBuilder { @@ -24,6 +24,33 @@ func NewBuilder(mem memory.Allocator, t arrow.DataType) ColumnBuilder { } } +func RollbackPrevious(cb ColumnBuilder) error { + switch b := cb.(type) { + case *OptBinaryBuilder: + b.ResetToLength(b.Len() - 1) + case *OptInt64Builder: + b.ResetToLength(b.Len() - 1) + case *OptBooleanBuilder: + b.ResetToLength(b.Len() - 1) + case *array.Int64Builder: + b.Resize(b.Len() - 1) + + case *array.StringBuilder: + b.Resize(b.Len() - 1) + case *array.BinaryBuilder: + b.Resize(b.Len() - 1) + case *array.FixedSizeBinaryBuilder: + b.Resize(b.Len() - 1) + case *array.BooleanBuilder: + b.Resize(b.Len() - 1) + case *array.BinaryDictionaryBuilder: + b.Resize(b.Len() - 1) + default: + return fmt.Errorf("unsupported type for RollbackPrevious %T", b) + } + return nil +} + func AppendValue(cb ColumnBuilder, arr arrow.Array, i int) error { if arr == nil || arr.IsNull(i) { cb.AppendNull() @@ -32,13 +59,19 @@ func AppendValue(cb ColumnBuilder, arr arrow.Array, i int) error { switch b := cb.(type) { case *OptBinaryBuilder: - b.Append(arr.(*array.Binary).Value(i)) + return b.Append(arr.(*array.Binary).Value(i)) case *OptInt64Builder: 
b.Append(arr.(*array.Int64).Value(i)) case *OptBooleanBuilder: b.AppendSingle(arr.(*array.Boolean).Value(i)) case *array.Int64Builder: b.Append(arr.(*array.Int64).Value(i)) + case *array.Int32Builder: + b.Append(arr.(*array.Int32).Value(i)) + case *array.Float64Builder: + b.Append(arr.(*array.Float64).Value(i)) + case *array.Uint64Builder: + b.Append(arr.(*array.Uint64).Value(i)) case *array.StringBuilder: b.Append(arr.(*array.String).Value(i)) case *array.BinaryBuilder: @@ -47,6 +80,15 @@ func AppendValue(cb ColumnBuilder, arr arrow.Array, i int) error { b.Append(arr.(*array.FixedSizeBinary).Value(i)) case *array.BooleanBuilder: b.Append(arr.(*array.Boolean).Value(i)) + case *array.StructBuilder: + arrStruct := arr.(*array.Struct) + + b.Append(true) + for j := 0; j < b.NumField(); j++ { + if err := AppendValue(b.FieldBuilder(j), arrStruct.Field(j), i); err != nil { + return fmt.Errorf("failed to append struct field: %w", err) + } + } case *array.BinaryDictionaryBuilder: switch a := arr.(type) { case *array.Dictionary: @@ -65,32 +107,76 @@ func AppendValue(cb ColumnBuilder, arr arrow.Array, i int) error { default: return fmt.Errorf("non-dictionary array %T provided for dictionary builder", a) } - // case *array.List: - // // TODO: This seems horribly inefficient, we already have the whole - // // array and are just doing an expensive copy, but arrow doesn't seem - // // to be able to append whole list scalars at once. 
- // length := s.Value.Len() - // larr := arr.(*array.ListBuilder) - // vb := larr.ValueBuilder() - // larr.Append(true) - // for i := 0; i < length; i++ { - // v, err := scalar.GetScalar(s.Value, i) - // if err != nil { - // return err - // } - - // err = appendValue(vb, v) - // if err != nil { - // return err - // } - // } - // return nil + case *array.ListBuilder: + return buildList(b.ValueBuilder(), b, arr, i) + case *ListBuilder: + return buildList(b.ValueBuilder(), b, arr, i) default: return fmt.Errorf("unsupported type for arrow append %T", b) } return nil } +type ListLikeBuilder interface { + Append(bool) +} + +func buildList(vb any, b ListLikeBuilder, arr arrow.Array, i int) error { + list := arr.(*array.List) + start, end := list.ValueOffsets(i) + + data := list.ListValues().Data() + if start > int64(data.Len()) || start > end || data.Offset()+int(start) > data.Offset()+data.Len() { + return fmt.Errorf("invalid data range: start=%d end=%d for list with %v", start, end, list.Offsets()) + } + + values := array.NewSlice(list.ListValues(), start, end) + defer values.Release() + + switch v := values.(type) { + case *array.Int64: + int64Builder := vb.(*OptInt64Builder) + b.Append(true) + for j := 0; j < v.Len(); j++ { + int64Builder.Append(v.Value(j)) + } + case *array.Dictionary: + switch dict := v.Dictionary().(type) { + case *array.Binary: + b.Append(true) + for j := 0; j < v.Len(); j++ { + switch bldr := vb.(type) { + case *array.BinaryDictionaryBuilder: + if err := bldr.Append(dict.Value(v.GetValueIndex(j))); err != nil { + return err + } + default: + return fmt.Errorf("unknown value builder type %T", bldr) + } + } + } + case *array.Struct: + structBuilder, ok := vb.(*array.StructBuilder) + if !ok { + return fmt.Errorf("unsupported type for ListLikeBuilder: %T", vb) + } + + b.Append(true) + for j := 0; j < v.Len(); j++ { + structBuilder.Append(true) + for k := 0; k < v.NumField(); k++ { + if err := AppendValue(structBuilder.FieldBuilder(k), v.Field(k), j); 
err != nil { + return err + } + } + } + default: + return fmt.Errorf("unsupported type for List builder %T", v) + } + + return nil +} + // TODO(asubiotto): This function doesn't handle NULLs in the case of optimized // builders. func AppendArray(cb ColumnBuilder, arr arrow.Array) error { @@ -98,7 +184,7 @@ func AppendArray(cb ColumnBuilder, arr arrow.Array) error { case *OptBinaryBuilder: v := arr.(*array.Binary) offsets := v.ValueOffsets() - b.AppendData(v.ValueBytes(), *(*[]uint32)(unsafe.Pointer(&offsets))) + return b.AppendData(v.ValueBytes(), *(*[]uint32)(unsafe.Pointer(&offsets))) case *OptInt64Builder: b.AppendData(arr.(*array.Int64).Int64Values()) default: @@ -124,13 +210,19 @@ func AppendGoValue(cb ColumnBuilder, v any) error { switch b := cb.(type) { case *OptBinaryBuilder: - b.Append(v.([]byte)) - case *OptInt64Builder: - b.Append(v.(int64)) + return b.Append(v.([]byte)) case *OptBooleanBuilder: b.AppendSingle(v.(bool)) + case *OptFloat64Builder: + b.Append(v.(float64)) + case *OptInt32Builder: + b.Append(v.(int32)) + case *OptInt64Builder: + b.Append(v.(int64)) case *array.Int64Builder: b.Append(v.(int64)) + case *array.Int32Builder: + b.Append(v.(int32)) case *array.StringBuilder: b.Append(v.(string)) case *array.BinaryBuilder: @@ -140,8 +232,13 @@ func AppendGoValue(cb ColumnBuilder, v any) error { case *array.BooleanBuilder: b.Append(v.(bool)) case *array.BinaryDictionaryBuilder: - if err := b.Append(v.([]byte)); err != nil { - return err + switch e := v.(type) { + case string: + return b.Append([]byte(e)) + case []byte: + return b.Append(e) + default: + return fmt.Errorf("unsupported type %T for append go value %T", e, b) } default: return fmt.Errorf("unsupported type for append go value %T", b) diff --git a/pqarrow/convert/convert.go b/pqarrow/convert/convert.go index 2130f0c3c..5508df9e6 100644 --- a/pqarrow/convert/convert.go +++ b/pqarrow/convert/convert.go @@ -3,9 +3,9 @@ package convert import ( "errors" - 
"github.com/apache/arrow/go/v10/arrow" - "github.com/segmentio/parquet-go" - "github.com/segmentio/parquet-go/format" + "github.com/apache/arrow-go/v18/arrow" + "github.com/parquet-go/parquet-go" + "github.com/parquet-go/parquet-go/format" "github.com/polarsignals/frostdb/pqarrow/writer" ) @@ -65,7 +65,7 @@ func ParquetNodeToType(n parquet.Node) (arrow.DataType, error) { fallthrough case format.RLEDictionary: dt = &arrow.DictionaryType{ - IndexType: &arrow.Int16Type{}, // TODO: do we need more width? + IndexType: &arrow.Uint32Type{}, ValueType: &arrow.BinaryType{}, } default: @@ -149,12 +149,12 @@ func GetWriter(offset int, n parquet.Node) (writer.NewWriterFunc, error) { // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#maps func hasMapFields(n parquet.Node) bool { // toplevel group requiredto be repeated group key_value with - if !(len(n.Fields()) == 1 && n.Fields()[0].Repeated() && n.Fields()[0].Name() == "key_value") { + if len(n.Fields()) != 1 || !n.Fields()[0].Repeated() || n.Fields()[0].Name() != "key_value" { return false } // can only be two fields, a key field and an optional value field - if !(len(n.Fields()[0].Fields()) >= 1 && len(n.Fields()[0].Fields()) <= 2) { + if len(n.Fields()[0].Fields()) < 1 || len(n.Fields()[0].Fields()) > 2 { return false } @@ -170,12 +170,12 @@ func hasMapFields(n parquet.Node) bool { // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists func hasListFields(n parquet.Node) bool { - if !((n.Optional() || n.Required()) && len(n.Fields()) == 1) { + if (!n.Optional() && !n.Required()) || len(n.Fields()) != 1 { return false } list := n.Fields()[0] - if !(list.Name() == "list" && list.Repeated() && len(list.Fields()) == 1) { + if list.Name() != "list" || !list.Repeated() || len(list.Fields()) != 1 { return false } diff --git a/pqarrow/convert/convert_test.go 
b/pqarrow/convert/convert_test.go index 54fda4783..508e4139b 100644 --- a/pqarrow/convert/convert_test.go +++ b/pqarrow/convert/convert_test.go @@ -4,8 +4,8 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v10/arrow" - "github.com/segmentio/parquet-go" + "github.com/apache/arrow-go/v18/arrow" + "github.com/parquet-go/parquet-go" "github.com/stretchr/testify/require" ) @@ -93,7 +93,7 @@ func TestParquetNodeToType(t *testing.T) { msg: "unsupported logical type: ENUM", }, // This causes stack overflow. - // Fix PR: https://github.com/segmentio/parquet-go/pull/244 + // Fix PR: https://github.com/parquet-go/parquet-go/pull/244 //{ // parquetNode: parquet.JSON(), // msg: "unsupported type: JSON", @@ -114,7 +114,7 @@ func TestParquetNodeToType(t *testing.T) { parquetNode: parquet.Timestamp(parquet.Millisecond), msg: "unsupported logical type: TIMESTAMP(isAdjustedToUTC=true,unit=MILLIS)", }, - // nullType is unexported by segmentio/parquet-go. + // nullType is unexported by parquet-go/parquet-go. 
} for _, c := range errCases { t.Run(c.msg, func(t *testing.T) { diff --git a/pqarrow/parquet.go b/pqarrow/parquet.go index 701f2c4c7..b1ac1f262 100644 --- a/pqarrow/parquet.go +++ b/pqarrow/parquet.go @@ -1,16 +1,17 @@ package pqarrow import ( + "bytes" "fmt" "io" + "sort" "strings" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/scalar" - "github.com/segmentio/parquet-go" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/scalar" + "github.com/parquet-go/parquet-go" - "github.com/polarsignals/frostdb/bufutils" "github.com/polarsignals/frostdb/dynparquet" ) @@ -20,6 +21,10 @@ func ArrowScalarToParquetValue(sc scalar.Scalar) (parquet.Value, error) { return parquet.ValueOf(string(s.Data())), nil case *scalar.Int64: return parquet.ValueOf(s.Value), nil + case *scalar.Int32: + return parquet.ValueOf(s.Value), nil + case *scalar.Uint64: + return parquet.ValueOf(s.Value), nil case *scalar.FixedSizeBinary: width := s.Type.(*arrow.FixedSizeBinaryType).ByteWidth v := [16]byte{} @@ -36,149 +41,361 @@ func ArrowScalarToParquetValue(sc scalar.Scalar) (parquet.Value, error) { } } -func appendToRow(row []parquet.Value, c arrow.Array, index, rep, def, col int) ([]parquet.Value, error) { - switch arr := c.(type) { - case *array.Int64: - row = append(row, parquet.ValueOf(arr.Value(index)).Level(rep, def, col)) - case *array.Boolean: - row = append(row, parquet.ValueOf(arr.Value(index)).Level(rep, def, col)) - case *array.Binary: - row = append(row, parquet.ValueOf(arr.Value(index)).Level(rep, def, col)) - case *array.String: - row = append(row, parquet.ValueOf(arr.Value(index)).Level(rep, def, col)) - case *array.Uint64: - row = append(row, parquet.ValueOf(arr.Value(index)).Level(rep, def, 
col)) - case *array.Dictionary: - switch dict := arr.Dictionary().(type) { - case *array.Binary: - row = append(row, parquet.ValueOf(dict.Value(arr.GetValueIndex(index))).Level(rep, def, col)) +// singlePassThroughWriter is used to keep a reference to the rows written to +// a parquet writer when only converting a single row. Calling WriteRows more +// than once is unsupported. +type singlePassThroughWriter struct { + rows []parquet.Row +} + +var _ dynparquet.ParquetWriter = (*singlePassThroughWriter)(nil) + +func (w *singlePassThroughWriter) Schema() *parquet.Schema { return nil } + +func (w *singlePassThroughWriter) Write(_ []any) (int, error) { panic("use WriteRows instead") } + +func (w *singlePassThroughWriter) WriteRows(rows []parquet.Row) (int, error) { + if w.rows != nil { + panic("cannot call WriteRows more than once") + } + w.rows = rows + return len(rows), nil +} + +func (w *singlePassThroughWriter) Flush() error { return nil } + +func (w *singlePassThroughWriter) Close() error { return nil } + +func (w *singlePassThroughWriter) Reset(_ io.Writer) {} + +// RecordToRow converts an arrow record with dynamic columns into a row using a dynamic parquet schema. +func RecordToRow(final *parquet.Schema, record arrow.Record, index int) (parquet.Row, error) { + w := &singlePassThroughWriter{} + if err := recordToRows(w, record, index, index+1, final.Fields()); err != nil { + return nil, err + } + return w.rows[0], nil +} + +// recordToRows converts a full arrow record to parquet rows which are written +// to the parquet writer. +// The caller should use recordStart=0 and recordEnd=record.NumRows() to convert +// the entire record. Alternatively, the caller may only convert a subset of +// rows by specifying a range of [recordStart, recordEnd). 
+func recordToRows(w dynparquet.ParquetWriter, record arrow.Record, recordStart, recordEnd int, finalFields []parquet.Field) error { + numRows := recordEnd - recordStart + schema := record.Schema() + row := make(parquet.Row, len(finalFields)) + writers := make([]arrowToParquet, len(finalFields)) + for i := range writers { + f := finalFields[i] + name := f.Name() + def := 0 + if f.Optional() { + def = 1 + } + idx := schema.FieldIndices(name) + if len(idx) == 0 { + writers[i] = writeNull(i) + continue + } + column := record.Column(idx[0]) + switch a := column.(type) { + case *array.List: + ls, err := writeList(def, i, recordStart, a) + if err != nil { + return err + } + writers[i] = ls + case *array.Dictionary: + writers[i] = writeDictionary(def, i, recordStart, a) + case *array.Int32: + writers[i] = writeInt32(def, i, recordStart, a) + case *array.Uint64: + writers[i] = writeUint64(def, i, recordStart, a) + case *array.Int64: + writers[i] = writeInt64(def, i, recordStart, a) case *array.String: - row = append(row, parquet.ValueOf(dict.Value(arr.GetValueIndex(index))).Level(rep, def, col)) + writers[i] = writeString(def, i, recordStart, a) + case *array.Binary: + writers[i] = writeBinary(def, i, recordStart, a) default: - return nil, fmt.Errorf("dictionary not of expected type: %T", dict) + writers[i] = writeGeneral(def, i, recordStart, a) + } + } + rows := make([]parquet.Row, 1) + for i := 0; i < numRows; i++ { + row = row[:0] + for j := range writers { + row = writers[j](row, i) + } + rows[0] = row + _, err := w.WriteRows(rows) + if err != nil { + return err } - default: - return nil, fmt.Errorf("column not of expected type: %v", c.DataType().ID()) } + return nil +} - return row, nil +func writeGeneral(def, column, startIdx int, a arrow.Array) arrowToParquet { + return func(w parquet.Row, row int) parquet.Row { + if a.IsNull(row + startIdx) { + return append(w, + parquet.Value{}.Level(0, 0, column), + ) + } + return append(w, + 
parquet.ValueOf(a.GetOneForMarshal(row+startIdx)).Level(0, def, column), + ) + } } -// RecordToRow converts an arrow record with dynamic columns into a row using a dynamic parquet schema. -func RecordToRow(schema *dynparquet.Schema, final *parquet.Schema, record arrow.Record, index int) (parquet.Row, error) { - return getRecordRow(schema, final, record, index, final.Fields(), record.Schema().Fields()) -} - -func getRecordRow(schema *dynparquet.Schema, final *parquet.Schema, record arrow.Record, index int, finalFields []parquet.Field, recordFields []arrow.Field) (parquet.Row, error) { - var err error - row := make([]parquet.Value, 0, len(finalFields)) - for i, f := range finalFields { // assuming flat schema - found := false - for j, af := range recordFields { - if f.Name() == af.Name { - def := 0 - if isDynamicColumn(schema, af.Name) { - def = 1 - } - row, err = appendToRow(row, record.Column(j), index, 0, def, i) - if err != nil { - return nil, err - } - found = true - break - } +func writeUint64(def, column, startIdx int, a *array.Uint64) arrowToParquet { + return func(w parquet.Row, row int) parquet.Row { + if a.IsNull(row + startIdx) { + return append(w, + parquet.Value{}.Level(0, 0, column), + ) } + return append(w, + parquet.Int64Value(int64(a.Value(row+startIdx))).Level(0, def, column), + ) + } +} - // No record field found; append null - if !found { - row = append(row, parquet.ValueOf(nil).Level(0, 0, i)) +func writeInt64(def, column, startIdx int, a *array.Int64) arrowToParquet { + return func(w parquet.Row, row int) parquet.Row { + if a.IsNull(row + startIdx) { + return append(w, + parquet.Value{}.Level(0, 0, column), + ) } + return append(w, + parquet.Int64Value(a.Value(row+startIdx)).Level(0, def, column), + ) } +} - return row, nil +func writeInt32(def, column, startIdx int, a *array.Int32) arrowToParquet { + return func(w parquet.Row, row int) parquet.Row { + if a.IsNull(row + startIdx) { + return append(w, + parquet.Value{}.Level(0, 0, column), + ) + 
} + return append(w, + parquet.Int32Value(a.Value(row+startIdx)).Level(0, def, column), + ) + } } -func isDynamicColumn(schema *dynparquet.Schema, column string) bool { - parts := strings.SplitN(column, ".", 2) - return len(parts) == 2 && schema.IsDynamicColumn(parts[0]) // dynamic column +func writeBinary(def, column, startIdx int, a *array.Binary) arrowToParquet { + return func(w parquet.Row, row int) parquet.Row { + if a.IsNull(row + startIdx) { + return append(w, + parquet.Value{}.Level(0, 0, column), + ) + } + return append(w, + parquet.ByteArrayValue(a.Value(row+startIdx)).Level(0, def, column), + ) + } } -func RecordDynamicCols(record arrow.Record) map[string][]string { - dyncols := map[string][]string{} - for _, af := range record.Schema().Fields() { - parts := strings.SplitN(af.Name, ".", 2) - if len(parts) == 2 { // dynamic column - dyncols[parts[0]] = append(dyncols[parts[0]], parts[1]) +func writeString(def, column, startIdx int, a *array.String) arrowToParquet { + return func(w parquet.Row, row int) parquet.Row { + if a.IsNull(row + startIdx) { + return append(w, + parquet.Value{}.Level(0, 0, column), + ) } + return append(w, + parquet.ByteArrayValue([]byte(a.Value(row+startIdx))).Level(0, def, column), + ) } +} - return bufutils.Dedupe(dyncols) +func writeDictionary(def, column, startIdx int, a *array.Dictionary) arrowToParquet { + value := func(row int) parquet.Value { + return parquet.ValueOf(a.GetOneForMarshal(row)) + } + if x, ok := a.Dictionary().(*array.Binary); ok { + value = func(row int) parquet.Value { + return parquet.ByteArrayValue( + x.Value(a.GetValueIndex(row)), + ) + } + } + return func(w parquet.Row, row int) parquet.Row { + if a.IsNull(row + startIdx) { + return append(w, + parquet.Value{}.Level(0, 0, column), + ) + } + return append(w, + value(row+startIdx).Level(0, def, column), + ) + } } -// RecordToDynamicSchema converts an arrow record into a parquet schema, dynamic cols, and parquet fields. 
-func RecordToDynamicSchema(schema *dynparquet.Schema, record arrow.Record) (*parquet.Schema, map[string][]string, []parquet.Field) {
-	dyncols := map[string][]string{}
-	g := parquet.Group{}
-	for _, f := range schema.ParquetSchema().Fields() {
-		for _, af := range record.Schema().Fields() {
-			name := af.Name
-			parts := strings.SplitN(name, ".", 2)
-			if len(parts) == 2 { // dynamic column
-				name = parts[0] // dedupe
-				dyncols[parts[0]] = append(dyncols[parts[0]], parts[1])
-			}
+func writeList(def, column, startIdx int, a *array.List) (arrowToParquet, error) {
+	var lw arrowToParquet
+	switch e := a.ListValues().(type) {
+	case *array.Int32:
+		// While Int32 is not a base type, it is kept here to avoid breaking existing behavior.
+		lw = writeListOf(def, column, startIdx, a, func(idx int) parquet.Value {
+			return parquet.Int32Value(e.Value(idx))
+		})
+	case *array.Int64:
+		lw = writeListOf(def, column, startIdx, a, func(idx int) parquet.Value {
+			return parquet.Int64Value(e.Value(idx))
+		})
+	case *array.Boolean:
+		lw = writeListOf(def, column, startIdx, a, func(idx int) parquet.Value {
+			return parquet.BooleanValue(e.Value(idx))
+		})
+	case *array.Float64:
+		lw = writeListOf(def, column, startIdx, a, func(idx int) parquet.Value {
+			return parquet.DoubleValue(e.Value(idx))
+		})
+	case *array.String:
+		lw = writeListOf(def, column, startIdx, a, func(idx int) parquet.Value {
+			return parquet.ByteArrayValue([]byte(e.Value(idx)))
+		})
+	case *array.Binary:
+		lw = writeListOf(def, column, startIdx, a, func(idx int) parquet.Value {
+			return parquet.ByteArrayValue([]byte(e.Value(idx)))
+		})
+	case *array.Dictionary:
+		switch d := e.Dictionary().(type) {
+		case *array.Binary:
+			lw = writeListOf(def, column, startIdx, a, func(idx int) parquet.Value {
+				return parquet.ByteArrayValue(
+					d.Value(e.GetValueIndex(idx)),
+				)
+			})
+		case *array.String:
+			lw = writeListOf(def, column, startIdx, a, func(idx int) parquet.Value {
+				return parquet.ByteArrayValue(
[]byte(d.Value(e.GetValueIndex(idx))), + ) + }) + default: + return nil, fmt.Errorf("list dictionary not of expected type: %T", d) + } + default: + return nil, fmt.Errorf("list not of expected type: %T", e) + } + return func(w parquet.Row, row int) parquet.Row { + if a.IsNull(row + startIdx) { + return append(w, + parquet.Value{}.Level(0, 0, column), + ) + } + return lw(w, row) + }, nil +} - if f.Name() == name { - g[af.Name] = f +func writeListOf(def, column, startIdx int, a *array.List, value func(idx int) parquet.Value) arrowToParquet { + return func(w parquet.Row, row int) parquet.Row { + start, end := a.ValueOffsets(row + startIdx) + for k := start; k < end; k++ { + rep := 0 + if k != start { + rep = 1 } + w = append(w, value(int(k)).Level(rep, def+1, column)) } + return w + } +} + +func writeNull(column int) arrowToParquet { + return func(w parquet.Row, _ int) parquet.Row { + return append(w, parquet.Value{}.Level(0, 0, column)) } +} + +type arrowToParquet func(w parquet.Row, row int) parquet.Row - sc := parquet.NewSchema("arrow converted", g) - return sc, bufutils.Dedupe(dyncols), sc.Fields() +func RecordDynamicCols(record arrow.Record) (columns map[string][]string) { + dyncols := make(map[string]struct{}) + for i := 0; i < record.Schema().NumFields(); i++ { + af := record.Schema().Field(i) + if strings.Contains(af.Name, ".") { + dyncols[af.Name] = struct{}{} + } + } + columns = make(map[string][]string) + for s := range dyncols { + name, part, _ := strings.Cut(s, ".") + columns[name] = append(columns[name], part) + } + for k := range columns { + sort.Strings(columns[k]) + } + return } -func RecordToDynamicRow(schema *dynparquet.Schema, record arrow.Record, index int) (*dynparquet.DynamicRow, error) { +func RecordToDynamicRow(pqSchema *parquet.Schema, record arrow.Record, dyncols map[string][]string, index int) (*dynparquet.DynamicRow, error) { if index >= int(record.NumRows()) { return nil, io.EOF } - ps, err := 
schema.DynamicParquetSchema(RecordDynamicCols(record)) + row, err := RecordToRow(pqSchema, record, index) if err != nil { return nil, err } - row, err := RecordToRow(schema, ps, record, index) + return dynparquet.NewDynamicRow(row, pqSchema, dyncols, pqSchema.Fields()), nil +} + +func SerializeRecord(r arrow.Record, schema *dynparquet.Schema) (*dynparquet.SerializedBuffer, error) { + b := &bytes.Buffer{} + w, err := schema.GetWriter(b, RecordDynamicCols(r), false) if err != nil { return nil, err } + defer schema.PutWriter(w) + if err := RecordToFile(schema, w.ParquetWriter, r); err != nil { + return nil, err + } + f, err := parquet.OpenFile(bytes.NewReader(b.Bytes()), int64(b.Len())) + if err != nil { + return nil, fmt.Errorf("failed to read buf: %v", err) + } + buf, err := dynparquet.NewSerializedBuffer(f) + if err != nil { + return nil, err + } + return buf, nil +} - sch, dyncols, fields := RecordToDynamicSchema(schema, record) - return dynparquet.NewDynamicRow(row, sch, dyncols, fields), nil +func RecordToFile(schema *dynparquet.Schema, w dynparquet.ParquetWriter, r arrow.Record) error { + return RecordsToFile(schema, w, []arrow.Record{r}) } -func RecordToFile(schema *dynparquet.Schema, w *parquet.GenericWriter[any], r arrow.Record) error { +func RecordsToFile(schema *dynparquet.Schema, w dynparquet.ParquetWriter, recs []arrow.Record) error { + dynColSets := make([]map[string][]string, 0, len(recs)) + for _, r := range recs { + dynColSets = append(dynColSets, RecordDynamicCols(r)) + } + dynCols := dynparquet.MergeDynamicColumnSets(dynColSets) defer w.Close() - ps, err := schema.DynamicParquetSchema(RecordDynamicCols(r)) + ps, err := schema.GetDynamicParquetSchema(dynCols) if err != nil { return err } + defer schema.PutPooledParquetSchema(ps) - rows := make([]parquet.Row, 0, r.NumRows()) - finalFields := ps.Fields() - recordFields := r.Schema().Fields() - for i := 0; i < int(r.NumRows()); i++ { - row, err := getRecordRow(schema, ps, r, i, finalFields, 
recordFields) - if err != nil { + finalFields := ps.Schema.Fields() + + for _, r := range recs { + if err := recordToRows(w, r, 0, int(r.NumRows()), finalFields); err != nil { return err } - rows = append(rows, row) - } - - _, err = w.WriteRows(rows) - if err != nil { - return err } return nil diff --git a/pqarrow/parquet_test.go b/pqarrow/parquet_test.go new file mode 100644 index 000000000..a552040e1 --- /dev/null +++ b/pqarrow/parquet_test.go @@ -0,0 +1,321 @@ +package pqarrow + +import ( + "fmt" + "io" + "testing" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/parquet-go/parquet-go" + "github.com/stretchr/testify/require" +) + +type noopWriter struct{} + +func (w noopWriter) Schema() *parquet.Schema { return nil } + +func (w noopWriter) Write(r []any) (int, error) { return len(r), nil } + +func (w noopWriter) WriteRows(r []parquet.Row) (int, error) { return len(r), nil } + +func (w noopWriter) Flush() error { return nil } + +func (w noopWriter) Close() error { return nil } + +func (w noopWriter) Reset(_ io.Writer) {} + +func BenchmarkRecordsToFile(b *testing.B) { + b.ReportAllocs() + schema := arrow.NewSchema([]arrow.Field{ + {Name: "int_column", Type: arrow.PrimitiveTypes.Int64}, + {Name: "string_column", Type: arrow.BinaryTypes.String}, + { + Name: "dictionary_column", + Type: &arrow.DictionaryType{ + ValueType: arrow.BinaryTypes.String, + IndexType: arrow.PrimitiveTypes.Int32, + }, + }, + { + Name: "list_column", + Type: arrow.ListOf(&arrow.DictionaryType{ + ValueType: arrow.BinaryTypes.Binary, + IndexType: arrow.PrimitiveTypes.Int32, + }), + }, + }, nil) + + parquetFields := parquet.Group{} + for _, f := range schema.Fields() { + // No need to create actual nodes since the code only looks for field + // names. 
+ parquetFields[f.Name] = parquet.Node(nil) + } + + checked := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer checked.AssertSize(b, 0) + builder := array.NewRecordBuilder(memory.DefaultAllocator, schema) + defer builder.Release() + + const numRows = 1024 + for i := 0; i < numRows; i++ { + builder.Field(0).(*array.Int64Builder).Append(int64(i)) + + builder.Field(1).(*array.StringBuilder).Append(fmt.Sprintf("%d", i)) + + dictBuilder := builder.Field(2).(*array.BinaryDictionaryBuilder) + require.NoError(b, dictBuilder.Append([]byte(fmt.Sprintf("%d-key", i)))) + + listBuilder := builder.Field(3).(*array.ListBuilder) + listBuilder.Append(true) + valueBuilder := listBuilder.ValueBuilder().(*array.BinaryDictionaryBuilder) + require.NoError(b, valueBuilder.Append([]byte(fmt.Sprintf("string-1-%d", i)))) + require.NoError(b, valueBuilder.Append([]byte(fmt.Sprintf("string-2-%d", i)))) + require.NoError(b, valueBuilder.Append([]byte(fmt.Sprintf("string-3-%d", i)))) + } + + record := builder.NewRecord() + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := recordToRows( + noopWriter{}, record, 0, numRows, parquetFields.Fields(), + ); err != nil { + b.Fatal(err) + } + } +} + +func TestRecordToRows_list(t *testing.T) { + b := array.NewRecordBuilder(memory.NewGoAllocator(), + arrow.NewSchema( + []arrow.Field{ + { + Name: "int", + Type: arrow.ListOf(arrow.PrimitiveTypes.Int64), + Nullable: true, + }, + { + Name: "double", + Type: arrow.ListOf(arrow.PrimitiveTypes.Float64), + Nullable: true, + }, + { + Name: "string", + Type: arrow.ListOf(arrow.BinaryTypes.String), + Nullable: true, + }, + { + Name: "binary", + Type: arrow.ListOf(arrow.BinaryTypes.Binary), + Nullable: true, + }, + { + Name: "bool", + Type: arrow.ListOf(arrow.FixedWidthTypes.Boolean), + Nullable: true, + }, + }, nil, + ), + ) + defer b.Release() + + type Sample struct { + Int []int64 + Double []float64 + String []string + Binary [][]byte + Bool []bool + } + samples := []Sample{ + {}, // handle nulls 
+ { + Int: []int64{1}, + Double: []float64{1}, + String: []string{"1"}, + Binary: [][]byte{ + []byte("1"), + }, + Bool: []bool{true}, + }, + } + ints := b.Field(0).(*array.ListBuilder) + intsBuild := ints.ValueBuilder().(*array.Int64Builder) + double := b.Field(1).(*array.ListBuilder) + doubleBuild := double.ValueBuilder().(*array.Float64Builder) + str := b.Field(2).(*array.ListBuilder) + strBuild := str.ValueBuilder().(*array.StringBuilder) + bin := b.Field(3).(*array.ListBuilder) + binBuild := bin.ValueBuilder().(*array.BinaryBuilder) + boolean := b.Field(4).(*array.ListBuilder) + booleanBuild := boolean.ValueBuilder().(*array.BooleanBuilder) + + for _, s := range samples { + appendList[int64](ints, s.Int, intsBuild.Append) + appendList[float64](double, s.Double, doubleBuild.Append) + appendList[string](str, s.String, strBuild.Append) + appendList[[]byte](bin, s.Binary, binBuild.Append) + appendList[bool](boolean, s.Bool, booleanBuild.Append) + } + r := b.NewRecord() + defer r.Release() + + parquetFields := parquet.Group{} + for _, f := range r.Schema().Fields() { + parquetFields[f.Name] = parquet.Required(parquet.Node(nil)) + } + clone := &cloneWriter{} + if err := recordToRows( + clone, r, 0, int(r.NumRows()), parquetFields.Fields(), + ); err != nil { + t.Fatal(err) + } + // parquetFields.Fields() changes the order of the rows + // From + // int, double, string, binary, bool + // To + // binary, bool, double, int, string + want := []parquet.Row{ + { + parquet.Value{}.Level(0, 0, 0), + parquet.Value{}.Level(0, 0, 1), + parquet.Value{}.Level(0, 0, 2), + parquet.Value{}.Level(0, 0, 3), + parquet.Value{}.Level(0, 0, 4), + }, + { + parquet.ByteArrayValue([]byte("1")).Level(0, 1, 0), + parquet.BooleanValue(true).Level(0, 1, 1), + parquet.DoubleValue(1).Level(0, 1, 2), + parquet.Int64Value(1).Level(0, 1, 3), + parquet.ByteArrayValue([]byte("1")).Level(0, 1, 4), + }, + } + require.Equal(t, len(want), len(clone.rows)) + for i := range want { + require.True(t, 
want[i].Equal(clone.rows[i])) + } +} + +type cloneWriter struct { + rows []parquet.Row +} + +func (w cloneWriter) Schema() *parquet.Schema { return nil } + +func (w cloneWriter) Write(r []any) (int, error) { return len(r), nil } + +func (w *cloneWriter) WriteRows(r []parquet.Row) (int, error) { + for i := range r { + w.rows = append(w.rows, r[i].Clone()) + } + return len(r), nil +} + +func (w cloneWriter) Flush() error { return nil } + +func (w cloneWriter) Close() error { return nil } + +func (w cloneWriter) Reset(_ io.Writer) {} + +func appendList[T any](ls *array.ListBuilder, values []T, add func(T)) { + if values == nil { + ls.AppendNull() + return + } + ls.Append(true) + for i := range values { + add(values[i]) + } +} + +func TestRecordDynamicCols(t *testing.T) { + build := array.NewRecordBuilder(memory.NewGoAllocator(), + arrow.NewSchema([]arrow.Field{ + { + Name: "labels.label1", + Type: arrow.BinaryTypes.String, + }, + { + Name: "labels.label2", + Type: arrow.BinaryTypes.String, + }, + { + Name: "labels.label3", + Type: arrow.BinaryTypes.String, + }, + }, nil), + ) + defer build.Release() + r := build.NewRecord() + + res := RecordDynamicCols(r) + require.Equal(t, map[string][]string{ + "labels": {"label1", "label2", "label3"}, + }, res) +} + +func BenchmarkRecordDynamicCols(b *testing.B) { + build := array.NewRecordBuilder(memory.NewGoAllocator(), + arrow.NewSchema([]arrow.Field{ + { + Name: "labels.label1", + Type: arrow.BinaryTypes.String, + }, + { + Name: "labels.label2", + Type: arrow.BinaryTypes.String, + }, + { + Name: "labels.label3", + Type: arrow.BinaryTypes.String, + }, + }, nil), + ) + defer build.Release() + r := build.NewRecord() + defer r.Release() + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + _ = RecordDynamicCols(r) + } +} + +func Test_Uint64RecordToRow(t *testing.T) { + alloc := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer alloc.AssertSize(t, 0) + build := array.NewRecordBuilder(alloc, + 
arrow.NewSchema([]arrow.Field{ + { + Name: "value", + Type: arrow.PrimitiveTypes.Uint64, + }, + }, nil), + ) + defer build.Release() + build.Field(0).(*array.Uint64Builder).AppendValues([]uint64{0, 1, 2, 3}, nil) + r := build.NewRecord() + defer r.Release() + + // Build with required + parquetFields := parquet.Group{} + parquetFields["value"] = parquet.Required(parquet.Int(64)) + schema := parquet.NewSchema("test", parquetFields) + + row, err := RecordToRow(schema, r, 2) + require.NoError(t, err) + + require.Equal(t, "[2]", fmt.Sprintf("%v", row)) + + // Build with optional + parquetFields = parquet.Group{} + parquetFields["value"] = parquet.Optional(parquet.Int(64)) + schema = parquet.NewSchema("test", parquetFields) + + row, err = RecordToRow(schema, r, 2) + require.NoError(t, err) + + require.Equal(t, "[2]", fmt.Sprintf("%v", row)) +} diff --git a/pqarrow/writer/writer.go b/pqarrow/writer/writer.go index 336670202..31b1f2ad0 100644 --- a/pqarrow/writer/writer.go +++ b/pqarrow/writer/writer.go @@ -1,66 +1,70 @@ package writer import ( + "errors" "fmt" "io" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/segmentio/parquet-go" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/parquet-go/parquet-go" "github.com/polarsignals/frostdb/pqarrow/builder" ) type ValueWriter interface { - WritePage(p parquet.Page) error + // Write writes a slice of values to the underlying builder (slow path). Write([]parquet.Value) } +type PageWriter interface { + ValueWriter + // WritePage is the optimized path for writing a page of values to the + // underlying builder. There are cases in which the given page cannot be + // written directly, in which case ErrCannotWritePageDirectly is returned. + // The caller should fall back to writing values. 
+ WritePage(parquet.Page) error +} + +var ErrCannotWritePageDirectly = errors.New("cannot write page directly") + +type writerBase struct{} + +func (w writerBase) canWritePageDirectly(p parquet.Page) bool { + // Currently, for most writers, only pages with zero nulls and no dictionary + // can be written. + return p.NumNulls() == 0 && p.Dictionary() == nil +} + type binaryValueWriter struct { - b *builder.OptBinaryBuilder - scratch struct { - values []parquet.Value - } + writerBase + b *builder.OptBinaryBuilder } type NewWriterFunc func(b builder.ColumnBuilder, numValues int) ValueWriter -func NewBinaryValueWriter(b builder.ColumnBuilder, numValues int) ValueWriter { +func NewBinaryValueWriter(b builder.ColumnBuilder, _ int) ValueWriter { return &binaryValueWriter{ b: b.(*builder.OptBinaryBuilder), } } func (w *binaryValueWriter) Write(values []parquet.Value) { - w.b.AppendParquetValues(values) + if err := w.b.AppendParquetValues(values); err != nil { + panic("unable to write value") // TODO: handle this error gracefully + } } func (w *binaryValueWriter) WritePage(p parquet.Page) error { - if p.NumNulls() != 0 { - reader := p.Values() - if cap(w.scratch.values) < int(p.NumValues()) { - w.scratch.values = make([]parquet.Value, p.NumValues()) - } - w.scratch.values = w.scratch.values[:p.NumValues()] - _, err := reader.ReadValues(w.scratch.values) - // We're reading all values in the page so we always expect an io.EOF. - if err != nil && err != io.EOF { - return fmt.Errorf("read values: %w", err) - } - w.Write(w.scratch.values) - return nil + if !w.canWritePageDirectly(p) { + return ErrCannotWritePageDirectly } - - // No nulls in page. 
values := p.Data() - w.b.AppendData(values.ByteArray()) - return nil + return w.b.AppendData(values.ByteArray()) } type int64ValueWriter struct { - b *builder.OptInt64Builder - scratch struct { - values []parquet.Value - } + writerBase + b *builder.OptInt64Builder } func NewInt64ValueWriter(b builder.ColumnBuilder, _ int) ValueWriter { @@ -75,21 +79,9 @@ func (w *int64ValueWriter) Write(values []parquet.Value) { } func (w *int64ValueWriter) WritePage(p parquet.Page) error { - if p.NumNulls() != 0 { - reader := p.Values() - if cap(w.scratch.values) < int(p.NumValues()) { - w.scratch.values = make([]parquet.Value, p.NumValues()) - } - w.scratch.values = w.scratch.values[:p.NumValues()] - _, err := reader.ReadValues(w.scratch.values) - // We're reading all values in the page so we always expect an io.EOF. - if err != nil && err != io.EOF { - return fmt.Errorf("read values: %w", err) - } - w.Write(w.scratch.values) - return nil + if !w.canWritePageDirectly(p) { + return ErrCannotWritePageDirectly } - // No nulls in page. values := p.Data() w.b.AppendData(values.Int64()) @@ -121,22 +113,6 @@ func (w *uint64ValueWriter) Write(values []parquet.Value) { } } -// TODO: implement fast path of writing the whole page directly. -func (w *uint64ValueWriter) WritePage(p parquet.Page) error { - reader := p.Values() - - values := make([]parquet.Value, p.NumValues()) - _, err := reader.ReadValues(values) - // We're reading all values in the page so we always expect an io.EOF. 
- if err != nil && err != io.EOF { - return fmt.Errorf("read values: %w", err) - } - - w.Write(values) - - return nil -} - type repeatedValueWriter struct { b *builder.ListBuilder values ValueWriter @@ -154,13 +130,6 @@ func NewListValueWriter(newValueWriter func(b builder.ColumnBuilder, numValues i } func (w *repeatedValueWriter) Write(values []parquet.Value) { - v0 := values[0] - rep := v0.RepetitionLevel() - def := v0.DefinitionLevel() - if rep == 0 && def == 0 { - w.b.AppendNull() - } - listStart := false start := 0 for i, v := range values { @@ -169,30 +138,23 @@ func (w *repeatedValueWriter) Write(values []parquet.Value) { w.b.Append(true) w.values.Write(values[start:i]) } - listStart = true - start = i + + if v.DefinitionLevel() == 0 { + w.b.AppendNull() + listStart = false + start = i + 1 + } else { + listStart = true + start = i + } } } // write final list - w.b.Append(true) - w.values.Write(values[start:]) -} - -// TODO: implement fast path of writing the whole page directly. -func (w *repeatedValueWriter) WritePage(p parquet.Page) error { - reader := p.Values() - - values := make([]parquet.Value, p.NumValues()) - _, err := reader.ReadValues(values) - // We're reading all values in the page so we always expect an io.EOF. 
- if err != nil && err != io.EOF { - return fmt.Errorf("read values: %w", err) + if len(values[start:]) > 0 { + w.b.Append(true) + w.values.Write(values[start:]) } - - w.Write(values) - - return nil } type float64ValueWriter struct { @@ -219,46 +181,32 @@ func (w *float64ValueWriter) Write(values []parquet.Value) { } func (w *float64ValueWriter) WritePage(p parquet.Page) error { - reader := p.Values() + ireader, ok := p.Values().(parquet.DoubleReader) + if !ok { + return ErrCannotWritePageDirectly + } - ireader, ok := reader.(parquet.DoubleReader) - if ok { - // fast path - if w.buf == nil { - w.buf = make([]float64, p.NumValues()) + if w.buf == nil { + w.buf = make([]float64, p.NumValues()) + } + values := w.buf + for { + n, err := ireader.ReadDoubles(values) + if err != nil && err != io.EOF { + return fmt.Errorf("read values: %w", err) } - values := w.buf - for { - n, err := ireader.ReadDoubles(values) - if err != nil && err != io.EOF { - return fmt.Errorf("read values: %w", err) - } - w.b.AppendValues(values[:n], nil) - if err == io.EOF { - break - } + w.b.AppendValues(values[:n], nil) + if err == io.EOF { + break } - return nil } - - values := make([]parquet.Value, p.NumValues()) - _, err := reader.ReadValues(values) - // We're reading all values in the page so we always expect an io.EOF. 
- if err != nil && err != io.EOF { - return fmt.Errorf("read values: %w", err) - } - - w.Write(values) - return nil } type booleanValueWriter struct { - b *builder.OptBooleanBuilder - scratch struct { - values []parquet.Value - } + writerBase + b *builder.OptBooleanBuilder } func NewBooleanValueWriter(b builder.ColumnBuilder, numValues int) ValueWriter { @@ -274,22 +222,9 @@ func (w *booleanValueWriter) Write(values []parquet.Value) { } func (w *booleanValueWriter) WritePage(p parquet.Page) error { - if p.NumNulls() != 0 { - reader := p.Values() - if cap(w.scratch.values) < int(p.NumValues()) { - w.scratch.values = make([]parquet.Value, p.NumValues()) - } - w.scratch.values = w.scratch.values[:p.NumValues()] - _, err := reader.ReadValues(w.scratch.values) - // We're reading all values in the page so we always expect an io.EOF. - if err != nil && err != io.EOF { - return fmt.Errorf("read values: %w", err) - } - w.Write(w.scratch.values) - return nil + if !w.canWritePageDirectly(p) { + return ErrCannotWritePageDirectly } - - // No nulls in page. values := p.Data() w.b.Append(values.Boolean(), int(p.NumValues())) return nil @@ -310,19 +245,6 @@ func NewStructWriterFromOffset(offset int) NewWriterFunc { } } -func (s *structWriter) WritePage(p parquet.Page) error { - // TODO: there's probably a more optimized way to handle a page of values here; but doing this for simplicity of implementation right meow. - values := make([]parquet.Value, p.NumValues()) - _, err := p.Values().ReadValues(values) - // We're reading all values in the page so we always expect an io.EOF. 
- if err != nil && err != io.EOF { - return fmt.Errorf("read values: %w", err) - } - - s.Write(values) - return nil -} - func (s *structWriter) Write(values []parquet.Value) { total := 0 for _, v := range values { @@ -452,11 +374,7 @@ func NewMapWriter(b builder.ColumnBuilder, _ int) ValueWriter { } } -func (m *mapWriter) WritePage(p parquet.Page) error { - panic("not implemented") -} - -func (m *mapWriter) Write(values []parquet.Value) { +func (m *mapWriter) Write(_ []parquet.Value) { panic("not implemented") } @@ -485,15 +403,3 @@ func (w *dictionaryValueWriter) Write(values []parquet.Value) { } } } - -func (w *dictionaryValueWriter) WritePage(p parquet.Page) error { - values := make([]parquet.Value, p.NumValues()) - _, err := p.Values().ReadValues(values) - // We're reading all values in the page so we always expect an io.EOF. - if err != nil && err != io.EOF { - return fmt.Errorf("read values: %w", err) - } - - w.Write(values) - return nil -} diff --git a/proto/frostdb/schema/v1alpha1/schema.proto b/proto/frostdb/schema/v1alpha1/schema.proto index 1b093d865..a0fddbecb 100644 --- a/proto/frostdb/schema/v1alpha1/schema.proto +++ b/proto/frostdb/schema/v1alpha1/schema.proto @@ -4,101 +4,114 @@ package frostdb.schema.v1alpha1; // Schema definition for a table. message Schema { - // Name of the schema. - string name = 1; - // Columns in the schema. - repeated Column columns = 2; - // Columns to sort by in the schema. - repeated SortingColumn sorting_columns = 3; + // Name of the schema. + string name = 1; + // Columns in the schema. + repeated Column columns = 2; + // Columns to sort by in the schema. + repeated SortingColumn sorting_columns = 3; + // UniquePrimaryIndex defines whether the primary index is unique. Duplicate + // (according to the sorting column) rows will be dropped on compaction. + bool unique_primary_index = 4; } // Column definition. message Column { - // Name of the column. - string name = 1; - // Storage layout of the column. 
- StorageLayout storage_layout = 2; - // Whether the column can dynamically expand. - bool dynamic = 3; + // Name of the column. + string name = 1; + // Storage layout of the column. + StorageLayout storage_layout = 2; + // Whether the column can dynamically expand. + bool dynamic = 3; + // Prehash the column before storing it. This is an optimization to speed up aggregation queries when this column is often aggregated. + // This will create a separate non-dynamic column with the same name and the prefix "hashed." that contains the prehashed values. + bool prehash = 4; } // Storage layout describes the physical storage properties of a column. message StorageLayout { - // Type enum of a column. - enum Type { - // Unknown type. - TYPE_UNKNOWN_UNSPECIFIED = 0; - // Represents a string type. - TYPE_STRING = 1; - // Represents an int64 type. - TYPE_INT64 = 2; - // Represents a double type. - TYPE_DOUBLE = 3; - // Represents a boolean type. - TYPE_BOOL = 4; - } + // Type enum of a column. + enum Type { + // Unknown type. + TYPE_UNKNOWN_UNSPECIFIED = 0; + // Represents a string type. + TYPE_STRING = 1; + // Represents an int64 type. + TYPE_INT64 = 2; + // Represents a double type. + TYPE_DOUBLE = 3; + // Represents a boolean type. + TYPE_BOOL = 4; + // Represents a int32 type. + TYPE_INT32 = 5; + // Represents a uint64 type. + TYPE_UINT64 = 6; + } - // Type of the column. - Type type = 1; + // Type of the column. + Type type = 1; - // Encoding enum of a column. - enum Encoding { - // Plain encoding. - ENCODING_PLAIN_UNSPECIFIED = 0; - // Dictionary run-length encoding. - ENCODING_RLE_DICTIONARY = 1; - // Delta binary packed encoding. - ENCODING_DELTA_BINARY_PACKED = 2; - // Delta Byte Array encoding. - ENCODING_DELTA_BYTE_ARRAY = 3; - // Delta Length Byte Array encoding. - ENCODING_DELTA_LENGTH_BYTE_ARRAY = 4; - } + // Encoding enum of a column. + enum Encoding { + // Plain encoding. + ENCODING_PLAIN_UNSPECIFIED = 0; + // Dictionary run-length encoding. 
+ ENCODING_RLE_DICTIONARY = 1; + // Delta binary packed encoding. + ENCODING_DELTA_BINARY_PACKED = 2; + // Delta Byte Array encoding. + ENCODING_DELTA_BYTE_ARRAY = 3; + // Delta Length Byte Array encoding. + ENCODING_DELTA_LENGTH_BYTE_ARRAY = 4; + } - // Encoding of the column. - Encoding encoding = 2; + // Encoding of the column. + Encoding encoding = 2; - // Compression enum of a column. - enum Compression { - // No compression. - COMPRESSION_NONE_UNSPECIFIED = 0; - // Snappy compression. - COMPRESSION_SNAPPY = 1; - // GZIP compression. - COMPRESSION_GZIP = 2; - // Brotli compression. - COMPRESSION_BROTLI = 3; - // LZ4_RAW compression. - COMPRESSION_LZ4_RAW = 4; - // ZSTD compression. - COMPRESSION_ZSTD = 5; - } + // Compression enum of a column. + enum Compression { + // No compression. + COMPRESSION_NONE_UNSPECIFIED = 0; + // Snappy compression. + COMPRESSION_SNAPPY = 1; + // GZIP compression. + COMPRESSION_GZIP = 2; + // Brotli compression. + COMPRESSION_BROTLI = 3; + // LZ4_RAW compression. + COMPRESSION_LZ4_RAW = 4; + // ZSTD compression. + COMPRESSION_ZSTD = 5; + } - // Compression of the column. - Compression compression = 3; + // Compression of the column. + Compression compression = 3; - // Wether values in the column are allowed to be null. - bool nullable = 4; + // Wether values in the column are allowed to be null. + bool nullable = 4; + + // Whether the column is repeated. + bool repeated = 5; } // SortingColumn definition. message SortingColumn { - // Name of the column to sort by. - string name = 1; + // Name of the column to sort by. + string name = 1; - // Enum of possible sorting directions. - enum Direction { - // Unknown direction. - DIRECTION_UNKNOWN_UNSPECIFIED = 0; - // Sort in ascending order. - DIRECTION_ASCENDING = 1; - // Sort in descending order. - DIRECTION_DESCENDING = 2; - } + // Enum of possible sorting directions. + enum Direction { + // Unknown direction. + DIRECTION_UNKNOWN_UNSPECIFIED = 0; + // Sort in ascending order. 
+ DIRECTION_ASCENDING = 1; + // Sort in descending order. + DIRECTION_DESCENDING = 2; + } - // Direction of the sorting. - Direction direction = 2; + // Direction of the sorting. + Direction direction = 2; - // Whether nulls are the smallest or largest values. - bool nulls_first = 3; + // Whether nulls are the smallest or largest values. + bool nulls_first = 3; } diff --git a/proto/frostdb/schema/v1alpha2/schema.proto b/proto/frostdb/schema/v1alpha2/schema.proto index 04322c134..17788b04b 100644 --- a/proto/frostdb/schema/v1alpha2/schema.proto +++ b/proto/frostdb/schema/v1alpha2/schema.proto @@ -3,175 +3,183 @@ syntax = "proto3"; package frostdb.schema.v1alpha2; /* This schema version differs from the previous one in that it supports nested schema definitions. - For example the following struct - labels: { - label1: value1 - label2: value2 - } - timestamps: [1,2,3] - values: [2,2,2] - - Could be represented by this schema as: - - { - "root": { - "name": "example_schema", - "nodes": [ - { - "group": { - "name": "labels", - "nodes": [ - { - "leaf": { - "name": "label1", - "storage_layout": { ... } - } - }, - { - "leaf": { - "name": "label2", - "storage_layout": { ... } - } - } - ] - } - }, - { - "leaf": { - "name": "timestamps", - "storage_layout": { ... } - } - }, - { - "leaf": { - "name": "values", - "storage_layout": { ... } - } - } - ] - } - } - + For example the following struct + labels: { + label1: value1 + label2: value2 + } + timestamps: [1,2,3] + values: [2,2,2] + + Could be represented by this schema as: + + { + "root": { + "name": "example_schema", + "nodes": [ + { + "group": { + "name": "labels", + "nodes": [ + { + "leaf": { + "name": "label1", + "storage_layout": { ... } + } + }, + { + "leaf": { + "name": "label2", + "storage_layout": { ... } + } + } + ] + } + }, + { + "leaf": { + "name": "timestamps", + "storage_layout": { ... } + } + }, + { + "leaf": { + "name": "values", + "storage_layout": { ... 
} + } + } + ] + } + } + */ // Schema definition for a table. message Schema { - // Root node of the schema. - Group root = 2; - // Columns to sort by in the schema. - repeated SortingColumn sorting_columns = 3; + // Root node of the schema. + Group root = 2; + // Columns to sort by in the schema. + repeated SortingColumn sorting_columns = 3; + // UniquePrimaryIndex defines whether the primary index is unique. Duplicate + // (according to the sorting column) rows will be dropped on compaction. + bool unique_primary_index = 4; } // Node is a node in a schema tree. message Node { - oneof type { - // Leaf node of the schema. - Leaf leaf = 1; - // Group of other nodes for nested schemas. - Group group = 2; - } + // type can either be a leaf or a group. + oneof type { + // Leaf node of the schema. + Leaf leaf = 1; + // Group of other nodes for nested schemas. + Group group = 2; + } } // Leaf definition. message Leaf { - // Name of the column. - string name = 1; - // Storage layout of the column. - StorageLayout storage_layout = 2; + // Name of the column. + string name = 1; + // Storage layout of the column. + StorageLayout storage_layout = 2; } // Group is a grouping of nodes. message Group { - // Name of the group. - string name = 1; - // Wether the group is allowed to be null. - bool nullable = 2; - // Indicates whether the group is repeated. - bool repeated = 3; - // Nodes that this group is composed of. - repeated Node nodes = 4; + // Name of the group. + string name = 1; + // Wether the group is allowed to be null. + bool nullable = 2; + // Indicates whether the group is repeated. + bool repeated = 3; + // Nodes that this group is composed of. + repeated Node nodes = 4; } // Storage layout describes the physical storage properties of a column. message StorageLayout { - // Type enum of a column. - enum Type { - // Unknown type. - TYPE_UNKNOWN_UNSPECIFIED = 0; - // Represents a string type. - TYPE_STRING = 1; - // Represents an int64 type. 
- TYPE_INT64 = 2; - // Represents a double type. - TYPE_DOUBLE = 3; - // Represents a boolean type. - TYPE_BOOL = 4; - } - - // Type of the column. - Type type = 1; - - // Encoding enum of a column. - enum Encoding { - // Plain encoding. - ENCODING_PLAIN_UNSPECIFIED = 0; - // Dictionary run-length encoding. - ENCODING_RLE_DICTIONARY = 1; - // Delta binary packed encoding. - ENCODING_DELTA_BINARY_PACKED = 2; - // Delta Byte Array encoding. - ENCODING_DELTA_BYTE_ARRAY = 3; - // Delta Length Byte Array encoding. - ENCODING_DELTA_LENGTH_BYTE_ARRAY = 4; - } - - // Encoding of the column. - Encoding encoding = 2; - - // Compression enum of a column. - enum Compression { - // No compression. - COMPRESSION_NONE_UNSPECIFIED = 0; - // Snappy compression. - COMPRESSION_SNAPPY = 1; - // GZIP compression. - COMPRESSION_GZIP = 2; - // Brotli compression. - COMPRESSION_BROTLI = 3; - // LZ4_RAW compression. - COMPRESSION_LZ4_RAW = 4; - // ZSTD compression. - COMPRESSION_ZSTD = 5; - } - - // Compression of the column. - Compression compression = 3; - - // Wether values in the column are allowed to be null. - bool nullable = 4; - - // Indicates whether the parquet column is repeated. - bool repeated = 5; + // Type enum of a column. + enum Type { + // Unknown type. + TYPE_UNKNOWN_UNSPECIFIED = 0; + // Represents a string type. + TYPE_STRING = 1; + // Represents an int64 type. + TYPE_INT64 = 2; + // Represents a double type. + TYPE_DOUBLE = 3; + // Represents a boolean type. + TYPE_BOOL = 4; + // Represents a int32 type. + TYPE_INT32 = 5; + // Represents a uint64 type. + TYPE_UINT64 = 6; + } + + // Type of the column. + Type type = 1; + + // Encoding enum of a column. + enum Encoding { + // Plain encoding. + ENCODING_PLAIN_UNSPECIFIED = 0; + // Dictionary run-length encoding. + ENCODING_RLE_DICTIONARY = 1; + // Delta binary packed encoding. + ENCODING_DELTA_BINARY_PACKED = 2; + // Delta Byte Array encoding. + ENCODING_DELTA_BYTE_ARRAY = 3; + // Delta Length Byte Array encoding. 
+ ENCODING_DELTA_LENGTH_BYTE_ARRAY = 4; + } + + // Encoding of the column. + Encoding encoding = 2; + + // Compression enum of a column. + enum Compression { + // No compression. + COMPRESSION_NONE_UNSPECIFIED = 0; + // Snappy compression. + COMPRESSION_SNAPPY = 1; + // GZIP compression. + COMPRESSION_GZIP = 2; + // Brotli compression. + COMPRESSION_BROTLI = 3; + // LZ4_RAW compression. + COMPRESSION_LZ4_RAW = 4; + // ZSTD compression. + COMPRESSION_ZSTD = 5; + } + + // Compression of the column. + Compression compression = 3; + + // Wether values in the column are allowed to be null. + bool nullable = 4; + + // Indicates whether the parquet column is repeated. + bool repeated = 5; } // SortingColumn definition. message SortingColumn { - // Path to the leaf column to sort by. - string path = 1; - - // Enum of possible sorting directions. - enum Direction { - // Unknown direction. - DIRECTION_UNKNOWN_UNSPECIFIED = 0; - // Sort in ascending order. - DIRECTION_ASCENDING = 1; - // Sort in descending order. - DIRECTION_DESCENDING = 2; - } - - // Direction of the sorting. - Direction direction = 2; - - // Whether nulls are the smallest or largest values. - bool nulls_first = 3; + // Path to the leaf column to sort by. + string path = 1; + + // Enum of possible sorting directions. + enum Direction { + // Unknown direction. + DIRECTION_UNKNOWN_UNSPECIFIED = 0; + // Sort in ascending order. + DIRECTION_ASCENDING = 1; + // Sort in descending order. + DIRECTION_DESCENDING = 2; + } + + // Direction of the sorting. + Direction direction = 2; + + // Whether nulls are the smallest or largest values. 
+ bool nulls_first = 3; } diff --git a/proto/frostdb/snapshot/v1alpha1/snapshot.proto b/proto/frostdb/snapshot/v1alpha1/snapshot.proto new file mode 100644 index 000000000..76cf90754 --- /dev/null +++ b/proto/frostdb/snapshot/v1alpha1/snapshot.proto @@ -0,0 +1,65 @@ +syntax = "proto3"; + +package frostdb.snapshot.v1alpha1; + +import "frostdb/table/v1alpha1/config.proto"; + +// FooterData is a message stored in the footer of a snapshot file that encodes +// data about the rest of the file. +message FooterData { + reserved 2; // Previously used for transaction metadata. + // table_metadata is a list of all tables in the snapshot. + repeated Table table_metadata = 1; +} + +// Table encodes metadata about a table in a snapshot. +message Table { + // name of the table. + string name = 1; + // config is the configuration of the table. + frostdb.table.v1alpha1.TableConfig config = 2; + // TableBlock + message TableBlock { + // ulid is the unique identifier of the block. + bytes ulid = 1; + // size is the size of the block in bytes. + int64 size = 2; + // min_tx is the minimum transaction id in the block. + uint64 min_tx = 3; + // prev_tx is the transaction id of the previous block. + uint64 prev_tx = 4; + } + // active_block is the active block of the table. + TableBlock active_block = 3; + // granule_metadata is a list of all granules in the table. + repeated Granule granule_metadata = 4; +} + +// Granule encodes metadata about a granule in a table. +message Granule { + // part_metadata is a list of all parts in the granule. + repeated Part part_metadata = 1; +} + +// Part encodes metadata about a part in a granule. +message Part { + // start_offset is the start offset of the part. + int64 start_offset = 1; + // end_offset is the end offset of the part. + int64 end_offset = 2; + // tx is the transaction id of the part. + uint64 tx = 3; + // compaction_level is the compaction level of the part. 
+ uint64 compaction_level = 4; + // Encoding is a enum of possible encodings of the part. + enum Encoding { + // ENCODING_UNSPECIFIED is an unknown encoding. + ENCODING_UNSPECIFIED = 0; + // ENCODING_PARQUET is a parquet encoding. + ENCODING_PARQUET = 1; + // ENCODING_ARROW is an arrow encoding. + ENCODING_ARROW = 2; + } + // encoding is the actual encoding of the part. + Encoding encoding = 5; +} diff --git a/proto/frostdb/storage/v1alpha1/storage.proto b/proto/frostdb/storage/v1alpha1/storage.proto new file mode 100644 index 000000000..30dcabc52 --- /dev/null +++ b/proto/frostdb/storage/v1alpha1/storage.proto @@ -0,0 +1,307 @@ +syntax = "proto3"; + +package frostdb.storage.v1alpha1; + +// FrostDBService is a service that allows clients to execute queries as logicalplan +// against an instance of FrostDB. +service FrostDBService { + // Query executes the query plan contained in the request and returns back + // the results as arrow records in IPC format. + rpc Query(QueryRequest) returns (stream QueryResponse) {} +} + +// QueryRequest is the message sent to the Query gRPC endpoint. +message QueryRequest { + // PlanRoot is the root of the query plan (i.e. a scan node). + PlanNode plan_root = 1; + // description describes the query. + string description = 2; +} + +// QueryResponse is the message received from the Query gRPC endpoint. +message QueryResponse { + // The resulting arrow record as bytes. + bytes record = 1; +} + +// PlanNode describes a query plan. +message PlanNode { + // Next is the next PlanNode in the plan, i.e. the node that this node should + // push data to. + PlanNode next = 1; + // Spec is this PlanNode's spec. + PlanNodeSpec spec = 2; +} + +// PlanNodeSpec is a PlanNode's information. This is a message that wraps oneof +// to ensure forward compatibility: +// https://developers.google.com/protocol-buffers/docs/proto3#backwards-compatibility_issues +message PlanNodeSpec { + // Spec is this PlanNode's information. 
+ oneof spec { + // TableScan is specified if this PlanNode represents a table scan. + TableScan table_scan = 1; + // SchemaScan is specified if this PlanNode represents a table scan. + SchemaScan schema_scan = 2; + // Filter is specified if this PlanNode represents a filter. + Filter filter = 3; + // Projections is specified if this PlanNode represents a projection. + Projection projection = 4; + // Distinct is specified if this PlanNode represents a distinct. + Distinct distinct = 5; + // Aggregation is specified if this PlanNode represents an aggregation. + Aggregation aggregation = 6; + // Limit is specified if this PlanNode represents a limit. + Limit limit = 7; + } +} + +// TableScan describes scanning a table to obtain rows. +message TableScan { + // Base specifies the fields shared with SchemaScan. + ScanBase base = 1; +} + +// SchemaScan describes scanning a table to obtain the schema. +message SchemaScan { + // Base specifies the fields shared with TableScan. + ScanBase base = 1; +} + +// ScanBase is a collection of fields shared by scans. +message ScanBase { + // Database is the name of the database to scan. + string database = 1; + // Table is the name of the table to scan. + string table = 2; +} + +// Filter describes a filter. +message Filter { + // Expr is the filter expression. + Expr expr = 1; +} + +// Distinct describes a distinct node. +message Distinct { + // Exprs are the expressions to distinct by. + repeated Expr exprs = 1; +} + +// Projection describes a projection node. +message Projection { + // Exprs are the expressions to project. + repeated Expr exprs = 1; +} + +// Limit describes a limit node. +message Limit { + // Expr is the expression to limit by. + Expr expr = 1; +} + +// Aggregation describes an aggregation node. +message Aggregation { + // GroupExprs are the expressions to group by. + repeated Expr group_exprs = 1; + // AggExprs are the aggregation functions applied to values of each group. 
+ repeated Expr agg_exprs = 2; +} + +// Expr is the base type for all expressions. +message Expr { + // def is the definition of the expression. + ExprDef def = 1; +} + +// ExprDefinition is a definition of an expression. It is intentionally only a +// single field that is a oneof to maintain forward compatibility when changing +// fields in each of the oneofs. This message should never have any other +// fields for forward compatibility. +message ExprDef { + // content is the content of the expression definition. + oneof content { + // BinaryExpr is a binary expression. + BinaryExpr binary_expr = 1; + // Column is a column expression. + Column column = 2; + // Literal is a literal expression. + Literal literal = 3; + // DynamicColumn is a dynamic column expression. + DynamicColumn dynamic_column = 4; + // AggregationFunction is an aggregation function expression. + AggregationFunction aggregation_function = 5; + // Alias is an alias expression. + Alias alias = 6; + // DurationExpr is a duration expression to group by. + DurationExpr duration = 7; + // ConvertExpr is an expression to convert a column from one type to another. + ConvertExpr convert = 8; + // IfExpr is an if expression. + IfExpr if = 9; + } +} + +// Op describes a binary operator. +enum Op { + // OP_UNKNOWN_UNSPECIFIED is the default value. It should not be used. + OP_UNKNOWN_UNSPECIFIED = 0; + // OP_EQ is the equality operator (`==`). + OP_EQ = 1; + // OP_NOT_EQ is the not-equality operator (`!=`). + OP_NOT_EQ = 2; + // OP_LT is the less-than operator (`<`). + OP_LT = 3; + // OP_LT_EQ is the less-than-or-equal operator (`<=`). + OP_LT_EQ = 4; + // OP_GT is the greater-than operator (`>`). + OP_GT = 5; + // OP_GT_EQ is the greater-than-or-equal operator (`>=`). + OP_GT_EQ = 6; + // OP_REGEX_MATCH is the regular expression match operator (`=~`). + OP_REGEX_MATCH = 7; + // OP_REGEX_NOT_MATCH is the regular expression not-match operator (`!~`). 
+  OP_REGEX_NOT_MATCH = 8;
+  // OP_AND is the logical and operator (`&&`).
+  OP_AND = 9;
+  // OP_OR is the logical or operator (`||`).
+  OP_OR = 10;
+  // OP_ADD is the arithmetic addition (`+`).
+  OP_ADD = 11;
+  // OP_SUB is the arithmetic subtraction (`-`).
+  OP_SUB = 12;
+  // OP_MUL is the arithmetic multiplication (`*`).
+  OP_MUL = 13;
+  // OP_DIV is the arithmetic division (`/`).
+  OP_DIV = 14;
+  // OP_CONTAINS performs substring matches.
+  OP_CONTAINS = 15;
+  // OP_NOT_CONTAINS performs negated substring matches.
+  OP_NOT_CONTAINS = 16;
+}
+
+// BinaryExpression is a binary expression.
+message BinaryExpr {
+  // left is the left operand.
+  Expr left = 1;
+  // right is the right operand.
+  Expr right = 2;
+  // op is the operator.
+  Op op = 3;
+}
+
+// IfExpr represents an if expression.
+message IfExpr {
+  // the condition
+  Expr condition = 1;
+  // the true branch
+  Expr then = 2;
+  // the false branch
+  Expr else = 3;
+}
+
+// ConvertExpr is an expression to convert an expression to another type.
+message ConvertExpr {
+  // the expression to convert
+  Expr expr = 1;
+  // the type to convert to
+  Type type = 2;
+}
+
+// Apache Arrow type.
+enum Type {
+  // The default value, this must not occur.
+  TYPE_UNKNOWN_UNSPECIFIED = 0;
+  // Float64
+  TYPE_FLOAT64 = 1;
+}
+
+// Column is an explicit column in a table.
+message Column {
+  // name is the name of the column.
+  string name = 1;
+}
+
+// Literal is a literal value.
+message Literal {
+  // content is the content of the literal.
+  LiteralContent content = 1;
+}
+
+// LiteralContent is the content of a literal.
+message LiteralContent {
+  // value is the value of the literal.
+  oneof value {
+    // null whether the value is the null value.
+    Null null_value = 1;
+    // bool_value is the bool value.
+    bool bool_value = 2;
+    // int32_value is the int32 value.
+    int32 int32_value = 3;
+    // uint32_value is the uint32 value.
+    uint32 uint32_value = 4;
+    // int64_value is the int64 value.
+ int64 int64_value = 5; + // uint64_value is the uint64 value. + uint64 uint64_value = 6; + // float_value is the float value. + float float_value = 7; + // double_value is the double value. + double double_value = 8; + // binary_value is the binary value. + bytes binary_value = 9; + // string_value is the string value. + string string_value = 10; + } +} + +// Null is the null value. +message Null {} + +// Alias is an alias for an expression. +message Alias { + // name is the name of the alias. + string name = 1; + // expr is the expression to alias. + Expr expr = 2; +} + +// DynamicColumn is a dynamic column. +message DynamicColumn { + // name is the name of the dynamic column. + string name = 1; +} + +// AggregationFunction is an aggregation function. +message AggregationFunction { + // Type is the type of aggregation function. + enum Type { + // UNKNOWN_UNSPECIFIED is the default value. It should not be used. + TYPE_UNKNOWN_UNSPECIFIED = 0; + // SUM is the sum aggregation function. + TYPE_SUM = 1; + // MIN is the min aggregation function. + TYPE_MIN = 2; + // MAX is the max aggregation function. + TYPE_MAX = 3; + // COUNT is the count aggregation function. + TYPE_COUNT = 4; + // AVG is the avg aggregation function. + TYPE_AVG = 5; + // UNIQUE is the unique aggregation function. + TYPE_UNIQUE = 6; + // AND is the and aggregation function. + TYPE_AND = 7; + } + + // type is the type of aggregation function. + Type type = 1; + // expr is the expression to aggregate. + Expr expr = 2; +} + +// DurationExpr is a duration expressed in milliseconds. +message DurationExpr { + // milliseconds is the duration in milliseconds. 
+  int64 milliseconds = 1;
+}
diff --git a/proto/frostdb/table/v1alpha1/config.proto b/proto/frostdb/table/v1alpha1/config.proto
new file mode 100644
index 000000000..fefb7d47b
--- /dev/null
+++ b/proto/frostdb/table/v1alpha1/config.proto
@@ -0,0 +1,23 @@
+syntax = "proto3";
+
+package frostdb.table.v1alpha1;
+
+import "frostdb/schema/v1alpha1/schema.proto";
+import "frostdb/schema/v1alpha2/schema.proto";
+
+// TableConfig is the configuration information for a table.
+message TableConfig {
+  // Schema of the table.
+  oneof schema {
+    // Deprecated schema definition. Used for backwards compatibility.
+    frostdb.schema.v1alpha1.Schema deprecated_schema = 1;
+    // Schema of the table. Use this field.
+    frostdb.schema.v1alpha2.Schema schema_v2 = 2;
+  }
+  // RowGroupSize is the size in rows of row groups that are written to Parquet files.
+  uint64 row_group_size = 3;
+  // BlockReaderLimit is the concurrency limit of the number of Go routines spawned when reading storage blocks.
+  uint64 block_reader_limit = 4;
+  // DisableWal disables the write ahead log for this table.
+  bool disable_wal = 5;
+}
diff --git a/proto/frostdb/wal/v1alpha1/wal.proto b/proto/frostdb/wal/v1alpha1/wal.proto
index d73d9d61a..094122fd9 100644
--- a/proto/frostdb/wal/v1alpha1/wal.proto
+++ b/proto/frostdb/wal/v1alpha1/wal.proto
@@ -2,11 +2,11 @@ syntax = "proto3";
 
 package frostdb.wal.v1alpha1;
 
-import "frostdb/schema/v1alpha1/schema.proto";
-import "frostdb/schema/v1alpha2/schema.proto";
+import "frostdb/table/v1alpha1/config.proto";
 
 // Record describes a single entry into the WAL.
 message Record {
+  reserved 2; // Previously used for transaction metadata.
   // Data of the record. This is intentionally nested so the only thing in
   // the entry can be a protobuf `oneof` and have forward compatilibity.
   Entry entry = 1;
@@ -27,18 +27,13 @@ message Entry {
 
   // The new-table-block entry.
   message NewTableBlock {
+    reserved 3, 4; // Previously used for schema versions that are now contained in the table config.
// Table name of the new-table-block. string table_name = 1; // Block ID of the new-table-block. bytes block_id = 2; - // Schema of the new-table-block. - oneof schema { - // Deprecated schema of the new-table-block. Only used for reading for - // backwards compatibility. - frostdb.schema.v1alpha1.Schema deprecated_schema = 3; - // Schema of the new-table-block. Use this field. - frostdb.schema.v1alpha2.Schema schema_v2 = 4; - } + // Config of the new-table-block. + frostdb.table.v1alpha1.TableConfig config = 5; } // The table-block persisted entry. @@ -47,6 +42,17 @@ message Entry { string table_name = 1; // Block ID of the new-table-block. bytes block_id = 2; + // NextTx is the next non-persisted transaction at the time of block + // persistence. If the block has been persisted, any txn id < next_tx is + // considered persisted or not relevant to this table (i.e. it can be a + // non-persisted txn from another table). + uint64 next_tx = 3; + } + + // The snapshot entry. + message Snapshot { + // The tx the snapshot was taken at. + uint64 tx = 1; } // The new-table entry. @@ -57,5 +63,7 @@ message Entry { NewTableBlock new_table_block = 2; // TableBlockPersisted is set if the entry describes a table-block-persisted. TableBlockPersisted table_block_persisted = 3; + // Snapshot is set if the entry describes a snapshot. 
+ Snapshot snapshot = 4; } } diff --git a/query/engine.go b/query/engine.go index a96a2bea9..6d90dafdd 100644 --- a/query/engine.go +++ b/query/engine.go @@ -3,8 +3,10 @@ package query import ( "context" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/memory" + "go.opentelemetry.io/otel/trace/noop" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/memory" "go.opentelemetry.io/otel/trace" "github.com/polarsignals/frostdb/query/logicalplan" @@ -12,12 +14,14 @@ import ( ) type Builder interface { - Aggregate(aggExpr, groupExprs []logicalplan.Expr) Builder + Aggregate(aggExpr []*logicalplan.AggregationFunction, groupExprs []logicalplan.Expr) Builder Filter(expr logicalplan.Expr) Builder Distinct(expr ...logicalplan.Expr) Builder Project(projections ...logicalplan.Expr) Builder + Limit(expr logicalplan.Expr) Builder Execute(ctx context.Context, callback func(ctx context.Context, r arrow.Record) error) error Explain(ctx context.Context) (string, error) + Sample(size, limitInBytes int64) Builder } type LocalEngine struct { @@ -48,7 +52,7 @@ func NewEngine( ) *LocalEngine { e := &LocalEngine{ pool: pool, - tracer: trace.NewNoopTracerProvider().Tracer(""), + tracer: noop.NewTracerProvider().Tracer(""), tableProvider: tableProvider, } @@ -85,7 +89,7 @@ func (e *LocalEngine) ScanSchema(name string) Builder { } func (b LocalQueryBuilder) Aggregate( - aggExpr []logicalplan.Expr, + aggExpr []*logicalplan.AggregationFunction, groupExprs []logicalplan.Expr, ) Builder { return LocalQueryBuilder{ @@ -129,6 +133,28 @@ func (b LocalQueryBuilder) Project( } } +func (b LocalQueryBuilder) Limit( + expr logicalplan.Expr, +) Builder { + return LocalQueryBuilder{ + pool: b.pool, + tracer: b.tracer, + planBuilder: b.planBuilder.Limit(expr), + execOpts: b.execOpts, + } +} + +func (b LocalQueryBuilder) Sample( + size, limitInBytes int64, +) Builder { + return 
LocalQueryBuilder{ + pool: b.pool, + tracer: b.tracer, + planBuilder: b.planBuilder.Sample(logicalplan.Literal(size), logicalplan.Literal(limitInBytes)), + execOpts: b.execOpts, + } +} + func (b LocalQueryBuilder) Execute(ctx context.Context, callback func(ctx context.Context, r arrow.Record) error) error { ctx, span := b.tracer.Start(ctx, "LocalQueryBuilder/Execute") defer span.End() @@ -155,7 +181,7 @@ func (b LocalQueryBuilder) buildPhysical(ctx context.Context) (*physicalplan.Out return nil, err } - for _, optimizer := range logicalplan.DefaultOptimizers { + for _, optimizer := range logicalplan.DefaultOptimizers() { logicalPlan = optimizer.Optimize(logicalPlan) } diff --git a/query/engine_test.go b/query/engine_test.go new file mode 100644 index 000000000..a7062b642 --- /dev/null +++ b/query/engine_test.go @@ -0,0 +1,239 @@ +package query + +import ( + "context" + "testing" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/stretchr/testify/require" + + "github.com/polarsignals/frostdb/dynparquet" + schemapb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha1" + "github.com/polarsignals/frostdb/query/logicalplan" +) + +func TestUniqueAggregation(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + schema, err := dynparquet.SchemaFromDefinition(&schemapb.Schema{ + Name: "test", + Columns: []*schemapb.Column{{ + Name: "example", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_INT64, + }, + }, { + Name: "timestamp", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_INT64, + }, + }}, + }) + require.NoError(t, err) + + rb := array.NewRecordBuilder(mem, arrow.NewSchema([]arrow.Field{{ + Name: "example", + Type: arrow.PrimitiveTypes.Int64, + 
}, { + Name: "timestamp", + Type: arrow.PrimitiveTypes.Int64, + }}, nil)) + defer rb.Release() + + rb.Field(0).(*array.Int64Builder).AppendValues([]int64{1, 2, 3}, nil) + rb.Field(1).(*array.Int64Builder).AppendValues([]int64{1, 1, 3}, nil) + + r := rb.NewRecord() + defer r.Release() + + ran := false + err = NewEngine(mem, &FakeTableProvider{ + Tables: map[string]logicalplan.TableReader{ + "test": &FakeTableReader{ + FrostdbSchema: schema, + Records: []arrow.Record{r}, + }, + }, + }).ScanTable("test"). + Aggregate( + []*logicalplan.AggregationFunction{logicalplan.Unique(logicalplan.Col("example"))}, + []logicalplan.Expr{logicalplan.Col("timestamp")}, + ). + Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + require.Equal(t, []int64{1, 3}, r.Column(0).(*array.Int64).Int64Values()) + require.True(t, r.Column(1).(*array.Int64).IsNull(0)) + require.True(t, r.Column(1).(*array.Int64).IsValid(1)) + require.Equal(t, int64(3), r.Column(1).(*array.Int64).Value(1)) + ran = true + return nil + }) + require.NoError(t, err) + require.True(t, ran) +} + +func TestAndAggregation(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + schema, err := dynparquet.SchemaFromDefinition(&schemapb.Schema{ + Name: "test", + Columns: []*schemapb.Column{{ + Name: "example", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_BOOL, + }, + }, { + Name: "timestamp", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_INT64, + }, + }}, + }) + require.NoError(t, err) + + rb := array.NewRecordBuilder(mem, arrow.NewSchema([]arrow.Field{{ + Name: "example", + Type: arrow.FixedWidthTypes.Boolean, + }, { + Name: "timestamp", + Type: arrow.PrimitiveTypes.Int64, + }}, nil)) + defer rb.Release() + + rb.Field(0).(*array.BooleanBuilder).AppendValues([]bool{true, false, true, true}, nil) + rb.Field(1).(*array.Int64Builder).AppendValues([]int64{1, 1, 3, 3}, nil) + + r := 
rb.NewRecord() + defer r.Release() + + ran := false + err = NewEngine(mem, &FakeTableProvider{ + Tables: map[string]logicalplan.TableReader{ + "test": &FakeTableReader{ + FrostdbSchema: schema, + Records: []arrow.Record{r}, + }, + }, + }).ScanTable("test"). + Aggregate( + []*logicalplan.AggregationFunction{logicalplan.AndAgg(logicalplan.Col("example"))}, + []logicalplan.Expr{logicalplan.Col("timestamp")}, + ). + Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + require.Equal(t, []int64{1, 3}, r.Column(0).(*array.Int64).Int64Values()) + require.False(t, r.Column(1).(*array.Boolean).Value(0)) + require.True(t, r.Column(1).(*array.Boolean).Value(1)) + ran = true + return nil + }) + require.NoError(t, err) + require.True(t, ran) +} + +func TestIfProjection(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + schema, err := dynparquet.SchemaFromDefinition(&schemapb.Schema{ + Name: "test", + Columns: []*schemapb.Column{{ + Name: "example", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_INT64, + }, + }, { + Name: "timestamp", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_INT64, + }, + }}, + }) + require.NoError(t, err) + + rb := array.NewRecordBuilder(mem, arrow.NewSchema([]arrow.Field{{ + Name: "example", + Type: arrow.PrimitiveTypes.Int64, + }, { + Name: "timestamp", + Type: arrow.PrimitiveTypes.Int64, + }}, nil)) + defer rb.Release() + + rb.Field(0).(*array.Int64Builder).AppendValues([]int64{1, 2, 3}, nil) + rb.Field(1).(*array.Int64Builder).AppendValues([]int64{1, 1, 3}, nil) + + r := rb.NewRecord() + defer r.Release() + + ran := false + err = NewEngine(mem, &FakeTableProvider{ + Tables: map[string]logicalplan.TableReader{ + "test": &FakeTableReader{ + FrostdbSchema: schema, + Records: []arrow.Record{r}, + }, + }, + }).ScanTable("test"). 
+ Project( + logicalplan.If(logicalplan.Col("example").Eq(logicalplan.Literal(int64(1))), logicalplan.Literal(int64(1)), logicalplan.Literal(int64(0))), + ). + Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + require.Equal(t, []int64{1, 0, 0}, r.Column(0).(*array.Int64).Int64Values()) + ran = true + return nil + }) + require.NoError(t, err) + require.True(t, ran) +} + +func TestIsNullProjection(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + schema, err := dynparquet.SchemaFromDefinition(&schemapb.Schema{ + Name: "test", + Columns: []*schemapb.Column{{ + Name: "example", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_INT64, + }, + }}, + }) + require.NoError(t, err) + + rb := array.NewRecordBuilder(mem, arrow.NewSchema([]arrow.Field{{ + Name: "example", + Type: arrow.PrimitiveTypes.Int64, + }}, nil)) + defer rb.Release() + + rb.Field(0).(*array.Int64Builder).AppendValues([]int64{1, 2, 3}, []bool{true, true, false}) + + r := rb.NewRecord() + defer r.Release() + + ran := false + err = NewEngine(mem, &FakeTableProvider{ + Tables: map[string]logicalplan.TableReader{ + "test": &FakeTableReader{ + FrostdbSchema: schema, + Records: []arrow.Record{r}, + }, + }, + }).ScanTable("test"). + Project( + logicalplan.IsNull(logicalplan.Col("example")), + ). 
+ Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + require.False(t, r.Column(0).(*array.Boolean).Value(0)) + require.False(t, r.Column(0).(*array.Boolean).Value(1)) + require.True(t, r.Column(0).(*array.Boolean).Value(2)) + ran = true + return nil + }) + require.NoError(t, err) + require.True(t, ran) +} diff --git a/query/expr/binaryscalarexpr.go b/query/expr/binaryscalarexpr.go new file mode 100644 index 000000000..e63eb72c6 --- /dev/null +++ b/query/expr/binaryscalarexpr.go @@ -0,0 +1,242 @@ +package expr + +import ( + "errors" + "fmt" + + "github.com/parquet-go/parquet-go" + + "github.com/polarsignals/frostdb/query/logicalplan" +) + +type ColumnRef struct { + ColumnName string +} + +func (c *ColumnRef) Column(p Particulate) (parquet.ColumnChunk, bool, error) { + columnIndex := findColumnIndex(p.Schema(), c.ColumnName) + var columnChunk parquet.ColumnChunk + // columnChunk can be nil if the column is not present in the row group. + if columnIndex != -1 { + columnChunk = p.ColumnChunks()[columnIndex] + } + + return columnChunk, columnIndex != -1, nil +} + +func findColumnIndex(s *parquet.Schema, columnName string) int { + for i, field := range s.Fields() { + if field.Name() == columnName { + return i + } + } + return -1 +} + +type BinaryScalarExpr struct { + Left *ColumnRef + Op logicalplan.Op + Right parquet.Value +} + +func (e BinaryScalarExpr) Eval(p Particulate, ignoreMissingCol bool) (bool, error) { + leftData, exists, err := e.Left.Column(p) + if err != nil { + return false, err + } + + if !exists && ignoreMissingCol { + return true, nil + } + + // TODO: This needs a bunch of test cases to validate edge cases like non + // existant columns or null values. I'm pretty sure this is completely + // wrong and needs per operation, per type specific behavior. 
+ if !exists { + if e.Right.IsNull() { + switch e.Op { + case logicalplan.OpEq: + return true, nil + case logicalplan.OpNotEq: + return false, nil + } + } + + // only handling string for now. + if e.Right.Kind() == parquet.ByteArray || e.Right.Kind() == parquet.FixedLenByteArray { + switch { + case e.Op == logicalplan.OpEq && e.Right.String() == "": + return true, nil + case e.Op == logicalplan.OpNotEq && e.Right.String() != "": + return true, nil + } + } + return false, nil + } + + return BinaryScalarOperation(leftData, e.Right, e.Op) +} + +var ErrUnsupportedBinaryOperation = errors.New("unsupported binary operation") + +// BinaryScalarOperation applies the given operator between the given column +// chunk and value. If BinaryScalarOperation returns true, it means that the +// operator may be satisfied by at least one value in the column chunk. If it +// returns false, it means that the operator will definitely not be satisfied +// by any value in the column chunk. +func BinaryScalarOperation(left parquet.ColumnChunk, right parquet.Value, operator logicalplan.Op) (bool, error) { + leftColumnIndex, err := left.ColumnIndex() + if err != nil { + return true, err + } + numNulls := NullCount(leftColumnIndex) + fullOfNulls := numNulls == left.NumValues() + if operator == logicalplan.OpEq { + if right.IsNull() { + return numNulls > 0, nil + } + if fullOfNulls { + // Right value is not null and there are no non-null values, so + // there is definitely not a match. 
+ return false, nil + } + + bloomFilter := left.BloomFilter() + if bloomFilter == nil { + // If there is no bloom filter then we cannot make a statement about true negative, instead check the min max values of the column chunk + return compare(right, Max(leftColumnIndex)) <= 0 && compare(right, Min(leftColumnIndex)) >= 0, nil + } + + ok, err := bloomFilter.Check(right) + if err != nil { + return true, err + } + if !ok { + // Bloom filters may return false positives, but never return false + // negatives, we know this column chunk does not contain the value. + return false, nil + } + + return true, nil + } + + // If right is NULL automatically return that the column chunk needs further + // processing. According to SQL semantics we might be able to elide column + // chunks in these cases since NULL is not comparable to anything else, but + // play it safe (delegate to execution engine) for now since we shouldn't + // have many of these cases. + if right.IsNull() { + return true, nil + } + + if numNulls == left.NumValues() { + // In this case min/max values are meaningless and not comparable to the + // right expression, so we can automatically discard the column chunk. + return false, nil + } + + switch operator { + case logicalplan.OpLtEq: + minValue := Min(leftColumnIndex) + if minValue.IsNull() { + // If min is null, we don't know what the non-null min value is, so + // we need to let the execution engine scan this column chunk + // further. + return true, nil + } + return compare(minValue, right) <= 0, nil + case logicalplan.OpLt: + minValue := Min(leftColumnIndex) + if minValue.IsNull() { + // If min is null, we don't know what the non-null min value is, so + // we need to let the execution engine scan this column chunk + // further. 
+ return true, nil + } + return compare(minValue, right) < 0, nil + case logicalplan.OpGt: + maxValue := Max(leftColumnIndex) + if maxValue.IsNull() { + // If max is null, we don't know what the non-null max value is, so + // we need to let the execution engine scan this column chunk + // further. + return true, nil + } + return compare(maxValue, right) > 0, nil + case logicalplan.OpGtEq: + maxValue := Max(leftColumnIndex) + if maxValue.IsNull() { + // If max is null, we don't know what the non-null max value is, so + // we need to let the execution engine scan this column chunk + // further. + return true, nil + } + return compare(maxValue, right) >= 0, nil + default: + return true, nil + } +} + +// Min returns the minimum value found in the column chunk across all pages. +func Min(columnIndex parquet.ColumnIndex) parquet.Value { + minV := columnIndex.MinValue(0) + for i := 1; i < columnIndex.NumPages(); i++ { + v := columnIndex.MinValue(i) + if minV.IsNull() { + minV = v + continue + } + + if compare(minV, v) == 1 { + minV = v + } + } + + return minV +} + +func NullCount(columnIndex parquet.ColumnIndex) int64 { + numNulls := int64(0) + for i := 0; i < columnIndex.NumPages(); i++ { + numNulls += columnIndex.NullCount(i) + } + return numNulls +} + +// Max returns the maximum value found in the column chunk across all pages. +func Max(columnIndex parquet.ColumnIndex) parquet.Value { + maxValue := columnIndex.MaxValue(0) + for i := 1; i < columnIndex.NumPages(); i++ { + v := columnIndex.MaxValue(i) + if maxValue.IsNull() { + maxValue = v + continue + } + + if compare(maxValue, v) == -1 { + maxValue = v + } + } + + return maxValue +} + +// compares two parquet values. 0 if they are equal, -1 if v1 < v2, 1 if v1 > v2. 
+func compare(v1, v2 parquet.Value) int { + switch v1.Kind() { + case parquet.Int32: + return parquet.Int32Type.Compare(v1, v2) + case parquet.Int64: + return parquet.Int64Type.Compare(v1, v2) + case parquet.Float: + return parquet.FloatType.Compare(v1, v2) + case parquet.Double: + return parquet.DoubleType.Compare(v1, v2) + case parquet.ByteArray, parquet.FixedLenByteArray: + return parquet.ByteArrayType.Compare(v1, v2) + case parquet.Boolean: + return parquet.BooleanType.Compare(v1, v2) + default: + panic(fmt.Sprintf("unsupported value comparison: %v", v1.Kind())) + } +} diff --git a/query/expr/binaryscalarexpr_test.go b/query/expr/binaryscalarexpr_test.go new file mode 100644 index 000000000..dd69907c2 --- /dev/null +++ b/query/expr/binaryscalarexpr_test.go @@ -0,0 +1,213 @@ +package expr + +import ( + "testing" + + "github.com/parquet-go/parquet-go" + "github.com/stretchr/testify/require" + + "github.com/polarsignals/frostdb/query/logicalplan" +) + +type FakeColumnChunk struct { + index *FakeColumnIndex + numValues int64 +} + +func (f *FakeColumnChunk) Type() parquet.Type { return nil } +func (f *FakeColumnChunk) Column() int { return 0 } +func (f *FakeColumnChunk) Pages() parquet.Pages { return nil } +func (f *FakeColumnChunk) ColumnIndex() (parquet.ColumnIndex, error) { return f.index, nil } +func (f *FakeColumnChunk) OffsetIndex() (parquet.OffsetIndex, error) { return nil, nil } +func (f *FakeColumnChunk) BloomFilter() parquet.BloomFilter { return nil } +func (f *FakeColumnChunk) NumValues() int64 { return f.numValues } + +type FakeColumnIndex struct { + numPages int + min parquet.Value + max parquet.Value + nullCount int64 +} + +func (f *FakeColumnIndex) NumPages() int { + return f.numPages +} +func (f *FakeColumnIndex) NullCount(int) int64 { return f.nullCount } +func (f *FakeColumnIndex) NullPage(int) bool { return false } +func (f *FakeColumnIndex) MinValue(int) parquet.Value { return f.min } +func (f 
*FakeColumnIndex) MaxValue(int) parquet.Value { return f.max } +func (f *FakeColumnIndex) IsAscending() bool { return false } +func (f *FakeColumnIndex) IsDescending() bool { return false } + +// This is a regression test that ensures the Min/Max functions return a null +// value (instead of panicing) should they be passed a column chunk that only +// has null values. +func Test_MinMax_EmptyColumnChunk(t *testing.T) { + fakeIndex := &FakeColumnIndex{ + numPages: 10, + } + + v := Min(fakeIndex) + require.True(t, v.IsNull()) + + v = Max(fakeIndex) + require.True(t, v.IsNull()) +} + +func TestBinaryScalarOperation(t *testing.T) { + const numValues = 10 + for _, tc := range []struct { + name string + min int + max int + // -1 is interpreted as a null value. + right int + nullCount int64 + op logicalplan.Op + // expectSatisfies is true if the predicate should be satisfied by the + // column chunk. + expectSatisfies bool + }{ + { + name: "OpEqValueContained", + min: 1, + max: 10, + right: 5, + op: logicalplan.OpEq, + expectSatisfies: true, + }, + { + name: "OpEqValueGt", + min: 1, + max: 10, + right: 11, + op: logicalplan.OpEq, + expectSatisfies: false, + }, + { + name: "OpEqValueLt", + min: 1, + max: 10, + right: 0, + op: logicalplan.OpEq, + expectSatisfies: false, + }, + { + name: "OpEqMaxBound", + min: 1, + max: 10, + right: 10, + op: logicalplan.OpEq, + expectSatisfies: true, + }, + { + name: "OpEqMinBound", + min: 1, + max: 10, + right: 1, + op: logicalplan.OpEq, + expectSatisfies: true, + }, + { + name: "OpEqNullValueNoMatch", + right: -1, + nullCount: 0, + op: logicalplan.OpEq, + expectSatisfies: false, + }, + { + name: "OpEqNullValueMatch", + right: -1, + nullCount: 1, + op: logicalplan.OpEq, + expectSatisfies: true, + }, + { + name: "OpEqNullColumn", + right: 1, + nullCount: 1, + op: logicalplan.OpEq, + expectSatisfies: false, + }, + { + name: "OpEqFullNullColumn", + right: 1, + nullCount: 10, + op: logicalplan.OpEq, + expectSatisfies: false, + }, + { + name: 
"OpGtFullNullColumn", + right: 1, + nullCount: 10, + op: logicalplan.OpGt, + expectSatisfies: false, + }, + { + name: "OpGtNullValueMatch", + right: -1, + nullCount: 0, + op: logicalplan.OpGt, + expectSatisfies: true, + }, + { + name: "OpGtNullValueNoMatch", + right: -1, + nullCount: 1, + op: logicalplan.OpGt, + // expectSatisfies should probably be false once we optimize this. + expectSatisfies: true, + }, + { + name: "OpGtWithSomeNullValuesNoMatch", + min: 1, + max: 10, + right: 11, + nullCount: 1, + op: logicalplan.OpGt, + expectSatisfies: false, + }, + { + name: "OpGtWithSomeNullValuesMatch", + min: 1, + max: 10, + right: 5, + nullCount: 1, + op: logicalplan.OpGt, + expectSatisfies: true, + }, + } { + t.Run(tc.name, func(t *testing.T) { + if tc.op == logicalplan.OpUnknown { + t.Fatal("test programming error: remember to set operator") + } + minV := parquet.ValueOf(tc.min) + maxV := parquet.ValueOf(tc.max) + if tc.nullCount == numValues { + // All values in page are null. Parquet doesn't have + // well-defined min/max values in this case, but setting them + // explicitly to null here will tickle some edge cases. 
+ minV = parquet.ValueOf(nil) + maxV = parquet.ValueOf(nil) + } + fakeChunk := &FakeColumnChunk{ + index: &FakeColumnIndex{ + numPages: 1, + min: minV, + max: maxV, + nullCount: tc.nullCount, + }, + numValues: numValues, + } + var v parquet.Value + if tc.right == -1 { + v = parquet.ValueOf(nil) + } else { + v = parquet.ValueOf(tc.right) + } + res, err := BinaryScalarOperation(fakeChunk, v, tc.op) + require.NoError(t, err) + require.Equal(t, tc.expectSatisfies, res) + }) + } +} diff --git a/query/expr/filter.go b/query/expr/filter.go new file mode 100644 index 000000000..270d8836f --- /dev/null +++ b/query/expr/filter.go @@ -0,0 +1,266 @@ +package expr + +import ( + "errors" + "fmt" + "strings" + "sync" + "sync/atomic" + + "github.com/parquet-go/parquet-go" + + "github.com/polarsignals/frostdb/pqarrow" + "github.com/polarsignals/frostdb/query/logicalplan" +) + +type PreExprVisitorFunc func(expr logicalplan.Expr) bool + +func (f PreExprVisitorFunc) PreVisit(expr logicalplan.Expr) bool { + return f(expr) +} + +func (f PreExprVisitorFunc) Visit(_ logicalplan.Expr) bool { + return false +} + +func (f PreExprVisitorFunc) PostVisit(_ logicalplan.Expr) bool { + return false +} + +// Particulate is an abstraction of something that can be filtered. +// A parquet.RowGroup is a particulate that is able to be filtered, and wrapping a parquet.File with +// ParquetFileParticulate allows a file to be filtered. +type Particulate interface { + Schema() *parquet.Schema + ColumnChunks() []parquet.ColumnChunk +} + +type TrueNegativeFilter interface { + // Eval should be safe to call concurrently. 
+ Eval(p Particulate, ignoreMissingCols bool) (bool, error) +} + +type AlwaysTrueFilter struct{} + +func (f *AlwaysTrueFilter) Eval(_ Particulate, _ bool) (bool, error) { + return true, nil +} + +func binaryBooleanExpr(expr *logicalplan.BinaryExpr) (TrueNegativeFilter, error) { + switch expr.Op { + case logicalplan.OpNotEq: + fallthrough + case logicalplan.OpLt: + fallthrough + case logicalplan.OpLtEq: + fallthrough + case logicalplan.OpGt: + fallthrough + case logicalplan.OpGtEq: + fallthrough + case logicalplan.OpEq: // , logicalplan.OpNotEq, logicalplan.OpLt, logicalplan.OpLtEq, logicalplan.OpGt, logicalplan.OpGtEq, logicalplan.OpRegexMatch, logicalplan.RegexNotMatch: + var leftColumnRef *ColumnRef + expr.Left.Accept(PreExprVisitorFunc(func(expr logicalplan.Expr) bool { + switch e := expr.(type) { + case *logicalplan.Column: + leftColumnRef = &ColumnRef{ + ColumnName: e.ColumnName, + } + return false + } + return true + })) + if leftColumnRef == nil { + return nil, errors.New("left side of binary expression must be a column") + } + + var ( + rightValue parquet.Value + err error + ) + expr.Right.Accept(PreExprVisitorFunc(func(expr logicalplan.Expr) bool { + switch e := expr.(type) { + case *logicalplan.LiteralExpr: + rightValue, err = pqarrow.ArrowScalarToParquetValue(e.Value) + return false + } + return true + })) + + if err != nil { + return nil, err + } + + return &BinaryScalarExpr{ + Left: leftColumnRef, + Op: expr.Op, + Right: rightValue, + }, nil + case logicalplan.OpAnd: + left, err := BooleanExpr(expr.Left) + if err != nil { + return nil, err + } + + right, err := BooleanExpr(expr.Right) + if err != nil { + return nil, err + } + + return &AndExpr{ + Left: left, + Right: right, + }, nil + case logicalplan.OpOr: + left, err := BooleanExpr(expr.Left) + if err != nil { + return nil, err + } + + right, err := BooleanExpr(expr.Right) + if err != nil { + return nil, err + } + + return &OrExpr{ + Left: left, + Right: right, + }, nil + default: + return 
&AlwaysTrueFilter{}, nil + } +} + +func aggregationExpr(expr *logicalplan.AggregationFunction) (TrueNegativeFilter, error) { + switch expr.Func { + case logicalplan.AggFuncMax: + a := &MaxAgg{} + expr.Expr.Accept(PreExprVisitorFunc(func(expr logicalplan.Expr) bool { + switch e := expr.(type) { + case *logicalplan.Column: + a.columnName = e.ColumnName + case *logicalplan.DynamicColumn: + a.columnName = e.ColumnName + a.dynamic = true + default: + return true + } + return false + })) + return a, nil + default: + return &AlwaysTrueFilter{}, nil + } +} + +type MaxAgg struct { + maxMap sync.Map + + // It would be nicer to use a dynamic-aware ColumnRef here, but that would + // introduce allocations (slices for indexes and concrete names), so for + // performance reasons we execute the column lookup manually. + columnName string + dynamic bool +} + +func (a *MaxAgg) Eval(p Particulate, _ bool) (bool, error) { + processFurther := false + for i, f := range p.Schema().Fields() { + if (a.dynamic && !strings.HasPrefix(f.Name(), a.columnName+".")) || (!a.dynamic && f.Name() != a.columnName) { + continue + } + + chunk := p.ColumnChunks()[i] + index, err := chunk.ColumnIndex() + if err != nil { + return false, fmt.Errorf("error retrieving column index in MaxAgg.Eval") + } + if NullCount(index) == chunk.NumValues() { + // This page is full of nulls. Nothing to do since we can't trust + // min/max index values. This chunk should not be processed since + // it can't contribute to min/max unless another column can. + continue + } + + columnPointer, _ := a.maxMap.LoadOrStore(f.Name(), &atomic.Pointer[parquet.Value]{}) + atomicMax := columnPointer.(*atomic.Pointer[parquet.Value]) + + v := Max(index) + for globalMax := atomicMax.Load(); globalMax == nil || compare(v, *globalMax) > 0; globalMax = atomicMax.Load() { + if atomicMax.CompareAndSwap(globalMax, &v) { + // At least one column exceeded the current max so this chunk + // satisfies the filter. 
Note that we do not break out of + // scanning the rest of the columns since we do want to memoize + // the max for other columns as well. + processFurther = true + break + } + } + if !a.dynamic && processFurther { + // No need to look at the remaining columns if we're only looking + // for a single concrete column. + break + } + } + return processFurther, nil +} + +type AndExpr struct { + Left TrueNegativeFilter + Right TrueNegativeFilter +} + +func (a *AndExpr) Eval(p Particulate, ignoreMissingCols bool) (bool, error) { + left, err := a.Left.Eval(p, ignoreMissingCols) + if err != nil { + return false, err + } + if !left { + return false, nil + } + + right, err := a.Right.Eval(p, ignoreMissingCols) + if err != nil { + return false, err + } + return right, nil +} + +type OrExpr struct { + Left TrueNegativeFilter + Right TrueNegativeFilter +} + +func (a *OrExpr) Eval(p Particulate, ignoreMissingCols bool) (bool, error) { + left, err := a.Left.Eval(p, ignoreMissingCols) + if err != nil { + return false, err + } + if left { + return true, nil + } + + right, err := a.Right.Eval(p, ignoreMissingCols) + if err != nil { + return false, err + } + + return right, nil +} + +func BooleanExpr(expr logicalplan.Expr) (TrueNegativeFilter, error) { + if expr == nil { + return &AlwaysTrueFilter{}, nil + } + + switch e := expr.(type) { + case *logicalplan.BinaryExpr: + return binaryBooleanExpr(e) + case *logicalplan.AggregationFunction: + // NOTE: Aggregations are optimized in the case of no grouping columns + // or other filters. 
+ return aggregationExpr(e) + default: + return nil, fmt.Errorf("unsupported boolean expression %T", e) + } +} diff --git a/query/expr/filter_test.go b/query/expr/filter_test.go new file mode 100644 index 000000000..9b0f3cfe3 --- /dev/null +++ b/query/expr/filter_test.go @@ -0,0 +1,209 @@ +package expr + +import ( + "testing" + + "github.com/parquet-go/parquet-go" + "github.com/stretchr/testify/require" +) + +type fakeParticulate struct { + schema *parquet.Schema + columnChunks []parquet.ColumnChunk +} + +// newFakeParticulate creates a new fake particulate. maxValues[i] is the +// maximum value of the ith column. If maxValues[i] is -1, then the ith column +// index will return that it is full of nulls. +func newFakeParticulate(columnNames []string, maxValues []int64) fakeParticulate { + if len(columnNames) != len(maxValues) { + panic("columnNames and maxValues must have the same length") + } + vForName := make(map[string]int64) + for i, name := range columnNames { + vForName[name] = maxValues[i] + } + + g := parquet.Group{} + for _, name := range columnNames { + g[name] = parquet.Int(64) + } + s := parquet.NewSchema("", g) + + columnChunks := make([]parquet.ColumnChunk, len(columnNames)) + // Iterate over the schema in creation order (not doing so mixes up which + // column has which value). 
+ for i, f := range s.Fields() { + numValues := int64(1) + numNulls := int64(0) + var maxV parquet.Value + if v := vForName[f.Name()]; v == -1 { + maxV = parquet.ValueOf(nil) + numNulls = numValues + } else { + maxV = parquet.ValueOf(v) + } + columnChunks[i] = &FakeColumnChunk{ + index: &FakeColumnIndex{ + numPages: 1, + min: parquet.Value{}, + max: maxV, + nullCount: numNulls, + }, + numValues: numValues, + } + } + return fakeParticulate{ + schema: s, + columnChunks: columnChunks, + } +} + +func (f fakeParticulate) Schema() *parquet.Schema { + return f.schema +} + +func (f fakeParticulate) ColumnChunks() []parquet.ColumnChunk { + return f.columnChunks +} + +func TestMaxAgg(t *testing.T) { + for _, tc := range []struct { + name string + agg *MaxAgg + // values[i] is the ith particulate. + values []struct { + p fakeParticulate + // expected result of Eval(values[i]). + expected bool + } + }{ + { + name: "ConcreteColumn", + agg: &MaxAgg{ + columnName: "a", + }, + values: []struct { + p fakeParticulate + expected bool + }{ + { + newFakeParticulate( + []string{"anotfound"}, + []int64{3}, + ), + // Column not found. + false, + }, + { + newFakeParticulate( + []string{"a.a"}, + []int64{3}, + ), + // Column not found. + false, + }, + { + newFakeParticulate( + []string{"a"}, + []int64{2}, + ), + true, + }, + { + newFakeParticulate( + []string{"a"}, + []int64{1}, + ), + // Values is less than the current max. + false, + }, + { + newFakeParticulate( + []string{"b.a", "a"}, + []int64{-1, 3}, + ), + true, + }, + }, + }, + { + name: "DynamicColumn", + agg: &MaxAgg{ + columnName: "a", + dynamic: true, + }, + values: []struct { + p fakeParticulate + expected bool + }{ + { + newFakeParticulate( + []string{"a"}, + []int64{3}, + ), + // Not a dynamic column. + false, + }, + { + newFakeParticulate( + []string{"a.a", "a"}, + []int64{-1, 3}, + ), + // Nulls in the dynamic column. 
+ false, + }, + { + newFakeParticulate( + []string{"a.a", "a.b", "a.c"}, + []int64{3, 3, 3}, + ), + // This particulate should pass the filter. Also, this + // particulate verifies that the filter memoizes the max for + // all columns in the particulate for the future. + true, + }, + { + newFakeParticulate( + []string{"a.a"}, + []int64{1}, + ), + // Lower than max. + false, + }, + { + newFakeParticulate( + []string{"a.b"}, + []int64{1}, + ), + // Lower than max. + false, + }, + { + newFakeParticulate( + []string{"a.c"}, + []int64{1}, + ), + // Lower than max. + false, + }, + { + newFakeParticulate( + []string{"a.a", "a.b"}, + []int64{1, 4}, + ), + // First column is lower, but second column is higher. + true, + }, + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + for i, v := range tc.values { + got, err := tc.agg.Eval(v.p, false) + require.NoError(t, err) + require.Equal(t, v.expected, got, "index %d mismatch", i) + } + }) + } +} diff --git a/query/exprpb/engine.go b/query/exprpb/engine.go new file mode 100644 index 000000000..5e375465f --- /dev/null +++ b/query/exprpb/engine.go @@ -0,0 +1,179 @@ +package exprpb + +import ( + "context" + "fmt" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/memory" + "go.opentelemetry.io/otel/trace" + "go.opentelemetry.io/otel/trace/noop" + + pb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/storage/v1alpha1" + "github.com/polarsignals/frostdb/query/logicalplan" + "github.com/polarsignals/frostdb/query/physicalplan" +) + +type ProtoEngine struct { + pool memory.Allocator + tracer trace.Tracer + tableProvider logicalplan.TableProvider + execOpts []physicalplan.Option +} + +type Option func(*ProtoEngine) + +func WithTracer(tracer trace.Tracer) Option { + return func(e *ProtoEngine) { + e.tracer = tracer + } +} + +func WithPhysicalplanOptions(opts ...physicalplan.Option) Option { + return func(e *ProtoEngine) { + 
e.execOpts = opts + } +} + +func NewEngine( + pool memory.Allocator, + tableProvider logicalplan.TableProvider, + options ...Option, +) *ProtoEngine { + e := &ProtoEngine{ + pool: pool, + tracer: noop.NewTracerProvider().Tracer(""), + tableProvider: tableProvider, + } + + for _, option := range options { + option(e) + } + + return e +} + +type ProtoQueryBuilder struct { + pool memory.Allocator + tracer trace.Tracer + LogicalPlan *logicalplan.LogicalPlan + execOpts []physicalplan.Option + tableProvider logicalplan.TableProvider +} + +func (e *ProtoEngine) FromProto(root *pb.PlanNode) (ProtoQueryBuilder, error) { + pqb := ProtoQueryBuilder{ + pool: e.pool, + tracer: e.tracer, + execOpts: e.execOpts, + tableProvider: e.tableProvider, + } + builder, err := pqb.planFromProto(root) + if err != nil { + return pqb, err + } + pqb.LogicalPlan, err = builder.Build() + if err != nil { + return pqb, err + } + + return pqb, nil +} + +func (qb ProtoQueryBuilder) planFromProto(plan *pb.PlanNode) (logicalplan.Builder, error) { + var b logicalplan.Builder + // First convert the next plan node. + if plan.GetNext() != nil { + var err error + b, err = qb.planFromProto(plan.GetNext()) + if err != nil { + return b, err + } + } else { + b = logicalplan.Builder{} + } + + switch { + case plan.GetSpec().GetSchemaScan() != nil: + b = b.ScanSchema(qb.tableProvider, plan.GetSpec().GetSchemaScan().GetBase().GetTable()) + case plan.GetSpec().GetTableScan() != nil: + b = b.Scan(qb.tableProvider, plan.GetSpec().GetTableScan().GetBase().GetTable()) + case plan.GetSpec().GetFilter() != nil: + expr, err := ExprFromProto(plan.GetSpec().GetFilter().GetExpr()) + if err != nil { + return b, fmt.Errorf("failed to convert expr from proto: %v", err) + } + b = b.Filter(expr) + case plan.GetSpec().GetDistinct() != nil: + exprs, err := ExprsFromProtos(plan.GetSpec().GetDistinct().GetExprs()) + if err != nil { + return b, fmt.Errorf("failed to convert exprs from proto: %v", err) + } + b = b.Distinct(exprs...) 
+ case plan.GetSpec().GetProjection() != nil: + exprs, err := ExprsFromProtos(plan.GetSpec().GetProjection().GetExprs()) + if err != nil { + return b, fmt.Errorf("failed to convert exprs from proto: %v", err) + } + b = b.Project(exprs...) + case plan.GetSpec().GetLimit() != nil: + expr, err := ExprFromProto(plan.GetSpec().GetLimit().GetExpr()) + if err != nil { + return b, fmt.Errorf("failed to convert expr from proto: %v", err) + } + b = b.Limit(expr) + case plan.GetSpec().GetAggregation() != nil: + exprs, err := ExprsFromProtos(plan.GetSpec().GetAggregation().GetAggExprs()) + if err != nil { + return b, fmt.Errorf("failed to convert exprs from proto: %v", err) + } + groupExprs, err := ExprsFromProtos(plan.GetSpec().GetAggregation().GetGroupExprs()) + if err != nil { + return b, fmt.Errorf("failed to convert exprs from proto: %v", err) + } + + aggExprs := make([]*logicalplan.AggregationFunction, 0, len(exprs)) + for _, expr := range exprs { + aggExprs = append(aggExprs, expr.(*logicalplan.AggregationFunction)) + } + + b.Aggregate(aggExprs, groupExprs) + } + + return b, nil +} + +func (qb ProtoQueryBuilder) Execute(ctx context.Context, callback func(ctx context.Context, r arrow.Record) error) error { + ctx, span := qb.tracer.Start(ctx, "ProtoEngine/Execute") + defer span.End() + + phyPlan, err := qb.buildPhysical(ctx) + if err != nil { + return err + } + + return phyPlan.Execute(ctx, qb.pool, callback) +} + +func (qb ProtoQueryBuilder) buildPhysical(ctx context.Context) (*physicalplan.OutputPlan, error) { + for _, optimizer := range logicalplan.DefaultOptimizers() { + qb.LogicalPlan = optimizer.Optimize(qb.LogicalPlan) + } + + return physicalplan.Build( + ctx, + qb.pool, + qb.tracer, + qb.LogicalPlan.InputSchema(), + qb.LogicalPlan, + qb.execOpts..., + ) +} + +func (qb ProtoQueryBuilder) Explain(ctx context.Context) (string, error) { + phyPlan, err := qb.buildPhysical(ctx) + if err != nil { + return "", err + } + return phyPlan.DrawString(), nil +} diff --git 
a/query/exprpb/engine_test.go b/query/exprpb/engine_test.go new file mode 100644 index 000000000..754888a1d --- /dev/null +++ b/query/exprpb/engine_test.go @@ -0,0 +1,135 @@ +package exprpb + +import ( + "context" + "testing" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/stretchr/testify/require" + + "github.com/polarsignals/frostdb/dynparquet" + pb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/storage/v1alpha1" + "github.com/polarsignals/frostdb/query/logicalplan" +) + +func TestPlan(t *testing.T) { + protoPlan := &pb.PlanNode{ + Spec: &pb.PlanNodeSpec{ + Spec: &pb.PlanNodeSpec_SchemaScan{ + SchemaScan: &pb.SchemaScan{ + Base: &pb.ScanBase{ + Database: "foo", + Table: "bar", + }, + }, + }, + }, + } + + engine := NewEngine(memory.NewGoAllocator(), &mockTableProvider{ + schema: dynparquet.NewSampleSchema(), + }) + + builder, err := engine.FromProto(protoPlan) + require.NoError(t, err) + + require.Equal(t, "bar", builder.LogicalPlan.SchemaScan.TableName) + + err = builder.Execute(context.Background(), func(_ context.Context, _ arrow.Record) error { + return nil + }) + require.NoError(t, err) + + explain, err := builder.Explain(context.Background()) + require.NoError(t, err) + require.Equal(t, `SchemaScan [concurrent] - Synchronizer`, explain) + + // next plan + + protoPlan = &pb.PlanNode{ + Spec: &pb.PlanNodeSpec{ + Spec: &pb.PlanNodeSpec_Projection{ + Projection: &pb.Projection{ + Exprs: []*pb.Expr{{ + Def: &pb.ExprDef{ + Content: &pb.ExprDef_Column{ + Column: &pb.Column{ + Name: "foo", + }, + }, + }, + }}, + }, + }, + }, + Next: &pb.PlanNode{ + Spec: &pb.PlanNodeSpec{ + Spec: &pb.PlanNodeSpec_TableScan{ + TableScan: &pb.TableScan{ + Base: &pb.ScanBase{ + Table: "bar", + }, + }, + }, + }, + }, + } + + builder, err = engine.FromProto(protoPlan) + require.NoError(t, err) + + require.Equal(t, 
&logicalplan.Column{ColumnName: "foo"}, builder.LogicalPlan.Projection.Exprs[0]) + require.Equal(t, "bar", builder.LogicalPlan.Input.TableScan.TableName) + + err = builder.Execute(context.Background(), func(_ context.Context, _ arrow.Record) error { + return nil + }) + require.NoError(t, err) + + explain, err = builder.Explain(context.Background()) + require.NoError(t, err) + require.Equal(t, `TableScan [concurrent] - Projection (foo) - Synchronizer`, explain) +} + +type mockTableReader struct { + schema *dynparquet.Schema +} + +func (m *mockTableReader) Schema() *dynparquet.Schema { + return m.schema +} + +func (m *mockTableReader) View(_ context.Context, _ func(ctx context.Context, tx uint64) error) error { + return nil +} + +func (m *mockTableReader) Iterator( + _ context.Context, + _ uint64, + _ memory.Allocator, + _ []logicalplan.Callback, + _ ...logicalplan.Option, +) error { + return nil +} + +func (m *mockTableReader) SchemaIterator( + _ context.Context, + _ uint64, + _ memory.Allocator, + _ []logicalplan.Callback, + _ ...logicalplan.Option, +) error { + return nil +} + +type mockTableProvider struct { + schema *dynparquet.Schema +} + +func (m *mockTableProvider) GetTable(_ string) (logicalplan.TableReader, error) { + return &mockTableReader{ + schema: m.schema, + }, nil +} diff --git a/query/exprpb/proto.go b/query/exprpb/proto.go new file mode 100644 index 000000000..6d22172fa --- /dev/null +++ b/query/exprpb/proto.go @@ -0,0 +1,591 @@ +package exprpb + +import ( + "errors" + "fmt" + "time" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/apache/arrow-go/v18/arrow/scalar" + + storagepb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/storage/v1alpha1" + "github.com/polarsignals/frostdb/query/logicalplan" +) + +// ExprsFromProtos converts a slice of proto representations of expressions to +// the canonical logicalplan 
expressions representation. +func ExprsFromProtos(exprs []*storagepb.Expr) ([]logicalplan.Expr, error) { + if exprs == nil { + return nil, nil + } + + res := make([]logicalplan.Expr, 0, len(exprs)) + for _, expr := range exprs { + e, err := ExprFromProto(expr) + if err != nil { + return nil, err + } + res = append(res, e) + } + return res, nil +} + +// ExprFromProto converts a proto representation of an expression to the +// canonical logicalplan expressions representation. +func ExprFromProto(expr *storagepb.Expr) (logicalplan.Expr, error) { + if expr == nil { + return nil, nil + } + + switch e := expr.Def.Content.(type) { + case *storagepb.ExprDef_BinaryExpr: + left, err := ExprFromProto(e.BinaryExpr.Left) + if err != nil { + return nil, err + } + + right, err := ExprFromProto(e.BinaryExpr.Right) + if err != nil { + return nil, err + } + + op, err := protoOpToLogicalOp(e.BinaryExpr.Op) + if err != nil { + return nil, err + } + + return &logicalplan.BinaryExpr{ + Left: left, + Right: right, + Op: op, + }, nil + case *storagepb.ExprDef_Column: + return &logicalplan.Column{ColumnName: e.Column.Name}, nil + case *storagepb.ExprDef_Literal: + s, err := protoLiteralToArrowScalar(e.Literal) + if err != nil { + return nil, err + } + + return &logicalplan.LiteralExpr{Value: s}, nil + case *storagepb.ExprDef_DynamicColumn: + return &logicalplan.DynamicColumn{ColumnName: e.DynamicColumn.Name}, nil + case *storagepb.ExprDef_AggregationFunction: + expr, err := ExprFromProto(e.AggregationFunction.Expr) + if err != nil { + return nil, err + } + + f, err := protoAggFuncToLogicalAggFunc(e.AggregationFunction.Type) + if err != nil { + return nil, err + } + + return &logicalplan.AggregationFunction{ + Func: f, + Expr: expr, + }, nil + case *storagepb.ExprDef_Alias: + expr, err := ExprFromProto(e.Alias.Expr) + if err != nil { + return nil, err + } + + return &logicalplan.AliasExpr{ + Expr: expr, + Alias: e.Alias.Name, + }, nil + case *storagepb.ExprDef_Duration: + return 
logicalplan.Duration(time.Duration(e.Duration.Milliseconds) * time.Millisecond), nil + case *storagepb.ExprDef_Convert: + expr, err := ExprFromProto(e.Convert.Expr) + if err != nil { + return nil, err + } + + t, err := protoTypeToArrow(e.Convert.Type) + if err != nil { + return nil, err + } + + return &logicalplan.ConvertExpr{ + Expr: expr, + Type: t, + }, nil + case *storagepb.ExprDef_If: + cond, err := ExprFromProto(e.If.Condition) + if err != nil { + return nil, err + } + + then, err := ExprFromProto(e.If.Then) + if err != nil { + return nil, err + } + + els, err := ExprFromProto(e.If.Else) + if err != nil { + return nil, err + } + + return &logicalplan.IfExpr{ + Cond: cond, + Then: then, + Else: els, + }, nil + default: + return nil, fmt.Errorf("unsupported expression type: %T", e) + } +} + +func protoTypeToArrow(t storagepb.Type) (arrow.DataType, error) { + switch t { + case storagepb.Type_TYPE_FLOAT64: + return arrow.PrimitiveTypes.Float64, nil + default: + return nil, fmt.Errorf("unsupported type: %v", t) + } +} + +func protoOpToLogicalOp(op storagepb.Op) (logicalplan.Op, error) { + switch op { + case storagepb.Op_OP_EQ: + return logicalplan.OpEq, nil + case storagepb.Op_OP_NOT_EQ: + return logicalplan.OpNotEq, nil + case storagepb.Op_OP_LT: + return logicalplan.OpLt, nil + case storagepb.Op_OP_LT_EQ: + return logicalplan.OpLtEq, nil + case storagepb.Op_OP_GT: + return logicalplan.OpGt, nil + case storagepb.Op_OP_GT_EQ: + return logicalplan.OpGtEq, nil + case storagepb.Op_OP_REGEX_MATCH: + return logicalplan.OpRegexMatch, nil + case storagepb.Op_OP_REGEX_NOT_MATCH: + return logicalplan.OpRegexNotMatch, nil + case storagepb.Op_OP_AND: + return logicalplan.OpAnd, nil + case storagepb.Op_OP_OR: + return logicalplan.OpOr, nil + case storagepb.Op_OP_ADD: + return logicalplan.OpAdd, nil + case storagepb.Op_OP_SUB: + return logicalplan.OpSub, nil + case storagepb.Op_OP_MUL: + return logicalplan.OpMul, nil + case storagepb.Op_OP_DIV: + return logicalplan.OpDiv, nil 
+ case storagepb.Op_OP_CONTAINS: + return logicalplan.OpContains, nil + case storagepb.Op_OP_NOT_CONTAINS: + return logicalplan.OpNotContains, nil + default: + return logicalplan.OpUnknown, fmt.Errorf("unsupported op: %v", op) + } +} + +func protoAggFuncToLogicalAggFunc(f storagepb.AggregationFunction_Type) (logicalplan.AggFunc, error) { + switch f { + case storagepb.AggregationFunction_TYPE_SUM: + return logicalplan.AggFuncSum, nil + case storagepb.AggregationFunction_TYPE_MIN: + return logicalplan.AggFuncMin, nil + case storagepb.AggregationFunction_TYPE_MAX: + return logicalplan.AggFuncMax, nil + case storagepb.AggregationFunction_TYPE_COUNT: + return logicalplan.AggFuncCount, nil + default: + return logicalplan.AggFuncUnknown, fmt.Errorf("unsupported agg func: %v", f) + } +} + +func protoLiteralToArrowScalar(lit *storagepb.Literal) (scalar.Scalar, error) { + switch val := lit.Content.Value.(type) { + case *storagepb.LiteralContent_NullValue: + return scalar.ScalarNull, nil + case *storagepb.LiteralContent_BoolValue: + return scalar.NewBooleanScalar(val.BoolValue), nil + case *storagepb.LiteralContent_Int32Value: + return scalar.NewInt32Scalar(val.Int32Value), nil + case *storagepb.LiteralContent_Uint32Value: + return scalar.NewUint32Scalar(val.Uint32Value), nil + case *storagepb.LiteralContent_Int64Value: + return scalar.NewInt64Scalar(val.Int64Value), nil + case *storagepb.LiteralContent_Uint64Value: + return scalar.NewUint64Scalar(val.Uint64Value), nil + case *storagepb.LiteralContent_FloatValue: + return scalar.NewFloat32Scalar(val.FloatValue), nil + case *storagepb.LiteralContent_DoubleValue: + return scalar.NewFloat64Scalar(val.DoubleValue), nil + case *storagepb.LiteralContent_BinaryValue: + return scalar.NewBinaryScalar(memory.NewBufferBytes(val.BinaryValue), arrow.BinaryTypes.Binary), nil + case *storagepb.LiteralContent_StringValue: + return scalar.NewStringScalar(val.StringValue), nil + default: + return nil, fmt.Errorf("unsupported literal type: %T", 
val) + } +} + +func ExprsToProtos(exprs []logicalplan.Expr) ([]*storagepb.Expr, error) { + res := make([]*storagepb.Expr, 0, len(exprs)) + for _, e := range exprs { + expr, err := ExprToProto(e) + if err != nil { + return nil, err + } + res = append(res, expr) + } + return res, nil +} + +func ExprToProto(expr logicalplan.Expr) (*storagepb.Expr, error) { + if expr == nil { + return nil, nil + } + + switch e := expr.(type) { + case *logicalplan.BinaryExpr: + return BinaryExprToProto(e) + case *logicalplan.Column: + return ColumnExprToProto(e) + case *logicalplan.LiteralExpr: + return LiteralExprToProto(e) + case *logicalplan.DynamicColumn: + return DynamicColumnExprToProto(e) + case *logicalplan.AggregationFunction: + return AggregationFunctionToProto(e) + case *logicalplan.AliasExpr: + return AliasExprToProto(e) + case *logicalplan.DurationExpr: + return DurationExprToProto(e) + case *logicalplan.ConvertExpr: + return ConvertExprToProto(e) + case *logicalplan.IfExpr: + return IfExprToProto(e) + default: + return nil, fmt.Errorf("unsupported expression type: %T", e) + } +} + +func IfExprToProto(e *logicalplan.IfExpr) (*storagepb.Expr, error) { + cond, err := ExprToProto(e.Cond) + if err != nil { + return nil, err + } + yes, err := ExprToProto(e.Then) + if err != nil { + return nil, err + } + no, err := ExprToProto(e.Else) + if err != nil { + return nil, err + } + return &storagepb.Expr{ + Def: &storagepb.ExprDef{ + Content: &storagepb.ExprDef_If{ + If: &storagepb.IfExpr{ + Condition: cond, + Then: yes, + Else: no, + }, + }, + }, + }, nil +} + +func ConvertExprToProto(e *logicalplan.ConvertExpr) (*storagepb.Expr, error) { + expr, err := ExprToProto(e.Expr) + if err != nil { + return nil, err + } + + t, err := arrowTypeToProto(e.Type) + if err != nil { + return nil, err + } + + return &storagepb.Expr{ + Def: &storagepb.ExprDef{ + Content: &storagepb.ExprDef_Convert{ + Convert: &storagepb.ConvertExpr{ + Expr: expr, + Type: t, + }, + }, + }, + }, nil +} + +func 
arrowTypeToProto(t arrow.DataType) (storagepb.Type, error) { + switch t { + case arrow.PrimitiveTypes.Float64: + return storagepb.Type_TYPE_FLOAT64, nil + default: + return storagepb.Type_TYPE_UNKNOWN_UNSPECIFIED, fmt.Errorf("unsupported type: %v", t) + } +} + +func AliasExprToProto(e *logicalplan.AliasExpr) (*storagepb.Expr, error) { + expr, err := ExprToProto(e.Expr) + if err != nil { + return nil, err + } + + return &storagepb.Expr{ + Def: &storagepb.ExprDef{ + Content: &storagepb.ExprDef_Alias{ + Alias: &storagepb.Alias{ + Name: e.Alias, + Expr: expr, + }, + }, + }, + }, nil +} + +func BinaryExprToProto(e *logicalplan.BinaryExpr) (*storagepb.Expr, error) { + left, err := ExprToProto(e.Left) + if err != nil { + return nil, err + } + + right, err := ExprToProto(e.Right) + if err != nil { + return nil, err + } + + op, err := logicalOpToProtoOp(e.Op) + if err != nil { + return nil, err + } + + return &storagepb.Expr{ + Def: &storagepb.ExprDef{ + Content: &storagepb.ExprDef_BinaryExpr{ + BinaryExpr: &storagepb.BinaryExpr{ + Left: left, + Op: op, + Right: right, + }, + }, + }, + }, nil +} + +func logicalOpToProtoOp(op logicalplan.Op) (storagepb.Op, error) { + switch op { + case logicalplan.OpEq: + return storagepb.Op_OP_EQ, nil + case logicalplan.OpNotEq: + return storagepb.Op_OP_NOT_EQ, nil + case logicalplan.OpLt: + return storagepb.Op_OP_LT, nil + case logicalplan.OpLtEq: + return storagepb.Op_OP_LT_EQ, nil + case logicalplan.OpGt: + return storagepb.Op_OP_GT, nil + case logicalplan.OpGtEq: + return storagepb.Op_OP_GT_EQ, nil + case logicalplan.OpRegexMatch: + return storagepb.Op_OP_REGEX_MATCH, nil + case logicalplan.OpRegexNotMatch: + return storagepb.Op_OP_REGEX_NOT_MATCH, nil + case logicalplan.OpAnd: + return storagepb.Op_OP_AND, nil + case logicalplan.OpOr: + return storagepb.Op_OP_OR, nil + case logicalplan.OpAdd: + return storagepb.Op_OP_ADD, nil + case logicalplan.OpSub: + return storagepb.Op_OP_SUB, nil + case logicalplan.OpMul: + return 
storagepb.Op_OP_MUL, nil + case logicalplan.OpDiv: + return storagepb.Op_OP_DIV, nil + case logicalplan.OpContains: + return storagepb.Op_OP_CONTAINS, nil + case logicalplan.OpNotContains: + return storagepb.Op_OP_NOT_CONTAINS, nil + default: + return storagepb.Op_OP_UNKNOWN_UNSPECIFIED, fmt.Errorf("unsupported op: %v", op) + } +} + +func ColumnExprToProto(e *logicalplan.Column) (*storagepb.Expr, error) { + return &storagepb.Expr{ + Def: &storagepb.ExprDef{ + Content: &storagepb.ExprDef_Column{ + Column: &storagepb.Column{ + Name: e.ColumnName, + }, + }, + }, + }, nil +} + +func DynamicColumnExprToProto(e *logicalplan.DynamicColumn) (*storagepb.Expr, error) { + return &storagepb.Expr{ + Def: &storagepb.ExprDef{ + Content: &storagepb.ExprDef_DynamicColumn{ + DynamicColumn: &storagepb.DynamicColumn{ + Name: e.ColumnName, + }, + }, + }, + }, nil +} + +func LiteralExprToProto(e *logicalplan.LiteralExpr) (*storagepb.Expr, error) { + val, err := scalarToLiteral(e.Value) + if err != nil { + return nil, err + } + + return &storagepb.Expr{ + Def: &storagepb.ExprDef{ + Content: &storagepb.ExprDef_Literal{ + Literal: val, + }, + }, + }, nil +} + +func scalarToLiteral(s scalar.Scalar) (*storagepb.Literal, error) { + switch s := s.(type) { + case *scalar.Null: + return &storagepb.Literal{ + Content: &storagepb.LiteralContent{ + Value: &storagepb.LiteralContent_NullValue{ + NullValue: &storagepb.Null{}, + }, + }, + }, nil + case *scalar.Boolean: + return &storagepb.Literal{ + Content: &storagepb.LiteralContent{ + Value: &storagepb.LiteralContent_BoolValue{ + BoolValue: s.Value, + }, + }, + }, nil + case *scalar.Int32: + return &storagepb.Literal{ + Content: &storagepb.LiteralContent{ + Value: &storagepb.LiteralContent_Int32Value{ + Int32Value: s.Value, + }, + }, + }, nil + case *scalar.Uint32: + return &storagepb.Literal{ + Content: &storagepb.LiteralContent{ + Value: &storagepb.LiteralContent_Uint32Value{ + Uint32Value: s.Value, + }, + }, + }, nil + case *scalar.Int64: + return 
&storagepb.Literal{ + Content: &storagepb.LiteralContent{ + Value: &storagepb.LiteralContent_Int64Value{ + Int64Value: s.Value, + }, + }, + }, nil + case *scalar.Uint64: + return &storagepb.Literal{ + Content: &storagepb.LiteralContent{ + Value: &storagepb.LiteralContent_Uint64Value{ + Uint64Value: s.Value, + }, + }, + }, nil + case *scalar.Float32: + return &storagepb.Literal{ + Content: &storagepb.LiteralContent{ + Value: &storagepb.LiteralContent_FloatValue{ + FloatValue: s.Value, + }, + }, + }, nil + case *scalar.Float64: + return &storagepb.Literal{ + Content: &storagepb.LiteralContent{ + Value: &storagepb.LiteralContent_DoubleValue{ + DoubleValue: s.Value, + }, + }, + }, nil + case *scalar.Binary: + return &storagepb.Literal{ + Content: &storagepb.LiteralContent{ + Value: &storagepb.LiteralContent_BinaryValue{ + BinaryValue: s.Data(), + }, + }, + }, nil + case *scalar.String: + return &storagepb.Literal{ + Content: &storagepb.LiteralContent{ + Value: &storagepb.LiteralContent_StringValue{ + StringValue: s.String(), + }, + }, + }, nil + default: + return nil, errors.New("unsupported scalar type " + s.DataType().Name()) + } +} + +func AggregationFunctionToProto(e *logicalplan.AggregationFunction) (*storagepb.Expr, error) { + expr, err := ExprToProto(e.Expr) + if err != nil { + return nil, err + } + + f, err := logicalAggFuncToProto(e.Func) + if err != nil { + return nil, err + } + + return &storagepb.Expr{ + Def: &storagepb.ExprDef{ + Content: &storagepb.ExprDef_AggregationFunction{ + AggregationFunction: &storagepb.AggregationFunction{ + Type: f, + Expr: expr, + }, + }, + }, + }, nil +} + +func logicalAggFuncToProto(f logicalplan.AggFunc) (storagepb.AggregationFunction_Type, error) { + switch f { + case logicalplan.AggFuncSum: + return storagepb.AggregationFunction_TYPE_SUM, nil + case logicalplan.AggFuncMin: + return storagepb.AggregationFunction_TYPE_MIN, nil + case logicalplan.AggFuncMax: + return storagepb.AggregationFunction_TYPE_MAX, nil + case 
logicalplan.AggFuncCount: + return storagepb.AggregationFunction_TYPE_COUNT, nil + default: + return storagepb.AggregationFunction_TYPE_UNKNOWN_UNSPECIFIED, errors.New("unsupported aggregation function") + } +} + +func DurationExprToProto(e *logicalplan.DurationExpr) (*storagepb.Expr, error) { + return &storagepb.Expr{ + Def: &storagepb.ExprDef{ + Content: &storagepb.ExprDef_Duration{ + Duration: &storagepb.DurationExpr{ + Milliseconds: e.Value().Milliseconds(), + }, + }, + }, + }, nil +} diff --git a/query/logicalplan/builder.go b/query/logicalplan/builder.go index 0202081ec..9a621dca5 100644 --- a/query/logicalplan/builder.go +++ b/query/logicalplan/builder.go @@ -1,12 +1,15 @@ package logicalplan import ( - "github.com/apache/arrow/go/v10/arrow" - "github.com/segmentio/parquet-go" + "errors" + "fmt" + + "github.com/apache/arrow-go/v18/arrow" ) type Builder struct { plan *LogicalPlan + err error } func (b Builder) Scan( @@ -14,6 +17,7 @@ func (b Builder) Scan( tableName string, ) Builder { return Builder{ + err: b.err, plan: &LogicalPlan{ TableScan: &TableScan{ TableProvider: provider, @@ -28,6 +32,7 @@ func (b Builder) ScanSchema( tableName string, ) Builder { return Builder{ + err: b.err, plan: &LogicalPlan{ SchemaScan: &SchemaScan{ TableProvider: provider, @@ -41,6 +46,7 @@ func (b Builder) Project( exprs ...Expr, ) Builder { return Builder{ + err: b.err, plan: &LogicalPlan{ Input: b.plan, Projection: &Projection{ @@ -52,13 +58,20 @@ func (b Builder) Project( type Visitor interface { PreVisit(expr Expr) bool + Visit(expr Expr) bool PostVisit(expr Expr) bool } +type ExprTypeFinder interface { + DataTypeForExpr(expr Expr) (arrow.DataType, error) +} + type Expr interface { - DataType(*parquet.Schema) (arrow.DataType, error) + DataType(ExprTypeFinder) (arrow.DataType, error) Accept(Visitor) bool Name() string + Equal(Expr) bool + fmt.Stringer // ColumnsUsedExprs extracts all the expressions that are used that cause // 
physical data to be read from a column. @@ -80,6 +93,9 @@ type Expr interface { // Computed returns whether the expression is computed as opposed to being // a static value or unmodified physical column. Computed() bool + + // Clone returns a deep copy of the expression. + Clone() Expr } func (b Builder) Filter(expr Expr) Builder { @@ -88,6 +104,7 @@ func (b Builder) Filter(expr Expr) Builder { } return Builder{ + err: b.err, plan: &LogicalPlan{ Input: b.plan, Filter: &Filter{ @@ -101,31 +118,147 @@ func (b Builder) Distinct( exprs ...Expr, ) Builder { return Builder{ + err: b.err, plan: &LogicalPlan{ - Input: b.plan, Distinct: &Distinct{ Exprs: exprs, }, + Input: &LogicalPlan{ + Projection: &Projection{ + Exprs: exprs, + }, + Input: b.plan, + }, + }, + } +} + +func (b Builder) Limit(expr Expr) Builder { + if expr == nil { + return b + } + + return Builder{ + err: b.err, + plan: &LogicalPlan{ + Input: b.plan, + Limit: &Limit{ + Expr: expr, + }, }, } } func (b Builder) Aggregate( - aggExpr []Expr, + aggExpr []*AggregationFunction, groupExprs []Expr, ) Builder { + resolvedAggExpr := make([]*AggregationFunction, 0, len(aggExpr)) + projectExprs := make([]Expr, 0, len(aggExpr)) + needsPostProcessing := false + + var err error + for _, agg := range aggExpr { + resolvedAggregations, projections, changed, rerr := resolveAggregation(b.plan, agg) + if err != nil { + err = errors.Join(err, rerr) + } + + if changed { + needsPostProcessing = true + } + + resolvedAggExpr = append(resolvedAggExpr, resolvedAggregations...) + projectExprs = append(projectExprs, projections...) 
+ } + + if !needsPostProcessing { + return Builder{ + err: err, + plan: &LogicalPlan{ + Aggregation: &Aggregation{ + GroupExprs: groupExprs, + AggExprs: aggExpr, + }, + Input: b.plan, + }, + } + } + return Builder{ + err: err, + plan: &LogicalPlan{ + Projection: &Projection{ + Exprs: append(groupExprs, projectExprs...), + }, + Input: &LogicalPlan{ + Aggregation: &Aggregation{ + GroupExprs: groupExprs, + AggExprs: resolvedAggExpr, + }, + Input: b.plan, + }, + }, + } +} + +func resolveAggregation(plan *LogicalPlan, agg *AggregationFunction) ([]*AggregationFunction, []Expr, bool, error) { + switch agg.Func { + case AggFuncAvg: + sum := &AggregationFunction{ + Func: AggFuncSum, + Expr: agg.Expr, + } + count := &AggregationFunction{ + Func: AggFuncCount, + Expr: agg.Expr, + } + + var ( + countExpr Expr = count + aggType arrow.DataType + ) + aggType, err := agg.Expr.DataType(plan) + // intentionally not handling the error here, as it will be handled + // in the build function. + if !arrow.TypeEqual(aggType, arrow.PrimitiveTypes.Int64) { + countExpr = Convert(countExpr, aggType) + } + + div := (&BinaryExpr{ + Left: sum, + Op: OpDiv, + Right: countExpr, + }).Alias(agg.String()) + + return []*AggregationFunction{sum, count}, []Expr{div}, true, err + default: + return []*AggregationFunction{agg}, []Expr{agg}, false, nil + } +} + +func (b Builder) Sample(expr, limit Expr) Builder { + if expr == nil || limit == nil { + return b + } + + return Builder{ + err: b.err, plan: &LogicalPlan{ Input: b.plan, - Aggregation: &Aggregation{ - GroupExprs: groupExprs, - AggExprs: aggExpr, + Sample: &Sample{ + Expr: expr, + Limit: limit, }, }, } } func (b Builder) Build() (*LogicalPlan, error) { + if b.err != nil { + return nil, b.err + } + if err := Validate(b.plan); err != nil { return nil, err } diff --git a/query/logicalplan/builder_test.go b/query/logicalplan/builder_test.go index 391ccedcf..9ec168011 100644 --- a/query/logicalplan/builder_test.go +++ b/query/logicalplan/builder_test.go 
@@ -3,7 +3,7 @@ package logicalplan import ( "testing" - "github.com/apache/arrow/go/v10/arrow/scalar" + "github.com/apache/arrow-go/v18/arrow/scalar" "github.com/stretchr/testify/require" "github.com/polarsignals/frostdb/dynparquet" @@ -15,10 +15,10 @@ func TestLogicalPlanBuilder(t *testing.T) { Scan(tableProvider, "table1"). Filter(Col("labels.test").Eq(Literal("abc"))). Aggregate( - []Expr{Sum(Col("value")).Alias("value_sum")}, + []*AggregationFunction{Sum(Col("value"))}, []Expr{Col("stacktrace")}, ). - Project(Col("stacktrace")). + Project(Col("stacktrace"), Sum(Col("value")).Alias("value_sum")). Build() require.Nil(t, err) @@ -27,15 +27,16 @@ func TestLogicalPlanBuilder(t *testing.T) { Projection: &Projection{ Exprs: []Expr{ &Column{ColumnName: "stacktrace"}, + &AliasExpr{ + Expr: &AggregationFunction{Func: AggFuncSum, Expr: &Column{ColumnName: "value"}}, + Alias: "value_sum", + }, }, }, Input: &LogicalPlan{ Aggregation: &Aggregation{ GroupExprs: []Expr{&Column{ColumnName: "stacktrace"}}, - AggExprs: []Expr{&AliasExpr{ - Expr: &AggregationFunction{Func: AggFuncSum, Expr: &Column{ColumnName: "value"}}, - Alias: "value_sum", - }}, + AggExprs: []*AggregationFunction{{Func: AggFuncSum, Expr: &Column{ColumnName: "value"}}}, }, Input: &LogicalPlan{ Filter: &Filter{ @@ -68,10 +69,33 @@ func TestLogicalPlanBuilderWithoutProjection(t *testing.T) { Exprs: []Expr{&Column{ColumnName: "labels.test"}}, }, Input: &LogicalPlan{ - TableScan: &TableScan{ - TableProvider: tableProvider, - TableName: "table1", + Projection: &Projection{ + Exprs: []Expr{&Column{ColumnName: "labels.test"}}, + }, + Input: &LogicalPlan{ + TableScan: &TableScan{ + TableProvider: tableProvider, + TableName: "table1", + }, }, }, }, p) } + +func TestRenamedColumn(t *testing.T) { + tableProvider := &mockTableProvider{schema: dynparquet.NewSampleSchema()} + _, err := (&Builder{}). + Scan(tableProvider, "table1"). 
+ Filter(Col("labels.test").Eq(Literal("abc"))). + Project( + Div(Mul(Col("value"), Literal(int64(2))), Literal(int64(2))).Alias("other_value"), + Col("stacktrace"), + ). + Aggregate( + []*AggregationFunction{Sum(Col("other_value"))}, + []Expr{Col("stacktrace")}, + ). + Project(Col("stacktrace"), Sum(Col("other_value")).Alias("value_sum")). + Build() + require.NoError(t, err) +} diff --git a/query/logicalplan/expr.go b/query/logicalplan/expr.go index ad94cc015..4911ba6f3 100644 --- a/query/logicalplan/expr.go +++ b/query/logicalplan/expr.go @@ -2,14 +2,12 @@ package logicalplan import ( "errors" + "fmt" "strings" "time" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/scalar" - "github.com/segmentio/parquet-go" - - "github.com/polarsignals/frostdb/pqarrow/convert" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/scalar" ) type Op uint32 @@ -26,6 +24,12 @@ const ( OpRegexNotMatch OpAnd OpOr + OpAdd + OpSub + OpMul + OpDiv + OpContains + OpNotContains ) func (o Op) String() string { @@ -50,17 +54,81 @@ func (o Op) String() string { return "&&" case OpOr: return "||" + case OpAdd: + return "+" + case OpSub: + return "-" + case OpMul: + return "*" + case OpDiv: + return "/" + case OpContains: + return "contains" + case OpNotContains: + return "not contains" default: panic("unknown operator") } } +func (o Op) ArrowString() string { + switch o { + case OpEq: + return "equal" + case OpNotEq: + return "not_equal" + case OpLt: + return "less" + case OpLtEq: + return "less_equal" + case OpGt: + return "greater" + case OpGtEq: + return "greater_equal" + case OpAnd: + return "and" + case OpOr: + return "or" + case OpAdd: + return "add" + case OpSub: + return "subtract" + case OpMul: + return "multiply" + case OpDiv: + return "divide" + default: + panic("unknown arrow operator") + } +} + type BinaryExpr struct { Left Expr Op Op 
Right Expr } +func (e *BinaryExpr) Equal(other Expr) bool { + if other == nil { + // if both are nil, they are equal + return e == nil + } + + if b, ok := other.(*BinaryExpr); ok { + return e.Op == b.Op && e.Left.Equal(b.Left) && e.Right.Equal(b.Right) + } + + return false +} + +func (e *BinaryExpr) Clone() Expr { + return &BinaryExpr{ + Left: e.Left.Clone(), + Op: e.Op, + Right: e.Right.Clone(), + } +} + func (e *BinaryExpr) Accept(visitor Visitor) bool { continu := visitor.PreVisit(e) if !continu { @@ -72,6 +140,11 @@ func (e *BinaryExpr) Accept(visitor Visitor) bool { return false } + continu = visitor.Visit(e) + if !continu { + return false + } + continu = e.Right.Accept(visitor) if !continu { return false @@ -80,14 +153,37 @@ func (e *BinaryExpr) Accept(visitor Visitor) bool { return visitor.PostVisit(e) } -func (e *BinaryExpr) DataType(_ *parquet.Schema) (arrow.DataType, error) { - return &arrow.BooleanType{}, nil +func (e *BinaryExpr) DataType(l ExprTypeFinder) (arrow.DataType, error) { + leftType, err := e.Left.DataType(l) + if err != nil { + return nil, fmt.Errorf("left operand: %w", err) + } + + rightType, err := e.Right.DataType(l) + if err != nil { + return nil, fmt.Errorf("right operand: %w", err) + } + + if !arrow.TypeEqual(leftType, rightType) { + return nil, fmt.Errorf("left and right operands must be of the same type, got %s and %s", leftType, rightType) + } + + switch e.Op { + case OpEq, OpNotEq, OpLt, OpLtEq, OpGt, OpGtEq, OpAnd, OpOr: + return arrow.FixedWidthTypes.Boolean, nil + case OpAdd, OpSub, OpMul, OpDiv: + return leftType, nil + default: + return nil, errors.New("unknown operator") + } } func (e *BinaryExpr) Name() string { return e.Left.Name() + " " + e.Op.String() + " " + e.Right.Name() } +func (e *BinaryExpr) String() string { return e.Name() } + func (e *BinaryExpr) ColumnsUsedExprs() []Expr { return append(e.Left.ColumnsUsedExprs(), e.Right.ColumnsUsedExprs()...) 
} @@ -108,10 +204,112 @@ func (e *BinaryExpr) Alias(alias string) *AliasExpr { return &AliasExpr{Expr: e, Alias: alias} } +func Convert(e Expr, t arrow.DataType) *ConvertExpr { + return &ConvertExpr{Expr: e, Type: t} +} + +type ConvertExpr struct { + Expr Expr + Type arrow.DataType +} + +func (e *ConvertExpr) Equal(other Expr) bool { + if other == nil { + // if both are nil, they are equal + return e == nil + } + + if c, ok := other.(*ConvertExpr); ok { + return arrow.TypeEqual(e.Type, c.Type) && e.Expr.Equal(c.Expr) + } + + return false +} + +func (e *ConvertExpr) Clone() Expr { + return &ConvertExpr{ + Expr: e.Expr.Clone(), + Type: e.Type, + } +} + +func (e *ConvertExpr) Accept(visitor Visitor) bool { + continu := visitor.PreVisit(e) + if !continu { + return false + } + + continu = e.Expr.Accept(visitor) + if !continu { + return false + } + + continu = visitor.Visit(e) + if !continu { + return false + } + + return visitor.PostVisit(e) +} + +func (e *ConvertExpr) DataType(l ExprTypeFinder) (arrow.DataType, error) { + // We don't care about the result, but we want to make sure the expression + // tree typing is correct. 
+ _, err := e.Expr.DataType(l) + if err != nil { + return nil, fmt.Errorf("convert type: %w", err) + } + + return e.Type, nil +} + +func (e *ConvertExpr) Name() string { + return "convert(" + e.Expr.Name() + ", " + e.Type.String() + ")" +} + +func (e *ConvertExpr) String() string { return e.Name() } + +func (e *ConvertExpr) ColumnsUsedExprs() []Expr { + return e.Expr.ColumnsUsedExprs() +} + +func (e *ConvertExpr) MatchPath(path string) bool { + return strings.HasPrefix(e.Name(), path) +} + +func (e *ConvertExpr) MatchColumn(columnName string) bool { + return e.Name() == columnName +} + +func (e *ConvertExpr) Computed() bool { + return true +} + +func (e *ConvertExpr) Alias(alias string) *AliasExpr { + return &AliasExpr{Expr: e, Alias: alias} +} + type Column struct { ColumnName string } +func (c *Column) Equal(other Expr) bool { + if other == nil { + // if both are nil, they are equal + return c == nil + } + + if col, ok := other.(*Column); ok { + return c.ColumnName == col.ColumnName + } + + return false +} + +func (c *Column) Clone() Expr { + return &Column{ColumnName: c.ColumnName} +} + func (c *Column) Computed() bool { return false } @@ -129,52 +327,15 @@ func (c *Column) Name() string { return c.ColumnName } -func (c *Column) DataType(s *parquet.Schema) (arrow.DataType, error) { - for _, field := range s.Fields() { - af, err := c.findField("", field) - if err != nil { - return nil, err - } - if af.Name != "" { - return af.Type, nil - } - } - - return nil, errors.New("column not found") -} +func (c *Column) String() string { return c.Name() } -func fullPath(prefix string, parquetField parquet.Field) string { - if prefix == "" { - return parquetField.Name() +func (c *Column) DataType(l ExprTypeFinder) (arrow.DataType, error) { + t, err := l.DataTypeForExpr(c) + if err != nil { + return nil, fmt.Errorf("column %q type: %w", c.ColumnName, err) } - return strings.Join([]string{prefix, parquetField.Name()}, ".") -} -func (c *Column) findField(prefix string, field 
parquet.Field) (arrow.Field, error) { - if c.ColumnName == fullPath(prefix, field) { - return convert.ParquetFieldToArrowField(field) - } - - if !field.Leaf() && strings.HasPrefix(c.ColumnName, fullPath(prefix, field)) { - group := []arrow.Field{} - for _, f := range field.Fields() { - af, err := c.findField(fullPath(prefix, field), f) - if err != nil { - return arrow.Field{}, err - } - if af.Name != "" { - group = append(group, af) - } - } - if len(group) > 0 { - return arrow.Field{ - Name: field.Name(), - Type: arrow.StructOf(group...), - Nullable: field.Optional(), - }, nil - } - } - return arrow.Field{}, nil + return t, nil } func (c *Column) Alias(alias string) *AliasExpr { @@ -257,6 +418,22 @@ func (c *Column) RegexNotMatch(pattern string) *BinaryExpr { } } +func (c *Column) Contains(pattern string) *BinaryExpr { + return &BinaryExpr{ + Left: c, + Op: OpContains, + Right: Literal(pattern), + } +} + +func (c *Column) ContainsNot(pattern string) *BinaryExpr { + return &BinaryExpr{ + Left: c, + Op: OpNotContains, + Right: Literal(pattern), + } +} + func Col(name string) *Column { return &Column{ColumnName: name} } @@ -265,6 +442,38 @@ func And(exprs ...Expr) Expr { return and(exprs) } +func Add(left, right Expr) *BinaryExpr { + return &BinaryExpr{ + Left: left, + Op: OpAdd, + Right: right, + } +} + +func Sub(left, right Expr) *BinaryExpr { + return &BinaryExpr{ + Left: left, + Op: OpSub, + Right: right, + } +} + +func Mul(left, right Expr) *BinaryExpr { + return &BinaryExpr{ + Left: left, + Op: OpMul, + Right: right, + } +} + +func Div(left, right Expr) *BinaryExpr { + return &BinaryExpr{ + Left: left, + Op: OpDiv, + Right: right, + } +} + func and(exprs []Expr) Expr { return computeBinaryExpr(exprs, OpAnd) } @@ -310,6 +519,23 @@ type DynamicColumn struct { ColumnName string } +func (c *DynamicColumn) Equal(other Expr) bool { + if other == nil { + // if both are nil, they are equal + return c == nil + } + + if col, ok := other.(*DynamicColumn); ok { + return 
c.ColumnName == col.ColumnName + } + + return false +} + +func (c *DynamicColumn) Clone() Expr { + return &DynamicColumn{ColumnName: c.ColumnName} +} + func (c *DynamicColumn) Computed() bool { return false } @@ -318,16 +544,13 @@ func DynCol(name string) *DynamicColumn { return &DynamicColumn{ColumnName: name} } -func (c *DynamicColumn) DataType(s *parquet.Schema) (arrow.DataType, error) { - for _, field := range s.Fields() { - if names := strings.Split(field.Name(), "."); len(names) == 2 { - if names[0] == c.ColumnName { - return convert.ParquetNodeToType(field) - } - } +func (c *DynamicColumn) DataType(l ExprTypeFinder) (arrow.DataType, error) { + t, err := l.DataTypeForExpr(c) + if err != nil { + return nil, fmt.Errorf("dynamic column %q type: %w", c.ColumnName, err) } - return nil, errors.New("column not found") + return t, nil } func (c *DynamicColumn) ColumnsUsedExprs() []Expr { @@ -346,6 +569,8 @@ func (c *DynamicColumn) Name() string { return c.ColumnName } +func (c *DynamicColumn) String() string { return c.Name() } + func (c *DynamicColumn) Accept(visitor Visitor) bool { return visitor.PreVisit(c) && visitor.PostVisit(c) } @@ -362,6 +587,25 @@ type LiteralExpr struct { Value scalar.Scalar } +func (e *LiteralExpr) Equal(other Expr) bool { + if other == nil { + // if both are nil, they are equal + return e == nil + } + + if lit, ok := other.(*LiteralExpr); ok { + return scalar.Equals(e.Value, lit.Value) + } + + return false +} + +func (e *LiteralExpr) Clone() Expr { + return &LiteralExpr{ + Value: e.Value, + } +} + func (e *LiteralExpr) Computed() bool { return false } @@ -372,7 +616,7 @@ func Literal(v interface{}) *LiteralExpr { } } -func (e *LiteralExpr) DataType(_ *parquet.Schema) (arrow.DataType, error) { +func (e *LiteralExpr) DataType(_ ExprTypeFinder) (arrow.DataType, error) { return e.Value.DataType(), nil } @@ -380,6 +624,8 @@ func (e *LiteralExpr) Name() string { return e.Value.String() } +func (e *LiteralExpr) String() string { return e.Name() 
} + func (e *LiteralExpr) Accept(visitor Visitor) bool { continu := visitor.PreVisit(e) if !continu { @@ -404,8 +650,28 @@ type AggregationFunction struct { Expr Expr } -func (f *AggregationFunction) DataType(s *parquet.Schema) (arrow.DataType, error) { - return f.Expr.DataType(s) +func (f *AggregationFunction) Equal(other Expr) bool { + if other == nil { + // if both are nil, they are equal + return f == nil + } + + if agg, ok := other.(*AggregationFunction); ok { + return f.Func == agg.Func && f.Expr.Equal(agg.Expr) + } + + return false +} + +func (f *AggregationFunction) Clone() Expr { + return &AggregationFunction{ + Func: f.Func, + Expr: f.Expr.Clone(), + } +} + +func (f *AggregationFunction) DataType(l ExprTypeFinder) (arrow.DataType, error) { + return f.Expr.DataType(l) } func (f *AggregationFunction) Accept(visitor Visitor) bool { @@ -419,6 +685,11 @@ func (f *AggregationFunction) Accept(visitor Visitor) bool { return false } + continu = visitor.Visit(f) + if !continu { + return false + } + return visitor.PostVisit(f) } @@ -430,6 +701,8 @@ func (f *AggregationFunction) Name() string { return f.Func.String() + "(" + f.Expr.Name() + ")" } +func (f *AggregationFunction) String() string { return f.Name() } + func (f *AggregationFunction) ColumnsUsedExprs() []Expr { return f.Expr.ColumnsUsedExprs() } @@ -451,6 +724,8 @@ const ( AggFuncMax AggFuncCount AggFuncAvg + AggFuncUnique + AggFuncAnd ) func (f AggFunc) String() string { @@ -465,6 +740,10 @@ func (f AggFunc) String() string { return "count" case AggFuncAvg: return "avg" + case AggFuncUnique: + return "unique" + case AggFuncAnd: + return "and" default: panic("unknown aggregation function") } @@ -498,6 +777,20 @@ func Count(expr Expr) *AggregationFunction { } } +func Unique(expr Expr) *AggregationFunction { + return &AggregationFunction{ + Func: AggFuncUnique, + Expr: expr, + } +} + +func AndAgg(expr Expr) *AggregationFunction { + return &AggregationFunction{ + Func: AggFuncAnd, + Expr: expr, + } +} + func 
Avg(expr Expr) *AggregationFunction { return &AggregationFunction{ Func: AggFuncAvg, @@ -505,19 +798,240 @@ func Avg(expr Expr) *AggregationFunction { } } +func IsNull(expr Expr) *IsNullExpr { + return &IsNullExpr{ + Expr: expr, + } +} + +type IsNullExpr struct { + Expr Expr +} + +func (e *IsNullExpr) Equal(other Expr) bool { + if other == nil { + // if both are nil, they are equal + return e == nil + } + + if isNull, ok := other.(*IsNullExpr); ok { + return e.Expr.Equal(isNull.Expr) + } + + return false +} + +func (e *IsNullExpr) Clone() Expr { + return &IsNullExpr{ + Expr: e.Expr.Clone(), + } +} + +func (e *IsNullExpr) DataType(l ExprTypeFinder) (arrow.DataType, error) { + _, err := e.Expr.DataType(l) + if err != nil { + return nil, err + } + + return arrow.FixedWidthTypes.Boolean, nil +} + +func (e *IsNullExpr) Accept(visitor Visitor) bool { + continu := visitor.PreVisit(e) + if !continu { + return false + } + + continu = e.Expr.Accept(visitor) + if !continu { + return false + } + + continu = visitor.Visit(e) + if !continu { + return false + } + + return visitor.PostVisit(e) +} + +func (e *IsNullExpr) Computed() bool { + return true +} + +func (e *IsNullExpr) Name() string { + return "isnull(" + e.Expr.Name() + ")" +} + +func (e *IsNullExpr) String() string { return e.Name() } + +func (e *IsNullExpr) ColumnsUsedExprs() []Expr { + return e.Expr.ColumnsUsedExprs() +} + +func (e *IsNullExpr) MatchColumn(columnName string) bool { + return e.Name() == columnName +} + +func (e *IsNullExpr) MatchPath(path string) bool { + return strings.HasPrefix(e.Name(), path) +} + +func If(cond, then, els Expr) *IfExpr { + return &IfExpr{ + Cond: cond, + Then: then, + Else: els, + } +} + +type IfExpr struct { + Cond Expr + Then Expr + Else Expr +} + +func (e *IfExpr) Equal(other Expr) bool { + if other == nil { + // if both are nil, they are equal + return e == nil + } + + if ife, ok := other.(*IfExpr); ok { + return e.Cond.Equal(ife.Cond) && e.Then.Equal(ife.Then) && 
e.Else.Equal(ife.Else) + } + + return false +} + +func (e *IfExpr) Clone() Expr { + return &IfExpr{ + Cond: e.Cond.Clone(), + Then: e.Then.Clone(), + Else: e.Else.Clone(), + } +} + +func (e *IfExpr) DataType(l ExprTypeFinder) (arrow.DataType, error) { + condType, err := e.Cond.DataType(l) + if err != nil { + return nil, err + } + + if !arrow.TypeEqual(condType, arrow.FixedWidthTypes.Boolean) { + return nil, fmt.Errorf("condition expression must be of type bool, got %s", condType) + } + + thenType, err := e.Then.DataType(l) + if err != nil { + return nil, err + } + + elseType, err := e.Else.DataType(l) + if err != nil { + return nil, err + } + + if !arrow.TypeEqual(thenType, elseType) { + return nil, fmt.Errorf("then and else expression must have the same type, got %s and %s", thenType, elseType) + } + + return thenType, nil +} + +func (e *IfExpr) Accept(visitor Visitor) bool { + continu := visitor.PreVisit(e) + if !continu { + return false + } + + continu = e.Cond.Accept(visitor) + if !continu { + return false + } + + continu = e.Then.Accept(visitor) + if !continu { + return false + } + + continu = e.Else.Accept(visitor) + if !continu { + return false + } + + continu = visitor.Visit(e) + if !continu { + return false + } + + return visitor.PostVisit(e) +} + +func (e *IfExpr) Alias(name string) *AliasExpr { + return &AliasExpr{ + Expr: e, + Alias: name, + } +} + +func (e *IfExpr) Computed() bool { + return true +} + +func (e *IfExpr) Name() string { + return "if(" + e.Cond.Name() + ") { " + e.Then.Name() + " } else { " + e.Else.Name() + "}" +} + +func (e *IfExpr) String() string { return e.Name() } + +func (e *IfExpr) ColumnsUsedExprs() []Expr { + return append(append(e.Cond.ColumnsUsedExprs(), e.Then.ColumnsUsedExprs()...), e.Else.ColumnsUsedExprs()...) 
+} + +func (e *IfExpr) MatchColumn(columnName string) bool { + return e.Cond.MatchColumn(columnName) || e.Then.MatchColumn(columnName) || e.Else.MatchColumn(columnName) +} + +func (e *IfExpr) MatchPath(path string) bool { + return e.Cond.MatchPath(path) || e.Then.MatchPath(path) || e.Else.MatchPath(path) +} + type AliasExpr struct { Expr Expr Alias string } -func (e *AliasExpr) DataType(s *parquet.Schema) (arrow.DataType, error) { - return e.Expr.DataType(s) +func (e *AliasExpr) Equal(other Expr) bool { + if other == nil { + // if both are nil, they are equal + return e == nil + } + + if alias, ok := other.(*AliasExpr); ok { + return e.Alias == alias.Alias && e.Expr.Equal(alias.Expr) + } + + return false +} + +func (e *AliasExpr) Clone() Expr { + return &AliasExpr{ + Expr: e.Expr.Clone(), + Alias: e.Alias, + } +} + +func (e *AliasExpr) DataType(l ExprTypeFinder) (arrow.DataType, error) { + return e.Expr.DataType(l) } func (e *AliasExpr) Name() string { return e.Alias } +func (e *AliasExpr) String() string { return fmt.Sprintf("%s as %s", e.Expr.String(), e.Alias) } + func (e *AliasExpr) Computed() bool { return e.Expr.Computed() } @@ -563,11 +1077,30 @@ type DurationExpr struct { duration time.Duration } -func (d *DurationExpr) DataType(schema *parquet.Schema) (arrow.DataType, error) { +func (d *DurationExpr) Equal(other Expr) bool { + if other == nil { + // if both are nil, they are equal + return d == nil + } + + if dur, ok := other.(*DurationExpr); ok { + return d.duration == dur.duration + } + + return false +} + +func (d *DurationExpr) Clone() Expr { + return &DurationExpr{ + duration: d.duration, + } +} + +func (d *DurationExpr) DataType(_ ExprTypeFinder) (arrow.DataType, error) { return &arrow.DurationType{}, nil } -func (d *DurationExpr) MatchPath(path string) bool { +func (d *DurationExpr) MatchPath(_ string) bool { return false } @@ -581,9 +1114,11 @@ func (d *DurationExpr) Accept(visitor Visitor) bool { } func (d *DurationExpr) Name() string { - return 
"" + return fmt.Sprintf("second(%d)", int(d.duration.Seconds())) } +func (d *DurationExpr) String() string { return d.Name() } + func (d *DurationExpr) ColumnsUsedExprs() []Expr { // DurationExpr expect to work on a timestamp column return []Expr{Col("timestamp")} @@ -601,39 +1136,91 @@ func (d *DurationExpr) Value() time.Duration { return d.duration } -type AverageExpr struct { - Expr Expr +type AllExpr struct{} + +func All() *AllExpr { + return &AllExpr{} } -func (a *AverageExpr) DataType(s *parquet.Schema) (arrow.DataType, error) { - return a.Expr.DataType(s) +func (a *AllExpr) Equal(other Expr) bool { + if other == nil { + // if both are nil, they are equal + return a == nil + } + + _, ok := other.(*AllExpr) + return ok } -func (a *AverageExpr) Name() string { - return a.Expr.Name() +func (a *AllExpr) DataType(ExprTypeFinder) (arrow.DataType, error) { return nil, nil } +func (a *AllExpr) Accept(visitor Visitor) bool { + continu := visitor.PreVisit(a) + if !continu { + return false + } + + return visitor.PostVisit(a) +} +func (a *AllExpr) Name() string { return "all" } +func (a *AllExpr) String() string { return a.Name() } +func (a *AllExpr) ColumnsUsedExprs() []Expr { + return []Expr{&AllExpr{}} } +func (a *AllExpr) MatchColumn(_ string) bool { return true } +func (a *AllExpr) MatchPath(_ string) bool { return true } +func (a *AllExpr) Computed() bool { return false } +func (a *AllExpr) Clone() Expr { return &AllExpr{} } -func (a *AverageExpr) ColumnsUsedExprs() []Expr { - return a.Expr.ColumnsUsedExprs() +type NotExpr struct { + Expr Expr } -func (a *AverageExpr) MatchPath(path string) bool { - return a.Expr.MatchPath(path) +func Not(expr Expr) *NotExpr { + return &NotExpr{ + Expr: expr, + } } -func (a *AverageExpr) MatchColumn(name string) bool { - return a.Expr.MatchColumn(name) +func (n *NotExpr) Equal(other Expr) bool { + if other == nil { + // if both are nil, they are equal + return n == nil + } + + if not, ok := other.(*NotExpr); ok { + return 
n.Expr.Equal(not.Expr) + } + + return false } -func (a *AverageExpr) Computed() bool { - return true +func (n *NotExpr) DataType(l ExprTypeFinder) (arrow.DataType, error) { + typ, err := n.Expr.DataType(l) + if err != nil { + return nil, err + } + + if !arrow.TypeEqual(typ, arrow.FixedWidthTypes.Boolean) { + return nil, fmt.Errorf("not expression can only be applied to boolean expressions, got %s", typ) + } + + return arrow.FixedWidthTypes.Boolean, nil } -func (a *AverageExpr) Accept(visitor Visitor) bool { - continu := visitor.PreVisit(a) +func (n *NotExpr) Accept(visitor Visitor) bool { + continu := visitor.PreVisit(n) if !continu { return false } - return visitor.PostVisit(a) + return visitor.PostVisit(n) +} +func (n *NotExpr) Name() string { return "not " + n.Expr.Name() } +func (n *NotExpr) String() string { return n.Name() } +func (n *NotExpr) ColumnsUsedExprs() []Expr { + return []Expr{&NotExpr{Expr: n.Expr}} } +func (n *NotExpr) MatchColumn(columnName string) bool { return !n.Expr.MatchColumn(columnName) } +func (n *NotExpr) MatchPath(path string) bool { return !n.Expr.MatchPath(path) } +func (n *NotExpr) Computed() bool { return false } +func (n *NotExpr) Clone() Expr { return &NotExpr{Expr: n.Expr} } diff --git a/query/logicalplan/logicalplan.go b/query/logicalplan/logicalplan.go index a1a7ee5ea..69664638e 100644 --- a/query/logicalplan/logicalplan.go +++ b/query/logicalplan/logicalplan.go @@ -5,10 +5,11 @@ import ( "fmt" "strings" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/memory" "github.com/polarsignals/frostdb/dynparquet" + "github.com/polarsignals/frostdb/pqarrow/convert" ) // LogicalPlan is a logical representation of a query. 
Each LogicalPlan is a @@ -23,6 +24,8 @@ type LogicalPlan struct { Distinct *Distinct Projection *Projection Aggregation *Aggregation + Limit *Limit + Sample *Sample } // Callback is a function that is called throughout a chain of operators @@ -35,10 +38,17 @@ type IterOptions struct { Projection []Expr Filter Expr DistinctColumns []Expr + ReadMode ReadMode } type Option func(opts *IterOptions) +func WithReadMode(m ReadMode) Option { + return func(opts *IterOptions) { + opts.ReadMode = m + } +} + func WithPhysicalProjection(e ...Expr) Option { return func(opts *IterOptions) { opts.PhysicalProjection = append(opts.PhysicalProjection, e...) @@ -93,6 +103,76 @@ func (plan *LogicalPlan) string(indent int) string { return res } +func (plan *LogicalPlan) DataTypeForExpr(expr Expr) (arrow.DataType, error) { + switch { + case plan.SchemaScan != nil: + t, err := plan.SchemaScan.DataTypeForExpr(expr) + if err != nil { + return nil, fmt.Errorf("data type for expr %v within SchemaScan: %w", expr, err) + } + + return t, nil + case plan.TableScan != nil: + t, err := plan.TableScan.DataTypeForExpr(expr) + if err != nil { + return nil, fmt.Errorf("data type for expr %v within TableScan: %w", expr, err) + } + + return t, nil + case plan.Filter != nil: + t, err := plan.Input.DataTypeForExpr(expr) + if err != nil { + return nil, fmt.Errorf("data type for expr %v within Filter: %w", expr, err) + } + + return t, nil + case plan.Projection != nil: + for _, e := range plan.Projection.Exprs { + if e.Name() == expr.Name() { + return e.DataType(plan.Input) + } + } + + t, err := expr.DataType(plan.Input) + if err != nil { + return nil, fmt.Errorf("data type for expr %v within Projection: %w", expr, err) + } + + return t, nil + case plan.Aggregation != nil: + if agg, ok := expr.(*AggregationFunction); ok { + if agg.Func == AggFuncCount { + return arrow.PrimitiveTypes.Int64, nil + } + + return agg.Expr.DataType(plan.Input) + } + + t, err := expr.DataType(plan.Input) + if err != nil { + return 
nil, fmt.Errorf("data type for expr %v within Aggregation: %w", expr, err) + } + + return t, nil + case plan.Distinct != nil: + t, err := expr.DataType(plan.Input) + if err != nil { + return nil, fmt.Errorf("data type for expr %v within Distinct: %w", expr, err) + } + + return t, nil + case plan.Sample != nil: + t, err := expr.DataType(plan.Input) + if err != nil { + return nil, fmt.Errorf("data type for expr %v within Sample: %w", expr, err) + } + + return t, nil + default: + return nil, fmt.Errorf("unknown logical plan") + } +} + // TableReader returns the table reader. func (plan *LogicalPlan) TableReader() (TableReader, error) { if plan.TableScan != nil { @@ -165,7 +245,8 @@ type TableScan struct { TableName string // PhysicalProjection describes the columns that are to be physically read - // by the table scan. + // by the table scan. This is an Expr so it can be either a column or + // dynamic column. PhysicalProjection []Expr // Filter is the predicate that is to be applied by the table scan to rule @@ -177,6 +258,69 @@ type TableScan struct { // Projection is the list of columns that are to be projected. Projection []Expr + + // ReadMode indicates the mode to use when reading. 
+ ReadMode ReadMode +} + +func (scan *TableScan) DataTypeForExpr(expr Expr) (arrow.DataType, error) { + tp, err := scan.TableProvider.GetTable(scan.TableName) + if err != nil { + return nil, fmt.Errorf("get table %q: %w", scan.TableName, err) + } + + s := tp.Schema() + if s == nil { + return nil, fmt.Errorf("table %q has no schema", scan.TableName) + } + + t, err := DataTypeForExprWithSchema(expr, s) + if err != nil { + return nil, fmt.Errorf("type for expr %q in table %q: %w", expr, scan.TableName, err) + } + + return t, nil +} + +func DataTypeForExprWithSchema(expr Expr, s *dynparquet.Schema) (arrow.DataType, error) { + switch expr := expr.(type) { + case *Column: + colDef, found := s.FindDynamicColumnForConcreteColumn(expr.ColumnName) + if found { + t, err := convert.ParquetNodeToType(colDef.StorageLayout) + if err != nil { + return nil, fmt.Errorf("convert parquet node to type: %w", err) + } + + return t, nil + } + + colDef, found = s.FindColumn(expr.ColumnName) + if found { + t, err := convert.ParquetNodeToType(colDef.StorageLayout) + if err != nil { + return nil, fmt.Errorf("convert parquet node to type: %w", err) + } + + return t, nil + } + + return nil, fmt.Errorf("column %q not found", expr.ColumnName) + case *DynamicColumn: + colDef, found := s.FindDynamicColumn(expr.ColumnName) + if found { + t, err := convert.ParquetNodeToType(colDef.StorageLayout) + if err != nil { + return nil, fmt.Errorf("convert parquet node to type: %w", err) + } + + return t, nil + } + + return nil, fmt.Errorf("dynamic column %q not found", expr.ColumnName) + default: + return nil, fmt.Errorf("unhandled expr type %T", expr) + } } func (scan *TableScan) String() string { @@ -187,6 +331,18 @@ func (scan *TableScan) String() string { " Distinct: " + fmt.Sprint(scan.Distinct) } +type ReadMode int + +const ( + // ReadModeDefault is the default read mode. Reads from in-memory and object + // storage. 
+ ReadModeDefault ReadMode = iota + // ReadModeInMemoryOnly reads from in-memory storage only. + ReadModeInMemoryOnly + // ReadModeDataSourcesOnly reads from data sources only. + ReadModeDataSourcesOnly +) + type SchemaScan struct { TableProvider TableProvider TableName string @@ -204,12 +360,28 @@ type SchemaScan struct { // Projection is the list of columns that are to be projected. Projection []Expr + + // ReadMode indicates the mode to use when reading. + ReadMode ReadMode } func (s *SchemaScan) String() string { return "SchemaScan" } +func (s *SchemaScan) DataTypeForExpr(expr Expr) (arrow.DataType, error) { + switch expr := expr.(type) { + case *Column: + if expr.ColumnName == "name" { + return arrow.BinaryTypes.String, nil + } + + return nil, fmt.Errorf("unknown column %s", expr.ColumnName) + default: + return nil, fmt.Errorf("unhandled expr %T", expr) + } +} + type Filter struct { Expr Expr } @@ -231,14 +403,31 @@ type Projection struct { } func (p *Projection) String() string { - return "Projection" + return "Projection (" + fmt.Sprint(p.Exprs) + ")" } type Aggregation struct { - AggExprs []Expr + AggExprs []*AggregationFunction GroupExprs []Expr } func (a *Aggregation) String() string { return "Aggregation " + fmt.Sprint(a.AggExprs) + " Group: " + fmt.Sprint(a.GroupExprs) } + +type Limit struct { + Expr Expr +} + +func (l *Limit) String() string { + return "Limit" + " Expr: " + fmt.Sprint(l.Expr) +} + +type Sample struct { + Expr Expr + Limit Expr +} + +func (s *Sample) String() string { + return "Sample" + " Expr: " + fmt.Sprint(s.Expr) +} diff --git a/query/logicalplan/logicalplan_test.go b/query/logicalplan/logicalplan_test.go index ac718c8ef..c38538dbc 100644 --- a/query/logicalplan/logicalplan_test.go +++ b/query/logicalplan/logicalplan_test.go @@ -4,7 +4,7 @@ import ( "context" "testing" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow/memory" 
"github.com/stretchr/testify/require" "github.com/polarsignals/frostdb/dynparquet" @@ -18,26 +18,26 @@ func (m *mockTableReader) Schema() *dynparquet.Schema { return m.schema } -func (m *mockTableReader) View(ctx context.Context, fn func(ctx context.Context, tx uint64) error) error { +func (m *mockTableReader) View(_ context.Context, _ func(ctx context.Context, tx uint64) error) error { return nil } func (m *mockTableReader) Iterator( - ctx context.Context, - tx uint64, - pool memory.Allocator, - callbacks []Callback, - iterOpts ...Option, + _ context.Context, + _ uint64, + _ memory.Allocator, + _ []Callback, + _ ...Option, ) error { return nil } func (m *mockTableReader) SchemaIterator( - ctx context.Context, - tx uint64, - pool memory.Allocator, - callbacks []Callback, - iterOpts ...Option, + _ context.Context, + _ uint64, + _ memory.Allocator, + _ []Callback, + _ ...Option, ) error { return nil } @@ -46,7 +46,7 @@ type mockTableProvider struct { schema *dynparquet.Schema } -func (m *mockTableProvider) GetTable(name string) (TableReader, error) { +func (m *mockTableProvider) GetTable(_ string) (TableReader, error) { return &mockTableReader{ schema: m.schema, }, nil @@ -56,38 +56,25 @@ func TestInputSchemaGetter(t *testing.T) { schema := dynparquet.NewSampleSchema() // test we can get the table by traversing to find the TableScan - plan, _ := (&Builder{}). + plan, err := (&Builder{}). Scan(&mockTableProvider{schema}, "table1"). Filter(Col("labels.test").Eq(Literal("abc"))). Aggregate( - []Expr{Sum(Col("value")).Alias("value_sum")}, + []*AggregationFunction{Sum(Col("value"))}, []Expr{Col("stacktrace")}, ). - Project(Col("stacktrace")). + Project(Col("stacktrace"), Sum(Col("value")).Alias("value_sum")). Build() + require.NoError(t, err) require.Equal(t, schema, plan.InputSchema()) +} - // test we can get the table by traversing to find SchemaScan - plan, _ = (&Builder{}). - ScanSchema(&mockTableProvider{schema}, "table1"). 
- Filter(Col("labels.test").Eq(Literal("abc"))). - Aggregate( - []Expr{Sum(Col("value")).Alias("value_sum")}, - []Expr{Col("stacktrace")}, - ). - Project(Col("stacktrace")). - Build() - require.Equal(t, schema, plan.InputSchema()) +func Test_ExprClone(t *testing.T) { + expr := Col("labels.test").Eq(Literal("abc")) + expr2 := expr.Clone() + require.Equal(t, expr, expr2) - // test it returns null in case where we built a logical plan w/ no - // TableScan or SchemaScan - plan, _ = (&Builder{}). - Filter(Col("labels.test").Eq(Literal("abc"))). - Aggregate( - []Expr{Sum(Col("value")).Alias("value_sum")}, - []Expr{Col("stacktrace")}, - ). - Project(Col("stacktrace")). - Build() - require.Nil(t, plan.InputSchema()) + // Modify the original expr and make sure the clone is not affected + expr.Op = OpGt + require.NotEqual(t, expr, expr2) } diff --git a/query/logicalplan/optimize.go b/query/logicalplan/optimize.go index 9c1599d05..620eeaed9 100644 --- a/query/logicalplan/optimize.go +++ b/query/logicalplan/optimize.go @@ -1,86 +1,32 @@ package logicalplan -import ( - "golang.org/x/exp/slices" -) +var hashedMatch = "hashed" type Optimizer interface { Optimize(plan *LogicalPlan) *LogicalPlan } -var DefaultOptimizers = []Optimizer{ - &AverageAggregationPushDown{}, - &PhysicalProjectionPushDown{}, - &FilterPushDown{}, - &DistinctPushDown{}, - &ProjectionPushDown{}, -} - -type AverageAggregationPushDown struct{} - -func (p *AverageAggregationPushDown) Optimize(plan *LogicalPlan) *LogicalPlan { - if plan.Aggregation == nil { - return plan - } - - for i, aggExpr := range plan.Aggregation.AggExprs { - var aggFunc *AggregationFunction - var alias *AliasExpr - var column Expr - - // In case the aggregation contains an alias - if aliasExpr, ok := aggExpr.(*AliasExpr); ok { - alias = aliasExpr - if af, ok := aliasExpr.Expr.(*AggregationFunction); ok { - if af.Func == AggFuncAvg { - column = af.Expr - aggFunc = af - } - } - } - if ae, ok := aggExpr.(*AggregationFunction); ok { - if 
ae.Func == AggFuncAvg { - column = ae.Expr - aggFunc = ae - } - } - - if aggFunc == nil { - // no aggregation func found, skipping - continue - } - - // Delete this average aggregation from the logicalplan. - plan.Aggregation.AggExprs = slices.Delete(plan.Aggregation.AggExprs, i, i+1) - // Add sum and count aggregation for the column to the logicalplan. - plan.Aggregation.AggExprs = append(plan.Aggregation.AggExprs, - Sum(aggFunc.Expr), - Count(aggFunc.Expr), - ) - - projection := &AverageExpr{Expr: column} - if alias != nil { - alias.Expr = column - projection.Expr = alias - } - - // Wrap the aggregations with the average projection to always call it after aggregating. - plan = &LogicalPlan{ - Input: plan, - Projection: &Projection{ - Exprs: []Expr{projection}, +func DefaultOptimizers() []Optimizer { + return []Optimizer{ + &PhysicalProjectionPushDown{ + defaultProjections: []Expr{ + Not(DynCol(hashedMatch)), }, - } + }, + &FilterPushDown{}, + &DistinctPushDown{}, + &AggFuncPushDown{}, } - - return plan } -// The PhysicalProjectionPushDown optimizer tries to push down the actual -// physical columns used by the query to the table scan, so the table provider -// can decide to only read the columns that are actually going to be used by -// the query. -type PhysicalProjectionPushDown struct{} +// PhysicalProjectionPushDown finds the first projecting logical plan and +// collects all columns it needs, it is concatenated with all other columns +// used until it, for example a filter layer. Because the tree has the scan +// layer as the inner most layer, the logic actually works by resetting the +// list every time a projecting layer is found. 
+type PhysicalProjectionPushDown struct { + defaultProjections []Expr +} func (p *PhysicalProjectionPushDown) Optimize(plan *LogicalPlan) *LogicalPlan { p.optimize(plan, nil) @@ -90,26 +36,35 @@ func (p *PhysicalProjectionPushDown) Optimize(plan *LogicalPlan) *LogicalPlan { func (p *PhysicalProjectionPushDown) optimize(plan *LogicalPlan, columnsUsedExprs []Expr) { switch { case plan.SchemaScan != nil: - plan.SchemaScan.PhysicalProjection = columnsUsedExprs + plan.SchemaScan.PhysicalProjection = append(p.defaultProjections, columnsUsedExprs...) case plan.TableScan != nil: - plan.TableScan.PhysicalProjection = columnsUsedExprs + plan.TableScan.PhysicalProjection = append(p.defaultProjections, columnsUsedExprs...) case plan.Filter != nil: + p.defaultProjections = []Expr{} columnsUsedExprs = append(columnsUsedExprs, plan.Filter.Expr.ColumnsUsedExprs()...) case plan.Distinct != nil: + // distinct is projecting so we need to reset + columnsUsedExprs = []Expr{} for _, expr := range plan.Distinct.Exprs { columnsUsedExprs = append(columnsUsedExprs, expr.ColumnsUsedExprs()...) } case plan.Projection != nil: + // projections are is projecting so we need to reset + columnsUsedExprs = []Expr{} for _, expr := range plan.Projection.Exprs { columnsUsedExprs = append(columnsUsedExprs, expr.ColumnsUsedExprs()...) } case plan.Aggregation != nil: + // aggregations are projecting so we need to reset + columnsUsedExprs = []Expr{} for _, expr := range plan.Aggregation.GroupExprs { columnsUsedExprs = append(columnsUsedExprs, expr.ColumnsUsedExprs()...) } for _, expr := range plan.Aggregation.AggExprs { columnsUsedExprs = append(columnsUsedExprs, expr.ColumnsUsedExprs()...) 
} + p.defaultProjections = []Expr{} + columnsUsedExprs = append(columnsUsedExprs, DynCol(hashedMatch)) } if plan.Input != nil { @@ -117,154 +72,7 @@ func (p *PhysicalProjectionPushDown) optimize(plan *LogicalPlan, columnsUsedExpr } } -// The ProjectionPushDown finds the projection expressions that can be pushed -// down. If there is no projection expression, but there is an implicit -// projection such as a `Distinct` query plan, then it will insert a new -// projection plan and push it down. It functions in three steps, first it will -// find the projection expressions in the plan, then remove explicit projection -// plans from the overall plan if it exists, and will then synthesize one if it -// doesn't exist, and insert it in the deepest possible position in the plan. -type ProjectionPushDown struct{} - -func (p *ProjectionPushDown) Optimize(plan *LogicalPlan) *LogicalPlan { - // Don't perform the optimization if filters or aggregations contain a column that projections do not. - // Otherwise we'll removed the columns we're filtering/aggregating. - projectColumns := projectionColumns(plan) - projectMap := map[string]bool{} - filterColumns := filterColumns(plan) - aggColumns := aggregationColumns(plan) - for _, m := range projectColumns { - projectMap[m.Name()] = true - } - for _, m := range filterColumns { - if !projectMap[m.Name()] { - return plan - } - } - for _, m := range aggColumns { - if !projectMap[m.Name()] { - return plan - } - } - - c := &projectionCollector{} - c.collect(plan) - - if len(c.projections) == 0 { - // If there are no projection expressions, then we don't need to do - // anything. - return plan - } - - plan = removeProjection(plan) - return insertProjection(plan, &Projection{Exprs: c.projections}) -} - -type projectionCollector struct { - projections []Expr -} - -func (p *projectionCollector) collect(plan *LogicalPlan) { - switch { - case plan.Distinct != nil: - p.projections = append(p.projections, plan.Distinct.Exprs...) 
- case plan.Projection != nil: - p.projections = append(p.projections, plan.Projection.Exprs...) - } - - if plan.Input != nil { - p.collect(plan.Input) - } -} - -// filterColumns returns all the column matchers for filters in a given plan. -func filterColumns(plan *LogicalPlan) []Expr { - if plan == nil { - return nil - } - - columnsUsedExprs := []Expr{} - switch { - case plan.Filter != nil: - columnsUsedExprs = append(columnsUsedExprs, plan.Filter.Expr.ColumnsUsedExprs()...) - } - - return append(columnsUsedExprs, filterColumns(plan.Input)...) -} - -func aggregationColumns(plan *LogicalPlan) []Expr { - if plan == nil { - return nil - } - - columnsUsedExprs := []Expr{} - switch { - case plan.Aggregation != nil: - for _, expr := range plan.Aggregation.GroupExprs { - columnsUsedExprs = append(columnsUsedExprs, expr.ColumnsUsedExprs()...) - } - for _, expr := range plan.Aggregation.AggExprs { - columnsUsedExprs = append(columnsUsedExprs, expr.ColumnsUsedExprs()...) - } - } - - return append(columnsUsedExprs, aggregationColumns(plan.Input)...) -} - -// projectionColumns returns all the column matchers for projections in a given plan. -func projectionColumns(plan *LogicalPlan) []Expr { - if plan == nil { - return nil - } - - columnsUsedExprs := []Expr{} - switch { - case plan.Projection != nil: - for _, expr := range plan.Projection.Exprs { - columnsUsedExprs = append(columnsUsedExprs, expr.ColumnsUsedExprs()...) - } - } - - return append(columnsUsedExprs, projectionColumns(plan.Input)...) 
-} - -func removeProjection(plan *LogicalPlan) *LogicalPlan { - if plan == nil { - return nil - } - - switch { - case plan.Projection != nil: - return plan.Input - } - - plan.Input = removeProjection(plan.Input) - return plan -} - -func insertProjection(cur *LogicalPlan, projection *Projection) *LogicalPlan { - if cur == nil { - return nil - } - - switch { - case cur.TableScan != nil: - return &LogicalPlan{ - Input: cur, - Projection: projection, - } - case cur.SchemaScan != nil: - return &LogicalPlan{ - Input: cur, - Projection: projection, - } - } - - cur.Input = insertProjection(cur.Input, projection) - return cur -} - -// The FilterPushDown optimizer tries to push down the filters of a query down +// FilterPushDown optimizer tries to push down the filters of a query down // to the actual physical table scan. This allows the table provider to make // smarter decisions about which pieces of data to load in the first place or // which are definitely not useful to the query at all. It does not guarantee @@ -296,7 +104,7 @@ func (p *FilterPushDown) optimize(plan *LogicalPlan, exprs []Expr) { } } -// The DistinctPushDown optimizer tries to push down the distinct operator to +// DistinctPushDown optimizer tries to push down the distinct operator to // the table provider. 
There are certain cases of distinct queries where the // storage engine can make smarter decisions than just returning all the data, // such as with dictionary encoded columns that are not filtered they can @@ -309,6 +117,20 @@ func (p *DistinctPushDown) Optimize(plan *LogicalPlan) *LogicalPlan { return plan } +func exprsEqual(a, b []Expr) bool { + if len(a) != len(b) { + return false + } + + for i, expr := range a { + if !expr.Equal(b[i]) { + return false + } + } + + return true +} + func (p *DistinctPushDown) optimize(plan *LogicalPlan, distinctColumns []Expr) { switch { case plan.TableScan != nil: @@ -317,9 +139,55 @@ func (p *DistinctPushDown) optimize(plan *LogicalPlan, distinctColumns []Expr) { } case plan.Distinct != nil: distinctColumns = append(distinctColumns, plan.Distinct.Exprs...) + case plan.Projection != nil: + if !exprsEqual(distinctColumns, plan.Projection.Exprs) { + // if and only if the distinct columns are identical to the + // projection columns we can perform the optimization, so we need + // to reset it in this case. + distinctColumns = []Expr{} + } + default: + // reset distinct columns + distinctColumns = []Expr{} } if plan.Input != nil { p.optimize(plan.Input, distinctColumns) } } + +// AggFuncPushDown optimizer tries to push down an aggregation function operator +// to the table provider. This can be done in the case of some aggregation +// functions on global aggregations (i.e. no group by) without filters. +// The storage engine can make smarter decisions than just returning all the +// data, such as in the case of max functions, memoizing the max value seen +// so far and only scanning row groups that contain a value greater than the +// memoized value. It modifies the plan in place. 
+type AggFuncPushDown struct{} + +func (p *AggFuncPushDown) Optimize(plan *LogicalPlan) *LogicalPlan { + p.optimize(plan, nil) + return plan +} + +func (p *AggFuncPushDown) optimize(plan *LogicalPlan, filterExpr Expr) { + switch { + case plan.TableScan != nil: + if filterExpr != nil { + plan.TableScan.Filter = filterExpr + } + case plan.Aggregation != nil: + if len(plan.Aggregation.GroupExprs) == 0 && len(plan.Aggregation.AggExprs) == 1 { + // TODO(asubiotto): Should we make this less specific? + filterExpr = plan.Aggregation.AggExprs[0] + } + default: + // If we find anything other than a table scan after a global + // aggregation, bail out by setting the filterExpr to nil. + filterExpr = nil + } + + if plan.Input != nil { + p.optimize(plan.Input, filterExpr) + } +} diff --git a/query/logicalplan/optimize_test.go b/query/logicalplan/optimize_test.go index f351b20bb..839790b0a 100644 --- a/query/logicalplan/optimize_test.go +++ b/query/logicalplan/optimize_test.go @@ -5,21 +5,22 @@ import ( "github.com/polarsignals/frostdb/dynparquet" - "github.com/apache/arrow/go/v10/arrow/scalar" + "github.com/apache/arrow-go/v18/arrow/scalar" "github.com/stretchr/testify/require" ) func TestOptimizePhysicalProjectionPushDown(t *testing.T) { tableProvider := &mockTableProvider{schema: dynparquet.NewSampleSchema()} - p, _ := (&Builder{}). + p, err := (&Builder{}). Scan(tableProvider, "table1"). Filter(Col("labels.test").Eq(Literal("abc"))). Aggregate( - []Expr{Sum(Col("value")).Alias("value_sum")}, + []*AggregationFunction{Sum(Col("value"))}, []Expr{Col("stacktrace")}, ). - Project(Col("stacktrace")). + Project(Col("stacktrace"), Sum(Col("value")).Alias("value_sum")). Build() + require.NoError(t, err) optimizer := &PhysicalProjectionPushDown{} optimizer.Optimize(p) @@ -32,9 +33,9 @@ func TestOptimizePhysicalProjectionPushDown(t *testing.T) { // columns they access. 
The optimizer could potentially deduplicate or // use a more efficient datastructure in the future. PhysicalProjection: []Expr{ - &Column{ColumnName: "stacktrace"}, &Column{ColumnName: "stacktrace"}, &Column{ColumnName: "value"}, + DynCol(hashedMatch), &Column{ColumnName: "labels.test"}, }, }, @@ -44,36 +45,35 @@ func TestOptimizePhysicalProjectionPushDown(t *testing.T) { } func TestOptimizeDistinctPushDown(t *testing.T) { - p, _ := (&Builder{}). - Scan(nil, "table1"). + tableProvider := &mockTableProvider{schema: dynparquet.NewSampleSchema()} + p, err := (&Builder{}). + Scan(tableProvider, "table1"). Distinct(Col("labels.test")). Build() + require.NoError(t, err) optimizer := &DistinctPushDown{} p = optimizer.Optimize(p) - require.Equal(t, &TableScan{ - TableName: "table1", - Distinct: []Expr{ - &Column{ColumnName: "labels.test"}, - }, - }, + require.Equal(t, + []Expr{&Column{ColumnName: "labels.test"}}, // Distinct -> TableScan - p.Input.TableScan, + p.Input.Input.TableScan.Distinct, ) } func TestOptimizeFilterPushDown(t *testing.T) { tableProvider := &mockTableProvider{schema: dynparquet.NewSampleSchema()} - p, _ := (&Builder{}). + p, err := (&Builder{}). Scan(tableProvider, "table1"). Filter(Col("labels.test").Eq(Literal("abc"))). Aggregate( - []Expr{Sum(Col("value")).Alias("value_sum")}, + []*AggregationFunction{Sum(Col("value"))}, []Expr{Col("stacktrace")}, ). - Project(Col("stacktrace")). + Project(Col("stacktrace"), Sum(Col("value")).Alias("value_sum")). Build() + require.NoError(t, err) optimizer := &FilterPushDown{} optimizer.Optimize(p) @@ -95,99 +95,75 @@ func TestOptimizeFilterPushDown(t *testing.T) { ) } -func TestRemoveProjectionAtRoot(t *testing.T) { - p, _ := (&Builder{}). - Scan(&mockTableProvider{schema: dynparquet.NewSampleSchema()}, "table1"). - Filter(Col("labels.test").Eq(Literal("abc"))). - Aggregate( - []Expr{Sum(Col("value")).Alias("value_sum")}, - []Expr{Col("stacktrace")}, - ). - Project(Col("stacktrace")). 
- Build() - - p = removeProjection(p) - - require.True(t, p.Projection == nil) -} - -func TestRemoveMiddleProjection(t *testing.T) { - p, _ := (&Builder{}). - Scan(&mockTableProvider{schema: dynparquet.NewSampleSchema()}, "table1"). - Filter(Col("labels.test").Eq(Literal("abc"))). - Project(Col("stacktrace")). - Aggregate( - []Expr{Sum(Col("value")).Alias("value_sum")}, - []Expr{Col("stacktrace")}, - ). - Build() - - p = removeProjection(p) - - require.True(t, p.Input.Projection == nil) -} - -func TestRemoveLowestProjection(t *testing.T) { - p, _ := (&Builder{}). +func TestProjectionPushDown(t *testing.T) { + p, err := (&Builder{}). Scan(&mockTableProvider{schema: dynparquet.NewSampleSchema()}, "table1"). - Project(Col("stacktrace")). Filter(Col("labels.test").Eq(Literal("abc"))). Aggregate( - []Expr{Sum(Col("value")).Alias("value_sum")}, + []*AggregationFunction{Sum(Col("value"))}, []Expr{Col("stacktrace")}, ). Build() + require.NoError(t, err) - p = removeProjection(p) + p = (&PhysicalProjectionPushDown{}).Optimize(p) - require.True(t, p.Input.Input.Projection == nil) + require.Equal(t, []Expr{ + Col("stacktrace"), + Col("value"), + DynCol("hashed"), + Col("labels.test"), + }, p.Input.Input.TableScan.PhysicalProjection) } -func TestProjectionPushDown(t *testing.T) { - p, _ := (&Builder{}). +func TestProjectionPushDownReset(t *testing.T) { + p, err := (&Builder{}). Scan(&mockTableProvider{schema: dynparquet.NewSampleSchema()}, "table1"). Filter(Col("labels.test").Eq(Literal("abc"))). Aggregate( - []Expr{Sum(Col("value")).Alias("value_sum")}, + []*AggregationFunction{Sum(Col("value"))}, []Expr{Col("stacktrace")}, ). - Project(Col("labels")). + Project(Col("test")). 
Build() + require.NoError(t, err) - p = (&ProjectionPushDown{}).Optimize(p) + p = (&PhysicalProjectionPushDown{}).Optimize(p) - require.True(t, p.Input.Input.Projection == nil) + require.Equal(t, []Expr{ + Col("stacktrace"), + Col("value"), + DynCol("hashed"), + Col("labels.test"), + }, p.Input.Input.Input.TableScan.PhysicalProjection) } func TestProjectionPushDownOfDistinct(t *testing.T) { - p, _ := (&Builder{}). + p, err := (&Builder{}). Scan(&mockTableProvider{schema: dynparquet.NewSampleSchema()}, "table1"). Distinct(DynCol("labels")). Build() + require.NoError(t, err) - p = (&ProjectionPushDown{}).Optimize(p) + p = (&PhysicalProjectionPushDown{}).Optimize(p) - require.True(t, p.Input.Projection != nil) + require.Equal(t, []Expr{DynCol("labels")}, p.Input.Input.TableScan.PhysicalProjection) } func TestAllOptimizers(t *testing.T) { tableProvider := &mockTableProvider{schema: dynparquet.NewSampleSchema()} - p, _ := (&Builder{}). + p, err := (&Builder{}). Scan(tableProvider, "table1"). Filter(Col("labels.test").Eq(Literal("abc"))). Aggregate( - []Expr{Sum(Col("value")).Alias("value_sum")}, + []*AggregationFunction{Sum(Col("value"))}, []Expr{Col("stacktrace")}, ). Project(Col("stacktrace")). Build() + require.NoError(t, err) - optimizers := []Optimizer{ - &PhysicalProjectionPushDown{}, - &FilterPushDown{}, - &DistinctPushDown{}, - &ProjectionPushDown{}, - } + optimizers := DefaultOptimizers() for _, optimizer := range optimizers { p = optimizer.Optimize(p) @@ -201,9 +177,9 @@ func TestAllOptimizers(t *testing.T) { // columns they access. The optimizer could potentially deduplicate or // use a more efficient datastructure in the future. 
PhysicalProjection: []Expr{ - &Column{ColumnName: "stacktrace"}, &Column{ColumnName: "stacktrace"}, &Column{ColumnName: "value"}, + DynCol(hashedMatch), &Column{ColumnName: "labels.test"}, }, Filter: &BinaryExpr{ @@ -218,3 +194,77 @@ func TestAllOptimizers(t *testing.T) { p.Input.Input.Input.TableScan, ) } + +func TestAggFuncPushDown(t *testing.T) { + t.Run("GlobalAgg", func(t *testing.T) { + tableProvider := &mockTableProvider{schema: dynparquet.NewSampleSchema()} + p, err := (&Builder{}). + Scan(tableProvider, "table1"). + Aggregate( + []*AggregationFunction{Max(Col("value"))}, + nil, + ). + Build() + require.NoError(t, err) + + p = (&AggFuncPushDown{}).Optimize(p) + // Aggregation should still happen. + require.NotNil(t, p.Aggregation) + require.Equal(t, + &TableScan{ + TableName: "table1", + TableProvider: tableProvider, + Filter: &AggregationFunction{ + Func: AggFuncMax, + Expr: &Column{ColumnName: "value"}, + }, + }, + p.Input.TableScan, + ) + }) + t.Run("DontPushWithGroupExprs", func(t *testing.T) { + tableProvider := &mockTableProvider{schema: dynparquet.NewSampleSchema()} + p, err := (&Builder{}). + Scan(tableProvider, "table1"). + Aggregate( + []*AggregationFunction{Max(Col("value"))}, + []Expr{Col("stacktrace")}, + ). + Build() + require.NoError(t, err) + + p = (&AggFuncPushDown{}).Optimize(p) + // Aggregation should still happen. + require.NotNil(t, p.Aggregation) + require.Equal(t, + &TableScan{ + TableName: "table1", + TableProvider: tableProvider, + }, + p.Input.TableScan, + ) + }) + t.Run("DontPushWithFilter", func(t *testing.T) { + tableProvider := &mockTableProvider{schema: dynparquet.NewSampleSchema()} + p, err := (&Builder{}). + Scan(tableProvider, "table1"). + Filter(Col("labels.test").Eq(Literal("abc"))). + Aggregate( + []*AggregationFunction{Max(Col("value"))}, + nil, + ). + Build() + require.NoError(t, err) + + p = (&AggFuncPushDown{}).Optimize(p) + // Aggregation should still happen. 
+ require.NotNil(t, p.Aggregation) + require.Equal(t, + &TableScan{ + TableName: "table1", + TableProvider: tableProvider, + }, + p.Input.Input.TableScan, + ) + }) +} diff --git a/query/logicalplan/validate.go b/query/logicalplan/validate.go index ec3ac4f42..1646b3f74 100644 --- a/query/logicalplan/validate.go +++ b/query/logicalplan/validate.go @@ -5,8 +5,9 @@ import ( "reflect" "strings" - "github.com/apache/arrow/go/v10/arrow/scalar" - "github.com/segmentio/parquet-go/format" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/scalar" + "github.com/parquet-go/parquet-go/format" ) // PlanValidationError is the error representing a logical plan that is not valid. @@ -52,7 +53,7 @@ func (e *ExprValidationError) Error() string { message := make([]string, 0) message = append(message, e.message) message = append(message, ": ") - message = append(message, fmt.Sprintf("%s", e.expr)) + message = append(message, e.expr.String()) for _, child := range e.children { message = append(message, "\n -> invalid sub-expression: ") message = append(message, child.Error()) @@ -67,9 +68,9 @@ func Validate(plan *LogicalPlan) error { if err == nil { switch { case plan.SchemaScan != nil: - err = nil + err = ValidateSchemaScan(plan) case plan.TableScan != nil: - err = nil + err = ValidateTableScan(plan) case plan.Filter != nil: err = ValidateFilter(plan) case plan.Distinct != nil: @@ -118,10 +119,16 @@ func ValidateSingleFieldSet(plan *LogicalPlan) *PlanValidationError { if plan.Aggregation != nil { fieldsSet = append(fieldsSet, 5) } + if plan.Limit != nil { + fieldsSet = append(fieldsSet, 6) + } + if plan.Sample != nil { + fieldsSet = append(fieldsSet, 7) + } if len(fieldsSet) != 1 { fieldsFound := make([]string, 0) - fields := []string{"SchemaScan", "TableScan", "Filter", "Distinct", "Projection", "Aggregation"} + fields := []string{"SchemaScan", "TableScan", "Filter", "Distinct", 
"Projection", "Aggregation", "Limit", "Sample"} for _, i := range fieldsSet { fieldsFound = append(fieldsFound, fields[i]) } @@ -143,10 +150,90 @@ func ValidateSingleFieldSet(plan *LogicalPlan) *PlanValidationError { return nil } +func ValidateSchemaScan(plan *LogicalPlan) *PlanValidationError { + if plan.SchemaScan.TableProvider == nil { + return &PlanValidationError{ + plan: plan, + message: "table provider must not be nil", + } + } + + if plan.SchemaScan.TableName == "" { + return &PlanValidationError{ + plan: plan, + message: "table name must not be empty", + } + } + + tableReader, err := plan.SchemaScan.TableProvider.GetTable(plan.SchemaScan.TableName) + if err != nil { + return &PlanValidationError{ + plan: plan, + message: fmt.Sprintf("failed to get table: %s", err), + } + } + if tableReader == nil { + return &PlanValidationError{ + plan: plan, + message: "table not found", + } + } + + schema := tableReader.Schema() + if schema == nil { + return &PlanValidationError{ + plan: plan, + message: "table schema must not be nil", + } + } + + return nil +} + +func ValidateTableScan(plan *LogicalPlan) *PlanValidationError { + if plan.TableScan.TableProvider == nil { + return &PlanValidationError{ + plan: plan, + message: "table provider must not be nil", + } + } + + if plan.TableScan.TableName == "" { + return &PlanValidationError{ + plan: plan, + message: "table name must not be empty", + } + } + + tableReader, err := plan.TableScan.TableProvider.GetTable(plan.TableScan.TableName) + if err != nil { + return &PlanValidationError{ + plan: plan, + message: fmt.Sprintf("failed to get table: %s", err), + } + } + if tableReader == nil { + return &PlanValidationError{ + plan: plan, + message: "table not found", + } + } + + schema := tableReader.Schema() + if schema == nil { + return &PlanValidationError{ + plan: plan, + message: "table schema must not be nil", + } + } + + return nil +} + // ValidateAggregation validates the logical plan's aggregation step. 
func ValidateAggregation(plan *LogicalPlan) *PlanValidationError { // check that the expression is not nil - if plan.Aggregation.AggExprs == nil || len(plan.Aggregation.AggExprs) == 0 { + if len(plan.Aggregation.AggExprs) == 0 { return &PlanValidationError{ plan: plan, message: "invalid aggregation: expression cannot be nil", @@ -166,63 +253,46 @@ func ValidateAggregation(plan *LogicalPlan) *PlanValidationError { return nil } -func ValidateAggregationExpr(plan *LogicalPlan) *ExprValidationError { - aliases := map[string]struct{}{} +type Named interface { + Name() string +} +func ValidateAggregationExpr(plan *LogicalPlan) *ExprValidationError { for _, expr := range plan.Aggregation.AggExprs { - // check that the aggregation expression has the required structure - colFinder := newTypeFinder((*Column)(nil)) - expr.Accept(&colFinder) - - aggFuncFinder := newTypeFinder((*AggregationFunction)(nil)) - expr.Accept(&aggFuncFinder) - - if colFinder.result == nil || aggFuncFinder.result == nil { + t, err := expr.Expr.DataType(plan.Input) + if err != nil { return &ExprValidationError{ - message: "aggregation expression is invalid. 
must contain AggregationFunction and Column", - expr: expr, + expr: expr.Expr, + message: fmt.Errorf("get type of expression to aggregate: %w", err).Error(), } } - // check that column being aggregated on exists in the schema - colExpr := colFinder.result.(*Column) - schema := plan.InputSchema() - if schema == nil { - return nil // cannot check column type if there's no input schema - } - - column, found := schema.ColumnByName(colExpr.ColumnName) - if !found { + if t == nil { return &ExprValidationError{ - message: fmt.Sprintf("column not found: %s", colExpr.ColumnName), - expr: expr, + expr: expr.Expr, + message: "invalid aggregation: expression type cannot be determined", } } - if alias, ok := expr.(*AliasExpr); ok { - if _, found := aliases[alias.Alias]; found { + switch expr.Func { + case AggFuncSum, AggFuncMin, AggFuncMax, AggFuncCount, AggFuncAvg, AggFuncUnique: + switch t { + case + arrow.PrimitiveTypes.Int64, + arrow.PrimitiveTypes.Uint64, + arrow.PrimitiveTypes.Float64: + // valid + default: return &ExprValidationError{ - message: fmt.Sprintf("alias used twice: %s", alias.Alias), - expr: expr, + expr: expr.Expr, + message: fmt.Errorf("invalid aggregation: expression type %s is not supported", t).Error(), } } - aliases[alias.Alias] = struct{}{} - } - - // check that the column type can be aggregated by the function type - columnType := column.StorageLayout.Type() - aggFuncExpr := aggFuncFinder.result.(*AggregationFunction) - if columnType.LogicalType().UTF8 != nil { - switch aggFuncExpr.Func { - case AggFuncSum: + case AggFuncAnd: + if t != arrow.FixedWidthTypes.Boolean { return &ExprValidationError{ - message: "cannot sum text column", - expr: expr, - } - case AggFuncMax: - return &ExprValidationError{ - message: "cannot max text column", - expr: expr, + expr: expr.Expr, + message: fmt.Errorf("invalid aggregation: and aggregations can only aggregate bool type expressions, not %s", t).Error(), } } } @@ -397,7 +467,11 @@ type findExpressionForTypeVisitor 
struct { result Expr } -func (v *findExpressionForTypeVisitor) PreVisit(expr Expr) bool { +func (v *findExpressionForTypeVisitor) PreVisit(_ Expr) bool { + return true +} + +func (v *findExpressionForTypeVisitor) Visit(_ Expr) bool { return true } diff --git a/query/logicalplan/validate_test.go b/query/logicalplan/validate_test.go index 3ceb678f1..4fd50ca06 100644 --- a/query/logicalplan/validate_test.go +++ b/query/logicalplan/validate_test.go @@ -69,32 +69,10 @@ func TestAggregationMustHaveExpr(t *testing.T) { require.True(t, strings.HasPrefix(planErr.message, "invalid aggregation: expression cannot be nil")) } -func TestAggregationExprCannotHaveInvalidType(t *testing.T) { - invalidExprs := [][]Expr{ - {Literal(4)}, - {Col("Test")}, - } - - for _, expr := range invalidExprs { - _, err := (&Builder{}). - Aggregate(expr, nil). - Build() - - require.NotNil(t, err) - require.NotNil(t, err) - planErr, ok := err.(*PlanValidationError) - require.True(t, ok) - require.True(t, strings.HasPrefix(planErr.message, "invalid aggregation")) - require.Len(t, planErr.children, 1) - exprErr := planErr.children[0] - require.True(t, strings.HasPrefix(exprErr.message, "aggregation expression is invalid")) - } -} - func TestAggregationExprColumnMustExistInSchema(t *testing.T) { _, err := (&Builder{}). Scan(&mockTableProvider{dynparquet.NewSampleSchema()}, "table1"). - Aggregate([]Expr{Sum(Col("bad_column"))}, nil). + Aggregate([]*AggregationFunction{Sum(Col("bad_column"))}, nil). 
Build() require.NotNil(t, err) @@ -104,7 +82,7 @@ func TestAggregationExprColumnMustExistInSchema(t *testing.T) { require.True(t, strings.HasPrefix(planErr.message, "invalid aggregation")) require.Len(t, planErr.children, 1) exprErr := planErr.children[0] - require.True(t, strings.HasPrefix(exprErr.message, "column not found")) + require.Contains(t, exprErr.message, "column \"bad_column\" not found") } func TestAggregationCannotSumOrMaxTextColumn(t *testing.T) { @@ -123,7 +101,7 @@ func TestAggregationCannotSumOrMaxTextColumn(t *testing.T) { } { _, err := (&Builder{}). Scan(&mockTableProvider{dynparquet.NewSampleSchema()}, "table1"). - Aggregate([]Expr{testCase.fn(Col("example_type"))}, nil). + Aggregate([]*AggregationFunction{testCase.fn(Col("example_type"))}, nil). Build() require.NotNil(t, err) @@ -133,28 +111,10 @@ func TestAggregationCannotSumOrMaxTextColumn(t *testing.T) { require.True(t, strings.HasPrefix(planErr.message, "invalid aggregation")) require.Len(t, planErr.children, 1) exprErr := planErr.children[0] - require.True(t, strings.HasPrefix(exprErr.message, testCase.errMsg)) + require.Contains(t, exprErr.message, "expression type dictionary is not supported") } } -func TestAggregationCannotUseAliasTwice(t *testing.T) { - _, err := (&Builder{}). - Scan(&mockTableProvider{dynparquet.NewSampleSchema()}, "table1"). - Aggregate([]Expr{ - Sum(Col("value")).Alias("value"), // should use e.g. sum_foo - Max(Col("value")).Alias("value"), // should use e.g. max_foo - }, nil). - Build() - - require.NotNil(t, err) - planErr, ok := err.(*PlanValidationError) - require.True(t, ok) - require.True(t, strings.HasPrefix(planErr.message, "invalid aggregation")) - require.Len(t, planErr.children, 1) - exprErr := planErr.children[0] - require.True(t, strings.HasPrefix(exprErr.message, "alias used twice: value")) -} - func TestFilterBinaryExprLeftSideMustBeColumn(t *testing.T) { _, err := (&Builder{}). Scan(&mockTableProvider{dynparquet.NewSampleSchema()}, "table1"). 
diff --git a/query/memory.go b/query/memory.go new file mode 100644 index 000000000..282f19822 --- /dev/null +++ b/query/memory.go @@ -0,0 +1,93 @@ +package query + +import ( + "runtime/debug" + "sync/atomic" + + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +const PanicMemoryLimit = "memory limit exceeded" + +var _ memory.Allocator = (*LimitAllocator)(nil) + +// LimitAllocator is a wrapper around a memory.Allocator that panics if the memory usage exceeds the defined limit. +type LimitAllocator struct { + limit int64 + allocated *atomic.Int64 + allocator memory.Allocator + reg prometheus.Registerer +} + +type AllocatorOption func(*LimitAllocator) + +func WithRegistry(reg prometheus.Registerer) AllocatorOption { + return func(a *LimitAllocator) { + a.reg = reg + } +} + +func NewLimitAllocator(limit int64, allocator memory.Allocator, options ...AllocatorOption) *LimitAllocator { + l := &LimitAllocator{ + limit: limit, + allocated: &atomic.Int64{}, + allocator: allocator, + reg: prometheus.NewRegistry(), + } + + for _, option := range options { + option(l) + } + + promauto.With(l.reg).NewGaugeFunc(prometheus.GaugeOpts{ + Name: "memory_allocated_bytes", + Help: "The total number of bytes allocated by the allocator.", + }, func() float64 { + return float64(l.allocated.Load()) + }) + + return l +} + +func (a *LimitAllocator) Allocate(size int) []byte { + for { + allocated := a.allocated.Load() + if allocated+int64(size) > a.limit { + panic(PanicMemoryLimit) + } + + if a.allocated.CompareAndSwap(allocated, allocated+int64(size)) { + return a.allocator.Allocate(size) + } + } +} + +func (a *LimitAllocator) Reallocate(size int, b []byte) []byte { + if len(b) == size { + return b + } + + diff := int64(size - len(b)) + for { + allocated := a.allocated.Load() + if allocated+diff > a.limit { + debug.PrintStack() + 
panic(PanicMemoryLimit) + } + + if a.allocated.CompareAndSwap(allocated, allocated+diff) { + return a.allocator.Reallocate(size, b) + } + } +} + +func (a *LimitAllocator) Free(b []byte) { + a.allocated.Add(-int64(len(b))) + a.allocator.Free(b) +} + +func (a *LimitAllocator) Allocated() int { + return int(a.allocated.Load()) +} diff --git a/query/physicalplan/aggregate.go b/query/physicalplan/aggregate.go index 4271bffea..8864b374a 100644 --- a/query/physicalplan/aggregate.go +++ b/query/physicalplan/aggregate.go @@ -7,15 +7,15 @@ import ( "hash/maphash" "strings" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/math" - "github.com/apache/arrow/go/v10/arrow/memory" - "github.com/apache/arrow/go/v10/arrow/scalar" - "github.com/dgryski/go-metro" - "github.com/segmentio/parquet-go" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/math" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/apache/arrow-go/v18/arrow/scalar" + "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" + "github.com/polarsignals/frostdb/dynparquet" "github.com/polarsignals/frostdb/pqarrow/builder" "github.com/polarsignals/frostdb/query/logicalplan" ) @@ -23,53 +23,30 @@ import ( func Aggregate( pool memory.Allocator, tracer trace.Tracer, - s *parquet.Schema, agg *logicalplan.Aggregation, final bool, ordered bool, + seed maphash.Seed, ) (PhysicalPlan, error) { aggregations := make([]Aggregation, 0, len(agg.AggExprs)) + // TODO(brancz): This is not correct, it doesn't handle aggregations + // correctly of previously projected columns like `sum(value + timestamp)`. 
+ // Need to understand why we need to handle dynamic columns here + // differently and not just use the aggregation funciton's expression. for _, expr := range agg.AggExprs { - var ( - aggregation Aggregation - aggFunc logicalplan.AggFunc - aggFuncFound bool - aggColumnFound bool - ) + aggregation := Aggregation{} expr.Accept(PreExprVisitorFunc(func(expr logicalplan.Expr) bool { - switch e := expr.(type) { - case *logicalplan.AggregationFunction: - aggFunc = e.Func - aggFuncFound = true - case *logicalplan.Column: - aggregation.expr = e - aggColumnFound = true + if _, ok := expr.(*logicalplan.DynamicColumn); ok { + aggregation.dynamic = true } return true })) - if !aggFuncFound { - return nil, errors.New("aggregation function not found") - } - - if !aggColumnFound { - return nil, errors.New("aggregation column not found") - } - - dataType, err := expr.DataType(s) - if err != nil { - return nil, err - } - - f, err := chooseAggregationFunction(aggFunc, dataType) - if err != nil { - return nil, err - } - aggregation.resultName = expr.Name() - aggregation.function = f + aggregation.function = expr.Func + aggregation.expr = expr.Expr aggregations = append(aggregations, aggregation) } @@ -96,38 +73,28 @@ func Aggregate( tracer, aggregations, agg.GroupExprs, + seed, final, ), nil } func chooseAggregationFunction( aggFunc logicalplan.AggFunc, - dataType arrow.DataType, + _ arrow.DataType, ) (AggregationFunction, error) { switch aggFunc { case logicalplan.AggFuncSum: - switch dataType.ID() { - case arrow.INT64: - return &Int64SumAggregation{}, nil - default: - return nil, fmt.Errorf("unsupported sum of type: %s", dataType.Name()) - } + return &SumAggregation{}, nil case logicalplan.AggFuncMin: - switch dataType.ID() { - case arrow.INT64: - return &Int64MinAggregation{}, nil - default: - return nil, fmt.Errorf("unsupported min of type: %s", dataType.Name()) - } + return &MinAggregation{}, nil case logicalplan.AggFuncMax: - switch dataType.ID() { - case arrow.INT64: - return 
&Int64MaxAggregation{}, nil - default: - return nil, fmt.Errorf("unsupported max of type: %s", dataType.Name()) - } + return &MaxAggregation{}, nil case logicalplan.AggFuncCount: return &CountAggregation{}, nil + case logicalplan.AggFuncUnique: + return &UniqueAggregation{}, nil + case logicalplan.AggFuncAnd: + return &AndAggregation{}, nil default: return nil, fmt.Errorf("unsupported aggregation function: %s", aggFunc.String()) } @@ -136,8 +103,9 @@ func chooseAggregationFunction( // Aggregation groups together some lower level primitives to for the column to be aggregated by its function. type Aggregation struct { expr logicalplan.Expr + dynamic bool // dynamic indicates that this aggregation is performed against a dynamic column. resultName string - function AggregationFunction + function logicalplan.AggFunc arrays []builder.ColumnBuilder // TODO: These can actually live outside this struct and be shared. Only at the very end will they be read by each column and then aggregated separately. } @@ -148,10 +116,6 @@ type AggregationFunction interface { type HashAggregate struct { pool memory.Allocator tracer trace.Tracer - aggregations []Aggregation - groupByCols map[string]builder.ColumnBuilder - colOrdering []string - hashToAggregate map[uint64]int groupByColumnMatchers []logicalplan.Expr hashSeed maphash.Seed next PhysicalPlan @@ -163,6 +127,34 @@ type HashAggregate struct { groupByFields []arrow.Field groupByFieldHashes []hashCombiner groupByArrays []arrow.Array + hashToAggregate map[uint64]hashtuple + + // aggregates are the collection of all the hash aggregates for this hash aggregation. This is useful when a single hash aggregate cannot fit + // into a single record and needs to be split into multiple records. + aggregates []*hashAggregate +} + +type hashtuple struct { + aggregate int // aggregate is the index into the aggregates slice + array int // array is the index into the aggregations array +} + +// hashAggregate represents a single hash aggregation. 
+type hashAggregate struct { + dynamicAggregations []Aggregation + // dynamicFieldsConverted tracks the fields that match with + // dynamicAggregations and have been converted to aggregations on a concrete + // column. + dynamicAggregationsConverted map[string]struct{} + aggregations []Aggregation + // concreteAggregations memoizes the number of concrete aggregations at + // initialization this number needs to be recorded because dynamic + // aggregations are converted to concrete aggregations at runtime. + concreteAggregations int + groupByCols map[string]builder.ColumnBuilder + + colOrdering []string + rowCount int } func NewHashAggregate( @@ -170,24 +162,59 @@ func NewHashAggregate( tracer trace.Tracer, aggregations []Aggregation, groupByColumnMatchers []logicalplan.Expr, + seed maphash.Seed, finalStage bool, ) *HashAggregate { + dynamic := []Aggregation{} + static := []Aggregation{} + for _, agg := range aggregations { + if agg.dynamic { + dynamic = append(dynamic, agg) + } else { + static = append(static, agg) + } + } + return &HashAggregate{ - pool: pool, - tracer: tracer, - aggregations: aggregations, - groupByCols: map[string]builder.ColumnBuilder{}, - colOrdering: []string{}, - hashToAggregate: map[uint64]int{}, + pool: pool, + tracer: tracer, // TODO: Matchers can be optimized to be something like a radix tree or just a fast-lookup datastructure for exact matches or prefix matches. groupByColumnMatchers: groupByColumnMatchers, - hashSeed: maphash.MakeSeed(), + hashSeed: seed, finalStage: finalStage, groupByFields: make([]arrow.Field, 0, 10), groupByFieldHashes: make([]hashCombiner, 0, 10), groupByArrays: make([]arrow.Array, 0, 10), + hashToAggregate: map[uint64]hashtuple{}, + aggregates: []*hashAggregate{ // initialize a single hash aggregate; we expect this array to only every grow during very large aggregations. 
+ { + dynamicAggregations: dynamic, + dynamicAggregationsConverted: make(map[string]struct{}), + aggregations: static, + concreteAggregations: len(static), + groupByCols: map[string]builder.ColumnBuilder{}, + colOrdering: []string{}, + }, + }, + } +} + +func (a *HashAggregate) Close() { + for _, arr := range a.groupByArrays { + arr.Release() + } + for _, aggregate := range a.aggregates { + for _, aggregation := range aggregate.aggregations { + for _, bldr := range aggregation.arrays { + bldr.Release() + } + } + for _, bldr := range aggregate.groupByCols { + bldr.Release() + } } + a.next.Close() } func (a *HashAggregate) SetNext(next PhysicalPlan) { @@ -200,16 +227,15 @@ func (a *HashAggregate) Draw() *Diagram { child = a.next.Draw() } - names := make([]string, 0, len(a.aggregations)) - for _, agg := range a.aggregations { + names := make([]string, 0, len(a.aggregates[0].aggregations)) + for _, agg := range a.aggregates[0].aggregations { names = append(names, agg.resultName) } var groupings []string for _, grouping := range a.groupByColumnMatchers { - groupings = append(groupings, grouping.Name()) + groupings = append(groupings, grouping.String()) } - details := fmt.Sprintf("HashAggregate (%s by %s)", strings.Join(names, ","), strings.Join(groupings, ",")) return &Diagram{Details: details, Child: child} } @@ -234,108 +260,15 @@ func (u *uint64HashCombine) hashCombine(rhs uint64) uint64 { return hashCombine(u.value, rhs) } -// durationHashCombine hashes a given timestamp by dividing it through a given duration. -// timestamp | duration | hash -// 0 | 2 | 0 -// 1 | 2 | 0 -// 2 | 2 | 1 -// 3 | 2 | 1 -// 4 | 2 | 2 -// 5 | 2 | 2 -// Essentially hashing timestamps into buckets of durations. 
-type durationHashCombine struct { - milliseconds uint64 -} - -func (d *durationHashCombine) hashCombine(rhs uint64) uint64 { - return rhs / d.milliseconds // floors by default -} - -func hashArray(arr arrow.Array) []uint64 { - switch ar := arr.(type) { - case *array.String: - return hashStringArray(ar) - case *array.Binary: - return hashBinaryArray(ar) - case *array.Int64: - return hashInt64Array(ar) - case *array.Boolean: - return hashBooleanArray(ar) - case *array.Dictionary: - return hashDictionaryArray(ar) - default: - panic("unsupported array type " + fmt.Sprintf("%T", arr)) - } -} - -func hashDictionaryArray(arr *array.Dictionary) []uint64 { - res := make([]uint64, arr.Len()) - for i := 0; i < arr.Len(); i++ { - if !arr.IsNull(i) { - switch dict := arr.Dictionary().(type) { - case *array.Binary: - res[i] = metro.Hash64(dict.Value(arr.GetValueIndex(i)), 0) - case *array.String: - res[i] = metro.Hash64([]byte(dict.Value(arr.GetValueIndex(i))), 0) - default: - panic("unsupported dictionary type " + fmt.Sprintf("%T", dict)) - } - } - } - return res -} - -func hashBinaryArray(arr *array.Binary) []uint64 { - res := make([]uint64, arr.Len()) - for i := 0; i < arr.Len(); i++ { - if !arr.IsNull(i) { - res[i] = metro.Hash64(arr.Value(i), 0) - } - } - return res -} - -func hashBooleanArray(arr *array.Boolean) []uint64 { - res := make([]uint64, arr.Len()) - for i := 0; i < arr.Len(); i++ { - if arr.IsNull(i) { - res[i] = 0 - continue - } - if arr.Value(i) { - res[i] = 2 - } else { - res[i] = 1 - } - } - return res -} - -func hashStringArray(arr *array.String) []uint64 { - res := make([]uint64, arr.Len()) - for i := 0; i < arr.Len(); i++ { - if !arr.IsNull(i) { - res[i] = metro.Hash64([]byte(arr.Value(i)), 0) - } - } - return res -} - -func hashInt64Array(arr *array.Int64) []uint64 { - res := make([]uint64, arr.Len()) - for i := 0; i < arr.Len(); i++ { - if !arr.IsNull(i) { - res[i] = uint64(arr.Value(i)) - } - } - return res -} - -func (a *HashAggregate) Callback(ctx 
context.Context, r arrow.Record) error { +func (a *HashAggregate) Callback(_ context.Context, r arrow.Record) error { // Generates high volume of spans. Comment out if needed during development. // ctx, span := a.tracer.Start(ctx, "HashAggregate/Callback") // defer span.End() + // aggregate is the current aggregation + aggregate := a.aggregates[len(a.aggregates)-1] + + fields := r.Schema().Fields() // NOTE: call Fields() once to avoid creating a copy each time groupByFields := a.groupByFields groupByFieldHashes := a.groupByFieldHashes groupByArrays := a.groupByArrays @@ -346,56 +279,120 @@ func (a *HashAggregate) Callback(ctx context.Context, r arrow.Record) error { groupByArrays = groupByArrays[:0] }() - columnToAggregate := make([]arrow.Array, len(a.aggregations)) - aggregateFieldsFound := 0 + columnToAggregate := make([]arrow.Array, len(aggregate.aggregations)) + concreteAggregateFieldsFound := 0 + dynamicAggregateFieldsFound := 0 - for i, field := range r.Schema().Fields() { + for i := 0; i < r.Schema().NumFields(); i++ { + field := r.Schema().Field(i) for _, matcher := range a.groupByColumnMatchers { if matcher.MatchColumn(field.Name) { groupByFields = append(groupByFields, field) groupByArrays = append(groupByArrays, r.Column(i)) - switch v := matcher.(type) { - case *logicalplan.DurationExpr: - duration := v.Value() - groupByFieldHashes = append(groupByFieldHashes, - &durationHashCombine{milliseconds: uint64(duration.Milliseconds())}, - ) - default: + if a.finalStage { // in the final stage expect the hashes to already exist, so only need to combine them as normal hashes groupByFieldHashes = append(groupByFieldHashes, &uint64HashCombine{value: scalar.Hash(a.hashSeed, scalar.NewStringScalar(field.Name))}, ) + continue + } + + groupByFieldHashes = append(groupByFieldHashes, + &uint64HashCombine{value: scalar.Hash(a.hashSeed, scalar.NewStringScalar(field.Name))}, + ) + } + } + + if _, ok := aggregate.dynamicAggregationsConverted[field.Name]; !ok { + for _, col 
:= range aggregate.dynamicAggregations { + if a.finalStage { + if col.expr.MatchColumn(field.Name) { + // expand the aggregate.aggregations with a final concrete column aggregation. + columnToAggregate = append(columnToAggregate, nil) + aggregate.aggregations = append(aggregate.aggregations, Aggregation{ + expr: logicalplan.Col(field.Name), + dynamic: true, + resultName: resultNameWithConcreteColumn(col.function, field.Name), + function: col.function, + }) + aggregate.dynamicAggregationsConverted[field.Name] = struct{}{} + } + } else { + // If we're aggregating the raw data we need to find the columns by their actual names for now. + if col.expr.MatchColumn(field.Name) { + // expand the aggregate.aggregations with a concrete column aggregation. + columnToAggregate = append(columnToAggregate, nil) + aggregate.aggregations = append(aggregate.aggregations, Aggregation{ + expr: logicalplan.Col(field.Name), + dynamic: true, + resultName: field.Name, // Don't rename the column yet, we'll do that in the final stage. Dynamic aggregations can't match agains't the pre-computed name. + function: col.function, + }) + aggregate.dynamicAggregationsConverted[field.Name] = struct{}{} + } } } } - for j, col := range a.aggregations { + for j, col := range aggregate.aggregations { // If we're aggregating at the final stage we have previously // renamed the pre-aggregated columns to their result names. if a.finalStage { - if col.resultName == field.Name { + if col.resultName == field.Name || (col.dynamic && col.expr.MatchColumn(field.Name)) { columnToAggregate[j] = r.Column(i) - aggregateFieldsFound++ + if col.dynamic { + dynamicAggregateFieldsFound++ + } else { + concreteAggregateFieldsFound++ + } } } else { // If we're aggregating the raw data we need to find the columns by their actual names for now. 
if col.expr.MatchColumn(field.Name) { columnToAggregate[j] = r.Column(i) - aggregateFieldsFound++ + if col.dynamic { + dynamicAggregateFieldsFound++ + } else { + concreteAggregateFieldsFound++ + } } } } } - if aggregateFieldsFound != len(a.aggregations) { - return errors.New("aggregate field not found, aggregations are not possible without it") + // It's ok for the same aggregation to be found multiple times, optimizers + // should remove them but for correctness in the case where they don't we + // need to handle it, so concrete aggregates are allowed to be different + // from concrete aggregations. + if ((concreteAggregateFieldsFound == 0 || aggregate.concreteAggregations == 0) && (len(aggregate.dynamicAggregations) == 0)) || + (len(aggregate.dynamicAggregations) > 0) && dynamicAggregateFieldsFound == 0 { + // To perform an aggregation ALL concrete columns must have been matched + // or at least one dynamic column if performing dynamic aggregations. + exprs := make([]string, len(aggregate.aggregations)) + for i, col := range aggregate.aggregations { + exprs[i] = col.expr.String() + } + + if a.finalStage { + return fmt.Errorf("aggregate field(s) not found %#v, final aggregations are not possible without it (%d concrete aggregation fields found; %d concrete aggregations)", exprs, concreteAggregateFieldsFound, aggregate.concreteAggregations) + } + return fmt.Errorf("aggregate field(s) not found %#v, aggregations are not possible without it (%d concrete aggregation fields found; %d concrete aggregations)", exprs, concreteAggregateFieldsFound, aggregate.concreteAggregations) } numRows := int(r.NumRows()) colHashes := make([][]uint64, len(groupByArrays)) for i, arr := range groupByArrays { - colHashes[i] = hashArray(arr) + col := dynparquet.FindHashedColumn(groupByFields[i].Name, fields) + if col != -1 { + vals := make([]uint64, 0, numRows) + for _, v := range r.Column(col).(*array.Int64).Int64Values() { + vals = append(vals, uint64(v)) + } + colHashes[i] = vals + } 
else { + colHashes[i] = dynparquet.HashArray(arr) + } } for i := 0; i < numRows; i++ { @@ -411,42 +408,79 @@ func (a *HashAggregate) Callback(ctx context.Context, r arrow.Record) error { ) } - k, ok := a.hashToAggregate[hash] + tuple, ok := a.hashToAggregate[hash] if !ok { + aggregate = a.aggregates[len(a.aggregates)-1] for j, col := range columnToAggregate { agg := builder.NewBuilder(a.pool, col.DataType()) - a.aggregations[j].arrays = append(a.aggregations[j].arrays, agg) + aggregate.aggregations[j].arrays = append(aggregate.aggregations[j].arrays, agg) + } + tuple = hashtuple{ + aggregate: len(a.aggregates) - 1, // always add new aggregates to the current aggregate + array: len(aggregate.aggregations[0].arrays) - 1, } - k = len(a.aggregations[0].arrays) - 1 - a.hashToAggregate[hash] = k + a.hashToAggregate[hash] = tuple + aggregate.rowCount++ // insert new row into columns grouped by and create new aggregate array to append to. - for j, arr := range groupByArrays { - fieldName := groupByFields[j].Name - - groupByCol, found := a.groupByCols[fieldName] - if !found { - groupByCol = builder.NewBuilder(a.pool, groupByFields[j].Type) - a.groupByCols[fieldName] = groupByCol - a.colOrdering = append(a.colOrdering, fieldName) + if err := a.updateGroupByCols(i, groupByArrays, groupByFields); err != nil { + if !errors.Is(err, builder.ErrMaxSizeReached) { + return err } - // We already appended to the arrays to aggregate, so we have - // to account for that. We only want to back-fill null values - // up until the index that we are about to insert into. 
- for groupByCol.Len() < len(a.aggregations[0].arrays)-1 { - groupByCol.AppendNull() + // Max size reached, rollback the aggregation creation and create new aggregate + aggregate.rowCount-- + for j := range columnToAggregate { + l := len(aggregate.aggregations[j].arrays) + aggregate.aggregations[j].arrays = aggregate.aggregations[j].arrays[:l-1] } - err := builder.AppendValue(groupByCol, arr, i) - if err != nil { + // Create new aggregation + aggregations := make([]Aggregation, 0, len(a.aggregates[0].aggregations)) + for _, agg := range a.aggregates[0].aggregations { + aggregations = append(aggregations, Aggregation{ + expr: agg.expr, + resultName: agg.resultName, + function: agg.function, + }) + } + a.aggregates = append(a.aggregates, &hashAggregate{ + aggregations: aggregations, + groupByCols: map[string]builder.ColumnBuilder{}, + colOrdering: []string{}, + }) + + aggregate = a.aggregates[len(a.aggregates)-1] + for j, col := range columnToAggregate { + agg := builder.NewBuilder(a.pool, col.DataType()) + aggregate.aggregations[j].arrays = append(aggregate.aggregations[j].arrays, agg) + } + tuple = hashtuple{ + aggregate: len(a.aggregates) - 1, // always add new aggregates to the current aggregate + array: len(aggregate.aggregations[0].arrays) - 1, + } + a.hashToAggregate[hash] = tuple + aggregate.rowCount++ + + if err := a.updateGroupByCols(i, groupByArrays, groupByFields); err != nil { return err } } } for j, col := range columnToAggregate { - if err := builder.AppendValue(a.aggregations[j].arrays[k], col, i); err != nil { + if col == nil { + // This is a dynamic aggregation that had no match. + continue + } + if a.aggregates[tuple.aggregate].aggregations[j].arrays == nil { + // This can happen with dynamic column aggregations without + // groupings. The group exists, but the array to append to does + // not. 
+ agg := builder.NewBuilder(a.pool, col.DataType()) + aggregate.aggregations[j].arrays = append(aggregate.aggregations[j].arrays, agg) + } + if err := builder.AppendValue(a.aggregates[tuple.aggregate].aggregations[j].arrays[tuple.array], col, i); err != nil { return err } } @@ -455,18 +489,79 @@ func (a *HashAggregate) Callback(ctx context.Context, r arrow.Record) error { return nil } +func (a *HashAggregate) updateGroupByCols(row int, groupByArrays []arrow.Array, groupByFields []arrow.Field) error { + // aggregate is the current aggregation + aggregate := a.aggregates[len(a.aggregates)-1] + + for i, arr := range groupByArrays { + fieldName := groupByFields[i].Name + + groupByCol, found := aggregate.groupByCols[fieldName] + if !found { + groupByCol = builder.NewBuilder(a.pool, groupByFields[i].Type) + aggregate.groupByCols[fieldName] = groupByCol + aggregate.colOrdering = append(aggregate.colOrdering, fieldName) + } + + // We already appended to the arrays to aggregate, so we have + // to account for that. We only want to back-fill null values + // up until the index that we are about to insert into. + for groupByCol.Len() < len(aggregate.aggregations[0].arrays)-1 { + groupByCol.AppendNull() + } + + if err := builder.AppendValue(groupByCol, arr, row); err != nil { + // Rollback + for j := 0; j < i; j++ { + if err := builder.RollbackPrevious(aggregate.groupByCols[groupByFields[j].Name]); err != nil { + return err + } + } + + return err + } + } + return nil +} + func (a *HashAggregate) Finish(ctx context.Context) error { ctx, span := a.tracer.Start(ctx, "HashAggregate/Finish") + span.SetAttributes(attribute.Bool("finalStage", a.finalStage)) defer span.End() - numCols := len(a.groupByCols) + 1 - // Each hash that's aggregated by will become one row in the final result. 
- numRows := len(a.hashToAggregate) + totalRows := 0 + for i, aggregate := range a.aggregates { + if err := a.finishAggregate(ctx, i, aggregate); err != nil { + return err + } + totalRows += aggregate.rowCount + } + span.SetAttributes(attribute.Int64("rows", int64(totalRows))) + return a.next.Finish(ctx) +} + +func (a *HashAggregate) finishAggregate(ctx context.Context, aggIdx int, aggregate *hashAggregate) error { + numCols := len(aggregate.groupByCols) + len(aggregate.aggregations) + numRows := aggregate.rowCount + + if numRows == 0 { // skip empty aggregates + return nil + } groupByFields := make([]arrow.Field, 0, numCols) groupByArrays := make([]arrow.Array, 0, numCols) - for _, fieldName := range a.colOrdering { - groupByCol, ok := a.groupByCols[fieldName] + defer func() { + for _, arr := range groupByArrays { + if arr != nil { + arr.Release() + } + } + }() + for _, fieldName := range aggregate.colOrdering { + if a.finalStage && dynparquet.IsHashedColumn(fieldName) { + continue + } + groupByCol, ok := aggregate.groupByCols[fieldName] if !ok { return fmt.Errorf("unknown field name: %s", fieldName) } @@ -481,46 +576,166 @@ func (a *HashAggregate) Finish(ctx context.Context) error { arr := groupByCol.NewArray() groupByFields = append(groupByFields, arrow.Field{Name: fieldName, Type: arr.DataType()}) groupByArrays = append(groupByArrays, arr) + // Pass forward the hashings of the group-by columns + if !a.finalStage { + groupByFields = append(groupByFields, arrow.Field{Name: dynparquet.HashedColumnName(fieldName), Type: arrow.PrimitiveTypes.Int64}) + func() { + bldr := array.NewInt64Builder(a.pool) + defer bldr.Release() + sortedHashes := make([]int64, arr.Len()) + for hash, tuple := range a.hashToAggregate { + if tuple.aggregate == aggIdx { // only append the hash for the current aggregate + sortedHashes[tuple.array] = int64(hash) + } + } + bldr.AppendValues(sortedHashes, nil) + groupByArrays = append(groupByArrays, bldr.NewArray()) + }() + } } // Rename to 
clarity upon appending aggregations later - aggregateColumns := groupByArrays aggregateFields := groupByFields - for _, aggregation := range a.aggregations { + for _, aggregation := range aggregate.aggregations { arr := make([]arrow.Array, 0, numRows) for _, a := range aggregation.arrays { arr = append(arr, a.NewArray()) } aggregateArray, err := runAggregation(a.finalStage, aggregation.function, a.pool, arr) + for _, a := range arr { + a.Release() + } if err != nil { return fmt.Errorf("aggregate batched arrays: %w", err) } - aggregateColumns = append(aggregateColumns, aggregateArray) + groupByArrays = append(groupByArrays, aggregateArray) aggregateFields = append(aggregateFields, arrow.Field{ - Name: aggregation.resultName, Type: aggregateArray.DataType(), + Name: aggregation.resultName, + Type: aggregateArray.DataType(), }) } - err := a.next.Callback(ctx, array.NewRecord( + r := array.NewRecord( arrow.NewSchema(aggregateFields, nil), - aggregateColumns, + groupByArrays, int64(numRows), - )) + ) + defer r.Release() + err := a.next.Callback(ctx, r) if err != nil { return err } - return a.next.Finish(ctx) + return nil +} + +type AndAggregation struct{} + +var ErrUnsupportedAndType = errors.New("unsupported type for is and aggregation, expected bool") + +func (a *AndAggregation) Aggregate(pool memory.Allocator, arrs []arrow.Array) (arrow.Array, error) { + if len(arrs) == 0 { + return array.NewBooleanBuilder(pool).NewArray(), nil + } + + typ := arrs[0].DataType().ID() + switch typ { + case arrow.BOOL: + return AndArrays(pool, arrs), nil + default: + return nil, fmt.Errorf("and array of %s: %w", typ, ErrUnsupportedAndType) + } +} + +func AndArrays(pool memory.Allocator, arrs []arrow.Array) arrow.Array { + b := array.NewBooleanBuilder(pool) + defer b.Release() + + for _, arr := range arrs { + if arr.Len() == 0 { + b.AppendNull() + } + + arr := arr.(*array.Boolean) + + val := true + for i := 0; i < arr.Len(); i++ { + if arr.IsValid(i) { + val = val && arr.Value(i) + } + } 
+ + b.Append(val) + } + + return b.NewArray() +} + +type UniqueAggregation struct{} + +var ErrUnsupportedIsUniqueType = errors.New("unsupported type for is unique aggregation, expected int64") + +func (a *UniqueAggregation) Aggregate(pool memory.Allocator, arrs []arrow.Array) (arrow.Array, error) { + if len(arrs) == 0 { + return array.NewInt64Builder(pool).NewArray(), nil + } + + typ := arrs[0].DataType().ID() + switch typ { + case arrow.INT64: + return uniqueInt64arrays(pool, arrs), nil + default: + return nil, fmt.Errorf("isUnique array of %s: %w", typ, ErrUnsupportedIsUniqueType) + } } -type Int64SumAggregation struct{} +func uniqueInt64arrays(pool memory.Allocator, arrs []arrow.Array) arrow.Array { + res := array.NewInt64Builder(pool) + defer res.Release() -var ErrUnsupportedSumType = errors.New("unsupported type for sum aggregation, expected int64") + for _, arr := range arrs { + uniqueVal, isUnique, hasValues := int64ArrayHasUniqueValue(arr.(*array.Int64)) + if !hasValues || !isUnique { + res.AppendNull() + } else { + res.Append(uniqueVal) + } + } + + arr := res.NewArray() + return arr +} -func (a *Int64SumAggregation) Aggregate(pool memory.Allocator, arrs []arrow.Array) (arrow.Array, error) { +func int64ArrayHasUniqueValue(arr *array.Int64) (int64, bool, bool) { + if arr.Len() == 0 { + return 0, false, false + } + + if !arr.IsValid(0) { + return 0, false, true + } + + val := arr.Value(0) + for i := 1; i < arr.Len(); i++ { + if !arr.IsValid(i) { + return 0, false, true + } + if val != arr.Value(i) { + return 0, false, true + } + } + + return val, true, true +} + +type SumAggregation struct{} + +var ErrUnsupportedSumType = errors.New("unsupported type for sum aggregation, expected int64 or float64") + +func (a *SumAggregation) Aggregate(pool memory.Allocator, arrs []arrow.Array) (arrow.Array, error) { if len(arrs) == 0 { return array.NewInt64Builder(pool).NewArray(), nil } @@ -529,6 +744,8 @@ func (a *Int64SumAggregation) Aggregate(pool memory.Allocator, arrs 
[]arrow.Arra switch typ { case arrow.INT64: return sumInt64arrays(pool, arrs), nil + case arrow.FLOAT64: + return sumFloat64arrays(pool, arrs), nil default: return nil, fmt.Errorf("sum array of %s: %w", typ, ErrUnsupportedSumType) } @@ -536,6 +753,7 @@ func (a *Int64SumAggregation) Aggregate(pool memory.Allocator, arrs []arrow.Arra func sumInt64arrays(pool memory.Allocator, arrs []arrow.Array) arrow.Array { res := array.NewInt64Builder(pool) + defer res.Release() for _, arr := range arrs { res.Append(sumInt64array(arr.(*array.Int64))) } @@ -547,11 +765,25 @@ func sumInt64array(arr *array.Int64) int64 { return math.Int64.Sum(arr) } -var ErrUnsupportedMinType = errors.New("unsupported type for max aggregation, expected int64") +func sumFloat64arrays(pool memory.Allocator, arrs []arrow.Array) arrow.Array { + res := array.NewFloat64Builder(pool) + defer res.Release() + for _, arr := range arrs { + res.Append(sumFloat64array(arr.(*array.Float64))) + } -type Int64MinAggregation struct{} + return res.NewArray() +} + +func sumFloat64array(arr *array.Float64) float64 { + return math.Float64.Sum(arr) +} -func (a *Int64MinAggregation) Aggregate(pool memory.Allocator, arrs []arrow.Array) (arrow.Array, error) { +var ErrUnsupportedMinType = errors.New("unsupported type for max aggregation, expected int64 or float64") + +type MinAggregation struct{} + +func (a *MinAggregation) Aggregate(pool memory.Allocator, arrs []arrow.Array) (arrow.Array, error) { if len(arrs) == 0 { return array.NewInt64Builder(pool).NewArray(), nil } @@ -560,6 +792,8 @@ func (a *Int64MinAggregation) Aggregate(pool memory.Allocator, arrs []arrow.Arra switch typ { case arrow.INT64: return minInt64arrays(pool, arrs), nil + case arrow.FLOAT64: + return minFloat64arrays(pool, arrs), nil default: return nil, fmt.Errorf("min array of %s: %w", typ, ErrUnsupportedMinType) } @@ -567,6 +801,7 @@ func (a *Int64MinAggregation) Aggregate(pool memory.Allocator, arrs []arrow.Arra func minInt64arrays(pool memory.Allocator, 
arrs []arrow.Array) arrow.Array { res := array.NewInt64Builder(pool) + defer res.Release() for _, arr := range arrs { if arr.Len() == 0 { res.AppendNull() @@ -585,20 +820,48 @@ func minInt64array(arr *array.Int64) int64 { // Note that the zero-length check must be performed before calling this // function. vals := arr.Int64Values() - min := vals[0] + minV := vals[0] for _, v := range vals { - if v < min { - min = v + if v < minV { + minV = v } } - return min + return minV } -type Int64MaxAggregation struct{} +func minFloat64arrays(pool memory.Allocator, arrs []arrow.Array) arrow.Array { + res := array.NewFloat64Builder(pool) + defer res.Release() + for _, arr := range arrs { + if arr.Len() == 0 { + res.AppendNull() + continue + } + res.Append(minFloat64array(arr.(*array.Float64))) + } -var ErrUnsupportedMaxType = errors.New("unsupported type for max aggregation, expected int64") + return res.NewArray() +} -func (a *Int64MaxAggregation) Aggregate(pool memory.Allocator, arrs []arrow.Array) (arrow.Array, error) { +// Same as minInt64array but for Float64. +func minFloat64array(arr *array.Float64) float64 { + // Note that the zero-length check must be performed before calling this + // function. 
+ vals := arr.Float64Values() + minV := vals[0] + for _, v := range vals { + if v < minV { + minV = v + } + } + return minV +} + +type MaxAggregation struct{} + +var ErrUnsupportedMaxType = errors.New("unsupported type for max aggregation, expected int64 or float64") + +func (a *MaxAggregation) Aggregate(pool memory.Allocator, arrs []arrow.Array) (arrow.Array, error) { if len(arrs) == 0 { return array.NewInt64Builder(pool).NewArray(), nil } @@ -607,6 +870,8 @@ func (a *Int64MaxAggregation) Aggregate(pool memory.Allocator, arrs []arrow.Arra switch typ { case arrow.INT64: return maxInt64arrays(pool, arrs), nil + case arrow.FLOAT64: + return maxFloat64arrays(pool, arrs), nil default: return nil, fmt.Errorf("max array of %s: %w", typ, ErrUnsupportedMaxType) } @@ -614,6 +879,7 @@ func (a *Int64MaxAggregation) Aggregate(pool memory.Allocator, arrs []arrow.Arra func maxInt64arrays(pool memory.Allocator, arrs []arrow.Array) arrow.Array { res := array.NewInt64Builder(pool) + defer res.Release() for _, arr := range arrs { if arr.Len() == 0 { res.AppendNull() @@ -632,13 +898,40 @@ func maxInt64array(arr *array.Int64) int64 { // Note that the zero-length check must be performed before calling this // function. vals := arr.Int64Values() - max := vals[0] + maxV := vals[0] for _, v := range vals { - if v > max { - max = v + if v > maxV { + maxV = v } } - return max + return maxV +} + +func maxFloat64arrays(pool memory.Allocator, arrs []arrow.Array) arrow.Array { + res := array.NewFloat64Builder(pool) + defer res.Release() + for _, arr := range arrs { + if arr.Len() == 0 { + res.AppendNull() + continue + } + res.Append(maxFloat64array(arr.(*array.Float64))) + } + + return res.NewArray() +} + +func maxFloat64array(arr *array.Float64) float64 { + // Note that the zero-length check must be performed before calling this + // function. 
+ vals := arr.Float64Values() + maxV := vals[0] + for _, v := range vals { + if v > maxV { + maxV = v + } + } + return maxV } type CountAggregation struct{} @@ -649,6 +942,7 @@ func (a *CountAggregation) Aggregate(pool memory.Allocator, arrs []arrow.Array) } res := array.NewInt64Builder(pool) + defer res.Release() for _, arr := range arrs { res.Append(int64(arr.Len())) } @@ -658,16 +952,37 @@ func (a *CountAggregation) Aggregate(pool memory.Allocator, arrs []arrow.Array) // runAggregation is a helper to run the given aggregation function given // the set of values. It is aware of the final stage and chooses the aggregation // function appropriately. -func runAggregation( - finalStage bool, - fn AggregationFunction, - pool memory.Allocator, - arrs []arrow.Array, -) (arrow.Array, error) { - if _, ok := fn.(*CountAggregation); ok && finalStage { +func runAggregation(finalStage bool, fn logicalplan.AggFunc, pool memory.Allocator, arrs []arrow.Array) (arrow.Array, error) { + if len(arrs) == 0 { + return array.NewInt64Builder(pool).NewArray(), nil + } + + aggFunc, err := chooseAggregationFunction(fn, arrs[0].DataType()) + if err != nil { + return nil, err + } + + if _, ok := aggFunc.(*CountAggregation); ok && finalStage { // The final stage of aggregation needs to sum up all the counts of the // previous steps, instead of counting the previous counts. 
- return (&Int64SumAggregation{}).Aggregate(pool, arrs) + return (&SumAggregation{}).Aggregate(pool, arrs) + } + return aggFunc.Aggregate(pool, arrs) +} + +func resultNameWithConcreteColumn(function logicalplan.AggFunc, col string) string { + switch function { + case logicalplan.AggFuncSum: + return logicalplan.Sum(logicalplan.Col(col)).Name() + case logicalplan.AggFuncMin: + return logicalplan.Min(logicalplan.Col(col)).Name() + case logicalplan.AggFuncMax: + return logicalplan.Max(logicalplan.Col(col)).Name() + case logicalplan.AggFuncCount: + return logicalplan.Count(logicalplan.Col(col)).Name() + case logicalplan.AggFuncAvg: + return logicalplan.Avg(logicalplan.Col(col)).Name() + default: + return "" } - return fn.Aggregate(pool, arrs) } diff --git a/query/physicalplan/aggregate_test.go b/query/physicalplan/aggregate_test.go new file mode 100644 index 000000000..613937c8f --- /dev/null +++ b/query/physicalplan/aggregate_test.go @@ -0,0 +1,118 @@ +package physicalplan + +import ( + "context" + "encoding/binary" + "hash/maphash" + "math/rand" + "testing" + + "go.opentelemetry.io/otel/trace/noop" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/stretchr/testify/require" + + "github.com/polarsignals/frostdb/query/logicalplan" +) + +func randByteSlice(n int) []byte { + b := make([]byte, n) + binary.LittleEndian.PutUint64(b, rand.Uint64()) + return b +} + +// Test_Aggregate_ArrayOverflow ensures that if more data is sent to the aggreagte function than can fit in a single array +// that the Aggregate function correctly splits the data into multiple records. 
+func Test_Aggregate_ArrayOverflow(t *testing.T) { + if testing.Short() { + t.Skip("short test; skipping") + } + ctx := context.Background() + allocator := memory.NewGoAllocator() + + agg := NewHashAggregate( + allocator, + noop.NewTracerProvider().Tracer(""), + []Aggregation{ + { + expr: logicalplan.Col("value"), + resultName: "result", + function: logicalplan.AggFuncSum, + }, + }, + []logicalplan.Expr{ + logicalplan.Col("stacktrace"), + logicalplan.Col("id"), + }, + maphash.MakeSeed(), + false, + ) + + totalRows := int64(0) + agg.SetNext(&OutputPlan{ + callback: func(_ context.Context, r arrow.Record) error { + require.Equal(t, 5, len(r.Schema().Fields())) + for i := 0; i < int(r.NumCols()); i++ { + require.Equal(t, r.NumRows(), int64(r.Column(i).Len())) + } + totalRows += r.NumRows() + return nil + }, + }) + + fields := []arrow.Field{ + { + Name: "value", + Type: &arrow.Int64Type{}, + }, + { + Name: "id", + Type: &arrow.Int64Type{}, + }, + { + Name: "stacktrace", + Type: &arrow.BinaryType{}, + }, + } + + n := 3 + rows := 1000 + for i := 0; i < n; i++ { + arrays := make([]arrow.Array, len(fields)) + + // Build the value array + valBuilder := array.NewInt64Builder(allocator) + for j := 0; j < rows; j++ { + valBuilder.Append(rand.Int63()) + } + arrays[0] = valBuilder.NewArray() + + // Build the id array + idBuilder := array.NewInt64Builder(allocator) + for j := 0; j < rows; j++ { + idBuilder.Append(int64(j + (i * rows))) + } + arrays[1] = idBuilder.NewArray() + + // Build the stacktrace array + stacktraceBuilder := array.NewBinaryBuilder(allocator, &arrow.BinaryType{}) + for j := 0; j < rows; j++ { + // Generate a new stack trace each time, this will cause the group by array to grow quite large + stacktraceBuilder.Append(randByteSlice(1024 * 1024)) // Use 1MB stack traces to quickly exceed array size + } + arrays[2] = stacktraceBuilder.NewArray() + + require.NoError(t, agg.Callback(ctx, + array.NewRecord( + arrow.NewSchema(fields, nil), + arrays, + int64(rows), + 
), + )) + } + + require.NoError(t, agg.Finish(ctx)) + require.Equal(t, int64(n*rows), totalRows) +} diff --git a/query/physicalplan/binaryscalarexpr.go b/query/physicalplan/binaryscalarexpr.go index be3dad9e4..ceb91b100 100644 --- a/query/physicalplan/binaryscalarexpr.go +++ b/query/physicalplan/binaryscalarexpr.go @@ -2,12 +2,15 @@ package physicalplan import ( "bytes" + "context" "errors" "fmt" + "unsafe" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/scalar" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/compute" + "github.com/apache/arrow-go/v18/arrow/scalar" "github.com/polarsignals/frostdb/query/logicalplan" ) @@ -41,14 +44,31 @@ func (e BinaryScalarExpr) Eval(r arrow.Record) (*Bitmap, error) { return nil, err } - // TODO: This needs a bunch of test cases to validate edge cases like non - // existant columns or null values. I'm pretty sure this is completely - // wrong and needs per operation, per type specific behavior. 
if !exists { res := NewBitmap() - for i := uint32(0); i < uint32(r.NumRows()); i++ { - res.Add(i) + switch e.Op { + case logicalplan.OpEq: + if e.Right.IsValid() { // missing column; looking for == non-nil + switch t := e.Right.(type) { + case *scalar.Binary: + if t.String() != "" { // treat empty string equivalent to nil + return res, nil + } + case *scalar.String: + if t.String() != "" { // treat empty string equivalent to nil + return res, nil + } + } + } + case logicalplan.OpNotEq: // missing column; looking for != nil + if !e.Right.IsValid() { + return res, nil + } + case logicalplan.OpLt, logicalplan.OpLtEq, logicalplan.OpGt, logicalplan.OpGtEq: + return res, nil } + + res.AddRange(0, uint64(r.NumRows())) return res, nil } @@ -62,77 +82,20 @@ func (e BinaryScalarExpr) String() string { var ErrUnsupportedBinaryOperation = errors.New("unsupported binary operation") func BinaryScalarOperation(left arrow.Array, right scalar.Scalar, operator logicalplan.Op) (*Bitmap, error) { - leftType := left.DataType() - switch leftType { - case arrow.FixedWidthTypes.Boolean: - switch operator { - case logicalplan.OpEq: - return BooleanArrayScalarEqual(left.(*array.Boolean), right.(*scalar.Boolean)) - case logicalplan.OpNotEq: - return BooleanArrayScalarNotEqual(left.(*array.Boolean), right.(*scalar.Boolean)) + switch operator { + case logicalplan.OpContains, logicalplan.OpNotContains: + switch arr := left.(type) { + case *array.Binary, *array.String: + return ArrayScalarContains(left, right, operator == logicalplan.OpNotContains) + case *array.Dictionary: + return DictionaryArrayScalarContains(arr, right, operator == logicalplan.OpNotContains) default: - panic("something terrible has happened, this should have errored previously during validation") - } - case &arrow.FixedSizeBinaryType{ByteWidth: 16}: - switch operator { - case logicalplan.OpEq: - return FixedSizeBinaryArrayScalarEqual(left.(*array.FixedSizeBinary), right.(*scalar.FixedSizeBinary)) - case logicalplan.OpNotEq: 
- return FixedSizeBinaryArrayScalarNotEqual(left.(*array.FixedSizeBinary), right.(*scalar.FixedSizeBinary)) - default: - panic("something terrible has happened, this should have errored previously during validation") - } - case arrow.BinaryTypes.String: - switch operator { - case logicalplan.OpEq: - return StringArrayScalarEqual(left.(*array.String), right.(*scalar.String)) - case logicalplan.OpNotEq: - return StringArrayScalarNotEqual(left.(*array.String), right.(*scalar.String)) - default: - panic("something terrible has happened, this should have errored previously during validation") - } - case arrow.BinaryTypes.Binary: - switch operator { - case logicalplan.OpEq: - switch r := right.(type) { - case *scalar.Binary: - return BinaryArrayScalarEqual(left.(*array.Binary), r) - case *scalar.String: - return BinaryArrayScalarEqual(left.(*array.Binary), r.Binary) - default: - panic("something terrible has happened, this should have errored previously during validation") - } - case logicalplan.OpNotEq: - switch r := right.(type) { - case *scalar.Binary: - return BinaryArrayScalarNotEqual(left.(*array.Binary), r) - case *scalar.String: - return BinaryArrayScalarNotEqual(left.(*array.Binary), r.Binary) - default: - panic("something terrible has happened, this should have errored previously during validation") - } - default: - panic("something terrible has happened, this should have errored previously during validation") - } - case arrow.PrimitiveTypes.Int64: - switch operator { - case logicalplan.OpEq: - return Int64ArrayScalarEqual(left.(*array.Int64), right.(*scalar.Int64)) - case logicalplan.OpNotEq: - return Int64ArrayScalarNotEqual(left.(*array.Int64), right.(*scalar.Int64)) - case logicalplan.OpLt: - return Int64ArrayScalarLessThan(left.(*array.Int64), right.(*scalar.Int64)) - case logicalplan.OpLtEq: - return Int64ArrayScalarLessThanOrEqual(left.(*array.Int64), right.(*scalar.Int64)) - case logicalplan.OpGt: - return 
Int64ArrayScalarGreaterThan(left.(*array.Int64), right.(*scalar.Int64)) - case logicalplan.OpGtEq: - return Int64ArrayScalarGreaterThanOrEqual(left.(*array.Int64), right.(*scalar.Int64)) - default: - panic("something terrible has happened, this should have errored previously during validation") + panic("unsupported array type " + fmt.Sprintf("%T", arr)) } } + // TODO: Figure out dictionary arrays and lists with compute next + leftType := left.DataType() switch arr := left.(type) { case *array.Dictionary: switch operator { @@ -150,7 +113,42 @@ func BinaryScalarOperation(left arrow.Array, right scalar.Scalar, operator logic panic("TODO: list comparisons unimplemented") } - return nil, ErrUnsupportedBinaryOperation + return ArrayScalarCompute(operator.ArrowString(), left, right) +} + +func ArrayScalarCompute(funcName string, left arrow.Array, right scalar.Scalar) (*Bitmap, error) { + leftData := compute.NewDatum(left) + defer leftData.Release() + rightData := compute.NewDatum(right) + defer rightData.Release() + equalsResult, err := compute.CallFunction(context.TODO(), funcName, nil, leftData, rightData) + if err != nil { + if errors.Unwrap(err).Error() == "not implemented" { + return nil, ErrUnsupportedBinaryOperation + } + return nil, fmt.Errorf("error calling equal function: %w", err) + } + defer equalsResult.Release() + equalsDatum, ok := equalsResult.(*compute.ArrayDatum) + if !ok { + return nil, fmt.Errorf("expected *compute.ArrayDatum, got %T", equalsResult) + } + equalsArray, ok := equalsDatum.MakeArray().(*array.Boolean) + if !ok { + return nil, fmt.Errorf("expected *array.Boolean, got %T", equalsDatum.MakeArray()) + } + defer equalsArray.Release() + + res := NewBitmap() + for i := 0; i < equalsArray.Len(); i++ { + if equalsArray.IsNull(i) { + continue + } + if equalsArray.Value(i) { + res.AddInt(i) + } + } + return res, nil } func DictionaryArrayScalarNotEqual(left *array.Dictionary, right scalar.Scalar) (*Bitmap, error) { @@ -233,210 +231,85 @@ func 
DictionaryArrayScalarEqual(left *array.Dictionary, right scalar.Scalar) (*B return res, nil } -func FixedSizeBinaryArrayScalarEqual(left *array.FixedSizeBinary, right *scalar.FixedSizeBinary) (*Bitmap, error) { - res := NewBitmap() - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if bytes.Equal(left.Value(i), right.Data()) { - res.Add(uint32(i)) - } - } - - return res, nil -} - -func FixedSizeBinaryArrayScalarNotEqual(left *array.FixedSizeBinary, right *scalar.FixedSizeBinary) (*Bitmap, error) { - res := NewBitmap() - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - res.Add(uint32(i)) - continue - } - if !bytes.Equal(left.Value(i), right.Data()) { - res.Add(uint32(i)) - } - } - - return res, nil -} - -func StringArrayScalarEqual(left *array.String, right *scalar.String) (*Bitmap, error) { - res := NewBitmap() - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) == string(right.Data()) { - res.Add(uint32(i)) - } - } - - return res, nil -} - -func StringArrayScalarNotEqual(left *array.String, right *scalar.String) (*Bitmap, error) { - res := NewBitmap() - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - res.Add(uint32(i)) - continue - } - if left.Value(i) != string(right.Data()) { - res.Add(uint32(i)) - } - } - - return res, nil -} - -func BinaryArrayScalarEqual(left *array.Binary, right *scalar.Binary) (*Bitmap, error) { - res := NewBitmap() - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if bytes.Equal(left.Value(i), right.Data()) { - res.Add(uint32(i)) - } - } - - return res, nil -} - -func BinaryArrayScalarNotEqual(left *array.Binary, right *scalar.Binary) (*Bitmap, error) { - res := NewBitmap() - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - res.Add(uint32(i)) - continue - } - if !bytes.Equal(left.Value(i), right.Data()) { - res.Add(uint32(i)) - } - } - - return res, nil -} - -func Int64ArrayScalarEqual(left *array.Int64, right *scalar.Int64) 
(*Bitmap, error) { - res := NewBitmap() - - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) == right.Value { - res.Add(uint32(i)) - } +func ArrayScalarContains(arr arrow.Array, right scalar.Scalar, not bool) (*Bitmap, error) { + var r []byte + switch s := right.(type) { + case *scalar.Binary: + r = s.Data() + case *scalar.String: + r = s.Data() } - return res, nil -} - -func Int64ArrayScalarNotEqual(left *array.Int64, right *scalar.Int64) (*Bitmap, error) { res := NewBitmap() - - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - res.Add(uint32(i)) - continue - } - if left.Value(i) != right.Value { - res.Add(uint32(i)) - } - } - - return res, nil -} - -func Int64ArrayScalarLessThan(left *array.Int64, right *scalar.Int64) (*Bitmap, error) { - res := NewBitmap() - - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue + switch a := arr.(type) { + case *array.Binary: + for i := 0; i < a.Len(); i++ { + if a.IsNull(i) { + continue + } + contains := bytes.Contains(a.Value(i), r) + if contains && !not || !contains && not { + res.Add(uint32(i)) + } } - if left.Value(i) < right.Value { - res.Add(uint32(i)) + return res, nil + case *array.String: + for i := 0; i < a.Len(); i++ { + if a.IsNull(i) { + continue + } + contains := bytes.Contains(unsafeStringToBytes(a.Value(i)), r) + if contains && !not || !contains && not { + res.Add(uint32(i)) + } } + return res, nil } - - return res, nil + return nil, fmt.Errorf("contains not implemented for %T", arr) } -func Int64ArrayScalarLessThanOrEqual(left *array.Int64, right *scalar.Int64) (*Bitmap, error) { +func DictionaryArrayScalarContains(left *array.Dictionary, right scalar.Scalar, not bool) (*Bitmap, error) { res := NewBitmap() - - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) <= right.Value { - res.Add(uint32(i)) - } + var data []byte + switch r := right.(type) { + case *scalar.Binary: + data = r.Data() + case 
*scalar.String: + data = r.Data() } - return res, nil -} - -func Int64ArrayScalarGreaterThan(left *array.Int64, right *scalar.Int64) (*Bitmap, error) { - res := NewBitmap() - - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) > right.Value { - res.Add(uint32(i)) + // This is a special case for where the left side should not equal NULL + if right == scalar.ScalarNull { + for i := 0; i < left.Len(); i++ { + if !left.IsNull(i) { + res.Add(uint32(i)) + } } + return res, nil } - return res, nil -} - -func Int64ArrayScalarGreaterThanOrEqual(left *array.Int64, right *scalar.Int64) (*Bitmap, error) { - res := NewBitmap() - for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } - if left.Value(i) >= right.Value { - res.Add(uint32(i)) - } - } - return res, nil -} - -func BooleanArrayScalarEqual(left *array.Boolean, right *scalar.Boolean) (*Bitmap, error) { - res := NewBitmap() - - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) == right.Value { - res.Add(uint32(i)) + switch dict := left.Dictionary().(type) { + case *array.Binary: + contains := bytes.Contains(dict.Value(left.GetValueIndex(i)), data) + if contains && !not || !contains && not { + res.Add(uint32(i)) + } + case *array.String: + contains := bytes.Contains(unsafeStringToBytes(dict.Value(left.GetValueIndex(i))), data) + if contains && !not || !contains && not { + res.Add(uint32(i)) + } } } return res, nil } -func BooleanArrayScalarNotEqual(left *array.Boolean, right *scalar.Boolean) (*Bitmap, error) { - res := NewBitmap() - - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value { - res.Add(uint32(i)) - } - } - - return res, nil +func unsafeStringToBytes(s string) []byte { + return unsafe.Slice(unsafe.StringData(s), len(s)) } diff --git a/query/physicalplan/binaryscalarexpr_test.go b/query/physicalplan/binaryscalarexpr_test.go new file mode 100644 index 000000000..0ae6b883f 
--- /dev/null +++ b/query/physicalplan/binaryscalarexpr_test.go @@ -0,0 +1,128 @@ +package physicalplan + +import ( + "testing" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/apache/arrow-go/v18/arrow/scalar" + "github.com/stretchr/testify/require" + + "github.com/polarsignals/frostdb/query/logicalplan" +) + +func BenchmarkBinaryScalarOperation(b *testing.B) { + ab := array.NewInt64Builder(memory.DefaultAllocator) + for i := int64(0); i < 1_000_000; i++ { + ab.Append(i % 10) + } + + arr := ab.NewInt64Array() + ab.Release() + + s := scalar.NewInt64Scalar(4) // chosen by fair dice roll. guaranteed to be random. + + operators := []logicalplan.Op{ + logicalplan.OpAnd, + logicalplan.OpEq, + logicalplan.OpNotEq, + logicalplan.OpLt, + logicalplan.OpLtEq, + logicalplan.OpGt, + logicalplan.OpGtEq, + } + + for _, op := range operators { + b.Run(op.String(), func(b *testing.B) { + for i := 0; i < b.N; i++ { + _, _ = BinaryScalarOperation(arr, s, op) + } + }) + } +} + +func BenchmarkBinaryScalarContains(b *testing.B) { + lines := []string{ + "Hot chicken dolor truffaut knausgaard, ramps shaman skateboard neutral milk hotel af letterpress kickstarter art party church-key hoodie. Butcher typewriter cold-pressed, williamsburg voluptate organic bushwick roof party banjo live-edge bodega boys. Fixie praxis incididunt health goth, in salvia cray commodo fam trust fund. Kale chips street art slow-carb shabby chic occaecat echo park messenger bag fam meggings DSA offal leggings. Taiyaki yes plz everyday carry sartorial eiusmod fugiat williamsburg elit jean shorts pop-up grailed. Authentic exercitation palo santo praxis distillery. Fam 3 wolf moon tbh pabst la croix lumbersexual sunt subway tile normcore.", + "Vexillologist art party cloud bread ipsum. Mlkshk intelligentsia taiyaki semiotics ut. 
Before they sold out YOLO health goth poke plaid. Literally edison bulb excepteur ethical man bun tacos, viral retro ipsum bicycle rights four dollar toast. Franzen roof party enim green juice iPhone officia. Gorpcore squid vegan dolore mumblecore quinoa lomo ullamco. Pork belly labore intelligentsia skateboard.", + "Actually lumbersexual praxis art party pug church-key yuccie JOMO tilde mollit reprehenderit. Fanny pack chambray literally fam biodiesel, single-origin coffee marxism everyday carry velit la croix direct trade JOMO. Offal praxis live-edge plaid cloud bread cupidatat est wayfarers farm-to-table +1 sed echo park magna. Hella raw denim cornhole slow-carb marxism gorpcore. Pop-up voluptate cornhole mumblecore tbh hexagon heirloom echo park activated charcoal eu. Beard kickstarter ethical pabst fingerstache bespoke tousled kogi 8-bit lyft try-hard bodega boys ullamco readymade.", + "Cardigan bruh cronut, farm-to-table marfa ex aliqua subway tile kinfolk. Brunch tbh adaptogen fam chartreuse, irure palo santo shaman JOMO neutra. Paleo thundercats non hashtag ennui. In readymade four loko non ea. Kinfolk subway tile kogi, waistcoat copper mug lomo pug duis sunt.", + "Deserunt you probably haven't heard of them lyft migas. Palo santo reprehenderit vegan, marfa proident gochujang kale chips paleo edison bulb ascot kinfolk banjo. Copper mug seitan quis officia flannel everyday carry sed cold-pressed. Blue bottle 3 wolf moon air plant, ex salvia fixie crucifix you probably haven't heard of them keffiyeh enim meditation kinfolk four dollar toast poke. 8-bit banh mi godard, offal cred fanny pack readymade taiyaki. 
Fixie glossier waistcoat, incididunt non pinterest yes plz ullamco pour-over aesthetic lo-fi.", + } + + ab := array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.Binary) + as := array.NewStringBuilder(memory.DefaultAllocator) + ad := array.NewDictionaryBuilder(memory.DefaultAllocator, &arrow.DictionaryType{ + IndexType: &arrow.Uint32Type{}, + ValueType: arrow.BinaryTypes.String, + }).(*array.BinaryDictionaryBuilder) + + for i := 0; i < 10_000; i++ { + line := lines[i%len(lines)] + ab.AppendString(line) + as.Append(line) + require.NoError(b, ad.AppendString(line)) + } + + arrayBinary := ab.NewBinaryArray() + arrayString := as.NewStringArray() + arrayDict := ad.NewDictionaryArray() + ab.Release() + as.Release() + ad.Release() + + s := scalar.NewStringScalar("vegan") + + types := []struct { + name string + arr arrow.Array + }{{ + name: "binary", + arr: arrayBinary, + }, { + name: "string", + arr: arrayString, + }, { + name: "dictionary", + arr: arrayDict, + }} + + b.ResetTimer() + b.ReportAllocs() + + for _, tt := range types { + b.Run(tt.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + _, _ = BinaryScalarOperation(tt.arr, s, logicalplan.OpContains) + } + }) + } +} + +func TestBinaryScalarOperationNotImplemented(t *testing.T) { + ab := array.NewInt64Builder(memory.DefaultAllocator) + arr := ab.NewInt64Array() + ab.Release() + + s := scalar.NewInt64Scalar(4) // chosen by fair dice roll. guaranteed to be random. 
+ + _, err := BinaryScalarOperation(arr, s, logicalplan.OpAnd) + require.Equal(t, err, ErrUnsupportedBinaryOperation) +} + +func Test_ArrayScalarCompute_Leak(t *testing.T) { + allocator := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer allocator.AssertSize(t, 0) + + ab := array.NewInt64Builder(allocator) + defer ab.Release() + + ab.AppendValues([]int64{1, 2, 3}, nil) + arr := ab.NewInt64Array() + defer arr.Release() + + s := scalar.NewInt64Scalar(4) + _, err := ArrayScalarCompute("equal", arr, s) + require.NoError(t, err) +} diff --git a/query/physicalplan/distinct.go b/query/physicalplan/distinct.go index 2aa19bd4c..3cae60b68 100644 --- a/query/physicalplan/distinct.go +++ b/query/physicalplan/distinct.go @@ -7,12 +7,13 @@ import ( "strings" "sync" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/memory" - "github.com/apache/arrow/go/v10/arrow/scalar" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/apache/arrow-go/v18/arrow/scalar" "go.opentelemetry.io/otel/trace" + "github.com/polarsignals/frostdb/dynparquet" "github.com/polarsignals/frostdb/pqarrow/builder" "github.com/polarsignals/frostdb/query/logicalplan" ) @@ -62,6 +63,10 @@ func (d *Distinction) Finish(ctx context.Context) error { return d.next.Finish(ctx) } +func (d *Distinction) Close() { + d.next.Close() +} + func (d *Distinction) Callback(ctx context.Context, r arrow.Record) error { // Generates high volume of spans. Comment out if needed during development. 
// ctx, span := d.tracer.Start(ctx, "Distinction/Callback") @@ -71,7 +76,8 @@ func (d *Distinction) Callback(ctx context.Context, r arrow.Record) error { distinctFieldHashes := make([]uint64, 0, 10) distinctArrays := make([]arrow.Array, 0, 10) - for i, field := range r.Schema().Fields() { + for i := 0; i < r.Schema().NumFields(); i++ { + field := r.Schema().Field(i) for _, col := range d.columns { if col.MatchColumn(field.Name) { distinctFields = append(distinctFields, field) @@ -82,6 +88,11 @@ func (d *Distinction) Callback(ctx context.Context, r arrow.Record) error { } resBuilders := make([]builder.ColumnBuilder, 0, len(distinctArrays)) + defer func() { + for _, builder := range resBuilders { + builder.Release() + } + }() for _, arr := range distinctArrays { resBuilders = append(resBuilders, builder.NewBuilder(d.pool, arr.DataType())) } @@ -91,7 +102,7 @@ func (d *Distinction) Callback(ctx context.Context, r arrow.Record) error { colHashes := make([][]uint64, len(distinctFields)) for i, arr := range distinctArrays { - colHashes[i] = hashArray(arr) + colHashes[i] = dynparquet.HashArray(arr) } for i := 0; i < numRows; i++ { @@ -137,6 +148,11 @@ func (d *Distinction) Callback(ctx context.Context, r arrow.Record) error { } resArrays := make([]arrow.Array, 0, len(resBuilders)) + defer func() { + for _, arr := range resArrays { + arr.Release() + } + }() for _, builder := range resBuilders { resArrays = append(resArrays, builder.NewArray()) } @@ -149,7 +165,6 @@ func (d *Distinction) Callback(ctx context.Context, r arrow.Record) error { rows, ) - err := d.next.Callback(ctx, distinctRecord) - distinctRecord.Release() - return err + defer distinctRecord.Release() + return d.next.Callback(ctx, distinctRecord) } diff --git a/query/physicalplan/filter.go b/query/physicalplan/filter.go index 8691da1ed..a2bf1073c 100644 --- a/query/physicalplan/filter.go +++ b/query/physicalplan/filter.go @@ -7,10 +7,10 @@ import ( "regexp" "github.com/RoaringBitmap/roaring" - 
"github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/memory" - "github.com/apache/arrow/go/v10/arrow/scalar" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/apache/arrow-go/v18/arrow/scalar" "go.opentelemetry.io/otel/trace" "github.com/polarsignals/frostdb/query/logicalplan" @@ -53,13 +53,30 @@ func (f PreExprVisitorFunc) PreVisit(expr logicalplan.Expr) bool { return f(expr) } -func (f PreExprVisitorFunc) PostVisit(expr logicalplan.Expr) bool { +func (f PreExprVisitorFunc) Visit(_ logicalplan.Expr) bool { + return false +} + +func (f PreExprVisitorFunc) PostVisit(_ logicalplan.Expr) bool { return false } func binaryBooleanExpr(expr *logicalplan.BinaryExpr) (BooleanExpression, error) { switch expr.Op { - case logicalplan.OpEq, logicalplan.OpNotEq, logicalplan.OpLt, logicalplan.OpLtEq, logicalplan.OpGt, logicalplan.OpGtEq, logicalplan.OpRegexMatch, logicalplan.OpRegexNotMatch: + case logicalplan.OpEq, + logicalplan.OpNotEq, + logicalplan.OpLt, + logicalplan.OpLtEq, + logicalplan.OpGt, + logicalplan.OpGtEq, + logicalplan.OpRegexMatch, + logicalplan.OpRegexNotMatch, + logicalplan.OpAdd, + logicalplan.OpSub, + logicalplan.OpMul, + logicalplan.OpDiv, + logicalplan.OpContains, + logicalplan.OpNotContains: var leftColumnRef *ArrayRef expr.Left.Accept(PreExprVisitorFunc(func(expr logicalplan.Expr) bool { switch e := expr.(type) { @@ -115,12 +132,12 @@ func binaryBooleanExpr(expr *logicalplan.BinaryExpr) (BooleanExpression, error) case logicalplan.OpAnd: left, err := booleanExpr(expr.Left) if err != nil { - return nil, err + return nil, fmt.Errorf("left bool expr: %w", err) } right, err := booleanExpr(expr.Right) if err != nil { - return nil, err + return nil, fmt.Errorf("right 
bool expr: %w", err) } return &AndExpr{ @@ -130,12 +147,12 @@ func binaryBooleanExpr(expr *logicalplan.BinaryExpr) (BooleanExpression, error) case logicalplan.OpOr: left, err := booleanExpr(expr.Left) if err != nil { - return nil, err + return nil, fmt.Errorf("left bool expr: %w", err) } right, err := booleanExpr(expr.Right) if err != nil { - return nil, err + return nil, fmt.Errorf("right bool expr: %w", err) } return &OrExpr{ @@ -143,7 +160,7 @@ func binaryBooleanExpr(expr *logicalplan.BinaryExpr) (BooleanExpression, error) Right: right, }, nil default: - panic("unsupported binary boolean expression") + return nil, fmt.Errorf("binary expr %s: %w", expr.Op.String(), ErrUnsupportedBooleanExpression) } } @@ -158,6 +175,10 @@ func (a *AndExpr) Eval(r arrow.Record) (*Bitmap, error) { return nil, err } + if left.IsEmpty() { + return left, nil + } + right, err := a.Right.Eval(r) if err != nil { return nil, err @@ -209,7 +230,7 @@ func booleanExpr(expr logicalplan.Expr) (BooleanExpression, error) { func Filter(pool memory.Allocator, tracer trace.Tracer, filterExpr logicalplan.Expr) (*PredicateFilter, error) { expr, err := booleanExpr(filterExpr) if err != nil { - return nil, err + return nil, fmt.Errorf("create bool expr: %w", err) } return newFilter(pool, tracer, expr), nil @@ -227,6 +248,10 @@ func (f *PredicateFilter) SetNext(next PhysicalPlan) { f.next = next } +func (f *PredicateFilter) Close() { + f.next.Close() +} + func (f *PredicateFilter) Callback(ctx context.Context, r arrow.Record) error { // Generates high volume of spans. Comment out if needed during development. 
// ctx, span := f.tracer.Start(ctx, "PredicateFilter/Callback") @@ -263,23 +288,35 @@ func filter(pool memory.Allocator, filterExpr BooleanExpression, ar arrow.Record totalRows := int64(0) recordRanges := make([]arrow.Record, len(ranges)) + defer func() { + for _, r := range recordRanges { + r.Release() + } + }() for j, r := range ranges { recordRanges[j] = ar.NewSlice(int64(r.Start), int64(r.End)) totalRows += int64(r.End - r.Start) } - cols := make([]arrow.Array, ar.NumCols()) + cols := make([]arrow.Array, 0, ar.NumCols()) + defer func() { + for _, col := range cols { + col.Release() + } + }() numRanges := len(recordRanges) - for i := range cols { + for i := range ar.Columns() { colRanges := make([]arrow.Array, 0, numRanges) for _, rr := range recordRanges { colRanges = append(colRanges, rr.Column(i)) } - cols[i], err = array.Concatenate(colRanges, pool) + c, err := array.Concatenate(colRanges, pool) if err != nil { return nil, true, err } + + cols = append(cols, c) } return array.NewRecord(ar.Schema(), cols, totalRows), false, nil diff --git a/query/physicalplan/filter_test.go b/query/physicalplan/filter_test.go index d39d45732..19c74956c 100644 --- a/query/physicalplan/filter_test.go +++ b/query/physicalplan/filter_test.go @@ -3,6 +3,7 @@ package physicalplan import ( "testing" + "github.com/apache/arrow-go/v18/arrow" "github.com/stretchr/testify/require" ) @@ -47,3 +48,34 @@ func Test_BuildIndexRanges(t *testing.T) { }) } } + +type mockExpression struct { + evalFn func(arrow.Record) (*Bitmap, error) +} + +var _ BooleanExpression = mockExpression{} + +func (e mockExpression) Eval(record arrow.Record) (*Bitmap, error) { + return e.evalFn(record) +} + +func (e mockExpression) String() string { + return "" +} + +func TestAndExprShortCircuits(t *testing.T) { + left := mockExpression{evalFn: func(_ arrow.Record) (*Bitmap, error) { + return NewBitmap(), nil + }} + right := mockExpression{evalFn: func(_ arrow.Record) (*Bitmap, error) { + 
t.Fatal("right should not be evaluated") + return nil, nil + }} + andExpr := AndExpr{ + Left: left, + Right: right, + } + result, err := andExpr.Eval(nil) + require.NoError(t, err) + require.True(t, result.IsEmpty()) +} diff --git a/query/physicalplan/limit.go b/query/physicalplan/limit.go new file mode 100644 index 000000000..1d35c488c --- /dev/null +++ b/query/physicalplan/limit.go @@ -0,0 +1,98 @@ +package physicalplan + +import ( + "context" + "fmt" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/apache/arrow-go/v18/arrow/scalar" + "go.opentelemetry.io/otel/trace" + + "github.com/polarsignals/frostdb/pqarrow/arrowutils" + "github.com/polarsignals/frostdb/query/logicalplan" +) + +type Limiter struct { + pool memory.Allocator + tracer trace.Tracer + next PhysicalPlan + + count uint64 +} + +func Limit(pool memory.Allocator, tracer trace.Tracer, expr logicalplan.Expr) (*Limiter, error) { + literal, ok := expr.(*logicalplan.LiteralExpr) + if !ok { + return nil, fmt.Errorf("expected literal expression, got %T", expr) + } + + var count uint64 + switch v := literal.Value.(type) { + case *scalar.Uint64: + count = v.Value + case *scalar.Int64: + count = uint64(v.Value) + default: + return nil, fmt.Errorf("expected limit count type, got %T", v) + } + + return &Limiter{ + pool: pool, + tracer: tracer, + count: count, + }, nil +} + +func (l *Limiter) SetNext(next PhysicalPlan) { l.next = next } + +func (l *Limiter) Finish(ctx context.Context) error { return l.next.Finish(ctx) } + +func (l *Limiter) Close() { l.next.Close() } + +func (l *Limiter) Draw() *Diagram { + var child *Diagram + if l.next != nil { + child = l.next.Draw() + } + details := fmt.Sprintf("Limit(%d)", l.count) + return &Diagram{Details: details, Child: child} +} + +func (l *Limiter) Callback(ctx context.Context, r 
arrow.Record) error { + if r.NumRows() == 0 { + return l.next.Callback(ctx, r) + } + if l.count == 0 { + newRecord := array.NewRecord(r.Schema(), nil, 0) + return l.next.Callback(ctx, newRecord) + } + + if uint64(r.NumRows()) <= l.count { + return l.next.Callback(ctx, r) + } + + // TODO: We should figure out a way to avoid copying the record here. + // Maybe we can use a different approach to limit the record. + + indicesBuilder := array.NewInt32Builder(l.pool) + defer indicesBuilder.Release() + + for i := int32(0); i < int32(l.count); i++ { + indicesBuilder.Append(i) + } + indices := indicesBuilder.NewInt32Array() + defer indices.Release() + + limitedRecord, err := arrowutils.Take(ctx, r, indices) + if err != nil { + return err + } + + if err := l.next.Callback(ctx, limitedRecord); err != nil { + return err + } + + return nil +} diff --git a/query/physicalplan/ordered_aggregate.go b/query/physicalplan/ordered_aggregate.go index 496668cdb..e7630bf84 100644 --- a/query/physicalplan/ordered_aggregate.go +++ b/query/physicalplan/ordered_aggregate.go @@ -6,9 +6,9 @@ import ( "fmt" "strings" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" "go.opentelemetry.io/otel/trace" "github.com/polarsignals/frostdb/pqarrow/arrowutils" @@ -46,7 +46,7 @@ type OrderedAggregate struct { tracer trace.Tracer resultColumnName string groupByColumnMatchers []logicalplan.Expr - aggregationFunction AggregationFunction + aggregationFunction logicalplan.AggFunc next PhysicalPlan columnToAggregate logicalplan.Expr // Indicate is this is the last aggregation or if this is an aggregation @@ -133,6 +133,10 @@ func NewOrderedAggregate( return o } +func (a *OrderedAggregate) Close() { 
+ a.next.Close() +} + func (a *OrderedAggregate) SetNext(next PhysicalPlan) { a.next = next } @@ -171,7 +175,8 @@ func (a *OrderedAggregate) Callback(_ context.Context, r arrow.Record) error { var columnToAggregate arrow.Array aggregateFieldFound := false foundNewColumns := false - for i, field := range r.Schema().Fields() { + for i := 0; i < r.Schema().NumFields(); i++ { + field := r.Schema().Field(i) for _, matcher := range a.groupByColumnMatchers { if matcher.MatchColumn(field.Name) { a.scratch.groupByMap[field.Name] = groupColInfo{field: field, arr: r.Column(i)} @@ -242,10 +247,7 @@ func (a *OrderedAggregate) Callback(_ context.Context, r arrow.Record) error { a.scratch.groupByArrays = append(a.scratch.groupByArrays, arr) if !a.notFirstCall { // Initialize curGroup to the first value in each column. - v, err := arrowutils.GetValue(arr, 0) - if err != nil { - return err - } + v := arr.GetOneForMarshal(0) switch concreteV := v.(type) { case []byte: // Safe copy. @@ -342,17 +344,12 @@ func (a *OrderedAggregate) Callback(_ context.Context, r arrow.Record) error { a.groupResults = append(a.groupResults, nil) } for i, field := range a.groupColOrdering { - var ( - v any - err error - ) + var v any if groupEnd == 0 { // End of the current group of the last record. v = a.curGroup[field.Name] } else { - if v, err = arrowutils.GetValue(a.scratch.groupByArrays[i], int(groupStart)); err != nil { - return err - } + v = a.scratch.groupByArrays[i].GetOneForMarshal(int(groupStart)) } if err := builder.AppendGoValue( a.groupBuilders[field.Name], @@ -485,22 +482,18 @@ func (a *OrderedAggregate) Finish(ctx context.Context) error { } } else { // The aggregation results must be merged. 
- orderByCols := make([]int, len(a.groupColOrdering)) + orderByCols := make([]arrowutils.SortingColumn, len(a.groupColOrdering)) for i := range orderByCols { - orderByCols[i] = i + orderByCols[i] = arrowutils.SortingColumn{Index: i} } - mergedRecord, err := arrowutils.MergeRecords(a.pool, records, orderByCols) + mergedRecord, err := arrowutils.MergeRecords(a.pool, records, orderByCols, 0) if err != nil { return err } firstGroup := make([]any, len(a.groupColOrdering)) groupArrs := mergedRecord.Columns()[:len(a.groupColOrdering)] for i, arr := range groupArrs { - v, err := arrowutils.GetValue(arr, 0) - if err != nil { - return err - } - firstGroup[i] = v + firstGroup[i] = arr.GetOneForMarshal(0) } wrappedGroupRanges, _, _, err := arrowutils.GetGroupsAndOrderedSetRanges(firstGroup, groupArrs) if err != nil { diff --git a/query/physicalplan/ordered_aggregate_test.go b/query/physicalplan/ordered_aggregate_test.go index dd928eda4..42dbecaf5 100644 --- a/query/physicalplan/ordered_aggregate_test.go +++ b/query/physicalplan/ordered_aggregate_test.go @@ -5,11 +5,12 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/memory" + "go.opentelemetry.io/otel/trace/noop" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/require" - "go.opentelemetry.io/otel/trace" "github.com/polarsignals/frostdb/pqarrow/builder" "github.com/polarsignals/frostdb/query/logicalplan" @@ -165,18 +166,18 @@ func TestOrderedAggregate(t *testing.T) { valBuilder := builder.NewOptInt64Builder(arrow.PrimitiveTypes.Int64) o := NewOrderedAggregate( memory.DefaultAllocator, - trace.NewNoopTracerProvider().Tracer(""), + noop.NewTracerProvider().Tracer(""), Aggregation{ 
expr: logicalplan.Col(valColName), resultName: "result", - function: &Int64SumAggregation{}, + function: logicalplan.AggFuncSum, }, groupCols, true, ) resultCursor := 0 o.SetNext(&OutputPlan{ - callback: func(ctx context.Context, r arrow.Record) error { + callback: func(_ context.Context, r arrow.Record) error { if r.NumRows() == 0 { require.True(t, resultCursor < len(tc.resultRecords)) return nil @@ -221,7 +222,7 @@ func TestOrderedAggregate(t *testing.T) { groupBuilders[i].AppendNull() continue } - groupBuilders[i].Append([]byte(v)) + require.NoError(t, groupBuilders[i].Append([]byte(v))) } a := groupBuilders[i].NewArray() recordFields = append( @@ -272,10 +273,10 @@ func TestOrderedAggregateDynCols(t *testing.T) { ctx := context.Background() o := NewOrderedAggregate( memory.DefaultAllocator, - trace.NewNoopTracerProvider().Tracer(""), + noop.NewTracerProvider().Tracer(""), Aggregation{ expr: logicalplan.Col(valColName), - function: &Int64SumAggregation{}, + function: logicalplan.AggFuncSum, }, []logicalplan.Expr{ logicalplan.DynCol(dynColName), @@ -304,7 +305,7 @@ func TestOrderedAggregateDynCols(t *testing.T) { groupBuilder := builder.NewOptBinaryBuilder(arrow.BinaryTypes.Binary) valBuilder := builder.NewOptInt64Builder(arrow.PrimitiveTypes.Int64) for j := 0; j < numVals; j++ { - groupBuilder.Append([]byte("group")) + require.NoError(t, groupBuilder.Append([]byte("group"))) valBuilder.Append(1) } diff --git a/query/physicalplan/ordered_synchronizer.go b/query/physicalplan/ordered_synchronizer.go index 0086787d3..57c00840d 100644 --- a/query/physicalplan/ordered_synchronizer.go +++ b/query/physicalplan/ordered_synchronizer.go @@ -6,9 +6,9 @@ import ( "fmt" "sync" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + 
"github.com/apache/arrow-go/v18/arrow/memory" "github.com/polarsignals/frostdb/dynparquet" "github.com/polarsignals/frostdb/pqarrow/arrowutils" @@ -24,7 +24,7 @@ import ( type OrderedSynchronizer struct { pool memory.Allocator orderByExprs []logicalplan.Expr - orderByCols []int + orderByCols []arrowutils.SortingColumn sync struct { mtx sync.Mutex @@ -52,6 +52,10 @@ func NewOrderedSynchronizer(pool memory.Allocator, inputs int, orderByExprs []lo return o } +func (o *OrderedSynchronizer) Close() { + o.next.Close() +} + func (o *OrderedSynchronizer) Callback(ctx context.Context, r arrow.Record) error { o.sync.mtx.Lock() o.sync.data = append(o.sync.data, r) @@ -113,7 +117,7 @@ func (o *OrderedSynchronizer) mergeRecordsLocked() (arrow.Record, error) { if err := o.ensureSameSchema(o.sync.data); err != nil { return nil, err } - mergedRecord, err := arrowutils.MergeRecords(o.pool, o.sync.data, o.orderByCols) + mergedRecord, err := arrowutils.MergeRecords(o.pool, o.sync.data, o.orderByCols, 0) if err != nil { return nil, err } @@ -153,7 +157,8 @@ func (o *OrderedSynchronizer) ensureSameSchema(records []arrow.Record) error { for i, orderCol := range o.orderByExprs { orderCols[i] = make(map[string]arrow.Field) for _, r := range records { - for _, field := range r.Schema().Fields() { + for j := 0; j < r.Schema().NumFields(); j++ { + field := r.Schema().Field(j) if ok := orderCol.MatchColumn(field.Name); ok { orderCols[i][field.Name] = field } else { @@ -192,7 +197,7 @@ func (o *OrderedSynchronizer) ensureSameSchema(records []arrow.Record) error { o.orderByCols = o.orderByCols[:0] for i := range newFields { - o.orderByCols = append(o.orderByCols, i) + o.orderByCols = append(o.orderByCols, arrowutils.SortingColumn{Index: i}) } for _, field := range leftoverCols { @@ -209,7 +214,8 @@ func (o *OrderedSynchronizer) ensureSameSchema(records []arrow.Record) error { } var columns []arrow.Array - for _, field := range schema.Fields() { + for j 
:= 0; j < schema.NumFields(); j++ { + field := schema.Field(j) if otherFields := otherSchema.FieldIndices(field.Name); otherFields != nil { if len(otherFields) > 1 { fieldsFound, _ := otherSchema.FieldsByName(field.Name) diff --git a/query/physicalplan/ordered_synchronizer_test.go b/query/physicalplan/ordered_synchronizer_test.go index 19f92b988..c7f5baf25 100644 --- a/query/physicalplan/ordered_synchronizer_test.go +++ b/query/physicalplan/ordered_synchronizer_test.go @@ -6,9 +6,9 @@ import ( "sync/atomic" "testing" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/require" "golang.org/x/sync/errgroup" diff --git a/query/physicalplan/physicalplan.go b/query/physicalplan/physicalplan.go index 888ae9b75..84c5f62a2 100644 --- a/query/physicalplan/physicalplan.go +++ b/query/physicalplan/physicalplan.go @@ -3,16 +3,19 @@ package physicalplan import ( "context" "fmt" + "hash/maphash" "runtime" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/apache/arrow-go/v18/arrow/scalar" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" "golang.org/x/sync/errgroup" "github.com/polarsignals/frostdb/dynparquet" "github.com/polarsignals/frostdb/query/logicalplan" + "github.com/polarsignals/frostdb/recovery" ) // TODO: Make this smarter. 
@@ -23,6 +26,7 @@ type PhysicalPlan interface { Finish(ctx context.Context) error SetNext(next PhysicalPlan) Draw() *Diagram + Close() } type ScanPhysicalPlan interface { @@ -76,6 +80,8 @@ func (e *OutputPlan) Finish(_ context.Context) error { return nil } +func (e *OutputPlan) Close() {} + func (e *OutputPlan) SetNext(_ PhysicalPlan) { // OutputPlan should be the last step. // If this gets called we're doing something wrong. @@ -118,30 +124,42 @@ func (s *TableScan) Execute(ctx context.Context, pool memory.Allocator) error { for _, plan := range s.plans { callbacks = append(callbacks, plan.Callback) } + defer func() { // Close all plans to ensure memory cleanup. + for _, plan := range s.plans { + plan.Close() + } + }() + + opts := []logicalplan.Option{ + logicalplan.WithPhysicalProjection(s.options.PhysicalProjection...), + logicalplan.WithProjection(s.options.Projection...), + logicalplan.WithFilter(s.options.Filter), + logicalplan.WithDistinctColumns(s.options.Distinct...), + logicalplan.WithReadMode(s.options.ReadMode), + } - err = table.View(ctx, func(ctx context.Context, tx uint64) error { - return table.Iterator( - ctx, - tx, - pool, - callbacks, - logicalplan.WithPhysicalProjection(s.options.PhysicalProjection...), - logicalplan.WithProjection(s.options.Projection...), - logicalplan.WithFilter(s.options.Filter), - logicalplan.WithDistinctColumns(s.options.Distinct...), - ) - }) - if err != nil { + errg, _ := errgroup.WithContext(ctx) + errg.Go(recovery.Do(func() error { + return table.View(ctx, func(ctx context.Context, tx uint64) error { + return table.Iterator( + ctx, + tx, + pool, + callbacks, + opts..., + ) + }) + })) + if err := errg.Wait(); err != nil { return err } - errg, ctx := errgroup.WithContext(ctx) - + errg, _ = errgroup.WithContext(ctx) for _, plan := range s.plans { plan := plan - errg.Go(func() error { + errg.Go(recovery.Do(func() (err error) { return plan.Finish(ctx) - }) + })) } return errg.Wait() @@ -154,11 +172,15 @@ type SchemaScan 
struct { } func (s *SchemaScan) Draw() *Diagram { - // var children []*Diagram - // for _, plan := range s.plans { - // children = append(children, plan.Draw()) - // } - return &Diagram{Details: "SchemaScan"} + details := "SchemaScan" + var child *Diagram + if children := len(s.plans); children > 0 { + child = s.plans[0].Draw() + if children > 1 { + details += " [concurrent]" + } + } + return &Diagram{Details: details, Child: child} } func (s *SchemaScan) Execute(ctx context.Context, pool memory.Allocator) error { @@ -172,29 +194,36 @@ func (s *SchemaScan) Execute(ctx context.Context, pool memory.Allocator) error { callbacks = append(callbacks, plan.Callback) } - err = table.View(ctx, func(ctx context.Context, tx uint64) error { - return table.SchemaIterator( - ctx, - tx, - pool, - callbacks, - logicalplan.WithPhysicalProjection(s.options.PhysicalProjection...), - logicalplan.WithProjection(s.options.Projection...), - logicalplan.WithFilter(s.options.Filter), - logicalplan.WithDistinctColumns(s.options.Distinct...), - ) - }) - if err != nil { - return err + opts := []logicalplan.Option{ + logicalplan.WithPhysicalProjection(s.options.PhysicalProjection...), + logicalplan.WithProjection(s.options.Projection...), + logicalplan.WithFilter(s.options.Filter), + logicalplan.WithDistinctColumns(s.options.Distinct...), + logicalplan.WithReadMode(s.options.ReadMode), } - errg, ctx := errgroup.WithContext(ctx) + errg, _ := errgroup.WithContext(ctx) + errg.Go(recovery.Do(func() error { + return table.View(ctx, func(ctx context.Context, tx uint64) error { + return table.SchemaIterator( + ctx, + tx, + pool, + callbacks, + opts..., + ) + }) + })) + if err := errg.Wait(); err != nil { + return err + } + errg, _ = errgroup.WithContext(ctx) for _, plan := range s.plans { plan := plan - errg.Go(func() error { + errg.Go(recovery.Do(func() error { return plan.Finish(ctx) - }) + })) } return errg.Wait() @@ -204,6 +233,10 @@ type noopOperator struct { next PhysicalPlan } +func (p 
*noopOperator) Close() { + p.next.Close() +} + func (p *noopOperator) Callback(ctx context.Context, r arrow.Record) error { return p.next.Callback(ctx, r) } @@ -226,10 +259,17 @@ func (p *noopOperator) Draw() *Diagram { type execOptions struct { orderedAggregations bool overrideInput []PhysicalPlan + readMode logicalplan.ReadMode } type Option func(o *execOptions) +func WithReadMode(m logicalplan.ReadMode) Option { + return func(o *execOptions) { + o.readMode = m + } +} + func WithOrderedAggregations() Option { return func(o *execOptions) { o.orderedAggregations = true @@ -268,7 +308,7 @@ func Build( if s != nil { // TODO(asubiotto): There are cases in which the schema can be nil. // Eradicate these. - oInfo.sortingCols = s.SortingColumns() + oInfo.sortingCols = s.ColumnDefinitionsForSortingColumns() } var visitErr error @@ -283,6 +323,7 @@ func Build( for i := range plans { plans[i] = &noopOperator{} } + plan.SchemaScan.ReadMode = execOpts.readMode outputPlan.scan = &SchemaScan{ tracer: tracer, options: plan.SchemaScan, @@ -297,6 +338,7 @@ func Build( for i := range plans { plans[i] = &noopOperator{} } + plan.TableScan.ReadMode = execOpts.readMode outputPlan.scan = &TableScan{ tracer: tracer, options: plan.TableScan, @@ -305,6 +347,11 @@ func Build( prev = append(prev[:0], plans...) oInfo.nodeMaintainsOrdering() case plan.Projection != nil: + for _, e := range plan.Projection.Exprs { // Don't build the projection if it's a wildcard, the projection pushdown optimization will handle it. + if e.Name() == "all" { + return true + } + } // For each previous physical plan create one Projection for i := range prev { p, err := Project(pool, tracer, plan.Projection.Exprs) @@ -337,6 +384,36 @@ func Build( prev = prev[0:1] prev[0] = d } + case plan.Limit != nil: + var sync *Synchronizer + if len(prev) > 1 { + // These limit operators need to be synchronized. 
+ sync = Synchronize(len(prev)) + } + for i := 0; i < len(prev); i++ { + d, err := Limit(pool, tracer, plan.Limit.Expr) + if err != nil { + visitErr = err + return false + } + prev[i].SetNext(d) + prev[i] = d + if sync != nil { + d.SetNext(sync) + } + } + if sync != nil { + // Plan a limit operator to run a limit on all the + // synchronized limits. + d, err := Limit(pool, tracer, plan.Limit.Expr) + if err != nil { + visitErr = err + return false + } + sync.SetNext(d) + prev = prev[0:1] + prev[0] = d + } case plan.Filter != nil: // Create a filter for each previous plan. // Can be multiple filters or just a single @@ -353,11 +430,6 @@ func Build( oInfo.applyFilter(plan.Filter.Expr) oInfo.nodeMaintainsOrdering() case plan.Aggregation != nil: - schema := s.ParquetSchema() - if schema == nil { - visitErr = fmt.Errorf("aggregation got empty schema") - return false - } ordered, err := shouldPlanOrderedAggregate(execOpts, oInfo, plan.Aggregation) if err != nil { // TODO(asubiotto): Log the error. @@ -372,8 +444,9 @@ func Build( sync = Synchronize(len(prev)) } } + seed := maphash.MakeSeed() for i := 0; i < len(prev); i++ { - a, err := Aggregate(pool, tracer, schema, plan.Aggregation, sync == nil, ordered) + a, err := Aggregate(pool, tracer, plan.Aggregation, sync == nil, ordered, seed) if err != nil { visitErr = err return false @@ -387,7 +460,7 @@ func Build( if sync != nil { // Plan an aggregate operator to run an aggregation on all the // aggregations. 
- a, err := Aggregate(pool, tracer, schema, plan.Aggregation, true, ordered) + a, err := Aggregate(pool, tracer, plan.Aggregation, true, ordered, seed) if err != nil { visitErr = err return false @@ -399,6 +472,21 @@ func Build( if ordered { oInfo.nodeMaintainsOrdering() } + case plan.Sample != nil: + v := plan.Sample.Expr.(*logicalplan.LiteralExpr).Value.(*scalar.Int64).Value + limit := plan.Sample.Limit.(*logicalplan.LiteralExpr).Value.(*scalar.Int64).Value + perSampler := v / int64(len(prev)) + perSamplerLimit := limit / int64(len(prev)) + r := v % int64(len(prev)) + for i := range prev { + adjust := int64(0) + if i < int(r) { + adjust = 1 + } + s := NewReservoirSampler(perSampler+adjust, perSamplerLimit, pool) + prev[i].SetNext(s) + prev[i] = s + } default: panic("Unsupported plan") } diff --git a/query/physicalplan/physicalplan_test.go b/query/physicalplan/physicalplan_test.go index 7c27e63b9..993e0f43f 100644 --- a/query/physicalplan/physicalplan_test.go +++ b/query/physicalplan/physicalplan_test.go @@ -4,10 +4,11 @@ import ( "context" "testing" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/memory" + "go.opentelemetry.io/otel/trace/noop" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/require" - "go.opentelemetry.io/otel/trace" "github.com/polarsignals/frostdb/dynparquet" "github.com/polarsignals/frostdb/query/logicalplan" @@ -21,26 +22,26 @@ func (m *mockTableReader) Schema() *dynparquet.Schema { return m.schema } -func (m *mockTableReader) View(ctx context.Context, fn func(ctx context.Context, tx uint64) error) error { +func (m *mockTableReader) View(_ context.Context, _ func(ctx context.Context, tx uint64) error) error { return nil } func (m *mockTableReader) Iterator( - ctx context.Context, - tx uint64, - pool memory.Allocator, - callbacks 
[]logicalplan.Callback, - iterOpts ...logicalplan.Option, + _ context.Context, + _ uint64, + _ memory.Allocator, + _ []logicalplan.Callback, + _ ...logicalplan.Option, ) error { return nil } func (m *mockTableReader) SchemaIterator( - ctx context.Context, - tx uint64, - pool memory.Allocator, - callbacks []logicalplan.Callback, - iterOpts ...logicalplan.Option, + _ context.Context, + _ uint64, + _ memory.Allocator, + _ []logicalplan.Callback, + _ ...logicalplan.Option, ) error { return nil } @@ -49,7 +50,7 @@ type mockTableProvider struct { schema *dynparquet.Schema } -func (m *mockTableProvider) GetTable(name string) (logicalplan.TableReader, error) { +func (m *mockTableProvider) GetTable(_ string) (logicalplan.TableReader, error) { return &mockTableReader{ schema: m.schema, }, nil @@ -60,10 +61,10 @@ func TestBuildPhysicalPlan(t *testing.T) { Scan(&mockTableProvider{schema: dynparquet.NewSampleSchema()}, "table1"). Filter(logicalplan.Col("labels.test").Eq(logicalplan.Literal("abc"))). Aggregate( - []logicalplan.Expr{logicalplan.Sum(logicalplan.Col("value")).Alias("value_sum")}, + []*logicalplan.AggregationFunction{logicalplan.Sum(logicalplan.Col("value"))}, []logicalplan.Expr{logicalplan.Col("stacktrace")}, ). - Project(logicalplan.Col("stacktrace"), logicalplan.Col("value_sum")). + Project(logicalplan.Col("stacktrace"), logicalplan.Sum(logicalplan.Col("value")).Alias("value_sum")). 
Build() optimizers := []logicalplan.Optimizer{ @@ -78,7 +79,7 @@ func TestBuildPhysicalPlan(t *testing.T) { _, err := Build( context.Background(), memory.DefaultAllocator, - trace.NewNoopTracerProvider().Tracer(""), + noop.NewTracerProvider().Tracer(""), dynparquet.NewSampleSchema(), p, ) @@ -104,3 +105,7 @@ func (m *mockPhysicalPlan) SetNext(next PhysicalPlan) { func (m *mockPhysicalPlan) Draw() *Diagram { return &Diagram{} } + +func (m *mockPhysicalPlan) Close() { + m.next.Close() +} diff --git a/query/physicalplan/planordering.go b/query/physicalplan/planordering.go index c355602d8..0af4039c5 100644 --- a/query/physicalplan/planordering.go +++ b/query/physicalplan/planordering.go @@ -71,6 +71,10 @@ func (f preExprVisitorFunc) PreVisit(expr logicalplan.Expr) bool { return f(expr) } +func (f preExprVisitorFunc) Visit(_ logicalplan.Expr) bool { + return true +} + func (f preExprVisitorFunc) PostVisit(_ logicalplan.Expr) bool { return true } diff --git a/query/physicalplan/project.go b/query/physicalplan/project.go index 2a2ac654b..9a4b1e2c5 100644 --- a/query/physicalplan/project.go +++ b/query/physicalplan/project.go @@ -5,20 +5,26 @@ import ( "fmt" "strings" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/apache/arrow-go/v18/arrow/scalar" "go.opentelemetry.io/otel/trace" "github.com/polarsignals/frostdb/query/logicalplan" ) type columnProjection interface { + // Name returns the name of the column that this projection will return. Name() string + // String returns the string representation as it may have been used to create the projection. + String() string + // Projects one or more columns. 
Each element in the field list corresponds to an element in the list of arrays. Project(mem memory.Allocator, ar arrow.Record) ([]arrow.Field, []arrow.Array, error) } type aliasProjection struct { + p columnProjection expr *logicalplan.AliasExpr name string } @@ -27,50 +33,387 @@ func (a aliasProjection) Name() string { return a.name } +func (a aliasProjection) String() string { + return a.expr.String() +} + func (a aliasProjection) Project(mem memory.Allocator, ar arrow.Record) ([]arrow.Field, []arrow.Array, error) { - switch e := a.expr.Expr.(type) { - case *logicalplan.BinaryExpr: - boolExpr, err := binaryBooleanExpr(e) - if err != nil { - return nil, nil, err + fields, arrays, err := a.p.Project(mem, ar) + if err != nil { + return nil, nil, err + } + + if len(fields) == 0 || len(fields) != len(arrays) { + return nil, nil, fmt.Errorf("invalid projection for alias: fields and arrays must be non-empty and of equal length") + } + + return []arrow.Field{{ + Name: a.name, + Type: fields[0].Type, + Nullable: fields[0].Nullable, + Metadata: fields[0].Metadata, + }}, arrays, nil +} + +type binaryExprProjection struct { + expr *logicalplan.BinaryExpr + + left columnProjection + right columnProjection +} + +func (b binaryExprProjection) Name() string { + return b.expr.String() +} + +func (b binaryExprProjection) String() string { + return b.expr.String() +} + +func (b binaryExprProjection) Project(mem memory.Allocator, ar arrow.Record) ([]arrow.Field, []arrow.Array, error) { + leftFields, leftArrays, err := b.left.Project(mem, ar) + if err != nil { + return nil, nil, fmt.Errorf("project left side of binary expression: %w", err) + } + defer func() { + for _, arr := range leftArrays { + arr.Release() } - fields, array, err := binaryExprProjection{boolExpr: boolExpr}.Project(mem, ar) - if err != nil { - return nil, nil, err + }() + + rightFields, rightArrays, err := b.right.Project(mem, ar) + if err != nil { + return nil, nil, fmt.Errorf("project right side of binary 
expression: %w", err) + } + defer func() { + for _, arr := range rightArrays { + arr.Release() } - for i, field := range fields { - if a.expr.Expr.MatchColumn(field.Name) { - fields[i].Name = a.name - } + }() + + if len(leftFields) != 1 || len(leftArrays) != 1 { + return nil, nil, fmt.Errorf("binary expression projection expected one field and one array for each side, got %d fields and %d arrays on left", len(leftFields), len(leftArrays)) + } + + if len(rightFields) != 1 || len(rightArrays) != 1 { + return nil, nil, fmt.Errorf("binary expression projection expected one field and one array for each side, got %d fields and %d arrays on right", len(rightFields), len(rightArrays)) + } + + leftArray := leftArrays[0] + rightArray := rightArrays[0] + + resultFields := []arrow.Field{{ + Name: b.expr.Name(), + Type: leftFields[0].Type, + Nullable: leftFields[0].Nullable, + Metadata: leftFields[0].Metadata, + }} + switch leftArray := leftArray.(type) { + case *array.Int64: + switch b.expr.Op { + case logicalplan.OpAdd: + return resultFields, []arrow.Array{AddInt64s(mem, leftArray, rightArray.(*array.Int64))}, nil + case logicalplan.OpSub: + return resultFields, []arrow.Array{SubInt64s(mem, leftArray, rightArray.(*array.Int64))}, nil + case logicalplan.OpMul: + return resultFields, []arrow.Array{MulInt64s(mem, leftArray, rightArray.(*array.Int64))}, nil + case logicalplan.OpDiv: + return resultFields, []arrow.Array{DivInt64s(mem, leftArray, rightArray.(*array.Int64))}, nil + default: + return nil, nil, fmt.Errorf("unsupported binary expression: %s", b.expr.String()) } - return fields, array, nil - case *logicalplan.Column: - for i, field := range ar.Schema().Fields() { - if a.expr.MatchColumn(field.Name) { - field.Name = a.name - return []arrow.Field{field}, []arrow.Array{ar.Column(i)}, nil - } + case *array.Int32: + switch b.expr.Op { + case logicalplan.OpAdd: + return resultFields, []arrow.Array{AddInt32s(mem, leftArray, rightArray.(*array.Int32))}, nil + case 
logicalplan.OpSub: + return resultFields, []arrow.Array{SubInt32s(mem, leftArray, rightArray.(*array.Int32))}, nil + case logicalplan.OpMul: + return resultFields, []arrow.Array{MulInt32s(mem, leftArray, rightArray.(*array.Int32))}, nil + case logicalplan.OpDiv: + return resultFields, []arrow.Array{DivInt32s(mem, leftArray, rightArray.(*array.Int32))}, nil + default: + return nil, nil, fmt.Errorf("unsupported binary expression: %s", b.expr.String()) } + case *array.Uint64: + switch b.expr.Op { + case logicalplan.OpAdd: + return resultFields, []arrow.Array{AddUint64s(mem, leftArray, rightArray.(*array.Uint64))}, nil + case logicalplan.OpSub: + return resultFields, []arrow.Array{SubUint64s(mem, leftArray, rightArray.(*array.Uint64))}, nil + case logicalplan.OpMul: + return resultFields, []arrow.Array{MulUint64s(mem, leftArray, rightArray.(*array.Uint64))}, nil + case logicalplan.OpDiv: + return resultFields, []arrow.Array{DivUint64s(mem, leftArray, rightArray.(*array.Uint64))}, nil + default: + return nil, nil, fmt.Errorf("unsupported binary expression: %s", b.expr.String()) + } + case *array.Float64: + switch b.expr.Op { + case logicalplan.OpAdd: + return resultFields, []arrow.Array{AddFloat64s(mem, leftArray, rightArray.(*array.Float64))}, nil + case logicalplan.OpSub: + return resultFields, []arrow.Array{SubFloat64s(mem, leftArray, rightArray.(*array.Float64))}, nil + case logicalplan.OpMul: + return resultFields, []arrow.Array{MulFloat64s(mem, leftArray, rightArray.(*array.Float64))}, nil + case logicalplan.OpDiv: + return resultFields, []arrow.Array{DivFloat64s(mem, leftArray, rightArray.(*array.Float64))}, nil + default: + return nil, nil, fmt.Errorf("unsupported binary expression: %s", b.expr.String()) + } + default: + return nil, nil, fmt.Errorf("unsupported type: %T", leftArray) } +} - return nil, nil, nil +func AddInt64s(mem memory.Allocator, left, right *array.Int64) *array.Int64 { + res := array.NewInt64Builder(mem) + defer res.Release() + + 
res.Resize(left.Len()) + + for i := 0; i < left.Len(); i++ { + res.Append(left.Value(i) + right.Value(i)) + } + + return res.NewInt64Array() } -type binaryExprProjection struct { +func SubInt64s(mem memory.Allocator, left, right *array.Int64) *array.Int64 { + res := array.NewInt64Builder(mem) + defer res.Release() + + res.Resize(left.Len()) + + for i := 0; i < left.Len(); i++ { + res.Append(left.Value(i) - right.Value(i)) + } + + return res.NewInt64Array() +} + +func MulInt64s(mem memory.Allocator, left, right *array.Int64) *array.Int64 { + res := array.NewInt64Builder(mem) + defer res.Release() + + res.Resize(left.Len()) + + for i := 0; i < left.Len(); i++ { + res.Append(left.Value(i) * right.Value(i)) + } + + return res.NewInt64Array() +} + +func DivInt64s(mem memory.Allocator, left, right *array.Int64) *array.Int64 { + res := array.NewInt64Builder(mem) + defer res.Release() + + res.Resize(left.Len()) + + for i := 0; i < left.Len(); i++ { + rightValue := right.Value(i) + if right.Value(i) == 0 { + res.AppendNull() + } else { + res.Append(left.Value(i) / rightValue) + } + } + + return res.NewInt64Array() +} + +func AddFloat64s(mem memory.Allocator, left, right *array.Float64) *array.Float64 { + res := array.NewFloat64Builder(mem) + defer res.Release() + + res.Resize(left.Len()) + + for i := 0; i < left.Len(); i++ { + res.Append(left.Value(i) + right.Value(i)) + } + + return res.NewFloat64Array() +} + +func SubFloat64s(mem memory.Allocator, left, right *array.Float64) *array.Float64 { + res := array.NewFloat64Builder(mem) + defer res.Release() + + res.Resize(left.Len()) + + for i := 0; i < left.Len(); i++ { + res.Append(left.Value(i) - right.Value(i)) + } + + return res.NewFloat64Array() +} + +func MulFloat64s(mem memory.Allocator, left, right *array.Float64) *array.Float64 { + res := array.NewFloat64Builder(mem) + defer res.Release() + + res.Resize(left.Len()) + + for i := 0; i < left.Len(); i++ { + res.Append(left.Value(i) * right.Value(i)) + } + + return 
res.NewFloat64Array() +} + +func DivFloat64s(mem memory.Allocator, left, right *array.Float64) *array.Float64 { + res := array.NewFloat64Builder(mem) + defer res.Release() + + res.Resize(left.Len()) + + for i := 0; i < left.Len(); i++ { + rightValue := right.Value(i) + if right.Value(i) == 0 { + res.AppendNull() + } else { + res.Append(left.Value(i) / rightValue) + } + } + + return res.NewFloat64Array() +} + +func AddInt32s(mem memory.Allocator, left, right *array.Int32) *array.Int32 { + res := array.NewInt32Builder(mem) + defer res.Release() + + res.Resize(left.Len()) + + for i := 0; i < left.Len(); i++ { + res.Append(left.Value(i) + right.Value(i)) + } + + return res.NewInt32Array() +} + +func SubInt32s(mem memory.Allocator, left, right *array.Int32) *array.Int32 { + res := array.NewInt32Builder(mem) + defer res.Release() + + res.Resize(left.Len()) + + for i := 0; i < left.Len(); i++ { + res.Append(left.Value(i) - right.Value(i)) + } + + return res.NewInt32Array() +} + +func MulInt32s(mem memory.Allocator, left, right *array.Int32) *array.Int32 { + res := array.NewInt32Builder(mem) + defer res.Release() + + res.Resize(left.Len()) + + for i := 0; i < left.Len(); i++ { + res.Append(left.Value(i) * right.Value(i)) + } + + return res.NewInt32Array() +} + +func DivInt32s(mem memory.Allocator, left, right *array.Int32) *array.Int32 { + res := array.NewInt32Builder(mem) + defer res.Release() + + res.Resize(left.Len()) + + for i := 0; i < left.Len(); i++ { + rightValue := right.Value(i) + if right.Value(i) == 0 { + res.AppendNull() + } else { + res.Append(left.Value(i) / rightValue) + } + } + + return res.NewInt32Array() +} + +func AddUint64s(mem memory.Allocator, left, right *array.Uint64) *array.Uint64 { + res := array.NewUint64Builder(mem) + defer res.Release() + + res.Resize(left.Len()) + + for i := 0; i < left.Len(); i++ { + res.Append(left.Value(i) + right.Value(i)) + } + + return res.NewUint64Array() +} + +func SubUint64s(mem memory.Allocator, left, right 
*array.Uint64) *array.Uint64 { + res := array.NewUint64Builder(mem) + defer res.Release() + + res.Resize(left.Len()) + + for i := 0; i < left.Len(); i++ { + res.Append(left.Value(i) - right.Value(i)) + } + + return res.NewUint64Array() +} + +func MulUint64s(mem memory.Allocator, left, right *array.Uint64) *array.Uint64 { + res := array.NewUint64Builder(mem) + defer res.Release() + + res.Resize(left.Len()) + + for i := 0; i < left.Len(); i++ { + res.Append(left.Value(i) * right.Value(i)) + } + + return res.NewUint64Array() +} + +func DivUint64s(mem memory.Allocator, left, right *array.Uint64) *array.Uint64 { + res := array.NewUint64Builder(mem) + defer res.Release() + + res.Resize(left.Len()) + + for i := 0; i < left.Len(); i++ { + rightValue := right.Value(i) + if right.Value(i) == 0 { + res.AppendNull() + } else { + res.Append(left.Value(i) / rightValue) + } + } + + return res.NewUint64Array() +} + +type boolExprProjection struct { boolExpr BooleanExpression } -func (b binaryExprProjection) Name() string { +func (b boolExprProjection) Name() string { return b.boolExpr.String() } -func (b binaryExprProjection) Project(mem memory.Allocator, ar arrow.Record) ([]arrow.Field, []arrow.Array, error) { +func (b boolExprProjection) String() string { + return b.boolExpr.String() +} + +func (b boolExprProjection) Project(mem memory.Allocator, ar arrow.Record) ([]arrow.Field, []arrow.Array, error) { var partiallyComputedExprRes arrow.Array if ar.Schema().HasField(b.boolExpr.String()) { arr := ar.Column(ar.Schema().FieldIndices(b.boolExpr.String())[0]) if arr.NullN() == 0 { // This means we have fully pre-computed the result of the // expression in the table scan already. 
+ arr.Retain() return []arrow.Field{ { Name: b.boolExpr.String(), @@ -95,6 +438,7 @@ func (b binaryExprProjection) Project(mem memory.Allocator, ar arrow.Record) ([] vals := make([]bool, ar.NumRows()) builder := array.NewBooleanBuilder(mem) + defer builder.Release() // We can do this because we now the values in the array are between 0 and // NumRows()-1 @@ -130,9 +474,15 @@ func (p plainProjection) Name() string { return p.expr.ColumnName } -func (p plainProjection) Project(mem memory.Allocator, ar arrow.Record) ([]arrow.Field, []arrow.Array, error) { - for i, field := range ar.Schema().Fields() { +func (p plainProjection) String() string { + return p.expr.ColumnName +} + +func (p plainProjection) Project(_ memory.Allocator, ar arrow.Record) ([]arrow.Field, []arrow.Array, error) { + for i := 0; i < ar.Schema().NumFields(); i++ { + field := ar.Schema().Field(i) if p.expr.MatchColumn(field.Name) { + ar.Column(i).Retain() // Retain the column since we're keeping it. return []arrow.Field{field}, []arrow.Array{ar.Column(i)}, nil } } @@ -140,6 +490,243 @@ func (p plainProjection) Project(mem memory.Allocator, ar arrow.Record) ([]arrow return nil, nil, nil } +type convertProjection struct { + p columnProjection + expr *logicalplan.ConvertExpr +} + +func (p convertProjection) Name() string { + return p.expr.Name() +} + +func (p convertProjection) String() string { + return p.expr.Name() +} + +func (p convertProjection) convert(mem memory.Allocator, c arrow.Array) (arrow.Array, error) { + defer c.Release() + + switch c := c.(type) { + case *array.Int64: + switch p.expr.Type { + case arrow.PrimitiveTypes.Float64: + return convertInt64ToFloat64(mem, c), nil + default: + return nil, fmt.Errorf("unsupported conversion from %s to %s", c.DataType(), p.expr.Type) + } + default: + return nil, fmt.Errorf("unsupported conversion from %s to %s", c.DataType(), p.expr.Type) + } +} + +func convertInt64ToFloat64(mem memory.Allocator, c *array.Int64) *array.Float64 { + res := 
array.NewFloat64Builder(mem) + defer res.Release() + + res.Resize(c.Len()) + + for i := 0; i < c.Len(); i++ { + res.Append(float64(c.Value(i))) + } + + return res.NewFloat64Array() +} + +func (p convertProjection) Project(mem memory.Allocator, ar arrow.Record) ([]arrow.Field, []arrow.Array, error) { + fields, cols, err := p.p.Project(mem, ar) + if err != nil { + return nil, nil, err + } + + if len(fields) == 0 || len(fields) != len(cols) { + return nil, nil, fmt.Errorf("invalid projection for convert: fields and arrays must be non-empty and of equal length") + } + + c, err := p.convert(mem, cols[0]) + if err != nil { + return nil, nil, err + } + + return []arrow.Field{{ + Name: p.expr.Name(), + Type: p.expr.Type, + Nullable: fields[0].Nullable, + Metadata: fields[0].Metadata, + }}, []arrow.Array{c}, nil +} + +type isNullProjection struct { + expr *logicalplan.IsNullExpr + p columnProjection +} + +func (p isNullProjection) Name() string { + return p.expr.String() +} + +func (p isNullProjection) String() string { + return p.expr.String() +} + +func (p isNullProjection) Project(mem memory.Allocator, ar arrow.Record) ([]arrow.Field, []arrow.Array, error) { + fields, cols, err := p.p.Project(mem, ar) + if err != nil { + return nil, nil, err + } + defer func() { + for _, arr := range cols { + arr.Release() + } + }() + + if len(fields) != 1 || len(fields) != len(cols) { + return nil, nil, fmt.Errorf("invalid projection for isNull: expected 1 field and array, got %d fields %d arrays", len(fields), len(cols)) + } + + b := array.NewBooleanBuilder(mem) + defer b.Release() + + b.Resize(cols[0].Len()) + + for i := 0; i < cols[0].Len(); i++ { + b.Append(cols[0].IsNull(i)) + } + + return []arrow.Field{{ + Name: p.expr.String(), + Type: arrow.FixedWidthTypes.Boolean, + Nullable: fields[0].Nullable, + Metadata: fields[0].Metadata, + }}, []arrow.Array{b.NewBooleanArray()}, nil +} + +type ifExprProjection struct { + expr *logicalplan.IfExpr + + cond columnProjection + then 
columnProjection + els columnProjection +} + +func (p ifExprProjection) Name() string { + return p.expr.Name() +} + +func (p ifExprProjection) String() string { + return p.expr.Name() +} + +func (p ifExprProjection) Project(mem memory.Allocator, ar arrow.Record) ([]arrow.Field, []arrow.Array, error) { + f, thenCols, err := p.then.Project(mem, ar) + if err != nil { + return nil, nil, err + } + defer func() { + for _, arr := range thenCols { + arr.Release() + } + }() + + _, elseCols, err := p.els.Project(mem, ar) + if err != nil { + return nil, nil, err + } + defer func() { + for _, arr := range elseCols { + arr.Release() + } + }() + + _, condCols, err := p.cond.Project(mem, ar) + if err != nil { + return nil, nil, err + } + defer func() { + for _, arr := range condCols { + arr.Release() + } + }() + + if len(thenCols) != 1 || len(elseCols) != 1 || len(condCols) != 1 { + return nil, nil, fmt.Errorf("invalid projection for if: all columns must be non-empty and of equal length") + } + + condCol := condCols[0] + thenCol := thenCols[0] + elseCol := elseCols[0] + + condArr, ok := condCol.(*array.Boolean) + if !ok { + return nil, nil, fmt.Errorf("invalid projection for if: condition column must be of type boolean") + } + + if condArr.Len() != thenCol.Len() || condArr.Len() != elseCol.Len() { + return nil, nil, fmt.Errorf("invalid projection for if: condition, then and else columns must be of equal length") + } + + if !arrow.TypeEqual(thenCols[0].DataType(), elseCols[0].DataType()) { + return nil, nil, fmt.Errorf("invalid projection for if: then and else columns must be of the same type") + } + + var res arrow.Array + switch thenCols[0].(type) { + case *array.Int64: + res = conditionalAddInt64(mem, condArr, thenCol.(*array.Int64), elseCol.(*array.Int64)) + default: + return nil, nil, fmt.Errorf("unsupported if expression type: %s %T", thenCols[0].DataType(), thenCols[0]) + } + + return []arrow.Field{{ + Name: p.expr.Name(), + Type: res.DataType(), + Nullable: f[0].Nullable, 
+ Metadata: f[0].Metadata, + }}, []arrow.Array{res}, nil +} + +func conditionalAddInt64(mem memory.Allocator, cond *array.Boolean, a, b *array.Int64) *array.Int64 { + res := array.NewInt64Builder(mem) + defer res.Release() + + res.Resize(cond.Len()) + + for i := 0; i < cond.Len(); i++ { + if cond.IsValid(i) && cond.Value(i) { + res.Append(a.Value(i)) + } else { + res.Append(b.Value(i)) + } + } + + return res.NewInt64Array() +} + +type literalProjection struct { + value scalar.Scalar +} + +func (p literalProjection) Name() string { + return p.value.String() +} + +func (p literalProjection) String() string { + return p.value.String() +} + +func (p literalProjection) Project(mem memory.Allocator, ar arrow.Record) ([]arrow.Field, []arrow.Array, error) { + arr, err := scalar.MakeArrayFromScalar(p.value, int(ar.NumRows()), mem) + if err != nil { + return nil, nil, fmt.Errorf("make array from literal value: %w", err) + } + + return []arrow.Field{{ + Name: p.value.String(), + Type: p.value.DataType(), + }}, []arrow.Array{ + arr, + }, nil +} + type dynamicProjection struct { expr *logicalplan.DynamicColumn } @@ -148,13 +735,19 @@ func (p dynamicProjection) Name() string { return p.expr.ColumnName } -func (p dynamicProjection) Project(mem memory.Allocator, ar arrow.Record) ([]arrow.Field, []arrow.Array, error) { +func (p dynamicProjection) String() string { + return p.expr.ColumnName +} + +func (p dynamicProjection) Project(_ memory.Allocator, ar arrow.Record) ([]arrow.Field, []arrow.Array, error) { fields := []arrow.Field{} arrays := []arrow.Array{} - for i, field := range ar.Schema().Fields() { + for i := 0; i < ar.Schema().NumFields(); i++ { + field := ar.Schema().Field(i) if p.expr.MatchColumn(field.Name) { fields = append(fields, field) arrays = append(arrays, ar.Column(i)) + ar.Column(i).Retain() // Retain the column since we're keeping it. 
} } @@ -163,29 +756,107 @@ func (p dynamicProjection) Project(mem memory.Allocator, ar arrow.Record) ([]arr func projectionFromExpr(expr logicalplan.Expr) (columnProjection, error) { switch e := expr.(type) { + case *logicalplan.AllExpr: + return allProjection{}, nil case *logicalplan.Column: return plainProjection{ expr: e, }, nil + case *logicalplan.ConvertExpr: + p, err := projectionFromExpr(e.Expr) + if err != nil { + return nil, fmt.Errorf("projection to convert: %w", err) + } + + return convertProjection{ + p: p, + expr: e, + }, nil + case *logicalplan.AggregationFunction: + return plainProjection{ + expr: logicalplan.Col(e.Name()), + }, nil case *logicalplan.DynamicColumn: return dynamicProjection{ expr: e, }, nil + case *logicalplan.LiteralExpr: + return literalProjection{ + value: e.Value, + }, nil case *logicalplan.AliasExpr: + p, err := projectionFromExpr(e.Expr) + if err != nil { + return nil, fmt.Errorf("projection to convert: %w", err) + } + return aliasProjection{ + p: p, expr: e, - name: e.Name(), + name: e.Alias, }, nil case *logicalplan.BinaryExpr: - boolExpr, err := binaryBooleanExpr(e) + switch e.Op { + case logicalplan.OpEq, logicalplan.OpNotEq, logicalplan.OpGt, logicalplan.OpGtEq, logicalplan.OpLt, logicalplan.OpLtEq, logicalplan.OpRegexMatch, logicalplan.OpRegexNotMatch, logicalplan.OpAnd, logicalplan.OpOr: + boolExpr, err := binaryBooleanExpr(e) + if err != nil { + return nil, fmt.Errorf("boolean projection from expr: %w", err) + } + return boolExprProjection{ + boolExpr: boolExpr, + }, nil + case logicalplan.OpAdd, logicalplan.OpSub, logicalplan.OpMul, logicalplan.OpDiv: + left, err := projectionFromExpr(e.Left) + if err != nil { + return nil, fmt.Errorf("left projection for arithmetic projection: %w", err) + } + + right, err := projectionFromExpr(e.Right) + if err != nil { + return nil, fmt.Errorf("right projection for arithmetic projection: %w", err) + } + + return binaryExprProjection{ + expr: e, + + left: left, + right: right, + }, nil 
+ default: + return nil, fmt.Errorf("unknown binary expression: %s", e.String()) + } + case *logicalplan.IfExpr: + cond, err := projectionFromExpr(e.Cond) if err != nil { - return nil, err + return nil, fmt.Errorf("condition projection for if projection: %w", err) + } + + then, err := projectionFromExpr(e.Then) + if err != nil { + return nil, fmt.Errorf("then projection for if projection: %w", err) + } + + els, err := projectionFromExpr(e.Else) + if err != nil { + return nil, fmt.Errorf("else projection for if projection: %w", err) + } + + return ifExprProjection{ + expr: e, + cond: cond, + then: then, + els: els, + }, nil + case *logicalplan.IsNullExpr: + p, err := projectionFromExpr(e.Expr) + if err != nil { + return nil, fmt.Errorf("projection for is null projection: %w", err) } - return binaryExprProjection{ - boolExpr: boolExpr, + + return isNullProjection{ + expr: e, + p: p, }, nil - case *logicalplan.AverageExpr: - return &averageProjection{expr: e}, nil default: return nil, fmt.Errorf("unsupported expression type for projection: %T", expr) } @@ -218,7 +889,21 @@ func Project(mem memory.Allocator, tracer trace.Tracer, exprs []logicalplan.Expr return p, nil } +func (p *Projection) Close() { + p.next.Close() +} + func (p *Projection) Callback(ctx context.Context, r arrow.Record) error { + ar, err := p.Project(ctx, r) + if err != nil { + return err + } + defer ar.Release() + + return p.next.Callback(ctx, ar) +} + +func (p *Projection) Project(_ context.Context, r arrow.Record) (arrow.Record, error) { // Generates high volume of spans. Comment out if needed during development. 
// ctx, span := p.tracer.Start(ctx, "Projection/Callback") // defer span.End() @@ -229,7 +914,7 @@ func (p *Projection) Callback(ctx context.Context, r arrow.Record) error { for _, proj := range p.colProjections { f, a, err := proj.Project(p.pool, r) if err != nil { - return err + return nil, err } if a == nil { continue @@ -249,7 +934,12 @@ func (p *Projection) Callback(ctx context.Context, r arrow.Record) error { resArrays, rows, ) - return p.next.Callback(ctx, ar) + + for _, arr := range resArrays { + arr.Release() + } + + return ar, nil } func (p *Projection) Finish(ctx context.Context) error { @@ -268,76 +958,18 @@ func (p *Projection) Draw() *Diagram { var columns []string for _, p := range p.colProjections { - columns = append(columns, p.Name()) + columns = append(columns, p.String()) } - details := fmt.Sprintf("Projection (%s)", strings.Join(columns, ",")) + details := fmt.Sprintf("Projection (%s)", strings.Join(columns, ", ")) return &Diagram{Details: details, Child: child} } -type averageProjection struct { - expr logicalplan.Expr -} - -func (a *averageProjection) Name() string { - return a.expr.Name() -} - -func (a *averageProjection) Project(mem memory.Allocator, r arrow.Record) ([]arrow.Field, []arrow.Array, error) { - columnName := a.expr.Name() - resultName := "avg(" + columnName + ")" - if avgExpr, ok := a.expr.(*logicalplan.AverageExpr); ok { - if ae, ok := avgExpr.Expr.(*logicalplan.AliasExpr); ok { - columnName = ae.Expr.Name() - resultName = ae.Alias - } - } - - columnSum := "sum(" + columnName + ")" - columnCount := "count(" + columnName + ")" - - schema := r.Schema() - - sumIndex := schema.FieldIndices(columnSum) - if len(sumIndex) != 1 { - return nil, nil, fmt.Errorf("sum column for average projection for column %s not found", columnName) - } - countIndex := schema.FieldIndices(columnCount) - if len(countIndex) != 1 { - return nil, nil, fmt.Errorf("count column for average projection for column %s not found", columnName) - } - - sums := 
r.Column(sumIndex[0]) - counts := r.Column(countIndex[0]) - - fields := make([]arrow.Field, 0, len(schema.Fields())-1) - columns := make([]arrow.Array, 0, len(schema.Fields())-1) - - // Only add the fields and columns that aren't the average's underlying sum and count columns. - for i, field := range schema.Fields() { - if i != sumIndex[0] && i != countIndex[0] { - fields = append(fields, field) - columns = append(columns, r.Column(i)) - } - } - - // Add the field and column for the projected average aggregation. - fields = append(fields, arrow.Field{ - Name: resultName, - Type: &arrow.Int64Type{}, - }) - columns = append(columns, avgInt64arrays(mem, sums, counts)) +type allProjection struct{} - return fields, columns, nil -} +func (a allProjection) Name() string { return "all" } -func avgInt64arrays(pool memory.Allocator, sums, counts arrow.Array) arrow.Array { - sumsInts := sums.(*array.Int64) - countsInts := counts.(*array.Int64) - - res := array.NewInt64Builder(pool) - for i := 0; i < sumsInts.Len(); i++ { - res.Append(sumsInts.Value(i) / countsInts.Value(i)) - } +func (a allProjection) String() string { return "*" } - return res.NewArray() +func (a allProjection) Project(_ memory.Allocator, ar arrow.Record) ([]arrow.Field, []arrow.Array, error) { + return ar.Schema().Fields(), ar.Columns(), nil } diff --git a/query/physicalplan/regexpfilter.go b/query/physicalplan/regexpfilter.go index 5d312bb18..8a1eb2b02 100644 --- a/query/physicalplan/regexpfilter.go +++ b/query/physicalplan/regexpfilter.go @@ -4,8 +4,8 @@ import ( "fmt" "regexp" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" ) type RegExpFilter struct { diff --git a/query/physicalplan/sampler.go b/query/physicalplan/sampler.go new file mode 100644 index 000000000..7c742ae22 --- /dev/null +++ b/query/physicalplan/sampler.go 
@@ -0,0 +1,289 @@ +package physicalplan + +import ( + "context" + "fmt" + "math" + "math/rand" + "slices" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/apache/arrow-go/v18/arrow/util" + + "github.com/polarsignals/frostdb/pqarrow/builder" +) + +type ReservoirSampler struct { + next PhysicalPlan + allocator memory.Allocator + + // size is the max number of rows in the reservoir + size int64 + // sizeInBytes is the total number of bytes that are held by records in the reservoir. This includes + // rows that are not sampled but are being held onto because of a reference to the record that is still in the reservoir. + sizeInBytes int64 + // sizeLimit is the number of bytes that sizeInBytes is allowed to get to before the reservoir is materialized. This is to prevent the reservoir from growing too large. + sizeLimit int64 + + // reservoir is the set of records that have been sampled. They may vary in schema due to dynamic columns. + reservoir []sample + + w float64 // w is the probability of keeping a record + n int64 // n is the number of rows that have been sampled thus far + i float64 // i is the current row number being sampled +} + +type sample struct { + // i is the index of the row in the record that is being sampled. If i is -1, the entire record is being sampled. + i int64 + // ref to the record that is being sampled + ref *referencedRecord +} + +type referencedRecord struct { + // Record is a pointer to the record that is being sampled + arrow.Record + // size is the size of the record in bytes + size int64 + // ref is the number of references to the record. When ref is 0, the record can be released. 
+ ref int64 +} + +func (s *referencedRecord) Release() int64 { + defer s.Record.Release() + s.ref-- + if s.ref == 0 { + return s.size + } + + return 0 +} + +func (s *referencedRecord) Retain() int64 { + defer s.Record.Retain() + s.ref++ + if s.ref == 1 { + return s.size + } + return 0 +} + +// NewReservoirSampler will create a new ReservoirSampler operator that will sample up to size rows of all records seen by Callback. +func NewReservoirSampler(size, limit int64, allocator memory.Allocator) *ReservoirSampler { + return &ReservoirSampler{ + size: size, + sizeLimit: limit, + w: math.Exp(math.Log(rand.Float64()) / float64(size)), + allocator: allocator, + } +} + +func (s *ReservoirSampler) SetNext(p PhysicalPlan) { + s.next = p +} + +func (s *ReservoirSampler) Draw() *Diagram { + var child *Diagram + if s.next != nil { + child = s.next.Draw() + } + details := fmt.Sprintf("Reservoir Sampler (%v)", s.size) + return &Diagram{Details: details, Child: child} +} + +func (s *ReservoirSampler) Close() { + for _, r := range s.reservoir { + s.sizeInBytes -= r.ref.Release() + } + s.next.Close() +} + +// Callback collects all the records to sample. +func (s *ReservoirSampler) Callback(_ context.Context, r arrow.Record) error { + var ref *referencedRecord + r, ref = s.fill(r) + if r == nil { // The record fit in the reservoir + return nil + } + if s.n == s.size { // The reservoir just filled up. Slice the reservoir to the correct size so we can easily perform row replacement + s.sliceReservoir() + } + + // Sample the record + s.sample(r, ref) + if s.sizeInBytes >= s.sizeLimit { + if err := s.materialize(s.allocator); err != nil { + return err + } + } + return nil +} + +func refPtr(r arrow.Record) *referencedRecord { + return &referencedRecord{Record: r, size: util.TotalRecordSize(r)} +} + +// fill will fill the reservoir with the first size records. 
+func (s *ReservoirSampler) fill(r arrow.Record) (arrow.Record, *referencedRecord) { + if s.n >= s.size { + return r, refPtr(r) + } + + if s.n+r.NumRows() <= s.size { // The record fits in the reservoir + smpl := sample{ + i: -1, + ref: refPtr(r), + } + s.reservoir = append(s.reservoir, smpl) + s.sizeInBytes += smpl.ref.Retain() + s.n += r.NumRows() + return nil, nil + } + + // The record partially fits in the reservoir + ref := refPtr(r) + smpl := sample{ + i: -1, + ref: refPtr(r.NewSlice(0, s.size-s.n)), + } + s.reservoir = append(s.reservoir, smpl) + s.sizeInBytes += smpl.ref.Retain() + r = r.NewSlice(s.size-s.n, r.NumRows()) + s.n = s.size + return r, ref +} + +func (s *ReservoirSampler) sliceReservoir() { + newReservoir := make([]sample, 0, s.size) + for _, r := range s.reservoir { + ref := refPtr(r.ref.Record) + for j := int64(0); j < r.ref.NumRows(); j++ { + smpl := sample{ + i: j, + ref: ref, + } + newReservoir = append(newReservoir, smpl) + s.sizeInBytes += smpl.ref.Retain() + } + s.sizeInBytes -= r.ref.Release() + } + s.reservoir = newReservoir +} + +// sample implements the reservoir sampling algorithm found https://en.wikipedia.org/wiki/Reservoir_sampling. +func (s *ReservoirSampler) sample(r arrow.Record, ref *referencedRecord) { + // The size can be 0 and in that case we don't want to sample. 
+ if s.size == 0 { + return + } + n := s.n + r.NumRows() + if s.i == 0 { + s.i = float64(s.n) - 1 + } else if s.i < float64(n) { + s.replace(rand.Intn(int(s.size)), sample{i: int64(s.i) - s.n, ref: ref}) + s.w = s.w * math.Exp(math.Log(rand.Float64())/float64(s.size)) + } + + for s.i < float64(n) { + s.i += math.Floor(math.Log(rand.Float64())/math.Log(1-s.w)) + 1 + if s.i < float64(n) { + // replace a random item of the reservoir with row i + s.replace(rand.Intn(int(s.size)), sample{i: int64(s.i) - s.n, ref: ref}) + s.w = s.w * math.Exp(math.Log(rand.Float64())/float64(s.size)) + } + } + s.n = n +} + +// Finish sends all the records in the reservoir to the next operator. +func (s *ReservoirSampler) Finish(ctx context.Context) error { + // Send all the records in the reservoir to the next operator + for _, r := range s.reservoir { + if r.i == -1 { + if err := s.next.Callback(ctx, r.ref.Record); err != nil { + return err + } + continue + } + + record := r.ref.NewSlice(r.i, r.i+1) + defer record.Release() + if err := s.next.Callback(ctx, record); err != nil { + return err + } + } + + return s.next.Finish(ctx) +} + +// replace will replace the row at index i with the row in the record r at index j. +func (s *ReservoirSampler) replace(i int, newRow sample) { + s.sizeInBytes -= s.reservoir[i].ref.Release() + s.reservoir[i] = newRow + s.sizeInBytes += newRow.ref.Retain() +} + +// materialize will build a new record from the reservoir to release the underlying records. 
+func (s *ReservoirSampler) materialize(allocator memory.Allocator) error { + // Build the unified schema for the records + schema := s.reservoir[0].ref.Schema() + fields := schema.Fields() + added := map[string]struct{}{} + for i := 1; i < len(s.reservoir); i++ { + for j := 0; j < s.reservoir[i].ref.Schema().NumFields(); j++ { + newField := s.reservoir[i].ref.Schema().Field(j).Name + if _, ok := added[newField]; !ok && !schema.HasField(s.reservoir[i].ref.Schema().Field(j).Name) { + fields = append(fields, s.reservoir[i].ref.Schema().Field(j)) + added[newField] = struct{}{} + } + } + } + + // Sort the fields alphabetically + slices.SortFunc(fields, func(i, j arrow.Field) int { + switch { + case i.Name < j.Name: + return -1 + case i.Name > j.Name: + return 1 + default: + return 0 + } + }) + + // Merge all the records slices + schema = arrow.NewSchema(fields, nil) + bldr := array.NewRecordBuilder(allocator, schema) + defer bldr.Release() + + for _, r := range s.reservoir { + for i, f := range bldr.Fields() { // TODO handle disparate schemas + // Check if this record has this field + if !r.ref.Schema().HasField(schema.Field(i).Name) { + if err := builder.AppendValue(f, nil, -1); err != nil { + return err + } + } else { + if err := builder.AppendValue(f, r.ref.Column(i), int(r.i)); err != nil { + return err + } + } + } + } + + // Clear the reservoir + for _, r := range s.reservoir { + s.sizeInBytes -= r.ref.Release() + } + // Set the record to be the new reservoir + smpl := sample{i: -1, ref: refPtr(bldr.NewRecord())} + s.sizeInBytes += smpl.ref.Retain() + smpl.ref.Record.Release() // Release this here because of the retain in the previous line. 
+ s.reservoir = []sample{smpl} + + // reslice the reservoir for easy row replacement + s.sliceReservoir() + return nil +} diff --git a/query/physicalplan/sampler_test.go b/query/physicalplan/sampler_test.go new file mode 100644 index 000000000..ab5e47e84 --- /dev/null +++ b/query/physicalplan/sampler_test.go @@ -0,0 +1,335 @@ +package physicalplan + +import ( + "context" + "testing" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/stretchr/testify/require" +) + +type TestPlan struct { + finish func(ctx context.Context) error + callback func(ctx context.Context, r arrow.Record) error +} + +func (t *TestPlan) Callback(ctx context.Context, r arrow.Record) error { + if t.callback != nil { + return t.callback(ctx, r) + } + return nil +} + +func (t *TestPlan) Finish(ctx context.Context) error { + if t.finish != nil { + return t.finish(ctx) + } + return nil +} + +func (t *TestPlan) SetNext(_ PhysicalPlan) {} +func (t *TestPlan) Draw() *Diagram { return nil } +func (t *TestPlan) Close() {} + +func Test_Sampler(t *testing.T) { + ctx := context.Background() + tests := map[string]struct { + reservoirSize int64 + numRows int + recordSize int + }{ + "basic single row records": { + reservoirSize: 10, + numRows: 100, + recordSize: 1, + }, + "basic multi row records": { + reservoirSize: 10, + numRows: 100, + recordSize: 10, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + allocator := memory.NewCheckedAllocator(memory.NewGoAllocator()) + t.Cleanup(func() { + allocator.AssertSize(t, 0) + }) + + // Create a new sampler + s := NewReservoirSampler(test.reservoirSize, 10_000, allocator) + called := false + total := int64(0) + s.SetNext(&TestPlan{ + callback: func(_ context.Context, r arrow.Record) error { + called = true + total += r.NumRows() + return nil + }, + }) + + schema := 
arrow.NewSchema([]arrow.Field{ + {Name: "a", Type: arrow.PrimitiveTypes.Int64}, + }, nil) + bldr := array.NewRecordBuilder(allocator, schema) + t.Cleanup(bldr.Release) + + for i := 0; i < test.numRows/test.recordSize; i++ { + for j := 0; j < test.recordSize; j++ { + bldr.Field(0).(*array.Int64Builder).Append(int64((i * test.recordSize) + j)) + } + r := bldr.NewRecord() + t.Cleanup(r.Release) + require.NoError(t, s.Callback(ctx, r)) + } + + require.NoError(t, s.Finish(ctx)) + require.True(t, called) + require.Equal(t, test.reservoirSize, total) + s.Close() + require.Zero(t, s.sizeInBytes) + }) + } +} + +// Test_Sampler_Randomness tests the randomness of the sampler by checking the distribution of the samples. +func Test_Sampler_Randomness(t *testing.T) { + ctx := context.Background() + reservoirSize := int64(10) + numRows := int64(100) + recordSize := int64(1) + iterations := int64(10_000) + bins := make([]int64, numRows) + + // Create a new sampler + for i := int64(0); i < iterations; i++ { + allocator := memory.NewCheckedAllocator(memory.NewGoAllocator()) + t.Cleanup(func() { + allocator.AssertSize(t, 0) + }) + s := NewReservoirSampler(reservoirSize, 10_000, allocator) + s.SetNext(&TestPlan{ + callback: func(_ context.Context, r arrow.Record) error { + for _, v := range r.Column(0).(*array.Int64).Int64Values() { + bins[v]++ + } + return nil + }, + }) + + schema := arrow.NewSchema([]arrow.Field{ + {Name: "a", Type: arrow.PrimitiveTypes.Int64}, + }, nil) + bldr := array.NewRecordBuilder(allocator, schema) + t.Cleanup(bldr.Release) + + for i := int64(0); i < numRows/recordSize; i++ { + for j := int64(0); j < recordSize; j++ { + bldr.Field(0).(*array.Int64Builder).Append(int64((i * recordSize) + j)) + } + r := bldr.NewRecord() + t.Cleanup(r.Release) + require.NoError(t, s.Callback(ctx, r)) + } + + require.NoError(t, s.Finish(ctx)) + s.Close() + require.Zero(t, s.sizeInBytes) + } + + // Any given number has a reservoirSize/numRows or for current settings a 10/100 
chance of being selected + // 10/100 * 10000 iterations = 1000. So we expect each number to be selected roughly 1000 times. + // Using a tolerance of 10% we expect each number to be selected between 900 and 1100 times. + tolerance := 0.10 + expectation := float64(reservoirSize) / float64(numRows) * float64(iterations) + lowerBound := expectation - expectation*tolerance + upperBound := expectation + expectation*tolerance + for _, count := range bins { + require.GreaterOrEqual(t, float64(count), lowerBound) + require.LessOrEqual(t, float64(count), upperBound) + } +} + +func Benchmark_Sampler(b *testing.B) { + ctx := context.Background() + tests := map[string]struct { + reservoirSize int64 + numRows int + recordSize int + }{ + "10%_10_000_x10": { + reservoirSize: 1000, + numRows: 10_000, + recordSize: 10, + }, + } + + for name, test := range tests { + b.Run(name, func(b *testing.B) { + schema := arrow.NewSchema([]arrow.Field{ + {Name: "a", Type: arrow.PrimitiveTypes.Int64}, + }, nil) + bldr := array.NewRecordBuilder(memory.NewGoAllocator(), schema) + + recordCount := test.numRows / test.recordSize + records := make([]arrow.Record, 0, recordCount) + b.Cleanup(func() { + for _, r := range records { + r.Release() + } + }) + for i := 0; i < recordCount; i++ { + for j := 0; j < test.recordSize; j++ { + bldr.Field(0).(*array.Int64Builder).Append(int64((i * test.recordSize) + j)) + } + records = append(records, bldr.NewRecord()) + } + b.ResetTimer() + + for i := 0; i < b.N; i++ { + // Create a new sampler + s := NewReservoirSampler(test.reservoirSize, 10_000, memory.NewGoAllocator()) + total := int64(0) + s.SetNext(&TestPlan{ + callback: func(_ context.Context, r arrow.Record) error { + total += r.NumRows() + return nil + }, + }) + + for _, r := range records { + require.NoError(b, s.Callback(ctx, r)) + } + require.NoError(b, s.Finish(ctx)) + require.Equal(b, test.reservoirSize, total) + } + }) + } +} + +func Test_Sampler_Materialize(t *testing.T) { + ctx := 
context.Background() + allocator := memory.NewCheckedAllocator(memory.NewGoAllocator()) + t.Cleanup(func() { + allocator.AssertSize(t, 0) + }) + s := NewReservoirSampler(10, 10_000, allocator) + s.SetNext(&TestPlan{}) + + schema := arrow.NewSchema([]arrow.Field{ + {Name: "a", Type: arrow.PrimitiveTypes.Int64}, + }, nil) + bldr := array.NewRecordBuilder(allocator, schema) + t.Cleanup(bldr.Release) + + for i := 0; i < 10; i++ { + for j := 0; j < 10; j++ { + bldr.Field(0).(*array.Int64Builder).Append(int64((i * 10) + j)) + } + r := bldr.NewRecord() + t.Cleanup(r.Release) + require.NoError(t, s.Callback(ctx, r)) + } + + // Create a new schema for records + schema = arrow.NewSchema([]arrow.Field{ + {Name: "a", Type: arrow.PrimitiveTypes.Int64}, + {Name: "b", Type: arrow.PrimitiveTypes.Int64}, + }, nil) + bldr = array.NewRecordBuilder(allocator, schema) + t.Cleanup(bldr.Release) + + for i := 0; i < 10; i++ { + for j := 0; j < 10; j++ { + bldr.Field(0).(*array.Int64Builder).Append(int64((i * 10) + j)) + bldr.Field(1).(*array.Int64Builder).Append(int64((i * 10) + j)) + } + r := bldr.NewRecord() + t.Cleanup(r.Release) + require.NoError(t, s.Callback(ctx, r)) + } + + require.NoError(t, s.materialize(allocator)) + s.Close() + require.Zero(t, s.sizeInBytes) +} + +type AccountingAllocator struct { + allocator *memory.CheckedAllocator + maxUsed int +} + +func (a *AccountingAllocator) Allocate(size int) []byte { + b := a.allocator.Allocate(size) + if current := a.allocator.CurrentAlloc(); current > a.maxUsed { + a.maxUsed = current + } + return b +} + +func (a *AccountingAllocator) Reallocate(size int, data []byte) []byte { + b := a.allocator.Reallocate(size, data) + if current := a.allocator.CurrentAlloc(); current > a.maxUsed { + a.maxUsed = current + } + return b +} + +func (a *AccountingAllocator) Free(data []byte) { + a.allocator.Free(data) + if current := a.allocator.CurrentAlloc(); current > a.maxUsed { + a.maxUsed = current + } +} + +func Test_Sampler_MaxSizeAllocation(t 
*testing.T) { + ctx := context.Background() + allocator := &AccountingAllocator{ + allocator: memory.NewCheckedAllocator(memory.NewGoAllocator()), + } + t.Cleanup(func() { + require.LessOrEqual(t, allocator.maxUsed, 1024) // Expect the most we allocated was 1024 bytes during materialization + allocator.allocator.AssertSize(t, 0) + }) + s := NewReservoirSampler(10, 200, allocator) + s.SetNext(&TestPlan{}) + + schema := arrow.NewSchema([]arrow.Field{ + {Name: "a", Type: arrow.PrimitiveTypes.Int64}, + }, nil) + bldr := array.NewRecordBuilder(memory.NewGoAllocator(), schema) + t.Cleanup(bldr.Release) + + for i := 0; i < 10; i++ { + for j := 0; j < 10; j++ { + bldr.Field(0).(*array.Int64Builder).Append(int64((i * 10) + j)) + } + r := bldr.NewRecord() + t.Cleanup(r.Release) + require.NoError(t, s.Callback(ctx, r)) + } + + // Create a new schema for records + schema = arrow.NewSchema([]arrow.Field{ + {Name: "a", Type: arrow.PrimitiveTypes.Int64}, + {Name: "b", Type: arrow.PrimitiveTypes.Int64}, + }, nil) + bldr = array.NewRecordBuilder(memory.NewGoAllocator(), schema) + t.Cleanup(bldr.Release) + + for i := 0; i < 10; i++ { + for j := 0; j < 10; j++ { + bldr.Field(0).(*array.Int64Builder).Append(int64((i * 10) + j)) + bldr.Field(1).(*array.Int64Builder).Append(int64((i * 10) + j)) + } + r := bldr.NewRecord() + t.Cleanup(r.Release) + require.NoError(t, s.Callback(ctx, r)) + } + + s.Close() + require.Zero(t, s.sizeInBytes) +} diff --git a/query/physicalplan/synchronize.go b/query/physicalplan/synchronize.go index 7234e03c2..11194ece8 100644 --- a/query/physicalplan/synchronize.go +++ b/query/physicalplan/synchronize.go @@ -6,7 +6,7 @@ import ( "sync" "sync/atomic" - "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow-go/v18/arrow" ) // Synchronizer is used to combine the results of multiple parallel streams @@ -17,12 +17,15 @@ type Synchronizer struct { next PhysicalPlan nextMtx sync.Mutex running *atomic.Int64 + open *atomic.Int64 } 
func Synchronize(concurrency int) *Synchronizer { running := &atomic.Int64{} running.Add(int64(concurrency)) - return &Synchronizer{running: running} + open := &atomic.Int64{} + open.Add(int64(concurrency)) + return &Synchronizer{running: running, open: open} } func (m *Synchronizer) Callback(ctx context.Context, r arrow.Record) error { @@ -60,3 +63,14 @@ func (m *Synchronizer) SetNextPlan(nextPlan PhysicalPlan) { func (m *Synchronizer) Draw() *Diagram { return &Diagram{Details: "Synchronizer", Child: m.next.Draw()} } + +func (m *Synchronizer) Close() { + open := m.open.Add(-1) + if open < 0 { + panic("too many Synchronizer Close calls") + } + if open > 0 { + return + } + m.next.Close() +} diff --git a/query/physicalplan/synchronize_test.go b/query/physicalplan/synchronize_test.go index 61324fbf4..af22f5949 100644 --- a/query/physicalplan/synchronize_test.go +++ b/query/physicalplan/synchronize_test.go @@ -5,9 +5,9 @@ import ( "sync" "testing" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/require" ) @@ -32,7 +32,7 @@ func TestSynchronize(t *testing.T) { err := op.Execute( context.Background(), memory.NewGoAllocator(), - func(ctx context.Context, r arrow.Record) error { + func(_ context.Context, _ arrow.Record) error { calls++ return nil }, diff --git a/query/testing.go b/query/testing.go new file mode 100644 index 000000000..b8312bf9c --- /dev/null +++ b/query/testing.go @@ -0,0 +1,69 @@ +package query + +import ( + "context" + "errors" + "fmt" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/memory" + + 
"github.com/polarsignals/frostdb/dynparquet" + "github.com/polarsignals/frostdb/query/logicalplan" +) + +type FakeTableReader struct { + Records []arrow.Record + FrostdbSchema *dynparquet.Schema +} + +func (r *FakeTableReader) View(ctx context.Context, fn func(ctx context.Context, tx uint64) error) error { + return fn(ctx, 0) +} + +func (r *FakeTableReader) Iterator( + ctx context.Context, + _ uint64, + _ memory.Allocator, + callbacks []logicalplan.Callback, + _ ...logicalplan.Option, +) error { + if len(callbacks) == 0 { + return errors.New("no callbacks provided") + } + + for i, r := range r.Records { + cb := callbacks[i%len(callbacks)] + if err := cb(ctx, r); err != nil { + return err + } + } + + return nil +} + +func (r *FakeTableReader) SchemaIterator( + _ context.Context, + _ uint64, + _ memory.Allocator, + _ []logicalplan.Callback, + _ ...logicalplan.Option, +) error { + return errors.New("not implemented") +} + +func (r *FakeTableReader) Schema() *dynparquet.Schema { + return r.FrostdbSchema +} + +type FakeTableProvider struct { + Tables map[string]logicalplan.TableReader +} + +func (f *FakeTableProvider) GetTable(name string) (logicalplan.TableReader, error) { + if t, ok := f.Tables[name]; ok { + return t, nil + } + + return nil, fmt.Errorf("table %s not found", name) +} diff --git a/recovery/recovery.go b/recovery/recovery.go new file mode 100644 index 000000000..43773e8b8 --- /dev/null +++ b/recovery/recovery.go @@ -0,0 +1,30 @@ +package recovery + +import ( + "fmt" + "runtime/debug" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" +) + +// Do is a function wrapper that will recover from a panic and return the error. +// Optionally it takes a logger to log the stack trace. Note that it only logs to a single logger. 
+func Do(f func() error, logger ...log.Logger) func() error { + return func() (err error) { + defer func() { + if r := recover(); r != nil { + switch e := r.(type) { + case error: + err = e + case string: + err = fmt.Errorf("%v", e) + } + if len(logger) > 0 { + level.Error(logger[0]).Log("msg", "recovered from panic", "err", err, "stacktrace", string(debug.Stack())) + } + } + }() + return f() + } +} diff --git a/renovate.json b/renovate.json new file mode 100644 index 000000000..0af175266 --- /dev/null +++ b/renovate.json @@ -0,0 +1,53 @@ +{ + "$schema": "https://docs.renovatebot.com/renovate-schema.json", + "schedule": [ + "on monday and wednesday" + ], + "updateNotScheduled": false, + "packageRules": [ + { + "description": "One week stability period for Buf packages", + "matchFileNames": ["buf.gen.yaml"], + "minimumReleaseAge": "7 days" + }, + { + "description": "Group buf packages", + "matchPackageNames": ["bufbuild/buf", "bufbuild/buf-setup-action"], + "minimumReleaseAge": "7 days", + "groupName": "buf" + }, + { + "description": "Group grpc-gateway packages", + "matchSourceUrls": ["https://github.com/grpc-ecosystem/grpc-gateway"], + "groupName": "grpc-gateway" + }, + { + "description": "Group grpc-health-probe packages", + "matchSourceUrls": [ + "https://github.com/grpc-ecosystem/grpc-health-probe" + ], + "groupName": "grpc-health-probe" + }, + { + "description": "Group protobuf-go packages", + "matchSourceUrls": ["https://github.com/protocolbuffers/protobuf-go"], + "groupName": "protobuf-go" + }, + { + "description": "Don't upgrade avro", + "matchSourceUrls": ["https://github.com/hamba/avro/v2"], + "matchCurrentVersion": "<=2.19.0", + "groupName": "avro" + } + ], + "customManagers": [ + { + "customType": "regex", + "description": "Update Buf plugins", + "fileMatch": ["(^|/)buf\\.gen\\.yaml$"], + "matchStrings": [ + "# renovate: datasource=(?.+?) 
depName=(?.+?)(?: (?:packageName)=(?.+?))?(?: versioning=(?.+?))?\\s*-?\\s*plugin: ('|\")?.*:(?.+?)('|\")?\\s" + ] + } + ] +} diff --git a/samples/example.go b/samples/example.go new file mode 100644 index 000000000..cbe91ef6a --- /dev/null +++ b/samples/example.go @@ -0,0 +1,383 @@ +package samples + +import ( + "sort" + "testing" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/google/uuid" + "github.com/parquet-go/parquet-go" + "google.golang.org/protobuf/proto" + + schemapb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha1" + schemav2pb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha2" + "github.com/polarsignals/frostdb/internal/records" +) + +type Sample struct { + ExampleType string `frostdb:"example_type,rle_dict,asc(0)"` + Labels map[string]string `frostdb:"labels,rle_dict,asc(1),null_first"` + Stacktrace []uuid.UUID `frostdb:"stacktrace,rle_dict,asc(3),null_first"` + Timestamp int64 `frostdb:"timestamp,asc(2)"` + Value int64 `frostdb:"value"` +} + +type Samples []Sample + +func (s Samples) ToRecord() (arrow.Record, error) { + b := records.NewBuild[Sample](memory.NewGoAllocator()) + defer b.Release() + err := b.Append(s...) + if err != nil { + return nil, err + } + return b.NewRecord(), nil +} + +func (s Samples) SampleLabelNames() []string { + names := []string{} + seen := map[string]struct{}{} + + for _, sample := range s { + for label := range sample.Labels { + if _, ok := seen[label]; !ok { + names = append(names, label) + seen[label] = struct{}{} + } + } + } + sort.Strings(names) + + return names +} + +func (s Sample) ToParquetRow(labelNames []string) parquet.Row { + // The order of these appends is important. Parquet values must be in the + // order of the schema and the schema orders columns by their names. 
+ + nameNumber := len(labelNames) + labelLen := len(s.Labels) + row := make([]parquet.Value, 0, nameNumber+3) + + row = append(row, parquet.ValueOf(s.ExampleType).Level(0, 0, 0)) + + i, j := 0, 0 + for i < nameNumber { + if value, ok := s.Labels[labelNames[i]]; ok { + row = append(row, parquet.ValueOf(value).Level(0, 1, i+1)) + i++ + j++ + if j >= labelLen { + for ; i < nameNumber; i++ { + row = append(row, parquet.ValueOf(nil).Level(0, 0, i+1)) + } + break + } + } else { + row = append(row, parquet.ValueOf(nil).Level(0, 0, i+1)) + i++ + } + } + row = append(row, parquet.ValueOf(ExtractLocationIDs(s.Stacktrace)).Level(0, 0, nameNumber+1)) + row = append(row, parquet.ValueOf(s.Timestamp).Level(0, 0, nameNumber+2)) + row = append(row, parquet.ValueOf(s.Value).Level(0, 0, nameNumber+3)) + + return row +} + +func ExtractLocationIDs(locs []uuid.UUID) []byte { + b := make([]byte, len(locs)*16) // UUID are 16 bytes thus multiply by 16 + index := 0 + for i := len(locs) - 1; i >= 0; i-- { + copy(b[index:index+16], locs[i][:]) + index += 16 + } + return b +} + +func PrehashedSampleDefinition() *schemapb.Schema { + return &schemapb.Schema{ + Name: "test", + Columns: []*schemapb.Column{{ + Name: "example_type", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_STRING, + Encoding: schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, + }, + Dynamic: false, + }, { + Name: "labels", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_STRING, + Nullable: true, + Encoding: schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, + }, + Dynamic: true, + Prehash: true, + }, { + Name: "stacktrace", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_STRING, + Encoding: schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, + }, + Dynamic: false, + Prehash: true, + }, { + Name: "timestamp", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_INT64, + }, + Dynamic: false, + }, { + Name: "value", + 
StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_INT64, + }, + Dynamic: false, + }}, + SortingColumns: []*schemapb.SortingColumn{{ + Name: "example_type", + Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, + }, { + Name: "labels", + Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, + NullsFirst: true, + }, { + Name: "timestamp", + Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, + }, { + Name: "stacktrace", + Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, + NullsFirst: true, + }}, + } +} + +func SampleDefinition() *schemapb.Schema { + return &schemapb.Schema{ + Name: "test", + Columns: []*schemapb.Column{{ + Name: "example_type", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_STRING, + Encoding: schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, + }, + Dynamic: false, + }, { + Name: "labels", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_STRING, + Nullable: true, + Encoding: schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, + }, + Dynamic: true, + }, { + Name: "stacktrace", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_STRING, + Encoding: schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, + }, + Dynamic: false, + }, { + Name: "timestamp", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_INT64, + }, + Dynamic: false, + }, { + Name: "value", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_INT64, + }, + Dynamic: false, + }}, + SortingColumns: []*schemapb.SortingColumn{{ + Name: "example_type", + Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, + }, { + Name: "labels", + Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, + NullsFirst: true, + }, { + Name: "timestamp", + Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, + }, { + Name: "stacktrace", + Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, + NullsFirst: true, + }}, + } +} + +// Adds a float 
column to the SampleDefinition to be able to test +// aggregations with float values. +func SampleDefinitionWithFloat() *schemapb.Schema { + sample := SampleDefinition() + sample.Columns = append(sample.Columns, &schemapb.Column{ + Name: "floatvalue", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_DOUBLE, + Nullable: true, + }, + Dynamic: false, + }) + return sample +} + +func NewTestSamples() Samples { + return Samples{ + { + ExampleType: "cpu", + Labels: map[string]string{ + "node": "test3", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: 2, + Value: 5, + }, { + ExampleType: "cpu", + Labels: map[string]string{ + "namespace": "default", + "pod": "test1", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: 2, + Value: 3, + }, { + ExampleType: "cpu", + Labels: map[string]string{ + "container": "test2", + "namespace": "default", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: 2, + Value: 3, + }, + } +} + +func GenerateTestSamples(n int) Samples { + s := Samples{} + for i := 0; i < n; i++ { + s = append(s, + Sample{ + ExampleType: "cpu", + Labels: map[string]string{ + "node": "test3", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: int64(i), + Value: int64(i), + }) + } + return s +} + +func NestedListDef(name string, layout *schemav2pb.StorageLayout) *schemav2pb.Node_Group { 
+ return &schemav2pb.Node_Group{ + Group: &schemav2pb.Group{ + Name: name, + Nodes: []*schemav2pb.Node{ // NOTE that this nested group structure for a list is for backwards compatability: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists + { + Type: &schemav2pb.Node_Group{ + Group: &schemav2pb.Group{ + Name: "list", + Repeated: true, + Nodes: []*schemav2pb.Node{ + { + Type: &schemav2pb.Node_Leaf{ + Leaf: &schemav2pb.Leaf{ + Name: "element", + StorageLayout: layout, + }, + }, + }, + }, + }, + }, + }, + }, + }, + } +} + +func LabelColumn(name string) *schemav2pb.Node { + return &schemav2pb.Node{ + Type: &schemav2pb.Node_Group{ + Group: &schemav2pb.Group{ + Name: "labels", + Nodes: []*schemav2pb.Node{ + { + Type: &schemav2pb.Node_Leaf{ + Leaf: &schemav2pb.Leaf{ + Name: name, + StorageLayout: &schemav2pb.StorageLayout{ + Type: schemav2pb.StorageLayout_TYPE_STRING, + Nullable: true, + Encoding: schemav2pb.StorageLayout_ENCODING_RLE_DICTIONARY, + }, + }, + }, + }, + }, + }, + }, + } +} + +func NewNestedSampleSchema(t testing.TB) proto.Message { + t.Helper() + return &schemav2pb.Schema{ + Root: &schemav2pb.Group{ + Name: "nested", + Nodes: []*schemav2pb.Node{ + { + Type: &schemav2pb.Node_Group{ + Group: &schemav2pb.Group{ + Name: "labels", + Nodes: []*schemav2pb.Node{}, + }, + }, + }, + { + Type: NestedListDef("timestamps", &schemav2pb.StorageLayout{ + Type: schemav2pb.StorageLayout_TYPE_INT64, + Nullable: true, + Encoding: schemav2pb.StorageLayout_ENCODING_RLE_DICTIONARY, + }), + }, + { + Type: NestedListDef("values", &schemav2pb.StorageLayout{ + Type: schemav2pb.StorageLayout_TYPE_INT64, + Nullable: true, + Encoding: schemav2pb.StorageLayout_ENCODING_RLE_DICTIONARY, + }), + }, + }, + }, + SortingColumns: []*schemav2pb.SortingColumn{ + { + Path: "labels", + Direction: schemav2pb.SortingColumn_DIRECTION_ASCENDING, + NullsFirst: true, + }, + { + Path: "timestamp", + Direction: schemav2pb.SortingColumn_DIRECTION_ASCENDING, + }, + }, + 
} +} diff --git a/snapshot.go b/snapshot.go new file mode 100644 index 000000000..cc6beaa2c --- /dev/null +++ b/snapshot.go @@ -0,0 +1,778 @@ +package frostdb + +import ( + "bytes" + "context" + "encoding/binary" + "fmt" + "hash" + "hash/crc32" + "io" + "os" + "path/filepath" + "strconv" + "time" + + "github.com/oklog/ulid/v2" + "google.golang.org/protobuf/proto" + + "github.com/apache/arrow-go/v18/arrow/ipc" + "github.com/apache/arrow-go/v18/arrow/util" + "github.com/go-kit/log/level" + + "github.com/polarsignals/frostdb/dynparquet" + snapshotpb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/snapshot/v1alpha1" + tablepb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/table/v1alpha1" + walpb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/wal/v1alpha1" + "github.com/polarsignals/frostdb/index" + "github.com/polarsignals/frostdb/parts" +) + +// This file implements writing and reading database snapshots from disk. +// The snapshot format at the time of writing is as follows: +// 4-byte magic "FDBS" +//
+// <Table 1 Data>
+// <Table 2 Data>
+// <Table 3 Data>
+// ...
+// Footer/File Metadata
+// 4-byte length in bytes of footer/file metadata (little endian)
+// 4-byte version number (little endian)
+// 4-byte checksum (little endian)
+// 4-byte magic "FDBS"
+//
+// Readers should start reading a snapshot by first verifying that the magic
+// bytes are correct, followed by the version number to ensure that the snapshot
+// was encoded using a version the reader supports. A version bump could, for
+// example, add compression to the data bytes of the file.
+// Refer to snapshotVersion/minReadVersion for more details.
+
+const (
+	snapshotMagic = "FDBS"
+	dirPerms      = os.FileMode(0o755)
+	filePerms     = os.FileMode(0o640)
+	// When bumping the version number, please add a comment indicating the
+	// reason for the bump. Note that the version should only be bumped if the
+	// new version introduces backwards-incompatible changes. Note that protobuf
+	// changes are backwards-compatible, this version number is only necessary
+	// for the non-proto format (e.g. if compression is introduced).
+	// Version 1: Initial snapshot version with checksum and version number.
+	snapshotVersion = 1
+	// minReadVersion is bumped when deprecating older versions. For example,
+	// a reader of the new version can choose to still support reading older
+	// versions, but will bump this constant to the minimum version it claims
+	// to support.
+	minReadVersion = snapshotVersion
+)
+
+// snapshotFileName returns a 20-byte textual representation of a snapshot file
+// name at a given txn used for lexical ordering.
+func snapshotFileName(tx uint64) string {
+	return fmt.Sprintf("%020d.fdbs", tx)
+}
+
+func getTxFromSnapshotFileName(fileName string) (uint64, error) {
+	parsedTx, err := strconv.ParseUint(fileName[:20], 10, 64)
+	if err != nil {
+		return 0, err
+	}
+	return parsedTx, nil
+}
+
+// asyncSnapshot begins a new transaction and takes a snapshot of the
+// database in a new goroutine at that txn.
If another snapshot is
+// already in progress, the call is a no-op. When the snapshot
+// goroutine successfully completes a snapshot, onSuccess is called.
+func (db *DB) asyncSnapshot(ctx context.Context, onSuccess func()) {
+	db.snapshot(ctx, true, onSuccess)
+}
+
+// Snapshot performs a database snapshot and writes it to the database snapshots
+// directory, as is done by automatic snapshots.
+func (db *DB) Snapshot(ctx context.Context) error {
+	db.snapshot(ctx, false, func() {})
+	return db.reclaimDiskSpace(ctx, nil)
+}
+
+func (db *DB) snapshot(ctx context.Context, async bool, onSuccess func()) {
+	if !db.columnStore.enableWAL {
+		return
+	}
+	if !db.snapshotInProgress.CompareAndSwap(false, true) {
+		// Snapshot already in progress.
+		level.Debug(db.logger).Log(
+			"msg", "cannot start snapshot; snapshot already in progress",
+		)
+		return
+	}
+
+	tx, _, commit := db.begin()
+	level.Debug(db.logger).Log(
+		"msg", "starting a new snapshot",
+		"tx", tx,
+	)
+	doSnapshot := func(writeSnapshot func(context.Context, io.Writer) error) {
+		db.Wait(tx - 1) // Wait for all transactions to complete before taking a snapshot.
+		start := time.Now()
+		defer db.snapshotInProgress.Store(false)
+		defer commit()
+		if db.columnStore.enableWAL {
+			// Appending a snapshot record to the WAL is necessary,
+			// since the WAL expects a 1:1 relationship between txn ids
+			// and record indexes. This is done before the actual snapshot so
+			// that a failure to snapshot still appends a record to the WAL,
+			// avoiding a WAL deadlock.
+ if err := db.wal.Log( + tx, + &walpb.Record{ + Entry: &walpb.Entry{ + EntryType: &walpb.Entry_Snapshot_{Snapshot: &walpb.Entry_Snapshot{Tx: tx}}, + }, + }, + ); err != nil { + level.Error(db.logger).Log( + "msg", "failed to append snapshot record to WAL", "err", err, + ) + return + } + } + if err := db.snapshotAtTX(ctx, tx, writeSnapshot); err != nil { + level.Error(db.logger).Log( + "msg", "failed to snapshot database", "err", err, + ) + return + } + level.Debug(db.logger).Log( + "msg", "snapshot complete", + "tx", tx, + "duration", time.Since(start), + ) + onSuccess() + } + + if async { + go doSnapshot(db.snapshotWriter(tx)) + } else { + doSnapshot(db.offlineSnapshotWriter(tx)) + } +} + +// snapshotAtTX takes a snapshot of the state of the database at transaction tx. +func (db *DB) snapshotAtTX(ctx context.Context, tx uint64, writeSnapshot func(context.Context, io.Writer) error) error { + var fileSize int64 + start := time.Now() + if err := func() error { + snapshotsDir := SnapshotDir(db, tx) + fileName := filepath.Join(snapshotsDir, snapshotFileName(tx)) + _, err := os.Stat(fileName) + if err == nil { // Snapshot file already exists + if db.validateSnapshotTxn(ctx, tx) == nil { + return nil // valid snapshot already exists at tx no need to re-snapshot + } + + // Snapshot exists but is invalid. Remove it. 
+ if err := os.RemoveAll(SnapshotDir(db, tx)); err != nil { + return fmt.Errorf("failed to remove invalid snapshot %v: %w", tx, err) + } + } + if err := os.MkdirAll(snapshotsDir, dirPerms); err != nil { + return err + } + + f, err := os.OpenFile(fileName, os.O_CREATE|os.O_RDWR|os.O_TRUNC, filePerms) + if err != nil { + return err + } + defer f.Close() + + if err := func() error { + if err := writeSnapshot(ctx, f); err != nil { + return err + } + if err := f.Sync(); err != nil { + return err + } + info, err := f.Stat() + if err != nil { + return err + } + fileSize = info.Size() + return nil + }(); err != nil { + err = fmt.Errorf("failed to write snapshot for tx %d: %w", tx, err) + if removeErr := os.RemoveAll(snapshotsDir); removeErr != nil { + err = fmt.Errorf("%w: failed to remove snapshot directory: %v", err, removeErr) + } + return err + } + return nil + }(); err != nil { + db.metrics.snapshotsTotal.WithLabelValues("false").Inc() + return err + } + db.metrics.snapshotsTotal.WithLabelValues("true").Inc() + if fileSize > 0 { + db.metrics.snapshotFileSizeBytes.Set(float64(fileSize)) + } + db.metrics.snapshotDurationHistogram.Observe(time.Since(start).Seconds()) + // TODO(asubiotto): If snapshot file sizes become too large, investigate + // adding compression. + return nil +} + +// loadLatestSnapshot loads the latest snapshot (i.e. the snapshot with the +// highest txn) from the snapshots dir into the database. +func (db *DB) loadLatestSnapshot(ctx context.Context) (uint64, error) { + return db.loadLatestSnapshotFromDir(ctx, db.snapshotsDir()) +} + +func (db *DB) loadLatestSnapshotFromDir(ctx context.Context, dir string) (uint64, error) { + var ( + lastErr error + loadedTxn uint64 + ) + // No error should be returned from snapshotsDo. 
+ _ = db.snapshotsDo(ctx, dir, func(parsedTx uint64, entry os.DirEntry) (bool, error) { + if err := func() error { + f, err := os.Open(filepath.Join(dir, entry.Name(), snapshotFileName(parsedTx))) + if err != nil { + return err + } + defer f.Close() + info, err := f.Stat() + if err != nil { + return err + } + watermark, err := LoadSnapshot(ctx, db, parsedTx, f, info.Size(), filepath.Join(dir, entry.Name()), false) + if err != nil { + return err + } + // Success. + loadedTxn = watermark + return nil + }(); err != nil { + err = fmt.Errorf("unable to read snapshot file %s: %w", entry.Name(), err) + level.Debug(db.logger).Log( + "msg", "error reading snapshot", + "error", err, + ) + lastErr = err + return true, nil + } + return false, nil + }) + if loadedTxn != 0 { + // Successfully loaded a snapshot. + return loadedTxn, nil + } + + errString := "no valid snapshots found" + if lastErr != nil { + return 0, fmt.Errorf("%s: lastErr: %w", errString, lastErr) + } + return 0, fmt.Errorf("%s", errString) +} + +func LoadSnapshot(ctx context.Context, db *DB, tx uint64, r io.ReaderAt, size int64, dir string, truncateWAL bool) (uint64, error) { + if err := loadSnapshot(ctx, db, r, size, dir); err != nil { + return 0, err + } + watermark := tx + var wal WAL + if truncateWAL { + wal = db.wal + } + db.resetToTxn(watermark, wal) + return watermark, nil +} + +func (db *DB) validateSnapshotTxn(ctx context.Context, tx uint64) error { + dir := db.snapshotsDir() + + return db.snapshotsDo(ctx, dir, func(parsedTx uint64, entry os.DirEntry) (bool, error) { + if parsedTx != tx { // We're only trying to validate a single tx + return true, nil + } + + return false, func() error { + f, err := os.Open(filepath.Join(dir, entry.Name(), snapshotFileName(parsedTx))) + if err != nil { + return err + } + defer f.Close() + info, err := f.Stat() + if err != nil { + return err + } + // readFooter validates the checksum. 
+ if _, err := readFooter(f, info.Size()); err != nil { + return err + } + return nil + }() + }) +} + +func (db *DB) getLatestValidSnapshotTxn(ctx context.Context) (uint64, error) { + dir := db.snapshotsDir() + latestValidTxn := uint64(0) + // No error should be returned from snapshotsDo. + _ = db.snapshotsDo(ctx, dir, func(parsedTx uint64, entry os.DirEntry) (bool, error) { + if err := func() error { + f, err := os.Open(filepath.Join(dir, entry.Name(), snapshotFileName(parsedTx))) + if err != nil { + return err + } + defer f.Close() + info, err := f.Stat() + if err != nil { + return err + } + // readFooter validates the checksum. + if _, err := readFooter(f, info.Size()); err != nil { + return err + } + return nil + }(); err != nil { + level.Debug(db.logger).Log( + "msg", "error reading snapshot", + "error", err, + ) + // Continue to the next snapshot. + return true, nil + } + // Valid snapshot found. + latestValidTxn = parsedTx + return false, nil + }) + return latestValidTxn, nil +} + +type offsetWriter struct { + w io.Writer + runningChecksum hash.Hash32 + offset int +} + +func newChecksumWriter() hash.Hash32 { + return crc32.New(crc32.MakeTable(crc32.Castagnoli)) +} + +func newOffsetWriter(w io.Writer) *offsetWriter { + return &offsetWriter{ + w: w, + runningChecksum: newChecksumWriter(), + } +} + +func (w *offsetWriter) Write(p []byte) (int, error) { + if n, err := w.runningChecksum.Write(p); err != nil { + return n, fmt.Errorf("error writing checksum: %w", err) + } + n, err := w.w.Write(p) + w.offset += n + return n, err +} + +func (w *offsetWriter) checksum() uint32 { + return w.runningChecksum.Sum32() +} + +func (db *DB) snapshotWriter(tx uint64) func(context.Context, io.Writer) error { + return func(ctx context.Context, w io.Writer) error { + return WriteSnapshot(ctx, tx, db, w) + } +} + +// offlineSnapshotWriter is used when a database is closing after all the tables have closed. 
+func (db *DB) offlineSnapshotWriter(tx uint64) func(context.Context, io.Writer) error { + return func(ctx context.Context, w io.Writer) error { + return WriteSnapshot(ctx, tx, db, w) + } +} + +func WriteSnapshot(ctx context.Context, tx uint64, db *DB, w io.Writer) error { + offW := newOffsetWriter(w) + w = offW + var tables []*Table + db.mtx.RLock() + for _, t := range db.tables { + tables = append(tables, t) + } + db.mtx.RUnlock() + + if _, err := w.Write([]byte(snapshotMagic)); err != nil { + return err + } + + metadata := &snapshotpb.FooterData{} + for _, t := range tables { + if err := func() error { + // Obtain a write block to prevent racing with + // compaction/persistence. + block, done, err := t.ActiveWriteBlock() + if err != nil { + return err + } + defer done() + blockUlid, err := block.ulid.MarshalBinary() + if err != nil { + return err + } + + tableMeta := &snapshotpb.Table{ + Name: t.name, + Config: t.config.Load(), + ActiveBlock: &snapshotpb.Table_TableBlock{ + Ulid: blockUlid, + Size: block.Size(), + MinTx: block.minTx, + PrevTx: block.prevTx, + }, + } + + if err := block.Index().Snapshot(tx, func(p parts.Part) error { + granuleMeta := &snapshotpb.Granule{} + partMeta := &snapshotpb.Part{ + StartOffset: int64(offW.offset), + Tx: p.TX(), + CompactionLevel: uint64(p.CompactionLevel()), + } + if err := ctx.Err(); err != nil { + return err + } + + if record := p.Record(); record != nil { + partMeta.Encoding = snapshotpb.Part_ENCODING_ARROW + } else { + partMeta.Encoding = snapshotpb.Part_ENCODING_PARQUET + } + + if err := p.Write(w); err != nil { + return err + } + + partMeta.EndOffset = int64(offW.offset) + granuleMeta.PartMetadata = append(granuleMeta.PartMetadata, partMeta) + tableMeta.GranuleMetadata = append(tableMeta.GranuleMetadata, granuleMeta) // TODO: we have one part per granule now + return nil + }, snapshotIndexDir(db, tx, t.name, block.ulid.String())); err != nil { + return fmt.Errorf("failed to snapshot table %s index: %w", t.name, err) 
+ } + + metadata.TableMetadata = append(metadata.TableMetadata, tableMeta) + return nil + }(); err != nil { + return err + } + } + footer, err := metadata.MarshalVT() + if err != nil { + return err + } + // Write footer + size. + footer = binary.LittleEndian.AppendUint32(footer, uint32(len(footer))) + if _, err := w.Write(footer); err != nil { + return err + } + if _, err := w.Write(binary.LittleEndian.AppendUint32(nil, snapshotVersion)); err != nil { + return err + } + if _, err := w.Write(binary.LittleEndian.AppendUint32(nil, offW.checksum())); err != nil { + return err + } + if _, err := w.Write([]byte(snapshotMagic)); err != nil { + return err + } + return nil +} + +func readFooter(r io.ReaderAt, size int64) (*snapshotpb.FooterData, error) { + buffer := make([]byte, 16) + if _, err := r.ReadAt(buffer[:4], 0); err != nil { + return nil, err + } + if string(buffer[:4]) != snapshotMagic { + return nil, fmt.Errorf("invalid snapshot magic: %q", buffer[:4]) + } + if _, err := r.ReadAt(buffer, size-int64(len(buffer))); err != nil { + return nil, err + } + if string(buffer[12:]) != snapshotMagic { + return nil, fmt.Errorf("invalid snapshot magic: %q", buffer[4:]) + } + + // The checksum does not include the last 8 bytes of the file, which is the + // magic and the checksum. Create a section reader of all but the last 8 + // bytes to compute the checksum and validate it against the read checksum. 
+	checksum := binary.LittleEndian.Uint32(buffer[8:12])
+	checksumWriter := newChecksumWriter()
+	if _, err := io.Copy(checksumWriter, io.NewSectionReader(r, 0, size-8)); err != nil {
+		return nil, fmt.Errorf("failed to compute checksum: %w", err)
+	}
+	if checksum != checksumWriter.Sum32() {
+		return nil, fmt.Errorf(
+			"snapshot file corrupt: invalid checksum: expected %x, got %x", checksum, checksumWriter.Sum32(),
+		)
+	}
+
+	version := binary.LittleEndian.Uint32(buffer[4:8])
+	if version > snapshotVersion {
+		return nil, fmt.Errorf(
+			"cannot read snapshot with version %d: max version supported: %d", version, snapshotVersion,
+		)
+	}
+	if version < minReadVersion {
+		return nil, fmt.Errorf(
+			"cannot read snapshot with version %d: min version supported: %d", version, minReadVersion,
+		)
+	}
+
+	footerSize := binary.LittleEndian.Uint32(buffer[:4])
+	footerBytes := make([]byte, footerSize)
+	if _, err := r.ReadAt(footerBytes, size-(int64(len(buffer))+int64(footerSize))); err != nil {
+		return nil, err
+	}
+	footer := &snapshotpb.FooterData{}
+	if err := footer.UnmarshalVT(footerBytes); err != nil {
+		return nil, fmt.Errorf("could not unmarshal footer: %v", err)
+	}
+	return footer, nil
+}
+
+// loadSnapshot loads a snapshot from the given io.ReaderAt into db, restoring
+// each table's configuration and index parts, and returns an error if any
+// occurred.
+func loadSnapshot(ctx context.Context, db *DB, r io.ReaderAt, size int64, dir string) error { + footer, err := readFooter(r, size) + if err != nil { + return err + } + + for i, tableMeta := range footer.TableMetadata { + if err := func() error { + var schemaMsg proto.Message + switch v := tableMeta.Config.Schema.(type) { + case *tablepb.TableConfig_DeprecatedSchema: + schemaMsg = v.DeprecatedSchema + case *tablepb.TableConfig_SchemaV2: + schemaMsg = v.SchemaV2 + default: + return fmt.Errorf("unhandled schema type: %T", v) + } + + options := []TableOption{ + WithRowGroupSize(int(tableMeta.Config.RowGroupSize)), + WithBlockReaderLimit(int(tableMeta.Config.BlockReaderLimit)), + } + if tableMeta.Config.DisableWal { + options = append(options, WithoutWAL()) + } + tableConfig := NewTableConfig( + schemaMsg, + options..., + ) + + var blockUlid ulid.ULID + if err := blockUlid.UnmarshalBinary(tableMeta.ActiveBlock.Ulid); err != nil { + return err + } + + // Restore the table index from tx snapshot dir + if err := restoreIndexFilesFromSnapshot(db, tableMeta.Name, dir, blockUlid.String()); err != nil { + return err + } + + table, err := db.table(tableMeta.Name, tableConfig, blockUlid) + if err != nil { + return err + } + + table.mtx.Lock() + block := table.active + block.mtx.Lock() + // Store the last snapshot size so a snapshot is not triggered right + // after loading this snapshot. 
+ block.lastSnapshotSize.Store(tableMeta.ActiveBlock.Size) + block.minTx = tableMeta.ActiveBlock.MinTx + block.prevTx = tableMeta.ActiveBlock.PrevTx + newIdx := block.Index() + block.mtx.Unlock() + table.mtx.Unlock() + + for _, granuleMeta := range tableMeta.GranuleMetadata { + resultParts := make([]parts.Part, 0, len(granuleMeta.PartMetadata)) + for _, partMeta := range granuleMeta.PartMetadata { + if err := ctx.Err(); err != nil { + return err + } + startOffset := partMeta.StartOffset + endOffset := partMeta.EndOffset + partBytes := make([]byte, endOffset-startOffset) + if _, err := r.ReadAt(partBytes, startOffset); err != nil { + return err + } + partOptions := parts.WithCompactionLevel(int(partMeta.CompactionLevel)) + switch partMeta.Encoding { + case snapshotpb.Part_ENCODING_PARQUET: + serBuf, err := dynparquet.ReaderFromBytes(partBytes) + if err != nil { + return err + } + resultParts = append(resultParts, parts.NewParquetPart(partMeta.Tx, serBuf, partOptions)) + case snapshotpb.Part_ENCODING_ARROW: + if err := func() error { + arrowReader, err := ipc.NewReader(bytes.NewReader(partBytes)) + if err != nil { + return err + } + defer arrowReader.Release() + + record, err := arrowReader.Read() + if err != nil { + return err + } + + record.Retain() + resultParts = append( + resultParts, + parts.NewArrowPart(partMeta.Tx, record, uint64(util.TotalRecordSize(record)), table.schema, partOptions), + ) + return nil + }(); err != nil { + return err + } + default: + return fmt.Errorf("unknown part encoding: %s", partMeta.Encoding) + } + } + + for _, part := range resultParts { + newIdx.InsertPart(part) + } + } + + return nil + }(); err != nil { + db.mtx.Lock() + for _, cleanupTable := range footer.TableMetadata[:i] { + delete(db.tables, cleanupTable.Name) + } + db.mtx.Unlock() + return err + } + } + + return nil +} + +// cleanupSnapshotDir should be called with a tx at which the caller is certain +// a valid snapshot exists (e.g. 
the tx returned from +// getLatestValidSnapshotTxn). This method deletes all snapshots taken at any +// other transaction. +func (db *DB) cleanupSnapshotDir(ctx context.Context, tx uint64) error { + dir := db.snapshotsDir() + return db.snapshotsDo(ctx, dir, func(fileTx uint64, entry os.DirEntry) (bool, error) { + if fileTx == tx { + // Continue. + return true, nil + } + if err := os.RemoveAll(filepath.Join(dir, entry.Name())); err != nil { + return false, err + } + return true, nil + }) +} + +// snapshotsDo executes the given callback with the directory of each snapshot +// in dir in reverse lexicographical order (most recent snapshot first). If +// false or an error is returned by the callback, the iteration is aborted and +// the error returned. +func (db *DB) snapshotsDo(ctx context.Context, dir string, callback func(tx uint64, entry os.DirEntry) (bool, error)) error { + files, err := os.ReadDir(dir) + if err != nil { + return err + } + for i := len(files) - 1; i >= 0; i-- { + entry := files[i] + if ctx.Err() != nil { + return ctx.Err() + } + if filepath.Ext(entry.Name()) == ".fdbs" { // Legacy snapshots were stored at the top-level. Ignore these + continue + } + name := entry.Name() + if len(name) < 20 { + continue + } + parsedTx, err := getTxFromSnapshotFileName(name) + if err != nil { + continue + } + if ok, err := callback(parsedTx, entry); err != nil { + return err + } else if !ok { + return nil + } + } + return nil +} + +func StoreSnapshot(ctx context.Context, tx uint64, db *DB, snapshot io.Reader) error { + return db.snapshotAtTX(ctx, tx, func(_ context.Context, w io.Writer) error { + _, err := io.Copy(w, snapshot) + return err + }) +} + +// Will restore the index files found in the given directory back to the table's index directory. +func restoreIndexFilesFromSnapshot(db *DB, table, snapshotDir, blockID string) error { + // Remove the current index directory. 
+ if err := os.RemoveAll(filepath.Join(db.indexDir(), table)); err != nil { + return fmt.Errorf("failed to remove index directory: %w", err) + } + + snapshotIndexDir := filepath.Join(snapshotDir, "index", table, blockID) + + // Restore the index files from the snapshot files. + return filepath.WalkDir(snapshotIndexDir, func(path string, d os.DirEntry, err error) error { + if err != nil { + if os.IsNotExist(err) { + return nil // There is no index directory for this table. + } + return fmt.Errorf("failed to walk snapshot index directory: %w", err) + } + + if d.IsDir() { // Level dirs expected + return nil + } + + if filepath.Ext(path) != index.IndexFileExtension { + return nil // unknown file + } + + // Expected file path is ...// + filename := filepath.Base(path) + lvl := filepath.Base(filepath.Dir(path)) + + if err := os.MkdirAll(filepath.Join(db.indexDir(), table, blockID, lvl), dirPerms); err != nil { + return err + } + + // Hard link the file back into the index directory. + if err := os.Link(path, filepath.Join(db.indexDir(), table, blockID, lvl, filename)); err != nil { + return fmt.Errorf("hard link file: %w", err) + } + + return nil + }) +} + +func SnapshotDir(db *DB, tx uint64) string { + return filepath.Join(db.snapshotsDir(), fmt.Sprintf("%020d", tx)) +} + +func snapshotIndexDir(db *DB, tx uint64, table, block string) string { + return filepath.Join(SnapshotDir(db, tx), "index", table, block) +} diff --git a/snapshot_test.go b/snapshot_test.go new file mode 100644 index 000000000..f93a392fe --- /dev/null +++ b/snapshot_test.go @@ -0,0 +1,407 @@ +package frostdb + +import ( + "context" + "math" + "os" + "strconv" + "testing" + "time" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/google/uuid" + "github.com/stretchr/testify/require" + "golang.org/x/sync/errgroup" + + 
"github.com/polarsignals/frostdb/dynparquet" + "github.com/polarsignals/frostdb/query" + "github.com/polarsignals/frostdb/query/logicalplan" +) + +// insertSampleRecords is the same helper function as insertSamples but it inserts arrow records instead. +func insertSampleRecords(ctx context.Context, t *testing.T, table *Table, timestamps ...int64) uint64 { + t.Helper() + var samples dynparquet.Samples + samples = make([]dynparquet.Sample, 0, len(timestamps)) + for _, ts := range timestamps { + samples = append(samples, dynparquet.Sample{ + ExampleType: "ex", + Labels: map[string]string{ + "label1": "value1", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + }, + Timestamp: ts, + }) + } + + ar, err := samples.ToRecord() + require.NoError(t, err) + + tx, err := table.InsertRecord(ctx, ar) + require.NoError(t, err) + return tx +} + +func TestSnapshot(t *testing.T) { + ctx := context.Background() + // Create a new DB with multiple tables and granules with + // compacted/uncompacted parts that have a mixture of arrow/parquet records. + t.Run("Empty", func(t *testing.T) { + c, err := New( + WithStoragePath(t.TempDir()), + WithWAL(), + WithSnapshotTriggerSize(math.MaxInt64), + ) + require.NoError(t, err) + defer c.Close() + + db, err := c.DB(ctx, "test") + require.NoError(t, err) + + // Complete a txn so that the snapshot is created at txn 1, snapshots at + // txn 0 are considered empty so ignored. 
+ _, _, commit := db.begin() + commit() + + tx := db.highWatermark.Load() + require.NoError(t, db.snapshotAtTX(ctx, tx, db.snapshotWriter(tx))) + + txBefore := db.highWatermark.Load() + tx, err = db.loadLatestSnapshot(ctx) + require.NoError(t, err) + require.Equal(t, txBefore, tx) + }) + + t.Run("WithData", func(t *testing.T) { + c, err := New( + WithStoragePath(t.TempDir()), + WithWAL(), + WithSnapshotTriggerSize(math.MaxInt64), + ) + require.NoError(t, err) + defer c.Close() + + db, err := c.DB(ctx, "test") + require.NoError(t, err) + + config := NewTableConfig(dynparquet.SampleDefinition()) + + table, err := db.Table("table1", config) + require.NoError(t, err) + insertSampleRecords(ctx, t, table, 1, 2, 3) + require.NoError(t, table.EnsureCompaction()) + insertSampleRecords(ctx, t, table, 4, 5, 6) + insertSampleRecords(ctx, t, table, 7, 8, 9) + + const overrideConfigVal = 1234 + config.RowGroupSize = overrideConfigVal + table, err = db.Table("table2", config) + require.NoError(t, err) + insertSampleRecords(ctx, t, table, 1, 2, 3) + insertSampleRecords(ctx, t, table, 4, 5, 6) + + config.BlockReaderLimit = overrideConfigVal + _, err = db.Table("empty", config) + require.NoError(t, err) + + highWatermark := db.highWatermark.Load() + + // Insert a sample that should not be snapshot. + insertSampleRecords(ctx, t, table, 10) + require.NoError(t, db.snapshotAtTX(ctx, highWatermark, db.snapshotWriter(highWatermark))) + + // Create another db and verify. + snapshotDB, err := c.DB(ctx, "testsnapshot") + require.NoError(t, err) + + // Load the other db's latest snapshot. 
+ tx, err := snapshotDB.loadLatestSnapshotFromDir(ctx, db.snapshotsDir()) + require.NoError(t, err) + require.Equal(t, highWatermark, tx) + require.Equal(t, highWatermark, snapshotDB.highWatermark.Load()) + + require.Equal(t, len(db.tables), len(snapshotDB.tables)) + + snapshotEngine := query.NewEngine(memory.DefaultAllocator, snapshotDB.TableProvider()) + + for _, testCase := range []struct { + name string + expMaxTimestamp int + }{ + { + name: "table1", + expMaxTimestamp: 9, + }, + { + name: "table2", + expMaxTimestamp: 6, + }, + { + name: "empty", + }, + } { + if testCase.expMaxTimestamp != 0 { + aggrMax := []*logicalplan.AggregationFunction{ + logicalplan.Max(logicalplan.Col("timestamp")), + } + require.NoError( + t, + snapshotEngine.ScanTable(testCase.name).Aggregate(aggrMax, nil).Execute(ctx, + func(_ context.Context, + r arrow.Record, + ) error { + require.Equal( + t, testCase.expMaxTimestamp, int(r.Column(0).(*array.Int64).Int64Values()[0]), + ) + return nil + }), + ) + } + // Reset sync.Maps so reflect.DeepEqual can be used below. 
+ db.tables[testCase.name].schema.ResetWriters() + db.tables[testCase.name].schema.ResetBuffers() + require.Equal(t, db.tables[testCase.name].config.Load(), snapshotDB.tables[testCase.name].config.Load()) + } + }) + + t.Run("WithConcurrentWrites", func(t *testing.T) { + cancelCtx, cancelWrites := context.WithCancel(ctx) + + c, err := New( + WithStoragePath(t.TempDir()), + WithWAL(), + WithSnapshotTriggerSize(math.MaxInt64), + ) + require.NoError(t, err) + defer c.Close() + + db, err := c.DB(ctx, "test") + require.NoError(t, err) + + config := NewTableConfig(dynparquet.SampleDefinition()) + const tableName = "table" + table, err := db.Table(tableName, config) + require.NoError(t, err) + + highWatermarkAtStart := db.highWatermark.Load() + shouldStartSnapshotChan := make(chan struct{}) + var errg errgroup.Group + errg.Go(func() error { + ts := int64(highWatermarkAtStart) + for cancelCtx.Err() == nil { + tx := insertSampleRecords(ctx, t, table, ts) + // This check simply ensures that the assumption that inserting + // timestamp n corresponds to the n+1th transaction (the +1 + // corresponding to table creation). This assumption is required + // by the snapshot. + require.Equal(t, uint64(ts+1), tx) + ts++ + if ts == 10 { + close(shouldStartSnapshotChan) + } + } + return nil + }) + // Wait until some writes have happened. 
+ <-shouldStartSnapshotChan + defer cancelWrites() + snapshotDB, err := c.DB(ctx, "testsnapshot") + require.NoError(t, err) + tx := db.highWatermark.Load() + require.NoError(t, db.snapshotAtTX(ctx, tx, db.snapshotWriter(tx))) + snapshotTx, err := snapshotDB.loadLatestSnapshotFromDir(ctx, db.snapshotsDir()) + require.NoError(t, err) + require.NoError( + t, + query.NewEngine( + memory.DefaultAllocator, snapshotDB.TableProvider(), + ).ScanTable(tableName).Aggregate( + []*logicalplan.AggregationFunction{logicalplan.Max(logicalplan.Col("timestamp"))}, nil, + ).Execute(ctx, func(_ context.Context, r arrow.Record) error { + require.Equal( + t, int(snapshotTx-highWatermarkAtStart), int(r.Column(0).(*array.Int64).Int64Values()[0]), + ) + return nil + }), + ) + cancelWrites() + require.NoError(t, errg.Wait()) + }) +} + +// TestSnapshotWithWAL verifies that the interaction between snapshots and WAL +// entries works as expected. In general, snapshots should occur when a table +// block is rotated out. +func TestSnapshotWithWAL(t *testing.T) { + const dbAndTableName = "test" + var ( + ctx = context.Background() + dir = t.TempDir() + snapshotTx uint64 + firstWriteTimestamp int64 + ) + func() { + c, err := New( + WithWAL(), + WithStoragePath(dir), + WithSnapshotTriggerSize(1), + ) + require.NoError(t, err) + defer c.Close() + + db, err := c.DB(ctx, dbAndTableName) + require.NoError(t, err) + + schema := dynparquet.SampleDefinition() + table, err := db.Table(dbAndTableName, NewTableConfig(schema)) + require.NoError(t, err) + + samples := dynparquet.NewTestSamples() + firstWriteTimestamp = samples[0].Timestamp + for i := range samples { + samples[i].Timestamp = firstWriteTimestamp + } + ctx := context.Background() + + r, err := samples.ToRecord() + require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + + // No snapshots should have happened yet. 
+ _, err = os.ReadDir(db.snapshotsDir()) + require.ErrorIs(t, err, os.ErrNotExist) + + for i := range samples { + samples[i].Timestamp = firstWriteTimestamp + 1 + } + r, err = samples.ToRecord() + require.NoError(t, err) + // With this new insert, a snapshot should be triggered. + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + + require.Eventually(t, func() bool { + files, err := os.ReadDir(db.snapshotsDir()) + require.NoError(t, err) + if len(files) != 1 { + return false + } + snapshotTx, err = strconv.ParseUint(files[0].Name()[:20], 10, 64) + require.NoError(t, err) + return true + }, 1*time.Second, 10*time.Millisecond, "expected a snapshot on disk") + }() + + verifyC, err := New( + WithWAL(), + WithStoragePath(dir), + // Snapshot trigger size is not needed here, we only want to use this + // column store to verify correctness. + ) + require.NoError(t, err) + defer verifyC.Close() + + verifyDB, err := verifyC.DB(ctx, dbAndTableName) + require.NoError(t, err) + + // Truncate all entries from the WAL up to but not including the second + // insert. + require.NoError(t, verifyDB.wal.Truncate(snapshotTx+1)) + + engine := query.NewEngine(memory.DefaultAllocator, verifyDB.TableProvider()) + require.NoError( + t, + engine.ScanTable(dbAndTableName). + Aggregate( + []*logicalplan.AggregationFunction{logicalplan.Min(logicalplan.Col("timestamp"))}, + nil, + ).Execute(ctx, func(_ context.Context, r arrow.Record) error { + // This check verifies that the snapshot data (i.e. the first + // write) is correctly loaded. + require.Equal(t, firstWriteTimestamp, r.Column(0).(*array.Int64).Value(0)) + return nil + }), + ) + require.NoError( + t, + engine.ScanTable(dbAndTableName). + Aggregate( + []*logicalplan.AggregationFunction{logicalplan.Max(logicalplan.Col("timestamp"))}, + nil, + ).Execute(ctx, func(_ context.Context, r arrow.Record) error { + // This check verifies that the write that is only represented in + // WAL entries is still replayed (i.e. 
the second write) in the + // presence of a snapshot. + require.Equal(t, firstWriteTimestamp+1, r.Column(0).(*array.Int64).Value(0)) + return nil + }), + ) +} + +func TestSnapshotIsTakenOnUncompressedInserts(t *testing.T) { + const dbAndTableName = "test" + var ( + ctx = context.Background() + dir = t.TempDir() + ) + const ( + numInserts = 100 + expectedUncompressedInsertSize = 2000 + ) + c, err := New( + WithWAL(), + WithStoragePath(dir), + WithSnapshotTriggerSize(expectedUncompressedInsertSize), + ) + require.NoError(t, err) + defer c.Close() + + db, err := c.DB(ctx, dbAndTableName) + require.NoError(t, err) + + type model struct { + Bytes string `frostdb:",rle_dict"` + } + table, err := NewGenericTable[model]( + db, dbAndTableName, memory.NewGoAllocator(), + ) + require.NoError(t, err) + defer table.Release() + + for i := 0; i < numInserts; i++ { + _, err = table.Write(ctx, model{Bytes: "test"}) + require.NoError(t, err) + } + activeBlock := table.ActiveBlock() + require.True(t, activeBlock.Size() == expectedUncompressedInsertSize, "expected uncompressed insert size is wrong. The test should be updated.") + // This will force a compaction of all the inserts we have so far. + require.NoError(t, table.EnsureCompaction()) + require.True( + t, + activeBlock.Size() < activeBlock.uncompressedInsertsSize.Load(), + "expected uncompressed inserts to be larger than compressed inserts. Did the compaction run?", + ) + require.Zero(t, activeBlock.lastSnapshotSize.Load(), "expected no snapshots to be taken so far") + + // These writes should now trigger a snapshot even though the active block + // size is much lower than the uncompressed insert size. 
+ for i := 0; i < 2; i++ { + _, err = table.Write(ctx, model{Bytes: "test"}) + require.NoError(t, err) + } + + require.Eventually( + t, + func() bool { + return activeBlock.lastSnapshotSize.Load() > 0 + }, + 1*time.Second, + 10*time.Millisecond, + "expected snapshot to be taken", + ) +} diff --git a/sqlparse/parser.go b/sqlparse/parser.go index a0ecd2041..5e9af3145 100644 --- a/sqlparse/parser.go +++ b/sqlparse/parser.go @@ -27,7 +27,11 @@ type ParseResult struct { // queryEngine.ScanTable is provided as a starting point and no table needs to // be specified in the SQL statement. Additionally, the idea is to change to // creating logical plans directly (rather than through a builder). -func (p *Parser) ExperimentalParse(builder query.Builder, dynColNames []string, sql string) (ParseResult, error) { +func (p *Parser) ExperimentalParse( + builder query.Builder, + dynColNames []string, + sql string, +) (ParseResult, error) { asts, _, err := p.p.Parse(sql, "", "") if err != nil { return ParseResult{}, err diff --git a/sqlparse/visitor.go b/sqlparse/visitor.go index 583a7443b..f73d71173 100644 --- a/sqlparse/visitor.go +++ b/sqlparse/visitor.go @@ -5,8 +5,10 @@ import ( "strings" "time" - "github.com/apache/arrow/go/v10/arrow/scalar" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/scalar" "github.com/pingcap/tidb/parser/ast" + "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tidb/parser/opcode" "github.com/pingcap/tidb/parser/test_driver" @@ -25,7 +27,10 @@ type astVisitor struct { var _ ast.Visitor = &astVisitor{} -func newASTVisitor(builder query.Builder, dynColNames []string) *astVisitor { +func newASTVisitor( + builder query.Builder, + dynColNames []string, +) *astVisitor { dynMap := make(map[string]struct{}) for _, n := range dynColNames { dynMap[n] = struct{}{} @@ -51,20 +56,98 @@ func (v *astVisitor) Enter(n ast.Node) (nRes 
ast.Node, skipChildren bool) { expr.Fields.Accept(v) switch { case expr.GroupBy != nil: + // This represents everything before the "group by" clause. + beforeGroupBy := v.exprStack expr.GroupBy.Accept(v) - var agg []logicalplan.Expr - var groups []logicalplan.Expr - - for _, expr := range v.exprStack { - switch expr.(type) { - case *logicalplan.AliasExpr, *logicalplan.AggregationFunction: - agg = append(agg, expr) - default: - groups = append(groups, expr) + // This represents everything after the "group by" clause. + afterGroupBy := v.exprStack[len(beforeGroupBy):] + groups := afterGroupBy + + includedPreprojections := make(map[string]struct{}) + preProjections := []logicalplan.Expr{} + postProjections := []logicalplan.Expr{} + aggregations := []*logicalplan.AggregationFunction{} + + for _, expr := range beforeGroupBy { + // Walk the expression tree and separate out what projections + // need to be done before aggregations, and which can be done + // after. We don't support nested aggregations, so we can just + // find aggregation functions and put their expressions into + // the pre-projections. If we wanted to support nested + // aggregations, this would need to be more sophisticated. + aggCollector := &aggregationCollector{} + expr.Accept(aggCollector) + + // If we have any aggregations nested in the AST like + // "sum(value) as value_sum_or_anything_else" then the actual + // query plan nesting looks something like: + // alias(value_sum_or_anything_else, sum(value)) + + if len(aggCollector.aggregations) > 0 { + // This is the expression that will be aggregated so we + // need to ensure that it is in the pre-projection. + for _, agg := range aggCollector.aggregations { + if _, ok := includedPreprojections[agg.Expr.Name()]; !ok { + preProjections = append(preProjections, agg.Expr) + // The same expression can be used in multiple + // aggregations, but we only need to project it + // once. 
+ includedPreprojections[agg.Expr.Name()] = struct{}{} + } + + aggregations = append(aggregations, agg) + } + postProjections = append(postProjections, expr) + } else { + preProjections = append(preProjections, expr) + if _, ok := expr.(*logicalplan.DynamicColumn); ok { + postProjections = append(postProjections, expr) + } else { + postProjections = append(postProjections, logicalplan.Col(expr.Name())) + } } } - v.builder = v.builder.Aggregate(agg, groups) + + // We need to ensure that anything we group by is in the pre-projection. + for _, expr := range afterGroupBy { + found := false + for _, preExpr := range preProjections { + if expr.Name() == preExpr.Name() { + found = true + break + } + } + if !found { + preProjections = append(preProjections, expr) + } + } + + // Insert a projection for any groups that need to be computed + // before they can be used for an aggregation, for example: + // + // SELECT sum(value), (timestamp/1000) as timestamp_bucket group by timestamp_bucket + v.builder = v.builder.Project(preProjections...) + v.builder = v.builder.Aggregate(aggregations, groups) + + // Insert a projection for any groups that need to be computed + // before they can be used for an aggregation, for example: + // + // SELECT sum(value)/count(value) value_avg, (timestamp/1000) as timestamp_bucket group by timestamp_bucket + v.builder = v.builder.Project(postProjections...) + + // Finally we check if the result should be limited. + if expr.Limit != nil { + expr.Limit.Count.Accept(v) + lastExpr, _ := pop(v.exprStack) + v.builder = v.builder.Limit(lastExpr) + } + case expr.Limit != nil: + expr.Limit.Count.Accept(v) + lastExpr, newExprs := pop(v.exprStack) + v.builder = v.builder.Project(newExprs...) + v.builder = v.builder.Limit(lastExpr) case expr.Distinct: + v.builder = v.builder.Project(v.exprStack...) v.builder = v.builder.Distinct(v.exprStack...) default: v.builder = v.builder.Project(v.exprStack...) 
@@ -74,6 +157,25 @@ func (v *astVisitor) Enter(n ast.Node) (nRes ast.Node, skipChildren bool) { return n, false } +type aggregationCollector struct { + aggregations []*logicalplan.AggregationFunction +} + +func (a *aggregationCollector) PreVisit(_ logicalplan.Expr) bool { + return true +} + +func (a *aggregationCollector) Visit(e logicalplan.Expr) bool { + if agg, ok := e.(*logicalplan.AggregationFunction); ok { + a.aggregations = append(a.aggregations, agg) + } + return true +} + +func (a *aggregationCollector) PostVisit(_ logicalplan.Expr) bool { + return true +} + func (v *astVisitor) Leave(n ast.Node) (nRes ast.Node, ok bool) { if err := v.leaveImpl(n); err != nil { v.err = err @@ -129,6 +231,14 @@ func (v *astVisitor) leaveImpl(n ast.Node) error { frostDBOp = logicalplan.OpEq case opcode.NE: frostDBOp = logicalplan.OpNotEq + case opcode.Plus: + frostDBOp = logicalplan.OpAdd + case opcode.Minus: + frostDBOp = logicalplan.OpSub + case opcode.Mul: + frostDBOp = logicalplan.OpMul + case opcode.Div: + frostDBOp = logicalplan.OpDiv case opcode.LogicAnd: v.exprStack = append(v.exprStack, logicalplan.And(leftExpr, rightExpr)) return nil @@ -136,8 +246,9 @@ func (v *astVisitor) leaveImpl(n ast.Node) error { v.exprStack = append(v.exprStack, logicalplan.Or(leftExpr, rightExpr)) return nil } + v.exprStack = append(v.exprStack, &logicalplan.BinaryExpr{ - Left: logicalplan.Col(leftExpr.Name()), + Left: leftExpr, Op: frostDBOp, Right: rightExpr, }) @@ -162,7 +273,16 @@ func (v *astVisitor) leaveImpl(n ast.Node) error { case *ast.SelectField: if as := expr.AsName.String(); as != "" { lastExpr := len(v.exprStack) - 1 - v.exprStack[lastExpr] = v.exprStack[lastExpr].(*logicalplan.AggregationFunction).Alias(as) // TODO should probably just be an alias expr and not from an aggregate function + switch e := v.exprStack[lastExpr].(type) { + case *logicalplan.AggregationFunction: + v.exprStack[lastExpr] = e.Alias(as) + case *logicalplan.BinaryExpr: + v.exprStack[lastExpr] = 
e.Alias(as) + case *logicalplan.Column: + v.exprStack[lastExpr] = e.Alias(as) + default: + return fmt.Errorf("unhandled select field %s", as) + } } case *ast.PatternRegexpExpr: rightExpr, newExprs := pop(v.exprStack) @@ -178,7 +298,25 @@ func (v *astVisitor) leaveImpl(n ast.Node) error { e.Op = logicalplan.OpRegexNotMatch } v.exprStack = append(v.exprStack, e) - case *ast.FieldList, *ast.ColumnNameExpr, *ast.GroupByClause, *ast.ByItem, *ast.RowExpr, + case *ast.PatternLikeOrIlikeExpr: + // Note that we're resolving exprs as a stack, so the last two + // expressions are the leaf expressions. + rightExpr, newExprs := pop(v.exprStack) + leftExpr, newExprs := pop(newExprs) + v.exprStack = newExprs + + op := logicalplan.OpContains + if expr.Not { + op = logicalplan.OpNotContains + } + + v.exprStack = append(v.exprStack, &logicalplan.BinaryExpr{ + Left: logicalplan.Col(leftExpr.Name()), + Op: op, + Right: rightExpr, + }) + case *ast.GroupByClause: + case *ast.FieldList, *ast.ColumnNameExpr, *ast.ByItem, *ast.RowExpr, *ast.ParenthesesExpr: // Deliberate pass-through nodes. 
case *ast.FuncCallExpr: @@ -198,6 +336,17 @@ func (v *astVisitor) leaveImpl(n ast.Node) error { default: return fmt.Errorf("unhandled func call: %s", expr.FnName.String()) } + case *ast.FuncCastExpr: + var t arrow.DataType + switch expr.Tp.GetType() { + case mysql.TypeFloat: + t = arrow.PrimitiveTypes.Float64 + default: + return fmt.Errorf("unhandled cast type: %s", expr.Tp) + } + e, newExprs := pop(v.exprStack) + v.exprStack = newExprs + v.exprStack = append(v.exprStack, logicalplan.Convert(e, t)) default: return fmt.Errorf("unhandled ast node %T", expr) } diff --git a/storage/bucket.go b/storage/bucket.go index c3cb845c3..7ba8ba022 100644 --- a/storage/bucket.go +++ b/storage/bucket.go @@ -9,7 +9,6 @@ import ( "context" "errors" "io" - "strings" "github.com/thanos-io/objstore" ) @@ -71,98 +70,3 @@ func (b *FileReaderAt) ReadAt(p []byte, off int64) (n int, err error) { return total, nil } - -// PrefixedBucket is a Bucket object that has file names prefixed with a given path. -type PrefixedBucket struct { - bkt Bucket - prefix string -} - -// NewPrefixedBucket returns a new prefixed bucket. -func NewPrefixedBucket(bkt Bucket, prefix string) Bucket { - if validPrefix(prefix) { - return &PrefixedBucket{bkt: bkt, prefix: strings.Trim(prefix, objstore.DirDelim)} - } - - return bkt -} - -func validPrefix(prefix string) bool { - prefix = strings.Replace(prefix, "/", "", -1) - return len(prefix) > 0 -} - -func conditionalPrefix(prefix, name string) string { - if len(name) > 0 { - return withPrefix(prefix, name) - } - - return name -} - -func withPrefix(prefix, name string) string { - return prefix + objstore.DirDelim + name -} - -// GetReaderAt returns a io.ReaderAt object for the given file. -func (p *PrefixedBucket) GetReaderAt(ctx context.Context, name string) (io.ReaderAt, error) { - return p.bkt.GetReaderAt(ctx, conditionalPrefix(p.prefix, name)) -} - -// Close implements the io.Closer interface. 
-func (p *PrefixedBucket) Close() error { - return p.bkt.Close() -} - -// Iter calls f for each entry in the given directory (not recursive.). The argument to f is the full -// object name including the prefix of the inspected directory. -// Entries are passed to function in sorted order. -func (p *PrefixedBucket) Iter(ctx context.Context, dir string, f func(string) error, options ...objstore.IterOption) error { - pdir := withPrefix(p.prefix, dir) - - return p.bkt.Iter(ctx, pdir, func(s string) error { - return f(strings.TrimPrefix(s, p.prefix+objstore.DirDelim)) - }, options...) -} - -// Get returns a reader for the given object name. -func (p *PrefixedBucket) Get(ctx context.Context, name string) (io.ReadCloser, error) { - return p.bkt.Get(ctx, conditionalPrefix(p.prefix, name)) -} - -// GetRange returns a new range reader for the given object name and range. -func (p *PrefixedBucket) GetRange(ctx context.Context, name string, off, length int64) (io.ReadCloser, error) { - return p.bkt.GetRange(ctx, conditionalPrefix(p.prefix, name), off, length) -} - -// Exists checks if the given object exists in the bucket. -func (p *PrefixedBucket) Exists(ctx context.Context, name string) (bool, error) { - return p.bkt.Exists(ctx, conditionalPrefix(p.prefix, name)) -} - -// IsObjNotFoundErr returns true if error means that object is not found. Relevant to Get operations. -func (p *PrefixedBucket) IsObjNotFoundErr(err error) bool { - return p.bkt.IsObjNotFoundErr(err) -} - -// Attributes returns information about the specified object. -func (p PrefixedBucket) Attributes(ctx context.Context, name string) (objstore.ObjectAttributes, error) { - return p.bkt.Attributes(ctx, conditionalPrefix(p.prefix, name)) -} - -// Upload the contents of the reader as an object into the bucket. -// Upload should be idempotent. 
-func (p *PrefixedBucket) Upload(ctx context.Context, name string, r io.Reader) error { - return p.bkt.Upload(ctx, conditionalPrefix(p.prefix, name), r) -} - -// Delete removes the object with the given name. -// If object does not exists in the moment of deletion, Delete should throw error. -func (p *PrefixedBucket) Delete(ctx context.Context, name string) error { - return p.bkt.Delete(ctx, conditionalPrefix(p.prefix, name)) -} - -// Name returns the bucket name for the provider. -func (p *PrefixedBucket) Name() string { - return p.bkt.Name() -} diff --git a/storage/iceberg.go b/storage/iceberg.go new file mode 100644 index 000000000..99f3d53dc --- /dev/null +++ b/storage/iceberg.go @@ -0,0 +1,530 @@ +package storage + +import ( + "context" + "encoding/binary" + "errors" + "fmt" + "io" + "path/filepath" + "strings" + "sync" + "time" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/oklog/ulid" + "github.com/parquet-go/parquet-go" + "github.com/polarsignals/iceberg-go" + "github.com/polarsignals/iceberg-go/catalog" + "github.com/polarsignals/iceberg-go/table" + "github.com/thanos-io/objstore" + + "github.com/polarsignals/frostdb/dynparquet" + "github.com/polarsignals/frostdb/query/expr" + "github.com/polarsignals/frostdb/query/logicalplan" +) + +/* + Iceberg is an Apache Iceberg backed DataSink/DataSource. + + The Iceberg layout is as follows: + //
/.metadata.json // Metadata file + //
/data/.parquet // data files + //
/metadata/snap..avro // Manifest list file (snapshot) + //
/metadata/.avro // Manifest file + //
/metadata/version-hint.text // Version hint file (if hdfs catalog used) + + On Upload a new snapshot is created and the data file is added to a manifest (or a new manifest is created depending on settings). + This manifest is then added to the existing manifest list, and a new version of the metadata file is created. + + Once the new metadata file is written the version hint file is updated with the latest version number of the table. + This version-hint file is used to determine the latest version of the table. (HDFS catalog only) + + On Scan the latest snapshot is loaded and the manifest list is read. + If the manifests are partitioned; the manifests are filtered out based on the given filter against the partition data. + The remaining manifests are then read, and the data files are filtered out based on the given filter and the min/max columns of the data file. + + Remaining data files are then read and the filter is applied to each row group in the data file. + +*/ + +const ( + DefaultOrphanedFileAge = 24 * time.Hour +) + +var defaultWriterOptions = []table.WriterOption{ + table.WithManifestSizeBytes(8 * 1024 * 1024), // 8MiB manifest size + table.WithMergeSchema(), + table.WithExpireSnapshotsOlderThan(6 * time.Hour), // 6 hours of snapshots + table.WithMetadataDeleteAfterCommit(), + table.WithMetadataPreviousVersionsMax(3), // Keep 3 previous versions of the metadata +} + +// Iceberg is an Apache Iceberg backed DataSink/DataSource. +type Iceberg struct { + catalog catalog.Catalog + bucketURI string // bucketURI is the URI of the bucket i.e gs://, s3:// etc. 
+ bucket objstore.Bucket + logger log.Logger + + // configuration options + partitionSpec iceberg.PartitionSpec + maxDataFileAge time.Duration + orphanedFileAge time.Duration + maintenanceSchedule time.Duration + + // mainteneance goroutine lifecycle controls + maintenanceCtx context.Context + maintenanceDone context.CancelFunc + maintenanceWg sync.WaitGroup +} + +// IcebergOption is a function that configures an Iceberg DataSink/DataSource. +type IcebergOption func(*Iceberg) + +// NewIceberg creates a new Iceberg DataSink/DataSource. +// You must provide the URI of the warehouse and the objstore.Bucket that points to that warehouse. +func NewIceberg(uri string, ctlg catalog.Catalog, bucket objstore.Bucket, options ...IcebergOption) (*Iceberg, error) { + berg := &Iceberg{ + catalog: ctlg, + bucketURI: uri, + bucket: catalog.NewIcebucket(uri, bucket), + orphanedFileAge: DefaultOrphanedFileAge, + logger: log.NewNopLogger(), + } + + for _, opt := range options { + opt(berg) + } + + // Start a maintenance goroutine if a schedule is set + if berg.maintenanceSchedule > 0 { + berg.maintenanceCtx, berg.maintenanceDone = context.WithCancel(context.Background()) + berg.maintenanceWg.Add(1) + go func(ctx context.Context) { + defer berg.maintenanceWg.Done() + ticker := time.NewTicker(berg.maintenanceSchedule) + defer ticker.Stop() + for { + select { + case <-ticker.C: + if err := berg.Maintenance(ctx); err != nil { + level.Error(berg.logger).Log("msg", "iceberg maintenance failure", "err", err) + } + case <-ctx.Done(): + return + } + } + }(berg.maintenanceCtx) + } + + return berg, nil +} + +func (i *Iceberg) Close() error { + if i.maintenanceDone != nil { + i.maintenanceDone() + i.maintenanceWg.Wait() + } + return nil +} + +func (i *Iceberg) Maintenance(ctx context.Context) error { + dbs, err := i.catalog.ListNamespaces(ctx, []string{i.bucketURI}) + if err != nil { + return err + } + + for _, db := range dbs { + tables, err := i.catalog.ListTables(ctx, 
[]string{filepath.Join(append([]string{i.bucketURI}, db...)...)}) // FIXME: this is clunky + if err != nil { + return err + } + + for _, tbl := range tables { + tablePath := filepath.Join(i.bucketURI, db[0], tbl[0]) // FIXME this is clunky; Iceberg should just return the fully qualified path + t, err := i.catalog.LoadTable(ctx, []string{tablePath}, iceberg.Properties{}) + if err != nil { + return err + } + + if i.maxDataFileAge > 0 { + w, err := t.SnapshotWriter(defaultWriterOptions...) + if err != nil { + return err + } + + // Delete all data files in a table that are older than the max age + if err := w.DeleteDataFile(ctx, func(d iceberg.DataFile) bool { + id, err := ulid.Parse(strings.TrimSuffix(filepath.Base(d.FilePath()), ".parquet")) + if err != nil { + level.Error(i.logger).Log("msg", "failed to parse ulid", "err", err) + return false + } + + return time.Since(ulid.Time(id.Time())) > i.maxDataFileAge + }); err != nil { + return err + } + + if err := w.Close(ctx); err != nil { + return err + } + + // Reload the table we just modified + t, err = i.catalog.LoadTable(ctx, []string{tablePath}, iceberg.Properties{}) + if err != nil { + return err + } + } + + // Delete orphaned files that are more than the max orphaned file age + if err := table.DeleteOrphanFiles(ctx, t, i.orphanedFileAge); err != nil { + return err + } + } + } + + return nil +} + +// WithIcebergPartitionSpec sets the partition spec for the Iceberg table. This is useful for pruning manifests during scans. +// note that at this time the Iceberg storage engine does not write data in a partition fashion. So this is only useful for setting the upper/lower bounds +// of columns in the manifest data. +func WithIcebergPartitionSpec(spec iceberg.PartitionSpec) IcebergOption { + return func(i *Iceberg) { + i.partitionSpec = spec + } +} + +// WithDataFileExpiry will a maxiumum age for data files. 
Data files older than the max age will be deleted from the table periodically according to the maintenance schedule. +func WithDataFileExpiry(maxAge time.Duration) IcebergOption { + return func(i *Iceberg) { + i.maxDataFileAge = maxAge + } +} + +// WithMaintenanceSchedule sets the schedule for the maintenance of the Iceberg table. +// This will spawn a goroutine that will periodically expire data files if WithDataFileExpiry is set. +// And will delete orphanded files from the table. +func WithMaintenanceSchedule(schedule time.Duration) IcebergOption { + return func(i *Iceberg) { + i.maintenanceSchedule = schedule + } +} + +func WithLogger(l log.Logger) IcebergOption { + return func(i *Iceberg) { + i.logger = l + } +} + +func (i *Iceberg) String() string { + return "Iceberg" +} + +// Scan will load the latest Iceberg table. It will filter out any manifests that do not contain useful data. +// Then it will read the manifests that may contain useful data. It will then filter out the data file that dot not contain useful data. +// Finally it has a set of data files that may contain useful data. It will then read the data files and apply the filter to each row group in the data file. 
+func (i *Iceberg) Scan(ctx context.Context, prefix string, _ *dynparquet.Schema, filter logicalplan.Expr, _ uint64, callback func(context.Context, any) error) error { + t, err := i.catalog.LoadTable(ctx, []string{i.bucketURI, prefix}, iceberg.Properties{}) + if err != nil { + if errors.Is(err, catalog.ErrorTableNotFound) { + return nil + } + return fmt.Errorf("failed to load table: %w", err) + } + + // Get the latest snapshot + snapshot := t.CurrentSnapshot() + list, err := snapshot.Manifests(i.bucket) + if err != nil { + return fmt.Errorf("error reading manifest list: %w", err) + } + + fltr, err := expr.BooleanExpr(filter) + if err != nil { + return err + } + + for _, manifest := range list { + ok, err := manifestMayContainUsefulData(t.Metadata().PartitionSpec(), t.Schema(), manifest, fltr) + if err != nil { + return fmt.Errorf("failed to filter manifest: %w", err) + } + if !ok { + continue + } + + entries, schema, err := manifest.FetchEntries(i.bucket, false) + if err != nil { + return fmt.Errorf("fetch entries %s: %w", manifest.FilePath(), err) + } + + for _, e := range entries { + ok, err := manifestEntryMayContainUsefulData(icebergSchemaToParquetSchema(schema), e, fltr) + if err != nil { + return fmt.Errorf("failed to filter entry: %w", err) + } + if !ok { + continue + } + + // TODO(thor): data files can be processed in parallel + bkt := NewBucketReaderAt(i.bucket) + r, err := bkt.GetReaderAt(ctx, e.DataFile().FilePath()) + if err != nil { + return err + } + + file, err := parquet.OpenFile( + r, + e.DataFile().FileSizeBytes(), + parquet.FileReadMode(parquet.ReadModeAsync), + ) + if err != nil { + return fmt.Errorf("failed to open file %s: %w", e.DataFile().FilePath(), err) + } + + // Get a reader from the file bytes + buf, err := dynparquet.NewSerializedBuffer(file) + if err != nil { + return err + } + + for i := 0; i < buf.NumRowGroups(); i++ { + rg := buf.DynamicRowGroup(i) + mayContainUsefulData, err := fltr.Eval(rg, false) + if err != nil { + return err + 
} + if mayContainUsefulData { + if err := callback(ctx, rg); err != nil { + return err + } + } + } + } + } + + return nil +} + +// Prefixes lists all the tables found in the warehouse for the given database(prefix). +func (i *Iceberg) Prefixes(ctx context.Context, prefix string) ([]string, error) { + tables, err := i.catalog.ListTables(ctx, []string{filepath.Join(i.bucketURI, prefix)}) + if err != nil { + return nil, err + } + + tableNames := make([]string, 0, len(tables)) + for _, t := range tables { + tableNames = append(tableNames, filepath.Join(t...)) + } + return tableNames, nil +} + +// Upload a parquet file into the Iceberg table. +func (i *Iceberg) Upload(ctx context.Context, name string, r io.Reader) error { + tablePath := filepath.Join(i.bucketURI, filepath.Dir(filepath.Dir(name))) + t, err := i.catalog.LoadTable(ctx, []string{tablePath}, iceberg.Properties{}) + if err != nil { + if !errors.Is(err, catalog.ErrorTableNotFound) { + return err + } + + // Table doesn't exist, create it + t, err = i.catalog.CreateTable(ctx, tablePath, iceberg.NewSchema(0), iceberg.Properties{}, + catalog.WithPartitionSpec(i.partitionSpec), + ) + if err != nil { + return fmt.Errorf("failed to create table: %w", err) + } + } + + w, err := t.SnapshotWriter(defaultWriterOptions...) + if err != nil { + return err + } + + if err := w.Append(ctx, r); err != nil { + return err + } + + return w.Close(ctx) +} + +func (i *Iceberg) Delete(_ context.Context, _ string) error { + // Noop + // NOTE: Deletes are used in DataSinks when an upload fails for any reason. Because an Iceberg table is not updated + // until a full upload is successfull there is no risk of partial data being left in the table, or a corrupted file being read. 
+ return nil +} + +func icebergTypeToParquetNode(t iceberg.Type) parquet.Node { + switch t.Type() { + case "long": + return parquet.Int(64) + case "binary": + return parquet.String() + case "boolean": + return parquet.Leaf(parquet.BooleanType) + case "int": + return parquet.Int(32) + case "float": + return parquet.Leaf(parquet.FloatType) + case "double": + return parquet.Leaf(parquet.DoubleType) + case "string": + return parquet.String() + default: + panic(fmt.Sprintf("unsupported type: %s", t.Type())) + } +} + +func icebergSchemaToParquetSchema(schema *iceberg.Schema) *parquet.Schema { + g := parquet.Group{} + for _, f := range schema.Fields() { + g[f.Name] = icebergTypeToParquetNode(f.Type) + } + return parquet.NewSchema("iceberg", g) +} + +func manifestMayContainUsefulData(partition iceberg.PartitionSpec, schema *iceberg.Schema, manifest iceberg.ManifestFile, filter expr.TrueNegativeFilter) (bool, error) { + if partition.IsUnpartitioned() { + return true, nil + } + // Ignore missing columns as the partition spec only contains the columns that are partitioned + return filter.Eval(manifestToParticulate(partition, schema, manifest), true) +} + +func manifestEntryMayContainUsefulData(schema *parquet.Schema, entry iceberg.ManifestEntry, filter expr.TrueNegativeFilter) (bool, error) { + return filter.Eval(dataFileToParticulate(schema, entry.DataFile()), false) +} + +func manifestToParticulate(partition iceberg.PartitionSpec, schema *iceberg.Schema, m iceberg.ManifestFile) expr.Particulate { + // Convert the partition spec to a parquet schema + g := parquet.Group{} + virtualColumnChunks := make([]parquet.ColumnChunk, 0, partition.NumFields()) + for i := 0; i < partition.NumFields(); i++ { + field := partition.Field(i) + summary := m.Partitions()[i] + node := icebergTypeToParquetNode(schema.Field(field.SourceID).Type) + g[field.Name] = node + virtualColumnChunks = append(virtualColumnChunks, &virtualColumnChunk{ + pType: node.Type(), + nulls: 0, // TODO future 
optimization? + column: i, + lowerBounds: *summary.LowerBound, + upperBounds: *summary.UpperBound, + numValues: 1, // m.ExistingRows() + m.AddedRows() // TODO: future optimization? + }) + } + + return &manifestParticulate{ + schema: parquet.NewSchema("iceberg-partition", g), + columnChunks: virtualColumnChunks, + } +} + +type manifestParticulate struct { + columnChunks []parquet.ColumnChunk + schema *parquet.Schema +} + +func (m *manifestParticulate) Schema() *parquet.Schema { return m.schema } + +func (m *manifestParticulate) ColumnChunks() []parquet.ColumnChunk { return m.columnChunks } + +func dataFileToParticulate(schema *parquet.Schema, d iceberg.DataFile) expr.Particulate { + return &dataFileParticulate{ + schema: schema, + data: d, + } +} + +type dataFileParticulate struct { + schema *parquet.Schema + data iceberg.DataFile +} + +func (d *dataFileParticulate) Schema() *parquet.Schema { + return d.schema +} + +func (d *dataFileParticulate) ColumnChunks() []parquet.ColumnChunk { + virtualColumnChunks := make([]parquet.ColumnChunk, 0, len(d.schema.Fields())) + for i := range d.schema.Fields() { + virtualColumnChunks = append(virtualColumnChunks, &virtualColumnChunk{ + pType: d.schema.Fields()[i].Type(), + nulls: d.data.NullValueCounts()[i], + column: i, + lowerBounds: d.data.LowerBoundValues()[i], + upperBounds: d.data.UpperBoundValues()[i], + numValues: d.data.Count(), + }) + } + return virtualColumnChunks +} + +type virtualColumnChunk struct { + pType parquet.Type + column int + numValues int64 + nulls int64 + lowerBounds []byte + upperBounds []byte +} + +func (v *virtualColumnChunk) Type() parquet.Type { return nil } +func (v *virtualColumnChunk) Column() int { return v.column } +func (v *virtualColumnChunk) Pages() parquet.Pages { return nil } +func (v *virtualColumnChunk) ColumnIndex() (parquet.ColumnIndex, error) { + return &virtualColumnIndex{ + pType: v.pType, + nulls: v.nulls, + lowerBounds: v.lowerBounds, + upperBounds: v.upperBounds, + }, nil +} +func 
(v *virtualColumnChunk) OffsetIndex() (parquet.OffsetIndex, error) { return nil, nil } +func (v *virtualColumnChunk) BloomFilter() parquet.BloomFilter { return nil } +func (v *virtualColumnChunk) NumValues() int64 { return v.numValues } + +type virtualColumnIndex struct { + lowerBounds []byte + upperBounds []byte + nulls int64 + pType parquet.Type +} + +func (v *virtualColumnIndex) NumPages() int { return 1 } +func (v *virtualColumnIndex) NullCount(int) int64 { return v.nulls } +func (v *virtualColumnIndex) NullPage(int) bool { return false } +func (v *virtualColumnIndex) MinValue(int) parquet.Value { + switch v.pType.Kind() { + case parquet.Int64: + i := binary.LittleEndian.Uint64(v.lowerBounds) + return parquet.Int64Value(int64(i)) + case parquet.ByteArray: + return parquet.ByteArrayValue(v.lowerBounds) + default: + return parquet.ByteArrayValue(v.lowerBounds) + } +} + +func (v *virtualColumnIndex) MaxValue(int) parquet.Value { + switch v.pType.Kind() { + case parquet.Int64: + i := binary.LittleEndian.Uint64(v.upperBounds) + return parquet.Int64Value(int64(i)) + case parquet.ByteArray: + return parquet.ByteArrayValue(v.upperBounds) + default: + return parquet.ByteArrayValue(v.upperBounds) + } +} + +func (v *virtualColumnIndex) IsAscending() bool { return true } +func (v *virtualColumnIndex) IsDescending() bool { return false } diff --git a/storage/iceberg_test.go b/storage/iceberg_test.go new file mode 100644 index 000000000..3a9d3bc6b --- /dev/null +++ b/storage/iceberg_test.go @@ -0,0 +1,80 @@ +package storage + +import ( + "bytes" + "context" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/oklog/ulid" + "github.com/parquet-go/parquet-go" + "github.com/polarsignals/iceberg-go/catalog" + "github.com/polarsignals/iceberg-go/table" + "github.com/stretchr/testify/require" + "github.com/thanos-io/objstore" +) + +func Test_IcebergMaintenance(t *testing.T) { + 
defaultWriterOptions = []table.WriterOption{ + table.WithManifestSizeBytes(8 * 1024 * 1024), // 8MiB manifest size + table.WithMergeSchema(), + table.WithExpireSnapshotsOlderThan(time.Nanosecond), + table.WithMetadataDeleteAfterCommit(), + table.WithMetadataPreviousVersionsMax(0), + } + bucket := objstore.NewInMemBucket() + iceberg, err := NewIceberg("/", catalog.NewHDFS("/", bucket), bucket) + require.NoError(t, err) + + type Element struct { + Name, Symbol string + Number int + Mass float64 + } + + b := &bytes.Buffer{} + err = parquet.Write(b, []Element{ + {"Hydrogen", "H", 1, 1.00794}, + {"Helium", "He", 2, 4.002602}, + {"Lithium", "Li", 3, 6.941}, + {"Beryllium", "Be", 4, 9.012182}, + }) + require.NoError(t, err) + + ctx := context.Background() + require.NoError(t, iceberg.Upload(ctx, "db/table/ulid/data.parquet", b)) + + var modtime time.Time + var deleted string + require.NoError(t, bucket.Iter(ctx, "", func(name string) error { + if strings.HasSuffix(name, ".parquet") { // Found the data file + deleted = name + id, err := ulid.Parse(filepath.Base(strings.TrimSuffix(name, ".parquet"))) + require.NoError(t, err) + modtime = ulid.Time(id.Time()) + } + return nil + }, objstore.WithRecursiveIter)) + + b.Reset() + err = parquet.Write(b, []Element{ + {"Boron", "B", 5, 10.811}, + {"Carbon", "C", 6, 12.0107}, + {"Nitrogen", "N", 7, 14.0067}, + {"Oxygen", "O", 8, 15.9994}, + }) + require.NoError(t, err) + + require.NoError(t, iceberg.Upload(ctx, "db/table/ulid/data.parquet", b)) + + iceberg.maxDataFileAge = time.Since(modtime) + iceberg.orphanedFileAge = 1 + require.NoError(t, iceberg.Maintenance(ctx)) // This maintenance will delete the first file from the table; And then remove the file from the bucket + + require.NoError(t, bucket.Iter(ctx, "", func(name string) error { + require.NotEqual(t, deleted, name) + return nil + }, objstore.WithRecursiveIter)) +} diff --git a/store.go b/store.go index 964f415e4..c3bc6df86 100644 --- a/store.go +++ b/store.go @@ -6,75 
+6,171 @@ import ( "io" "path/filepath" + "go.opentelemetry.io/otel/trace/noop" + "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/oklog/ulid" - "github.com/segmentio/parquet-go" + "github.com/oklog/ulid/v2" + "github.com/parquet-go/parquet-go" + "github.com/thanos-io/objstore" "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "golang.org/x/sync/errgroup" "github.com/polarsignals/frostdb/dynparquet" + "github.com/polarsignals/frostdb/query/expr" + "github.com/polarsignals/frostdb/query/logicalplan" + "github.com/polarsignals/frostdb/storage" ) +// DefaultBlockReaderLimit is the concurrency limit for reading blocks. +const DefaultBlockReaderLimit = 10 + // Persist uploads the block to the underlying bucket. func (t *TableBlock) Persist() error { - if t.table.db.bucket == nil { + if len(t.table.db.sinks) == 0 { return nil } - r, w := io.Pipe() - var err error - go func() { - defer w.Close() - err = t.Serialize(w) - }() - defer r.Close() + for i, sink := range t.table.db.sinks { + if i > 0 { + return fmt.Errorf("multiple sinks not supported") + } + r, w := io.Pipe() + var err error + go func() { + defer w.Close() + err = t.Serialize(w) + }() + defer r.Close() - fileName := filepath.Join(t.table.name, t.ulid.String(), "data.parquet") - if err := t.table.db.bucket.Upload(context.Background(), fileName, r); err != nil { - return fmt.Errorf("failed to upload block %v", err) - } + fileName := filepath.Join(t.table.db.name, t.table.name, t.ulid.String(), "data.parquet") + if err := sink.Upload(context.Background(), fileName, r); err != nil { + return fmt.Errorf("failed to upload block %v", err) + } - if err != nil { - return fmt.Errorf("failed to serialize block: %v", err) + if err != nil { + if deleteErr := sink.Delete(context.Background(), fileName); deleteErr != nil { + err = 
fmt.Errorf("%v failed to delete file on error: %w", err, deleteErr) + } + return fmt.Errorf("failed to serialize block: %w", err) + } } + + t.table.metrics.blockPersisted.Inc() return nil } -func (t *Table) IterateBucketBlocks( - ctx context.Context, - logger log.Logger, - lastBlockTimestamp uint64, - filter TrueNegativeFilter, - rowGroups chan<- any, -) error { - if t.db.bucket == nil || t.db.ignoreStorageOnQuery { +// DefaultObjstoreBucket is the default implementation of the DataSource and DataSink interface. +type DefaultObjstoreBucket struct { + storage.Bucket + tracer trace.Tracer + logger log.Logger + + blockReaderLimit int +} + +type DefaultObjstoreBucketOption func(*DefaultObjstoreBucket) + +func StorageWithBlockReaderLimit(limit int) DefaultObjstoreBucketOption { + return func(b *DefaultObjstoreBucket) { + b.blockReaderLimit = limit + } +} + +func StorageWithTracer(tracer trace.Tracer) DefaultObjstoreBucketOption { + return func(b *DefaultObjstoreBucket) { + b.tracer = tracer + } +} + +func StorageWithLogger(logger log.Logger) DefaultObjstoreBucketOption { + return func(b *DefaultObjstoreBucket) { + b.logger = logger + } +} + +func NewDefaultBucket(b storage.Bucket, options ...DefaultObjstoreBucketOption) *DefaultObjstoreBucket { + d := &DefaultObjstoreBucket{ + Bucket: b, + tracer: noop.NewTracerProvider().Tracer(""), + logger: log.NewNopLogger(), + blockReaderLimit: DefaultBlockReaderLimit, + } + + for _, option := range options { + option(d) + } + + return d +} + +func NewDefaultObjstoreBucket(b objstore.Bucket, options ...DefaultObjstoreBucketOption) *DefaultObjstoreBucket { + d := &DefaultObjstoreBucket{ + Bucket: storage.NewBucketReaderAt(b), + tracer: noop.NewTracerProvider().Tracer(""), + logger: log.NewNopLogger(), + blockReaderLimit: DefaultBlockReaderLimit, + } + + for _, option := range options { + option(d) + } + + return d +} + +func (b *DefaultObjstoreBucket) Prefixes(ctx context.Context, prefix string) ([]string, error) { + ctx, span := 
b.tracer.Start(ctx, "Source/Prefixes") + defer span.End() + + var prefixes []string + err := b.Iter(ctx, prefix, func(prefix string) error { + prefixes = append(prefixes, filepath.Base(prefix)) return nil + }) + if err != nil { + return nil, err } - ctx, span := t.tracer.Start(ctx, "Table/IterateBucketBlocks") + + return prefixes, nil +} + +func (b *DefaultObjstoreBucket) String() string { + return b.Name() +} + +func (b *DefaultObjstoreBucket) Scan(ctx context.Context, prefix string, _ *dynparquet.Schema, filter logicalplan.Expr, lastBlockTimestamp uint64, callback func(context.Context, any) error) error { + ctx, span := b.tracer.Start(ctx, "Source/Scan") span.SetAttributes(attribute.Int64("lastBlockTimestamp", int64(lastBlockTimestamp))) defer span.End() + f, err := expr.BooleanExpr(filter) + if err != nil { + return err + } + n := 0 errg := &errgroup.Group{} - errg.SetLimit(t.config.blockReaderLimit) - err := t.db.bucket.Iter(ctx, t.name, func(blockDir string) error { + errg.SetLimit(int(b.blockReaderLimit)) + err = b.Iter(ctx, prefix, func(blockDir string) error { n++ - errg.Go(func() error { return t.ProcessFile(ctx, blockDir, lastBlockTimestamp, filter, rowGroups) }) + errg.Go(func() error { return b.ProcessFile(ctx, blockDir, lastBlockTimestamp, f, callback) }) return nil }) if err != nil { return err } - level.Debug(logger).Log("msg", "read blocks", "n", n) + span.SetAttributes(attribute.Int("blocks", n)) + level.Debug(b.logger).Log("msg", "read blocks", "n", n) return errg.Wait() } -func (t *Table) openBlockFile(ctx context.Context, blockName string, size int64) (*parquet.File, error) { - ctx, span := t.tracer.Start(ctx, "Table/IterateBucketBlocks/Iter/OpenFile") +func (b *DefaultObjstoreBucket) openBlockFile(ctx context.Context, blockName string, size int64) (*parquet.File, error) { + ctx, span := b.tracer.Start(ctx, "Source/Scan/OpenFile") defer span.End() - r, err := t.db.bucket.GetReaderAt(ctx, blockName) + r, err := b.GetReaderAt(ctx, blockName) if 
err != nil { return nil, err } @@ -82,20 +178,20 @@ func (t *Table) openBlockFile(ctx context.Context, blockName string, size int64) file, err := parquet.OpenFile( r, size, - parquet.ReadBufferSize(5*1024*1024), // 5MB read buffers + parquet.ReadBufferSize(5*MiB), // 5MB read buffers parquet.SkipBloomFilters(true), parquet.FileReadMode(parquet.ReadModeAsync), ) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to open block: %s :%v", blockName, err) } return file, nil } // ProcessFile will process a bucket block parquet file. -func (t *Table) ProcessFile(ctx context.Context, blockDir string, lastBlockTimestamp uint64, filter TrueNegativeFilter, rowGroups chan<- any) error { - ctx, span := t.tracer.Start(ctx, "Table/IterateBucketBlocks/Iter/ProcessFile") +func (b *DefaultObjstoreBucket) ProcessFile(ctx context.Context, blockDir string, lastBlockTimestamp uint64, filter expr.TrueNegativeFilter, callback func(context.Context, any) error) error { + ctx, span := b.tracer.Start(ctx, "Source/Scan/ProcessFile") defer span.End() blockUlid, err := ulid.Parse(filepath.Base(blockDir)) @@ -106,18 +202,31 @@ func (t *Table) ProcessFile(ctx context.Context, blockDir string, lastBlockTimes span.SetAttributes(attribute.String("ulid", blockUlid.String())) if lastBlockTimestamp != 0 && blockUlid.Time() >= lastBlockTimestamp { + level.Debug(b.logger).Log( + "msg", "ignoring block due to last block timestamp", + "blockTime", blockUlid.Time(), + "lastBlockTimestamp", lastBlockTimestamp, + ) return nil } blockName := filepath.Join(blockDir, "data.parquet") - attribs, err := t.db.bucket.Attributes(ctx, blockName) + attribs, err := b.Attributes(ctx, blockName) if err != nil { return err } span.SetAttributes(attribute.Int64("size", attribs.Size)) - file, err := t.openBlockFile(ctx, blockName, attribs.Size) + if attribs.Size == 0 { + level.Debug(b.logger).Log( + "msg", "ignoring empty block", + "blockTime", blockUlid.Time(), + ) + return nil + } + + file, err := 
b.openBlockFile(ctx, blockName, attribs.Size) if err != nil { return err } @@ -128,22 +237,20 @@ func (t *Table) ProcessFile(ctx context.Context, blockDir string, lastBlockTimes return err } - return t.filterRowGroups(ctx, buf, filter, rowGroups) + return b.filterRowGroups(ctx, buf, filter, callback) } -func (t *Table) filterRowGroups(ctx context.Context, buf *dynparquet.SerializedBuffer, filter TrueNegativeFilter, rowGroups chan<- any) error { - _, span := t.tracer.Start(ctx, "Table/filterRowGroups") - defer span.End() - span.SetAttributes(attribute.Int("row_groups", buf.NumRowGroups())) - +func (b *DefaultObjstoreBucket) filterRowGroups(ctx context.Context, buf *dynparquet.SerializedBuffer, filter expr.TrueNegativeFilter, callback func(context.Context, any) error) error { for i := 0; i < buf.NumRowGroups(); i++ { rg := buf.DynamicRowGroup(i) - mayContainUsefulData, err := filter.Eval(rg) + mayContainUsefulData, err := filter.Eval(rg, false) if err != nil { return err } if mayContainUsefulData { - rowGroups <- rg + if err := callback(ctx, rg); err != nil { + return err + } } } diff --git a/table.go b/table.go index 2f155de29..47f5a805d 100644 --- a/table.go +++ b/table.go @@ -6,40 +6,40 @@ import ( "errors" "fmt" "io" - "math" "math/rand" - "reflect" - "regexp" + "path/filepath" "runtime" "sort" - "strings" "sync" "sync/atomic" "time" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/apache/arrow-go/v18/arrow/util" + "github.com/dustin/go-humanize" "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/google/btree" - "github.com/google/uuid" - 
"github.com/oklog/ulid" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/segmentio/parquet-go" + "github.com/oklog/ulid/v2" + "github.com/parquet-go/parquet-go" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" "golang.org/x/sync/errgroup" + "google.golang.org/protobuf/proto" - "github.com/polarsignals/frostdb/bufutils" "github.com/polarsignals/frostdb/dynparquet" schemapb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha1" schemav2pb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha2" + tablepb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/table/v1alpha1" walpb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/wal/v1alpha1" + "github.com/polarsignals/frostdb/index" + "github.com/polarsignals/frostdb/internal/records" "github.com/polarsignals/frostdb/parts" "github.com/polarsignals/frostdb/pqarrow" "github.com/polarsignals/frostdb/query/logicalplan" + "github.com/polarsignals/frostdb/query/physicalplan" + "github.com/polarsignals/frostdb/recovery" walpkg "github.com/polarsignals/frostdb/wal" ) @@ -48,6 +48,16 @@ var ( ErrTableClosing = fmt.Errorf("table closing") ) +// DefaultIndexConfig returns the default level configs used. This is a function +// So that any modifications to the result will not affect the default config. 
+func DefaultIndexConfig() []*index.LevelConfig { + return []*index.LevelConfig{ + {Level: index.L0, MaxSize: 1024 * 1024 * 15, Type: index.CompactionTypeParquetMemory}, // Compact to in-memory Parquet buffer after 15MiB of data + {Level: index.L1, MaxSize: 1024 * 1024 * 128, Type: index.CompactionTypeParquetMemory}, // Compact to a single in-memory Parquet buffer after 128MiB of Parquet files + {Level: index.L2, MaxSize: 1024 * 1024 * 512}, // Final level. Rotate after 512MiB of Parquet files + } +} + type ErrWriteRow struct{ err error } func (e ErrWriteRow) Error() string { return "failed to write row: " + e.err.Error() } @@ -62,47 +72,80 @@ func (e ErrCreateSchemaWriter) Error() string { return "failed to create schema write: " + e.err.Error() } -type TableConfig struct { - schema *dynparquet.Schema - // rowGroupSize is the desired number of rows in each row group. - rowGroupSize int - blockReaderLimit int - disableWAL bool -} - -type TableOption func(*TableConfig) error +type TableOption func(*tablepb.TableConfig) error // WithRowGroupSize sets the size in number of rows for each row group for parquet files. A <= 0 value indicates no limit. func WithRowGroupSize(numRows int) TableOption { - return func(config *TableConfig) error { - config.rowGroupSize = numRows + return func(config *tablepb.TableConfig) error { + config.RowGroupSize = uint64(numRows) return nil } } // WithBlockReaderLimit sets the limit of go routines that will be used to read persisted block files. A negative number indicates no limit. func WithBlockReaderLimit(n int) TableOption { - return func(config *TableConfig) error { - config.blockReaderLimit = n + return func(config *tablepb.TableConfig) error { + config.BlockReaderLimit = uint64(n) return nil } } // WithoutWAL disables the WAL for this table. 
func WithoutWAL() TableOption { - return func(config *TableConfig) error { - config.disableWAL = true + return func(config *tablepb.TableConfig) error { + config.DisableWal = true + return nil + } +} + +func WithUniquePrimaryIndex(unique bool) TableOption { + return func(config *tablepb.TableConfig) error { + switch e := config.Schema.(type) { + case *tablepb.TableConfig_DeprecatedSchema: + e.DeprecatedSchema.UniquePrimaryIndex = unique + case *tablepb.TableConfig_SchemaV2: + e.SchemaV2.UniquePrimaryIndex = unique + } + return nil + } +} + +// FromConfig sets the table configuration from the given config. +// NOTE: that this does not override the schema even though that is included in the passed in config. +func FromConfig(config *tablepb.TableConfig) TableOption { + return func(cfg *tablepb.TableConfig) error { + if config.BlockReaderLimit != 0 { // the zero value is not a valid block reader limit + cfg.BlockReaderLimit = config.BlockReaderLimit + } + cfg.DisableWal = config.DisableWal + cfg.RowGroupSize = config.RowGroupSize return nil } } +func defaultTableConfig() *tablepb.TableConfig { + return &tablepb.TableConfig{ + BlockReaderLimit: uint64(runtime.GOMAXPROCS(0)), + } +} + func NewTableConfig( - schema *dynparquet.Schema, + schema proto.Message, options ...TableOption, -) *TableConfig { - t := &TableConfig{ - schema: schema, - blockReaderLimit: runtime.GOMAXPROCS(0), +) *tablepb.TableConfig { + t := defaultTableConfig() + + switch v := schema.(type) { + case *schemapb.Schema: + t.Schema = &tablepb.TableConfig_DeprecatedSchema{ + DeprecatedSchema: v, + } + case *schemav2pb.Schema: + t.Schema = &tablepb.TableConfig_SchemaV2{ + SchemaV2: v, + } + default: + panic(fmt.Sprintf("unsupported schema type: %T", v)) } for _, opt := range options { @@ -117,14 +160,108 @@ type completedBlock struct { tx uint64 } +// GenericTable is a wrapper around *Table that writes structs of type T. It +// consist of a generic arrow.Record builder that ingests structs of type T. 
+// The generated record is then passed to (*Table).InsertRecord. +// +// Struct tag `frostdb` is used to pass options for the schema for T. +// +// This api is opinionated. +// +// - Nested Columns are not supported +// +// # Tags +// +// Use `frostdb` to define tags that customizes field values. You can express +// everything needed to construct schema v1alpha1. +// +// Tags are defined as a comma separated list. The first item is the column +// name. Column name is optional, when omitted it is derived from the field name +// (snake_cased) +// +// Supported Tags +// +// delta_binary_packed | Delta binary packed encoding. +// brotli | Brotli compression. +// asc | Sorts in ascending order.Use asc(n) where n is an integer for sorting order. +// gzip | GZIP compression. +// snappy | Snappy compression. +// delta_length_byte_array | Delta Length Byte Array encoding. +// delta_byte_array | Delta Byte Array encoding. +// desc | Sorts in descending order.Use desc(n) where n is an integer for sorting order +// lz4_raw | LZ4_RAW compression. +// pre_hash | Prehash the column before storing it. +// null_first | When used wit asc nulls are smallest and with des nulls are largest. +// zstd | ZSTD compression. +// rle_dict | Dictionary run-length encoding. +// plain | Plain encoding. +// +// Example tagged Sample struct +// +// type Sample struct { +// ExampleType string `frostdb:"example_type,rle_dict,asc(0)"` +// Labels []Label `frostdb:"labels,rle_dict,null,dyn,asc(1),null_first"` +// Stacktrace []uuid.UUID `frostdb:"stacktrace,rle_dict,asc(3),null_first"` +// Timestamp int64 `frostdb:"timestamp,asc(2)"` +// Value int64 `frostdb:"value"` +// } +// +// # Dynamic columns +// +// Field of type map is a dynamic column by default. +// +// type Example struct { +// // Use supported tags to customize the column value +// Labels map[string]string `frostdb:"labels"` +// } +// +// # Repeated columns +// +// Fields of type []int64, []float64, []bool, and []string are supported. 
These +// are represented as arrow.LIST. +// +// Generated schema for the repeated columns applies all supported tags. By +// default repeated fields are nullable. You can safely pass nil slices for +// repeated columns. +type GenericTable[T any] struct { + *Table + mu sync.Mutex + build *records.Build[T] +} + +func (t *GenericTable[T]) Release() { + t.build.Release() +} + +// Write builds arrow.Record directly from values and calls (*Table).InsertRecord. +func (t *GenericTable[T]) Write(ctx context.Context, values ...T) (uint64, error) { + t.mu.Lock() + defer t.mu.Unlock() + err := t.build.Append(values...) + if err != nil { + return 0, err + } + return t.InsertRecord(ctx, t.build.NewRecord()) +} + +func NewGenericTable[T any](db *DB, name string, mem memory.Allocator, options ...TableOption) (*GenericTable[T], error) { + build := records.NewBuild[T](mem) + table, err := db.Table(name, NewTableConfig(build.Schema(name), options...)) + if err != nil { + return nil, err + } + return &GenericTable[T]{build: build, Table: table}, nil +} + type Table struct { db *DB name string - metrics *tableMetrics + metrics tableMetrics logger log.Logger tracer trace.Tracer - config *TableConfig + config atomic.Pointer[tablepb.TableConfig] + schema *dynparquet.Schema pendingBlocks map[*TableBlock]struct{} completedBlocks []completedBlock @@ -137,13 +274,25 @@ type Table struct { closing bool } +type Sync interface { + Sync() error +} + type WAL interface { Close() error Log(tx uint64, record *walpb.Record) error LogRecord(tx uint64, table string, record arrow.Record) error - Replay(handler func(tx uint64, record *walpb.Record) error) error + // Replay replays WAL records from the given first index. If firstIndex is + // 0, the first index read from the WAL is used (i.e. given a truncation, + // using 0 is still valid). If the given firstIndex is less than the WAL's + // first index on disk, the replay happens from the first index on disk. 
+ // If the handler panics, the WAL implementation will truncate the WAL up to + // the last valid index. + Replay(tx uint64, handler walpkg.ReplayHandlerFunc) error Truncate(tx uint64) error + Reset(nextTx uint64) error FirstIndex() (uint64, error) + LastIndex() (uint64, error) } type TableBlock struct { @@ -155,34 +304,46 @@ type TableBlock struct { minTx uint64 prevTx uint64 - size *atomic.Int64 - index *atomic.Pointer[btree.BTree] // *btree.BTree + // uncompressedInsertsSize keeps track of the cumulative L0 size. This is + // not the size of the block, since these inserts are eventually compressed. + // However, it serves to determine when to perform a snapshot, since these + // uncompressed inserts are stored in the WAL, and if the node crashes, it + // is obliged to re-read all of these uncompressed inserts into memory, + // potentially causing OOMs. + uncompressedInsertsSize atomic.Int64 + // lastSnapshotSize keeps track of the uncompressedInsertsSize when a + // snapshot was last triggered. 
+ lastSnapshotSize atomic.Int64 + + index *index.LSM pendingWritersWg sync.WaitGroup + pendingReadersWg sync.WaitGroup mtx *sync.RWMutex } -type tableMetrics struct { - blockRotated prometheus.Counter - granulesCreated prometheus.Counter - compactions prometheus.Counter - granulesSplits prometheus.Counter - rowsInserted prometheus.Counter - zeroRowsInserted prometheus.Counter - granulesCompactionAborted prometheus.Counter - rowInsertSize prometheus.Histogram - lastCompletedBlockTx prometheus.Gauge - numParts prometheus.Gauge - unsortedInserts prometheus.Counter - compactionMetrics *compactionMetrics +type Closer interface { + Close(cleanup bool) error +} + +func schemaFromTableConfig(tableConfig *tablepb.TableConfig) (*dynparquet.Schema, error) { + switch schema := tableConfig.Schema.(type) { + case *tablepb.TableConfig_DeprecatedSchema: + return dynparquet.SchemaFromDefinition(schema.DeprecatedSchema) + case *tablepb.TableConfig_SchemaV2: + return dynparquet.SchemaFromDefinition(schema.SchemaV2) + default: + // No schema defined for table; read/only table + return nil, nil + } } func newTable( db *DB, name string, - tableConfig *TableConfig, - reg prometheus.Registerer, + tableConfig *tablepb.TableConfig, + metrics tableMetrics, logger log.Logger, tracer trace.Tracer, wal WAL, @@ -197,93 +358,36 @@ func newTable( return nil, errors.New(msg) } - reg = prometheus.WrapRegistererWith(prometheus.Labels{"table": name}, reg) + if tableConfig == nil { + tableConfig = defaultTableConfig() + } + + s, err := schemaFromTableConfig(tableConfig) + if err != nil { + return nil, err + } t := &Table{ - db: db, - config: tableConfig, - name: name, - logger: logger, - tracer: tracer, - mtx: &sync.RWMutex{}, - wal: wal, - metrics: &tableMetrics{ - numParts: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Name: "num_parts", - Help: "Number of parts currently active.", - }), - blockRotated: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "blocks_rotated_total", - 
Help: "Number of table blocks that have been rotated.", - }), - granulesCreated: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "granules_created_total", - Help: "Number of granules created.", - }), - compactions: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "granules_compactions_total", - Help: "Number of granule compactions.", - }), - granulesSplits: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "granules_splits_total", - Help: "Number of granules splits executed.", - }), - granulesCompactionAborted: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "granules_compaction_aborted_total", - Help: "Number of aborted granules compaction.", - }), - rowsInserted: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "rows_inserted_total", - Help: "Number of rows inserted into table.", - }), - zeroRowsInserted: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "zero_rows_inserted_total", - Help: "Number of times it was attempted to insert zero rows into the table.", - }), - rowInsertSize: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ - Name: "row_insert_size", - Help: "Size of batch inserts into table.", - Buckets: prometheus.ExponentialBuckets(1, 2, 10), - }), - lastCompletedBlockTx: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Name: "last_completed_block_tx", - Help: "Last completed block transaction.", - }), - unsortedInserts: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "unsorted_inserts_total", - Help: "The number of times a buffer to insert was not in sorted order.", - }), - compactionMetrics: newCompactionMetrics(reg, float64(db.columnStore.granuleSizeBytes)), - }, + db: db, + name: name, + logger: logger, + tracer: tracer, + mtx: &sync.RWMutex{}, + wal: wal, + schema: s, + metrics: metrics, } + // Store the table config + t.config.Store(tableConfig) + // Disable the WAL for this table by replacing any given WAL with a nop wal - if 
tableConfig.disableWAL { + if tableConfig.DisableWal { t.wal = &walpkg.NopWAL{} } t.pendingBlocks = make(map[*TableBlock]struct{}) - promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{ - Name: "index_size", - Help: "Number of granules in the table index currently.", - }, func() float64 { - if active := t.ActiveBlock(); active != nil { - return float64(active.Index().Len()) - } - return 0 - }) - - promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{ - Name: "active_table_block_size", - Help: "Size of the active table block in bytes.", - }, func() float64 { - if active := t.ActiveBlock(); active != nil { - return float64(active.Size()) - } - return 0 - }) - return t, nil } @@ -293,28 +397,14 @@ func (t *Table) newTableBlock(prevTx, tx uint64, id ulid.ULID) error { return err } - walTableBlock := &walpb.Entry_NewTableBlock{ - TableName: t.name, - BlockId: b, - } - - switch v := t.config.schema.Definition().(type) { - case *schemapb.Schema: - walTableBlock.Schema = &walpb.Entry_NewTableBlock_DeprecatedSchema{ - DeprecatedSchema: v, - } - case *schemav2pb.Schema: - walTableBlock.Schema = &walpb.Entry_NewTableBlock_SchemaV2{ - SchemaV2: v, - } - default: - return fmt.Errorf("unknown schema type: %t", v) - } - if err := t.wal.Log(tx, &walpb.Record{ Entry: &walpb.Entry{ EntryType: &walpb.Entry_NewTableBlock_{ - NewTableBlock: walTableBlock, + NewTableBlock: &walpb.Entry_NewTableBlock{ + TableName: t.name, + BlockId: b, + Config: t.config.Load(), + }, }, }, }); err != nil { @@ -332,29 +422,39 @@ func (t *Table) newTableBlock(prevTx, tx uint64, id ulid.ULID) error { func (t *Table) dropPendingBlock(block *TableBlock) { t.mtx.Lock() defer t.mtx.Unlock() - block.Index().Ascend(func(i btree.Item) bool { - g := i.(*Granule) - g.PartsForTx(math.MaxUint64, func(p *parts.Part) bool { - if r := p.Record(); r != nil { - r.Release() - } - return true - }) - return true - }) delete(t.pendingBlocks, block) + + // Wait for outstanding readers/writers to finish with the block before 
releasing underlying resources. + block.pendingReadersWg.Wait() + block.pendingWritersWg.Wait() + + if err := block.index.Close(); err != nil { + level.Error(t.logger).Log("msg", "failed to close index", "err", err) + } } -func (t *Table) writeBlock(block *TableBlock) { - level.Debug(t.logger).Log("msg", "syncing block") +func (t *Table) writeBlock( + block *TableBlock, nextTxn uint64, snapshotDB bool, opts ...RotateBlockOption, +) { + rbo := &rotateBlockOptions{} + for _, o := range opts { + o(rbo) + } + if rbo.wg != nil { + defer rbo.wg.Done() + } + level.Debug(t.logger).Log("msg", "syncing block", "next_txn", nextTxn, "ulid", block.ulid, "size", block.index.Size()) block.pendingWritersWg.Wait() // from now on, the block will no longer be modified, we can persist it to disk - level.Debug(t.logger).Log("msg", "done syncing block") + level.Debug(t.logger).Log("msg", "done syncing block", "next_txn", nextTxn, "ulid", block.ulid, "size", block.index.Size()) // Persist the block - err := block.Persist() + var err error + if !rbo.skipPersist && block.index.Size() != 0 { + err = block.Persist() + } t.dropPendingBlock(block) if err != nil { level.Error(t.logger).Log("msg", "failed to persist block") @@ -362,26 +462,37 @@ func (t *Table) writeBlock(block *TableBlock) { return } - tx, _, commit := t.db.begin() - defer commit() + if err := func() error { + tx, _, commit := t.db.begin() + defer commit() - buf, err := block.ulid.MarshalBinary() - if err != nil { - level.Error(t.logger).Log("msg", "failed to record block persistence in WAL: marshal ulid", "err", err) - return - } + buf, err := block.ulid.MarshalBinary() + if err != nil { + level.Error(t.logger).Log("msg", "failed to record block persistence in WAL: marshal ulid", "err", err) + return err + } - if err := t.wal.Log(tx, &walpb.Record{ - Entry: &walpb.Entry{ - EntryType: &walpb.Entry_TableBlockPersisted_{ - TableBlockPersisted: &walpb.Entry_TableBlockPersisted{ - TableName: t.name, - BlockId: buf, + 
level.Debug(t.logger).Log("msg", "recording block persistence in WAL", "ulid", block.ulid, "txn", tx) + if err := t.wal.Log(tx, &walpb.Record{ + Entry: &walpb.Entry{ + EntryType: &walpb.Entry_TableBlockPersisted_{ + TableBlockPersisted: &walpb.Entry_TableBlockPersisted{ + TableName: t.name, + BlockId: buf, + // NOTE: nextTxn is used here instead of tx, since some + // writes could have happened between block rotation + // and the txn beginning above. + NextTx: nextTxn, + }, }, }, - }, - }); err != nil { - level.Error(t.logger).Log("msg", "failed to record block persistence in WAL", "err", err) + }); err != nil { + level.Error(t.logger).Log("msg", "failed to record block persistence in WAL", "err", err) + return err + } + + return nil + }(); err != nil { return } @@ -390,48 +501,125 @@ func (t *Table) writeBlock(block *TableBlock) { sort.Slice(t.completedBlocks, func(i, j int) bool { return t.completedBlocks[i].prevTx < t.completedBlocks[j].prevTx }) - for { - if len(t.completedBlocks) == 0 { - break - } - if t.completedBlocks[0].prevTx != t.lastCompleted { - break - } - + for len(t.completedBlocks) > 0 && t.completedBlocks[0].prevTx == t.lastCompleted { t.lastCompleted = t.completedBlocks[0].tx t.metrics.lastCompletedBlockTx.Set(float64(t.lastCompleted)) t.completedBlocks = t.completedBlocks[1:] } t.mtx.Unlock() t.db.maintainWAL() + if snapshotDB && t.db.columnStore.snapshotTriggerSize != 0 && t.db.columnStore.enableWAL { + func() { + if !t.db.snapshotInProgress.CompareAndSwap(false, true) { + // Snapshot already in progress. This could lead to duplicate + // data when replaying (refer to the snapshot design document), + // but discarding this data on recovery is better than a + // potential additional CPU spike caused by another snapshot. + return + } + defer t.db.snapshotInProgress.Store(false) + // This snapshot snapshots the new, active, table block. Refer to + // the snapshot design document for more details as to why this + // snapshot is necessary. 
+ // context.Background is used here for the snapshot since callers + // might cancel the context when the write is finished but the + // snapshot is not. Note that block.Persist does the same. + // TODO(asubiotto): Eventually we should register a cancel function + // that is called with a grace period on db.Close. + ctx := context.Background() + tx := t.db.beginRead() + if err := t.db.snapshotAtTX(ctx, tx, t.db.snapshotWriter(tx)); err != nil { + level.Error(t.logger).Log( + "msg", "failed to write snapshot on block rotation", + "err", err, + ) + } + if err := t.db.reclaimDiskSpace(ctx, nil); err != nil { + level.Error(t.logger).Log( + "msg", "failed to reclaim disk space after snapshot on block rotation", + "err", err, + ) + return + } + }() + } +} + +type rotateBlockOptions struct { + skipPersist bool + wg *sync.WaitGroup +} + +type RotateBlockOption func(*rotateBlockOptions) + +// WithRotateBlockSkipPersist instructs the block rotation operation to not +// persist the block to object storage. +func WithRotateBlockSkipPersist() RotateBlockOption { + return func(o *rotateBlockOptions) { + o.skipPersist = true + } } -func (t *Table) RotateBlock(block *TableBlock) error { +// WithRotateBlockWaitGroup provides a WaitGroup. The rotate block operation +// will call wg.Done once the block has been persisted. Otherwise, RotateBlock +// asynchronously persists the block. +func WithRotateBlockWaitGroup(wg *sync.WaitGroup) RotateBlockOption { + return func(o *rotateBlockOptions) { + o.wg = wg + } +} + +func (t *Table) RotateBlock(_ context.Context, block *TableBlock, opts ...RotateBlockOption) error { + rbo := &rotateBlockOptions{} + for _, o := range opts { + o(rbo) + } t.mtx.Lock() defer t.mtx.Unlock() // Need to check that we haven't already rotated this block. 
if t.active != block { + if rbo.wg != nil { + rbo.wg.Done() + } return nil } - level.Debug(t.logger).Log("msg", "rotating block", "blockSize", block.Size()) - defer func() { - level.Debug(t.logger).Log("msg", "done rotating block") - }() + level.Debug(t.logger).Log( + "msg", "rotating block", + "ulid", block.ulid, + "size", block.Size(), + "skip_persist", rbo.skipPersist, + ) + defer level.Debug(t.logger).Log("msg", "done rotating block", "ulid", block.ulid) tx, _, commit := t.db.begin() defer commit() id := generateULID() + for id.Time() == block.ulid.Time() { // Ensure the new block has a different timestamp. + // Sleep a millisecond to ensure the ULID has a different timestamp. + time.Sleep(time.Millisecond) + id = generateULID() + } if err := t.newTableBlock(t.active.minTx, tx, id); err != nil { return err } t.metrics.blockRotated.Inc() t.metrics.numParts.Set(float64(0)) - t.pendingBlocks[block] = struct{}{} - go t.writeBlock(block) + if !rbo.skipPersist { + // If skipping persist, this block rotation is simply a block discard, + // so no need to add this block to pending blocks. Some callers rely + // on the fact that blocks are not available for reads as soon as + // RotateBlock returns with skipPersist=true. + t.pendingBlocks[block] = struct{}{} + } + // We don't check t.db.columnStore.manualBlockRotation here because this is + // the entry point for users to trigger a manual block rotation and they + // will specify through skipPersist if they want the block to be persisted. + go t.writeBlock(block, tx, true, opts...) + return nil } @@ -455,255 +643,91 @@ func (t *Table) ActiveWriteBlock() (*TableBlock, func(), error) { } func (t *Table) Schema() *dynparquet.Schema { - if t.config == nil { + if t.config.Load() == nil { return nil } - return t.config.schema + return t.schema } func (t *Table) EnsureCompaction() error { return t.ActiveBlock().EnsureCompaction() } -// Write objects into the table. 
-func (t *Table) Write(ctx context.Context, vals ...any) (uint64, error) { - b, err := ValuesToBuffer(t.Schema(), vals...) - if err != nil { - return 0, err - } - - return t.InsertBuffer(ctx, b) -} - -var ( - matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)") - matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])") -) - -func ToSnakeCase(str string) string { - snake := matchFirstCap.ReplaceAllString(str, "${1}_${2}") - snake = matchAllCap.ReplaceAllString(snake, "${1}_${2}") - return strings.ToLower(snake) -} - -func ValuesToBuffer(schema *dynparquet.Schema, vals ...any) (*dynparquet.Buffer, error) { - dynamicColumns := map[string][]string{} - rows := make([]parquet.Row, 0, len(vals)) - - findColumn := func(val reflect.Value, col string, v any) any { - for i := 0; i < val.NumField(); i++ { - if ToSnakeCase(val.Type().Field(i).Name) == col { - return val.Field(i).Interface() - } - } - return nil - } - - // Collect dynamic columns - for _, v := range vals { - val := reflect.ValueOf(v) - for _, col := range schema.Columns() { - cv := findColumn(val, col.Name, v) - switch col.Dynamic { - case true: - switch reflect.TypeOf(cv).Kind() { - case reflect.Struct: - dynVals := reflect.ValueOf(cv) - for j := 0; j < dynVals.NumField(); j++ { - dynamicColumns[col.Name] = append(dynamicColumns[col.Name], ToSnakeCase(dynVals.Type().Field(j).Name)) - } - case reflect.Slice: - dynVals := reflect.ValueOf(cv) - for j := 0; j < dynVals.Len(); j++ { - pair := reflect.ValueOf(dynVals.Index(j).Interface()) - dynamicColumns[col.Name] = append(dynamicColumns[col.Name], ToSnakeCase(fmt.Sprintf("%v", pair.Field(0)))) - } - default: - return nil, fmt.Errorf("unsupported dynamic type") - } - } - } - } - - dynamicColumns = bufutils.Dedupe(dynamicColumns) - - // Create all rows - for _, v := range vals { - row := []parquet.Value{} - val := reflect.ValueOf(v) - - colIdx := 0 - for _, col := range schema.Columns() { - cv := findColumn(val, col.Name, v) - switch col.Dynamic { - case true: - 
switch reflect.TypeOf(cv).Kind() { - case reflect.Struct: - dynVals := reflect.ValueOf(cv) - for _, dyncol := range dynamicColumns[col.Name] { - found := false - for j := 0; j < dynVals.NumField(); j++ { - if ToSnakeCase(dynVals.Type().Field(j).Name) == dyncol { - row = append(row, parquet.ValueOf(dynVals.Field(j).Interface()).Level(0, 1, colIdx)) - colIdx++ - found = true - break - } - } - if !found { - row = append(row, parquet.ValueOf(nil).Level(0, 0, colIdx)) - colIdx++ - } - } - case reflect.Slice: - dynVals := reflect.ValueOf(cv) - for _, dyncol := range dynamicColumns[col.Name] { - found := false - for j := 0; j < dynVals.Len(); j++ { - pair := reflect.ValueOf(dynVals.Index(j).Interface()) - if ToSnakeCase(fmt.Sprintf("%v", pair.Field(0).Interface())) == dyncol { - row = append(row, parquet.ValueOf(pair.Field(1).Interface()).Level(0, 1, colIdx)) - colIdx++ - found = true - break - } - } - if !found { - row = append(row, parquet.ValueOf(nil).Level(0, 0, colIdx)) - colIdx++ - } - } - default: - return nil, fmt.Errorf("unsupported dynamic type") - } - default: - switch t := cv.(type) { - case []uuid.UUID: // Special handling for this type - row = append(row, parquet.ValueOf(dynparquet.ExtractLocationIDs(t)).Level(0, 0, colIdx)) - default: - row = append(row, parquet.ValueOf(cv).Level(0, 0, colIdx)) - } - colIdx++ - } - } - - rows = append(rows, row) - } - - pb, err := schema.NewBuffer(dynamicColumns) - if err != nil { - return nil, err - } - - _, err = pb.WriteRows(rows) - if err != nil { - return nil, err - } - - return pb, nil -} - func (t *Table) InsertRecord(ctx context.Context, record arrow.Record) (uint64, error) { - block, close, err := t.appender() + block, finish, err := t.appender(ctx) if err != nil { return 0, fmt.Errorf("get appender: %w", err) } - defer close() + defer finish() tx, _, commit := t.db.begin() defer commit() - if err := t.wal.LogRecord(tx, t.name, record); err != nil { + preHashedRecord := dynparquet.PrehashColumns(t.schema, record) + 
defer preHashedRecord.Release() + + if err := t.wal.LogRecord(tx, t.name, preHashedRecord); err != nil { return tx, fmt.Errorf("append to log: %w", err) } - if err := block.InsertRecord(ctx, tx, record); err != nil { + if err := block.InsertRecord(ctx, tx, preHashedRecord); err != nil { return tx, fmt.Errorf("insert buffer into block: %w", err) } return tx, nil } -func (t *Table) InsertBuffer(ctx context.Context, buf *dynparquet.Buffer) (uint64, error) { - b := bytes.NewBuffer(nil) - err := t.config.schema.SerializeBuffer(b, buf) // TODO should we abort this function? If a large buffer is passed this could get long potentially... - if err != nil { - return 0, fmt.Errorf("serialize buffer: %w", err) - } - - return t.Insert(ctx, b.Bytes()) -} - -func (t *Table) Insert(ctx context.Context, buf []byte) (uint64, error) { - return t.insert(ctx, buf) -} - -func (t *Table) appendToLog(ctx context.Context, tx uint64, buf []byte) error { - if err := t.wal.Log(tx, &walpb.Record{ - Entry: &walpb.Entry{ - EntryType: &walpb.Entry_Write_{ - Write: &walpb.Entry_Write{ - Data: buf, - TableName: t.name, - }, - }, - }, - }); err != nil { - return err - } - return nil -} - -func (t *Table) appender() (*TableBlock, func(), error) { +func (t *Table) appender(ctx context.Context) (*TableBlock, func(), error) { for { // Using active write block is important because it ensures that we don't // miss pending writers when synchronizing the block. - block, close, err := t.ActiveWriteBlock() + block, finish, err := t.ActiveWriteBlock() if err != nil { return nil, nil, err } - if block.Size() < t.db.columnStore.activeMemorySize { - return block, close, nil + uncompressedInsertsSize := block.uncompressedInsertsSize.Load() + if t.db.columnStore.snapshotTriggerSize != 0 && + // If size-lastSnapshotSize > snapshotTriggerSize (a column store + // option), a new snapshot is triggered. This is basically the size + // of the new data in this block since the last snapshot. 
+ uncompressedInsertsSize-block.lastSnapshotSize.Load() > t.db.columnStore.snapshotTriggerSize { + // context.Background is used here for the snapshot since callers + // might cancel the context when the write is finished but the + // snapshot is not. + // TODO(asubiotto): Eventually we should register a cancel function + // that is called with a grace period on db.Close. + t.db.asyncSnapshot(context.Background(), func() { + level.Debug(t.logger).Log( + "msg", "successful snapshot on block size trigger", + "block_size", humanize.IBytes(uint64(uncompressedInsertsSize)), + "last_snapshot_size", humanize.IBytes(uint64(block.lastSnapshotSize.Load())), + ) + block.lastSnapshotSize.Store(uncompressedInsertsSize) + if err := t.db.reclaimDiskSpace(context.Background(), nil); err != nil { + level.Error(t.logger).Log( + "msg", "failed to reclaim disk space after snapshot", + "err", err, + ) + return + } + }) + } + blockSize := block.Size() + if blockSize < t.db.columnStore.activeMemorySize || t.db.columnStore.manualBlockRotation { + return block, finish, nil } // We need to rotate the block and the writer won't actually be used. 
- close() + finish() - err = t.RotateBlock(block) + err = t.RotateBlock(ctx, block) if err != nil { return nil, nil, fmt.Errorf("rotate block: %w", err) } } } -func (t *Table) insert(ctx context.Context, buf []byte) (uint64, error) { - block, close, err := t.appender() - if err != nil { - return 0, fmt.Errorf("get appender: %w", err) - } - defer close() - - tx, _, commit := t.db.begin() - defer commit() - - if err := t.appendToLog(ctx, tx, buf); err != nil { - return tx, fmt.Errorf("append to log: %w", err) - } - - serBuf, err := dynparquet.ReaderFromBytes(buf) - if err != nil { - return tx, fmt.Errorf("deserialize buffer: %w", err) - } - - err = block.Insert(ctx, tx, serBuf) - if err != nil { - return tx, fmt.Errorf("insert buffer into block: %w", err) - } - - return tx, nil -} - func (t *Table) View(ctx context.Context, fn func(ctx context.Context, tx uint64) error) error { ctx, span := t.tracer.Start(ctx, "Table/View") tx := t.db.beginRead() @@ -712,20 +736,6 @@ func (t *Table) View(ctx context.Context, fn func(ctx context.Context, tx uint64 return fn(ctx, tx) } -// prepareForFlush sets the nullability on a Record column if the type is a -// ListType. -// TODO: Is this a bug in arrow? We already set the nullability in -// parquetColumnToArrowArray, but it doesn't appear to transfer into the -// resulting array's type. Needs to be investigated. -func prepareForFlush(r arrow.Record, schema *parquet.Schema) { - for i, c := range r.Columns() { - switch t := c.DataType().(type) { - case *arrow.ListType: - t.SetElemNullable(schema.Fields()[i].Optional()) - } - } -} - // Iterator iterates in order over all granules in the table. It stops iterating when the iterator function returns false. 
func (t *Table) Iterator( ctx context.Context, @@ -748,6 +758,16 @@ func (t *Table) Iterator( return errors.New("no callbacks provided") } rowGroups := make(chan any, len(callbacks)*4) // buffer up to 4 row groups per callback + defer func() { // Drain the channel of any leftover parts due to cancellation or error + for rg := range rowGroups { + switch t := rg.(type) { + case index.ReleaseableRowGroup: + t.Release() + case arrow.Record: + t.Release() + } + } + }() // Previously we sorted all row groups into a single row group here, // but it turns out that none of the downstream uses actually rely on @@ -762,12 +782,10 @@ func (t *Table) Iterator( errg, ctx := errgroup.WithContext(ctx) for _, callback := range callbacks { callback := callback - errg.Go(func() error { + errg.Go(recovery.Do(func() error { converter := pqarrow.NewParquetConverter(pool, *iterOpts) defer converter.Close() - var rgSchema *parquet.Schema - for { select { case <-ctx.Done(): @@ -775,27 +793,47 @@ func (t *Table) Iterator( case rg, ok := <-rowGroups: if !ok { r := converter.NewRecord() - if r == nil || r.NumRows() == 0 { + if r == nil { return nil } - prepareForFlush(r, rgSchema) - if err := callback(ctx, r); err != nil { - return err + defer r.Release() + if r.NumRows() == 0 { + return nil } - r.Release() - return nil + return callback(ctx, r) } - switch t := rg.(type) { + switch rg := rg.(type) { case arrow.Record: - err := callback(ctx, t) - t.Release() + defer rg.Release() + r := pqarrow.Project(rg, iterOpts.PhysicalProjection) + defer r.Release() + err := callback(ctx, r) if err != nil { return err } + case index.ReleaseableRowGroup: + defer rg.Release() + if err := converter.Convert(ctx, rg, t.schema); err != nil { + return fmt.Errorf("failed to convert row group to arrow record: %v", err) + } + // This RowGroup had no relevant data. Ignore it. 
+ if len(converter.Fields()) == 0 { + continue + } + if converter.NumRows() >= bufferSize { + err := func() error { + r := converter.NewRecord() + defer r.Release() + converter.Reset() // Reset the converter to drop any dictionaries that were built. + return callback(ctx, r) + }() + if err != nil { + return err + } + } case dynparquet.DynamicRowGroup: - rgSchema = t.Schema() - if err := converter.Convert(ctx, t); err != nil { + if err := converter.Convert(ctx, rg, t.schema); err != nil { return fmt.Errorf("failed to convert row group to arrow record: %v", err) } // This RowGroup had no relevant data. Ignore it. @@ -803,27 +841,27 @@ func (t *Table) Iterator( continue } if converter.NumRows() >= bufferSize { - r := converter.NewRecord() - prepareForFlush(r, rgSchema) - if err := callback(ctx, r); err != nil { + err := func() error { + r := converter.NewRecord() + defer r.Release() + converter.Reset() // Reset the converter to drop any dictionaries that were built. + return callback(ctx, r) + }() + if err != nil { return err } - r.Release() } default: return fmt.Errorf("unknown row group type: %T", t) } } } - }) + }, t.logger)) } errg.Go(func() error { - if err := t.collectRowGroups(ctx, tx, iterOpts.Filter, rowGroups); err != nil { - return err - } - close(rowGroups) - return nil + defer close(rowGroups) + return t.collectRowGroups(ctx, tx, iterOpts.Filter, iterOpts.ReadMode, rowGroups) }) return errg.Wait() @@ -853,6 +891,16 @@ func (t *Table) SchemaIterator( } rowGroups := make(chan any, len(callbacks)*4) // buffer up to 4 row groups per callback + defer func() { // Drain the channel of any leftover parts due to cancellation or error + for rg := range rowGroups { + switch t := rg.(type) { + case index.ReleaseableRowGroup: + t.Release() + case arrow.Record: + t.Release() + } + } + }() schema := arrow.NewSchema( []arrow.Field{ @@ -864,7 +912,7 @@ func (t *Table) SchemaIterator( errg, ctx := errgroup.WithContext(ctx) for _, callback := range callbacks { callback := 
callback - errg.Go(func() error { + errg.Go(recovery.Do(func() error { for { select { case <-ctx.Done(): @@ -878,8 +926,8 @@ func (t *Table) SchemaIterator( switch t := rg.(type) { case arrow.Record: - for _, f := range t.Schema().Fields() { - b.Field(0).(*array.StringBuilder).Append(f.Name) + for i := 0; i < t.Schema().NumFields(); i++ { + b.Field(0).(*array.StringBuilder).Append(t.Schema().Field(i).Name) } record := b.NewRecord() err := callback(ctx, record) @@ -888,10 +936,11 @@ func (t *Table) SchemaIterator( if err != nil { return err } - case dynparquet.DynamicRowGroup: + case index.ReleaseableRowGroup: if rg == nil { return errors.New("received nil rowGroup") // shouldn't happen, but anyway } + defer t.Release() parquetFields := t.Schema().Fields() fieldNames := make([]string, 0, len(parquetFields)) for _, f := range parquetFields { @@ -906,16 +955,34 @@ func (t *Table) SchemaIterator( } record.Release() b.Release() - default: + case dynparquet.DynamicRowGroup: + if rg == nil { + return errors.New("received nil rowGroup") // shouldn't happen, but anyway + } + parquetFields := t.Schema().Fields() + fieldNames := make([]string, 0, len(parquetFields)) + for _, f := range parquetFields { + fieldNames = append(fieldNames, f.Name()) + } + + b.Field(0).(*array.StringBuilder).AppendValues(fieldNames, nil) + + record := b.NewRecord() + if err := callback(ctx, record); err != nil { + return err + } + record.Release() + b.Release() + default: return fmt.Errorf("unknown row group type: %T", t) } } } - }) + }, t.logger)) } errg.Go(func() error { - if err := t.collectRowGroups(ctx, tx, iterOpts.Filter, rowGroups); err != nil { + if err := t.collectRowGroups(ctx, tx, iterOpts.Filter, iterOpts.ReadMode, rowGroups); err != nil { return err } close(rowGroups) @@ -932,31 +999,43 @@ func generateULID() ulid.ULID { } func newTableBlock(table *Table, prevTx, tx uint64, id ulid.ULID) (*TableBlock, error) { - index := atomic.Pointer[btree.BTree]{} - 
index.Store(btree.New(table.db.columnStore.indexDegree)) - - return &TableBlock{ + tb := &TableBlock{ table: table, - index: &index, mtx: &sync.RWMutex{}, ulid: id, - size: &atomic.Int64{}, logger: table.logger, tracer: table.tracer, minTx: tx, prevTx: prevTx, - }, nil + } + + var err error + tb.index, err = index.NewLSM( + filepath.Join(table.db.indexDir(), table.name, id.String()), // Any index files are found at // + table.schema, + table.IndexConfig(), + table.db.HighWatermark, + index.LSMWithMetrics(&table.metrics.indexMetrics), + index.LSMWithLogger(table.logger), + ) + if err != nil { + return nil, fmt.Errorf("new LSM: %w", err) + } + + return tb, nil } // EnsureCompaction forces a TableBlock compaction. func (t *TableBlock) EnsureCompaction() error { - return t.compact(t.table.db.columnStore.compactionConfig) + return t.index.EnsureCompaction() } -func (t *TableBlock) InsertRecord(ctx context.Context, tx uint64, record arrow.Record) error { +func (t *TableBlock) InsertRecord(_ context.Context, tx uint64, record arrow.Record) error { + recordSize := util.TotalRecordSize(record) defer func() { t.table.metrics.rowsInserted.Add(float64(record.NumRows())) t.table.metrics.rowInsertSize.Observe(float64(record.NumRows())) + t.table.metrics.rowBytesInserted.Add(float64(recordSize)) }() if record.NumRows() == 0 { @@ -964,566 +1043,488 @@ func (t *TableBlock) InsertRecord(ctx context.Context, tx uint64, record arrow.R return nil } - if err := t.insertRecordToGranules(tx, record); err != nil { - return fmt.Errorf("failed to insert record into granules: %w", err) - } - + t.index.Add(tx, record) + t.table.metrics.numParts.Inc() + t.uncompressedInsertsSize.Add(recordSize) return nil } -func (t *TableBlock) Insert(ctx context.Context, tx uint64, buf *dynparquet.SerializedBuffer) error { - defer func() { - t.table.metrics.rowsInserted.Add(float64(buf.NumRows())) - t.table.metrics.rowInsertSize.Observe(float64(buf.NumRows())) - }() +// Size returns the cumulative size of all 
buffers in the table. This is roughly the size of the table in bytes. +func (t *TableBlock) Size() int64 { + return t.index.Size() +} - numRows := buf.NumRows() - if numRows == 0 { - t.table.metrics.zeroRowsInserted.Add(float64(buf.NumRows())) - return nil - } +// Index provides atomic access to the table index. +func (t *TableBlock) Index() *index.LSM { + return t.index +} - var dynRows *dynparquet.DynamicRows - { - rowBuf := make([]parquet.Row, numRows) - rows := buf.Reader() - defer rows.Close() +// Serialize the table block into a single Parquet file. +func (t *TableBlock) Serialize(writer io.Writer) error { + return t.index.Rotate(t.table.externalParquetCompaction(writer)) +} - // TODO(asubiotto): Add utility method to read all rows. - n := 0 - for int64(n) < numRows { - readN, err := rows.ReadRows(rowBuf[n:]) - for i := n; i < n+readN; i++ { - rowBuf[i] = rowBuf[i].Clone() - } - n += readN - if err != nil { - if errors.Is(err, io.EOF) { - break - } - return err - } - } +type ParquetWriter interface { + Flush() error + WriteRows([]parquet.Row) (int, error) + io.Closer +} - dynRows = dynparquet.NewDynamicRows( - rowBuf, - buf.ParquetFile().Schema(), - buf.DynamicColumns(), - buf.ParquetFile().Schema().Fields(), - ) - - if !dynRows.IsSorted(t.table.config.schema) { - // Input rows should be sorted. Eventually, we should return an error. - // However, for caution, we just increment a metric and sort the rows. - t.table.metrics.unsortedInserts.Inc() - sorter := dynparquet.NewDynamicRowSorter(t.table.config.schema, dynRows) - sort.Sort(sorter) - } - } +// parquetRowWriter is a stateful parquet row group writer. 
+type parquetRowWriter struct { + schema *dynparquet.Schema + w ParquetWriter - rowsToInsertPerGranule, err := t.splitRowsByGranule(dynRows) - if err != nil { - return fmt.Errorf("failed to split rows by granule: %w", err) - } + rowGroupSize int + maxNumRows int - b := bytes.NewBuffer(nil) - w, err := t.table.config.schema.GetWriter(b, buf.DynamicColumns()) - if err != nil { - return fmt.Errorf("failed to get writer: %w", err) - } - defer t.table.config.schema.PutWriter(w) - - list := make([]*parts.Part, 0) - for granule, indices := range rowsToInsertPerGranule { - select { - case <-ctx.Done(): - parts.Tombstone(list) - return ctx.Err() - default: - - for idx := range dynRows.Rows { - // Check if this index belongs in this granule - if _, ok := indices[idx]; !ok { - continue - } + rowGroupRowsWritten int + totalRowsWritten int + rowsBuf []parquet.Row +} - if _, err = w.WriteRows(dynRows.Rows[idx : idx+1]); err != nil { - return fmt.Errorf("failed to write rows: %w", err) - } - } - if err := w.Close(); err != nil { - return fmt.Errorf("failed to close writer: %w", err) - } +type parquetRowWriterOption func(p *parquetRowWriter) - serBuf, err := dynparquet.ReaderFromBytes(b.Bytes()) - if err != nil { - return fmt.Errorf("failed to get reader from bytes: %w", err) - } +// rowWriter returns a new Parquet row writer with the given dynamic columns. +// TODO(asubiotto): Can we delete this parquetRowWriter? 
+func (t *TableBlock) rowWriter(w ParquetWriter, options ...parquetRowWriterOption) (*parquetRowWriter, error) { + buffSize := 256 + config := t.table.config.Load() + if config.RowGroupSize > 0 { + buffSize = int(config.RowGroupSize) + } - part := parts.NewPart(tx, serBuf) - if granule == nil { // insert new granule with part - g, err := NewGranule(t.table.config, part) - if err != nil { - return fmt.Errorf("failed to create granule: %w", err) - } - t.table.metrics.granulesCreated.Inc() - - for { - old := t.Index() - t.mtx.Lock() - newIndex := old.Clone() - t.mtx.Unlock() - - newIndex.ReplaceOrInsert(g) - if t.index.CompareAndSwap(old, newIndex) { - t.table.metrics.numParts.Add(float64(1)) - break - } - } - } else { - if _, err := granule.Append(part); err != nil { - return fmt.Errorf("failed to add part to granule: %w", err) - } - } - list = append(list, part) - t.size.Add(serBuf.ParquetFile().Size()) + p := &parquetRowWriter{ + w: w, + schema: t.table.schema, + rowsBuf: make([]parquet.Row, buffSize), + rowGroupSize: int(config.RowGroupSize), + } - b = bytes.NewBuffer(nil) - w.Reset(b) - } + for _, option := range options { + option(p) } - t.table.metrics.numParts.Add(float64(len(list))) - return nil + return p, nil } -// RowGroupIterator iterates in order over all granules in the table. -// It stops iterating when the iterator function returns false. -func (t *TableBlock) RowGroupIterator( - ctx context.Context, - tx uint64, - filter TrueNegativeFilter, - rowGroups chan<- any, -) error { - ctx, span := t.tracer.Start(ctx, "TableBlock/RowGroupIterator") - span.SetAttributes(attribute.Int64("tx", int64(tx))) // Attributes don't support uint64... - defer span.End() - - index := t.Index() +// WriteRows will write the given rows to the underlying Parquet writer. It returns the number of rows written. 
+func (p *parquetRowWriter) writeRows(rows parquet.RowReader) (int, error) { + written := 0 + for p.maxNumRows == 0 || p.totalRowsWritten < p.maxNumRows { + if p.rowGroupSize > 0 && p.rowGroupRowsWritten+len(p.rowsBuf) > p.rowGroupSize { + // Read only as many rows as we need to complete the row group size limit. + p.rowsBuf = p.rowsBuf[:p.rowGroupSize-p.rowGroupRowsWritten] + } + if p.maxNumRows != 0 && p.totalRowsWritten+len(p.rowsBuf) > p.maxNumRows { + // Read only as many rows as we need to write if they would bring + // us over the limit. + p.rowsBuf = p.rowsBuf[:p.maxNumRows-p.totalRowsWritten] + } + n, err := rows.ReadRows(p.rowsBuf) + if err != nil && err != io.EOF { + return 0, err + } + if n == 0 { + break + } - var err error - index.Ascend(func(i btree.Item) bool { - g := i.(*Granule) - g.Collect(ctx, tx, filter, rowGroups) - return true - }) + if _, err = p.w.WriteRows(p.rowsBuf[:n]); err != nil { + return 0, err + } + written += n + p.rowGroupRowsWritten += n + p.totalRowsWritten += n + if p.rowGroupSize > 0 && p.rowGroupRowsWritten >= p.rowGroupSize { + if err := p.w.Flush(); err != nil { + return 0, err + } + p.rowGroupRowsWritten = 0 + } + } - return err + return written, nil } -// Size returns the cumulative size of all buffers in the table. This is roughly the size of the table in bytes. -func (t *TableBlock) Size() int64 { - return t.size.Load() +func (p *parquetRowWriter) close() error { + return p.w.Close() } -// Index provides atomic access to the table index. -func (t *TableBlock) Index() *btree.BTree { - return (*btree.BTree)(t.index.Load()) -} +// memoryBlocks collects the active and pending blocks that are currently resident in memory. +// The pendingReadersWg.Done() function must be called on all blocks returned once processing is finished. 
+func (t *Table) memoryBlocks() ([]*TableBlock, uint64) { + t.mtx.RLock() + defer t.mtx.RUnlock() -func (t *TableBlock) insertRecordToGranules(tx uint64, record arrow.Record) error { - ps := t.table.config.schema - ri := int64(0) - row, err := pqarrow.RecordToDynamicRow(ps, record, int(ri)) - if err != nil { - if err == io.EOF { - level.Debug(t.logger).Log("msg", "inserted record with no rows") - return nil + if t.active == nil { // this is currently a read only table + return nil, 0 + } + + lastReadBlockTimestamp := t.active.ulid.Time() + t.active.pendingReadersWg.Add(1) + memoryBlocks := []*TableBlock{t.active} + for block := range t.pendingBlocks { + block.pendingReadersWg.Add(1) + memoryBlocks = append(memoryBlocks, block) + + if block.ulid.Time() < lastReadBlockTimestamp { + lastReadBlockTimestamp = block.ulid.Time() } - return err } - var prev *Granule - var ascendErr error - index := t.Index() - index.Ascend(func(i btree.Item) bool { - g := i.(*Granule) + return memoryBlocks, lastReadBlockTimestamp +} - for { - if t.table.config.schema.RowLessThan(row, g.Least()) { - if prev != nil { - if _, err := prev.Append(parts.NewArrowPart(tx, record.NewSlice(ri, ri+1), t.table.config.schema)); err != nil { - ascendErr = err - return false - } - ri++ - t.table.metrics.numParts.Add(float64(1)) +// collectRowGroups collects all the row groups from the table for the given filter. +func (t *Table) collectRowGroups( + ctx context.Context, + tx uint64, + filterExpr logicalplan.Expr, + readMode logicalplan.ReadMode, + rowGroups chan<- any, +) error { + ctx, span := t.tracer.Start(ctx, "Table/collectRowGroups") + defer span.End() - row, err = pqarrow.RecordToDynamicRow(ps, record, int(ri)) - if err != nil { - ascendErr = err - return false + // pending blocks could be uploaded to the bucket while we iterate on them. 
+ // to avoid to iterate on them again while reading the block file + // we keep the last block timestamp to be read from the bucket and pass it to the IterateBucketBlocks() function + // so that every block with a timestamp >= lastReadBlockTimestamp is discarded while being read. + var lastBlockTimestamp uint64 + if readMode != logicalplan.ReadModeDataSourcesOnly { + memoryBlocks, lbt := t.memoryBlocks() + lastBlockTimestamp = lbt + defer func() { + for _, block := range memoryBlocks { + block.pendingReadersWg.Done() + } + }() + for _, block := range memoryBlocks { + if err := block.index.Scan(ctx, "", t.schema, filterExpr, tx, func(ctx context.Context, v any) error { + select { + case <-ctx.Done(): + if rg, ok := v.(index.ReleaseableRowGroup); ok { + rg.Release() } - - continue + return ctx.Err() + case rowGroups <- v: + return nil } - return true + }); err != nil { + return err } - - // stop at the first granule where this is not the least - // this might be the correct granule, but we need to check that it isn't the next granule - prev = g - return true // continue btree iteration - } - }) - if ascendErr != nil { - if ascendErr == io.EOF { - return nil } - return ascendErr } - if prev == nil { // No suitable granule was found; insert new granule - g, err := NewGranule(t.table.config, parts.NewArrowPart(tx, record.NewSlice(ri, record.NumRows()), t.table.config.schema)) - if err != nil { - return fmt.Errorf("new granule failed: %w", err) - } - t.table.metrics.granulesCreated.Inc() - - for { - t.mtx.Lock() - newIdx := index.Clone() // NOTE: this needs to be an index swap to avoid losing the new granule during a compaction - t.mtx.Unlock() - - newIdx.ReplaceOrInsert(g) + if readMode == logicalplan.ReadModeInMemoryOnly { + return nil + } - if t.index.CompareAndSwap(index, newIdx) { - t.table.metrics.numParts.Add(float64(1)) + // Collect from all other data sources. 
+ for _, source := range t.db.sources { + span.AddEvent(fmt.Sprintf("source/%s", source.String())) + if err := source.Scan(ctx, filepath.Join(t.db.name, t.name), t.schema, filterExpr, lastBlockTimestamp, func(ctx context.Context, v any) error { + select { + case <-ctx.Done(): + if rg, ok := v.(index.ReleaseableRowGroup); ok { + rg.Release() + } + return ctx.Err() + case rowGroups <- v: return nil } - - index = t.Index() + }); err != nil { + return err } } - // Append to the last valid granule - if _, err := prev.Append(parts.NewArrowPart(tx, record.NewSlice(ri, record.NumRows()), t.table.config.schema)); err != nil && err != io.EOF { - return err - } - t.table.metrics.numParts.Add(float64(1)) return nil } -type btreeComparableDynamicRow struct { - schema *dynparquet.Schema - *dynparquet.DynamicRow -} +// close notifies a table to stop accepting writes. +func (t *Table) close() { + t.mtx.Lock() + defer t.mtx.Unlock() -func (r btreeComparableDynamicRow) Less(than btree.Item) bool { - return r.schema.RowLessThan(r.DynamicRow, than.(*Granule).Least()) + t.active.pendingWritersWg.Wait() + t.closing = true + t.active.index.WaitForPendingCompactions() } -func (t *TableBlock) splitRowsByGranule(parquetRows *dynparquet.DynamicRows) (map[*Granule]map[int]struct{}, error) { - index := t.Index() - if index.Len() == 0 { - rows := map[int]struct{}{} - for i := 0; i < len(parquetRows.Rows); i++ { - rows[i] = struct{}{} +func (t *Table) externalParquetCompaction(writer io.Writer) func(compact []parts.Part) (parts.Part, int64, int64, error) { + return func(compact []parts.Part) (parts.Part, int64, int64, error) { + size, err := t.compactParts(writer, compact) + if err != nil { + return nil, 0, 0, err } - return map[*Granule]map[int]struct{}{ - nil: rows, // NOTE: nil pointer to a granule indicates a new granule must be greated for insertion - }, nil + return nil, size, 0, nil } +} - var ( - rowsByGranule = map[*Granule]map[int]struct{}{} - idx = len(parquetRows.Rows) - 1 - ) - - // 
Imagine our index looks like (in sorted order): - // [a, c) [c, h) [h, inf) - // Note that the "end" range bounds are implicit and defined by the least - // row of the next granule. - // If we insert 2 rows, [d, k], the DescendLessOrEqual call will return - // granule [h, inf) as a starting point for our descent. The rows to insert - // are iterated in reverse order until a row is found that does not belong - // to the current granule (e.g. d, since it's less than h). At this point, - // the iteration is continued to granule [c, h) into which d is inserted. - index.DescendLessOrEqual( - btreeComparableDynamicRow{ - schema: t.table.config.schema, - DynamicRow: parquetRows.Get(idx), - }, - func(i btree.Item) bool { - g := i.(*Granule) - // Descend the rows to insert until we find a row that does not belong - // in this granule. - for ; idx >= 0; idx-- { - if t.table.config.schema.RowLessThan(parquetRows.Get(idx), g.Least()) { - // Go on to the next granule. - return true - } - if _, ok := rowsByGranule[g]; !ok { - rowsByGranule[g] = map[int]struct{}{} - } - rowsByGranule[g][idx] = struct{}{} - } - // If we got here, all rows were exhausted in the loop above. - return false - }) - - if idx < 0 { - // All rows exhausted. - return rowsByGranule, nil +// compactParts will compact the given parts into a Parquet file written to w. +// It returns the size in bytes of the compacted parts. +func (t *Table) compactParts(w io.Writer, compact []parts.Part, options ...parquet.WriterOption) (int64, error) { + if len(compact) == 0 { + return 0, nil } - if _, ok := rowsByGranule[nil]; ok { - return nil, errors.New( - "unexpectedly found rows that do not belong to any granule before exhausting search", - ) + preCompactionSize := int64(0) + for _, p := range compact { + preCompactionSize += p.Size() } - // Add remaining rows to a new granule. 
- rowsByGranule[nil] = map[int]struct{}{} - for ; idx >= 0; idx-- { - rowsByGranule[nil][idx] = struct{}{} + if t.schema.UniquePrimaryIndex { + distinctRecords, err := t.distinctRecordsForCompaction(compact) + if err != nil { + return 0, err + } + if distinctRecords != nil { + // Arrow distinction was successful. + defer func() { + for _, r := range distinctRecords { + r.Release() + } + }() + // Note that the records must be sorted (sortInput=true) because + // there is no guarantee that order is maintained. + return preCompactionSize, t.writeRecordsToParquet(w, distinctRecords, true, options...) + } } - return rowsByGranule, nil -} - -// addPartToGranule finds the corresponding granule it belongs to in a sorted list of Granules. -func addPartToGranule(granules []*Granule, p *parts.Part) error { - row, err := p.Least() + bufs, err := t.buffersForCompaction(w, compact, options...) if err != nil { - return err + return 0, err + } + if bufs == nil { + // Optimization. + return preCompactionSize, nil } - var prev *Granule - for _, g := range granules { - if g.tableConfig.schema.RowLessThan(row, g.Least()) { - if prev != nil { - if _, err := prev.Append(p); err != nil { - return err - } - return nil + merged, err := t.schema.MergeDynamicRowGroups(bufs) + if err != nil { + return 0, err + } + err = func() error { + var writer dynparquet.ParquetWriter + if len(options) > 0 { + writer, err = t.schema.NewWriter(w, merged.DynamicColumns(), false, options...) 
+ if err != nil { + return err } } else { - prev = g + pw, err := t.schema.GetWriter(w, merged.DynamicColumns(), false) + if err != nil { + return err + } + defer t.schema.PutWriter(pw) + writer = pw.ParquetWriter } - } - - if prev != nil { - // Save part to prev - if _, err := prev.Append(p); err != nil { + p, err := t.active.rowWriter(writer) + if err != nil { return err } - } else { - // NOTE: this should never happen - panic("programming error; unable to find granule for part") - } + defer p.close() - return nil -} + rows := merged.Rows() + defer rows.Close() -// abortCompaction resets state set on compaction so that a granule may be -// compacted again. -func (t *TableBlock) abortCompaction(granule *Granule) { - t.table.metrics.granulesCompactionAborted.Inc() - for { - // Unmark pruned, so that we can compact the granule in the future. - if granule.metadata.pruned.CompareAndSwap(1, 0) { - return + var rowReader parquet.RowReader = rows + if t.schema.UniquePrimaryIndex { + // Given all inputs are sorted, we can deduplicate the rows using + // DedupeRowReader, which deduplicates consecutive rows that are + // equal on the sorting columns. 
+ rowReader = parquet.DedupeRowReader(rows, merged.Schema().Comparator(merged.SortingColumns()...)) } - } -} - -func (t *TableBlock) Serialize(writer io.Writer) error { - ctx := context.Background() - - // Read all row groups - rowGroups := []dynparquet.DynamicRowGroup{} - - rowGroupsChan := make(chan any) - - errg := &errgroup.Group{} - errg.Go(func() error { - for rg := range rowGroupsChan { - switch p := rg.(type) { - case arrow.Record: - b := &bytes.Buffer{} - w, err := t.table.config.schema.GetWriter(b, pqarrow.RecordDynamicCols(p)) - if err != nil { - return err - } - - if err := pqarrow.RecordToFile(t.table.config.schema, w.ParquetWriter(), p); err != nil { - return err - } - - f, err := parquet.OpenFile(bytes.NewReader(b.Bytes()), int64(b.Len())) - if err != nil { - return err - } - - buf, err := dynparquet.NewSerializedBuffer(f) - if err != nil { - return err - } - rowGroups = append(rowGroups, buf.MultiDynamicRowGroup()) - - case dynparquet.DynamicRowGroup: - rowGroups = append(rowGroups, p) - default: - return fmt.Errorf("unknown part type: %T", p) - } + if _, err := p.writeRows(rowReader); err != nil { + return err } - return nil - }) - // Collect all the row groups just to determine the dynamic cols - err := t.RowGroupIterator(ctx, math.MaxUint64, &AlwaysTrueFilter{}, rowGroupsChan) + return nil + }() if err != nil { - return err - } - - close(rowGroupsChan) - if err := errg.Wait(); err != nil { - return err + return 0, err } - // Iterate over all the row groups, and write them to storage - return t.writeRowGroups(writer, rowGroups) + return preCompactionSize, nil } -// writeRowGroups writes a set of dynamic row groups to a writer. 
-func (t *TableBlock) writeRowGroups(writer io.Writer, rowGroups []dynparquet.DynamicRowGroup) error { - merged, err := t.table.config.schema.MergeDynamicRowGroups(rowGroups) +// buffersForCompaction, given a slice of possibly overlapping parts, returns +// the minimum slice of dynamic row groups to be merged together for compaction. +// If nil, nil is returned, the resulting serialized buffer is written directly +// to w as an optimization. +func (t *Table) buffersForCompaction(w io.Writer, inputParts []parts.Part, options ...parquet.WriterOption) ([]dynparquet.DynamicRowGroup, error) { + nonOverlappingParts, overlappingParts, err := parts.FindMaximumNonOverlappingSet(t.schema, inputParts) if err != nil { - return err + return nil, err } - - cols := merged.DynamicColumns() - rows := merged.Rows() - defer rows.Close() - - _, err = t.writeRows(writer, rows, cols, 0) - return err -} - -// writeRows writes the given rows to a writer. Up to maxNumRows will be -// written. If 0, all rows will be written. The number of rows written is -// returned. 
-func (t *TableBlock) writeRows( - writer io.Writer, rows parquet.Rows, dynCols map[string][]string, maxNumRows int, -) (int, error) { - w, err := t.table.config.schema.GetWriter(writer, dynCols) - if err != nil { - return 0, err + result := make([]dynparquet.DynamicRowGroup, 0, len(inputParts)) + for _, p := range overlappingParts { + buf, err := p.AsSerializedBuffer(t.schema) + if err != nil { + return nil, err + } + result = append(result, buf.MultiDynamicRowGroup()) } - defer t.table.config.schema.PutWriter(w) - defer w.Close() - - buffSize := 256 - if t.table.config.rowGroupSize > 0 { - buffSize = t.table.config.rowGroupSize + if len(nonOverlappingParts) == 0 { + return result, nil } - rowGroupRowsWritten := 0 - totalRowsWritten := 0 - for maxNumRows == 0 || totalRowsWritten < maxNumRows { - rowsBuf := make([]parquet.Row, buffSize) - if maxNumRows != 0 && totalRowsWritten+len(rowsBuf) > maxNumRows { - // Read only as many rows as we need to write if they would bring - // us over the limit. - rowsBuf = rowsBuf[:maxNumRows-totalRowsWritten] - } - n, err := rows.ReadRows(rowsBuf) - if err != nil && err != io.EOF { - return 0, err - } - if n == 0 { + allArrow := true + for _, p := range nonOverlappingParts { + if p.Record() == nil { + allArrow = false break } - - if _, err = w.WriteRows(rowsBuf[:n]); err != nil { - return 0, err + } + if len(nonOverlappingParts) == 1 || !allArrow { + // Not worth doing anything if only one part does not overlap. If there + // is at least one non-arrow part then optimizations cannot be made. 
+ nonOverlappingRowGroups := make([]dynparquet.DynamicRowGroup, 0, len(nonOverlappingParts)) + for _, p := range nonOverlappingParts { + buf, err := p.AsSerializedBuffer(t.schema) + if err != nil { + return nil, err + } + nonOverlappingRowGroups = append(nonOverlappingRowGroups, buf.MultiDynamicRowGroup()) } - rowGroupRowsWritten += n - totalRowsWritten += n - if t.table.config.rowGroupSize > 0 && rowGroupRowsWritten >= t.table.config.rowGroupSize { - if err := w.Flush(); err != nil { - return 0, err + merged := nonOverlappingRowGroups[0] + if len(nonOverlappingRowGroups) > 1 { + // WithAlreadySorted ensures that a parquet.MultiRowGroup is created + // here, which is much cheaper than actually merging all these row + // groups. + merged, err = t.schema.MergeDynamicRowGroups(nonOverlappingRowGroups, dynparquet.WithAlreadySorted()) + if err != nil { + return nil, err } - rowGroupRowsWritten = 0 } + result = append(result, merged) + return result, nil } - return totalRowsWritten, nil -} - -func (t *Table) memoryBlocks() ([]*TableBlock, uint64) { - t.mtx.RLock() - defer t.mtx.RUnlock() - - if t.active == nil { // this is currently a read only table - return nil, 0 + // All the non-overlapping parts are arrow records, and can therefore be + // directly written to a parquet file. If there are no overlapping parts, + // write directly to w. 
+ var b bytes.Buffer + if len(overlappingParts) > 0 { + w = &b } - lastReadBlockTimestamp := t.active.ulid.Time() - memoryBlocks := []*TableBlock{t.active} - for block := range t.pendingBlocks { - memoryBlocks = append(memoryBlocks, block) - - if block.ulid.Time() < lastReadBlockTimestamp { - lastReadBlockTimestamp = block.ulid.Time() - } + records := make([]arrow.Record, 0, len(nonOverlappingParts)) + for _, p := range nonOverlappingParts { + records = append(records, p.Record()) } - return memoryBlocks, lastReadBlockTimestamp -} + if err := t.writeRecordsToParquet(w, records, false, options...); err != nil { + return nil, err + } -// collectRowGroups collects all the row groups from the table for the given filter. -func (t *Table) collectRowGroups( - ctx context.Context, - tx uint64, - filterExpr logicalplan.Expr, - rowGroups chan<- any, -) error { - ctx, span := t.tracer.Start(ctx, "Table/collectRowGroups") - defer span.End() + if len(overlappingParts) == 0 { + // Result was written directly to w. + return nil, nil + } - filter, err := BooleanExpr(filterExpr) + buf, err := dynparquet.ReaderFromBytes(b.Bytes()) if err != nil { - return err + return nil, err } + result = append(result, buf.MultiDynamicRowGroup()) + return result, nil +} - // pending blocks could be uploaded to the bucket while we iterate on them. - // to avoid to iterate on them again while reading the block file - // we keep the last block timestamp to be read from the bucket and pass it to the IterateBucketBlocks() function - // so that every block with a timestamp >= lastReadBlockTimestamp is discarded while being read. 
- memoryBlocks, lastBlockTimestamp := t.memoryBlocks() - for _, block := range memoryBlocks { - span.AddEvent("memoryBlock") - if err := block.RowGroupIterator(ctx, tx, filter, rowGroups); err != nil { +func (t *Table) writeRecordsToParquet(w io.Writer, records []arrow.Record, sortInput bool, options ...parquet.WriterOption) error { + dynColSets := make([]map[string][]string, 0, len(records)) + for _, r := range records { + dynColSets = append(dynColSets, pqarrow.RecordDynamicCols(r)) + } + dynCols := dynparquet.MergeDynamicColumnSets(dynColSets) + var writer dynparquet.ParquetWriter + if len(options) > 0 { + var err error + writer, err = t.schema.NewWriter(w, dynCols, false, options...) + if err != nil { + return err + } + } else { + pw, err := t.schema.GetWriter(w, dynCols, sortInput) + if err != nil { return err } + defer t.schema.PutWriter(pw) + writer = pw } - if err := t.IterateBucketBlocks(ctx, t.logger, lastBlockTimestamp, filter, rowGroups); err != nil { - return err + return pqarrow.RecordsToFile(t.schema, writer, records) +} + +// distinctRecordsForCompaction performs a distinct on the given parts. If at +// least one non-arrow part is found, nil, nil is returned in which case, the +// caller should fall back to normal compaction. On success, the caller is +// responsible for releasing the returned records. 
+func (t *Table) distinctRecordsForCompaction(compact []parts.Part) ([]arrow.Record, error) { + sortingCols := t.schema.ColumnDefinitionsForSortingColumns() + columnExprs := make([]logicalplan.Expr, 0, len(sortingCols)) + for _, col := range sortingCols { + var expr logicalplan.Expr + if col.Dynamic { + expr = logicalplan.DynCol(col.Name) + } else { + expr = logicalplan.Col(col.Name) + } + columnExprs = append(columnExprs, expr) } - return nil + d := physicalplan.Distinct(memory.NewGoAllocator(), t.tracer, columnExprs) + output := physicalplan.OutputPlan{} + newRecords := make([]arrow.Record, 0) + output.SetNextCallback(func(_ context.Context, r arrow.Record) error { + r.Retain() + newRecords = append(newRecords, r) + return nil + }) + d.SetNext(&output) + + if ok, err := func() (bool, error) { + ctx := context.TODO() + for _, p := range compact { + if p.Record() == nil { + // Caller should fall back to parquet distinction. + return false, nil + } + if err := d.Callback(ctx, p.Record()); err != nil { + return false, err + } + } + if err := d.Finish(ctx); err != nil { + return false, err + } + return true, nil + }(); !ok || err != nil { + for _, r := range newRecords { + r.Release() + } + return nil, err + } + return newRecords, nil } -// close notifies a table to stop accepting writes. -func (t *Table) close() { - t.mtx.Lock() - defer t.mtx.Unlock() +// IndexConfig returns the index configuration for the table. It makes a copy of the column store index config and injects it's compactParts method. +func (t *Table) IndexConfig() []*index.LevelConfig { + config := make([]*index.LevelConfig, 0, len(t.db.columnStore.indexConfig)) + for i, c := range t.db.columnStore.indexConfig { + compactFunc := t.compactParts + if i == len(t.db.columnStore.indexConfig)-1 { + // The last level is the in-memory level, which is never compacted. 
+ compactFunc = nil + } + config = append(config, &index.LevelConfig{ + Level: c.Level, + MaxSize: c.MaxSize, + Type: c.Type, + Compact: compactFunc, // TODO: this is bad and it should feel bad. We shouldn't need the table object to define how parts are compacted. Refactor needed. + }) + } - t.active.pendingWritersWg.Wait() - t.closing = true + return config } diff --git a/table_test.go b/table_test.go index 15da86ee6..8b7fbe5b7 100644 --- a/table_test.go +++ b/table_test.go @@ -4,26 +4,30 @@ import ( "bytes" "context" "fmt" + "io" + "math" "math/rand" "runtime" "sync" "sync/atomic" "testing" + "time" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/array" - "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/apache/arrow-go/v18/arrow/util" "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/google/uuid" - "github.com/prometheus/client_golang/prometheus/testutil" - "github.com/segmentio/parquet-go" + "github.com/parquet-go/parquet-go" "github.com/stretchr/testify/require" - "github.com/thanos-io/objstore/providers/filesystem" "github.com/polarsignals/frostdb/dynparquet" schemapb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/schema/v1alpha1" + "github.com/polarsignals/frostdb/index" "github.com/polarsignals/frostdb/pqarrow" + "github.com/polarsignals/frostdb/query" "github.com/polarsignals/frostdb/query/logicalplan" ) @@ -51,7 +55,7 @@ func newTestLogger(t TestLogHelper) log.Logger { func basicTable(t *testing.T, options ...Option) (*ColumnStore, *Table) { config := NewTableConfig( - 
dynparquet.NewSampleSchema(), + dynparquet.SampleDefinition(), ) logger := newTestLogger(t) @@ -69,235 +73,84 @@ func basicTable(t *testing.T, options ...Option) (*ColumnStore, *Table) { return c, table } -func TestTable(t *testing.T) { +// This test issues concurrent writes to the database, and expects all of them to be recorded successfully. +func Test_Table_Concurrency(t *testing.T) { c, table := basicTable(t) defer c.Close() - samples := dynparquet.Samples{{ - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 1, - Value: 1, - }, { - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value2"}, - {Name: "label2", Value: "value2"}, - {Name: "label3", Value: "value3"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 2, - Value: 2, - }, { - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value3"}, - {Name: "label2", Value: "value2"}, - {Name: "label4", Value: "value4"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 3, - Value: 3, - }} + generateRows := func(n int) arrow.Record { + rows := make(dynparquet.Samples, 0, n) + for i := 0; i < n; i++ { + rows = append(rows, dynparquet.Sample{ + Labels: map[string]string{ // TODO would be nice to not have all the same column + "label1": "value1", + "label2": "value2", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: rand.Int63(), + Value: rand.Int63(), + }) + } + r, err := rows.ToRecord() + require.NoError(t, err) - buf, err := samples.ToBuffer(table.Schema()) - require.NoError(t, err) + return r + } + // Spawn n workers that will insert values into the table + maxTxID := &atomic.Uint64{} + n := 8 + inserts := 100 + rows := 10 + wg := &sync.WaitGroup{} ctx := context.Background() - _, err = table.InsertBuffer(ctx, buf) - require.NoError(t, err) - - samples = dynparquet.Samples{{ - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 2, - Value: 2, - }} - - buf, err = samples.ToBuffer(table.Schema()) - require.NoError(t, err) - - _, err = table.InsertBuffer(ctx, buf) - require.NoError(t, err) + for i := 0; i < n; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for i := 0; i < inserts; i++ { + tx, err := table.InsertRecord(ctx, generateRows(rows)) + if err != nil { + fmt.Println("Received error on insert: ", err) + } - samples = dynparquet.Samples{{ - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, - {Name: "label3", Value: "value3"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 3, - Value: 3, - }} + // Set the max tx id that we've seen + if maxTX := maxTxID.Load(); tx > maxTX { + maxTxID.CompareAndSwap(maxTX, tx) + } + } + }() + } - buf, err = samples.ToBuffer(table.Schema()) - require.NoError(t, err) + 
// Wait for all our writes to exit + wg.Wait() - _, err = table.InsertBuffer(ctx, buf) - require.NoError(t, err) + // Wait for our last tx to be marked as complete + table.db.Wait(maxTxID.Load()) pool := memory.NewGoAllocator() - err = table.View(ctx, func(ctx context.Context, tx uint64) error { - return table.Iterator( + err := table.View(ctx, func(ctx context.Context, tx uint64) error { + totalrows := int64(0) + err := table.Iterator( ctx, tx, pool, - []logicalplan.Callback{func(ctx context.Context, ar arrow.Record) error { - t.Log(ar) - defer ar.Release() + []logicalplan.Callback{func(_ context.Context, ar arrow.Record) error { + totalrows += ar.NumRows() + return nil }}, ) + + require.NoError(t, err) + require.Equal(t, int64(n*inserts*rows), totalrows) + return nil }) require.NoError(t, err) - - uuid1 := uuid.MustParse("00000000-0000-0000-0000-000000000001") - uuid2 := uuid.MustParse("00000000-0000-0000-0000-000000000002") - - // One granule with 3 parts - require.Equal(t, 1, table.active.Index().Len()) - require.Equal(t, 3, table.active.Index().Min().(*Granule).parts.Total()) - require.Equal(t, parquet.Row{ - parquet.ValueOf("test").Level(0, 0, 0), - parquet.ValueOf("value1").Level(0, 1, 1), - parquet.ValueOf("value2").Level(0, 1, 2), - parquet.ValueOf(nil).Level(0, 0, 3), - parquet.ValueOf(nil).Level(0, 0, 4), - parquet.ValueOf(append(uuid1[:], uuid2[:]...)).Level(0, 0, 5), - parquet.ValueOf(1).Level(0, 0, 6), - parquet.ValueOf(1).Level(0, 0, 7), - }, (*dynparquet.DynamicRow)(table.active.Index().Min().(*Granule).metadata.least).Row) - require.Equal(t, 1, table.active.Index().Len()) -} - -// This test issues concurrent writes to the database, and expects all of them to be recorded successfully. 
-func Test_Table_Concurrency(t *testing.T) { - tests := map[string]struct { - granuleSize int64 - }{ - "25MB": {25 * 1024 * 1024}, - "15MB": {15 * 1024 * 1024}, - "8MB": {8 * 1024 * 1024}, - "1MB": {1024 * 1024}, - } - - for name, test := range tests { - t.Run(name, func(t *testing.T) { - c, table := basicTable(t, WithGranuleSizeBytes(test.granuleSize)) - defer c.Close() - - generateRows := func(n int) *dynparquet.Buffer { - rows := make(dynparquet.Samples, 0, n) - for i := 0; i < n; i++ { - rows = append(rows, dynparquet.Sample{ - Labels: []dynparquet.Label{ // TODO would be nice to not have all the same column - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: rand.Int63(), - Value: rand.Int63(), - }) - } - buf, err := rows.ToBuffer(table.Schema()) - require.NoError(t, err) - - buf.Sort() - - // This is necessary because sorting a buffer makes concurrent reading not - // safe as the internal pages are cyclically sorted at read time. Cloning - // executes the cyclic sort once and makes the resulting buffer safe for - // concurrent reading as it no longer has to perform the cyclic sorting at - // read time. This should probably be improved in the parquet library. 
- buf, err = buf.Clone() - require.NoError(t, err) - - return buf - } - - // Spawn n workers that will insert values into the table - maxTxID := &atomic.Uint64{} - n := 8 - inserts := 100 - rows := 10 - wg := &sync.WaitGroup{} - ctx := context.Background() - for i := 0; i < n; i++ { - wg.Add(1) - go func() { - defer wg.Done() - for i := 0; i < inserts; i++ { - tx, err := table.InsertBuffer(ctx, generateRows(rows)) - if err != nil { - fmt.Println("Received error on insert: ", err) - } - - // Set the max tx id that we've seen - if maxTX := maxTxID.Load(); tx > maxTX { - maxTxID.CompareAndSwap(maxTX, tx) - } - } - }() - } - - // Wait for all our writes to exit - wg.Wait() - - // Wait for our last tx to be marked as complete - table.db.Wait(maxTxID.Load()) - - pool := memory.NewGoAllocator() - - err := table.View(ctx, func(ctx context.Context, tx uint64) error { - totalrows := int64(0) - err := table.Iterator( - ctx, - tx, - pool, - []logicalplan.Callback{func(ctx context.Context, ar arrow.Record) error { - totalrows += ar.NumRows() - defer ar.Release() - - return nil - }}, - ) - - require.NoError(t, err) - require.Equal(t, int64(n*inserts*rows), totalrows) - return nil - }) - require.NoError(t, err) - }) - } } func Benchmark_Table_Insert_1000Rows_10Iters_10Writers(b *testing.B) { @@ -312,70 +165,15 @@ func Benchmark_Table_Insert_100Rows_100Iters_100Writers(b *testing.B) { benchmarkTableInserts(b, 100, 100, 100) } -// BenchmarkInsertSimple is a benchmark used to measure the performance of the -// core insert path without the additional complexity of Benchmark_Table_Insert. 
-func BenchmarkInsertSimple(b *testing.B) { - var ( - ctx = context.Background() - schema = dynparquet.NewSampleSchema() - config = NewTableConfig(schema) - ) - - c, err := New() - require.NoError(b, err) - defer c.Close() - - db, err := c.DB(ctx, "test") - require.NoError(b, err) - - const ( - numInserts = 100 - // In production, we see anything from a couple of rows to 60 per - // insert at the time of writing this benchmark. - samplesPerInsert = 30 - ) - inserts := make([][]byte, 0, numInserts) - for i := 0; i < numInserts; i++ { - samples := make([]dynparquet.Sample, 0, samplesPerInsert) - for len(samples) < samplesPerInsert { - samples = append(samples, dynparquet.NewTestSamples()...) - } - for _, s := range samples { - s.Timestamp += int64(i) - } - buf, err := dynparquet.Samples(samples).ToBuffer(schema) - require.NoError(b, err) - var bytesBuf bytes.Buffer - require.NoError(b, schema.SerializeBuffer(&bytesBuf, buf)) - inserts = append(inserts, bytesBuf.Bytes()) - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - table, err := db.Table(fmt.Sprintf("test%d", i), config) - if err != nil { - b.Fatal(err) - } - - for j := 0; j < numInserts; j++ { - if _, err := table.Insert(ctx, inserts[j]); err != nil { - b.Fatal(err) - } - } - } -} - func benchmarkTableInserts(b *testing.B, rows, iterations, writers int) { var ( - schema = dynparquet.NewSampleSchema() + def = dynparquet.SampleDefinition() ctx = context.Background() - config = NewTableConfig(schema) + config = NewTableConfig(def) ) - logger := log.NewNopLogger() - c, err := New( - WithLogger(logger), + WithLogger(log.NewNopLogger()), WithWAL(), WithStoragePath(b.TempDir()), ) @@ -385,13 +183,13 @@ func benchmarkTableInserts(b *testing.B, rows, iterations, writers int) { db, err := c.DB(context.Background(), "test") require.NoError(b, err) ts := &atomic.Int64{} - generateRows := func(id string, n int) []byte { + generateRows := func(id string, n int) arrow.Record { rows := make(dynparquet.Samples, 0, n) for i 
:= 0; i < n; i++ { rows = append(rows, dynparquet.Sample{ - Labels: []dynparquet.Label{ // TODO would be nice to not have all the same column - {Name: "label1", Value: id}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ // TODO would be nice to not have all the same column + "label1": id, + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -402,20 +200,16 @@ func benchmarkTableInserts(b *testing.B, rows, iterations, writers int) { }) } - buf, err := rows.ToBuffer(config.schema) + r, err := rows.ToRecord() require.NoError(b, err) - - buf.Sort() - bytes := bytes.NewBuffer(nil) - require.NoError(b, schema.SerializeBuffer(bytes, buf)) - return bytes.Bytes() + return r } // Pre-generate all rows we're inserting - inserts := make(map[string][][]byte, writers) + inserts := make(map[string][]arrow.Record, writers) for i := 0; i < writers; i++ { id := uuid.New().String() - inserts[id] = make([][]byte, iterations) + inserts[id] = make([]arrow.Record, iterations) for j := 0; j < iterations; j++ { inserts[id][j] = generateRows(id, rows) } @@ -440,7 +234,7 @@ func benchmarkTableInserts(b *testing.B, rows, iterations, writers int) { err error ) for i := 0; i < iterations; i++ { - if maxTx, err = tbl.Insert(ctx, inserts[id][i]); err != nil { + if maxTx, err = tbl.InsertRecord(ctx, inserts[id][i]); err != nil { fmt.Println("Received error on insert: ", err) } } @@ -460,7 +254,7 @@ func benchmarkTableInserts(b *testing.B, rows, iterations, writers int) { ctx, tx, pool, - []logicalplan.Callback{func(ctx context.Context, ar arrow.Record) error { + []logicalplan.Callback{func(_ context.Context, ar arrow.Record) error { defer ar.Release() totalrows += ar.NumRows() @@ -468,7 +262,6 @@ func benchmarkTableInserts(b *testing.B, rows, iterations, writers int) { }}, ) }) - require.Equal(b, 0., testutil.ToFloat64(table.metrics.granulesCompactionAborted)) require.NoError(b, err) 
require.Equal(b, int64(rows*iterations*writers), totalrows) @@ -480,89 +273,38 @@ func Test_Table_ReadIsolation(t *testing.T) { c, table := basicTable(t) defer c.Close() - samples := dynparquet.Samples{{ - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 1, - Value: 1, - }, { - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value2"}, - {Name: "label2", Value: "value2"}, - {Name: "label3", Value: "value3"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 2, - Value: 2, - }, { - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value3"}, - {Name: "label2", Value: "value2"}, - {Name: "label4", Value: "value4"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 3, - Value: 3, - }} - - buf, err := samples.ToBuffer(table.Schema()) + samples := dynparquet.GenerateTestSamples(3) + r, err := samples.ToRecord() require.NoError(t, err) ctx := context.Background() - _, err = table.InsertBuffer(ctx, buf) + _, err = table.InsertRecord(ctx, r) require.NoError(t, err) // Perform a new insert that will have a higher tx id - samples = dynparquet.Samples{{ - Labels: []dynparquet.Label{ - {Name: "blarg", Value: "blarg"}, - {Name: "blah", Value: "blah"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 1, - 
Value: 1, - }} + samples = dynparquet.GenerateTestSamples(1) - buf, err = samples.ToBuffer(table.Schema()) + r, err = samples.ToRecord() require.NoError(t, err) - tx, err := table.InsertBuffer(ctx, buf) + tx, err := table.InsertRecord(ctx, r) require.NoError(t, err) table.db.Wait(tx) - // Now we cheat and reset our tx and watermark - table.db.tx.Store(2) - table.db.highWatermark.Store(2) + // Reset the database to the previous tx + table.db.resetToTxn(tx-1, nil) pool := memory.NewGoAllocator() - err = table.View(ctx, func(ctx context.Context, tx uint64) error { rows := int64(0) err = table.Iterator( ctx, tx, pool, - []logicalplan.Callback{func(ctx context.Context, ar arrow.Record) error { + []logicalplan.Callback{func(_ context.Context, ar arrow.Record) error { rows += ar.NumRows() - defer ar.Release() - return nil }}, ) @@ -576,15 +318,14 @@ func Test_Table_ReadIsolation(t *testing.T) { table.db.tx.Store(3) table.db.highWatermark.Store(3) - err = table.View(ctx, func(ctx context.Context, tx uint64) error { + err = table.View(ctx, func(ctx context.Context, _ uint64) error { rows := int64(0) err = table.Iterator( ctx, table.db.highWatermark.Load(), pool, - []logicalplan.Callback{func(ctx context.Context, ar arrow.Record) error { + []logicalplan.Callback{func(_ context.Context, ar arrow.Record) error { rows += ar.NumRows() - defer ar.Release() return nil }}, @@ -597,7 +338,7 @@ func Test_Table_ReadIsolation(t *testing.T) { } func Test_Table_NewTableValidIndexDegree(t *testing.T) { - config := NewTableConfig(dynparquet.NewSampleSchema()) + config := NewTableConfig(dynparquet.SampleDefinition()) c, err := New( WithLogger(newTestLogger(t)), WithIndexDegree(-1), @@ -614,7 +355,7 @@ func Test_Table_NewTableValidIndexDegree(t *testing.T) { func Test_Table_NewTableValidSplitSize(t *testing.T) { config := NewTableConfig( - dynparquet.NewSampleSchema(), + dynparquet.SampleDefinition(), ) logger := newTestLogger(t) @@ -633,7 +374,7 @@ func Test_Table_NewTableValidSplitSize(t 
*testing.T) { defer c.Close() db, err = c.DB(context.Background(), "test") require.NoError(t, err) - _, err = db.Table("test", NewTableConfig(dynparquet.NewSampleSchema())) + _, err = db.Table("test", NewTableConfig(dynparquet.SampleDefinition())) require.Error(t, err) require.Equal(t, err.Error(), "failed to create table: Table's columnStore splitSize must be a positive integer > 1 (received -1)") @@ -642,19 +383,24 @@ func Test_Table_NewTableValidSplitSize(t *testing.T) { defer c.Close() db, err = c.DB(context.Background(), "test") require.NoError(t, err) - _, err = db.Table("test", NewTableConfig(dynparquet.NewSampleSchema())) + _, err = db.Table("test", NewTableConfig(dynparquet.SampleDefinition())) require.NoError(t, err) } -func Test_Table_Filter(t *testing.T) { - c, table := basicTable(t) +func Test_Table_Bloomfilter(t *testing.T) { + c, table := basicTable(t, WithIndexConfig( + []*index.LevelConfig{ + {Level: index.L0, MaxSize: 452, Type: index.CompactionTypeParquetMemory}, // NOTE: 452 is the current size of the 3 records that are inserted + {Level: index.L1, MaxSize: 100000}, + }, + )) defer c.Close() samples := dynparquet.Samples{{ ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -664,10 +410,10 @@ func Test_Table_Filter(t *testing.T) { Value: 1, }, { ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value2"}, - {Name: "label2", Value: "value2"}, - {Name: "label3", Value: "value3"}, + Labels: map[string]string{ + "label1": "value2", + "label2": "value2", + "label3": "value3", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -677,10 +423,10 @@ func Test_Table_Filter(t *testing.T) { Value: 2, }, { 
ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value3"}, - {Name: "label2", Value: "value2"}, - {Name: "label4", Value: "value4"}, + Labels: map[string]string{ + "label1": "value3", + "label2": "value2", + "label4": "value4", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -690,40 +436,117 @@ func Test_Table_Filter(t *testing.T) { Value: 3, }} - buf, err := samples.ToBuffer(table.Schema()) - require.NoError(t, err) + for i := range samples { + r, err := samples[i : i+1].ToRecord() + require.NoError(t, err) - ctx := context.Background() + ctx := context.Background() - _, err = table.InsertBuffer(ctx, buf) - require.NoError(t, err) + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + } - samples = dynparquet.Samples{{ - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + require.NoError(t, table.EnsureCompaction()) + + require.Eventually(t, func() bool { + iterations := 0 + err := table.View(context.Background(), func(_ context.Context, tx uint64) error { + pool := memory.NewGoAllocator() + + require.NoError(t, table.Iterator( + context.Background(), + tx, + pool, + []logicalplan.Callback{func(_ context.Context, _ arrow.Record) error { + iterations++ + return nil + }}, + logicalplan.WithFilter(logicalplan.Col("labels.label4").Eq(logicalplan.Literal("value4"))), + )) + return nil + }) + require.NoError(t, err) + return iterations == 1 + }, time.Millisecond*60, time.Millisecond*10) +} + +func Test_RecordToRow(t *testing.T) { + schema := arrow.NewSchema([]arrow.Field{ + { + Name: "labels.label1", + Type: &arrow.StringType{}, + Nullable: true, }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + { + Name: "labels.label2", + Type: 
&arrow.StringType{}, + Nullable: true, }, - Timestamp: 2, - Value: 2, - }} + { + Name: "timestamp", + Type: &arrow.Int64Type{}, + }, + { + Name: "value", + Type: &arrow.Int64Type{}, + }, + }, nil) + + bld := array.NewRecordBuilder(memory.NewGoAllocator(), schema) + t.Cleanup(bld.Release) - buf, err = samples.ToBuffer(table.Schema()) + bld.Field(0).(*array.StringBuilder).Append("hello") + bld.Field(1).(*array.StringBuilder).Append("world") + bld.Field(2).(*array.Int64Builder).Append(10) + bld.Field(3).(*array.Int64Builder).Append(20) + + record := bld.NewRecord() + + dynschema := dynparquet.NewSampleSchema() + ps, err := dynschema.GetDynamicParquetSchema(pqarrow.RecordDynamicCols(record)) require.NoError(t, err) + defer dynschema.PutPooledParquetSchema(ps) - _, err = table.InsertBuffer(ctx, buf) + row, err := pqarrow.RecordToRow(ps.Schema, record, 0) require.NoError(t, err) + require.Equal(t, "[ hello world 10 20]", fmt.Sprintf("%v", row)) +} - samples = dynparquet.Samples{{ +func Test_L0Query(t *testing.T) { + c, table := basicTable(t) + t.Cleanup(func() { c.Close() }) + + samples := dynparquet.Samples{{ ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, - {Name: "label3", Value: "value3"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: 1, + Value: 1, + }, { + ExampleType: "test", + Labels: map[string]string{ + "label1": "value2", + "label2": "value2", + "label3": "value3", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: 2, + Value: 2, + }, { + ExampleType: "test", + Labels: 
map[string]string{ + "label1": "value3", + "label2": "value2", + "label4": "value4", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -733,51 +556,44 @@ func Test_Table_Filter(t *testing.T) { Value: 3, }} - buf, err = samples.ToBuffer(table.Schema()) + r, err := samples.ToRecord() require.NoError(t, err) - _, err = table.InsertBuffer(ctx, buf) + ctx := context.Background() + _, err = table.InsertRecord(ctx, r) require.NoError(t, err) - filterExpr := logicalplan.And( // Filter that excludes the granule - logicalplan.Col("timestamp").Gt(logicalplan.Literal(-10)), - logicalplan.Col("timestamp").Lt(logicalplan.Literal(1)), - ) - pool := memory.NewGoAllocator() + records := 0 err = table.View(ctx, func(ctx context.Context, tx uint64) error { - iterated := false - err = table.Iterator( ctx, tx, pool, - []logicalplan.Callback{func(ctx context.Context, ar arrow.Record) error { - defer ar.Release() - - iterated = true - + []logicalplan.Callback{func(_ context.Context, ar arrow.Record) error { + records++ + require.Equal(t, int64(3), ar.NumRows()) + require.Equal(t, int64(8), ar.NumCols()) return nil }}, - logicalplan.WithFilter(filterExpr), ) require.NoError(t, err) - require.False(t, iterated) return nil }) require.NoError(t, err) + require.Equal(t, 1, records) } -func Test_Table_Bloomfilter(t *testing.T) { +func Test_Serialize_DisparateDynamicColumns(t *testing.T) { c, table := basicTable(t) defer c.Close() samples := dynparquet.Samples{{ ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, + Labels: map[string]string{ + "label1": "value1", + "label2": "value2", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -787,10 +603,10 @@ func Test_Table_Bloomfilter(t *testing.T) { Value: 1, }, { ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value2"}, - 
{Name: "label2", Value: "value2"}, - {Name: "label3", Value: "value3"}, + Labels: map[string]string{ + "label1": "value2", + "label2": "value2", + "label3": "value3", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -800,10 +616,10 @@ func Test_Table_Bloomfilter(t *testing.T) { Value: 2, }, { ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value3"}, - {Name: "label2", Value: "value2"}, - {Name: "label4", Value: "value4"}, + Labels: map[string]string{ + "label1": "value3", + "label2": "value2", + "label4": "value4", }, Stacktrace: []uuid.UUID{ {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -813,473 +629,592 @@ func Test_Table_Bloomfilter(t *testing.T) { Value: 3, }} - for i := range samples { - buf, err := samples[i : i+1].ToBuffer(table.Schema()) - require.NoError(t, err) - - ctx := context.Background() + r, err := samples.ToRecord() + require.NoError(t, err) - _, err = table.InsertBuffer(ctx, buf) - require.NoError(t, err) - } + ctx := context.Background() + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) - iterations := 0 - err := table.View(context.Background(), func(ctx context.Context, tx uint64) error { - pool := memory.NewGoAllocator() + samples = dynparquet.Samples{{ + ExampleType: "test", + Labels: map[string]string{ + "label100": "a", + }, + Stacktrace: []uuid.UUID{ + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + }, + Timestamp: 2, + Value: 2, + }} - require.NoError(t, table.Iterator( - context.Background(), - tx, - pool, - []logicalplan.Callback{func(ctx context.Context, ar arrow.Record) error { - defer ar.Release() - iterations++ - return nil - }}, - logicalplan.WithFilter(logicalplan.Col("labels.label4").Eq(logicalplan.Literal("value4"))), - )) - return nil - }) + r, err = 
samples.ToRecord() require.NoError(t, err) - require.Equal(t, 1, iterations) -} -func Test_DoubleTable(t *testing.T) { - schema, err := dynparquet.SchemaFromDefinition(&schemapb.Schema{ - Name: "test", - Columns: []*schemapb.Column{{ - Name: "id", - StorageLayout: &schemapb.StorageLayout{Type: schemapb.StorageLayout_TYPE_STRING}, - Dynamic: false, - }, { - Name: "value", - StorageLayout: &schemapb.StorageLayout{Type: schemapb.StorageLayout_TYPE_DOUBLE}, - Dynamic: false, - }}, - SortingColumns: []*schemapb.SortingColumn{{ - Name: "id", - Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, - }}, - }) + _, err = table.InsertRecord(ctx, r) require.NoError(t, err) - config := NewTableConfig(schema) - bucket, err := filesystem.NewBucket(t.TempDir()) - require.NoError(t, err) + // Serialize the table + require.NoError(t, table.active.Serialize(io.Discard)) +} - logger := newTestLogger(t) - c, err := New( - WithLogger(logger), - WithBucketStorage(bucket), +func Test_RowWriter(t *testing.T) { + config := NewTableConfig( + dynparquet.SampleDefinition(), + WithRowGroupSize(5), ) + + logger := newTestLogger(t) + + c, err := New(WithLogger(logger)) require.NoError(t, err) - defer c.Close() db, err := c.DB(context.Background(), "test") require.NoError(t, err) table, err := db.Table("test", config) require.NoError(t, err) + defer c.Close() - b, err := schema.NewBuffer(nil) + b := &bytes.Buffer{} + pw, err := table.schema.GetWriter(b, map[string][]string{ + "labels": {"node"}, + }, false) + defer table.schema.PutWriter(pw) require.NoError(t, err) - - value := rand.Float64() - - _, err = b.WriteRows([]parquet.Row{{ - parquet.ValueOf("a").Level(0, 0, 0), - parquet.ValueOf(value).Level(0, 0, 1), - }}) + rowWriter, err := table.ActiveBlock().rowWriter(pw) require.NoError(t, err) - ctx := context.Background() - - n, err := table.InsertBuffer(ctx, b) + // Write 17(8,9) rows, expect 3 row groups of 5 rows and 1 row group of 2 rows + samples := dynparquet.GenerateTestSamples(8) + buf, 
err := dynparquet.ToBuffer(samples, table.Schema()) require.NoError(t, err) + rows := buf.Rows() + _, err = rowWriter.writeRows(rows) + require.NoError(t, err) + require.NoError(t, rows.Close()) - // Read the schema from a previous transaction. Reading transaction 2 here - // because transaction 1 is just the new block creation, therefore there - // would be no schema to read (schemas only materialize when data is - // inserted). - require.Equal(t, uint64(2), n) + samples = dynparquet.GenerateTestSamples(9) + buf, err = dynparquet.ToBuffer(samples, table.Schema()) + require.NoError(t, err) + rows = buf.Rows() + _, err = rowWriter.writeRows(rows) + require.NoError(t, err) + require.NoError(t, rows.Close()) - err = table.View(ctx, func(ctx context.Context, tx uint64) error { - pool := memory.NewGoAllocator() + require.NoError(t, rowWriter.close()) - return table.Iterator( - ctx, - tx, - pool, - []logicalplan.Callback{func(ctx context.Context, ar arrow.Record) error { - defer ar.Release() - require.Equal(t, value, ar.Column(1).(*array.Float64).Value(0)) - return nil - }}, - ) - }) + f, err := parquet.OpenFile(bytes.NewReader(b.Bytes()), int64(b.Len())) require.NoError(t, err) + + require.Equal(t, 4, len(f.Metadata().RowGroups)) + for i, rg := range f.Metadata().RowGroups { + switch i { + case 3: + require.Equal(t, int64(2), rg.NumRows) + default: + require.Equal(t, int64(5), rg.NumRows) + } + } } -func Test_Table_EmptyRowGroup(t *testing.T) { +// Test_Table_Size ensures the size of the table increases by the size of the inserted data. 
+func Test_Table_Size(t *testing.T) { c, table := basicTable(t) defer c.Close() + before := table.ActiveBlock().Size() + + samples := dynparquet.NewTestSamples() + rec, err := samples.ToRecord() + require.NoError(t, err) + ctx := context.Background() + _, err = table.InsertRecord(ctx, rec) + require.NoError(t, err) - samples := dynparquet.Samples{{ - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 1, - Value: 1, - }, { - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value2"}, - {Name: "label2", Value: "value2"}, - {Name: "label3", Value: "value3"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 2, - Value: 2, - }, { - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value3"}, - {Name: "label2", Value: "value2"}, - {Name: "label4", Value: "value4"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 3, - Value: 3, - }} + after := table.ActiveBlock().Size() + require.Equal(t, util.TotalRecordSize(rec), after-before) +} + +func Test_Insert_Repeated(t *testing.T) { + schema := &schemapb.Schema{ + Name: "repeated", + Columns: []*schemapb.Column{{ + Name: "name", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_STRING, + Encoding: schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, + }, + }, { + Name: "values", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_STRING, + Encoding: 
schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, + Repeated: true, + }, + }, { + Name: "value", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_INT64, + }, + }}, + SortingColumns: []*schemapb.SortingColumn{{ + Name: "name", + Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, + }}, + } + config := NewTableConfig(schema) + logger := newTestLogger(t) + + tests := map[string]struct { + nilBeg bool + nilMiddle bool + nilEnd bool + }{ + "beginning": {true, false, false}, + "middle": {false, true, false}, + "end": {false, false, true}, + } + for name, test := range tests { + t.Run(name, func(t *testing.T) { + c, err := New(WithLogger(logger)) + require.NoError(t, err) + t.Cleanup(func() { + c.Close() + }) + + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) + table, err := db.Table("test", config) + require.NoError(t, err) + + buffer, err := table.Schema().GetBuffer(nil) + require.NoError(t, err) + + var row parquet.Row + if test.nilBeg { + row = nil + row = append(row, parquet.ValueOf("foo").Level(0, 0, 0)) + row = append(row, parquet.ValueOf(4).Level(0, 0, 1)) + row = append(row, parquet.ValueOf(nil).Level(0, 0, 2)) + _, err = buffer.WriteRows([]parquet.Row{row}) + require.NoError(t, err) + } + + row = nil + row = append(row, parquet.ValueOf("foo2").Level(0, 0, 0)) + row = append(row, parquet.ValueOf(3).Level(0, 0, 1)) + row = append(row, parquet.ValueOf("bar").Level(0, 1, 2)) + row = append(row, parquet.ValueOf("baz").Level(1, 1, 2)) + _, err = buffer.WriteRows([]parquet.Row{row}) + require.NoError(t, err) + + if test.nilMiddle { + row = nil + row = append(row, parquet.ValueOf("foo").Level(0, 0, 0)) + row = append(row, parquet.ValueOf(4).Level(0, 0, 1)) + row = append(row, parquet.ValueOf(nil).Level(0, 0, 2)) + _, err = buffer.WriteRows([]parquet.Row{row}) + require.NoError(t, err) + } + + row = nil + row = append(row, parquet.ValueOf("foo3").Level(0, 0, 0)) + row = append(row, parquet.ValueOf(6).Level(0, 0, 1)) 
+ row = append(row, parquet.ValueOf("bar").Level(0, 1, 2)) + row = append(row, parquet.ValueOf("baz").Level(1, 1, 2)) + _, err = buffer.WriteRows([]parquet.Row{row}) + require.NoError(t, err) + + if test.nilEnd { + row = nil + row = append(row, parquet.ValueOf("foo").Level(0, 0, 0)) + row = append(row, parquet.ValueOf(4).Level(0, 0, 1)) + row = append(row, parquet.ValueOf(nil).Level(0, 0, 2)) + _, err = buffer.WriteRows([]parquet.Row{row}) + require.NoError(t, err) + } + + ctx := context.Background() + + // Test insertion as record + converter := pqarrow.NewParquetConverter(memory.NewGoAllocator(), logicalplan.IterOptions{}) + defer converter.Close() + + require.NoError(t, converter.Convert(ctx, buffer, table.Schema())) + record := converter.NewRecord() + defer record.Release() + + _, err = table.InsertRecord(ctx, record) + require.NoError(t, err) + + err = table.View(ctx, func(ctx context.Context, tx uint64) error { + err = table.Iterator( + ctx, + tx, + memory.NewGoAllocator(), + []logicalplan.Callback{func(_ context.Context, ar arrow.Record) error { + require.Equal(t, int64(3), ar.NumRows()) + require.Equal(t, int64(3), ar.NumCols()) + return nil + }}, + ) + require.NoError(t, err) + return nil + }) + + engine := query.NewEngine(memory.NewGoAllocator(), db.TableProvider()) + err = engine.ScanTable("test"). + Aggregate( + []*logicalplan.AggregationFunction{logicalplan.Sum(logicalplan.Col("value"))}, + []logicalplan.Expr{logicalplan.Col("values")}, + ). 
+ Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + require.Equal(t, int64(2), r.NumRows()) + require.Equal(t, int64(2), r.NumCols()) + return nil + }) + require.NoError(t, err) + }) + } +} - buf, err := samples.ToBuffer(table.Schema()) +func Test_Compact_Repeated(t *testing.T) { + schema := &schemapb.Schema{ + Name: "repeated", + Columns: []*schemapb.Column{{ + Name: "name", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_STRING, + Encoding: schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, + }, + }, { + Name: "values", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_STRING, + Encoding: schemapb.StorageLayout_ENCODING_RLE_DICTIONARY, + Repeated: true, + }, + }, { + Name: "value", + StorageLayout: &schemapb.StorageLayout{ + Type: schemapb.StorageLayout_TYPE_INT64, + }, + }}, + SortingColumns: []*schemapb.SortingColumn{{ + Name: "name", + Direction: schemapb.SortingColumn_DIRECTION_ASCENDING, + }}, + } + config := NewTableConfig(schema) + logger := newTestLogger(t) + + c, err := New( + WithLogger(logger), + ) require.NoError(t, err) + t.Cleanup(func() { + c.Close() + }) - _, err = table.InsertBuffer(ctx, buf) + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) + table, err := db.Table("test", config) require.NoError(t, err) - // Insert new samples / buffer / rowGroup that doesn't have label1 + buffer, err := table.Schema().GetBuffer(nil) + require.NoError(t, err) - samples = dynparquet.Samples{{ - Labels: []dynparquet.Label{ - {Name: "foo", Value: "bar"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 1, - Value: 1, - }} + var row parquet.Row + row = nil + row = append(row, parquet.ValueOf("foo").Level(0, 0, 0)) + row = append(row, parquet.ValueOf(4).Level(0, 0, 1)) + row = append(row, 
parquet.ValueOf(nil).Level(0, 0, 2)) + _, err = buffer.WriteRows([]parquet.Row{row}) + require.NoError(t, err) - buf, err = samples.ToBuffer(table.Schema()) + row = nil + row = append(row, parquet.ValueOf("foo2").Level(0, 0, 0)) + row = append(row, parquet.ValueOf(3).Level(0, 0, 1)) + row = append(row, parquet.ValueOf("bar").Level(0, 1, 2)) + row = append(row, parquet.ValueOf("baz").Level(1, 1, 2)) + _, err = buffer.WriteRows([]parquet.Row{row}) require.NoError(t, err) - tx, err := table.InsertBuffer(ctx, buf) + row = nil + row = append(row, parquet.ValueOf("foo3").Level(0, 0, 0)) + row = append(row, parquet.ValueOf(6).Level(0, 0, 1)) + row = append(row, parquet.ValueOf("bar").Level(0, 1, 2)) + row = append(row, parquet.ValueOf("baz").Level(1, 1, 2)) + _, err = buffer.WriteRows([]parquet.Row{row}) require.NoError(t, err) - // Wait until data has been written. - table.db.Wait(tx) + ctx := context.Background() - pool := memory.NewGoAllocator() + converter := pqarrow.NewParquetConverter(memory.NewGoAllocator(), logicalplan.IterOptions{}) + defer converter.Close() + require.NoError(t, converter.Convert(ctx, buffer, table.Schema())) + before := converter.NewRecord() + defer before.Release() + + _, err = table.InsertRecord(ctx, before) + require.NoError(t, err) + + // Compact the record + require.NoError(t, table.EnsureCompaction()) + + // Retrieve the compacted data err = table.View(ctx, func(ctx context.Context, tx uint64) error { - rows := int64(0) err = table.Iterator( ctx, tx, - pool, - // Select all distinct values for the label1 column. 
- []logicalplan.Callback{func(ctx context.Context, ar arrow.Record) error { - rows += ar.NumRows() - defer ar.Release() - + memory.NewGoAllocator(), + []logicalplan.Callback{func(_ context.Context, after arrow.Record) error { + require.True(t, array.RecordEqual(before, after)) return nil }}, - logicalplan.WithProjection(&logicalplan.DynamicColumn{ColumnName: "label1"}), - logicalplan.WithDistinctColumns(&logicalplan.DynamicColumn{ColumnName: "label1"}), ) require.NoError(t, err) - require.Equal(t, int64(0), rows) return nil }) - require.NoError(t, err) } -func Test_Table_NestedSchema(t *testing.T) { - schema := dynparquet.NewNestedSampleSchema(t) - - ctx := context.Background() - config := NewTableConfig(schema) - c, err := New(WithLogger(newTestLogger(t))) - t.Cleanup(func() { c.Close() }) - require.NoError(t, err) - db, err := c.DB(ctx, "nested") +func Test_Table_DynamicColumnMap(t *testing.T) { + c, err := New() require.NoError(t, err) + t.Cleanup(func() { + c.Close() + }) - tbl, err := db.Table("nested", config) + db, err := c.DB(context.Background(), "test") require.NoError(t, err) - pb, err := schema.NewBufferV2( - dynparquet.LabelColumn("label1"), - dynparquet.LabelColumn("label2"), + type ColMap struct { + Name string + Attributes map[string]string + } + table, err := NewGenericTable[ColMap]( + db, "test", memory.NewGoAllocator(), ) - require.NoError(t, err) + defer table.Release() - _, err = pb.WriteRows([]parquet.Row{ - { - parquet.ValueOf("value1").Level(0, 1, 0), // labels.label1 - parquet.ValueOf("value1").Level(0, 1, 1), // labels.label2 - parquet.ValueOf(1).Level(0, 2, 2), // timestamps: [1] - parquet.ValueOf(2).Level(1, 2, 2), // timestamps: [1,2] - parquet.ValueOf(2).Level(0, 2, 3), // values: [2] - parquet.ValueOf(3).Level(1, 2, 3), // values: [2,3] + _, err = table.Write(context.Background(), ColMap{ + Name: "albert", + Attributes: map[string]string{ + "age": "9999", }, }) require.NoError(t, err) +} - _, err = tbl.InsertBuffer(ctx, pb) +func 
Test_Table_DynamicColumnNotDefined(t *testing.T) { + c, err := New() require.NoError(t, err) + t.Cleanup(func() { + c.Close() + }) - pool := memory.NewGoAllocator() + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) - var r arrow.Record - records := 0 - err = tbl.View(ctx, func(ctx context.Context, tx uint64) error { - err = tbl.Iterator( - ctx, - tx, - pool, - // Select all distinct values for the label1 column. - []logicalplan.Callback{func(ctx context.Context, ar arrow.Record) error { - records++ - require.Equal(t, int64(1), ar.NumRows()) - require.Equal(t, int64(3), ar.NumCols()) - fmt.Println(ar) - ar.Retain() - r = ar - return nil - }}, - ) - require.NoError(t, err) - return nil - }) - t.Cleanup(r.Release) + type ColMap struct { + Name string + Attributes map[string]string + } + + table, err := NewGenericTable[ColMap]( + db, "test", memory.NewGoAllocator(), + ) require.NoError(t, err) - require.Equal(t, 1, records) + defer table.Release() - require.Equal(t, `{{ dictionary: ["value1"] - indices: [0] } { dictionary: ["value1"] - indices: [0] }}`, fmt.Sprintf("%v", r.Column(0))) - require.Equal(t, `[[1 2]]`, fmt.Sprintf("%v", r.Column(1))) - require.Equal(t, `[[2 3]]`, fmt.Sprintf("%v", r.Column(2))) + _, err = table.Write(context.Background(), ColMap{ + Name: "albert", + }) + require.NoError(t, err) } -func Test_RecordToRow(t *testing.T) { - schema := arrow.NewSchema([]arrow.Field{ - { - Name: "labels.label1", - Type: &arrow.StringType{}, - Nullable: true, - }, - { - Name: "labels.label2", - Type: &arrow.StringType{}, - Nullable: true, - }, - { - Name: "timestamp", - Type: &arrow.Int64Type{}, - }, - { - Name: "value", - Type: &arrow.Int64Type{}, - }, - }, nil) +func TestTableUniquePrimaryIndex(t *testing.T) { + c, err := New(WithIndexConfig([]*index.LevelConfig{ + {Level: index.L0, MaxSize: 180, Type: index.CompactionTypeParquetMemory}, + {Level: index.L1, MaxSize: 1 * TiB}, + })) + require.NoError(t, err) + defer c.Close() - bld := 
array.NewRecordBuilder(memory.NewGoAllocator(), schema) - t.Cleanup(bld.Release) + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) - bld.Field(0).(*array.StringBuilder).Append("hello") - bld.Field(1).(*array.StringBuilder).Append("world") - bld.Field(2).(*array.Int64Builder).Append(10) - bld.Field(3).(*array.Int64Builder).Append(20) + type Record struct { + Name string `frostdb:",asc"` + } + const tableName = "test" + table, err := NewGenericTable[Record]( + db, tableName, memory.NewGoAllocator(), WithUniquePrimaryIndex(true), + ) + require.NoError(t, err) + defer table.Release() - record := bld.NewRecord() + const numRecords = 9 + for i := 0; i < numRecords; i++ { + _, err = table.Write(context.Background(), Record{ + Name: "duplicate", + }) + require.NoError(t, err) + } - dynschema := dynparquet.NewSampleSchema() - ps, err := dynschema.DynamicParquetSchema(pqarrow.RecordDynamicCols(record)) + rowsRead := 0 + require.NoError(t, query.NewEngine( + memory.DefaultAllocator, + db.TableProvider()).ScanTable(tableName).Execute( + context.Background(), func(_ context.Context, r arrow.Record) error { + rowsRead += int(r.NumRows()) + return nil + })) + // Duplicates are only dropped after compaction. + require.Equal(t, numRecords, rowsRead) + + // Trigger compaction with a new record. 
+ _, err = table.Write(context.Background(), Record{ + Name: "duplicate", + }) require.NoError(t, err) - row, err := pqarrow.RecordToRow(dynschema, ps, record, 0) require.NoError(t, err) - require.Equal(t, "[ hello world 10 20]", fmt.Sprintf("%v", row)) + require.NoError(t, table.ActiveBlock().EnsureCompaction()) + + rowsRead = 0 + require.NoError(t, query.NewEngine( + memory.DefaultAllocator, + db.TableProvider()).ScanTable(tableName).Execute( + context.Background(), func(_ context.Context, r arrow.Record) error { + rowsRead += int(r.NumRows()) + return nil + })) + require.Equal(t, 1, rowsRead) } -func Test_L0Query(t *testing.T) { - c, table := basicTable(t) - t.Cleanup(func() { c.Close() }) +func TestTable_write_ptr_struct(t *testing.T) { + columnstore, err := New() + require.Nil(t, err) + defer columnstore.Close() - samples := dynparquet.Samples{{ - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 1, - Value: 1, - }, { - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value2"}, - {Name: "label2", Value: "value2"}, - {Name: "label3", Value: "value3"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 2, - Value: 2, - }, { - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value3"}, - {Name: "label2", Value: "value2"}, - {Name: "label4", Value: "value4"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - 
}, - Timestamp: 3, - Value: 3, - }} + database, err := columnstore.DB(context.Background(), "simple_db") + require.Nil(t, err) + + table, err := NewGenericTable[*dynparquet.Sample]( + database, "simple_table", memory.NewGoAllocator(), + ) + require.Nil(t, err) + defer table.Release() - ps, err := table.Schema().DynamicParquetSchema(map[string][]string{ - "labels": {"label1", "label2", "label3", "label4"}, + _, err = table.Write(context.Background(), &dynparquet.Sample{ + ExampleType: "ptr", }) - require.NoError(t, err) + require.Nil(t, err) +} - ctx := context.Background() - sc, err := pqarrow.ParquetSchemaToArrowSchema(ctx, ps, logicalplan.IterOptions{}) +func Test_Issue685(t *testing.T) { + c, err := New() require.NoError(t, err) + t.Cleanup(func() { + c.Close() + }) - r, err := samples.ToRecord(sc) + db, err := c.DB(context.Background(), "test") require.NoError(t, err) - _, err = table.InsertRecord(ctx, r) + type ColMap struct { + Value map[string]int64 + Attributes map[string]string + } + table, err := NewGenericTable[ColMap]( + db, "test", memory.NewGoAllocator(), + ) require.NoError(t, err) + defer table.Release() - pool := memory.NewGoAllocator() - - records := 0 - err = table.View(ctx, func(ctx context.Context, tx uint64) error { - err = table.Iterator( - ctx, - tx, - pool, - []logicalplan.Callback{func(ctx context.Context, ar arrow.Record) error { - records++ - require.Equal(t, int64(3), ar.NumRows()) - require.Equal(t, int64(8), ar.NumCols()) - return nil - }}, - ) - require.NoError(t, err) - return nil + _, err = table.Write(context.Background(), ColMap{ + Value: map[string]int64{ + "age": 9999, + }, + Attributes: map[string]string{ + "age": "9999", + }, }) require.NoError(t, err) - require.Equal(t, 1, records) -} -// This test checks to make sure that if a new row is added that is globally the least it shall be added as a new granule. 
-func Test_Table_InsertLeast(t *testing.T) { - c, table := basicTable(t) - defer c.Close() - - samples := dynparquet.Samples{{ - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value1"}, - {Name: "label2", Value: "value2"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 1, - Value: 1, - }, { - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value2"}, - {Name: "label2", Value: "value2"}, - {Name: "label3", Value: "value3"}, + _, err = table.Write(context.Background(), ColMap{ + Value: map[string]int64{ + "other": 1234, + "age": 3, }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, + Attributes: map[string]string{ + "age": "9999", }, - Timestamp: 2, - Value: 2, - }, { - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "value3"}, - {Name: "label2", Value: "value2"}, - {Name: "label4", Value: "value4"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 3, - Value: 3, - }} - - buf, err := samples.ToBuffer(table.Schema()) + }) require.NoError(t, err) - ctx := context.Background() - _, err = table.InsertBuffer(ctx, buf) + engine := query.NewEngine(memory.NewGoAllocator(), db.TableProvider()) + err = engine.ScanTable("test"). + Aggregate( + []*logicalplan.AggregationFunction{ + logicalplan.Sum(logicalplan.Col("value.age")), + }, + nil, + ). 
+ Execute(context.Background(), func(_ context.Context, r arrow.Record) error { + require.Equal(t, int64(1), r.NumRows()) + require.Equal(t, int64(10002), r.Column(0).(*array.Int64).Value(0)) + return nil + }) require.NoError(t, err) +} - before := table.active.Index().Len() +func Test_Issue741_Deadlock(t *testing.T) { + logger := newTestLogger(t) + dir := t.TempDir() + c, err := New( + WithIndexConfig([]*index.LevelConfig{ + {Level: index.L0, MaxSize: 1 * TiB, Type: index.CompactionTypeParquetDisk}, + {Level: index.L1, MaxSize: 1 * TiB}, + }), + WithLogger(logger), + WithStoragePath(dir), + WithWAL(), + WithManualBlockRotation(), + ) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, c.Close()) + }) - samples = dynparquet.Samples{{ - ExampleType: "test", - Labels: []dynparquet.Label{ - {Name: "label1", Value: "a"}, - }, - Stacktrace: []uuid.UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - Timestamp: 2, - Value: 2, - }} + db, err := c.DB(context.Background(), "test") + require.NoError(t, err) - buf, err = samples.ToBuffer(table.Schema()) + table, err := db.Table("test", NewTableConfig(dynparquet.SampleDefinition())) require.NoError(t, err) - _, err = table.InsertBuffer(ctx, buf) + // Insert a record + r, err := dynparquet.GenerateTestSamples(1).ToRecord() require.NoError(t, err) + ctx := context.Background() + _, err = table.InsertRecord(ctx, r) + require.NoError(t, err) + + // Compact the table to create a Parquet file backed record + require.NoError(t, table.EnsureCompaction()) + + // Simulate a query that is canceled + ctx, cancel := context.WithCancel(context.Background()) + cancel() + require.Error(t, table.Iterator(ctx, math.MaxUint64, memory.NewGoAllocator(), []logicalplan.Callback{func(_ context.Context, _ arrow.Record) error { + return nil + }})) - require.Equal(t, before+1, table.active.Index().Len()) + // 
This releases all the parts and waits for all reads to finish accessing the parts. This was causing a deadlock. + table.active.index.Close() } diff --git a/testdata/oldwal/databases/test/wal/00000000000000000001 b/testdata/oldwal/databases/test/wal/00000000000000000001 index ec0d1f1cb..d888092b9 100644 Binary files a/testdata/oldwal/databases/test/wal/00000000000000000001 and b/testdata/oldwal/databases/test/wal/00000000000000000001 differ diff --git a/testing_opts.go b/testing_opts.go new file mode 100644 index 000000000..698d7ae52 --- /dev/null +++ b/testing_opts.go @@ -0,0 +1,30 @@ +package frostdb + +import "github.com/polarsignals/frostdb/wal" + +type TestingOption Option + +func WithTestingOptions(opts ...TestingOption) Option { + return func(c *ColumnStore) error { + for _, opt := range opts { + if err := opt(c); err != nil { + return err + } + } + return nil + } +} + +func WithTestingNoDiskSpaceReclaimOnSnapshot() TestingOption { + return func(c *ColumnStore) error { + c.testingOptions.disableReclaimDiskSpaceOnSnapshot = true + return nil + } +} + +func WithTestingWalOptions(opts ...wal.Option) TestingOption { + return func(c *ColumnStore) error { + c.testingOptions.walTestingOptions = opts + return nil + } +} diff --git a/tx_list.go b/tx_list.go index 20ae96b37..0a09c7dd6 100644 --- a/tx_list.go +++ b/tx_list.go @@ -1,6 +1,7 @@ package frostdb import ( + "context" "sync/atomic" ) @@ -11,9 +12,10 @@ type TxNode struct { } type TxPool struct { - head *atomic.Pointer[TxNode] - tail *atomic.Pointer[TxNode] - drain chan interface{} + head *atomic.Pointer[TxNode] + tail *atomic.Pointer[TxNode] + cancel context.CancelFunc + drain chan interface{} } // NewTxPool returns a new TxPool and starts the pool cleaner routine. 
@@ -54,12 +56,10 @@ func NewTxPool(watermark *atomic.Uint64) *TxPool { tail := &TxNode{ next: &atomic.Pointer[TxNode]{}, original: &atomic.Pointer[TxNode]{}, - tx: 0, } head := &TxNode{ next: &atomic.Pointer[TxNode]{}, original: &atomic.Pointer[TxNode]{}, - tx: 0, } txpool := &TxPool{ head: &atomic.Pointer[TxNode]{}, @@ -72,7 +72,9 @@ func NewTxPool(watermark *atomic.Uint64) *TxPool { txpool.head.Store(head) txpool.tail.Store(tail) - go txpool.cleaner(watermark) + ctx, cancel := context.WithCancel(context.Background()) + txpool.cancel = cancel + go txpool.cleaner(ctx, watermark) return txpool } @@ -105,6 +107,8 @@ func (l *TxPool) Insert(tx uint64) { return false } for !tryInsert() { + // Satisfy linter with statement. + continue } } @@ -121,7 +125,16 @@ func (l *TxPool) insert(node, prev, next *TxNode) bool { return success } -func (l *TxPool) Iterate(iterate func(tx uint64) bool) { +// notifyWatermark notifies the TxPool that the watermark has been updated. This +// triggers a sweep of the pool. +func (l *TxPool) notifyWatermark() { + select { + case l.drain <- struct{}{}: + default: + } +} + +func (l *TxPool) Iterate(iterate func(txn uint64) bool) { for node := l.head.Load().next.Load(); node.tx != 0; node = getUnmarked(node) { if isMarked(node) == nil && !iterate(node.tx) { return @@ -130,7 +143,7 @@ func (l *TxPool) Iterate(iterate func(tx uint64) bool) { } // delete iterates over the list and deletes until the delete function returns false. 
-func (l *TxPool) delete(deleteFunc func(tx uint64) bool) { +func (l *TxPool) delete(deleteFunc func(txn uint64) bool) { for node := l.head.Load().next.Load(); node.tx != 0; node = getUnmarked(node) { if !deleteFunc(node.tx) { return @@ -159,7 +172,7 @@ func isMarked(node *TxNode) *TxNode { return og } -func getMarked(node *TxNode) *TxNode { +func getMarked(_ *TxNode) *TxNode { // using nil as the marker return nil } @@ -180,24 +193,29 @@ func getUnmarked(node *TxNode) *TxNode { // cleaner sweeps the pool periodically, and bubbles up the given watermark. // this function does not return. -func (l *TxPool) cleaner(watermark *atomic.Uint64) { - for range l.drain { - l.delete(func(tx uint64) bool { - mark := watermark.Load() - switch { - case mark+1 == tx: - watermark.Add(1) - return true // return true to indicate that this node should be removed from the tx list. - case mark >= tx: - return true - default: - return false - } - }) +func (l *TxPool) cleaner(ctx context.Context, watermark *atomic.Uint64) { + for { + select { + case <-ctx.Done(): + return + case <-l.drain: + l.delete(func(txn uint64) bool { + mark := watermark.Load() + switch { + case mark+1 == txn: + watermark.Store(txn) + return true // return true to indicate that this node should be removed from the tx list. + case mark >= txn: + return true + default: + return false + } + }) + } } } // Stop stops the TxPool's cleaner goroutine. 
func (l *TxPool) Stop() { - close(l.drain) + l.cancel() } diff --git a/tx_list_test.go b/tx_list_test.go index 731713ce5..67206fa86 100644 --- a/tx_list_test.go +++ b/tx_list_test.go @@ -1,7 +1,7 @@ package frostdb import ( - "sort" + "slices" "sync" "sync/atomic" "testing" @@ -9,12 +9,6 @@ import ( "github.com/stretchr/testify/require" ) -type Uint64Slice []uint64 - -func (x Uint64Slice) Len() int { return len(x) } -func (x Uint64Slice) Less(i, j int) bool { return x[i] < x[j] } -func (x Uint64Slice) Swap(i, j int) { x[i], x[j] = x[j], x[i] } - func Test_TXList_Mark(t *testing.T) { node := &TxNode{ next: &atomic.Pointer[TxNode]{}, @@ -34,28 +28,28 @@ func Test_TXList_Mark(t *testing.T) { } func Test_TXList_Basic(t *testing.T) { - wm := &atomic.Uint64{} + wm := atomic.Uint64{} wm.Store(1) // set the watermark so that the sweeper won't remove any of our txs - p := NewTxPool(wm) + p := NewTxPool(&wm) txs := []uint64{9, 8, 7, 6, 4, 5, 3, 10} for _, tx := range txs { p.Insert(tx) } - found := make(Uint64Slice, 0, len(txs)) + found := make([]uint64, 0, len(txs)) p.Iterate(func(tx uint64) bool { found = append(found, tx) return true }) p.Stop() // stop the sweeper - require.True(t, sort.IsSorted(found)) + require.True(t, slices.IsSorted(found)) require.Equal(t, 8, len(found)) } func Test_TXList_Async(t *testing.T) { - wm := &atomic.Uint64{} - p := NewTxPool(wm) + wm := atomic.Uint64{} + p := NewTxPool(&wm) tx := &atomic.Uint64{} tx.Add(1) // adjust the tx id to ensure the sweeper doesn't drain the pool @@ -76,23 +70,25 @@ func Test_TXList_Async(t *testing.T) { wg.Wait() - found := make(Uint64Slice, 0, writers*n) + found := make([]uint64, 0, writers*n) p.Iterate(func(tx uint64) bool { found = append(found, tx) return true }) - require.True(t, sort.IsSorted(found)) + require.True(t, slices.IsSorted(found)) require.Equal(t, n*writers, len(found)) p.Insert(1) // insert the missing tx to drain the pool for v := wm.Load(); v < tx.Load(); v = wm.Load() { + // 
Satisfy linter with statement. + continue } require.Equal(t, tx.Load(), wm.Load()) // Verify the pool is empty foundtx := false - p.Iterate(func(tx uint64) bool { + p.Iterate(func(_ uint64) bool { foundtx = true return true }) @@ -126,8 +122,8 @@ func Benchmark_TXList_InsertAndDrain(b *testing.B) { for name, benchmark := range benchmarks { b.Run(name, func(b *testing.B) { - wm := &atomic.Uint64{} - p := NewTxPool(wm) + wm := atomic.Uint64{} + p := NewTxPool(&wm) tx := &atomic.Uint64{} wg := &sync.WaitGroup{} for i := 0; i < b.N; i++ { @@ -145,6 +141,8 @@ func Benchmark_TXList_InsertAndDrain(b *testing.B) { // Wait for the sweeper to drain for v := wm.Load(); v < tx.Load(); v = wm.Load() { + // Satisfy linter with statement. + continue } require.Equal(b, tx.Load(), wm.Load()) } diff --git a/wal/wal.go b/wal/wal.go index 88002f907..28ad0d01a 100644 --- a/wal/wal.go +++ b/wal/wal.go @@ -4,43 +4,49 @@ import ( "bytes" "container/heap" "context" - "errors" "fmt" + "math" "os" - "path/filepath" "sync" "time" - "github.com/apache/arrow/go/v10/arrow" - "github.com/apache/arrow/go/v10/arrow/ipc" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/ipc" "github.com/go-kit/log" "github.com/go-kit/log/level" + "github.com/polarsignals/wal" + "github.com/polarsignals/wal/types" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/tidwall/wal" walpb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/wal/v1alpha1" ) +type ReplayHandlerFunc func(tx uint64, record *walpb.Record) error + type NopWAL struct{} func (w *NopWAL) Close() error { return nil } -func (w *NopWAL) Log(tx uint64, record *walpb.Record) error { +func (w *NopWAL) Log(_ uint64, _ *walpb.Record) error { + return nil +} + +func (w *NopWAL) Replay(_ 
uint64, _ ReplayHandlerFunc) error { return nil } -func (w *NopWAL) LogRecord(tx uint64, table string, record arrow.Record) error { +func (w *NopWAL) LogRecord(_ uint64, _ string, _ arrow.Record) error { return nil } -func (w *NopWAL) Replay(handler func(tx uint64, record *walpb.Record) error) error { +func (w *NopWAL) Truncate(_ uint64) error { return nil } -func (w *NopWAL) Truncate(tx uint64) error { +func (w *NopWAL) Reset(_ uint64) error { return nil } @@ -48,31 +54,96 @@ func (w *NopWAL) FirstIndex() (uint64, error) { return 0, nil } -type fileWALMetrics struct { - recordsLogged prometheus.Counter - failedLogs prometheus.Counter - lastTruncationAt prometheus.Gauge - walTruncations prometheus.Counter - walTruncationsFailed prometheus.Counter +func (w *NopWAL) LastIndex() (uint64, error) { + return 0, nil +} + +type Metrics struct { + FailedLogs prometheus.Counter + LastTruncationAt prometheus.Gauge + WalRepairs prometheus.Counter + WalRepairsLostRecords prometheus.Counter + WalCloseTimeouts prometheus.Counter + WalQueueSize prometheus.Gauge +} + +func newMetrics(reg prometheus.Registerer) *Metrics { + return &Metrics{ + FailedLogs: promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Name: "failed_logs_total", + Help: "Number of failed WAL logs", + }), + LastTruncationAt: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ + Name: "last_truncation_at", + Help: "The last transaction the WAL was truncated to", + }), + WalRepairs: promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Name: "repairs_total", + Help: "The number of times the WAL had to be repaired (truncated) due to corrupt records", + }), + WalRepairsLostRecords: promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Name: "repairs_lost_records_total", + Help: "The number of WAL records lost due to WAL repairs (truncations)", + }), + WalCloseTimeouts: promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Name: "close_timeouts_total", + Help: "The number of times the WAL failed to close due to 
a timeout", + }), + WalQueueSize: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ + Name: "queue_size", + Help: "The number of unprocessed requests in the WAL queue", + }), + } } +const ( + dirPerms = os.FileMode(0o750) + progressLogTimeout = 10 * time.Second +) + type FileWAL struct { logger log.Logger path string - log *wal.Log + log wal.LogStore - nextTx uint64 - txmtx *sync.Mutex + metrics *Metrics + storeMetrics *wal.Metrics - metrics *fileWALMetrics logRequestCh chan *logRequest - queue *logRequestQueue logRequestPool *sync.Pool arrowBufPool *sync.Pool - mtx *sync.Mutex + protected struct { + sync.Mutex + queue logRequestQueue + // truncateTx is set when the caller wishes to perform a truncation. The + // WAL will keep on logging records up to and including this txn and + // then perform a truncation. If another truncate call occurs in the + // meantime, the highest txn will be used. + truncateTx uint64 + // nextTx is the next expected txn. The FileWAL will only log a record + // with this txn. + nextTx uint64 + } + + // scratch memory reused to reduce allocations. + scratch struct { + walBatch []types.LogEntry + reqBatch []*logRequest + } + + // segmentSize indicates what the underlying WAL segment size is. This helps + // the run goroutine size batches more or less appropriately. + segmentSize int + // lastBatchWrite is used to determine when to force a close of the WAL. + lastBatchWrite time.Time - cancel func() - shutdownCh chan struct{} + cancel func() + shutdownCh chan struct{} + closeTimeout time.Duration + + newLogStoreWrapper func(wal.LogStore) wal.LogStore + ticker Ticker + testingDroppedLogs func([]types.LogEntry) } type logRequest struct { @@ -98,36 +169,76 @@ func (q *logRequestQueue) Pop() any { old := *q n := len(old) x := old[n-1] + // Remove this reference to a logRequest since the GC considers the popped + // element still accessible otherwise. 
Since these are sync pooled, we want + // to defer object lifetime management to the pool without interfering. + old[n-1] = nil *q = old[0 : n-1] return x } +type Option func(*FileWAL) + +func WithTestingLogStoreWrapper(newLogStoreWrapper func(wal.LogStore) wal.LogStore) Option { + return func(w *FileWAL) { + w.newLogStoreWrapper = newLogStoreWrapper + } +} + +func WithMetrics(m *Metrics) Option { + return func(w *FileWAL) { + w.metrics = m + } +} + +func WithStoreMetrics(m *wal.Metrics) Option { + return func(w *FileWAL) { + w.storeMetrics = m + } +} + +type Ticker interface { + C() <-chan time.Time + Stop() +} + +type realTicker struct { + *time.Ticker +} + +func (t realTicker) C() <-chan time.Time { + return t.Ticker.C +} + +// WithTestingLoopTicker allows the caller to force processing of pending WAL +// entries by providing a custom ticker implementation. +func WithTestingLoopTicker(t Ticker) Option { + return func(w *FileWAL) { + w.ticker = t + } +} + +// WithTestingCallbackWithDroppedLogsOnClose is called when the WAL times out on +// close with all the entries that could not be written. 
+func WithTestingCallbackWithDroppedLogsOnClose(cb func([]types.LogEntry)) Option { + return func(w *FileWAL) { + w.testingDroppedLogs = cb + } +} + func Open( logger log.Logger, - reg prometheus.Registerer, path string, + opts ...Option, ) (*FileWAL, error) { - log, err := wal.Open(path, wal.DefaultOptions) - if err != nil { - if !errors.Is(err, wal.ErrCorrupt) { - return nil, err - } - // Try to repair the corrupt WAL - if err := tryRepairWAL(path); err != nil { - return nil, fmt.Errorf("failed to repair corrupt WAL: %w", err) - } - log, err = wal.Open(path, wal.DefaultOptions) - if err != nil { - return nil, err - } + if err := os.MkdirAll(path, dirPerms); err != nil { + return nil, err } + segmentSize := wal.DefaultSegmentSize w := &FileWAL{ logger: logger, path: path, - log: log, - nextTx: 1, - txmtx: &sync.Mutex{}, logRequestCh: make(chan *logRequest), logRequestPool: &sync.Pool{ New: func() any { @@ -141,162 +252,264 @@ func Open( return &bytes.Buffer{} }, }, - mtx: &sync.Mutex{}, - queue: &logRequestQueue{}, - metrics: &fileWALMetrics{ - recordsLogged: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "wal_records_logged_total", - Help: "Number of records logged to WAL", - }), - failedLogs: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "wal_failed_logs_total", - Help: "Number of failed WAL logs", - }), - lastTruncationAt: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Name: "last_truncation_at", - Help: "The last transaction the WAL was truncated to", - }), - walTruncations: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "wal_truncations_total", - Help: "The number of WAL truncations", - }), - walTruncationsFailed: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "wal_truncations_failed_total", - Help: "The number of WAL truncations", - }), - }, - shutdownCh: make(chan struct{}), + closeTimeout: 1 * time.Second, + segmentSize: segmentSize, + shutdownCh: make(chan struct{}), } - ctx, cancel := 
context.WithCancel(context.Background()) - w.cancel = cancel - go func() { - w.run(ctx) - close(w.shutdownCh) - }() - - return w, nil -} + for _, o := range opts { + o(w) + } -// tryRepairWAL operates on a corrupt WAL directory by removing the last file -// in the directory. Corruption can occur when the recorded size of an entry in -// the file does not correspond to its actual size. A better option would be to -// read the last WAL file and remove the corrupted entry, but this is good -// enough for now. -func tryRepairWAL(path string) error { - absPath, err := filepath.Abs(path) + logStore, err := wal.Open(path, wal.WithLogger(logger), wal.WithMetrics(w.storeMetrics), wal.WithSegmentSize(segmentSize)) if err != nil { - return err + return nil, err } - path = absPath - entries, err := os.ReadDir(path) + lastIndex, err := logStore.LastIndex() if err != nil { - return err + return nil, err } + w.protected.nextTx = lastIndex + 1 - if err := os.Remove(filepath.Join(absPath, entries[len(entries)-1].Name())); err != nil { - return err + if w.newLogStoreWrapper != nil { + w.log = w.newLogStoreWrapper(logStore) + } else { + w.log = logStore } - return nil + if w.metrics == nil { + w.metrics = newMetrics(prometheus.NewRegistry()) + } + + w.scratch.walBatch = make([]types.LogEntry, 0, 64) + w.scratch.reqBatch = make([]*logRequest, 0, 64) + + return w, nil } func (w *FileWAL) run(ctx context.Context) { - ticker := time.NewTicker(50 * time.Millisecond) - defer ticker.Stop() - walBatch := &wal.Batch{} - batch := make([]*logRequest, 0, 128) // random number is random + const defaultTickTime = 50 * time.Millisecond + if w.ticker == nil { + w.ticker = realTicker{Ticker: time.NewTicker(defaultTickTime)} + } + defer w.ticker.Stop() + // lastQueueSize is only used on shutdown to reduce debug logging verbosity. 
+ lastQueueSize := 0 + w.lastBatchWrite = time.Now() + for { select { case <-ctx.Done(): - w.mtx.Lock() - n := w.queue.Len() - w.mtx.Unlock() + // Need to drain the queue before we can shutdown, so + // proactively try to process entries. + w.process() + + w.protected.Lock() + n := w.protected.queue.Len() + w.protected.Unlock() if n > 0 { - // Need to drain the queue before we can shutdown. - continue - } - return - case <-ticker.C: - w.txmtx.Lock() - nextTx := w.nextTx - w.txmtx.Unlock() - batch := batch[:0] - w.mtx.Lock() - for w.queue.Len() > 0 { - if minTx := (*w.queue)[0].tx; minTx != nextTx { - if minTx < nextTx { - // The next entry must be dropped otherwise progress - // will never be made. Log a warning given this could - // lead to missing data. - level.Warn(w.logger).Log( - "msg", "WAL cannot log a txn id that has already been seen; dropping entry", - "expected", nextTx, - "found", minTx, - ) - _ = heap.Pop(w.queue) + // Force the WAL to close after some a timeout. + if w.closeTimeout > 0 && time.Since(w.lastBatchWrite) > w.closeTimeout { + w.metrics.WalCloseTimeouts.Inc() + level.Error(w.logger).Log( + "msg", "WAL timed out attempting to close", + ) + if w.testingDroppedLogs != nil { + batch := make([]types.LogEntry, 0, n) + w.protected.Lock() + for _, v := range w.protected.queue { + batch = append(batch, types.LogEntry{Index: v.tx, Data: v.data}) + } + w.protected.Unlock() + w.testingDroppedLogs(batch) } - break + return } - r := heap.Pop(w.queue).(*logRequest) - batch = append(batch, r) - nextTx++ - } - w.mtx.Unlock() - if len(batch) == 0 { + + if n == lastQueueSize { + // No progress made. 
+ time.Sleep(defaultTickTime) + continue + } + + lastQueueSize = n + w.protected.Lock() + minTx := w.protected.queue[0].tx + w.protected.Unlock() + lastIdx, err := w.log.LastIndex() + logOpts := []any{ + "msg", "WAL received shutdown request; waiting for log request queue to drain", + "queueSize", n, + "minTx", minTx, + "lastIndex", lastIdx, + } + if err != nil { + logOpts = append(logOpts, "lastIndexErr", err) + } + level.Debug(w.logger).Log(logOpts...) continue } + level.Debug(w.logger).Log("msg", "WAL shut down") + return + case <-w.ticker.C(): + w.process() + } + } +} - walBatch.Clear() - for _, r := range batch { - walBatch.Write(r.tx, r.data) +func (w *FileWAL) process() { + w.scratch.reqBatch = w.scratch.reqBatch[:0] + + w.protected.Lock() + batchSize := 0 + for w.protected.queue.Len() > 0 && batchSize < w.segmentSize { + if minTx := w.protected.queue[0].tx; minTx != w.protected.nextTx { + if minTx < w.protected.nextTx { + // The next entry must be dropped otherwise progress + // will never be made. Log a warning given this could + // lead to missing data. + level.Warn(w.logger).Log( + "msg", "WAL cannot log a txn id that has already been seen; dropping entry", + "expected", w.protected.nextTx, + "found", minTx, + ) + w.logRequestPool.Put(heap.Pop(&w.protected.queue)) + w.metrics.WalQueueSize.Sub(1) + // Keep on going since there might be other transactions + // below this one. + continue } - - err := w.log.WriteBatch(walBatch) - if err != nil { - w.metrics.failedLogs.Add(float64(len(batch))) - lastIndex, lastIndexErr := w.log.LastIndex() - level.Error(w.logger).Log( - "msg", "failed to write WAL batch", - "err", err, - // Sprintf is used here because the logging package does not - // support logging arbitrary values. 
- "batch", fmt.Sprintf("%v", walBatch), - "lastIndex", lastIndex, - "lastIndexErr", lastIndexErr, + if sinceProgress := time.Since(w.lastBatchWrite); sinceProgress > progressLogTimeout { + level.Info(w.logger).Log( + "msg", "wal has not made progress", + "since", sinceProgress, + "next_expected_tx", w.protected.nextTx, + "min_tx", minTx, ) - } else { - w.metrics.recordsLogged.Add(float64(len(batch))) } + // Next expected tx has not yet been seen. + break + } + r := heap.Pop(&w.protected.queue).(*logRequest) + w.metrics.WalQueueSize.Sub(1) + w.scratch.reqBatch = append(w.scratch.reqBatch, r) + batchSize += len(r.data) + w.protected.nextTx++ + } + // truncateTx will be non-zero if we either are about to log a + // record with a txn past the txn to truncate, or we have logged one + // in the past. + truncateTx := uint64(0) + if w.protected.truncateTx != 0 { + truncateTx = w.protected.truncateTx + w.protected.truncateTx = 0 + } + w.protected.Unlock() + if len(w.scratch.reqBatch) == 0 && truncateTx == 0 { + // No records to log or truncations. + return + } - for _, r := range batch { - w.logRequestPool.Put(r) - } + w.scratch.walBatch = w.scratch.walBatch[:0] + for _, r := range w.scratch.reqBatch { + w.scratch.walBatch = append(w.scratch.walBatch, types.LogEntry{ + Index: r.tx, + // No copy is needed here since the log request is only + // released once these bytes are persisted. 
+ Data: r.data, + }) + } - w.txmtx.Lock() - w.nextTx = nextTx - w.txmtx.Unlock() + if len(w.scratch.walBatch) > 0 { + if err := w.log.StoreLogs(w.scratch.walBatch); err != nil { + w.metrics.FailedLogs.Add(float64(len(w.scratch.reqBatch))) + lastIndex, lastIndexErr := w.log.LastIndex() + level.Error(w.logger).Log( + "msg", "failed to write WAL batch", + "err", err, + "lastIndex", lastIndex, + "lastIndexErr", lastIndexErr, + ) } } -} -func (w *FileWAL) Truncate(tx uint64) error { - w.metrics.lastTruncationAt.Set(float64(tx)) - w.metrics.walTruncations.Inc() + if truncateTx != 0 { + w.metrics.LastTruncationAt.Set(float64(truncateTx)) + level.Debug(w.logger).Log("msg", "truncating WAL", "tx", truncateTx) + if err := w.log.TruncateFront(truncateTx); err != nil { + level.Error(w.logger).Log("msg", "failed to truncate WAL", "tx", truncateTx, "err", err) + } else { + w.protected.Lock() + if truncateTx > w.protected.nextTx { + // truncateTx is the new firstIndex of the WAL. If it is + // greater than the next expected transaction, this was + // a full WAL truncation/reset so both the first and + // last index are now 0. The underlying WAL will allow a + // record with any index to be written, however we only + // want to allow the next index to be logged. + w.protected.nextTx = truncateTx + // Remove any records that have not yet been written and + // are now below the nextTx. 
+ for w.protected.queue.Len() > 0 { + if minTx := w.protected.queue[0].tx; minTx >= w.protected.nextTx { + break + } + w.logRequestPool.Put(heap.Pop(&w.protected.queue)) + w.metrics.WalQueueSize.Sub(1) + } + } + w.protected.Unlock() + level.Debug(w.logger).Log("msg", "truncated WAL", "tx", truncateTx) + } + } - level.Debug(w.logger).Log("msg", "truncating WAL", "tx", tx) - err := w.log.TruncateFront(tx) - if err != nil { - level.Error(w.logger).Log("msg", "failed to truncate WAL", "tx", tx, "err", err) - w.metrics.walTruncationsFailed.Inc() - return err + // Remove references to a logRequest since the GC considers the + // popped element still accessible otherwise. Since these are sync + // pooled, we want to defer object lifetime management to the pool + // without interfering. + for i := range w.scratch.walBatch { + w.scratch.walBatch[i].Data = nil + } + for i, r := range w.scratch.reqBatch { + w.scratch.reqBatch[i] = nil + w.logRequestPool.Put(r) } - level.Debug(w.logger).Log("msg", "truncated WAL", "tx", tx) + w.lastBatchWrite = time.Now() +} + +// Truncate queues a truncation of the WAL at the given tx. Note that the +// truncation will be performed asynchronously. A nil error does not indicate +// a successful truncation. +func (w *FileWAL) Truncate(tx uint64) error { + w.protected.Lock() + defer w.protected.Unlock() + if tx > w.protected.truncateTx { + w.protected.truncateTx = tx + } return nil } +func (w *FileWAL) Reset(nextTx uint64) error { + w.protected.Lock() + defer w.protected.Unlock() + // Drain any pending records. + for w.protected.queue.Len() > 0 { + _ = heap.Pop(&w.protected.queue) + } + // Set the next expected transaction. + w.protected.nextTx = nextTx + // This truncation will fully reset the underlying WAL. Any index can be + // logged, but setting the nextTx above will ensure that only a record with + // a matching txn will be accepted as the first record. 
+ return w.log.TruncateFront(math.MaxUint64) +} + func (w *FileWAL) Close() error { + if w.cancel == nil { // wal was never started + return nil + } + level.Debug(w.logger).Log("msg", "WAL received shutdown request; canceling run loop") w.cancel() <-w.shutdownCh return w.log.Close() @@ -315,9 +528,10 @@ func (w *FileWAL) Log(tx uint64, record *walpb.Record) error { return err } - w.mtx.Lock() - heap.Push(w.queue, r) - w.mtx.Unlock() + w.protected.Lock() + heap.Push(&w.protected.queue, r) + w.metrics.WalQueueSize.Add(1) + w.protected.Unlock() return nil } @@ -342,6 +556,19 @@ func (w *FileWAL) writeRecord(buf *bytes.Buffer, record arrow.Record) error { } func (w *FileWAL) LogRecord(tx uint64, table string, record arrow.Record) error { + w.protected.Lock() + nextTx := w.protected.nextTx + w.protected.Unlock() + if tx < nextTx { + // Transaction should not be logged. This could happen if a truncation + // has been issued simultaneously as logging a WAL record. + level.Warn(w.logger).Log( + "msg", "attempted to log txn below next expected txn", + "tx", tx, + "next_tx", nextTx, + ) + return nil + } buf := w.getArrowBuf() defer w.putArrowBuf(buf) if err := w.writeRecord(buf, record); err != nil { @@ -372,9 +599,10 @@ func (w *FileWAL) LogRecord(tx uint64, table string, record arrow.Record) error return err } - w.mtx.Lock() - heap.Push(w.queue, r) - w.mtx.Unlock() + w.protected.Lock() + heap.Push(&w.protected.queue, r) + w.metrics.WalQueueSize.Add(1) + w.protected.Unlock() return nil } @@ -387,11 +615,18 @@ func (w *FileWAL) LastIndex() (uint64, error) { return w.log.LastIndex() } -func (w *FileWAL) Replay(handler func(tx uint64, record *walpb.Record) error) error { - firstIndex, err := w.log.FirstIndex() +func (w *FileWAL) Replay(tx uint64, handler ReplayHandlerFunc) (err error) { + if handler == nil { // no handler provided + return nil + } + + logFirstIndex, err := w.log.FirstIndex() if err != nil { return fmt.Errorf("read first index: %w", err) } + if tx == 0 || tx < 
logFirstIndex { + tx = logFirstIndex + } lastIndex, err := w.log.LastIndex() if err != nil { @@ -399,30 +634,46 @@ func (w *FileWAL) Replay(handler func(tx uint64, record *walpb.Record) error) er } // FirstIndex and LastIndex returns zero when there is no WAL files. - if firstIndex == 0 || lastIndex == 0 { + if tx == 0 || lastIndex == 0 { return nil } - level.Debug(w.logger).Log("msg", "replaying WAL", "first_index", firstIndex, "last_index", lastIndex) + level.Debug(w.logger).Log("msg", "replaying WAL", "first_index", tx, "last_index", lastIndex) defer func() { - // recover the panic to print more context. Exit afterwards regardless. - if err := recover(); err != nil { - level.Error(w.logger).Log("msg", "replaying WAL failed", "path", w.path, "first_index", firstIndex, "last_index", lastIndex, "err", err) - panic(err) + // recover a panic of reading a transaction. Truncate the wal to the + // last valid transaction. + if r := recover(); r != nil { + level.Error(w.logger).Log( + "msg", "replaying WAL failed", + "path", w.path, + "first_index", logFirstIndex, + "last_index", lastIndex, + "offending_index", tx, + "err", r, + ) + if err = w.log.TruncateBack(tx - 1); err != nil { + return + } + w.metrics.WalRepairs.Inc() + w.metrics.WalRepairsLostRecords.Add(float64((lastIndex - tx) + 1)) } }() - for tx := firstIndex; tx <= lastIndex; tx++ { + var entry types.LogEntry + for ; tx <= lastIndex; tx++ { level.Debug(w.logger).Log("msg", "replaying WAL record", "tx", tx) - data, err := w.log.Read(tx) - if err != nil { - return fmt.Errorf("read index %d: %w", tx, err) + if err := w.log.GetLog(tx, &entry); err != nil { + // Panic since this is most likely a corruption issue. The recover + // call above will truncate the WAL to the last valid transaction. 
+ panic(fmt.Sprintf("read index %d: %v", tx, err)) } record := &walpb.Record{} - if err := record.UnmarshalVT(data); err != nil { - return fmt.Errorf("unmarshal WAL record: %w", err) + if err := record.UnmarshalVT(entry.Data); err != nil { + // Panic since this is most likely a corruption issue. The recover + // call above will truncate the WAL to the last valid transaction. + panic(fmt.Sprintf("unmarshal WAL record: %v", err)) } if err := handler(tx, record); err != nil { @@ -430,8 +681,14 @@ func (w *FileWAL) Replay(handler func(tx uint64, record *walpb.Record) error) er } } - w.txmtx.Lock() - w.nextTx = lastIndex + 1 - w.txmtx.Unlock() return nil } + +func (w *FileWAL) RunAsync() { + ctx, cancel := context.WithCancel(context.Background()) + w.cancel = cancel + go func() { + w.run(ctx) + close(w.shutdownCh) + }() +} diff --git a/wal/wal_test.go b/wal/wal_test.go index 2f1d98260..fbce2caf2 100644 --- a/wal/wal_test.go +++ b/wal/wal_test.go @@ -1,12 +1,13 @@ package wal import ( + "fmt" "os" "path/filepath" "testing" + "time" "github.com/go-kit/log" - "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" walpb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/wal/v1alpha1" @@ -16,10 +17,10 @@ func TestWAL(t *testing.T) { dir := t.TempDir() w, err := Open( log.NewNopLogger(), - prometheus.NewRegistry(), dir, ) require.NoError(t, err) + w.RunAsync() require.NoError(t, w.Log(1, &walpb.Record{ Entry: &walpb.Entry{ @@ -36,12 +37,12 @@ func TestWAL(t *testing.T) { w, err = Open( log.NewNopLogger(), - prometheus.NewRegistry(), dir, ) require.NoError(t, err) + w.RunAsync() - err = w.Replay(func(tx uint64, r *walpb.Record) error { + err = w.Replay(0, func(tx uint64, r *walpb.Record) error { require.Equal(t, uint64(1), tx) require.Equal(t, []byte("test-data"), r.Entry.GetWrite().Data) require.Equal(t, "test-table", r.Entry.GetWrite().TableName) @@ -66,16 +67,11 @@ func TestWAL(t 
*testing.T) { require.NoError(t, os.RemoveAll(dir)) w, err = Open( log.NewNopLogger(), - prometheus.NewRegistry(), dir, ) require.NoError(t, err) + w.RunAsync() defer w.Close() - - err = w.Replay(func(tx uint64, r *walpb.Record) error { - return nil - }) - require.NoError(t, err) } func TestCorruptWAL(t *testing.T) { @@ -83,10 +79,10 @@ func TestCorruptWAL(t *testing.T) { w, err := Open( log.NewNopLogger(), - prometheus.NewRegistry(), path, ) require.NoError(t, err) + w.RunAsync() require.NoError(t, w.Log(1, &walpb.Record{ Entry: &walpb.Entry{ @@ -106,10 +102,10 @@ func TestCorruptWAL(t *testing.T) { w, err = Open( log.NewNopLogger(), - prometheus.NewRegistry(), path, ) require.NoError(t, err) + w.RunAsync() defer w.Close() lastIdx, err := w.LastIndex() @@ -126,10 +122,10 @@ func TestUnexpectedTxn(t *testing.T) { func() { w, err := Open( log.NewNopLogger(), - prometheus.NewRegistry(), walDir, ) require.NoError(t, err) + w.RunAsync() defer w.Close() emptyRecord := &walpb.Record{} @@ -144,12 +140,153 @@ func TestUnexpectedTxn(t *testing.T) { }() w, err := Open( log.NewNopLogger(), - prometheus.NewRegistry(), walDir, ) require.NoError(t, err) + w.RunAsync() defer w.Close() lastIndex, err := w.LastIndex() require.NoError(t, err) require.Equal(t, lastIndex, uint64(2)) } + +func TestWALTruncate(t *testing.T) { + logRecord := func(data string) *walpb.Record { + return &walpb.Record{ + Entry: &walpb.Entry{ + EntryType: &walpb.Entry_Write_{ + Write: &walpb.Entry_Write{ + Data: []byte(data), + TableName: "test-table", + }, + }, + }, + } + } + for i, tc := range []string{"BeforeLog", "AfterLog"} { + t.Run(tc, func(t *testing.T) { + dir := t.TempDir() + w, err := Open( + log.NewNopLogger(), + dir, + ) + require.NoError(t, err) + defer w.Close() + w.RunAsync() + + for j := uint64(1); j < 10; j++ { + require.NoError(t, w.Log(j, logRecord(fmt.Sprintf("test-data-%d", j)))) + } + if i == 1 { + // Wait until the last entry is written before issuing the + // truncate call. 
+ require.Eventually(t, func() bool { + tx, _ := w.LastIndex() + return tx == 9 + }, time.Second, 10*time.Millisecond) + } + require.NoError(t, w.Truncate(9)) + + // Wait for the WAL to asynchronously log and truncate. + require.Eventually(t, func() bool { + tx, _ := w.FirstIndex() + return tx == 9 + }, time.Second, 10*time.Millisecond) + + numRecords := 0 + require.NoError( + t, + w.Replay(0, func(tx uint64, r *walpb.Record) error { + numRecords++ + require.Equal(t, uint64(9), tx) + require.Equal(t, []byte("test-data-9"), r.Entry.GetWrite().Data) + return nil + }), + ) + require.Equal(t, 1, numRecords) + }) + } + + t.Run("Reset", func(t *testing.T) { + dir := t.TempDir() + w, err := Open( + log.NewNopLogger(), + dir, + ) + require.NoError(t, err) + defer w.Close() + w.RunAsync() + + for i := uint64(1); i < 10; i++ { + require.NoError(t, w.Log(i, logRecord("test-data-%d"))) + } + + // Truncate way past the current last index, which should be 9. + const truncateIdx = 20 + require.NoError(t, w.Truncate(truncateIdx)) + + require.Eventually(t, func() bool { + first, _ := w.FirstIndex() + last, _ := w.LastIndex() + return first == 0 && last == 0 + }, time.Second, 10*time.Millisecond) + + // Even though the WAL has been reset, we should not allow logging a + // record with a txn lower than the last truncation. This will only + // be observed on replay. + require.NoError(t, w.Log(1, logRecord("should-not-be-logged"))) + + // The only valid record to log is truncateIdx. Note that Truncate + // semantics are that Truncate truncates up to but not including the + // truncateIdx. In other words, truncateIdx becomes the first entry in + // the WAL. + require.NoError(t, w.Log(truncateIdx, logRecord("should-be-logged"))) + + // Wait for record to be logged. 
+ require.Eventually(t, func() bool { + first, _ := w.FirstIndex() + last, _ := w.LastIndex() + return first == truncateIdx && last == truncateIdx + }, time.Second, 10*time.Millisecond) + + numRecords := 0 + require.NoError( + t, + w.Replay(0, func(tx uint64, r *walpb.Record) error { + numRecords++ + require.Equal(t, uint64(truncateIdx), tx) + require.Equal(t, []byte("should-be-logged"), r.Entry.GetWrite().Data) + return nil + }), + ) + require.Equal(t, 1, numRecords) + }) +} + +func TestWALCloseTimeout(t *testing.T) { + dir := t.TempDir() + w, err := Open( + log.NewNopLogger(), + dir, + ) + require.NoError(t, err) + + w.RunAsync() + + // This will cause the WAL to enter a state where it will not close + // b/c it was expecting the next transaction to be 1. + require.NoError(t, w.Log(2, &walpb.Record{ + Entry: &walpb.Entry{ + EntryType: &walpb.Entry_Write_{ + Write: &walpb.Entry_Write{ + Data: []byte("test-data"), + TableName: "test-table", + }, + }, + }, + })) + + // This should not block forever, otherwise the test will fail by timeout + err = w.Close() + require.NoError(t, err) +}