diff --git a/Cargo.toml b/Cargo.toml
index 3c2683a..23d4477 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -63,6 +63,8 @@ tokio = { version = "1.28", optional = true, features = [
 # cli
 anyhow = { version = "1.0", optional = true }
 clap = { version = "4.5.4", features = ["derive"], optional = true }
+serde = { version = "1.0", features = ["derive"], optional = true }
+serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true }
 
 # opendal
 opendal = { version = "0.53", optional = true, default-features = false }
@@ -74,13 +76,12 @@ criterion = { version = "0.5", default-features = false, features = ["async_toki
 opendal = { version = "0.53", default-features = false, features = ["services-memory"] }
 pretty_assertions = "1.3.0"
 proptest = "1.0.0"
-serde_json = { version = "1.0", default-features = false, features = ["std"] }
 
 [features]
 default = ["async"]
 
 async = ["async-trait", "futures", "futures-util", "tokio"]
-cli = ["anyhow", "clap"]
+cli = ["anyhow", "clap", "serde", "serde_json"]
 # Enable opendal support.
 opendal = ["dep:opendal"]
 
@@ -105,3 +106,23 @@ required-features = ["cli"]
 [[bin]]
 name = "orc-stats"
 required-features = ["cli"]
+
+[[bin]]
+name = "orc-read"
+required-features = ["cli"]
+
+[[bin]]
+name = "orc-schema"
+required-features = ["cli"]
+
+[[bin]]
+name = "orc-rowcount"
+required-features = ["cli"]
+
+[[bin]]
+name = "orc-index"
+required-features = ["cli"]
+
+[[bin]]
+name = "orc-layout"
+required-features = ["cli"]
diff --git a/src/bin/orc-index.rs b/src/bin/orc-index.rs
new file mode 100644
index 0000000..3823687
--- /dev/null
+++ b/src/bin/orc-index.rs
@@ -0,0 +1,166 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Inspect row indexes for a specific ORC column.
+//!
+//! Row indexes carry per-row-group statistics and positions; this tool surfaces
+//! them for debugging predicate pushdown and verifying writer-produced indexes.
+
+use std::{fs::File, path::PathBuf};
+
+use anyhow::{anyhow, Context, Result};
+use clap::Parser;
+use orc_rust::reader::metadata::read_metadata;
+use orc_rust::schema::{DataType, RootDataType};
+use orc_rust::statistics::{ColumnStatistics, TypeStatistics};
+use orc_rust::stripe::Stripe;
+
+#[derive(Debug, Parser)]
+#[command(
+    author,
+    version,
+    about = "Print row group index information for an ORC column"
+)]
+struct Args {
+    /// Path to the ORC file
+    file: PathBuf, // first positional argument; opened in `main` and passed to `read_metadata`
+    /// Column name to inspect (top-level columns only)
+    column: String, // second positional argument; compared with `==` against top-level names
+}
+
+/// Look up a top-level column by exact name, yielding (column index, type, name).
+fn find_column<'a>(root: &'a RootDataType, name: &str) -> Option<(usize, &'a DataType, &'a str)> {
+    let field = root.children().iter().find(|field| field.name() == name)?;
+    let ty = field.data_type();
+    Some((ty.column_index(), ty, field.name()))
+}
+
+/// Render one set of column statistics as a comma-separated `key=value` list.
+///
+/// The value count always comes first and `has_nulls=true` is added only when
+/// the statistics report nulls. Type-specific fields follow when the
+/// statistics carry them: `min`/`max` for the range-style kinds, `true_count`
+/// for buckets, `total_bytes` for binary data and the three child counters for
+/// collections.
+fn fmt_stats(stats: &ColumnStatistics) -> String {
+    use std::fmt::Display;
+
+    /// Append the `min=`/`max=` pair shared by all range-style statistics.
+    fn push_range(out: &mut Vec<String>, lo: impl Display, hi: impl Display) {
+        out.push(format!("min={lo}"));
+        out.push(format!("max={hi}"));
+    }
+
+    let mut fields = vec![format!("values={}", stats.number_of_values())];
+    if stats.has_null() {
+        fields.push("has_nulls=true".to_string());
+    }
+
+    // Six variants expose a min/max range and share `push_range`; the other
+    // three carry their own fields and are formatted individually.
+    match stats.type_statistics() {
+        Some(TypeStatistics::Integer { min, max, .. }) => push_range(&mut fields, min, max),
+        Some(TypeStatistics::Double { min, max, .. }) => push_range(&mut fields, min, max),
+        Some(TypeStatistics::String { min, max, .. }) => push_range(&mut fields, min, max),
+        Some(TypeStatistics::Decimal { min, max, .. }) => push_range(&mut fields, min, max),
+        Some(TypeStatistics::Date { min, max }) => push_range(&mut fields, min, max),
+        Some(TypeStatistics::Timestamp { min, max, .. }) => push_range(&mut fields, min, max),
+        Some(TypeStatistics::Bucket { true_count }) => {
+            fields.push(format!("true_count={true_count}"));
+        }
+        Some(TypeStatistics::Binary { sum }) => {
+            fields.push(format!("total_bytes={sum}"));
+        }
+        Some(TypeStatistics::Collection {
+            min_children,
+            max_children,
+            total_children,
+        }) => {
+            fields.push(format!("min_children={min_children}"));
+            fields.push(format!("max_children={max_children}"));
+            fields.push(format!("total_children={total_children}"));
+        }
+        None => {}
+    }
+
+    fields.join(", ")
+}
+
+/// Entry point: resolve the requested top-level column, then print the row
+/// group ranges and statistics recorded for it in each stripe's row index.
+fn main() -> Result<()> {
+    let args = Args::parse();
+    let mut file = File::open(&args.file)
+        .with_context(|| format!("failed to open {:?}", args.file.display()))?;
+    let metadata = read_metadata(&mut file)?;
+    let root = metadata.root_data_type();
+
+    // An unknown column is reported together with the names that do exist.
+    let (column_index, data_type, name) = find_column(root, &args.column).ok_or_else(|| {
+        let available = root
+            .children()
+            .iter()
+            .map(|c| c.name().to_string())
+            .collect::<Vec<_>>()
+            .join(", ");
+        anyhow!(
+            "column '{}' not found. Available columns: {available}",
+            args.column
+        )
+    })?;
+
+    println!(
+        "File: {} | Column: {} (index {})",
+        args.file.display(),
+        name,
+        column_index
+    );
+    println!("Type: {data_type}");
+    println!("Stripes: {}", metadata.stripe_metadatas().len());
+
+    for (stripe_idx, stripe_meta) in metadata.stripe_metadatas().iter().enumerate() {
+        let stripe = Stripe::new(&mut file, &metadata, root, stripe_meta)?;
+        let row_index = stripe.read_row_indexes(&metadata)?;
+
+        let col_index = match row_index.column(column_index) {
+            Some(col_index) => col_index,
+            None => {
+                println!("Stripe {stripe_idx}: no row index for column");
+                continue;
+            }
+        };
+        if col_index.num_row_groups() == 0 {
+            println!("Stripe {stripe_idx}: no row groups recorded");
+            continue;
+        }
+
+        let (stride, total_rows) = (col_index.rows_per_group(), row_index.total_rows());
+        println!("Stripe {stripe_idx}: rows_per_group={stride} total_rows={total_rows}");
+        for (row_group_idx, entry) in col_index.entries().enumerate() {
+            // The end is clamped so the final row group never extends past `total_rows`.
+            let start = row_group_idx * stride;
+            let end = (start + stride).min(total_rows);
+            let summary = match &entry.statistics {
+                Some(stats) => fmt_stats(stats),
+                None => "no statistics".to_string(),
+            };
+            println!("  Row group {row_group_idx} rows [{start},{end}) -> {summary}");
+        }
+    }
+
+    Ok(())
+}
diff --git a/src/bin/orc-layout.rs b/src/bin/orc-layout.rs
new file mode 100644
index 0000000..d67dbc3
--- /dev/null
+++ b/src/bin/orc-layout.rs
@@ -0,0 +1,192 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Emit a JSON description of the physical layout of an ORC file.
+//!
+//! Useful for inspecting stripe offsets, stream kinds/sizes, and column encodings
+//! to debug writer output or validate round trips.
+
+use std::fs::File;
+use std::io::Read;
+use std::path::PathBuf;
+
+use anyhow::{Context, Result};
+use clap::Parser;
+use orc_rust::compression::{Compression, Decompressor};
+use orc_rust::proto::{column_encoding, stream::Kind, StripeFooter};
+use orc_rust::reader::metadata::{read_metadata, FileMetadata};
+use orc_rust::reader::ChunkReader;
+use orc_rust::stripe::StripeMetadata;
+use prost::Message;
+use serde::Serialize;
+
+#[derive(Debug, Parser)]
+#[command(author, version, about = "Print ORC stripe and stream layout as JSON")]
+struct Args {
+    /// Path to the ORC file
+    file: PathBuf, // opened in `main`; its displayed form is also echoed in the JSON `file` field
+}
+
+#[derive(Serialize)]
+struct Layout {
+    file: String, // input path as displayed from the command-line argument
+    format_version: String, // `file_format_version()` rendered with `to_string()`
+    compression: Option<String>, // `None` when the file metadata reports no compression
+    rows: u64, // `number_of_rows()` from the file metadata
+    stripes: Vec<StripeLayout>, // one entry per stripe, in `stripe_metadatas()` order
+}
+
+#[derive(Serialize)]
+struct StripeLayout {
+    index: usize, // position of the stripe in `stripe_metadatas()`
+    offset: u64, // `StripeMetadata::offset()`; also the base for the stream offsets
+    index_length: u64, // `StripeMetadata::index_length()`
+    data_length: u64, // `StripeMetadata::data_length()`
+    footer_length: u64, // `StripeMetadata::footer_length()`
+    rows: u64, // `StripeMetadata::number_of_rows()`
+    streams: Vec<StreamLayout>, // streams listed in the stripe footer, in footer order
+    encodings: Vec<ColumnEncodingLayout>, // column encodings from the stripe footer, in footer order
+}
+
+#[derive(Serialize)]
+struct StreamLayout {
+    column: u32, // column id reported by the footer's stream entry
+    kind: String, // label produced by `kind_to_str`
+    length: u64, // stream length reported by the footer's stream entry
+    offset: u64, // stripe offset plus the lengths of all streams listed before this one
+}
+
+#[derive(Serialize)]
+struct ColumnEncodingLayout {
+    column: usize, // position of the encoding in the footer's `columns` list
+    kind: String, // label produced by `encoding_to_str`
+    dictionary_size: Option<u32>, // copied as-is from the footer's column encoding
+}
+
+/// Fetch, decompress and protobuf-decode the footer of `stripe`.
+fn read_stripe_footer<R: ChunkReader>(
+    reader: &R,
+    stripe: &StripeMetadata,
+    compression: Option<Compression>,
+) -> Result<StripeFooter> {
+    let (start, len) = (stripe.footer_offset(), stripe.footer_length());
+    let raw = reader.get_bytes(start, len).context("reading stripe footer")?;
+    let mut decoded = Vec::new();
+    let mut decompressor = Decompressor::new(raw, compression, vec![]);
+    let outcome = decompressor.read_to_end(&mut decoded);
+    outcome.context("decompressing stripe footer")?;
+    StripeFooter::decode(decoded.as_slice()).context("decoding stripe footer")
+}
+
+fn kind_to_str(kind: Kind) -> &'static str {
+    match kind {
+        Kind::Present => "PRESENT", // every label is the variant name in upper snake case
+        Kind::Data => "DATA",
+        Kind::Length => "LENGTH",
+        Kind::DictionaryData => "DICTIONARY_DATA",
+        Kind::Secondary => "SECONDARY",
+        Kind::RowIndex => "ROW_INDEX",
+        Kind::BloomFilter => "BLOOM_FILTER",
+        Kind::BloomFilterUtf8 => "BLOOM_FILTER_UTF8",
+        Kind::DictionaryCount => "DICTIONARY_COUNT",
+        Kind::EncryptedIndex => "ENCRYPTED_INDEX",
+        Kind::EncryptedData => "ENCRYPTED_DATA",
+        Kind::StripeStatistics => "STRIPE_STATISTICS",
+        Kind::FileStatistics => "FILE_STATISTICS", // no wildcard arm: a new variant fails to compile
+    }
+}
+
+fn encoding_to_str(kind: column_encoding::Kind) -> &'static str {
+    match kind {
+        column_encoding::Kind::Direct => "DIRECT", // labels are the variant names in upper snake case
+        column_encoding::Kind::Dictionary => "DICTIONARY",
+        column_encoding::Kind::DirectV2 => "DIRECT_V2",
+        column_encoding::Kind::DictionaryV2 => "DICTIONARY_V2",
+    }
+}
+
+/// Describe a single stripe for the JSON report.
+///
+/// Combines the lengths and row count from `stripe` with the streams and
+/// column encodings listed in the stripe footer, which is read through
+/// `reader` using the compression reported by `metadata`.
+fn build_stripe_layout<R: ChunkReader>(
+    reader: &R,
+    metadata: &FileMetadata,
+    stripe_idx: usize,
+    stripe: &StripeMetadata,
+) -> Result<StripeLayout> {
+    let footer = read_stripe_footer(reader, stripe, metadata.compression())?;
+
+    // Each stream's offset is the stripe offset plus the lengths of the streams before it.
+    let mut streams = Vec::with_capacity(footer.streams.len());
+    let mut next_offset = stripe.offset();
+    for stream in &footer.streams {
+        streams.push(StreamLayout {
+            column: stream.column(),
+            kind: kind_to_str(stream.kind()).to_string(),
+            length: stream.length(),
+            offset: next_offset,
+        });
+        next_offset += stream.length();
+    }
+
+    let mut encodings = Vec::with_capacity(footer.columns.len());
+    for (column, encoding) in footer.columns.iter().enumerate() {
+        encodings.push(ColumnEncodingLayout {
+            column,
+            kind: encoding_to_str(encoding.kind()).to_string(),
+            dictionary_size: encoding.dictionary_size,
+        });
+    }
+
+    Ok(StripeLayout {
+        index: stripe_idx,
+        offset: stripe.offset(),
+        index_length: stripe.index_length(),
+        data_length: stripe.data_length(),
+        footer_length: stripe.footer_length(),
+        rows: stripe.number_of_rows(),
+        streams,
+        encodings,
+    })
+}
+
+/// Entry point: read the file metadata, collect every stripe's layout and
+/// write the result to stdout as pretty-printed JSON.
+fn main() -> Result<()> {
+    let args = Args::parse();
+    let mut file = File::open(&args.file)
+        .with_context(|| format!("failed to open {:?}", args.file.display()))?;
+    let metadata = read_metadata(&mut file)?;
+
+    let mut stripes = Vec::with_capacity(metadata.stripe_metadatas().len());
+    for (idx, stripe) in metadata.stripe_metadatas().iter().enumerate() {
+        stripes.push(build_stripe_layout(&file, &metadata, idx, stripe)?);
+    }
+
+    let layout = Layout {
+        file: args.file.display().to_string(),
+        format_version: metadata.file_format_version().to_string(),
+        compression: metadata.compression().map(|c| c.to_string()),
+        rows: metadata.number_of_rows(),
+        stripes,
+    };
+
+    serde_json::to_writer_pretty(std::io::stdout(), &layout).context("writing layout")?;
+    Ok(())
+}
diff --git a/src/bin/orc-read.rs b/src/bin/orc-read.rs
new file mode 100644
index 0000000..890aed5
--- /dev/null
+++ b/src/bin/orc-read.rs
@@ -0,0 +1,136 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Stream an ORC file to stdout as CSV or JSON lines.
+//!
+//! This is a thin wrapper around `ArrowReaderBuilder` so that CLI behavior mirrors
+//! library reads (projection/predicate defaults, batch sizing, etc).
+
+use std::fs::File;
+use std::io::{self, Read};
+
+use anyhow::{Context, Result};
+use arrow::{csv, error::ArrowError, json, record_batch::RecordBatch};
+use bytes::Bytes;
+use clap::Parser;
+use orc_rust::reader::ChunkReader;
+use orc_rust::ArrowReaderBuilder;
+
+#[derive(Debug, Parser)]
+#[command(author, version, about = "Read ORC data and print to stdout")]
+struct Args {
+    /// Path to an ORC file or "-" to read from stdin
+    file: String, // compared against "-" in `main` to select stdin
+    /// Number of records to read (0 = all)
+    #[arg(short, long, default_value_t = 0)]
+    num_records: usize, // 0 is mapped to `usize::MAX` in `run_reader`
+    /// Output as JSON lines instead of CSV
+    #[arg(short, long)]
+    json: bool, // selects the line-delimited JSON writer; CSV with a header row otherwise
+    /// Batch size to use when reading
+    #[arg(long, default_value_t = 8192)]
+    batch_size: usize, // forwarded to `ArrowReaderBuilder::with_batch_size`
+}
+
+#[allow(clippy::large_enum_variant)] // `main` builds a single writer per run
+enum OutputWriter<W: io::Write, F: json::writer::JsonFormat> {
+    Csv(csv::Writer<W>), // Arrow CSV writer over the output sink `W`
+    Json(json::Writer<W, F>), // Arrow JSON writer over `W` using format `F`
+}
+
+impl<W: io::Write, F: json::writer::JsonFormat> OutputWriter<W, F> {
+    /// Write one record batch through whichever writer is active.
+    fn write(&mut self, batch: &RecordBatch) -> Result<(), ArrowError> {
+        match self {
+            Self::Csv(inner) => inner.write(batch),
+            Self::Json(inner) => inner.write(batch),
+        }
+    }
+
+    /// Finish the output stream.
+    /// Only the JSON writer's `finish` is invoked; the CSV variant has no
+    /// finishing step here and always returns `Ok(())`.
+    fn finish(&mut self) -> Result<(), ArrowError> {
+        match self {
+            Self::Json(inner) => inner.finish(),
+            Self::Csv(_) => Ok(()),
+        }
+    }
+}
+
+/// Decode `source` with `ArrowReaderBuilder` and stream its batches to `writer`.
+///
+/// Writes at most `args.num_records` rows (0 = no limit), truncating the last batch if needed.
+fn run_reader<R: ChunkReader>(
+    source: R,
+    args: &Args,
+    mut writer: OutputWriter<impl io::Write, impl json::writer::JsonFormat>,
+) -> Result<()> {
+    let reader = ArrowReaderBuilder::try_new(source)?
+        .with_batch_size(args.batch_size)
+        .build();
+
+    let mut remaining = match args.num_records {
+        0 => usize::MAX,
+        limit => limit,
+    };
+    for batch in reader {
+        let mut batch = batch?;
+        if remaining < batch.num_rows() {
+            batch = batch.slice(0, remaining);
+        }
+        writer.write(&batch)?;
+        remaining -= batch.num_rows();
+        // Checked after writing so the reader is not asked to decode a batch past the limit.
+        if remaining == 0 {
+            break;
+        }
+    }
+    writer.finish().context("closing writer")?;
+    Ok(())
+}
+
+/// Entry point: build the stdout writer for the requested format, then decode
+/// either stdin (when the path is `-`) or the named file through `run_reader`.
+/// Output is line-delimited JSON with `--json`, otherwise CSV with a header row.
+fn main() -> Result<()> {
+    let args = Args::parse();
+
+    // The output format is independent of the input source, so the writer is
+    // built once here rather than separately in each branch below.
+    let stdout = io::stdout();
+    let handle = stdout.lock();
+    let writer: OutputWriter<_, json::writer::LineDelimited> = if args.json {
+        OutputWriter::Json(
+            json::WriterBuilder::new().build::<_, json::writer::LineDelimited>(handle),
+        )
+    } else {
+        OutputWriter::Csv(csv::WriterBuilder::new().with_header(true).build(handle))
+    };
+
+    if args.file == "-" {
+        // Stdin is read to the end first and handed over as one in-memory
+        // buffer, so memory use grows with the size of the piped input.
+        let mut buf = Vec::new();
+        io::stdin().read_to_end(&mut buf).context("reading stdin")?;
+        let bytes = Bytes::from(buf);
+        run_reader(bytes, &args, writer)
+    } else {
+        let file = File::open(&args.file).with_context(|| format!("opening {}", args.file))?;
+        run_reader(file, &args, writer)
+    }
+}
diff --git a/src/bin/orc-rowcount.rs b/src/bin/orc-rowcount.rs
new file mode 100644
index 0000000..bfcea82
--- /dev/null
+++ b/src/bin/orc-rowcount.rs
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Return the number of rows in one or more ORC files.
+//!
+//! Uses metadata only (no row decoding), so it is fast even on large files.
+
+use std::{fs::File, path::PathBuf};
+
+use anyhow::{Context, Result};
+use clap::Parser;
+use orc_rust::reader::metadata::read_metadata;
+
+#[derive(Debug, Parser)]
+#[command(author, version, about = "Return the number of rows in ORC files")]
+struct Args {
+    /// List of ORC files to inspect
+    #[arg(required = true)]
+    files: Vec<PathBuf>, // iterated in order by `main`, one output line per file
+}
+
+/// Print `<path>: <rows>` for every file, stopping at the first failure.
+fn main() -> Result<()> {
+    for path in Args::parse().files {
+        let mut file =
+            File::open(&path).with_context(|| format!("failed to open {:?}", path.display()))?;
+        // Name the offending file: with several inputs a bare decode error is ambiguous.
+        let metadata = read_metadata(&mut file)
+            .with_context(|| format!("failed to read metadata from {:?}", path.display()))?;
+        println!("{}: {}", path.display(), metadata.number_of_rows());
+    }
+    Ok(())
+}
diff --git a/src/bin/orc-schema.rs b/src/bin/orc-schema.rs
new file mode 100644
index 0000000..05e6f85
--- /dev/null
+++ b/src/bin/orc-schema.rs
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Print the schema and metadata of an ORC file.
+
+use std::{fs::File, path::PathBuf};
+
+use anyhow::{Context, Result};
+use clap::Parser;
+use orc_rust::reader::metadata::read_metadata;
+
+#[derive(Debug, Parser)]
+#[command(
+    author,
+    version,
+    about = "Print the schema and metadata of an ORC file"
+)]
+struct Args {
+    /// Path to the ORC file
+    file: PathBuf, // opened in `main` and passed to `read_metadata`
+    /// Include stripe offsets and row counts
+    #[arg(short, long)]
+    verbose: bool, // when set, `main` appends the per-stripe "Stripe layout" section
+}
+
+/// Entry point: print file-level metadata and the schema, plus per-stripe
+/// offsets, lengths and row counts when `--verbose` is set.
+fn main() -> Result<()> {
+    let args = Args::parse();
+    let mut file = File::open(&args.file)
+        .with_context(|| format!("failed to open {:?}", args.file.display()))?;
+    let metadata = read_metadata(&mut file)?;
+
+    let compression = match metadata.compression() {
+        Some(codec) => codec.to_string(),
+        None => "None".to_string(),
+    };
+    let stride = match metadata.row_index_stride() {
+        Some(stride) => stride.to_string(),
+        None => "None".to_string(),
+    };
+    println!("File: {}", args.file.display());
+    println!("Format version: {}", metadata.file_format_version());
+    println!("Compression: {compression}");
+    println!("Row index stride: {stride}");
+    println!("Rows: {}", metadata.number_of_rows());
+    println!("Stripes: {}", metadata.stripe_metadatas().len());
+    println!();
+    println!("Schema:\n{}", metadata.root_data_type());
+
+    if !args.verbose {
+        return Ok(());
+    }
+    println!("\nStripe layout:");
+    for (idx, stripe) in metadata.stripe_metadatas().iter().enumerate() {
+        println!("Stripe {idx}:");
+        println!("  offset: {}", stripe.offset());
+        println!("  index length: {}", stripe.index_length());
+        println!("  data length: {}", stripe.data_length());
+        println!("  footer length: {}", stripe.footer_length());
+        println!("  rows: {}", stripe.number_of_rows());
+    }
+    Ok(())
+}
diff --git a/src/lib.rs b/src/lib.rs
index f7477a3..84110e8 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -60,7 +60,7 @@ mod memory;
 pub mod predicate;
 pub mod projection;
 #[allow(dead_code)]
-mod proto;
+pub mod proto;
 pub mod reader;
 pub mod row_group_filter;
 pub mod row_index;
diff --git a/tests/bin/main.rs b/tests/bin/main.rs
new file mode 100644
index 0000000..3c759de
--- /dev/null
+++ b/tests/bin/main.rs
@@ -0,0 +1,111 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Smoke tests for CLI binaries.
+
+#![cfg(feature = "cli")]
+
+use std::fs::File;
+use std::path::PathBuf;
+use std::process::Command;
+
+use orc_rust::reader::metadata::read_metadata;
+use serde_json::Value;
+
+/// Resolve `name` inside the crate's `tests/integration/data` fixture
+/// directory, anchored at `CARGO_MANIFEST_DIR`.
+fn data_path(name: &str) -> PathBuf {
+    let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    path.extend(["tests", "integration", "data", name]);
+    path
+}
+
+/// Run `bin_env` with `args`; returns (exit success, stdout) and echoes stderr for diagnostics.
+fn run_cmd(bin_env: &str, args: &[&str]) -> (bool, String) {
+    let output = Command::new(bin_env).args(args).output().unwrap();
+    eprint!("{}", String::from_utf8_lossy(&output.stderr));
+    (output.status.success(), String::from_utf8_lossy(&output.stdout).into_owned())
+}
+
+#[test]
+fn orc_rowcount_matches_metadata() {
+    let file = data_path("TestOrcFile.test1.orc");
+    let mut fh = File::open(&file).unwrap();
+    let expected = read_metadata(&mut fh).unwrap().number_of_rows();
+    let (ok, stdout) = run_cmd(
+        env!("CARGO_BIN_EXE_orc-rowcount"),
+        &[file.to_str().unwrap()],
+    );
+    assert!(ok, "orc-rowcount failed");
+    // The path itself contains digits, so require the trailing `: <rows>` field.
+    assert!(
+        stdout.trim_end().ends_with(&format!(": {expected}")),
+        "expected rowcount {expected}, got {stdout}"
+    );
+}
+
+#[test]
+fn orc_schema_prints_schema() {
+    // Smoke test: the binary must exit successfully and print the "Schema:"
+    // header; the schema text itself is not checked.
+    let orc = data_path("TestOrcFile.test1.orc");
+    let (ok, out) = run_cmd(env!("CARGO_BIN_EXE_orc-schema"), &[orc.to_str().unwrap()]);
+    assert!(ok);
+    let has_header = out.contains("Schema:");
+    assert!(has_header, "schema output missing Schema header");
+}
+
+#[test]
+fn orc_read_limits_records() {
+    // `--num-records 2` in JSON mode must produce exactly two lines of output,
+    // each of which parses as a JSON value.
+    let orc = data_path("TestOrcFile.test1.orc");
+    let cli_args = ["--json", "--num-records", "2", orc.to_str().unwrap()];
+    let (ok, stdout) = run_cmd(env!("CARGO_BIN_EXE_orc-read"), &cli_args);
+    assert!(ok);
+
+    assert_eq!(2, stdout.lines().count(), "expected exactly 2 JSON lines");
+    for line in stdout.lines() {
+        serde_json::from_str::<Value>(line).expect("valid JSON line");
+    }
+}
+
+#[test]
+fn orc_layout_json_matches_stripe_count() {
+    let orc = data_path("TestOrcFile.test1.orc");
+    let mut fh = File::open(&orc).unwrap();
+    let expected_stripes = read_metadata(&mut fh).unwrap().stripe_metadatas().len();
+
+    let (ok, stdout) = run_cmd(env!("CARGO_BIN_EXE_orc-layout"), &[orc.to_str().unwrap()]);
+    assert!(ok);
+    let json: Value = serde_json::from_str(&stdout).unwrap();
+    let reported = json["stripes"].as_array().expect("stripes is array");
+    assert_eq!(expected_stripes, reported.len());
+}
+
+#[test]
+fn orc_index_completes() {
+    // Smoke test: `orc-index` must succeed on the `int1` column of the
+    // predicate-pushdown fixture and mention "Stripe" in its output.
+    // NOTE(review): the `Stripes: N` summary line alone satisfies this check.
+    let orc = data_path("TestOrcFile.testPredicatePushdown.orc");
+    let cli_args = [orc.to_str().unwrap(), "int1"];
+    let (ok, stdout) = run_cmd(env!("CARGO_BIN_EXE_orc-index"), &cli_args);
+    assert!(ok);
+
+    let mentions_stripe = stdout.contains("Stripe");
+    assert!(mentions_stripe, "expected stripe output from orc-index");
+}