From b5c6c80b3e82c30e1a46c4225b37bf49beb9cfc5 Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Sun, 12 May 2024 19:03:55 +1000 Subject: [PATCH] Update documentation --- src/datafusion/mod.rs | 30 ++++++++++++++++++++++++++++++ src/lib.rs | 3 +++ src/reader/metadata.rs | 2 +- 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/datafusion/mod.rs b/src/datafusion/mod.rs index 7d008191..06fd8d06 100644 --- a/src/datafusion/mod.rs +++ b/src/datafusion/mod.rs @@ -1,3 +1,30 @@ +//! Integration with [Apache DataFusion](https://datafusion.apache.org/) query engine to +//! allow querying ORC files with a SQL/DataFrame API. +//! +//! # Example usage +//! +//! ```no_run +//! # use datafusion::prelude::*; +//! # use datafusion::error::Result; +//! # use orc_rust::datafusion::{OrcReadOptions, SessionContextOrcExt}; +//! # #[tokio::main] +//! # async fn main() -> Result<()> { +//! let ctx = SessionContext::new(); +//! ctx.register_orc( +//! "table1", +//! "/path/to/file.orc", +//! OrcReadOptions::default(), +//! ) +//! .await?; +//! +//! ctx.sql("select a, b from table1") +//! .await? +//! .show() +//! .await?; +//! # Ok(()) +//! # } +//! ``` + use std::sync::Arc; use datafusion::arrow::datatypes::SchemaRef; @@ -22,6 +49,7 @@ mod file_format; mod object_store_reader; mod physical_exec; +/// Configuration options for reading ORC files. #[derive(Clone)] pub struct OrcReadOptions<'a> { pub file_extension: &'a str, @@ -57,6 +85,8 @@ impl ReadOptions<'_> for OrcReadOptions<'_> { } } +/// Exposes new functions for registering ORC tables onto a DataFusion [`SessionContext`] +/// to enable querying them using the SQL or DataFrame API. pub trait SessionContextOrcExt { fn read_orc( &self, diff --git a/src/lib.rs b/src/lib.rs index dcce86e8..e434967e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,6 +10,9 @@ //! let reader = ArrowReaderBuilder::try_new(file).unwrap().build(); //! let record_batches = reader.collect::<Result<Vec<_>, _>>().unwrap(); //! ``` +//! 
See the [`datafusion`] module for information on how to integrate with +//! [Apache DataFusion](https://datafusion.apache.org/). pub mod arrow_reader; #[cfg(feature = "async")] diff --git a/src/reader/metadata.rs b/src/reader/metadata.rs index 92345967..752f8168 100644 --- a/src/reader/metadata.rs +++ b/src/reader/metadata.rs @@ -2,7 +2,7 @@ //! //! File tail structure: //! -//! ``` +//! ```text //! ------------------ //! | Metadata | //! | |