A small Rust shared/static library that exposes Apache Parquet read/write operations to C.
This crate is useful when you want Rust's parquet and arrow implementations behind a C-compatible API, without exposing Rust types across the ABI boundary.
- Read a Parquet file and receive an ArrowArrayStream in C.
- Write a Parquet file from an ArrowArrayStream produced by C, C++, nanoarrow, Arrow C++, or another Arrow-compatible producer.
- Query basic Parquet file metadata.
- Get a debug-formatted Arrow schema string for a Parquet file.
- Retrieve thread-local error messages from failed FFI calls.
cargo build --release
Typical output paths:
Linux: target/release/libparquet_arrow_ffi.so
macOS: target/release/libparquet_arrow_ffi.dylib
Windows: target/release/parquet_arrow_ffi.dll
Static: target/release/libparquet_arrow_ffi.a
Example Linux compile/link command for a C program:
cc main.c \
-I. \
-Ltarget/release \
-lparquet_arrow_ffi \
-o main
At runtime on Linux, make sure the shared library can be found:
export LD_LIBRARY_PATH="$PWD/target/release:$LD_LIBRARY_PATH"
./main
On macOS, use DYLD_LIBRARY_PATH or install the .dylib in a loader-visible location. On Windows, place the .dll next to your executable or in a directory on PATH.
Include:
#include "parquet_arrow_ffi.h"
Status codes:
#define PARQUET_ARROW_OK 0
#define PARQUET_ARROW_ERROR -1
#define PARQUET_ARROW_PANIC -2
- PARQUET_ARROW_OK: call succeeded.
- PARQUET_ARROW_ERROR: normal error; call parquet_arrow_last_error_message().
- PARQUET_ARROW_PANIC: Rust panic was caught before crossing the C ABI boundary; call parquet_arrow_last_error_message().
const char* parquet_arrow_last_error_message(void);
void parquet_arrow_clear_last_error(void);
void parquet_arrow_free_c_string(char* s);
void parquet_arrow_release_stream(struct ArrowArrayStream* stream);
int parquet_arrow_file_info(
const char* path,
ParquetArrowFileInfo* out_info);
int parquet_arrow_schema_string(
const char* path,
char** out_schema_text);
int parquet_arrow_read_file(
const char* path,
size_t batch_size,
struct ArrowArrayStream* out_stream);
int parquet_arrow_write_file(
const char* path,
struct ArrowArrayStream* input_stream,
int compression,
size_t max_row_group_row_count);
Use these values with parquet_arrow_write_file():
enum ParquetArrowCompression {
PARQUET_ARROW_COMPRESSION_UNCOMPRESSED = 0,
PARQUET_ARROW_COMPRESSION_SNAPPY = 1,
PARQUET_ARROW_COMPRESSION_GZIP = 2,
PARQUET_ARROW_COMPRESSION_BROTLI = 3,
PARQUET_ARROW_COMPRESSION_ZSTD = 4,
PARQUET_ARROW_COMPRESSION_LZ4_RAW = 5
};
parquet_arrow_last_error_message() returns a pointer owned by Rust. Do not free it. The pointer remains valid until the next FFI call on the same thread that changes or clears the stored error.
const char* msg = parquet_arrow_last_error_message();
parquet_arrow_schema_string() allocates a C string. Free it with parquet_arrow_free_c_string().
char* schema = NULL;
int rc = parquet_arrow_schema_string("input.parquet", &schema);
if (rc == PARQUET_ARROW_OK) {
puts(schema);
parquet_arrow_free_c_string(schema);
}
parquet_arrow_read_file() writes a valid ArrowArrayStream into caller-provided storage. Release it when done:
parquet_arrow_release_stream(&stream);
or:
if (stream.release) {
stream.release(&stream);
}
Every ArrowSchema and ArrowArray returned by the stream must also be released according to Arrow C Data rules.
parquet_arrow_write_file() imports and consumes input_stream. After calling it, do not use or release that stream from C.
#include <stdio.h>
#include <stdlib.h>
#include "parquet_arrow_ffi.h"
static void check_parquet_arrow(int rc) {
if (rc != PARQUET_ARROW_OK) {
const char* msg = parquet_arrow_last_error_message();
fprintf(stderr, "parquet_arrow_ffi error: %s\n", msg ? msg : "unknown error");
exit(1);
}
}
#include <stdio.h>
#include <stdlib.h>
#include "parquet_arrow_ffi.h"
static void check_parquet_arrow(int rc) {
if (rc != PARQUET_ARROW_OK) {
const char* msg = parquet_arrow_last_error_message();
fprintf(stderr, "parquet_arrow_ffi error: %s\n", msg ? msg : "unknown error");
exit(1);
}
}
int main(void) {
ParquetArrowFileInfo info;
check_parquet_arrow(parquet_arrow_file_info("input.parquet", &info));
printf("rows: %lld\n", (long long)info.num_rows);
printf("row groups: %zu\n", info.num_row_groups);
printf("columns: %zu\n", info.num_columns);
char* schema = NULL;
check_parquet_arrow(parquet_arrow_schema_string("input.parquet", &schema));
printf("schema:\n%s\n", schema);
parquet_arrow_free_c_string(schema);
return 0;
}
This example opens a Parquet file and iterates over Arrow record batches. It only demonstrates stream handling; actual column decoding should be done with nanoarrow, Arrow C++, or your own Arrow C Data consumer.
#include <stdio.h>
#include <stdlib.h>
#include "parquet_arrow_ffi.h"
static void check_parquet_arrow(int rc) {
if (rc != PARQUET_ARROW_OK) {
const char* msg = parquet_arrow_last_error_message();
fprintf(stderr, "parquet_arrow_ffi error: %s\n", msg ? msg : "unknown error");
exit(1);
}
}
int main(void) {
struct ArrowArrayStream stream = {0};
/* batch_size = 65536 rows. Pass 0 to use the crate default. */
check_parquet_arrow(parquet_arrow_read_file("input.parquet", 65536, &stream));
struct ArrowSchema schema = {0};
check_parquet_arrow(stream.get_schema(&stream, &schema));
long long batch_count = 0;
long long row_count = 0;
while (1) {
struct ArrowArray batch = {0};
check_parquet_arrow(stream.get_next(&stream, &batch));
/* End of stream is represented by release == NULL. */
if (batch.release == NULL) {
break;
}
batch_count += 1;
row_count += batch.length;
/* Consume batch here. Then release it. */
batch.release(&batch);
}
printf("batches: %lld\n", batch_count);
printf("rows: %lld\n", row_count);
if (schema.release) {
schema.release(&schema);
}
parquet_arrow_release_stream(&stream);
return 0;
}
Your C code must provide an ArrowArrayStream. This can come from nanoarrow, Arrow C++, or another producer implementing the Arrow C Stream Interface.
#include <stdio.h>
#include <stdlib.h>
#include "parquet_arrow_ffi.h"
static void check_parquet_arrow(int rc) {
if (rc != PARQUET_ARROW_OK) {
const char* msg = parquet_arrow_last_error_message();
fprintf(stderr, "parquet_arrow_ffi error: %s\n", msg ? msg : "unknown error");
exit(1);
}
}
int main(void) {
struct ArrowArrayStream input_stream = {0};
/*
Fill input_stream from your C-side Arrow producer.
For example: nanoarrow, Arrow C++, or a custom ArrowArrayStream.
*/
check_parquet_arrow(parquet_arrow_write_file(
"output.parquet",
&input_stream,
PARQUET_ARROW_COMPRESSION_ZSTD,
0));
/*
input_stream has been consumed by parquet_arrow_write_file().
Do not use it or release it here.
*/
return 0;
}
All paths passed to this library must be UTF-8, NUL-terminated C strings.
The last-error message is stored per thread. A failure on one thread does not overwrite the last-error message on another thread.
- Do not pass null pointers unless the function explicitly allows it.
- Do not let Rust-owned pointers escape beyond their documented lifetime.
- Always release ArrowSchema and ArrowArray values returned through ArrowArrayStream.
- Release streams returned by parquet_arrow_read_file() exactly once.
- Treat streams passed to parquet_arrow_write_file() as consumed, even when the call fails after importing the stream.
Format Rust code:
cargo fmt
Check the Rust crate:
cargo check
Build release libraries:
cargo build --release