Skip to content

Commit

Permalink
Pass different pandas `to_parquet` kwargs depending on the parquet engine
Browse files (browse the repository at this point in the history)
  • Loading branch information
norlandrhagen committed Sep 26, 2024
1 parent 0d9e2be commit 15908c8
Showing 1 changed file with 13 additions and 5 deletions.
18 changes: 13 additions & 5 deletions fsspec/implementations/reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,17 +479,25 @@ def write(self, field, record, base_url=None, storage_options=None):
fn = f"{base_url or self.out_root}/{field}/refs.{record}.parq"
self.fs.mkdirs(f"{base_url or self.out_root}/{field}", exist_ok=True)

if self.engine == "pyarrow":
df_backend_kwargs = {}
elif self.engine == "fastparquet":
df_backend_kwargs = {
"stats": False,
"object_encoding": object_encoding,
"has_nulls": has_nulls,
}
else:
raise NotImplementedError(f"{self.engine} not supported")

df.to_parquet(
"tmp.parquet",
fn,
engine=self.engine,
storage_options=storage_options
or getattr(self.fs, "storage_options", None),
compression="zstd",
index=False,
# stats=False,
# object_encoding=object_encoding,
# has_nulls=has_nulls,
# **kwargs,
**df_backend_kwargs,
)
partition.clear()
self._items.pop((field, record))
Expand Down

0 comments on commit 15908c8

Please sign in to comment.