gluonts.dataset.arrow package#

Arrow Dataset#

Fast and efficient datasets using pyarrow.

This module provides three file types:

  • ArrowFile (arrow random-access binary format)

  • ArrowStreamFile (arrow streaming binary format)

  • ParquetFile

class gluonts.dataset.arrow.ArrowFile(path: pathlib.Path, _start: int = 0, _take: Union[int, NoneType] = None)[source]#

Bases: gluonts.dataset.arrow.file.File

property batch_offsets#
decoder: gluonts.dataset.arrow.dec.ArrowDecoder#
iter_batches()[source]#
location_for(idx)[source]#
metadata() Dict[str, str][source]#
path: pathlib.Path#
reader: pyarrow.ipc.RecordBatchFileReader#
property schema#
class gluonts.dataset.arrow.ArrowStreamFile(path: pathlib.Path, _start: int = 0, _take: Union[int, NoneType] = None)[source]#

Bases: gluonts.dataset.arrow.file.File

metadata() Dict[str, str][source]#
path: pathlib.Path#
class gluonts.dataset.arrow.ArrowWriter(stream: bool = False, suffix: str = '.feather', compression: Union[typing_extensions.Literal['lz4'], typing_extensions.Literal['zstd'], NoneType] = None, flatten_arrays: bool = True, metadata: Union[dict, NoneType] = None)[source]#

Bases: gluonts.dataset.DatasetWriter

compression: Optional[Union[typing_extensions.Literal['lz4'], typing_extensions.Literal['zstd']]] = None#
flatten_arrays: bool = True#
metadata: Optional[dict] = None#
stream: bool = False#
suffix: str = '.feather'#
write_to_file(dataset: gluonts.dataset.Dataset, path: pathlib.Path) None[source]#
write_to_folder(dataset: gluonts.dataset.Dataset, folder: pathlib.Path, name: Optional[str] = None) None[source]#
class gluonts.dataset.arrow.File[source]#

Bases: object

SUFFIXES = {'.arrow', '.feather', '.parquet'}#
static infer(path: pathlib.Path) Union[gluonts.dataset.arrow.file.ArrowFile, gluonts.dataset.arrow.file.ArrowStreamFile, gluonts.dataset.arrow.file.ParquetFile][source]#

Return ArrowFile, ArrowStreamFile or ParquetFile by inspecting provided path.

Arrow’s random-access format starts with ARROW1, so we peek the provided file for it.

abstract metadata() Dict[str, str][source]#
class gluonts.dataset.arrow.ParquetFile(path: pathlib.Path, _start: int = 0, _take: Union[int, NoneType] = None, _row_group_sizes: List[int] = <factory>)[source]#

Bases: gluonts.dataset.arrow.file.File

location_for(idx)[source]#
metadata() Dict[str, str][source]#
path: pathlib.Path#
reader: pyarrow.parquet.core.ParquetFile#
class gluonts.dataset.arrow.ParquetWriter(suffix: str = '.parquet', flatten_arrays: bool = True, metadata: Union[dict, NoneType] = None)[source]#

Bases: gluonts.dataset.DatasetWriter

flatten_arrays: bool = True#
metadata: Optional[dict] = None#
suffix: str = '.parquet'#
write_to_file(dataset: gluonts.dataset.Dataset, path: pathlib.Path) None[source]#
write_to_folder(dataset: gluonts.dataset.Dataset, folder: pathlib.Path, name: Optional[str] = None) None[source]#
gluonts.dataset.arrow.write_dataset(Writer, dataset, path, metadata=None, batch_size=1024, flatten_arrays=True)[source]#