From e50e2049e4d0c9ef84ffd15658c52e001a288133 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 18 Mar 2023 13:55:46 -0700 Subject: [PATCH 001/413] Fix minor warning (#289) --- src/expr/literal.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/expr/literal.rs b/src/expr/literal.rs index b29497e64..f99216957 100644 --- a/src/expr/literal.rs +++ b/src/expr/literal.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::errors::{py_runtime_err, DataFusionError}; +use crate::errors::DataFusionError; use datafusion_common::ScalarValue; use pyo3::prelude::*; From 024ba3aed68fbbe8558d0d4918eb6a559b4e430c Mon Sep 17 00:00:00 2001 From: Dejan Simic <10134699+simicd@users.noreply.github.com> Date: Tue, 21 Mar 2023 18:18:11 +0100 Subject: [PATCH 002/413] Implement describe method (#293) --- datafusion/tests/test_dataframe.py | 24 ++++++++++++++++++++++++ src/dataframe.rs | 7 +++++++ 2 files changed, 31 insertions(+) diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py index 611bcabe4..db59e3c52 100644 --- a/datafusion/tests/test_dataframe.py +++ b/datafusion/tests/test_dataframe.py @@ -607,3 +607,27 @@ def test_to_pydict(df): pydict = df.to_pydict() assert type(pydict) == dict assert pydict == {"a": [1, 2, 3], "b": [4, 5, 6], "c": [8, 5, 8]} + + +def test_describe(df): + + # Calculate statistics + df = df.describe() + + # Collect the result + result = df.to_pydict() + + assert result == { + "describe": [ + "count", + "null_count", + "mean", + "std", + "min", + "max", + "median", + ], + "a": [3.0, 3.0, 2.0, 1.0, 1.0, 3.0, 2.0], + "b": [3.0, 3.0, 5.0, 1.0, 4.0, 6.0, 5.0], + "c": [3.0, 3.0, 7.0, 1.7320508075688772, 5.0, 8.0, 8.0], + } diff --git a/src/dataframe.rs b/src/dataframe.rs index b21f56104..605e11611 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -76,6 +76,13 @@ impl PyDataFrame { } } + /// Calculate summary statistics for a DataFrame + fn 
describe(&self, py: Python) -> PyResult { + let df = self.df.as_ref().clone(); + let stat_df = wait_for_future(py, df.describe())?; + Ok(Self::new(stat_df)) + } + /// Returns the schema from the logical plan fn schema(&self) -> PyArrowType { PyArrowType(self.df.schema().into()) From b70f1c0383ffb68f4e95999df7a819a5c43bf1ff Mon Sep 17 00:00:00 2001 From: Dejan Simic <10134699+simicd@users.noreply.github.com> Date: Wed, 22 Mar 2023 15:35:22 +0100 Subject: [PATCH 003/413] fix: Printed results not visible in debugger & notebooks (#296) * Use builtin print function for show * Refactor --- src/dataframe.rs | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/dataframe.rs b/src/dataframe.rs index 605e11611..fcce90d90 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -172,8 +172,7 @@ impl PyDataFrame { #[pyo3(signature = (num=20))] fn show(&self, py: Python, num: usize) -> PyResult<()> { let df = self.df.as_ref().clone().limit(0, Some(num))?; - let batches = wait_for_future(py, df.collect())?; - pretty::print_batches(&batches).map_err(|err| PyArrowException::new_err(err.to_string())) + print_dataframe(py, df) } /// Filter out duplicate rows @@ -217,8 +216,7 @@ impl PyDataFrame { #[pyo3(signature = (verbose=false, analyze=false))] fn explain(&self, py: Python, verbose: bool, analyze: bool) -> PyResult<()> { let df = self.df.as_ref().clone().explain(verbose, analyze)?; - let batches = wait_for_future(py, df.collect())?; - pretty::print_batches(&batches).map_err(|err| PyArrowException::new_err(err.to_string())) + print_dataframe(py, df) } /// Get the logical plan for this `DataFrame` @@ -389,3 +387,20 @@ impl PyDataFrame { Ok(wait_for_future(py, self.df.as_ref().clone().count())?) 
} } + +/// Print DataFrame +fn print_dataframe(py: Python, df: DataFrame) -> PyResult<()> { + // Get string representation of record batches + let batches = wait_for_future(py, df.collect())?; + let batches_as_string = pretty::pretty_format_batches(&batches); + let result = match batches_as_string { + Ok(batch) => format!("DataFrame()\n{batch}"), + Err(err) => format!("Error: {:?}", err.to_string()), + }; + + // Import the Python 'builtins' module to access the print function + // Note that println! does not print to the Python debug console and is not visible in notebooks for instance + let print = py.import("builtins")?.getattr("print")?; + print.call1((result,))?; + Ok(()) +} From a3880400fb290e9c0428503b3fa2d5dacc7dceed Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 22 Mar 2023 08:35:54 -0600 Subject: [PATCH 004/413] add package.include and remove wildcard dependency (#295) --- Cargo.toml | 3 ++- dev/release/README.md | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 03dafefe8..03cd68422 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ readme = "README.md" license = "Apache-2.0" edition = "2021" rust-version = "1.64" +include = ["/src", "/LICENSE.txt"] [features] default = ["mimalloc"] @@ -41,7 +42,7 @@ datafusion-optimizer = { version = "20.0.0" } datafusion-sql = { version = "20.0.0" } datafusion-substrait = { version = "20.0.0" } uuid = { version = "1.2", features = ["v4"] } -mimalloc = { version = "*", optional = true, default-features = false } +mimalloc = { version = "0.1", optional = true, default-features = false } async-trait = "0.1" futures = "0.3" object_store = { version = "0.5.3", features = ["aws", "gcp", "azure"] } diff --git a/dev/release/README.md b/dev/release/README.md index cec0eef5e..84058a606 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -198,6 +198,14 @@ Create the source release tarball: ./dev/release/release-tarball.sh 0.8.0 1 ``` +### Publishing 
Rust Crate to crates.io + +Some projects depend on the Rust crate directly, so we publish this to crates.io + +```shell +cargo publish +``` + ### Publishing Python Artifacts to PyPi Go to the Test PyPI page of Datafusion, and download From 5269719dca2a82d9f8b40305307c02fe7bc44ddf Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 29 Mar 2023 08:39:07 -0600 Subject: [PATCH 005/413] update main branch name in docs workflow (#303) --- .github/workflows/docs.yaml | 2 +- docs/build.sh | 0 2 files changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 docs/build.sh diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index b1c9ffc12..170f77a19 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -1,7 +1,7 @@ on: push: branches: - - master + - main tags-ignore: - "**-rc**" diff --git a/docs/build.sh b/docs/build.sh old mode 100644 new mode 100755 From 65620f42d35c6c7af0214ef76f502e80339dcfb6 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 30 Mar 2023 10:17:40 -0600 Subject: [PATCH 006/413] Upgrade to DF 21 (#301) --- Cargo.lock | 1309 +++++++++++++++++++++++----- Cargo.toml | 15 +- datafusion/tests/test_dataframe.py | 7 +- datafusion/tests/test_indexing.py | 2 +- datafusion/tests/test_store.py | 2 +- src/common/df_field.rs | 15 +- src/context.rs | 18 +- src/dataframe.rs | 2 +- src/expr/column.rs | 2 +- src/expr/table_scan.rs | 4 +- 10 files changed, 1134 insertions(+), 242 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 23f1486b4..91a703ee5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -62,9 +62,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.69" +version = "1.0.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800" +checksum = "7de8ce5e0f9f8d88245311066a578d72b7af3e7088f32783804676302df237e4" [[package]] name = "apache-avro" @@ -93,11 +93,17 @@ dependencies = [ "zerocopy", ] +[[package]] +name 
= "arc-swap" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" + [[package]] name = "arrayref" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" +checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" [[package]] name = "arrayvec" @@ -113,16 +119,16 @@ checksum = "f410d3907b6b3647b9e7bca4551274b2e3d716aa940afb67b7287257401da921" dependencies = [ "ahash", "arrow-arith", - "arrow-array", - "arrow-buffer", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", "arrow-cast", "arrow-csv", - "arrow-data", + "arrow-data 34.0.0", "arrow-ipc", "arrow-json", "arrow-ord", "arrow-row", - "arrow-schema", + "arrow-schema 34.0.0", "arrow-select", "arrow-string", "comfy-table", @@ -135,10 +141,10 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f87391cf46473c9bc53dab68cb8872c3a81d4dfd1703f1c8aa397dba9880a043" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "chrono", "half", "num", @@ -151,10 +157,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d35d5475e65c57cffba06d0022e3006b677515f99b54af33a7cd54f6cdd4a5b5" dependencies = [ "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", + "chrono", + "half", + "hashbrown 0.13.2", + "num", +] + +[[package]] +name = "arrow-array" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43489bbff475545b78b0e20bde1d22abd6c99e54499839f9e815a2fa5134a51b" +dependencies = [ + "ahash", + "arrow-buffer 35.0.0", + "arrow-data 35.0.0", + "arrow-schema 
35.0.0", "chrono", + "chrono-tz", "half", "hashbrown 0.13.2", "num", @@ -170,16 +193,26 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-buffer" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3759e4a52c593281184787af5435671dc8b1e78333e5a30242b2e2d6e3c9d1f" +dependencies = [ + "half", + "num", +] + [[package]] name = "arrow-cast" version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a7285272c9897321dfdba59de29f5b05aeafd3cdedf104a941256d155f6d304" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "arrow-select", "chrono", "lexical-core", @@ -192,11 +225,11 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "981ee4e7f6a120da04e00d0b39182e1eeacccb59c8da74511de753c56b7fddf7" dependencies = [ - "arrow-array", - "arrow-buffer", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "chrono", "csv", "csv-core", @@ -211,8 +244,20 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27cc673ee6989ea6e4b4e8c7d461f7e06026a096c8f0b1a7288885ff71ae1e56" dependencies = [ - "arrow-buffer", - "arrow-schema", + "arrow-buffer 34.0.0", + "arrow-schema 34.0.0", + "half", + "num", +] + +[[package]] +name = "arrow-data" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19c7787c6cdbf9539b1ffb860bfc18c5848926ec3d62cbd52dc3b1ea35c874fd" +dependencies = [ + "arrow-buffer 35.0.0", + "arrow-schema 35.0.0", "half", "num", ] @@ -223,11 +268,11 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e37b8b69d9e59116b6b538e8514e0ec63a30f08b617ce800d31cb44e3ef64c1a" dependencies = [ - 
"arrow-array", - "arrow-buffer", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "flatbuffers", ] @@ -237,11 +282,11 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80c3fa0bed7cfebf6d18e46b733f9cb8a1cb43ce8e6539055ca3e1e48a426266" dependencies = [ - "arrow-array", - "arrow-buffer", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "chrono", "half", "indexmap", @@ -256,10 +301,10 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d247dce7bed6a8d6a3c6debfa707a3a2f694383f0c692a39d736a593eae5ef94" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "arrow-select", "num", ] @@ -271,10 +316,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8d609c0181f963cea5c70fddf9a388595b5be441f3aa1d1cdbf728ca834bbd3a" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "half", "hashbrown 0.13.2", ] @@ -288,16 +333,22 @@ dependencies = [ "bitflags", ] +[[package]] +name = "arrow-schema" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf6b26f6a6f8410e3b9531cbd1886399b99842701da77d4b4cf2013f7708f20f" + [[package]] name = "arrow-select" version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a513d89c2e1ac22b28380900036cf1f3992c6443efc5e079de631dcf83c6888" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + 
"arrow-schema 34.0.0", "num", ] @@ -307,10 +358,10 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5288979b2705dae1114c864d73150629add9153b9b8f1d7ee3963db94c372ba5" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "arrow-select", "regex", "regex-syntax", @@ -336,24 +387,24 @@ dependencies = [ [[package]] name = "async-recursion" -version = "1.0.2" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b015a331cc64ebd1774ba119538573603427eaace0a1950c423ab971f903796" +checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.10", ] [[package]] name = "async-trait" -version = "0.1.66" +version = "0.1.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b84f9ebcc6c1f5b8cb160f6990096a5c127f423fcb6e1ccc46c370cbdfb75dfc" +checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.10", ] [[package]] @@ -427,6 +478,27 @@ dependencies = [ "alloc-stdlib", ] +[[package]] +name = "bstr" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d4260bcc2e8fc9df1eac4919a720effeb63a3f0952f5bf4944adfa18897f09" +dependencies = [ + "memchr", + "once_cell", + "regex-automata", + "serde", +] + +[[package]] +name = "btoi" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd6407f73a9b8b6162d8a2ef999fe6afd7cc15902ebf42c5cd296addf17e0ad" +dependencies = [ + "num-traits", +] + [[package]] name = "bumpalo" version = "3.12.0" @@ -492,11 +564,39 @@ dependencies = [ "num-integer", "num-traits", "serde", - "time", + "time 0.1.45", "wasm-bindgen", "winapi", ] +[[package]] +name = "chrono-tz" 
+version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa48fa079165080f11d7753fd0bc175b7d391f276b965fe4b55bfad67856e463" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf", +] + +[[package]] +name = "chrono-tz-build" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9998fb9f7e9b2111641485bf8beb32f92945f97f92a3d061f744cfef335f751" +dependencies = [ + "parse-zoneinfo", + "phf", + "phf_codegen", +] + +[[package]] +name = "clru" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8191fa7302e03607ff0e237d4246cc043ff5b3cb9409d995172ba3bea16b807" + [[package]] name = "codespan-reporting" version = "0.11.1" @@ -554,9 +654,9 @@ checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" [[package]] name = "cpufeatures" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d997bd5e24a5928dd43e46dc529867e207907fe0b239c3477d924f7f2ca320" +checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" dependencies = [ "libc", ] @@ -609,9 +709,9 @@ dependencies = [ [[package]] name = "cxx" -version = "1.0.92" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a140f260e6f3f79013b8bfc65e7ce630c9ab4388c6a89c71e07226f49487b72" +checksum = "f61f1b6389c3fe1c316bf8a4dccc90a38208354b330925bce1f74a6c4756eb93" dependencies = [ "cc", "cxxbridge-flags", @@ -621,9 +721,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.92" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da6383f459341ea689374bf0a42979739dc421874f112ff26f829b8040b8e613" +checksum = "12cee708e8962df2aeb38f594aae5d827c022b6460ac71a7a3e2c3c2aae5a07b" dependencies = [ "cc", "codespan-reporting", @@ -631,24 +731,24 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn", 
+ "syn 2.0.10", ] [[package]] name = "cxxbridge-flags" -version = "1.0.92" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90201c1a650e95ccff1c8c0bb5a343213bdd317c6e600a93075bca2eff54ec97" +checksum = "7944172ae7e4068c533afbb984114a56c46e9ccddda550499caa222902c7f7bb" [[package]] name = "cxxbridge-macro" -version = "1.0.92" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b75aed41bb2e6367cae39e6326ef817a851db13c13e4f3263714ca3cfb8de56" +checksum = "2345488264226bf682893e25de0769f3360aac9957980ec49361b083ddaa5bc5" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.10", ] [[package]] @@ -666,9 +766,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "20.0.0" +version = "21.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c467c5802cb75ecb0acffa2121d8361a8903fef05b21fd1ca12a55797df8a75" +checksum = "c187589ce9ddf0bbc90e2e3dc0a89b90cc3d4bfdeefc7cf2aaa8ac15f7725811" dependencies = [ "ahash", "apache-avro", @@ -699,7 +799,6 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "paste", "percent-encoding", "pin-project-lite", "rand", @@ -717,12 +816,13 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "20.0.0" +version = "21.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5c60e0a92bae06c6a55c4618947ae0837f833929b8d52ade1d50cf8b5f3b381" +checksum = "ecbbfb88a799beca6a361c1282795f0f185b96201dab496d733a49bdf4684f7f" dependencies = [ "apache-avro", "arrow", + "arrow-array 35.0.0", "chrono", "num_cpus", "object_store", @@ -733,9 +833,9 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "20.0.0" +version = "21.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96eab4ef369c3972a4f99418ba8673aa0cf818aa4b892d44c35b473edce8e021" +checksum = "73a38825b879024a87937b3b5ea8e43287ab3432db8786a2839dcbf141b6d938" 
dependencies = [ "dashmap", "datafusion-common", @@ -751,22 +851,21 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "20.0.0" +version = "21.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0533db37a619a045a4fd37dabc2d99a85f10127d7f100d75914c194e9e36ed7" +checksum = "05454741d8496faf9f433a666e97ce693807e8374e0fd513eda5a8218ba8456d" dependencies = [ "ahash", "arrow", "datafusion-common", - "log", "sqlparser", ] [[package]] name = "datafusion-optimizer" -version = "20.0.0" +version = "21.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dbb8f467d31c3efebd372854e35cf1ca1572f163d0f031e2c6f8d78b317a43b" +checksum = "d5d551c428b8557790cceecb59615f624b24dddf60b4d843c5994f8120b48c7f" dependencies = [ "arrow", "async-trait", @@ -782,14 +881,14 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "20.0.0" +version = "21.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ceb003f6c0cdc25d2e9e92f8ee0322faca2a8f94f03d070b5ca15224c5503a3c" +checksum = "08aa1047edf92d59f97b18dfbb1cade176a970b1a98b0a27f909409ceb05906e" dependencies = [ "ahash", "arrow", - "arrow-buffer", - "arrow-schema", + "arrow-buffer 34.0.0", + "arrow-schema 34.0.0", "blake2", "blake3", "chrono", @@ -802,7 +901,6 @@ dependencies = [ "itertools", "lazy_static", "md-5", - "num-traits", "paste", "petgraph", "rand", @@ -814,7 +912,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "20.0.0" +version = "21.0.0" dependencies = [ "async-trait", "datafusion", @@ -831,14 +929,15 @@ dependencies = [ "rand", "regex-syntax", "tokio", + "url", "uuid", ] [[package]] name = "datafusion-row" -version = "20.0.0" +version = "21.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba0b4478a205b767f6d98e26730a604562155a8b3cf64fe3600367e741b0f129" +checksum = "2fc83ac8761c251617c1b7e1122adf79ebbf215ecabc4e2346cda1c4307d5152" dependencies = [ 
"arrow", "datafusion-common", @@ -848,11 +947,11 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "20.0.0" +version = "21.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a07af4200a9353ebe7bf0d6024894275cdebb303bb534b3a38eb0f532583083d" +checksum = "46d6cbfa8c6ac06202badbac6e4675c33b91d299f711a4fee23327b83906e2ee" dependencies = [ - "arrow-schema", + "arrow-schema 34.0.0", "datafusion-common", "datafusion-expr", "log", @@ -861,9 +960,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "20.0.0" +version = "21.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71a9cc3e51c77ada10990e996c15c5bda293b3e4a508f55c2f3993ea6c6a39a7" +checksum = "92afff2fc078214072992424e56e081fbe4b80c639f037f3f1916a8d6d28c25d" dependencies = [ "async-recursion", "chrono", @@ -872,7 +971,6 @@ dependencies = [ "object_store", "prost 0.11.8", "prost-build 0.9.0", - "prost-types 0.11.8", "substrait", "tokio", ] @@ -888,12 +986,38 @@ dependencies = [ "subtle", ] +[[package]] +name = "dirs" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + [[package]] name = "doc-comment" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "dunce" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bd4b30a6560bbd9b4620f4de34c3f14f60848e58a9b7216801afcb4c7b31c3c" + [[package]] name = "dyn-clone" version = "1.0.11" @@ -945,6 +1069,18 @@ 
dependencies = [ "instant", ] +[[package]] +name = "filetime" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a3de6e8d11b22ff9edc6d916f890800597d60f8b2da1caf2955c274638d6412" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "windows-sys 0.45.0", +] + [[package]] name = "fixedbitset" version = "0.4.2" @@ -1012,88 +1148,588 @@ dependencies = [ ] [[package]] -name = "futures-core" -version = "0.3.27" +name = "futures-core" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86d7a0c1aa76363dac491de0ee99faf6941128376f1cf96f07db7603b7de69dd" + +[[package]] +name = "futures-executor" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1997dd9df74cdac935c76252744c1ed5794fac083242ea4fe77ef3ed60ba0f83" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89d422fa3cbe3b40dca574ab087abb5bc98258ea57eea3fd6f1fa7162c778b91" + +[[package]] +name = "futures-macro" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3eb14ed937631bd8b8b8977f2c198443447a8355b6e3ca599f38c975e5a963b6" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "futures-sink" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec93083a4aecafb2a80a885c9de1f0ccae9dbd32c2bb54b0c3a65690e0b8d2f2" + +[[package]] +name = "futures-task" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd65540d33b37b16542a0438c12e6aeead10d4ac5d05bd3f805b8f35ab592879" + +[[package]] +name = "futures-util" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3ef6b17e481503ec85211fed8f39d1970f128935ca1f814cd32ac4a6842e84ab" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", +] + +[[package]] +name = "gix" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd5e0d9c5df90c9b4d325ec716762beb7d6c1465a4049fec5c4f6b72e7824656" +dependencies = [ + "gix-actor", + "gix-attributes", + "gix-config", + "gix-credentials", + "gix-date", + "gix-diff", + "gix-discover", + "gix-features", + "gix-glob", + "gix-hash", + "gix-hashtable", + "gix-index", + "gix-lock", + "gix-mailmap", + "gix-object", + "gix-odb", + "gix-pack", + "gix-path", + "gix-prompt", + "gix-ref", + "gix-refspec", + "gix-revision", + "gix-sec", + "gix-tempfile", + "gix-traverse", + "gix-url", + "gix-validate", + "gix-worktree", + "log", + "once_cell", + "signal-hook", + "smallvec", + "thiserror", + "unicode-normalization", +] + +[[package]] +name = "gix-actor" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc22b0cdc52237667c301dd7cdc6ead8f8f73c9f824e9942c8ebd6b764f6c0bf" +dependencies = [ + "bstr", + "btoi", + "gix-date", + "itoa", + "nom", + "thiserror", +] + +[[package]] +name = "gix-attributes" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2231a25934a240d0a4b6f4478401c73ee81d8be52de0293eedbc172334abf3e1" +dependencies = [ + "bstr", + "gix-features", + "gix-glob", + "gix-path", + "gix-quote", + "thiserror", + "unicode-bom", +] + +[[package]] +name = "gix-bitmap" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "024bca0c7187517bda5ea24ab148c9ca8208dd0c3e2bea88cdb2008f91791a6d" +dependencies = [ + "thiserror", +] + +[[package]] +name = "gix-chunk" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0d39583cab06464b8bf73b3f1707458270f0e7383cb24c3c9c1a16e6f792978" +dependencies = [ + "thiserror", +] + +[[package]] +name = "gix-command" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2c6f75c1e0f924de39e750880a6e21307194bb1ab773efe3c7d2d787277f8ab" +dependencies = [ + "bstr", +] + +[[package]] +name = "gix-config" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6aa7d7dd60256b7a0c0506a1d708ec92767c2662ee57b3301b538eaa3e064f8a" +dependencies = [ + "bstr", + "gix-config-value", + "gix-features", + "gix-glob", + "gix-path", + "gix-ref", + "gix-sec", + "memchr", + "nom", + "once_cell", + "smallvec", + "thiserror", + "unicode-bom", +] + +[[package]] +name = "gix-config-value" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "693d4a4ba0531e46fe558459557a5b29fb86c3e4b2666c1c0861d93c7c678331" +dependencies = [ + "bitflags", + "bstr", + "gix-path", + "libc", + "thiserror", +] + +[[package]] +name = "gix-credentials" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "750b684197374518ea057e0a0594713e07683faa0a3f43c0f93d97f64130ad8d" +dependencies = [ + "bstr", + "gix-command", + "gix-config-value", + "gix-path", + "gix-prompt", + "gix-sec", + "gix-url", + "thiserror", +] + +[[package]] +name = "gix-date" +version = "0.4.3" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b96271912ce39822501616f177dea7218784e6c63be90d5f36322ff3a722aae2" +dependencies = [ + "bstr", + "itoa", + "thiserror", + "time 0.3.20", +] + +[[package]] +name = "gix-diff" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "103a0fa79b0d438f5ecb662502f052e530ace4fe1fe8e1c83c0c6da76d728e67" +dependencies = [ + "gix-hash", + "gix-object", + "imara-diff", + "thiserror", +] + +[[package]] +name = "gix-discover" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "630ac89fa142bd4dff7337104670a0486c39fc3849f7fe0847f6af6648b2cbca" +dependencies = [ + "bstr", + "dunce", + "gix-hash", + "gix-path", + "gix-ref", + "gix-sec", + "thiserror", +] + +[[package]] +name = "gix-features" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b76f9a80f6dd7be66442ae86e1f534effad9546676a392acc95e269d0c21c22" +dependencies = [ + "crc32fast", + "flate2", + "gix-hash", + "libc", + "once_cell", + "prodash", + "sha1_smol", + "thiserror", + "walkdir", +] + +[[package]] +name = "gix-glob" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93e43efd776bc543f46f0fd0ca3d920c37af71a764a16f2aebd89765e9ff2993" +dependencies = [ + "bitflags", + "bstr", +] + +[[package]] +name = "gix-hash" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0c5a9f4d621d4f4ea046bb331df5c746ca735b8cae5b234cc2be70ee4dbef0" +dependencies = [ + "hex", + "thiserror", +] + +[[package]] +name = "gix-hashtable" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9609c1b8f36f12968e6a6098f7cdb52004f7d42d570f47a2d6d7c16612f19acb" +dependencies = [ + "gix-hash", + "hashbrown 0.13.2", + "parking_lot", +] + +[[package]] +name = "gix-index" +version = "0.15.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "546ee7855d5d8731288f05a63c07ab41b59cb406660a825ed3fe89d7223823df" +dependencies = [ + "bitflags", + "bstr", + "btoi", + "filetime", + "gix-bitmap", + "gix-features", + "gix-hash", + "gix-lock", + "gix-object", + "gix-traverse", + "itoa", + "memmap2", + "smallvec", + "thiserror", +] + +[[package]] +name = "gix-lock" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41b80172055c5d8017a48ddac5cc7a95421c00211047db0165c97853c4f05194" +dependencies = [ + "fastrand", + "gix-tempfile", + "thiserror", +] + +[[package]] +name = "gix-mailmap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b66aea5e52875cd4915f4957a6f4b75831a36981e2ec3f5fad9e370e444fe1a" +dependencies = [ + "bstr", + "gix-actor", + "thiserror", +] + +[[package]] +name = "gix-object" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df068db9180ee935fbb70504848369e270bdcb576b05c0faa8b9fd3b86fc017" +dependencies = [ + "bstr", + "btoi", + "gix-actor", + "gix-features", + "gix-hash", + "gix-validate", + "hex", + "itoa", + "nom", + "smallvec", + "thiserror", +] + +[[package]] +name = "gix-odb" +version = "0.43.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa63fce01e5bce663bb24ad01fa2b77266e91b1d1982aab3f67cb0aed8af8169" +dependencies = [ + "arc-swap", + "gix-features", + "gix-hash", + "gix-object", + "gix-pack", + "gix-path", + "gix-quote", + "parking_lot", + "tempfile", + "thiserror", +] + +[[package]] +name = "gix-pack" +version = "0.33.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9401911c7fe032ad7b31c6a6b5be59cb283d1d6c999417a8215056efe6d635f3" +dependencies = [ + "clru", + "gix-chunk", + "gix-diff", + "gix-features", + "gix-hash", + "gix-hashtable", + "gix-object", + "gix-path", + "gix-tempfile", + "gix-traverse", + 
"memmap2", + "parking_lot", + "smallvec", + "thiserror", +] + +[[package]] +name = "gix-path" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6c104a66dec149cb8f7aaafc6ab797654cf82d67f050fd0cb7e7294e328354b" +dependencies = [ + "bstr", + "thiserror", +] + +[[package]] +name = "gix-prompt" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f3034d4d935aef2c7bf719aaa54b88c520e82413118d886ae880a31d5bdee57" +dependencies = [ + "gix-command", + "gix-config-value", + "nix", + "parking_lot", + "thiserror", +] + +[[package]] +name = "gix-quote" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a282f5a8d9ee0b09ec47390ac727350c48f2f5c76d803cd8da6b3e7ad56e0bcb" +dependencies = [ + "bstr", + "btoi", + "thiserror", +] + +[[package]] +name = "gix-ref" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86d7a0c1aa76363dac491de0ee99faf6941128376f1cf96f07db7603b7de69dd" +checksum = "0949e07aa4ed00a5936c2f4529013540708f367906f542cf19db814957e80449" +dependencies = [ + "gix-actor", + "gix-features", + "gix-hash", + "gix-lock", + "gix-object", + "gix-path", + "gix-tempfile", + "gix-validate", + "memmap2", + "nom", + "thiserror", +] [[package]] -name = "futures-executor" -version = "0.3.27" +name = "gix-refspec" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1997dd9df74cdac935c76252744c1ed5794fac083242ea4fe77ef3ed60ba0f83" +checksum = "aba332462bda2e8efeae4302b39a6ed01ad56ef772fd5b7ef197cf2798294d65" dependencies = [ - "futures-core", - "futures-task", - "futures-util", + "bstr", + "gix-hash", + "gix-revision", + "gix-validate", + "smallvec", + "thiserror", ] [[package]] -name = "futures-io" -version = "0.3.27" +name = "gix-revision" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"89d422fa3cbe3b40dca574ab087abb5bc98258ea57eea3fd6f1fa7162c778b91" +checksum = "b12fc4bbc3161a5b2d68079fce93432cef8771ff88ca017abb01187fddfc41a1" +dependencies = [ + "bstr", + "gix-date", + "gix-hash", + "gix-hashtable", + "gix-object", + "thiserror", +] [[package]] -name = "futures-macro" -version = "0.3.27" +name = "gix-sec" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3eb14ed937631bd8b8b8977f2c198443447a8355b6e3ca599f38c975e5a963b6" +checksum = "e8ffa5bf0772f9b01de501c035b6b084cf9b8bb07dec41e3afc6a17336a65f47" dependencies = [ - "proc-macro2", - "quote", - "syn", + "bitflags", + "dirs", + "gix-path", + "libc", + "windows 0.43.0", ] [[package]] -name = "futures-sink" -version = "0.3.27" +name = "gix-tempfile" +version = "5.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec93083a4aecafb2a80a885c9de1f0ccae9dbd32c2bb54b0c3a65690e0b8d2f2" +checksum = "c2ceb30a610e3f5f2d5f9a5114689fde507ba9417705a8cf3429604275b2153c" +dependencies = [ + "libc", + "once_cell", + "parking_lot", + "signal-hook", + "signal-hook-registry", + "tempfile", +] [[package]] -name = "futures-task" -version = "0.3.27" +name = "gix-traverse" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd65540d33b37b16542a0438c12e6aeead10d4ac5d05bd3f805b8f35ab592879" +checksum = "dd9a4a07bb22168dc79c60e1a6a41919d198187ca83d8a5940ad8d7122a45df3" +dependencies = [ + "gix-hash", + "gix-hashtable", + "gix-object", + "thiserror", +] [[package]] -name = "futures-util" -version = "0.3.27" +name = "gix-url" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ef6b17e481503ec85211fed8f39d1970f128935ca1f814cd32ac4a6842e84ab" +checksum = "b6a22b4b32ad14d68f7b7fb6458fa58d44b01797d94c1b8f4db2d9c7b3c366b5" dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-macro", - "futures-sink", - "futures-task", - "memchr", - 
"pin-project-lite", - "pin-utils", - "slab", + "bstr", + "gix-features", + "gix-path", + "home", + "thiserror", + "url", ] [[package]] -name = "generic-array" -version = "0.14.6" +name = "gix-validate" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" +checksum = "7bd629d3680773e1785e585d76fd4295b740b559cad9141517300d99a0c8c049" dependencies = [ - "typenum", - "version_check", + "bstr", + "thiserror", ] [[package]] -name = "getrandom" -version = "0.2.8" +name = "gix-worktree" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +checksum = "27248bdec2c90cd2d40fc43bb7d782b26bacc53d9248e78c32ac8268044aebbe" dependencies = [ - "cfg-if", - "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "bstr", + "gix-attributes", + "gix-features", + "gix-glob", + "gix-hash", + "gix-index", + "gix-object", + "gix-path", + "io-close", + "thiserror", ] [[package]] @@ -1170,6 +1806,18 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "home" version = "0.5.4" @@ -1252,16 +1900,16 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.53" +version = "0.1.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64c122667b287044802d6ce17ee2ddf13207ed924c712de9a66a5814d5b64765" +checksum = "0c17cc76786e99f8d2f055c11159e7f0091c42474dcc3189fbab96072e873e6d" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", 
"js-sys", "wasm-bindgen", - "winapi", + "windows 0.46.0", ] [[package]] @@ -1284,11 +1932,21 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "imara-diff" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e98c1d0ad70fc91b8b9654b1f33db55e59579d3b3de2bffdced0fdb810570cb8" +dependencies = [ + "ahash", + "hashbrown 0.12.3", +] + [[package]] name = "indexmap" -version = "1.9.2" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown 0.12.3", @@ -1315,12 +1973,23 @@ version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" +[[package]] +name = "io-close" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cadcf447f06744f8ce713d2d6239bb5bde2c357a452397a9ed90c625da390bc" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "io-lifetimes" -version = "1.0.6" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfa919a82ea574332e2de6e74b4c36e74d41982b335080fa59d4ef31be20fdf3" +checksum = "09270fd4fa1111bc614ed2246c7ef56239a3063d5be0d1ec3b589c505d400aeb" dependencies = [ + "hermit-abi 0.3.1", "libc", "windows-sys 0.45.0", ] @@ -1556,6 +2225,15 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "memmap2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.8.0" @@ -1576,9 
+2254,15 @@ dependencies = [ [[package]] name = "mime" -version = "0.3.16" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" @@ -1607,6 +2291,28 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" +[[package]] +name = "nix" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" +dependencies = [ + "bitflags", + "cfg-if", + "libc", + "static_assertions", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "num" version = "0.4.0" @@ -1690,7 +2396,16 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" dependencies = [ - "hermit-abi", + "hermit-abi 0.2.6", + "libc", +] + +[[package]] +name = "num_threads" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" +dependencies = [ "libc", ] @@ -1767,12 +2482,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ac135ecf63ebb5f53dda0921b0b76d6048b3ef631a5f4760b9e8f863ff00cfa" dependencies = [ "ahash", - "arrow-array", - 
"arrow-buffer", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", "arrow-cast", - "arrow-data", + "arrow-data 34.0.0", "arrow-ipc", - "arrow-schema", + "arrow-schema 34.0.0", "arrow-select", "base64", "brotli", @@ -1793,6 +2508,15 @@ dependencies = [ "zstd 0.12.3+zstd.1.5.2", ] +[[package]] +name = "parse-zoneinfo" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c705f256449c60da65e11ff6626e0c16a0a0b96aaa348de61376b249bc340f41" +dependencies = [ + "regex", +] + [[package]] name = "paste" version = "1.0.12" @@ -1825,6 +2549,44 @@ dependencies = [ "indexmap", ] +[[package]] +name = "phf" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a56ac890c5e3ca598bbdeaa99964edb5b0258a583a9eb6ef4e89fc85d9224770" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1fb5f6f826b772a8d4c0394209441e7d37cbbb967ae9c7e0e8134365c9ee676" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project-lite" version = "0.2.9" @@ -1857,13 +2619,19 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.52" +version = "1.0.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d0e1ae9e836cc3beddd63db0df682593d7e2d3d891ae8c9083d2113e1744224" +checksum = 
"e472a104799c74b514a57226160104aa483546de37e839ec50e3c2e41dd87534" dependencies = [ "unicode-ident", ] +[[package]] +name = "prodash" +version = "23.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9516b775656bc3e8985e19cd4b8c0c0de045095074e453d2c0a513b5f978392d" + [[package]] name = "prost" version = "0.9.0" @@ -1934,7 +2702,7 @@ dependencies = [ "itertools", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1947,7 +2715,7 @@ dependencies = [ "itertools", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1971,9 +2739,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.18.1" +version = "0.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06a3d8e8a46ab2738109347433cb7b96dffda2e4a218b03ef27090238886b147" +checksum = "cfb848f80438f926a9ebddf0a539ed6065434fd7aae03a89312a9821f81b8501" dependencies = [ "cfg-if", "indoc", @@ -1988,9 +2756,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.18.1" +version = "0.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75439f995d07ddfad42b192dfcf3bc66a7ecfd8b4a1f5f6f046aa5c2c5d7677d" +checksum = "98a42e7f42e917ce6664c832d5eee481ad514c98250c49e0b03b20593e2c7ed0" dependencies = [ "once_cell", "target-lexicon", @@ -1998,9 +2766,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.18.1" +version = "0.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "839526a5c07a17ff44823679b68add4a58004de00512a95b6c1c98a6dcac0ee5" +checksum = "a0707f0ab26826fe4ccd59b69106e9df5e12d097457c7b8f9c0fd1d2743eec4d" dependencies = [ "libc", "pyo3-build-config", @@ -2008,25 +2776,25 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.18.1" +version = "0.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd44cf207476c6a9760c4653559be4f206efafb924d3e4cbf2721475fc0d6cc5" +checksum = 
"978d18e61465ecd389e1f235ff5a467146dc4e3c3968b90d274fe73a5dd4a438" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn", + "syn 1.0.109", ] [[package]] name = "pyo3-macros-backend" -version = "0.18.1" +version = "0.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc1f43d8e30460f36350d18631ccf85ded64c059829208fe680904c65bcd0a4c" +checksum = "8e0e1128f85ce3fca66e435e08aa2089a2689c1c48ce97803e13f63124058462" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -2093,22 +2861,39 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_users" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" +dependencies = [ + "getrandom", + "redox_syscall", + "thiserror", +] + [[package]] name = "regex" -version = "1.7.1" +version = "1.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" +checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" dependencies = [ "aho-corasick", "memchr", "regex-syntax", ] +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + [[package]] name = "regex-syntax" -version = "0.6.28" +version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regress" @@ -2121,9 +2906,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.14" +version = "0.11.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"21eed90ec8570952d53b772ecf8f206aa1ec9a3d76b2521c56c42973f2d91ee9" +checksum = "0ba30cc2c0cd02af1222ed216ba659cdb2f879dfe3181852fe7c50b1d0005949" dependencies = [ "base64", "bytes", @@ -2205,9 +2990,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.36.9" +version = "0.36.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd5c6ff11fecd55b40746d1995a02f2eb375bf8c00d192d521ee09f42bef37bc" +checksum = "db4165c9963ab29e422d6c26fbc1d37f15bace6b2810221f9d925023480fcf0e" dependencies = [ "bitflags", "errno", @@ -2280,7 +3065,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn", + "syn 1.0.109", ] [[package]] @@ -2337,22 +3122,22 @@ checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc" [[package]] name = "serde" -version = "1.0.156" +version = "1.0.158" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "314b5b092c0ade17c00142951e50ced110ec27cea304b1037c6969246c2469a4" +checksum = "771d4d9c4163ee138805e12c710dd365e4f44be8be0503cb1bb9eb989425d9c9" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.156" +version = "1.0.158" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7e29c4601e36bcec74a223228dce795f4cd3616341a4af93520ca1a837c087d" +checksum = "e801c1712f48475582b7696ac71e0ca34ebb30e09338425384269d9717c62cad" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.10", ] [[package]] @@ -2363,7 +3148,7 @@ checksum = "85bf8229e7920a9f636479437026331ce11aa132b4dde37d121944a44d6e5f3c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -2385,7 +3170,7 @@ checksum = "797ba1d80299b264f3aac68ab5d12e5825a561749db4df7cd7c8083900c5d4e9" dependencies = [ "proc-macro2", "serde", - "syn", + "syn 1.0.109", ] [[package]] @@ -2413,6 +3198,12 @@ dependencies = [ "unsafe-libyaml", ] +[[package]] +name = "sha1_smol" +version = "1.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae1a47186c03a32177042e55dbc5fd5aee900b8e0069a8d70fba96a9375cd012" + [[package]] name = "sha2" version = "0.10.6" @@ -2424,6 +3215,31 @@ dependencies = [ "digest", ] +[[package]] +name = "signal-hook" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "732768f1176d21d09e076c23a93123d40bba92d50c4058da34d45c8de8e682b9" +dependencies = [ + "libc", + "signal-hook-registry", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" +dependencies = [ + "libc", +] + +[[package]] +name = "siphasher" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" + [[package]] name = "slab" version = "0.4.8" @@ -2458,7 +3274,7 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -2501,7 +3317,7 @@ checksum = "55fe75cb4a364c7f7ae06c7dbbc8d84bddd85d6cdf9975963c3935bc1991761e" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -2526,20 +3342,22 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn", + "syn 1.0.109", ] [[package]] name = "substrait" -version = "0.4.2" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e977fc98d1e03cf99220bb6bb96f8838ffa5c1306a8c83c1b25aa20817eb6d0" +checksum = "522c38065c57c4e5b03354157bbe8272b42fbf3af6abff304fad10669436b1fb" dependencies = [ + "gix", "heck 0.4.1", "prost 0.11.8", "prost-build 0.11.8", "prost-types 0.11.8", "schemars", + "semver 1.0.17", "serde", "serde_json", "serde_yaml", @@ -2564,6 +3382,17 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "syn" +version = "2.0.10" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aad1363ed6d37b84299588d62d3a7d95b5a5c2d9aad5c85609fda12afaa1f40" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "target-lexicon" version = "0.12.6" @@ -2594,22 +3423,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.39" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5ab016db510546d856297882807df8da66a16fb8c4101cb8b30054b0d5b2d9c" +checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.39" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5420d42e90af0c38c3290abcca25b9b3bdf379fc9f55c528f53a269d9c9a267e" +checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.10", ] [[package]] @@ -2634,6 +3463,35 @@ dependencies = [ "winapi", ] +[[package]] +name = "time" +version = "0.3.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" +dependencies = [ + "itoa", + "libc", + "num_threads", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" + +[[package]] +name = "time-macros" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd80a657e71da814b8e5d60d3374fc6d35045062245d80224748ae522dd76f36" +dependencies = [ + "time-core", +] + [[package]] name = "tiny-keccak" version = "2.0.2" @@ -2685,7 +3543,7 @@ checksum = "d266c00fde287f55d3f1c3e96c500c362a2b8c695076ec180f27918820bc6df8" dependencies = [ "proc-macro2", "quote", - "syn", + 
"syn 1.0.109", ] [[package]] @@ -2772,7 +3630,7 @@ checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -2808,7 +3666,7 @@ checksum = "89851716b67b937e393b3daa8423e67ddfc4bbbf1654bcf05488e95e0828db0c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -2841,7 +3699,7 @@ dependencies = [ "rustfmt-wrapper", "schemars", "serde_json", - "syn", + "syn 1.0.109", "thiserror", "unicode-ident", ] @@ -2858,7 +3716,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn", + "syn 1.0.109", "typify-impl", ] @@ -2870,9 +3728,15 @@ checksum = "9e79c4d996edb816c91e4308506774452e55e95c3c9de07b6729e17e15a5ef81" [[package]] name = "unicode-bidi" -version = "0.3.11" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" + +[[package]] +name = "unicode-bom" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524b68aca1d05e03fdf03fcdce2c6c94b6daf6d16861ddaa7e4f2b6638a9052c" +checksum = "63ec69f541d875b783ca40184d655f2927c95f0bffd486faa83cd3ac3529ec32" [[package]] name = "unicode-ident" @@ -2948,12 +3812,11 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "walkdir" -version = "2.3.2" +version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" dependencies = [ "same-file", - "winapi", "winapi-util", ] @@ -3000,7 +3863,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 1.0.109", "wasm-bindgen-shared", ] @@ -3034,7 +3897,7 @@ checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" dependencies = [ 
"proc-macro2", "quote", - "syn", + "syn 1.0.109", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3129,6 +3992,30 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.43.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04662ed0e3e5630dfa9b26e4cb823b817f1a9addda855d973a9458c236556244" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdacb41e6a96a052c6cb63a144f24900236121c6f63f4f8219fef5977ecb0c25" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-sys" version = "0.42.0" @@ -3246,7 +4133,7 @@ checksum = "6505e6815af7de1746a08f69c69606bb45695a17149517680f3b2149713b19a3" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 03cd68422..75f458d7e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "20.0.0" +version = "21.0.0" homepage = "https://github.com/apache/arrow-datafusion-python" repository = "https://github.com/apache/arrow-datafusion-python" authors = ["Apache Arrow "] @@ -35,12 +35,12 @@ default = ["mimalloc"] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.18.1", features = ["extension-module", "abi3", "abi3-py37"] } -datafusion = { version = "20.0.0", features = ["pyarrow", "avro"]} -datafusion-common = { version = "20.0.0", features = ["pyarrow"]} -datafusion-expr = { version = "20.0.0" } -datafusion-optimizer = { version = "20.0.0" } -datafusion-sql = { version = "20.0.0" } 
-datafusion-substrait = { version = "20.0.0" } +datafusion = { version = "21.0.0", features = ["pyarrow", "avro"]} +datafusion-common = { version = "21.0.0", features = ["pyarrow"]} +datafusion-expr = { version = "21.0.0" } +datafusion-optimizer = { version = "21.0.0" } +datafusion-sql = { version = "21.0.0" } +datafusion-substrait = { version = "21.0.0" } uuid = { version = "1.2", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false } async-trait = "0.1" @@ -48,6 +48,7 @@ futures = "0.3" object_store = { version = "0.5.3", features = ["aws", "gcp", "azure"] } parking_lot = "0.12" regex-syntax = "0.6.28" +url = "2.2" [lib] name = "datafusion_python" diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py index db59e3c52..c6ef95772 100644 --- a/datafusion/tests/test_dataframe.py +++ b/datafusion/tests/test_dataframe.py @@ -164,16 +164,17 @@ def test_join(): [pa.array([1, 2, 3]), pa.array([4, 5, 6])], names=["a", "b"], ) - df = ctx.create_dataframe([[batch]]) + df = ctx.create_dataframe([[batch]], "l") batch = pa.RecordBatch.from_arrays( [pa.array([1, 2]), pa.array([8, 10])], names=["a", "c"], ) - df1 = ctx.create_dataframe([[batch]]) + df1 = ctx.create_dataframe([[batch]], "r") df = df.join(df1, join_keys=(["a"], ["a"]), how="inner") - df = df.sort(column("a").sort(ascending=True)) + df.show() + df = df.sort(column("l.a").sort(ascending=True)) table = pa.Table.from_batches(df.collect()) expected = {"a": [1, 2], "c": [8, 10], "b": [4, 5]} diff --git a/datafusion/tests/test_indexing.py b/datafusion/tests/test_indexing.py index 9e65c0efd..1c7f8627d 100644 --- a/datafusion/tests/test_indexing.py +++ b/datafusion/tests/test_indexing.py @@ -44,7 +44,7 @@ def test_err(df): with pytest.raises(Exception) as e_info: df["c"] - assert "Schema error: No field named 'c'" in e_info.value.args[0] + assert 'Schema error: No field named "c"' in e_info.value.args[0] with pytest.raises(Exception) as e_info: df[1] diff 
--git a/datafusion/tests/test_store.py b/datafusion/tests/test_store.py index d6f0db583..9174c246f 100644 --- a/datafusion/tests/test_store.py +++ b/datafusion/tests/test_store.py @@ -30,7 +30,7 @@ def local(): @pytest.fixture def ctx(local): ctx = SessionContext() - ctx.register_object_store("local", local, None) + ctx.register_object_store("file://local", local, None) return ctx diff --git a/src/common/df_field.rs b/src/common/df_field.rs index fa65e0495..d7745bfb5 100644 --- a/src/common/df_field.rs +++ b/src/common/df_field.rs @@ -16,7 +16,7 @@ // under the License. use datafusion::arrow::datatypes::DataType; -use datafusion_common::DFField; +use datafusion_common::{DFField, OwnedTableReference}; use pyo3::prelude::*; use super::data_type::PyDataType; @@ -46,9 +46,14 @@ impl From for PyDFField { impl PyDFField { #[new] #[pyo3(signature = (qualifier=None, name="", data_type=DataType::Int64.into(), nullable=false))] - fn new(qualifier: Option<&str>, name: &str, data_type: PyDataType, nullable: bool) -> Self { + fn new(qualifier: Option, name: &str, data_type: PyDataType, nullable: bool) -> Self { PyDFField { - field: DFField::new(qualifier, name, data_type.into(), nullable), + field: DFField::new( + qualifier.map(|q| OwnedTableReference::from(q)), + name, + data_type.into(), + nullable, + ), } } @@ -91,8 +96,8 @@ impl PyDFField { // fn py_unqualified_column(&self) -> PyResult {} #[pyo3(name = "qualifier")] - fn py_qualifier(&self) -> PyResult> { - Ok(self.field.qualifier()) + fn py_qualifier(&self) -> PyResult> { + Ok(self.field.qualifier().map(|q| format!("{}", q))) } // TODO: Need bindings for Arrow `Field` first diff --git a/src/context.rs b/src/context.rs index e2c509abe..448fa7742 100644 --- a/src/context.rs +++ b/src/context.rs @@ -20,6 +20,7 @@ use std::path::PathBuf; use std::sync::Arc; use object_store::ObjectStore; +use url::Url; use uuid::Uuid; use pyo3::exceptions::{PyKeyError, PyValueError}; @@ -49,7 +50,6 @@ use 
datafusion::physical_plan::SendableRecordBatchStream; use datafusion::prelude::{ AvroReadOptions, CsvReadOptions, DataFrame, NdJsonReadOptions, ParquetReadOptions, }; -use datafusion_common::config::Extensions; use datafusion_common::ScalarValue; use pyo3::types::PyTuple; use tokio::runtime::Runtime; @@ -260,10 +260,9 @@ impl PySessionContext { } else { &upstream_host }; - - self.ctx - .runtime_env() - .register_object_store(scheme, derived_host, store); + let url_string = format!("{}{}", scheme, derived_host); + let url = Url::parse(&url_string).unwrap(); + self.ctx.runtime_env().register_object_store(&url, store); Ok(()) } @@ -699,20 +698,19 @@ impl PySessionContext { part: usize, py: Python, ) -> PyResult { - let ctx = TaskContext::try_new( - "task_id".to_string(), + let ctx = TaskContext::new( + None, "session_id".to_string(), - HashMap::new(), + SessionConfig::new(), HashMap::new(), HashMap::new(), Arc::new(RuntimeEnv::default()), - Extensions::default(), ); // create a Tokio runtime to run the async code let rt = Runtime::new().unwrap(); let plan = plan.plan.clone(); let fut: JoinHandle> = - rt.spawn(async move { plan.execute(part, Arc::new(ctx?)) }); + rt.spawn(async move { plan.execute(part, Arc::new(ctx)) }); let stream = wait_for_future(py, fut).map_err(py_datafusion_err)?; Ok(PyRecordBatchStream::new(stream?)) } diff --git a/src/dataframe.rs b/src/dataframe.rs index fcce90d90..7e1ce03b5 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -20,7 +20,7 @@ use crate::sql::logical::PyLogicalPlan; use crate::utils::wait_for_future; use crate::{errors::DataFusionError, expr::PyExpr}; use datafusion::arrow::datatypes::Schema; -use datafusion::arrow::pyarrow::{PyArrowConvert, PyArrowException, PyArrowType}; +use datafusion::arrow::pyarrow::{PyArrowConvert, PyArrowType}; use datafusion::arrow::util::pretty; use datafusion::dataframe::DataFrame; use datafusion::prelude::*; diff --git a/src/expr/column.rs b/src/expr/column.rs index 16b8bce3c..68123fb04 100644 
--- a/src/expr/column.rs +++ b/src/expr/column.rs @@ -45,7 +45,7 @@ impl PyColumn { /// Get the column relation fn relation(&self) -> Option { - self.col.relation.clone() + self.col.relation.as_ref().map(|r| format!("{}", r)) } /// Get the fully-qualified column name diff --git a/src/expr/table_scan.rs b/src/expr/table_scan.rs index 63684fe7f..ac848d9eb 100644 --- a/src/expr/table_scan.rs +++ b/src/expr/table_scan.rs @@ -67,8 +67,8 @@ impl Display for PyTableScan { impl PyTableScan { /// Retrieves the name of the table represented by this `TableScan` instance #[pyo3(name = "table_name")] - fn py_table_name(&self) -> PyResult<&str> { - Ok(&self.table_scan.table_name) + fn py_table_name(&self) -> PyResult { + Ok(format!("{}", self.table_scan.table_name)) } /// TODO: Bindings for `TableSource` need to exist first. Left as a From a7e9161e99f2e8231f5dbf1c556755d58971aa45 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 31 Mar 2023 08:57:37 -0600 Subject: [PATCH 007/413] Prepare 21.0.0 release (#304) * cargo update * changelog * fix --- CHANGELOG.md | 13 +++ Cargo.lock | 256 ++++++++++++++++++++++++++++++++------------------- 2 files changed, 172 insertions(+), 97 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0cf05413a..35982d9a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,19 @@ # Changelog +## [21.0.0](https://github.com/apache/arrow-datafusion-python/tree/21.0.0) (2023-03-30) + +[Full Changelog](https://github.com/apache/arrow-datafusion-python/compare/20.0.0...21.0.0) + +**Merged pull requests:** + +- minor: Fix minor warning on unused import [#289](https://github.com/apache/arrow-datafusion-python/pull/289) (viirya) +- feature: Implement `describe()` method [#293](https://github.com/apache/arrow-datafusion-python/pull/293) (simicd) +- fix: Printed results not visible in debugger & notebooks [#296](https://github.com/apache/arrow-datafusion-python/pull/296) (simicd) +- add package.include and remove wildcard dependency 
[#295](https://github.com/apache/arrow-datafusion-python/pull/295) (andygrove) +- Update main branch name in docs workflow [#303](https://github.com/apache/arrow-datafusion-python/pull/303) (andygrove) +- Upgrade to DF 21 [#301](https://github.com/apache/arrow-datafusion-python/pull/301) (andygrove) + ## [20.0.0](https://github.com/apache/arrow-datafusion-python/tree/20.0.0) (2023-03-17) [Full Changelog](https://github.com/apache/arrow-datafusion-python/compare/0.8.0...20.0.0) diff --git a/Cargo.lock b/Cargo.lock index 91a703ee5..dab3530b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -393,7 +393,7 @@ checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" dependencies = [ "proc-macro2", "quote", - "syn 2.0.10", + "syn 2.0.11", ] [[package]] @@ -404,7 +404,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", "quote", - "syn 2.0.10", + "syn 2.0.11", ] [[package]] @@ -731,7 +731,7 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn 2.0.10", + "syn 2.0.11", ] [[package]] @@ -748,7 +748,7 @@ checksum = "2345488264226bf682893e25de0769f3360aac9957980ec49361b083ddaa5bc5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.10", + "syn 2.0.11", ] [[package]] @@ -1041,13 +1041,13 @@ dependencies = [ [[package]] name = "errno" -version = "0.2.8" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +checksum = "50d6a0976c999d473fe89ad888d5a284e55366d9dc9038b1ba2aa15128c4afa0" dependencies = [ "errno-dragonfly", "libc", - "winapi", + "windows-sys", ] [[package]] @@ -1077,8 +1077,8 @@ checksum = "8a3de6e8d11b22ff9edc6d916f890800597d60f8b2da1caf2955c274638d6412" dependencies = [ "cfg-if", "libc", - "redox_syscall", - "windows-sys 0.45.0", + "redox_syscall 0.2.16", + "windows-sys", ] [[package]] @@ -1213,9 +1213,9 @@ dependencies = [ [[package]] name = "generic-array" -version = 
"0.14.6" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", @@ -1353,9 +1353,9 @@ dependencies = [ [[package]] name = "gix-config-value" -version = "0.10.1" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "693d4a4ba0531e46fe558459557a5b29fb86c3e4b2666c1c0861d93c7c678331" +checksum = "d09154c0c8677e4da0ec35e896f56ee3e338e741b9599fae06075edd83a4081c" dependencies = [ "bitflags", "bstr", @@ -1406,9 +1406,9 @@ dependencies = [ [[package]] name = "gix-discover" -version = "0.16.1" +version = "0.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630ac89fa142bd4dff7337104670a0486c39fc3849f7fe0847f6af6648b2cbca" +checksum = "6eba8ba458cb8f4a6c33409b0fe650b1258655175a7ffd1d24fafd3ed31d880b" dependencies = [ "bstr", "dunce", @@ -1469,9 +1469,9 @@ dependencies = [ [[package]] name = "gix-index" -version = "0.15.0" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "546ee7855d5d8731288f05a63c07ab41b59cb406660a825ed3fe89d7223823df" +checksum = "717ab601ece7921f59fe86849dbe27d44a46ebb883b5885732c4f30df4996177" dependencies = [ "bitflags", "bstr", @@ -1532,9 +1532,9 @@ dependencies = [ [[package]] name = "gix-odb" -version = "0.43.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa63fce01e5bce663bb24ad01fa2b77266e91b1d1982aab3f67cb0aed8af8169" +checksum = "e83af2e3e36005bfe010927f0dff41fb5acc3e3d89c6f1174135b3a34086bda2" dependencies = [ "arc-swap", "gix-features", @@ -1572,9 +1572,9 @@ dependencies = [ [[package]] name = "gix-path" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f6c104a66dec149cb8f7aaafc6ab797654cf82d67f050fd0cb7e7294e328354b" +checksum = "32370dce200bb951df013e03dff35b4233fc7a89458642b047629b91734a7e19" dependencies = [ "bstr", "thiserror", @@ -1606,9 +1606,9 @@ dependencies = [ [[package]] name = "gix-ref" -version = "0.27.1" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0949e07aa4ed00a5936c2f4529013540708f367906f542cf19db814957e80449" +checksum = "e4e909396ed3b176823991ccc391c276ae2a015e54edaafa3566d35123cfac9d" dependencies = [ "gix-actor", "gix-features", @@ -1716,9 +1716,9 @@ dependencies = [ [[package]] name = "gix-worktree" -version = "0.15.1" +version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27248bdec2c90cd2d40fc43bb7d782b26bacc53d9248e78c32ac8268044aebbe" +checksum = "54ec9a000b4f24af706c3cc680c7cda235656cbe3216336522f5692773b8a301" dependencies = [ "bstr", "gix-attributes", @@ -1900,16 +1900,16 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.54" +version = "0.1.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c17cc76786e99f8d2f055c11159e7f0091c42474dcc3189fbab96072e873e6d" +checksum = "716f12fbcfac6ffab0a5e9ec51d0a0ff70503742bb2dc7b99396394c9dc323f0" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "windows 0.46.0", + "windows 0.47.0", ] [[package]] @@ -1991,14 +1991,14 @@ checksum = "09270fd4fa1111bc614ed2246c7ef56239a3063d5be0d1ec3b589c505d400aeb" dependencies = [ "hermit-abi 0.3.1", "libc", - "windows-sys 0.45.0", + "windows-sys", ] [[package]] name = "ipnet" -version = "2.7.1" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30e22bd8629359895450b59ea7a776c850561b96a3b1d31321c1949d9e6c9146" +checksum = "12b6ee2129af8d4fb011108c73d99a1b83a85977f23b82460c0ae2e25bb4b57f" [[package]] name = "itertools" @@ -2156,9 +2156,9 @@ dependencies = 
[ [[package]] name = "linux-raw-sys" -version = "0.1.4" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" +checksum = "cd550e73688e6d578f0ac2119e32b797a327631a42f9433e59d02e139c8df60d" [[package]] name = "lock_api" @@ -2282,7 +2282,7 @@ dependencies = [ "libc", "log", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 0.45.0", + "windows-sys", ] [[package]] @@ -2470,9 +2470,9 @@ checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.2.16", "smallvec", - "windows-sys 0.45.0", + "windows-sys", ] [[package]] @@ -2611,6 +2611,16 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +[[package]] +name = "prettyplease" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8646e95016a7a6c4adea95bafa8a16baab64b583356217f2c85db4a39d9a86" +dependencies = [ + "proc-macro2", + "syn 1.0.109", +] + [[package]] name = "proc-macro-hack" version = "0.5.20+deprecated" @@ -2861,6 +2871,15 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_syscall" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags", +] + [[package]] name = "redox_users" version = "0.4.3" @@ -2868,7 +2887,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" dependencies = [ "getrandom", - "redox_syscall", + "redox_syscall 0.2.16", "thiserror", ] @@ -2906,9 +2925,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.15" +version = "0.11.16" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ba30cc2c0cd02af1222ed216ba659cdb2f879dfe3181852fe7c50b1d0005949" +checksum = "27b71749df584b7f4cac2c426c127a7c785a5106cc98f7a8feb044115f0fa254" dependencies = [ "base64", "bytes", @@ -2990,16 +3009,16 @@ dependencies = [ [[package]] name = "rustix" -version = "0.36.11" +version = "0.37.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db4165c9963ab29e422d6c26fbc1d37f15bace6b2810221f9d925023480fcf0e" +checksum = "0e78cc525325c06b4a7ff02db283472f3c042b7ff0c391f96c6d5ac6f4f91b75" dependencies = [ "bitflags", "errno", "io-lifetimes", "libc", "linux-raw-sys", - "windows-sys 0.45.0", + "windows-sys", ] [[package]] @@ -3122,22 +3141,22 @@ checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc" [[package]] name = "serde" -version = "1.0.158" +version = "1.0.159" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "771d4d9c4163ee138805e12c710dd365e4f44be8be0503cb1bb9eb989425d9c9" +checksum = "3c04e8343c3daeec41f58990b9d77068df31209f2af111e059e9fe9646693065" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.158" +version = "1.0.159" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e801c1712f48475582b7696ac71e0ca34ebb30e09338425384269d9717c62cad" +checksum = "4c614d17805b093df4b147b51339e7e44bf05ef59fba1e45d83500bcfb4d8585" dependencies = [ "proc-macro2", "quote", - "syn 2.0.10", + "syn 2.0.11", ] [[package]] @@ -3153,9 +3172,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.94" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c533a59c9d8a93a09c6ab31f0fd5e5f4dd1b8fc9434804029839884765d04ea" +checksum = "d721eca97ac802aa7777b701877c8004d950fc142651367300d21c1cc0194744" dependencies = [ "itoa", "ryu", @@ -3347,12 +3366,13 @@ dependencies = [ [[package]] name = "substrait" -version = "0.5.2" +version = 
"0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "522c38065c57c4e5b03354157bbe8272b42fbf3af6abff304fad10669436b1fb" +checksum = "58dbeec746a1da14b7af982eabbd18031563f418fe4217612bc1a4cab5961259" dependencies = [ "gix", "heck 0.4.1", + "prettyplease", "prost 0.11.8", "prost-build 0.11.8", "prost-types 0.11.8", @@ -3361,6 +3381,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", + "syn 1.0.109", "typify", "walkdir", ] @@ -3384,9 +3405,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.10" +version = "2.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aad1363ed6d37b84299588d62d3a7d95b5a5c2d9aad5c85609fda12afaa1f40" +checksum = "21e3787bb71465627110e7d87ed4faaa36c1f61042ee67badb9e2ef173accc40" dependencies = [ "proc-macro2", "quote", @@ -3401,15 +3422,15 @@ checksum = "8ae9980cab1db3fceee2f6c6f643d5d8de2997c58ee8d25fb0cc8a9e9e7348e5" [[package]] name = "tempfile" -version = "3.4.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af18f7ae1acd354b992402e9ec5864359d693cd8a79dcbef59f76891701c1e95" +checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" dependencies = [ "cfg-if", "fastrand", - "redox_syscall", + "redox_syscall 0.3.5", "rustix", - "windows-sys 0.42.0", + "windows-sys", ] [[package]] @@ -3438,7 +3459,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.10", + "syn 2.0.11", ] [[package]] @@ -3518,32 +3539,31 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.26.0" +version = "1.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03201d01c3c27a29c8a5cee5b55a93ddae1ccf6f08f65365c2c918f8c1b76f64" +checksum = "d0de47a4eecbe11f498978a9b29d792f0d2692d1dd003650c24c76510e3bc001" dependencies = [ "autocfg", "bytes", "libc", - 
"memchr", "mio", "num_cpus", "parking_lot", "pin-project-lite", "socket2", "tokio-macros", - "windows-sys 0.45.0", + "windows-sys", ] [[package]] name = "tokio-macros" -version = "1.8.2" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d266c00fde287f55d3f1c3e96c500c362a2b8c695076ec180f27918820bc6df8" +checksum = "61a573bdc87985e9d6ddeed1b3d864e8a302c847e40d647746df2f1de209d1ce" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.11", ] [[package]] @@ -3998,61 +4018,61 @@ version = "0.43.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04662ed0e3e5630dfa9b26e4cb823b817f1a9addda855d973a9458c236556244" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", ] [[package]] name = "windows" -version = "0.46.0" +version = "0.47.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdacb41e6a96a052c6cb63a144f24900236121c6f63f4f8219fef5977ecb0c25" +checksum = "2649ff315bee4c98757f15dac226efe3d81927adbb6e882084bb1ee3e0c330a7" dependencies = [ - "windows-targets", + "windows-targets 0.47.0", ] [[package]] name = "windows-sys" -version = "0.42.0" +version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows-targets 
0.42.2", ] [[package]] -name = "windows-sys" -version = "0.45.0" +name = "windows-targets" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" dependencies = [ - "windows-targets", + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", ] [[package]] name = "windows-targets" -version = "0.42.2" +version = "0.47.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +checksum = "2f8996d3f43b4b2d44327cd71b7b0efd1284ab60e6e9d0e8b630e18555d87d3e" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.47.0", + "windows_aarch64_msvc 0.47.0", + "windows_i686_gnu 0.47.0", + "windows_i686_msvc 0.47.0", + "windows_x86_64_gnu 0.47.0", + "windows_x86_64_gnullvm 0.47.0", + "windows_x86_64_msvc 0.47.0", ] [[package]] @@ -4061,42 +4081,84 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "831d567d53d4f3cb1db332b68e6e2b6260228eb4d99a777d8b2e8ed794027c90" + [[package]] name = "windows_aarch64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" +[[package]] +name = "windows_aarch64_msvc" +version = 
"0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a42d54a417c60ce4f0e31661eed628f0fa5aca73448c093ec4d45fab4c51cdf" + [[package]] name = "windows_i686_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" +[[package]] +name = "windows_i686_gnu" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1925beafdbb22201a53a483db861a5644123157c1c3cee83323a2ed565d71e3" + [[package]] name = "windows_i686_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" +[[package]] +name = "windows_i686_msvc" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8ef8f2f1711b223947d9b69b596cf5a4e452c930fb58b6fc3fdae7d0ec6b31" + [[package]] name = "windows_x86_64_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" +[[package]] +name = "windows_x86_64_gnu" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7acaa0c2cf0d2ef99b61c308a0c3dbae430a51b7345dedec470bd8f53f5a3642" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a0628f71be1d11e17ca4a0e9e15b3a5180f6fbf1c2d55e3ba3f850378052c1" + [[package]] name = "windows_x86_64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" +[[package]] +name = "windows_x86_64_msvc" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d6e62c256dc6d40b8c8707df17df8d774e60e39db723675241e7c15e910bce7" + [[package]] name = "winreg" version = "0.10.1" From 48f759b5355d4f7ceeb46b615252e62fb55d0d3a Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 31 Mar 2023 09:27:23 -0600 Subject: [PATCH 008/413] DataFusion bump to use valid typify instance for build with RustFMT (#307) * DataFusion bump to use valid typify instance for build with RustFMT * specify syn version --------- Co-authored-by: Jeremy Dyer --- Cargo.lock | 619 +++++++++++++++++------------------------------------ Cargo.toml | 13 +- 2 files changed, 200 insertions(+), 432 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dab3530b4..86e323375 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -113,69 +113,52 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" [[package]] name = "arrow" -version = "34.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f410d3907b6b3647b9e7bca4551274b2e3d716aa940afb67b7287257401da921" +checksum = "990dfa1a9328504aa135820da1c95066537b69ad94c04881b785f64328e0fa6b" dependencies = [ "ahash", "arrow-arith", - "arrow-array 34.0.0", - "arrow-buffer 34.0.0", + "arrow-array", + "arrow-buffer", "arrow-cast", "arrow-csv", - "arrow-data 34.0.0", + "arrow-data", "arrow-ipc", "arrow-json", "arrow-ord", "arrow-row", - "arrow-schema 34.0.0", + "arrow-schema", "arrow-select", "arrow-string", - "comfy-table", "pyo3", ] [[package]] name = "arrow-arith" -version = "34.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f87391cf46473c9bc53dab68cb8872c3a81d4dfd1703f1c8aa397dba9880a043" -dependencies = [ - "arrow-array 34.0.0", - "arrow-buffer 34.0.0", - "arrow-data 34.0.0", - "arrow-schema 34.0.0", - "chrono", - 
"half", - "num", -] - -[[package]] -name = "arrow-array" -version = "34.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d35d5475e65c57cffba06d0022e3006b677515f99b54af33a7cd54f6cdd4a5b5" +checksum = "f2b2e52de0ab54173f9b08232b7184c26af82ee7ab4ac77c83396633c90199fa" dependencies = [ - "ahash", - "arrow-buffer 34.0.0", - "arrow-data 34.0.0", - "arrow-schema 34.0.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", "chrono", "half", - "hashbrown 0.13.2", "num", ] [[package]] name = "arrow-array" -version = "35.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43489bbff475545b78b0e20bde1d22abd6c99e54499839f9e815a2fa5134a51b" +checksum = "e10849b60c17dbabb334be1f4ef7550701aa58082b71335ce1ed586601b2f423" dependencies = [ "ahash", - "arrow-buffer 35.0.0", - "arrow-data 35.0.0", - "arrow-schema 35.0.0", + "arrow-buffer", + "arrow-data", + "arrow-schema", "chrono", "chrono-tz", "half", @@ -185,19 +168,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "34.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68b4ec72eda7c0207727df96cf200f539749d736b21f3e782ece113e18c1a0a7" -dependencies = [ - "half", - "num", -] - -[[package]] -name = "arrow-buffer" -version = "35.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3759e4a52c593281184787af5435671dc8b1e78333e5a30242b2e2d6e3c9d1f" +checksum = "b0746ae991b186be39933147117f8339eb1c4bbbea1c8ad37e7bf5851a1a06ba" dependencies = [ "half", "num", @@ -205,31 +178,32 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "34.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a7285272c9897321dfdba59de29f5b05aeafd3cdedf104a941256d155f6d304" +checksum = "b88897802515d7b193e38b27ddd9d9e43923d410a9e46307582d756959ee9595" dependencies = [ - "arrow-array 
34.0.0", - "arrow-buffer 34.0.0", - "arrow-data 34.0.0", - "arrow-schema 34.0.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", "arrow-select", "chrono", + "comfy-table", "lexical-core", "num", ] [[package]] name = "arrow-csv" -version = "34.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "981ee4e7f6a120da04e00d0b39182e1eeacccb59c8da74511de753c56b7fddf7" +checksum = "1c8220d9741fc37961262710ceebd8451a5b393de57c464f0267ffdda1775c0a" dependencies = [ - "arrow-array 34.0.0", - "arrow-buffer 34.0.0", + "arrow-array", + "arrow-buffer", "arrow-cast", - "arrow-data 34.0.0", - "arrow-schema 34.0.0", + "arrow-data", + "arrow-schema", "chrono", "csv", "csv-core", @@ -240,53 +214,41 @@ dependencies = [ [[package]] name = "arrow-data" -version = "34.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27cc673ee6989ea6e4b4e8c7d461f7e06026a096c8f0b1a7288885ff71ae1e56" -dependencies = [ - "arrow-buffer 34.0.0", - "arrow-schema 34.0.0", - "half", - "num", -] - -[[package]] -name = "arrow-data" -version = "35.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19c7787c6cdbf9539b1ffb860bfc18c5848926ec3d62cbd52dc3b1ea35c874fd" +checksum = "533f937efa1aaad9dc86f6a0e382c2fa736a4943e2090c946138079bdf060cef" dependencies = [ - "arrow-buffer 35.0.0", - "arrow-schema 35.0.0", + "arrow-buffer", + "arrow-schema", "half", "num", ] [[package]] name = "arrow-ipc" -version = "34.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e37b8b69d9e59116b6b538e8514e0ec63a30f08b617ce800d31cb44e3ef64c1a" +checksum = "18b75296ff01833f602552dff26a423fc213db8e5049b540ca4a00b1c957e41c" dependencies = [ - "arrow-array 34.0.0", - "arrow-buffer 34.0.0", + "arrow-array", + "arrow-buffer", "arrow-cast", - "arrow-data 34.0.0", - "arrow-schema 34.0.0", + "arrow-data", + "arrow-schema", "flatbuffers", ] 
[[package]] name = "arrow-json" -version = "34.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80c3fa0bed7cfebf6d18e46b733f9cb8a1cb43ce8e6539055ca3e1e48a426266" +checksum = "e501d3de4d612c90677594896ca6c0fa075665a7ff980dc4189bb531c17e19f6" dependencies = [ - "arrow-array 34.0.0", - "arrow-buffer 34.0.0", + "arrow-array", + "arrow-buffer", "arrow-cast", - "arrow-data 34.0.0", - "arrow-schema 34.0.0", + "arrow-data", + "arrow-schema", "chrono", "half", "indexmap", @@ -297,71 +259,66 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "34.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d247dce7bed6a8d6a3c6debfa707a3a2f694383f0c692a39d736a593eae5ef94" +checksum = "33d2671eb3793f9410230ac3efb0e6d36307be8a2dac5fad58ac9abde8e9f01e" dependencies = [ - "arrow-array 34.0.0", - "arrow-buffer 34.0.0", - "arrow-data 34.0.0", - "arrow-schema 34.0.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", "arrow-select", + "half", "num", ] [[package]] name = "arrow-row" -version = "34.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d609c0181f963cea5c70fddf9a388595b5be441f3aa1d1cdbf728ca834bbd3a" +checksum = "fc11fa039338cebbf4e29cf709c8ac1d6a65c7540063d4a25f991ab255ca85c8" dependencies = [ "ahash", - "arrow-array 34.0.0", - "arrow-buffer 34.0.0", - "arrow-data 34.0.0", - "arrow-schema 34.0.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", "half", "hashbrown 0.13.2", ] [[package]] name = "arrow-schema" -version = "34.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64951898473bfb8e22293e83a44f02874d2257514d49cd95f9aa4afcff183fbc" +checksum = "d04f17f7b86ded0b5baf98fe6123391c4343e031acc3ccc5fa604cc180bff220" dependencies = [ - "bitflags", + "bitflags 2.0.2", ] -[[package]] -name = "arrow-schema" -version = "35.0.0" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf6b26f6a6f8410e3b9531cbd1886399b99842701da77d4b4cf2013f7708f20f" - [[package]] name = "arrow-select" -version = "34.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a513d89c2e1ac22b28380900036cf1f3992c6443efc5e079de631dcf83c6888" +checksum = "163e35de698098ff5f5f672ada9dc1f82533f10407c7a11e2cd09f3bcf31d18a" dependencies = [ - "arrow-array 34.0.0", - "arrow-buffer 34.0.0", - "arrow-data 34.0.0", - "arrow-schema 34.0.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", "num", ] [[package]] name = "arrow-string" -version = "34.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5288979b2705dae1114c864d73150629add9153b9b8f1d7ee3963db94c372ba5" +checksum = "bfdfbed1b10209f0dc68e6aa4c43dc76079af65880965c7c3b73f641f23d4aba" dependencies = [ - "arrow-array 34.0.0", - "arrow-buffer 34.0.0", - "arrow-data 34.0.0", - "arrow-schema 34.0.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", "arrow-select", "regex", "regex-syntax", @@ -393,7 +350,7 @@ checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" dependencies = [ "proc-macro2", "quote", - "syn 2.0.11", + "syn 2.0.12", ] [[package]] @@ -404,7 +361,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", "quote", - "syn 2.0.11", + "syn 2.0.12", ] [[package]] @@ -425,6 +382,12 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "487f1e0fcbe47deb8b0574e646def1c903389d95241dd1bbcc6ce4a715dfc0c1" + [[package]] name = "blake2" version = "0.10.6" @@ -731,7 +694,7 @@ dependencies = [ 
"proc-macro2", "quote", "scratch", - "syn 2.0.11", + "syn 2.0.12", ] [[package]] @@ -748,7 +711,7 @@ checksum = "2345488264226bf682893e25de0769f3360aac9957980ec49361b083ddaa5bc5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.11", + "syn 2.0.12", ] [[package]] @@ -767,8 +730,7 @@ dependencies = [ [[package]] name = "datafusion" version = "21.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c187589ce9ddf0bbc90e2e3dc0a89b90cc3d4bfdeefc7cf2aaa8ac15f7725811" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=c09edad#c09edade14d456f1d2161c5ebb8c1e51e592a8ef" dependencies = [ "ahash", "apache-avro", @@ -817,12 +779,11 @@ dependencies = [ [[package]] name = "datafusion-common" version = "21.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecbbfb88a799beca6a361c1282795f0f185b96201dab496d733a49bdf4684f7f" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=c09edad#c09edade14d456f1d2161c5ebb8c1e51e592a8ef" dependencies = [ "apache-avro", "arrow", - "arrow-array 35.0.0", + "arrow-array", "chrono", "num_cpus", "object_store", @@ -834,8 +795,7 @@ dependencies = [ [[package]] name = "datafusion-execution" version = "21.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73a38825b879024a87937b3b5ea8e43287ab3432db8786a2839dcbf141b6d938" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=c09edad#c09edade14d456f1d2161c5ebb8c1e51e592a8ef" dependencies = [ "dashmap", "datafusion-common", @@ -852,8 +812,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "21.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05454741d8496faf9f433a666e97ce693807e8374e0fd513eda5a8218ba8456d" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=c09edad#c09edade14d456f1d2161c5ebb8c1e51e592a8ef" dependencies = [ "ahash", "arrow", @@ -864,8 +823,7 @@ dependencies = [ [[package]] name = 
"datafusion-optimizer" version = "21.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5d551c428b8557790cceecb59615f624b24dddf60b4d843c5994f8120b48c7f" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=c09edad#c09edade14d456f1d2161c5ebb8c1e51e592a8ef" dependencies = [ "arrow", "async-trait", @@ -882,13 +840,13 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "21.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08aa1047edf92d59f97b18dfbb1cade176a970b1a98b0a27f909409ceb05906e" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=c09edad#c09edade14d456f1d2161c5ebb8c1e51e592a8ef" dependencies = [ "ahash", "arrow", - "arrow-buffer 34.0.0", - "arrow-schema 34.0.0", + "arrow-array", + "arrow-buffer", + "arrow-schema", "blake2", "blake3", "chrono", @@ -928,6 +886,7 @@ dependencies = [ "pyo3", "rand", "regex-syntax", + "syn 2.0.12", "tokio", "url", "uuid", @@ -936,8 +895,7 @@ dependencies = [ [[package]] name = "datafusion-row" version = "21.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fc83ac8761c251617c1b7e1122adf79ebbf215ecabc4e2346cda1c4307d5152" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=c09edad#c09edade14d456f1d2161c5ebb8c1e51e592a8ef" dependencies = [ "arrow", "datafusion-common", @@ -948,10 +906,9 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "21.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46d6cbfa8c6ac06202badbac6e4675c33b91d299f711a4fee23327b83906e2ee" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=c09edad#c09edade14d456f1d2161c5ebb8c1e51e592a8ef" dependencies = [ - "arrow-schema 34.0.0", + "arrow-schema", "datafusion-common", "datafusion-expr", "log", @@ -961,16 +918,14 @@ dependencies = [ [[package]] name = "datafusion-substrait" version = "21.0.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "92afff2fc078214072992424e56e081fbe4b80c639f037f3f1916a8d6d28c25d" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=c09edad#c09edade14d456f1d2161c5ebb8c1e51e592a8ef" dependencies = [ "async-recursion", "chrono", "datafusion", "itertools", "object_store", - "prost 0.11.8", - "prost-build 0.9.0", + "prost", "substrait", "tokio", ] @@ -1093,7 +1048,7 @@ version = "23.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77f5399c2c9c50ae9418e522842ad362f61ee48b346ac106807bd355a8a7c619" dependencies = [ - "bitflags", + "bitflags 1.3.2", "rustc_version", ] @@ -1124,9 +1079,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "531ac96c6ff5fd7c62263c5e3c67a603af4fcaee2e1a0ae5565ba3a11e69e549" +checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" dependencies = [ "futures-channel", "futures-core", @@ -1139,9 +1094,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "164713a5a0dcc3e7b4b1ed7d3b433cabc18025386f9339346e8daf15963cf7ac" +checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" dependencies = [ "futures-core", "futures-sink", @@ -1149,15 +1104,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86d7a0c1aa76363dac491de0ee99faf6941128376f1cf96f07db7603b7de69dd" +checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" [[package]] name = "futures-executor" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1997dd9df74cdac935c76252744c1ed5794fac083242ea4fe77ef3ed60ba0f83" +checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" dependencies = [ "futures-core", "futures-task", @@ -1166,38 +1121,38 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89d422fa3cbe3b40dca574ab087abb5bc98258ea57eea3fd6f1fa7162c778b91" +checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" [[package]] name = "futures-macro" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3eb14ed937631bd8b8b8977f2c198443447a8355b6e3ca599f38c975e5a963b6" +checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.12", ] [[package]] name = "futures-sink" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec93083a4aecafb2a80a885c9de1f0ccae9dbd32c2bb54b0c3a65690e0b8d2f2" +checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" [[package]] name = "futures-task" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd65540d33b37b16542a0438c12e6aeead10d4ac5d05bd3f805b8f35ab592879" +checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" [[package]] name = "futures-util" -version = "0.3.27" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ef6b17e481503ec85211fed8f39d1970f128935ca1f814cd32ac4a6842e84ab" +checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" dependencies = [ "futures-channel", "futures-core", @@ -1357,7 +1312,7 @@ version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d09154c0c8677e4da0ec35e896f56ee3e338e741b9599fae06075edd83a4081c" dependencies = [ - "bitflags", + "bitflags 1.3.2", "bstr", "gix-path", "libc", @@ -1442,7 +1397,7 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93e43efd776bc543f46f0fd0ca3d920c37af71a764a16f2aebd89765e9ff2993" dependencies = [ - "bitflags", + "bitflags 1.3.2", "bstr", ] @@ -1473,7 +1428,7 @@ version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "717ab601ece7921f59fe86849dbe27d44a46ebb883b5885732c4f30df4996177" dependencies = [ - "bitflags", + "bitflags 1.3.2", "bstr", "btoi", "filetime", @@ -1657,7 +1612,7 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8ffa5bf0772f9b01de501c035b6b084cf9b8bb07dec41e3afc6a17336a65f47" dependencies = [ - "bitflags", + "bitflags 1.3.2", "dirs", "gix-path", "libc", @@ -1782,15 +1737,6 @@ dependencies = [ "ahash", ] -[[package]] -name = "heck" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" -dependencies = [ - "unicode-segmentation", -] - [[package]] name = "heck" version = "0.4.1" @@ -1900,16 +1846,16 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.55" +version = "0.1.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "716f12fbcfac6ffab0a5e9ec51d0a0ff70503742bb2dc7b99396394c9dc323f0" +checksum = "0c17cc76786e99f8d2f055c11159e7f0091c42474dcc3189fbab96072e873e6d" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "windows 0.47.0", + "windows 0.46.0", ] [[package]] @@ -2156,9 +2102,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"cd550e73688e6d578f0ac2119e32b797a327631a42f9433e59d02e139c8df60d" +checksum = "d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f" [[package]] name = "lock_api" @@ -2297,7 +2243,7 @@ version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" dependencies = [ - "bitflags", + "bitflags 1.3.2", "cfg-if", "libc", "static_assertions", @@ -2477,17 +2423,17 @@ dependencies = [ [[package]] name = "parquet" -version = "34.0.0" +version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ac135ecf63ebb5f53dda0921b0b76d6048b3ef631a5f4760b9e8f863ff00cfa" +checksum = "321a15f8332645759f29875b07f8233d16ed8ec1b3582223de81625a9f8506b7" dependencies = [ "ahash", - "arrow-array 34.0.0", - "arrow-buffer 34.0.0", + "arrow-array", + "arrow-buffer", "arrow-cast", - "arrow-data 34.0.0", + "arrow-data", "arrow-ipc", - "arrow-schema 34.0.0", + "arrow-schema", "arrow-select", "base64", "brotli", @@ -2529,16 +2475,6 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" -[[package]] -name = "pest" -version = "2.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cbd939b234e95d72bc393d51788aec68aeeb5d51e748ca08ff3aad58cb722f7" -dependencies = [ - "thiserror", - "ucd-trie", -] - [[package]] name = "petgraph" version = "0.6.3" @@ -2613,12 +2549,12 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "prettyplease" -version = "0.1.25" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8646e95016a7a6c4adea95bafa8a16baab64b583356217f2c85db4a39d9a86" +checksum = "1ceca8aaf45b5c46ec7ed39fff75f57290368c1846d33d24a122ca81416ab058" dependencies = [ "proc-macro2", - "syn 1.0.109", + "syn 2.0.12", ] [[package]] @@ 
-2642,16 +2578,6 @@ version = "23.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9516b775656bc3e8985e19cd4b8c0c0de045095074e453d2c0a513b5f978392d" -[[package]] -name = "prost" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "444879275cb4fd84958b1a1d5420d15e6fcf7c235fe47f053c9c2a80aceb6001" -dependencies = [ - "bytes", - "prost-derive 0.9.0", -] - [[package]] name = "prost" version = "0.11.8" @@ -2659,27 +2585,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e48e50df39172a3e7eb17e14642445da64996989bc212b583015435d39a58537" dependencies = [ "bytes", - "prost-derive 0.11.8", -] - -[[package]] -name = "prost-build" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62941722fb675d463659e49c4f3fe1fe792ff24fe5bbaa9c08cd3b98a1c354f5" -dependencies = [ - "bytes", - "heck 0.3.3", - "itertools", - "lazy_static", - "log", - "multimap", - "petgraph", - "prost 0.9.0", - "prost-types 0.9.0", - "regex", - "tempfile", - "which", + "prost-derive", ] [[package]] @@ -2689,32 +2595,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c828f93f5ca4826f97fedcbd3f9a536c16b12cff3dbbb4a007f932bbad95b12" dependencies = [ "bytes", - "heck 0.4.1", + "heck", "itertools", "lazy_static", "log", "multimap", "petgraph", - "prost 0.11.8", - "prost-types 0.11.8", + "prost", + "prost-types", "regex", "tempfile", "which", ] -[[package]] -name = "prost-derive" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9cc1a3263e07e0bf68e96268f37665207b49560d98739662cdfaae215c720fe" -dependencies = [ - "anyhow", - "itertools", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "prost-derive" version = "0.11.8" @@ -2728,23 +2621,13 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "prost-types" -version = "0.9.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "534b7a0e836e3c482d2693070f982e39e7611da9695d4d1f5a4b186b51faef0a" -dependencies = [ - "bytes", - "prost 0.9.0", -] - [[package]] name = "prost-types" version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "379119666929a1afd7a043aa6cf96fa67a6dce9af60c88095a4686dbce4c9c88" dependencies = [ - "prost 0.11.8", + "prost", ] [[package]] @@ -2868,7 +2751,7 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] @@ -2877,7 +2760,7 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] @@ -2916,10 +2799,11 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regress" -version = "0.4.1" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a92ff21fe8026ce3f2627faaf43606f0b67b014dbc9ccf027181a804f75d92e" +checksum = "d995d590bd8ec096d1893f414bf3f5e8b0ee4c9eed9a5642b9766ef2c8e2e8e9" dependencies = [ + "hashbrown 0.13.2", "memchr", ] @@ -2991,20 +2875,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" dependencies = [ - "semver 1.0.17", -] - -[[package]] -name = "rustfmt-wrapper" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed729e3bee08ec2befd593c27e90ca9fdd25efdc83c94c3b82eaef16e4f7406e" -dependencies = [ - "serde", - "tempfile", - "thiserror", - "toml", - "toolchain_find", + "semver", ] [[package]] @@ -3013,7 +2884,7 @@ version = "0.37.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "0e78cc525325c06b4a7ff02db283472f3c042b7ff0c391f96c6d5ac6f4f91b75" dependencies = [ - "bitflags", + "bitflags 1.3.2", "errno", "io-lifetimes", "libc", @@ -3109,30 +2980,12 @@ dependencies = [ "untrusted", ] -[[package]] -name = "semver" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f301af10236f6df4160f7c3f04eec6dbc70ace82d23326abad5edee88801c6b6" -dependencies = [ - "semver-parser", -] - [[package]] name = "semver" version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" -[[package]] -name = "semver-parser" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0bef5b7f9e0df16536d3961cfb6e84331c065b4066afb39768d0e319411f7" -dependencies = [ - "pest", -] - [[package]] name = "seq-macro" version = "0.3.3" @@ -3156,7 +3009,7 @@ checksum = "4c614d17805b093df4b147b51339e7e44bf05ef59fba1e45d83500bcfb4d8585" dependencies = [ "proc-macro2", "quote", - "syn 2.0.11", + "syn 2.0.12", ] [[package]] @@ -3290,7 +3143,7 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "475b3bbe5245c26f2d8a6f62d67c1f30eb9fffeccee721c45d162c3ebbdf81b2" dependencies = [ - "heck 0.4.1", + "heck", "proc-macro2", "quote", "syn 1.0.109", @@ -3357,7 +3210,7 @@ version = "0.24.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" dependencies = [ - "heck 0.4.1", + "heck", "proc-macro2", "quote", "rustversion", @@ -3366,22 +3219,22 @@ dependencies = [ [[package]] name = "substrait" -version = "0.5.4" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58dbeec746a1da14b7af982eabbd18031563f418fe4217612bc1a4cab5961259" +checksum = 
"a94bab2309478886b270dbace5adb3927d8e397aa804b67c5c37da8591dff989" dependencies = [ "gix", - "heck 0.4.1", + "heck", "prettyplease", - "prost 0.11.8", - "prost-build 0.11.8", - "prost-types 0.11.8", + "prost", + "prost-build", + "prost-types", "schemars", - "semver 1.0.17", + "semver", "serde", "serde_json", "serde_yaml", - "syn 1.0.109", + "syn 2.0.12", "typify", "walkdir", ] @@ -3405,9 +3258,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.11" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21e3787bb71465627110e7d87ed4faaa36c1f61042ee67badb9e2ef173accc40" +checksum = "79d9531f94112cfc3e4c8f5f02cb2b58f72c97b7efd85f70203cc6d8efda5927" dependencies = [ "proc-macro2", "quote", @@ -3459,7 +3312,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.11", + "syn 2.0.12", ] [[package]] @@ -3563,7 +3416,7 @@ checksum = "61a573bdc87985e9d6ddeed1b3d864e8a302c847e40d647746df2f1de209d1ce" dependencies = [ "proc-macro2", "quote", - "syn 2.0.11", + "syn 2.0.12", ] [[package]] @@ -3602,28 +3455,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "toml" -version = "0.5.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" -dependencies = [ - "serde", -] - -[[package]] -name = "toolchain_find" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e85654a10e7a07a47c6f19d93818f3f343e22927f2fa280c84f7c8042743413" -dependencies = [ - "home", - "lazy_static", - "regex", - "semver 0.11.0", - "walkdir", -] - [[package]] name = "tower-service" version = "0.3.2" @@ -3697,9 +3528,9 @@ checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" [[package]] name = "typify" -version = "0.0.10" +version = "0.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2e8486352f3c946e69f983558cfc09b295250b01e01b381ec67a05a812d01d63" +checksum = "30bfde96849e25d7feef1bbf652e9cfc51deb63203fdc07b115b8bc3bcfe20b9" dependencies = [ "typify-impl", "typify-macro", @@ -3707,16 +3538,15 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.0.10" +version = "0.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7624d0b911df6e2bbf34a236f76281f93b294cdde1d4df1dbdb748e5a7fefa5" +checksum = "95d27d749378ceab6ec22188ed7ad102205c89ddb92ab662371c850ffc71aa1a" dependencies = [ - "heck 0.4.1", + "heck", "log", "proc-macro2", "quote", "regress", - "rustfmt-wrapper", "schemars", "serde_json", "syn 1.0.109", @@ -3726,9 +3556,9 @@ dependencies = [ [[package]] name = "typify-macro" -version = "0.0.10" +version = "0.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c42802aa033cee7650a4e1509ba7d5848a56f84be7c4b31e4385ee12445e942" +checksum = "35db6fc2bd9220ecdac6eeb88158824b83610de3dda0c6d0f2142b49efd858b0" dependencies = [ "proc-macro2", "quote", @@ -3740,12 +3570,6 @@ dependencies = [ "typify-impl", ] -[[package]] -name = "ucd-trie" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e79c4d996edb816c91e4308506774452e55e95c3c9de07b6729e17e15a5ef81" - [[package]] name = "unicode-bidi" version = "0.3.13" @@ -4018,22 +3842,22 @@ version = "0.43.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04662ed0e3e5630dfa9b26e4cb823b817f1a9addda855d973a9458c236556244" dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", ] [[package]] name = "windows" 
-version = "0.47.0" +version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2649ff315bee4c98757f15dac226efe3d81927adbb6e882084bb1ee3e0c330a7" +checksum = "cdacb41e6a96a052c6cb63a144f24900236121c6f63f4f8219fef5977ecb0c25" dependencies = [ - "windows-targets 0.47.0", + "windows-targets", ] [[package]] @@ -4042,7 +3866,7 @@ version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" dependencies = [ - "windows-targets 0.42.2", + "windows-targets", ] [[package]] @@ -4051,28 +3875,13 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - -[[package]] -name = "windows-targets" -version = "0.47.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f8996d3f43b4b2d44327cd71b7b0efd1284ab60e6e9d0e8b630e18555d87d3e" -dependencies = [ - "windows_aarch64_gnullvm 0.47.0", - "windows_aarch64_msvc 0.47.0", - "windows_i686_gnu 0.47.0", - "windows_i686_msvc 0.47.0", - "windows_x86_64_gnu 0.47.0", - "windows_x86_64_gnullvm 0.47.0", - "windows_x86_64_msvc 0.47.0", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", ] [[package]] @@ -4081,84 +3890,42 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.47.0" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "831d567d53d4f3cb1db332b68e6e2b6260228eb4d99a777d8b2e8ed794027c90" - [[package]] name = "windows_aarch64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" -[[package]] -name = "windows_aarch64_msvc" -version = "0.47.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a42d54a417c60ce4f0e31661eed628f0fa5aca73448c093ec4d45fab4c51cdf" - [[package]] name = "windows_i686_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" -[[package]] -name = "windows_i686_gnu" -version = "0.47.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1925beafdbb22201a53a483db861a5644123157c1c3cee83323a2ed565d71e3" - [[package]] name = "windows_i686_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" -[[package]] -name = "windows_i686_msvc" -version = "0.47.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a8ef8f2f1711b223947d9b69b596cf5a4e452c930fb58b6fc3fdae7d0ec6b31" - [[package]] name = "windows_x86_64_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" -[[package]] -name = "windows_x86_64_gnu" -version = "0.47.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7acaa0c2cf0d2ef99b61c308a0c3dbae430a51b7345dedec470bd8f53f5a3642" - [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.47.0" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a0628f71be1d11e17ca4a0e9e15b3a5180f6fbf1c2d55e3ba3f850378052c1" - [[package]] name = "windows_x86_64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" -[[package]] -name = "windows_x86_64_msvc" -version = "0.47.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d6e62c256dc6d40b8c8707df17df8d774e60e39db723675241e7c15e910bce7" - [[package]] name = "winreg" version = "0.10.1" diff --git a/Cargo.toml b/Cargo.toml index 75f458d7e..ddabb2b8c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,12 +35,12 @@ default = ["mimalloc"] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.18.1", features = ["extension-module", "abi3", "abi3-py37"] } -datafusion = { version = "21.0.0", features = ["pyarrow", "avro"]} -datafusion-common = { version = "21.0.0", features = ["pyarrow"]} -datafusion-expr = { version = "21.0.0" } -datafusion-optimizer = { version = "21.0.0" } -datafusion-sql = { version = "21.0.0" } -datafusion-substrait = { version = "21.0.0" } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "c09edad", features = ["pyarrow", "avro"] } +datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "c09edad", features = ["pyarrow"] } +datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "c09edad" } +datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "c09edad" } +datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "c09edad" } +datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "c09edad" } uuid = { version = "1.2", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false 
} async-trait = "0.1" @@ -48,6 +48,7 @@ futures = "0.3" object_store = { version = "0.5.3", features = ["aws", "gcp", "azure"] } parking_lot = "0.12" regex-syntax = "0.6.28" +syn = "2.0.11" url = "2.2" [lib] From 5b023942c68b6e2d08ab86975fba7c2b9bf69dda Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 31 Mar 2023 11:12:17 -0600 Subject: [PATCH 009/413] fix invalid build yaml (#308) --- .github/workflows/build.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fc8910766..e57696f8b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -105,8 +105,6 @@ jobs: - name: Build wheels uses: PyO3/maturin-action@v1 with: - env: - RUST_BACKTRACE: 1 rust-toolchain: nightly target: x86_64 manylinux: auto From 9688d57417f7dd17535cac84cc20b6cb4c6c47e3 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 31 Mar 2023 13:16:00 -0600 Subject: [PATCH 010/413] Try fix release build (#309) * Fix broken use of environment variables in GitHub actions * Fix broken workflow file and add actionlint pre-commit check to prevent future errors * Install protoc * Add rustup-components * fix maturin-action bugs * add explanation * add protoc to sdist and manylinux * Update .github/workflows/build.yml Co-authored-by: Phillip Cloud <417981+cpcloud@users.noreply.github.com> * install protobuf compiler * add protoc * fix invalid build yaml * try set protoc path * try suggestion from ChatGPT * experiment * revert change --------- Co-authored-by: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Co-authored-by: Ian Alexander Joiner <14581281+iajoiner@users.noreply.github.com> --- .github/workflows/build.yml | 31 +++++++++++++++++++++++++++++++ .github/workflows/docs.yaml | 4 ++-- .pre-commit-config.yaml | 4 ++++ 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e57696f8b..a6541f775 100644 --- a/.github/workflows/build.yml +++ 
b/.github/workflows/build.yml @@ -60,6 +60,12 @@ jobs: with: toolchain: stable + - name: Upgrade pip + run: python -m pip install --upgrade pip + + - name: Install maturin + run: pip install maturin==0.14.2 + - run: rm LICENSE.txt - name: Download LICENSE.txt uses: actions/download-artifact@v3 @@ -67,12 +73,21 @@ jobs: name: python-wheel-license path: . + - name: Install Protoc + uses: arduino/setup-protoc@v1 + with: + version: '3.x' + repo-token: ${{ secrets.GITHUB_TOKEN }} + - name: Build Python package run: maturin build --release --strip - name: List Windows wheels if: matrix.os == 'windows-latest' run: dir target\wheels\ + # since the runner is dynamic shellcheck (from actionlint) can't infer this is powershell + # so we specify it explicitly + shell: powershell - name: List Mac wheels if: matrix.os != 'windows-latest' @@ -97,17 +112,25 @@ jobs: name: python-wheel-license path: . - run: cat LICENSE.txt + - run: sudo apt-get install protobuf-compiler - name: Install Protoc uses: arduino/setup-protoc@v1 with: version: '3.x' repo-token: ${{ secrets.GITHUB_TOKEN }} + - run: echo "PROTOC=$(which protoc)" >> "$GITHUB_ENV" + - name: Confirm PROTOC is set + - run: echo $PROTOC - name: Build wheels uses: PyO3/maturin-action@v1 + env: + RUST_BACKTRACE: 1 + PROTOC: /opt/hostedtoolcache/protoc/3.20.3/x64/bin/protoc with: rust-toolchain: nightly target: x86_64 manylinux: auto + rustup-components: rust-std rustfmt # Keep them in one line due to https://github.com/PyO3/maturin-action/issues/153 args: --release --manylinux 2014 - name: Archive wheels uses: actions/upload-artifact@v3 @@ -128,11 +151,19 @@ jobs: name: python-wheel-license path: . 
- run: cat LICENSE.txt + - run: sudo apt-get install protobuf-compiler + - name: Install Protoc + uses: arduino/setup-protoc@v1 + with: + version: '3.x' + repo-token: ${{ secrets.GITHUB_TOKEN }} + - run: echo "PROTOC=$(which protoc)" >> "$GITHUB_ENV" - name: Build sdist uses: PyO3/maturin-action@v1 with: rust-toolchain: stable manylinux: auto + rustup-components: rust-std rustfmt args: --release --sdist --out dist - name: Archive wheels uses: actions/upload-artifact@v3 diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 170f77a19..bef42d538 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -17,9 +17,9 @@ jobs: run: | set -x if test '${{ github.ref }}' = 'refs/heads/main'; then - echo "value=asf-staging" >> $GITHUB_OUTPUT + echo "value=asf-staging" >> "$GITHUB_OUTPUT" elif test '${{ github.ref_type }}' = 'tag'; then - echo "value=asf-site" >> $GITHUB_OUTPUT + echo "value=asf-site" >> "$GITHUB_OUTPUT" else echo "Unsupported input: ${{ github.ref }} / ${{ github.ref_type }}" exit 1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3c6805322..39049bf49 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,6 +16,10 @@ # under the License. repos: + - repo: https://github.com/rhysd/actionlint + rev: v1.6.23 + hooks: + - id: actionlint-docker - repo: https://github.com/psf/black rev: 22.3.0 hooks: From 503647c4be6caa96d2ec05e63aee12616bdf8ed8 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 31 Mar 2023 14:34:26 -0600 Subject: [PATCH 011/413] Fix release build (#310) --- .github/workflows/build.yml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a6541f775..115648b07 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -113,19 +113,20 @@ jobs: path: . 
- run: cat LICENSE.txt - run: sudo apt-get install protobuf-compiler - - name: Install Protoc - uses: arduino/setup-protoc@v1 - with: - version: '3.x' - repo-token: ${{ secrets.GITHUB_TOKEN }} - - run: echo "PROTOC=$(which protoc)" >> "$GITHUB_ENV" - - name: Confirm PROTOC is set - - run: echo $PROTOC + - name: Install protoc + run: | + PROTOC_VERSION=3.12.4 + curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-${PROTOC_OS_ARCH}.zip" + unzip -d "${GITHUB_WORKSPACE}" "protoc-${PROTOC_VERSION}-${PROTOC_OS_ARCH}.zip" bin/protoc + chmod +x "${GITHUB_WORKSPACE}/bin/protoc" + ${GITHUB_WORKSPACE}/bin/protoc --version + export PATH=${GITHUB_WORKSPACE}/bin/:$PATH + which protoc + echo "::set-env name=PATH::$PATH" - name: Build wheels uses: PyO3/maturin-action@v1 env: RUST_BACKTRACE: 1 - PROTOC: /opt/hostedtoolcache/protoc/3.20.3/x64/bin/protoc with: rust-toolchain: nightly target: x86_64 From 5eb0161d581ad1e6d274206e26d58848057c54e2 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 3 Apr 2023 12:42:48 -0600 Subject: [PATCH 012/413] Enable datafusion-substrait protoc feature, to remove compile-time dependency on protoc (#312) --- .github/workflows/build.yml | 35 ++---- Cargo.lock | 212 ++++++++++++++++++++++++------------ Cargo.toml | 12 +- 3 files changed, 161 insertions(+), 98 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 115648b07..5d3199432 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -52,6 +52,17 @@ jobs: steps: - uses: actions/checkout@v3 + - name: Setup Cpp + uses: aminya/setup-cpp@v1 + with: + compiler: gcc + vcvarsall: ${{ contains(matrix.os, 'windows') }} + cmake: true + ninja: true + vcpkg: true + cppcheck: true + clangtidy: true + - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} @@ -73,12 +84,6 @@ jobs: name: python-wheel-license path: . 
- - name: Install Protoc - uses: arduino/setup-protoc@v1 - with: - version: '3.x' - repo-token: ${{ secrets.GITHUB_TOKEN }} - - name: Build Python package run: maturin build --release --strip @@ -112,17 +117,6 @@ jobs: name: python-wheel-license path: . - run: cat LICENSE.txt - - run: sudo apt-get install protobuf-compiler - - name: Install protoc - run: | - PROTOC_VERSION=3.12.4 - curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-${PROTOC_OS_ARCH}.zip" - unzip -d "${GITHUB_WORKSPACE}" "protoc-${PROTOC_VERSION}-${PROTOC_OS_ARCH}.zip" bin/protoc - chmod +x "${GITHUB_WORKSPACE}/bin/protoc" - ${GITHUB_WORKSPACE}/bin/protoc --version - export PATH=${GITHUB_WORKSPACE}/bin/:$PATH - which protoc - echo "::set-env name=PATH::$PATH" - name: Build wheels uses: PyO3/maturin-action@v1 env: @@ -152,13 +146,6 @@ jobs: name: python-wheel-license path: . - run: cat LICENSE.txt - - run: sudo apt-get install protobuf-compiler - - name: Install Protoc - uses: arduino/setup-protoc@v1 - with: - version: '3.x' - repo-token: ${{ secrets.GITHUB_TOKEN }} - - run: echo "PROTOC=$(which protoc)" >> "$GITHUB_ENV" - name: Build sdist uses: PyO3/maturin-action@v1 with: diff --git a/Cargo.lock b/Cargo.lock index 86e323375..5d5196067 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -350,7 +350,7 @@ checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" dependencies = [ "proc-macro2", "quote", - "syn 2.0.12", + "syn 2.0.13", ] [[package]] @@ -361,7 +361,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", "quote", - "syn 2.0.12", + "syn 2.0.13", ] [[package]] @@ -370,6 +370,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "autotools" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "aef8da1805e028a172334c3b680f93e71126f2327622faef2ec3d893c0a4ad77" +dependencies = [ + "cc", +] + [[package]] name = "base64" version = "0.21.0" @@ -611,9 +620,9 @@ checksum = "13418e745008f7349ec7e449155f419a61b92b58a99cc3616942b926825ec76b" [[package]] name = "core-foundation-sys" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" +checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "cpufeatures" @@ -694,7 +703,7 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn 2.0.12", + "syn 2.0.13", ] [[package]] @@ -711,7 +720,7 @@ checksum = "2345488264226bf682893e25de0769f3360aac9957980ec49361b083ddaa5bc5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.12", + "syn 2.0.13", ] [[package]] @@ -729,8 +738,8 @@ dependencies = [ [[package]] name = "datafusion" -version = "21.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=c09edad#c09edade14d456f1d2161c5ebb8c1e51e592a8ef" +version = "21.1.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2191a69#2191a69b2d48b0b7230c9da5a75f86a4e659361b" dependencies = [ "ahash", "apache-avro", @@ -778,8 +787,8 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "21.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=c09edad#c09edade14d456f1d2161c5ebb8c1e51e592a8ef" +version = "21.1.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2191a69#2191a69b2d48b0b7230c9da5a75f86a4e659361b" dependencies = [ "apache-avro", "arrow", @@ -794,8 +803,8 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "21.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=c09edad#c09edade14d456f1d2161c5ebb8c1e51e592a8ef" +version = "21.1.0" +source = 
"git+https://github.com/apache/arrow-datafusion.git?rev=2191a69#2191a69b2d48b0b7230c9da5a75f86a4e659361b" dependencies = [ "dashmap", "datafusion-common", @@ -811,8 +820,8 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "21.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=c09edad#c09edade14d456f1d2161c5ebb8c1e51e592a8ef" +version = "21.1.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2191a69#2191a69b2d48b0b7230c9da5a75f86a4e659361b" dependencies = [ "ahash", "arrow", @@ -822,8 +831,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "21.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=c09edad#c09edade14d456f1d2161c5ebb8c1e51e592a8ef" +version = "21.1.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2191a69#2191a69b2d48b0b7230c9da5a75f86a4e659361b" dependencies = [ "arrow", "async-trait", @@ -839,8 +848,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "21.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=c09edad#c09edade14d456f1d2161c5ebb8c1e51e592a8ef" +version = "21.1.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2191a69#2191a69b2d48b0b7230c9da5a75f86a4e659361b" dependencies = [ "ahash", "arrow", @@ -886,7 +895,7 @@ dependencies = [ "pyo3", "rand", "regex-syntax", - "syn 2.0.12", + "syn 2.0.13", "tokio", "url", "uuid", @@ -894,8 +903,8 @@ dependencies = [ [[package]] name = "datafusion-row" -version = "21.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=c09edad#c09edade14d456f1d2161c5ebb8c1e51e592a8ef" +version = "21.1.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2191a69#2191a69b2d48b0b7230c9da5a75f86a4e659361b" dependencies = [ "arrow", "datafusion-common", @@ -905,8 +914,8 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "21.0.0" -source = 
"git+https://github.com/apache/arrow-datafusion.git?rev=c09edad#c09edade14d456f1d2161c5ebb8c1e51e592a8ef" +version = "21.1.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2191a69#2191a69b2d48b0b7230c9da5a75f86a4e659361b" dependencies = [ "arrow-schema", "datafusion-common", @@ -917,8 +926,8 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "21.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=c09edad#c09edade14d456f1d2161c5ebb8c1e51e592a8ef" +version = "21.1.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2191a69#2191a69b2d48b0b7230c9da5a75f86a4e659361b" dependencies = [ "async-recursion", "chrono", @@ -1133,7 +1142,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.12", + "syn 2.0.13", ] [[package]] @@ -1594,9 +1603,9 @@ dependencies = [ [[package]] name = "gix-revision" -version = "0.12.1" +version = "0.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b12fc4bbc3161a5b2d68079fce93432cef8771ff88ca017abb01187fddfc41a1" +checksum = "3c6f6ff53f888858afc24bf12628446a14279ceec148df6194481f306f553ad2" dependencies = [ "bstr", "gix-date", @@ -1846,16 +1855,16 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.54" +version = "0.1.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c17cc76786e99f8d2f055c11159e7f0091c42474dcc3189fbab96072e873e6d" +checksum = "0722cd7114b7de04316e7ea5456a0bbb20e4adb46fd27a3697adb812cff0f37c" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "windows 0.46.0", + "windows 0.48.0", ] [[package]] @@ -2083,9 +2092,9 @@ checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" [[package]] name = "libmimalloc-sys" -version = "0.1.30" +version = "0.1.31" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8c7cbf8b89019683667e347572e6d55a7df7ea36b0c4ce69961b0cde67b174" +checksum = "ef2c45001fb108f37d41bed8efd715769acb14674c1ce3e266ef0e317ef5f877" dependencies = [ "cc", "libc", @@ -2357,9 +2366,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1ea8f683b4f89a64181393742c041520a1a87e9775e6b4c0dd5a3281af05fc6" +checksum = "ec9cd6ca25e796a49fa242876d1c4de36a24a6da5258e9f0bc062dbf5e81c53b" dependencies = [ "async-trait", "base64", @@ -2554,7 +2563,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ceca8aaf45b5c46ec7ed39fff75f57290368c1846d33d24a122ca81416ab058" dependencies = [ "proc-macro2", - "syn 2.0.12", + "syn 2.0.13", ] [[package]] @@ -2565,9 +2574,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.54" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e472a104799c74b514a57226160104aa483546de37e839ec50e3c2e41dd87534" +checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" dependencies = [ "unicode-ident", ] @@ -2630,6 +2639,15 @@ dependencies = [ "prost", ] +[[package]] +name = "protobuf-src" +version = "1.1.0+21.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7ac8852baeb3cc6fb83b93646fb93c0ffe5d14bf138c945ceb4b9948ee0e3c1" +dependencies = [ + "autotools", +] + [[package]] name = "pyo3" version = "0.18.2" @@ -2698,9 +2716,9 @@ checksum = "658fa1faf7a4cc5f057c9ee5ef560f717ad9d8dc66d975267f709624d6e1ab88" [[package]] name = "quick-xml" -version = "0.27.1" +version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffc053f057dd768a56f62cd7e434c42c831d296968997e9ac1f76ea7c2d14c41" +checksum = 
"e5c1a97b1bc42b1d550bfb48d4262153fe400a12bab1511821736f7eac76d7e2" dependencies = [ "memchr", "serde", @@ -2880,9 +2898,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.5" +version = "0.37.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e78cc525325c06b4a7ff02db283472f3c042b7ff0c391f96c6d5ac6f4f91b75" +checksum = "d097081ed288dfe45699b72f5b5d648e5f15d64d900c7080273baa20c16a6849" dependencies = [ "bitflags 1.3.2", "errno", @@ -3009,7 +3027,7 @@ checksum = "4c614d17805b093df4b147b51339e7e44bf05ef59fba1e45d83500bcfb4d8585" dependencies = [ "proc-macro2", "quote", - "syn 2.0.12", + "syn 2.0.13", ] [[package]] @@ -3219,9 +3237,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.6.0" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a94bab2309478886b270dbace5adb3927d8e397aa804b67c5c37da8591dff989" +checksum = "2b121815df0d0ce3f2c3b68d38fb0c5a56e6076ba1c9a713b6ad1fec41674906" dependencies = [ "gix", "heck", @@ -3229,12 +3247,13 @@ dependencies = [ "prost", "prost-build", "prost-types", + "protobuf-src", "schemars", "semver", "serde", "serde_json", "serde_yaml", - "syn 2.0.12", + "syn 2.0.13", "typify", "walkdir", ] @@ -3258,9 +3277,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.12" +version = "2.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79d9531f94112cfc3e4c8f5f02cb2b58f72c97b7efd85f70203cc6d8efda5927" +checksum = "4c9da457c5285ac1f936ebd076af6dac17a61cfe7826f2076b4d015cf47bc8ec" dependencies = [ "proc-macro2", "quote", @@ -3312,7 +3331,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.12", + "syn 2.0.13", ] [[package]] @@ -3416,7 +3435,7 @@ checksum = "61a573bdc87985e9d6ddeed1b3d864e8a302c847e40d647746df2f1de209d1ce" dependencies = [ "proc-macro2", "quote", - "syn 2.0.12", + "syn 2.0.13", ] [[package]] @@ -3842,22 
+3861,22 @@ version = "0.43.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04662ed0e3e5630dfa9b26e4cb823b817f1a9addda855d973a9458c236556244" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", ] [[package]] name = "windows" -version = "0.46.0" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdacb41e6a96a052c6cb63a144f24900236121c6f63f4f8219fef5977ecb0c25" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" dependencies = [ - "windows-targets", + "windows-targets 0.48.0", ] [[package]] @@ -3866,7 +3885,7 @@ version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" dependencies = [ - "windows-targets", + "windows-targets 0.42.2", ] [[package]] @@ -3875,13 +3894,28 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-targets" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" +dependencies = [ + "windows_aarch64_gnullvm 0.48.0", + "windows_aarch64_msvc 0.48.0", + "windows_i686_gnu 0.48.0", + "windows_i686_msvc 0.48.0", + "windows_x86_64_gnu 0.48.0", + "windows_x86_64_gnullvm 0.48.0", + "windows_x86_64_msvc 0.48.0", ] [[package]] @@ -3890,42 +3924,84 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" + [[package]] name = "windows_aarch64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" + [[package]] name = "windows_i686_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" +[[package]] +name = "windows_i686_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" + [[package]] name = "windows_i686_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" +[[package]] +name = "windows_i686_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" + [[package]] name = "windows_x86_64_gnu" 
version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" + [[package]] name = "windows_x86_64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" + [[package]] name = "winreg" version = "0.10.1" diff --git a/Cargo.toml b/Cargo.toml index ddabb2b8c..1a51e61ee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,12 +35,12 @@ default = ["mimalloc"] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.18.1", features = ["extension-module", "abi3", "abi3-py37"] } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "c09edad", features = ["pyarrow", "avro"] } -datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "c09edad", features = ["pyarrow"] } -datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "c09edad" } -datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", 
rev = "c09edad" } -datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "c09edad" } -datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "c09edad" } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2191a69", features = ["pyarrow", "avro"] } +datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2191a69", features = ["pyarrow"] } +datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2191a69" } +datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2191a69" } +datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2191a69" } +datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2191a69", features = ["protoc"] } uuid = { version = "1.2", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false } async-trait = "0.1" From 47dd8e573956f6dcfb6d378b08e17ced25264700 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 4 Apr 2023 08:54:38 -0600 Subject: [PATCH 013/413] Fix Mac/Win release builds in CI (#313) --- .github/workflows/build.yml | 21 ++++++++------------- Cargo.toml | 3 ++- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5d3199432..6296bf49e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -52,17 +52,6 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Setup Cpp - uses: aminya/setup-cpp@v1 - with: - compiler: gcc - vcvarsall: ${{ contains(matrix.os, 'windows') }} - cmake: true - ninja: true - vcpkg: true - cppcheck: true - clangtidy: true - - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} @@ -84,6 +73,12 @@ jobs: name: python-wheel-license path: . 
+ - name: Install Protoc + uses: arduino/setup-protoc@v1 + with: + version: '3.x' + repo-token: ${{ secrets.GITHUB_TOKEN }} + - name: Build Python package run: maturin build --release --strip @@ -126,7 +121,7 @@ jobs: target: x86_64 manylinux: auto rustup-components: rust-std rustfmt # Keep them in one line due to https://github.com/PyO3/maturin-action/issues/153 - args: --release --manylinux 2014 + args: --release --manylinux 2014 --features protoc - name: Archive wheels uses: actions/upload-artifact@v3 with: @@ -152,7 +147,7 @@ jobs: rust-toolchain: stable manylinux: auto rustup-components: rust-std rustfmt - args: --release --sdist --out dist + args: --release --sdist --out dist --features protoc - name: Archive wheels uses: actions/upload-artifact@v3 with: diff --git a/Cargo.toml b/Cargo.toml index 1a51e61ee..bf6cfe9ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ include = ["/src", "/LICENSE.txt"] [features] default = ["mimalloc"] +protoc = [ "datafusion-substrait/protoc" ] [dependencies] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } @@ -40,7 +41,7 @@ datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", re datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2191a69" } datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2191a69" } datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2191a69" } -datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2191a69", features = ["protoc"] } +datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2191a69" } uuid = { version = "1.2", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false } async-trait = "0.1" From 59f90f65010eddd511dbb4460417f098d7fe20f7 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 5 Apr 2023 20:21:59 -0600 Subject: [PATCH 
014/413] install protoc in docs workflow (#314) --- .github/workflows/docs.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index bef42d538..d9e7ad4ad 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -37,6 +37,12 @@ jobs: with: python-version: "3.10" + - name: Install Protoc + uses: arduino/setup-protoc@v1 + with: + version: '3.x' + repo-token: ${{ secrets.GITHUB_TOKEN }} + - name: Install dependencies run: | set -x From 31418b5f5cdc876142c34a3b15bbd89fe3a34a04 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 6 Apr 2023 07:56:33 -0600 Subject: [PATCH 015/413] Fix documentation generation in CI (#315) * install protoc in docs workflow * fix docs gen --- docs/README.md | 6 +++--- docs/build.sh | 2 -- docs/requirements.txt | 2 +- docs/source/api.rst | 2 +- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/docs/README.md b/docs/README.md index a6f4998c8..6d8b2d57d 100644 --- a/docs/README.md +++ b/docs/README.md @@ -20,8 +20,8 @@ # DataFusion Documentation This folder contains the source content of the [python api](./source/api). -These are both published to https://arrow.apache.org/datafusion/ -as part of the release process. +This is published to https://arrow.apache.org/datafusion-python/ by a GitHub action +when changes are merged to the main branch. ## Dependencies @@ -29,7 +29,7 @@ It's recommended to install build dependencies and build the documentation inside a Python virtualenv. 
- Python -- `pip install -r requirements.txt` +- `pip3 install -r requirements.txt` ## Build & Preview diff --git a/docs/build.sh b/docs/build.sh index 3f24f8eec..7e8bb0b54 100755 --- a/docs/build.sh +++ b/docs/build.sh @@ -23,6 +23,4 @@ rm -rf build 2> /dev/null rm -rf temp 2> /dev/null mkdir temp cp -rf source/* temp/ -# replace relative URLs with absolute URLs -#sed -i 's/\.\.\/\.\.\/\.\.\//https:\/\/github.com\/apache\/arrow-datafusion\/blob\/master\//g' temp/contributor-guide/index.md make SOURCEDIR=`pwd`/temp html \ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt index 2af563587..8eb744968 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -sphinx +sphinx==5.3.0 pydata-sphinx-theme==0.8.0 myst-parser maturin diff --git a/docs/source/api.rst b/docs/source/api.rst index a3e7e24df..a5d65433d 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -27,6 +27,6 @@ API Reference api/config api/dataframe api/execution_context - api/expr + api/expression api/functions api/object_store From bb54b10a2532915fc24b49664fa7db0c5dd14b5b Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 10 Apr 2023 14:12:06 -0600 Subject: [PATCH 016/413] Source wheel fix (#319) --- Cargo.toml | 2 +- pyproject.toml | 2 +- requirements.in | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index bf6cfe9ac..361a06e04 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,7 @@ readme = "README.md" license = "Apache-2.0" edition = "2021" rust-version = "1.64" -include = ["/src", "/LICENSE.txt"] +include = ["/src", "/datafusion", "/LICENSE.txt", "pyproject.toml", "Cargo.toml", "Cargo.lock"] [features] default = ["mimalloc"] diff --git a/pyproject.toml b/pyproject.toml index 6d8a9d213..87d0be6cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ # under the License. 
[build-system] -requires = ["maturin>=0.11,<0.15"] +requires = ["maturin>=0.14,<0.15"] build-backend = "maturin" [project] diff --git a/requirements.in b/requirements.in index 7ee6a48dc..098e9f809 100644 --- a/requirements.in +++ b/requirements.in @@ -18,7 +18,7 @@ black flake8 isort -maturin +maturin>=0.14 mypy numpy pyarrow From 5c90187207e218393297ff3cbe4f08b69049d224 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 10 Apr 2023 17:37:22 -0600 Subject: [PATCH 017/413] Prepare for 22.0.0 release (#320) * Prepare for 22.0.0 release * fix tests * lint --- CHANGELOG.md | 15 +++ Cargo.lock | 150 ++++++++++++++++------------- Cargo.toml | 14 +-- datafusion/tests/test_context.py | 4 +- datafusion/tests/test_dataframe.py | 4 +- datafusion/tests/test_indexing.py | 2 +- 6 files changed, 112 insertions(+), 77 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 35982d9a9..7f4a35c70 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,21 @@ # Changelog +## [22.0.0](https://github.com/apache/arrow-datafusion-python/tree/22.0.0) (2023-04-10) + +[Full Changelog](https://github.com/apache/arrow-datafusion-python/compare/21.0.0...22.0.0) + +**Merged pull requests:** + +- Fix invalid build yaml [#308](https://github.com/apache/arrow-datafusion-python/pull/308) (andygrove) +- Try fix release build [#309](https://github.com/apache/arrow-datafusion-python/pull/309) (andygrove) +- Fix release build [#310](https://github.com/apache/arrow-datafusion-python/pull/310) (andygrove) +- Enable datafusion-substrait protoc feature, to remove compile-time dependency on protoc [#312](https://github.com/apache/arrow-datafusion-python/pull/312) (andygrove) +- Fix Mac/Win release builds in CI [#313](https://github.com/apache/arrow-datafusion-python/pull/313) (andygrove) +- install protoc in docs workflow [#314](https://github.com/apache/arrow-datafusion-python/pull/314) (andygrove) +- Fix documentation generation in CI 
[#315](https://github.com/apache/arrow-datafusion-python/pull/315) (andygrove) +- Source wheel fix [#319](https://github.com/apache/arrow-datafusion-python/pull/319) (andygrove) + ## [21.0.0](https://github.com/apache/arrow-datafusion-python/tree/21.0.0) (2023-03-30) [Full Changelog](https://github.com/apache/arrow-datafusion-python/compare/20.0.0...21.0.0) diff --git a/Cargo.lock b/Cargo.lock index 5d5196067..3451163dc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -293,7 +293,7 @@ version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d04f17f7b86ded0b5baf98fe6123391c4343e031acc3ccc5fa604cc180bff220" dependencies = [ - "bitflags 2.0.2", + "bitflags 2.1.0", ] [[package]] @@ -393,9 +393,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.0.2" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "487f1e0fcbe47deb8b0574e646def1c903389d95241dd1bbcc6ce4a715dfc0c1" +checksum = "c70beb79cbb5ce9c4f8e20849978f34225931f665bb49efa6982875a4d5facb3" [[package]] name = "blake2" @@ -543,9 +543,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa48fa079165080f11d7753fd0bc175b7d391f276b965fe4b55bfad67856e463" +checksum = "cf9cc2b23599e6d7479755f3594285efb3f74a1bdca7a7374948bc831e23a552" dependencies = [ "chrono", "chrono-tz-build", @@ -738,8 +738,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "21.1.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=2191a69#2191a69b2d48b0b7230c9da5a75f86a4e659361b" +version = "22.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bdb93fee4f30368f1f71bfd5cd28882ec9fab0183db7924827b76129d33227c" dependencies = [ "ahash", "apache-avro", @@ -787,8 +788,9 @@ dependencies = [ [[package]] name = "datafusion-common" 
-version = "21.1.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=2191a69#2191a69b2d48b0b7230c9da5a75f86a4e659361b" +version = "22.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e82401ce129e601d406012b6d718f8978ba84c386e1c342fa155877120d68824" dependencies = [ "apache-avro", "arrow", @@ -803,8 +805,9 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "21.1.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=2191a69#2191a69b2d48b0b7230c9da5a75f86a4e659361b" +version = "22.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b08b2078aed21a27239cd93f3015e492a58b0d50ebeeaf8d2236cf108ef583ce" dependencies = [ "dashmap", "datafusion-common", @@ -820,8 +823,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "21.1.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=2191a69#2191a69b2d48b0b7230c9da5a75f86a4e659361b" +version = "22.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b5b977ce9695fb4c67614266ec57f384fc11e9a9f9b3e6d0e62b9c5a9f2c1f" dependencies = [ "ahash", "arrow", @@ -831,8 +835,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "21.1.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=2191a69#2191a69b2d48b0b7230c9da5a75f86a4e659361b" +version = "22.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0b2bb9e73ed778d1bc5af63a270f0154bf6eab5099c77668a6362296888e46b" dependencies = [ "arrow", "async-trait", @@ -848,8 +853,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "21.1.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=2191a69#2191a69b2d48b0b7230c9da5a75f86a4e659361b" +version = "22.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80cd8ea5ab0a07b1b2a3e17d5909f1b1035bd129ffeeb5c66842a32e682f8f79" dependencies 
= [ "ahash", "arrow", @@ -879,7 +885,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "21.0.0" +version = "22.0.0" dependencies = [ "async-trait", "datafusion", @@ -903,8 +909,9 @@ dependencies = [ [[package]] name = "datafusion-row" -version = "21.1.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=2191a69#2191a69b2d48b0b7230c9da5a75f86a4e659361b" +version = "22.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a95d6badab19fd6e9195fdc5209ac0a7e5ce9bcdedc67767b9ffc1b4e645760" dependencies = [ "arrow", "datafusion-common", @@ -914,9 +921,11 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "21.1.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=2191a69#2191a69b2d48b0b7230c9da5a75f86a4e659361b" +version = "22.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37a78f8fc67123c4357e63bc0c87622a2a663d26f074958d749a633d0ecde90f" dependencies = [ + "arrow", "arrow-schema", "datafusion-common", "datafusion-expr", @@ -926,8 +935,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "21.1.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=2191a69#2191a69b2d48b0b7230c9da5a75f86a4e659361b" +version = "22.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae6ed64a2005f0d78f2b1b3ec3f8148183f4523d5d364e5367115f8d8a82b7df" dependencies = [ "async-recursion", "chrono", @@ -1005,13 +1015,13 @@ dependencies = [ [[package]] name = "errno" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d6a0976c999d473fe89ad888d5a284e55366d9dc9038b1ba2aa15128c4afa0" +checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" dependencies = [ "errno-dragonfly", "libc", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -1035,14 +1045,14 @@ dependencies = [ [[package]] name = "filetime" -version = 
"0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a3de6e8d11b22ff9edc6d916f890800597d60f8b2da1caf2955c274638d6412" +checksum = "5cbc844cecaee9d4443931972e1289c8ff485cb4cc2767cb03ca139ed6885153" dependencies = [ "cfg-if", "libc", "redox_syscall 0.2.16", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -1187,9 +1197,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" dependencies = [ "cfg-if", "libc", @@ -1198,9 +1208,9 @@ dependencies = [ [[package]] name = "gix" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd5e0d9c5df90c9b4d325ec716762beb7d6c1465a4049fec5c4f6b72e7824656" +checksum = "c256ea71cc1967faaefdaad15f334146b7c806f12460dcafd3afed845c8c78dd" dependencies = [ "gix-actor", "gix-attributes", @@ -1296,9 +1306,9 @@ dependencies = [ [[package]] name = "gix-config" -version = "0.19.0" +version = "0.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6aa7d7dd60256b7a0c0506a1d708ec92767c2662ee57b3301b538eaa3e064f8a" +checksum = "7fbad5ce54a8fc997acc50febd89ec80fa6e97cb7f8d0654cb229936407489d8" dependencies = [ "bstr", "gix-config-value", @@ -1307,6 +1317,7 @@ dependencies = [ "gix-path", "gix-ref", "gix-sec", + "log", "memchr", "nom", "once_cell", @@ -1940,13 +1951,13 @@ dependencies = [ [[package]] name = "io-lifetimes" -version = "1.0.9" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09270fd4fa1111bc614ed2246c7ef56239a3063d5be0d1ec3b589c505d400aeb" +checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" dependencies = [ "hermit-abi 0.3.1", "libc", - "windows-sys", + 
"windows-sys 0.48.0", ] [[package]] @@ -2060,9 +2071,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.140" +version = "0.2.141" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" +checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" [[package]] name = "libflate" @@ -2092,9 +2103,9 @@ checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" [[package]] name = "libmimalloc-sys" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef2c45001fb108f37d41bed8efd715769acb14674c1ce3e266ef0e317ef5f877" +checksum = "43a558e3d911bc3c7bfc8c78bc580b404d6e51c1cefbf656e176a94b49b0df40" dependencies = [ "cc", "libc", @@ -2200,9 +2211,9 @@ dependencies = [ [[package]] name = "mimalloc" -version = "0.1.34" +version = "0.1.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dcb174b18635f7561a0c6c9fc2ce57218ac7523cf72c50af80e2d79ab8f3ba1" +checksum = "3d88dad3f985ec267a3fcb7a1726f5cb1a7e8cad8b646e70a84f967210df23da" dependencies = [ "libmimalloc-sys", ] @@ -2237,7 +2248,7 @@ dependencies = [ "libc", "log", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys", + "windows-sys 0.45.0", ] [[package]] @@ -2427,7 +2438,7 @@ dependencies = [ "libc", "redox_syscall 0.2.16", "smallvec", - "windows-sys", + "windows-sys 0.45.0", ] [[package]] @@ -2898,16 +2909,16 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.6" +version = "0.37.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d097081ed288dfe45699b72f5b5d648e5f15d64d900c7080273baa20c16a6849" +checksum = "85597d61f83914ddeba6a47b3b8ffe7365107221c2e557ed94426489fefb5f77" dependencies = [ "bitflags 1.3.2", "errno", "io-lifetimes", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -3077,9 +3088,9 @@ dependencies = [ 
[[package]] name = "serde_yaml" -version = "0.9.19" +version = "0.9.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f82e6c8c047aa50a7328632d067bcae6ef38772a79e28daf32f735e0e4f3dd10" +checksum = "d9d684e3ec7de3bf5466b32bd75303ac16f0736426e5a4e0d6e489559ce1249c" dependencies = [ "indexmap", "itoa", @@ -3237,9 +3248,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.6.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b121815df0d0ce3f2c3b68d38fb0c5a56e6076ba1c9a713b6ad1fec41674906" +checksum = "fcd7c95895a69f92b0491cb0764d49140f57ad918a8abb3b7ec7f8e507d2a240" dependencies = [ "gix", "heck", @@ -3302,7 +3313,7 @@ dependencies = [ "fastrand", "redox_syscall 0.3.5", "rustix", - "windows-sys", + "windows-sys 0.45.0", ] [[package]] @@ -3424,7 +3435,7 @@ dependencies = [ "pin-project-lite", "socket2", "tokio-macros", - "windows-sys", + "windows-sys 0.45.0", ] [[package]] @@ -3636,9 +3647,9 @@ checksum = "e1766d682d402817b5ac4490b3c3002d91dfa0d22812f341609f97b08757359c" [[package]] name = "unsafe-libyaml" -version = "0.2.7" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad2024452afd3874bf539695e04af6732ba06517424dbf958fdb16a01f3bef6c" +checksum = "1865806a559042e51ab5414598446a5871b561d21b6764f2eabb0dd481d880a6" [[package]] name = "untrusted" @@ -3659,9 +3670,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.3.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1674845326ee10d37ca60470760d4288a6f80f304007d92e5c53bab78c9cfd79" +checksum = "5b55a3fef2a1e3b3a00ce878640918820d3c51081576ac657d23af9fc7928fdb" dependencies = [ "getrandom", "serde", @@ -3888,6 +3899,15 @@ dependencies = [ "windows-targets 0.42.2", ] +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.0", +] + [[package]] name = "windows-targets" version = "0.42.2" @@ -4056,7 +4076,7 @@ version = "0.12.3+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806" dependencies = [ - "zstd-safe 6.0.4+zstd.1.5.4", + "zstd-safe 6.0.5+zstd.1.5.4", ] [[package]] @@ -4071,9 +4091,9 @@ dependencies = [ [[package]] name = "zstd-safe" -version = "6.0.4+zstd.1.5.4" +version = "6.0.5+zstd.1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7afb4b54b8910cf5447638cb54bf4e8a65cbedd783af98b98c62ffe91f185543" +checksum = "d56d9e60b4b1758206c238a10165fbcae3ca37b01744e394c463463f6529d23b" dependencies = [ "libc", "zstd-sys", @@ -4081,9 +4101,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.7+zstd.1.5.4" +version = "2.0.8+zstd.1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94509c3ba2fe55294d752b79842c530ccfab760192521df74a081a78d2b3c7f5" +checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c" dependencies = [ "cc", "libc", diff --git a/Cargo.toml b/Cargo.toml index 361a06e04..5ca3eee50 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "21.0.0" +version = "22.0.0" homepage = "https://github.com/apache/arrow-datafusion-python" repository = "https://github.com/apache/arrow-datafusion-python" authors = ["Apache Arrow "] @@ -36,12 +36,12 @@ protoc = [ "datafusion-substrait/protoc" ] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.18.1", features = ["extension-module", "abi3", "abi3-py37"] } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2191a69", features = ["pyarrow", "avro"] } -datafusion-common = { git = 
"https://github.com/apache/arrow-datafusion.git", rev = "2191a69", features = ["pyarrow"] } -datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2191a69" } -datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2191a69" } -datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2191a69" } -datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2191a69" } +datafusion = { version = "22.0.0", features = ["pyarrow", "avro"] } +datafusion-common = { version = "22.0.0", features = ["pyarrow"] } +datafusion-expr = { version = "22.0.0" } +datafusion-optimizer = { version = "22.0.0" } +datafusion-sql = { version = "22.0.0" } +datafusion-substrait = { version = "22.0.0" } uuid = { version = "1.2", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false } async-trait = "0.1" diff --git a/datafusion/tests/test_context.py b/datafusion/tests/test_context.py index 1aea21c21..6b1223a16 100644 --- a/datafusion/tests/test_context.py +++ b/datafusion/tests/test_context.py @@ -282,10 +282,10 @@ def test_dataset_filter_nested_data(ctx): # This filter will not be pushed down to DatasetExec since it # isn't supported - df = df.select( + df = df.filter(column("nested_data")["b"] > literal(5)).select( column("nested_data")["a"] + column("nested_data")["b"], column("nested_data")["a"] - column("nested_data")["b"], - ).filter(column("nested_data")["b"] > literal(5)) + ) result = df.collect() diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py index c6ef95772..cd78f3c88 100644 --- a/datafusion/tests/test_dataframe.py +++ b/datafusion/tests/test_dataframe.py @@ -84,10 +84,10 @@ def test_select_columns(df): def test_filter(df): - df = df.select( + df = df.filter(column("a") > literal(2)).select( column("a") + column("b"), column("a") - column("b"), - ).filter(column("a") > literal(2)) + ) # execute and 
collect the first (and only) batch result = df.collect()[0] diff --git a/datafusion/tests/test_indexing.py b/datafusion/tests/test_indexing.py index 1c7f8627d..8ca3eab19 100644 --- a/datafusion/tests/test_indexing.py +++ b/datafusion/tests/test_indexing.py @@ -44,7 +44,7 @@ def test_err(df): with pytest.raises(Exception) as e_info: df["c"] - assert 'Schema error: No field named "c"' in e_info.value.args[0] + assert "Schema error: No field named c." in e_info.value.args[0] with pytest.raises(Exception) as e_info: df[1] From 6ef1c5b554c452bbae2d25b44308722ccfa8fbf7 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 12 Apr 2023 04:26:52 -0600 Subject: [PATCH 018/413] Improve API docs, README, and examples for configuring context (#321) --- README.md | 33 +++++++++++++++++++++++ dev/release/rat_exclude_files.txt | 2 +- docs/README.md | 4 +-- docs/source/api/config.rst | 27 ------------------- docs/source/api/execution_context.rst | 4 ++- examples/README.md | 4 +++ examples/create-context.py | 39 +++++++++++++++++++++++++++ src/context.rs | 29 +++++++++++++++----- 8 files changed, 104 insertions(+), 38 deletions(-) delete mode 100644 docs/source/api/config.rst create mode 100644 examples/create-context.py diff --git a/README.md b/README.md index 7c29defdc..506a38227 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,39 @@ This produces the following chart: ![Chart](examples/chart.png) +## Configuration + +It is possible to configure runtime (memory and disk settings) and configuration settings when creating a context. 
+ +```python +runtime = ( + RuntimeConfig() + .with_disk_manager_os() + .with_fair_spill_pool(10000000) +) +config = ( + SessionConfig() + .with_create_default_catalog_and_schema(True) + .with_default_catalog_and_schema("foo", "bar") + .with_target_partitions(8) + .with_information_schema(True) + .with_repartition_joins(False) + .with_repartition_aggregations(False) + .with_repartition_windows(False) + .with_parquet_pruning(False) + .set("datafusion.execution.parquet.pushdown_filters", "true") +) +ctx = SessionContext(config, runtime) +``` + +Refer to the [API documentation](https://arrow.apache.org/datafusion-python/#api-reference) for more information. + +Printing the context will show the current configuration settings. + +```python +print(ctx) +``` + ## More Examples See [examples](examples/README.md) for more information. diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index db5379d89..c7754f350 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -42,4 +42,4 @@ Cargo.lock .history *rat.txt */.git -docs.yaml \ No newline at end of file +.github/* \ No newline at end of file diff --git a/docs/README.md b/docs/README.md index 6d8b2d57d..04f46a907 100644 --- a/docs/README.md +++ b/docs/README.md @@ -20,7 +20,7 @@ # DataFusion Documentation This folder contains the source content of the [python api](./source/api). -This is published to https://arrow.apache.org/datafusion-python/ by a GitHub action +This is published to https://arrow.apache.org/datafusion-python/ by a GitHub action when changes are merged to the main branch. ## Dependencies @@ -61,4 +61,4 @@ version of the docs, follow these steps: - `cp -rT ./build/html/ ../../arrow-site/datafusion/` (doesn't work on mac) - `rsync -avzr ./build/html/ ../../arrow-site/datafusion/` -5. Commit changes in `arrow-site` and send a PR. \ No newline at end of file +5. Commit changes in `arrow-site` and send a PR. 
diff --git a/docs/source/api/config.rst b/docs/source/api/config.rst deleted file mode 100644 index df244aec5..000000000 --- a/docs/source/api/config.rst +++ /dev/null @@ -1,27 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, -.. software distributed under the License is distributed on an -.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -.. KIND, either express or implied. See the License for the -.. specific language governing permissions and limitations -.. under the License. - -.. _api.config: -.. currentmodule:: datafusion - -Config -========= - -.. autosummary:: - :toctree: ../generated/ - - Config diff --git a/docs/source/api/execution_context.rst b/docs/source/api/execution_context.rst index 5b7e0f82f..a3bda76d7 100644 --- a/docs/source/api/execution_context.rst +++ b/docs/source/api/execution_context.rst @@ -19,9 +19,11 @@ .. currentmodule:: datafusion SessionContext -================ +============== .. 
autosummary:: :toctree: ../generated/ + SessionConfig + RuntimeConfig SessionContext diff --git a/examples/README.md b/examples/README.md index 2c4775ea4..82405955b 100644 --- a/examples/README.md +++ b/examples/README.md @@ -27,6 +27,10 @@ Here is a direct link to the file used in the examples: - https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet +### Creating a SessionContext + +- [Creating a SessionContext](./create-context.py) + ### Executing Queries with DataFusion - [Query a Parquet file using SQL](./sql-parquet.py) diff --git a/examples/create-context.py b/examples/create-context.py new file mode 100644 index 000000000..3184d4085 --- /dev/null +++ b/examples/create-context.py @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from datafusion import RuntimeConfig, SessionConfig, SessionContext + +# create a session context with default settings +ctx = SessionContext() +print(ctx) + +# create a session context with explicit runtime and config settings +runtime = RuntimeConfig().with_disk_manager_os().with_fair_spill_pool(10000000) +config = ( + SessionConfig() + .with_create_default_catalog_and_schema(True) + .with_default_catalog_and_schema("foo", "bar") + .with_target_partitions(8) + .with_information_schema(True) + .with_repartition_joins(False) + .with_repartition_aggregations(False) + .with_repartition_windows(False) + .with_parquet_pruning(False) + .set("datafusion.execution.parquet.pushdown_filters", "true") +) +ctx = SessionContext(config, runtime) +print(ctx) diff --git a/src/context.rs b/src/context.rs index 448fa7742..0ba120022 100644 --- a/src/context.rs +++ b/src/context.rs @@ -55,6 +55,7 @@ use pyo3::types::PyTuple; use tokio::runtime::Runtime; use tokio::task::JoinHandle; +/// Configuration options for a SessionContext #[pyclass(name = "SessionConfig", module = "datafusion", subclass, unsendable)] #[derive(Clone, Default)] pub(crate) struct PySessionConfig { @@ -141,8 +142,13 @@ impl PySessionConfig { fn with_parquet_pruning(&self, enabled: bool) -> Self { Self::from(self.config.clone().with_parquet_pruning(enabled)) } + + fn set(&self, key: &str, value: &str) -> Self { + Self::from(self.config.clone().set_str(key, value)) + } } +/// Runtime options for a SessionContext #[pyclass(name = "RuntimeConfig", module = "datafusion", subclass, unsendable)] #[derive(Clone)] pub(crate) struct PyRuntimeConfig { @@ -549,8 +555,8 @@ impl PySessionContext { Ok(PyDataFrame::new(self.ctx.read_empty()?)) } - fn session_id(&self) -> PyResult { - Ok(self.ctx.session_id()) + fn session_id(&self) -> String { + self.ctx.session_id() } #[allow(clippy::too_many_arguments)] @@ -684,11 +690,20 @@ impl PySessionContext { } fn __repr__(&self) -> PyResult { - let id = self.session_id(); - match id { 
- Ok(value) => Ok(format!("SessionContext(session_id={value})")), - Err(err) => Ok(format!("Error: {:?}", err.to_string())), - } + let config = self.ctx.copied_config(); + let mut config_entries = config + .options() + .entries() + .iter() + .filter(|e| e.value.is_some()) + .map(|e| format!("{} = {}", e.key, e.value.as_ref().unwrap())) + .collect::>(); + config_entries.sort(); + Ok(format!( + "SessionContext: id={}; configs=[\n\t{}]", + self.session_id(), + config_entries.join("\n\t") + )) } /// Execute a partition of an execution plan and return a stream of record batches From 94936380e58a266f5dd5de6b70a06d3aa36fbe22 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Wed, 19 Apr 2023 12:08:04 -0400 Subject: [PATCH 019/413] Osx build linker args (#330) * Add linker flags for macOS to ensure proper linking with macOS libpython library * Include custom build.rs script * Add apache license to build.rs * Add apache license v2 --- .cargo/config.toml | 12 +++++ Cargo.lock | 111 +++++++++++++++++++++++---------------------- Cargo.toml | 3 ++ build.rs | 20 ++++++++ 4 files changed, 91 insertions(+), 55 deletions(-) create mode 100644 .cargo/config.toml create mode 100644 build.rs diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 000000000..91a099a61 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,12 @@ +[target.x86_64-apple-darwin] +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] + +[target.aarch64-apple-darwin] +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] + diff --git a/Cargo.lock b/Cargo.lock index 3451163dc..69505061f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -350,7 +350,7 @@ checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" dependencies = [ "proc-macro2", "quote", - "syn 2.0.13", + "syn 2.0.15", ] [[package]] @@ -361,7 +361,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", 
"quote", - "syn 2.0.13", + "syn 2.0.15", ] [[package]] @@ -703,7 +703,7 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn 2.0.13", + "syn 2.0.15", ] [[package]] @@ -720,7 +720,7 @@ checksum = "2345488264226bf682893e25de0769f3360aac9957980ec49361b083ddaa5bc5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.13", + "syn 2.0.15", ] [[package]] @@ -899,9 +899,10 @@ dependencies = [ "object_store", "parking_lot", "pyo3", + "pyo3-build-config", "rand", "regex-syntax", - "syn 2.0.13", + "syn 2.0.15", "tokio", "url", "uuid", @@ -988,9 +989,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "dunce" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bd4b30a6560bbd9b4620f4de34c3f14f60848e58a9b7216801afcb4c7b31c3c" +checksum = "56ce8c6da7551ec6c462cbaf3bfbc75131ebbfa1c944aeaa9dab51ca1c5f0c3b" [[package]] name = "dyn-clone" @@ -1152,7 +1153,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.13", + "syn 2.0.15", ] [[package]] @@ -1423,9 +1424,9 @@ dependencies = [ [[package]] name = "gix-hash" -version = "0.10.3" +version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c0c5a9f4d621d4f4ea046bb331df5c746ca735b8cae5b234cc2be70ee4dbef0" +checksum = "2a258595457bc192d1f1c59d0d168a1e34e2be9b97a614e14995416185de41a7" dependencies = [ "hex", "thiserror", @@ -1433,9 +1434,9 @@ dependencies = [ [[package]] name = "gix-hashtable" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9609c1b8f36f12968e6a6098f7cdb52004f7d42d570f47a2d6d7c16612f19acb" +checksum = "e4e55e40dfd694884f0eb78796c5bddcf2f8b295dace47039099dd7e76534973" dependencies = [ "gix-hash", "hashbrown 0.13.2", @@ -1715,9 +1716,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" 
[[package]] name = "h2" -version = "0.3.16" +version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5be7b54589b581f624f566bf5d8eb2bab1db736c51528720b6bd36b96b55924d" +checksum = "17f8a914c2987b688368b5138aa05321db91f4090cf26118185672ad588bce21" dependencies = [ "bytes", "fnv", @@ -1829,9 +1830,9 @@ checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" [[package]] name = "hyper" -version = "0.14.25" +version = "0.14.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc5e554ff619822309ffd57d8734d77cd5ce6238bc956f037ea06c58238c9899" +checksum = "ab302d72a6f11a3b910431ff93aae7e773078c769f0a3ef15fb9ec692ed147d4" dependencies = [ "bytes", "futures-channel", @@ -2122,9 +2123,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f" +checksum = "3f508063cc7bb32987c71511216bd5a32be15bccb6a80b52df8b9d7f01fc3aa2" [[package]] name = "lock_api" @@ -2574,7 +2575,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ceca8aaf45b5c46ec7ed39fff75f57290368c1846d33d24a122ca81416ab058" dependencies = [ "proc-macro2", - "syn 2.0.13", + "syn 2.0.15", ] [[package]] @@ -2600,9 +2601,9 @@ checksum = "9516b775656bc3e8985e19cd4b8c0c0de045095074e453d2c0a513b5f978392d" [[package]] name = "prost" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e48e50df39172a3e7eb17e14642445da64996989bc212b583015435d39a58537" +checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd" dependencies = [ "bytes", "prost-derive", @@ -2610,9 +2611,9 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2c828f93f5ca4826f97fedcbd3f9a536c16b12cff3dbbb4a007f932bbad95b12" +checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270" dependencies = [ "bytes", "heck", @@ -2630,9 +2631,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ea9b0f8cbe5e15a8a042d030bd96668db28ecb567ec37d691971ff5731d2b1b" +checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4" dependencies = [ "anyhow", "itertools", @@ -2643,9 +2644,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "379119666929a1afd7a043aa6cf96fa67a6dce9af60c88095a4686dbce4c9c88" +checksum = "213622a1460818959ac1181aaeb2dc9c7f63df720db7d788b3e24eacd1983e13" dependencies = [ "prost", ] @@ -2661,9 +2662,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.18.2" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfb848f80438f926a9ebddf0a539ed6065434fd7aae03a89312a9821f81b8501" +checksum = "e3b1ac5b3731ba34fdaa9785f8d74d17448cd18f30cf19e0c7e7b1fdb5272109" dependencies = [ "cfg-if", "indoc", @@ -2678,9 +2679,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.18.2" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98a42e7f42e917ce6664c832d5eee481ad514c98250c49e0b03b20593e2c7ed0" +checksum = "9cb946f5ac61bb61a5014924910d936ebd2b23b705f7a4a3c40b05c720b079a3" dependencies = [ "once_cell", "target-lexicon", @@ -2688,9 +2689,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.18.2" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0707f0ab26826fe4ccd59b69106e9df5e12d097457c7b8f9c0fd1d2743eec4d" +checksum = 
"fd4d7c5337821916ea2a1d21d1092e8443cf34879e53a0ac653fbb98f44ff65c" dependencies = [ "libc", "pyo3-build-config", @@ -2698,9 +2699,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.18.2" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978d18e61465ecd389e1f235ff5a467146dc4e3c3968b90d274fe73a5dd4a438" +checksum = "a9d39c55dab3fc5a4b25bbd1ac10a2da452c4aca13bb450f22818a002e29648d" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -2710,9 +2711,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.18.2" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e0e1128f85ce3fca66e435e08aa2089a2689c1c48ce97803e13f63124058462" +checksum = "97daff08a4c48320587b5224cc98d609e3c27b6d437315bd40b605c98eeb5918" dependencies = [ "proc-macro2", "quote", @@ -2727,9 +2728,9 @@ checksum = "658fa1faf7a4cc5f057c9ee5ef560f717ad9d8dc66d975267f709624d6e1ab88" [[package]] name = "quick-xml" -version = "0.28.1" +version = "0.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5c1a97b1bc42b1d550bfb48d4262153fe400a12bab1511821736f7eac76d7e2" +checksum = "0ce5e73202a820a31f8a0ee32ada5e21029c81fd9e3ebf668a40832e4219d9d1" dependencies = [ "memchr", "serde", @@ -2909,9 +2910,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.11" +version = "0.37.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85597d61f83914ddeba6a47b3b8ffe7365107221c2e557ed94426489fefb5f77" +checksum = "722529a737f5a942fdbac3a46cee213053196737c5eaa3386d52e85b786f2659" dependencies = [ "bitflags 1.3.2", "errno", @@ -3023,22 +3024,22 @@ checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc" [[package]] name = "serde" -version = "1.0.159" +version = "1.0.160" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3c04e8343c3daeec41f58990b9d77068df31209f2af111e059e9fe9646693065" +checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.159" +version = "1.0.160" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c614d17805b093df4b147b51339e7e44bf05ef59fba1e45d83500bcfb4d8585" +checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df" dependencies = [ "proc-macro2", "quote", - "syn 2.0.13", + "syn 2.0.15", ] [[package]] @@ -3054,9 +3055,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.95" +version = "1.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d721eca97ac802aa7777b701877c8004d950fc142651367300d21c1cc0194744" +checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1" dependencies = [ "itoa", "ryu", @@ -3248,9 +3249,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.7.3" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcd7c95895a69f92b0491cb0764d49140f57ad918a8abb3b7ec7f8e507d2a240" +checksum = "e3ae64fb7ad0670c7d6d53d57b1b91beb2212afc30e164cc8edb02d6b2cff32a" dependencies = [ "gix", "heck", @@ -3264,7 +3265,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.13", + "syn 2.0.15", "typify", "walkdir", ] @@ -3288,9 +3289,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.13" +version = "2.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c9da457c5285ac1f936ebd076af6dac17a61cfe7826f2076b4d015cf47bc8ec" +checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" dependencies = [ "proc-macro2", "quote", @@ -3342,7 +3343,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.13", + "syn 2.0.15", ] [[package]] @@ -3446,7 
+3447,7 @@ checksum = "61a573bdc87985e9d6ddeed1b3d864e8a302c847e40d647746df2f1de209d1ce" dependencies = [ "proc-macro2", "quote", - "syn 2.0.13", + "syn 2.0.15", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 5ca3eee50..22a2989ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,6 +52,9 @@ regex-syntax = "0.6.28" syn = "2.0.11" url = "2.2" +[build-dependencies] +pyo3-build-config = "0.18.3" + [lib] name = "datafusion_python" crate-type = ["cdylib", "rlib"] diff --git a/build.rs b/build.rs new file mode 100644 index 000000000..4878d8b0e --- /dev/null +++ b/build.rs @@ -0,0 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +fn main() { + pyo3_build_config::add_extension_module_link_args(); +} From 16298058cb10139a96e1b10fb7424a36c3c1c686 Mon Sep 17 00:00:00 2001 From: "r.4ntix" Date: Sun, 23 Apr 2023 23:11:07 +0800 Subject: [PATCH 020/413] Add requirements file for python 3.11 (#332) --- requirements-311.txt | 199 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 requirements-311.txt diff --git a/requirements-311.txt b/requirements-311.txt new file mode 100644 index 000000000..9161e3f5b --- /dev/null +++ b/requirements-311.txt @@ -0,0 +1,199 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --generate-hashes --output-file=requirements-311.txt +# +black==23.3.0 \ + --hash=sha256:064101748afa12ad2291c2b91c960be28b817c0c7eaa35bec09cc63aa56493c5 \ + --hash=sha256:0945e13506be58bf7db93ee5853243eb368ace1c08a24c65ce108986eac65915 \ + --hash=sha256:11c410f71b876f961d1de77b9699ad19f939094c3a677323f43d7a29855fe326 \ + --hash=sha256:1c7b8d606e728a41ea1ccbd7264677e494e87cf630e399262ced92d4a8dac940 \ + --hash=sha256:1d06691f1eb8de91cd1b322f21e3bfc9efe0c7ca1f0e1eb1db44ea367dff656b \ + --hash=sha256:3238f2aacf827d18d26db07524e44741233ae09a584273aa059066d644ca7b30 \ + --hash=sha256:32daa9783106c28815d05b724238e30718f34155653d4d6e125dc7daec8e260c \ + --hash=sha256:35d1381d7a22cc5b2be2f72c7dfdae4072a3336060635718cc7e1ede24221d6c \ + --hash=sha256:3a150542a204124ed00683f0db1f5cf1c2aaaa9cc3495b7a3b5976fb136090ab \ + --hash=sha256:48f9d345675bb7fbc3dd85821b12487e1b9a75242028adad0333ce36ed2a6d27 \ + --hash=sha256:50cb33cac881766a5cd9913e10ff75b1e8eb71babf4c7104f2e9c52da1fb7de2 \ + --hash=sha256:562bd3a70495facf56814293149e51aa1be9931567474993c7942ff7d3533961 \ + --hash=sha256:67de8d0c209eb5b330cce2469503de11bca4085880d62f1628bd9972cc3366b9 \ + --hash=sha256:6b39abdfb402002b8a7d030ccc85cf5afff64ee90fa4c5aebc531e3ad0175ddb \ + 
--hash=sha256:6f3c333ea1dd6771b2d3777482429864f8e258899f6ff05826c3a4fcc5ce3f70 \ + --hash=sha256:714290490c18fb0126baa0fca0a54ee795f7502b44177e1ce7624ba1c00f2331 \ + --hash=sha256:7c3eb7cea23904399866c55826b31c1f55bbcd3890ce22ff70466b907b6775c2 \ + --hash=sha256:92c543f6854c28a3c7f39f4d9b7694f9a6eb9d3c5e2ece488c327b6e7ea9b266 \ + --hash=sha256:a6f6886c9869d4daae2d1715ce34a19bbc4b95006d20ed785ca00fa03cba312d \ + --hash=sha256:a8a968125d0a6a404842fa1bf0b349a568634f856aa08ffaff40ae0dfa52e7c6 \ + --hash=sha256:c7ab5790333c448903c4b721b59c0d80b11fe5e9803d8703e84dcb8da56fec1b \ + --hash=sha256:e114420bf26b90d4b9daa597351337762b63039752bdf72bf361364c1aa05925 \ + --hash=sha256:e198cf27888ad6f4ff331ca1c48ffc038848ea9f031a3b40ba36aced7e22f2c8 \ + --hash=sha256:ec751418022185b0c1bb7d7736e6933d40bbb14c14a0abcf9123d1b159f98dd4 \ + --hash=sha256:f0bd2f4a58d6666500542b26354978218a9babcdc972722f4bf90779524515f3 + # via -r requirements.in +click==8.1.3 \ + --hash=sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e \ + --hash=sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48 + # via black +flake8==6.0.0 \ + --hash=sha256:3833794e27ff64ea4e9cf5d410082a8b97ff1a06c16aa3d2027339cd0f1195c7 \ + --hash=sha256:c61007e76655af75e6785a931f452915b371dc48f56efd765247c8fe68f2b181 + # via -r requirements.in +iniconfig==2.0.0 \ + --hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \ + --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 + # via pytest +isort==5.12.0 \ + --hash=sha256:8bef7dde241278824a6d83f44a544709b065191b95b6e50894bdc722fcba0504 \ + --hash=sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6 + # via -r requirements.in +maturin==0.14.17 \ + --hash=sha256:0308d4124311c92ec5c08600f93385a5ff16f7764410585261d1ca0aed739764 \ + --hash=sha256:2684302ded1559a26635ec45e91ae7913aeb386a8b658d5c2eb13ff410df7930 \ + 
--hash=sha256:3bf173adac3e9f84a84123ec0b5b4b28b2de79446a49da2b4549016614025b83 \ + --hash=sha256:64dd1b017eab2f9c938d833522bb505506908d824388927011dbdfff26ccea2c \ + --hash=sha256:683e1127b54b9fbdbd661ce3b27b073bd5d7c51d7e980870f4caffdb40f364b0 \ + --hash=sha256:7fef3950a34c0b5c1806637169fca12f89b7440d7608832bf7765b34f394b06a \ + --hash=sha256:8076993118089abb19e7d2312b138afea418dccea60be4198ab9f348618b47df \ + --hash=sha256:a7f06984bd3ffa4ab7be67f7352353a486f262d7a0dbd4dc305944e0b09c8f35 \ + --hash=sha256:abfd98529cfab59deb59639962d93df72121013d5151a5f01c66cda19d9c88a9 \ + --hash=sha256:bb8f8ac375d022c5de5da3efed79e712129de13ebdb9f668b96bf5df0673bc03 \ + --hash=sha256:cf1429a7854ecd4830bb42407d30ff0625c3f6480fa7b1e90da2896ba7a1b2fc \ + --hash=sha256:dcd44c2fa66d79497e2f4361654ed0d1c2ef1c58139469e8e0083409f948eea4 \ + --hash=sha256:fb4e3311e8ce707843235fbe8748a05a3ae166c3efd6d2aa335b53dfc2bd3b88 + # via -r requirements.in +mccabe==0.7.0 \ + --hash=sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325 \ + --hash=sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e + # via flake8 +mypy==1.2.0 \ + --hash=sha256:023fe9e618182ca6317ae89833ba422c411469156b690fde6a315ad10695a521 \ + --hash=sha256:031fc69c9a7e12bcc5660b74122ed84b3f1c505e762cc4296884096c6d8ee140 \ + --hash=sha256:2de7babe398cb7a85ac7f1fd5c42f396c215ab3eff731b4d761d68d0f6a80f48 \ + --hash=sha256:2e93a8a553e0394b26c4ca683923b85a69f7ccdc0139e6acd1354cc884fe0128 \ + --hash=sha256:390bc685ec209ada4e9d35068ac6988c60160b2b703072d2850457b62499e336 \ + --hash=sha256:3a2d219775a120581a0ae8ca392b31f238d452729adbcb6892fa89688cb8306a \ + --hash=sha256:3efde4af6f2d3ccf58ae825495dbb8d74abd6d176ee686ce2ab19bd025273f41 \ + --hash=sha256:4a99fe1768925e4a139aace8f3fb66db3576ee1c30b9c0f70f744ead7e329c9f \ + --hash=sha256:4b41412df69ec06ab141808d12e0bf2823717b1c363bd77b4c0820feaa37249e \ + --hash=sha256:4c8d8c6b80aa4a1689f2a179d31d86ae1367ea4a12855cc13aa3ba24bb36b2d8 \ + 
--hash=sha256:4d19f1a239d59f10fdc31263d48b7937c585810288376671eaf75380b074f238 \ + --hash=sha256:4e4a682b3f2489d218751981639cffc4e281d548f9d517addfd5a2917ac78119 \ + --hash=sha256:695c45cea7e8abb6f088a34a6034b1d273122e5530aeebb9c09626cea6dca4cb \ + --hash=sha256:701189408b460a2ff42b984e6bd45c3f41f0ac9f5f58b8873bbedc511900086d \ + --hash=sha256:70894c5345bea98321a2fe84df35f43ee7bb0feec117a71420c60459fc3e1eed \ + --hash=sha256:8293a216e902ac12779eb7a08f2bc39ec6c878d7c6025aa59464e0c4c16f7eb9 \ + --hash=sha256:8d26b513225ffd3eacece727f4387bdce6469192ef029ca9dd469940158bc89e \ + --hash=sha256:a197ad3a774f8e74f21e428f0de7f60ad26a8d23437b69638aac2764d1e06a6a \ + --hash=sha256:bea55fc25b96c53affab852ad94bf111a3083bc1d8b0c76a61dd101d8a388cf5 \ + --hash=sha256:c9a084bce1061e55cdc0493a2ad890375af359c766b8ac311ac8120d3a472950 \ + --hash=sha256:d0e9464a0af6715852267bf29c9553e4555b61f5904a4fc538547a4d67617937 \ + --hash=sha256:d8e9187bfcd5ffedbe87403195e1fc340189a68463903c39e2b63307c9fa0394 \ + --hash=sha256:eaeaa0888b7f3ccb7bcd40b50497ca30923dba14f385bde4af78fac713d6d6f6 \ + --hash=sha256:f46af8d162f3d470d8ffc997aaf7a269996d205f9d746124a179d3abe05ac602 \ + --hash=sha256:f70a40410d774ae23fcb4afbbeca652905a04de7948eaf0b1789c8d1426b72d1 \ + --hash=sha256:fe91be1c51c90e2afe6827601ca14353bbf3953f343c2129fa1e247d55fd95ba + # via -r requirements.in +mypy-extensions==1.0.0 \ + --hash=sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d \ + --hash=sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782 + # via + # black + # mypy +numpy==1.24.2 \ + --hash=sha256:003a9f530e880cb2cd177cba1af7220b9aa42def9c4afc2a2fc3ee6be7eb2b22 \ + --hash=sha256:150947adbdfeceec4e5926d956a06865c1c690f2fd902efede4ca6fe2e657c3f \ + --hash=sha256:2620e8592136e073bd12ee4536149380695fbe9ebeae845b81237f986479ffc9 \ + --hash=sha256:2eabd64ddb96a1239791da78fa5f4e1693ae2dadc82a76bc76a14cbb2b966e96 \ + --hash=sha256:4173bde9fa2a005c2c6e2ea8ac1618e2ed2c1c6ec8a7657237854d42094123a0 \ 
+ --hash=sha256:4199e7cfc307a778f72d293372736223e39ec9ac096ff0a2e64853b866a8e18a \ + --hash=sha256:4cecaed30dc14123020f77b03601559fff3e6cd0c048f8b5289f4eeabb0eb281 \ + --hash=sha256:557d42778a6869c2162deb40ad82612645e21d79e11c1dc62c6e82a2220ffb04 \ + --hash=sha256:63e45511ee4d9d976637d11e6c9864eae50e12dc9598f531c035265991910468 \ + --hash=sha256:6524630f71631be2dabe0c541e7675db82651eb998496bbe16bc4f77f0772253 \ + --hash=sha256:76807b4063f0002c8532cfeac47a3068a69561e9c8715efdad3c642eb27c0756 \ + --hash=sha256:7de8fdde0003f4294655aa5d5f0a89c26b9f22c0a58790c38fae1ed392d44a5a \ + --hash=sha256:889b2cc88b837d86eda1b17008ebeb679d82875022200c6e8e4ce6cf549b7acb \ + --hash=sha256:92011118955724465fb6853def593cf397b4a1367495e0b59a7e69d40c4eb71d \ + --hash=sha256:97cf27e51fa078078c649a51d7ade3c92d9e709ba2bfb97493007103c741f1d0 \ + --hash=sha256:9a23f8440561a633204a67fb44617ce2a299beecf3295f0d13c495518908e910 \ + --hash=sha256:a51725a815a6188c662fb66fb32077709a9ca38053f0274640293a14fdd22978 \ + --hash=sha256:a77d3e1163a7770164404607b7ba3967fb49b24782a6ef85d9b5f54126cc39e5 \ + --hash=sha256:adbdce121896fd3a17a77ab0b0b5eedf05a9834a18699db6829a64e1dfccca7f \ + --hash=sha256:c29e6bd0ec49a44d7690ecb623a8eac5ab8a923bce0bea6293953992edf3a76a \ + --hash=sha256:c72a6b2f4af1adfe193f7beb91ddf708ff867a3f977ef2ec53c0ffb8283ab9f5 \ + --hash=sha256:d0a2db9d20117bf523dde15858398e7c0858aadca7c0f088ac0d6edd360e9ad2 \ + --hash=sha256:e3ab5d32784e843fc0dd3ab6dcafc67ef806e6b6828dc6af2f689be0eb4d781d \ + --hash=sha256:e428c4fbfa085f947b536706a2fc349245d7baa8334f0c5723c56a10595f9b95 \ + --hash=sha256:e8d2859428712785e8a8b7d2b3ef0a1d1565892367b32f915c4a4df44d0e64f5 \ + --hash=sha256:eef70b4fc1e872ebddc38cddacc87c19a3709c0e3e5d20bf3954c147b1dd941d \ + --hash=sha256:f64bb98ac59b3ea3bf74b02f13836eb2e24e48e0ab0145bbda646295769bd780 \ + --hash=sha256:f9006288bcf4895917d02583cf3411f98631275bc67cce355a7f39f8c14338fa + # via + # -r requirements.in + # pyarrow +packaging==23.1 \ + 
--hash=sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61 \ + --hash=sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f + # via + # black + # pytest +pathspec==0.11.1 \ + --hash=sha256:2798de800fa92780e33acca925945e9a19a133b715067cf165b8866c15a31687 \ + --hash=sha256:d8af70af76652554bd134c22b3e8a1cc46ed7d91edcdd721ef1a0c51a84a5293 + # via black +platformdirs==3.2.0 \ + --hash=sha256:d5b638ca397f25f979350ff789db335903d7ea010ab28903f57b27e1b16c2b08 \ + --hash=sha256:ebe11c0d7a805086e99506aa331612429a72ca7cd52a1f0d277dc4adc20cb10e + # via black +pluggy==1.0.0 \ + --hash=sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159 \ + --hash=sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3 + # via pytest +pyarrow==11.0.0 \ + --hash=sha256:1cbcfcbb0e74b4d94f0b7dde447b835a01bc1d16510edb8bb7d6224b9bf5bafc \ + --hash=sha256:25aa11c443b934078bfd60ed63e4e2d42461682b5ac10f67275ea21e60e6042c \ + --hash=sha256:2d53ba72917fdb71e3584ffc23ee4fcc487218f8ff29dd6df3a34c5c48fe8c06 \ + --hash=sha256:2d942c690ff24a08b07cb3df818f542a90e4d359381fbff71b8f2aea5bf58841 \ + --hash=sha256:2f51dc7ca940fdf17893227edb46b6784d37522ce08d21afc56466898cb213b2 \ + --hash=sha256:362a7c881b32dc6b0eccf83411a97acba2774c10edcec715ccaab5ebf3bb0835 \ + --hash=sha256:3e99be85973592051e46412accea31828da324531a060bd4585046a74ba45854 \ + --hash=sha256:40bb42afa1053c35c749befbe72f6429b7b5f45710e85059cdd534553ebcf4f2 \ + --hash=sha256:410624da0708c37e6a27eba321a72f29d277091c8f8d23f72c92bada4092eb5e \ + --hash=sha256:41a1451dd895c0b2964b83d91019e46f15b5564c7ecd5dcb812dadd3f05acc97 \ + --hash=sha256:5461c57dbdb211a632a48facb9b39bbeb8a7905ec95d768078525283caef5f6d \ + --hash=sha256:69309be84dcc36422574d19c7d3a30a7ea43804f12552356d1ab2a82a713c418 \ + --hash=sha256:7c28b5f248e08dea3b3e0c828b91945f431f4202f1a9fe84d1012a761324e1ba \ + --hash=sha256:8f40be0d7381112a398b93c45a7e69f60261e7b0269cc324e9f739ce272f4f70 \ + 
--hash=sha256:a37bc81f6c9435da3c9c1e767324ac3064ffbe110c4e460660c43e144be4ed85 \ + --hash=sha256:aaee8f79d2a120bf3e032d6d64ad20b3af6f56241b0ffc38d201aebfee879d00 \ + --hash=sha256:ad42bb24fc44c48f74f0d8c72a9af16ba9a01a2ccda5739a517aa860fa7e3d56 \ + --hash=sha256:ad7c53def8dbbc810282ad308cc46a523ec81e653e60a91c609c2233ae407689 \ + --hash=sha256:becc2344be80e5dce4e1b80b7c650d2fc2061b9eb339045035a1baa34d5b8f1c \ + --hash=sha256:caad867121f182d0d3e1a0d36f197df604655d0b466f1bc9bafa903aa95083e4 \ + --hash=sha256:ccbf29a0dadfcdd97632b4f7cca20a966bb552853ba254e874c66934931b9841 \ + --hash=sha256:da93340fbf6f4e2a62815064383605b7ffa3e9eeb320ec839995b1660d69f89b \ + --hash=sha256:e217d001e6389b20a6759392a5ec49d670757af80101ee6b5f2c8ff0172e02ca \ + --hash=sha256:f010ce497ca1b0f17a8243df3048055c0d18dcadbcc70895d5baf8921f753de5 \ + --hash=sha256:f12932e5a6feb5c58192209af1d2607d488cb1d404fbc038ac12ada60327fa34 + # via -r requirements.in +pycodestyle==2.10.0 \ + --hash=sha256:347187bdb476329d98f695c213d7295a846d1152ff4fe9bacb8a9590b8ee7053 \ + --hash=sha256:8a4eaf0d0495c7395bdab3589ac2db602797d76207242c17d470186815706610 + # via flake8 +pyflakes==3.0.1 \ + --hash=sha256:ec55bf7fe21fff7f1ad2f7da62363d749e2a470500eab1b555334b67aa1ef8cf \ + --hash=sha256:ec8b276a6b60bd80defed25add7e439881c19e64850afd9b346283d4165fd0fd + # via flake8 +pytest==7.3.1 \ + --hash=sha256:3799fa815351fea3a5e96ac7e503a96fa51cc9942c3753cda7651b93c1cfa362 \ + --hash=sha256:434afafd78b1d78ed0addf160ad2b77a30d35d4bdf8af234fe621919d9ed15e3 + # via -r requirements.in +toml==0.10.2 \ + --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ + --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f + # via -r requirements.in +typing-extensions==4.5.0 \ + --hash=sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb \ + --hash=sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4 + # via mypy From 
fb3c6e22789a2ddf33505c0b29a4464105c6c1ba Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sun, 23 Apr 2023 13:52:08 -0600 Subject: [PATCH 021/413] mac arm64 build (#338) --- .github/workflows/build.yml | 52 +++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6296bf49e..c667dab80 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -99,6 +99,58 @@ jobs: name: dist path: target/wheels/* + build-macos-aarch64: + needs: [generate-license] + name: Mac arm64 + runs-on: macos-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10"] + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - uses: actions-rs/toolchain@v1 + with: + toolchain: stable + + - name: Set up Rust targets + run: rustup target add aarch64-apple-darwin + + - name: Upgrade pip + run: python -m pip install --upgrade pip + + - name: Install maturin + run: pip install maturin==0.14.2 + + - run: rm LICENSE.txt + - name: Download LICENSE.txt + uses: actions/download-artifact@v3 + with: + name: python-wheel-license + path: . 
+ + - name: Install Protoc + uses: arduino/setup-protoc@v1 + with: + version: '3.x' + repo-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Build Python package + run: maturin build --release --strip --target aarch64-apple-darwin + - name: List Mac wheels + run: find target/wheels/ + + - name: Archive wheels + uses: actions/upload-artifact@v3 + with: + name: dist + path: target/wheels/* + build-manylinux: needs: [generate-license] name: Manylinux From b6c600d301ea56f9f56440f2a6e3670a0e917ce5 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Sun, 23 Apr 2023 15:52:28 -0400 Subject: [PATCH 022/413] Add conda.yaml baseline workflow file (#281) * point to custom upstream arrow-datafusion with updated build script * Add conda.yaml baseline workflow file * Change conda recipe path * Add some debug output * fix syntax issues * update rustfmt path * test invoking rustfmt * Allow for RUSTFMT environment to pass through to the conda build * conda build updates * github workflow syntax issue * workflow syntax * more syntax * updates * use minimal rustup profile * use minimal rustup profile * use minimal rustup profile * use minimal rustup profile * use minimal rustup profile * changes * changes * trigger workflow rebuild * Add container * Ouput Rust env varialbes * full rustup command path * Point to typify fork for testing * Adjust conda CI workflow * Adjust conda build instead of mambabuild * Adjust command * Install conda-build * Some initial changes to get noarch builds working * Remove beta rust toolchain from CI * Add libprotobuf to host/build deps * Bump libprotobuf version * Revert back to 3 for libprotobuf --------- Co-authored-by: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> --- .github/workflows/conda.yml | 61 +++++++++++++++++++++++++++++++++++++ .github/workflows/test.yaml | 1 - Cargo.lock | 12 ++++---- conda/recipes/build.sh | 20 ------------ conda/recipes/meta.yaml | 25 ++++++++++----- pyproject.toml | 2 +- 6 files changed, 85 
insertions(+), 36 deletions(-) create mode 100644 .github/workflows/conda.yml delete mode 100644 conda/recipes/build.sh diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml new file mode 100644 index 000000000..9853230de --- /dev/null +++ b/.github/workflows/conda.yml @@ -0,0 +1,61 @@ +name: Build conda nightly +on: [push, pull_request] + +# Cancel any already running instances of this build +concurrency: + group: conda-${{ github.head_ref }} + cancel-in-progress: true + +# Required shell entrypoint to have properly activated conda environments +defaults: + run: + shell: bash -l {0} + +jobs: + conda: + name: Build (and optionally upload) the conda nightly + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Set up Python + uses: conda-incubator/setup-miniconda@v2.2.0 + with: + miniforge-variant: Mambaforge + use-mamba: true + python-version: "3.10" + channel-priority: strict + - name: Install dependencies + run: | + mamba install boa conda-verify + + which python + pip list + mamba list + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + components: clippy, rustfmt + - name: Build conda package + run: | + # suffix for nightly package versions + export VERSION_SUFFIX=a`date +%y%m%d` + + conda mambabuild conda/recipes \ + --no-anaconda-upload \ + --output-folder . 
+ # - name: Upload conda package + # if: | + # github.event_name == 'push' + # && github.repository == 'apache/arrow-datafusion-python' + # env: + # ANACONDA_API_TOKEN: ${{ secrets.DASK_CONDA_TOKEN }} + # LABEL: ${{ github.ref == 'refs/heads/datafusion-sql-planner' && 'dev_datafusion' || 'dev' }} + # run: | + # # install anaconda for upload + # mamba install anaconda-client + + # anaconda upload --label $LABEL linux-64/*.tar.bz2 diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 164b09e15..c69205be3 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -36,7 +36,6 @@ jobs: - "3.10" toolchain: - "stable" - - "beta" # we are not that much eager in walking on the edge yet # - nightly # build stable for only 3.7 diff --git a/Cargo.lock b/Cargo.lock index 69505061f..f4d8e0f7b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2072,9 +2072,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.141" +version = "0.2.142" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" +checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317" [[package]] name = "libflate" @@ -2123,9 +2123,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f508063cc7bb32987c71511216bd5a32be15bccb6a80b52df8b9d7f01fc3aa2" +checksum = "9b085a4f2cde5781fc4b1717f2e86c62f5cda49de7ba99a7c2eae02b61c9064c" [[package]] name = "lock_api" @@ -2910,9 +2910,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.12" +version = "0.37.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "722529a737f5a942fdbac3a46cee213053196737c5eaa3386d52e85b786f2659" +checksum = "f79bef90eb6d984c72722595b5b1348ab39275a5e5123faca6863bf07d75a4e0" dependencies = [ "bitflags 1.3.2", "errno", diff --git 
a/conda/recipes/build.sh b/conda/recipes/build.sh deleted file mode 100644 index f10be3581..000000000 --- a/conda/recipes/build.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -maturin build -r diff --git a/conda/recipes/meta.yaml b/conda/recipes/meta.yaml index cce614b2f..48e95eb08 100644 --- a/conda/recipes/meta.yaml +++ b/conda/recipes/meta.yaml @@ -15,11 +15,14 @@ # specific language governing permissions and limitations # under the License. -{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} -{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} +{% set name = "datafusion" %} +{% set major_minor_patch = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').split('.') %} +{% set new_patch = major_minor_patch[2] | int + 1 %} +{% set version = (major_minor_patch[:2] + [new_patch]) | join('.') + environ.get('VERSION_SUFFIX', '') %} + package: - name: datafusion + name: {{ name|lower }} version: {{ version }} source: @@ -27,20 +30,26 @@ source: build: noarch: python + script: {{ PYTHON }} -m pip install . 
-vv number: 0 requirements: host: - - {{ compiler('rust') }} - - python - - setuptools - - maturin + - python >=3.6 + - maturin >=0.14,<0.15 + - libprotobuf =3 + - pip run: - - python + - python >=3.6 + - pyarrow >=11.0.0 test: imports: - datafusion + commands: + - pip check + requires: + - pip about: home: https://arrow.apache.org/datafusion diff --git a/pyproject.toml b/pyproject.toml index 87d0be6cf..cf68a33c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ classifier = [ "Programming Language :: Rust", ] dependencies = [ - "pyarrow>=6.0.1", + "pyarrow>=11.0.0", ] [project.urls] From e6e8e128b95ceb91b5baaec6d04389b1c642c9d6 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 24 Apr 2023 16:14:29 -0600 Subject: [PATCH 023/413] Prepare for 23.0.0 release (#335) --- CHANGELOG.md | 25 +++-- Cargo.lock | 182 ++++++++++++++++++------------------- Cargo.toml | 14 +-- dev/release/README.md | 12 ++- src/common/data_type.rs | 2 +- src/expr/literal.rs | 4 +- src/expr/subquery_alias.rs | 2 +- src/udaf.rs | 4 +- src/udf.rs | 2 +- 9 files changed, 127 insertions(+), 120 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f4a35c70..b8942f944 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,15 @@ # Changelog +## [23.0.0](https://github.com/apache/arrow-datafusion-python/tree/23.0.0) (2023-04-23) + +[Full Changelog](https://github.com/apache/arrow-datafusion-python/compare/22.0.0...23.0.0) + +**Merged pull requests:** + +- Improve API docs, README, and examples for configuring context [#321](https://github.com/apache/arrow-datafusion-python/pull/321) (andygrove) +- Osx build linker args [#330](https://github.com/apache/arrow-datafusion-python/pull/330) (jdye64) + ## [22.0.0](https://github.com/apache/arrow-datafusion-python/tree/22.0.0) (2023-04-10) [Full Changelog](https://github.com/apache/arrow-datafusion-python/compare/21.0.0...22.0.0) @@ -163,7 +172,7 @@ - Arrow type bindings 
[\#173](https://github.com/apache/arrow-datafusion-python/pull/173) ([jdye64](https://github.com/jdye64)) - Pyo3 bump [\#171](https://github.com/apache/arrow-datafusion-python/pull/171) ([jdye64](https://github.com/jdye64)) - feature: Add additional aggregation functions [\#170](https://github.com/apache/arrow-datafusion-python/pull/170) ([simicd](https://github.com/simicd)) -- Make from\_substrait\_plan return DataFrame instead of LogicalPlan [\#164](https://github.com/apache/arrow-datafusion-python/pull/164) ([andygrove](https://github.com/andygrove)) +- Make from_substrait_plan return DataFrame instead of LogicalPlan [\#164](https://github.com/apache/arrow-datafusion-python/pull/164) ([andygrove](https://github.com/andygrove)) - feature: Implement count method [\#163](https://github.com/apache/arrow-datafusion-python/pull/163) ([simicd](https://github.com/simicd)) - CI Fixes [\#162](https://github.com/apache/arrow-datafusion-python/pull/162) ([jdye64](https://github.com/jdye64)) - Upgrade to DataFusion 17 [\#160](https://github.com/apache/arrow-datafusion-python/pull/160) ([andygrove](https://github.com/andygrove)) @@ -180,7 +189,7 @@ - Prepare for 0.8.0 release [\#141](https://github.com/apache/arrow-datafusion-python/pull/141) ([andygrove](https://github.com/andygrove)) - Improve README and add more examples [\#137](https://github.com/apache/arrow-datafusion-python/pull/137) ([andygrove](https://github.com/andygrove)) - test: Expand tests for built-in functions [\#129](https://github.com/apache/arrow-datafusion-python/pull/129) ([simicd](https://github.com/simicd)) -- build\(deps\): bump object\_store from 0.5.2 to 0.5.3 [\#126](https://github.com/apache/arrow-datafusion-python/pull/126) ([dependabot[bot]](https://github.com/apps/dependabot)) +- build\(deps\): bump object_store from 0.5.2 to 0.5.3 [\#126](https://github.com/apache/arrow-datafusion-python/pull/126) ([dependabot[bot]](https://github.com/apps/dependabot)) - build\(deps\): bump mimalloc from 
0.1.32 to 0.1.34 [\#125](https://github.com/apache/arrow-datafusion-python/pull/125) ([dependabot[bot]](https://github.com/apps/dependabot)) - Introduce conda directory containing datafusion-dev.yaml conda enviro… [\#124](https://github.com/apache/arrow-datafusion-python/pull/124) ([jdye64](https://github.com/jdye64)) - build\(deps\): bump bzip2 from 0.4.3 to 0.4.4 [\#121](https://github.com/apache/arrow-datafusion-python/pull/121) ([dependabot[bot]](https://github.com/apps/dependabot)) @@ -188,7 +197,7 @@ - build\(deps\): bump async-trait from 0.1.60 to 0.1.61 [\#118](https://github.com/apache/arrow-datafusion-python/pull/118) ([dependabot[bot]](https://github.com/apps/dependabot)) - Upgrade to DataFusion 16.0.0 [\#115](https://github.com/apache/arrow-datafusion-python/pull/115) ([andygrove](https://github.com/andygrove)) - Bump async-trait from 0.1.57 to 0.1.60 [\#114](https://github.com/apache/arrow-datafusion-python/pull/114) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Bump object\_store from 0.5.1 to 0.5.2 [\#112](https://github.com/apache/arrow-datafusion-python/pull/112) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Bump object_store from 0.5.1 to 0.5.2 [\#112](https://github.com/apache/arrow-datafusion-python/pull/112) ([dependabot[bot]](https://github.com/apps/dependabot)) - Bump tokio from 1.21.2 to 1.23.0 [\#109](https://github.com/apache/arrow-datafusion-python/pull/109) ([dependabot[bot]](https://github.com/apps/dependabot)) - Add entries for publishing production \(asf-site\) and staging docs [\#107](https://github.com/apache/arrow-datafusion-python/pull/107) ([martin-g](https://github.com/martin-g)) - Add a workflow that builds the docs and deploys them at staged or production [\#104](https://github.com/apache/arrow-datafusion-python/pull/104) ([martin-g](https://github.com/martin-g)) @@ -212,9 +221,9 @@ **Implemented enhancements:** -- Add bindings for datafusion\_common::DFField 
[\#184](https://github.com/apache/arrow-datafusion-python/issues/184) +- Add bindings for datafusion_common::DFField [\#184](https://github.com/apache/arrow-datafusion-python/issues/184) - Add bindings for DFSchema/DFSchemaRef [\#181](https://github.com/apache/arrow-datafusion-python/issues/181) -- Add bindings for datafusion\_expr Projection [\#179](https://github.com/apache/arrow-datafusion-python/issues/179) +- Add bindings for datafusion_expr Projection [\#179](https://github.com/apache/arrow-datafusion-python/issues/179) - Add bindings for `TableScan` struct from `datafusion_expr::TableScan` [\#177](https://github.com/apache/arrow-datafusion-python/issues/177) - Add a "mapping" struct for types [\#172](https://github.com/apache/arrow-datafusion-python/issues/172) - Improve string representation of datafusion classes \(dataframe, context, expression, ...\) [\#158](https://github.com/apache/arrow-datafusion-python/issues/158) @@ -233,7 +242,7 @@ - Build is broken [\#161](https://github.com/apache/arrow-datafusion-python/issues/161) - Out of memory when sorting [\#157](https://github.com/apache/arrow-datafusion-python/issues/157) -- window\_lead test appears to be non-deterministic [\#135](https://github.com/apache/arrow-datafusion-python/issues/135) +- window_lead test appears to be non-deterministic [\#135](https://github.com/apache/arrow-datafusion-python/issues/135) - Reading csv does not work [\#130](https://github.com/apache/arrow-datafusion-python/issues/130) - Github actions produce a lot of warnings [\#94](https://github.com/apache/arrow-datafusion-python/issues/94) - ASF source release tarball has wrong directory name [\#90](https://github.com/apache/arrow-datafusion-python/issues/90) @@ -252,7 +261,6 @@ [Full Changelog](https://github.com/apache/arrow-datafusion-python/compare/0.7.0...0.7.0-rc2) - ## [Unreleased](https://github.com/datafusion-contrib/datafusion-python/tree/HEAD) [Full 
Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.1...HEAD) @@ -319,5 +327,4 @@ \* _This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)_ - -\* *This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)* +\* _This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)_ diff --git a/Cargo.lock b/Cargo.lock index f4d8e0f7b..2622625c7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -29,9 +29,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "0.7.20" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04" dependencies = [ "memchr", ] @@ -113,9 +113,9 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" [[package]] name = "arrow" -version = "36.0.0" +version = "37.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990dfa1a9328504aa135820da1c95066537b69ad94c04881b785f64328e0fa6b" +checksum = "1aea9fcb25bbb70f7f922f95b99ca29c1013dab47f6df61a6f24861842dd7f2e" dependencies = [ "ahash", "arrow-arith", @@ -136,9 +136,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "36.0.0" +version = "37.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b2e52de0ab54173f9b08232b7184c26af82ee7ab4ac77c83396633c90199fa" +checksum = "8d967b42f7b12c91fd78acd396b20c2973b184c8866846674abbb00c963e93ab" dependencies = [ "arrow-array", "arrow-buffer", @@ -151,9 +151,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "36.0.0" +version = "37.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10849b60c17dbabb334be1f4ef7550701aa58082b71335ce1ed586601b2f423" +checksum = "3190f208ee7aa0f3596fa0098d42911dec5e123ca88c002a08b24877ad14c71e" dependencies = [ "ahash", "arrow-buffer", @@ -168,9 +168,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "36.0.0" +version = "37.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0746ae991b186be39933147117f8339eb1c4bbbea1c8ad37e7bf5851a1a06ba" +checksum = "5d33c733c5b6c44a0fc526f29c09546e04eb56772a7a21e48e602f368be381f6" dependencies = [ "half", "num", @@ -178,9 +178,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "36.0.0" +version = "37.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b88897802515d7b193e38b27ddd9d9e43923d410a9e46307582d756959ee9595" +checksum = "abd349520b6a1ed4924ae2afc9d23330a3044319e4ec3d5b124c09e4d440ae87" dependencies = [ "arrow-array", "arrow-buffer", @@ -195,9 +195,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "36.0.0" +version = "37.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c8220d9741fc37961262710ceebd8451a5b393de57c464f0267ffdda1775c0a" +checksum = "c80af3c3e290a2a7e1cc518f1471dff331878cb4af9a5b088bf030b89debf649" dependencies = [ "arrow-array", "arrow-buffer", @@ -214,9 +214,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "36.0.0" +version = "37.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "533f937efa1aaad9dc86f6a0e382c2fa736a4943e2090c946138079bdf060cef" +checksum = "b1c8361947aaa96d331da9df3f7a08bdd8ab805a449994c97f5c4d24c4b7e2cf" dependencies = [ "arrow-buffer", "arrow-schema", @@ -226,9 +226,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "36.0.0" +version = "37.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"18b75296ff01833f602552dff26a423fc213db8e5049b540ca4a00b1c957e41c" +checksum = "9a46ee000b9fbd1e8db6e8b26acb8c760838512b39d8c9f9d73892cb55351d50" dependencies = [ "arrow-array", "arrow-buffer", @@ -240,9 +240,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "36.0.0" +version = "37.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e501d3de4d612c90677594896ca6c0fa075665a7ff980dc4189bb531c17e19f6" +checksum = "4bf2366607be867ced681ad7f272371a5cf1fc2941328eef7b4fee14565166fb" dependencies = [ "arrow-array", "arrow-buffer", @@ -254,14 +254,15 @@ dependencies = [ "indexmap", "lexical-core", "num", + "serde", "serde_json", ] [[package]] name = "arrow-ord" -version = "36.0.0" +version = "37.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33d2671eb3793f9410230ac3efb0e6d36307be8a2dac5fad58ac9abde8e9f01e" +checksum = "304069901c867200e21ec868ae7521165875470ef2f1f6d58f979a443d63997e" dependencies = [ "arrow-array", "arrow-buffer", @@ -274,9 +275,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "36.0.0" +version = "37.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc11fa039338cebbf4e29cf709c8ac1d6a65c7540063d4a25f991ab255ca85c8" +checksum = "0d57fe8ceef3392fdd493269d8a2d589de17bafce151aacbffbddac7a57f441a" dependencies = [ "ahash", "arrow-array", @@ -289,18 +290,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "36.0.0" +version = "37.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d04f17f7b86ded0b5baf98fe6123391c4343e031acc3ccc5fa604cc180bff220" +checksum = "a16b88a93ac8350f0200b1cd336a1f887315925b8dd7aa145a37b8bdbd8497a4" dependencies = [ - "bitflags 2.1.0", + "bitflags 2.2.1", ] [[package]] name = "arrow-select" -version = "36.0.0" +version = "37.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"163e35de698098ff5f5f672ada9dc1f82533f10407c7a11e2cd09f3bcf31d18a" +checksum = "98e8a4d6ca37d5212439b24caad4d80743fcbb706706200dd174bb98e68fe9d8" dependencies = [ "arrow-array", "arrow-buffer", @@ -311,9 +312,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "36.0.0" +version = "37.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfdfbed1b10209f0dc68e6aa4c43dc76079af65880965c7c3b73f641f23d4aba" +checksum = "cbb594efa397eb6a546f42b1f8df3d242ea84dbfda5232e06035dc2b2e2c8459" dependencies = [ "arrow-array", "arrow-buffer", @@ -321,7 +322,7 @@ dependencies = [ "arrow-schema", "arrow-select", "regex", - "regex-syntax", + "regex-syntax 0.6.29", ] [[package]] @@ -393,9 +394,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.1.0" +version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c70beb79cbb5ce9c4f8e20849978f34225931f665bb49efa6982875a4d5facb3" +checksum = "24a6904aef64d73cf10ab17ebace7befb918b82164785cb89907993be7f83813" [[package]] name = "blake2" @@ -473,9 +474,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.12.0" +version = "3.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" +checksum = "9b1ce199063694f33ffb7dd4e0ee620741495c32833cde5aa08f02a0bf96f0c8" [[package]] name = "byteorder" @@ -532,12 +533,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4e3c5919066adf22df73762e50cffcde3a758f2a848b113b586d1f86728b673b" dependencies = [ "iana-time-zone", - "js-sys", "num-integer", "num-traits", "serde", - "time 0.1.45", - "wasm-bindgen", "winapi", ] @@ -626,9 +624,9 @@ checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "cpufeatures" -version = "0.2.6" +version = "0.2.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" +checksum = "3e4c1eaa2012c47becbbad2ab175484c2a84d1185b566fb2cc5b8707343dfe58" dependencies = [ "libc", ] @@ -738,13 +736,15 @@ dependencies = [ [[package]] name = "datafusion" -version = "22.0.0" +version = "23.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bdb93fee4f30368f1f71bfd5cd28882ec9fab0183db7924827b76129d33227c" +checksum = "a8a7d4b334f4512ff2fdbce87f511f570ae895af1ac7c729e77c12583253b22a" dependencies = [ "ahash", "apache-avro", "arrow", + "arrow-array", + "arrow-schema", "async-compression", "async-trait", "bytes", @@ -788,9 +788,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "22.0.0" +version = "23.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e82401ce129e601d406012b6d718f8978ba84c386e1c342fa155877120d68824" +checksum = "80abfcb1dbc6390f952f21de9069e6177ad6318fcae5fbceabb50666d96533dd" dependencies = [ "apache-avro", "arrow", @@ -805,9 +805,9 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "22.0.0" +version = "23.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b08b2078aed21a27239cd93f3015e492a58b0d50ebeeaf8d2236cf108ef583ce" +checksum = "df2524f1b4b58319895b112809d2a59e54fa662d0e46330a455f22882c2cb7b9" dependencies = [ "dashmap", "datafusion-common", @@ -823,9 +823,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "22.0.0" +version = "23.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b5b977ce9695fb4c67614266ec57f384fc11e9a9f9b3e6d0e62b9c5a9f2c1f" +checksum = "af8040b7a75b04685f4db0a1b11ffa93cd163c1bc13751df3f5cf76baabaf5a1" dependencies = [ "ahash", "arrow", @@ -835,9 +835,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "22.0.0" +version = "23.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0b2bb9e73ed778d1bc5af63a270f0154bf6eab5099c77668a6362296888e46b" +checksum = "74ceae25accc0f640a4238283f55f3a9fd181d55398703a4330fb2c46261e6a2" dependencies = [ "arrow", "async-trait", @@ -848,14 +848,14 @@ dependencies = [ "hashbrown 0.13.2", "itertools", "log", - "regex-syntax", + "regex-syntax 0.6.29", ] [[package]] name = "datafusion-physical-expr" -version = "22.0.0" +version = "23.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80cd8ea5ab0a07b1b2a3e17d5909f1b1035bd129ffeeb5c66842a32e682f8f79" +checksum = "df4cf228b312f2758cb78e93fe3d2dc602345028efdf7cfa5b338cb370d0a347" dependencies = [ "ahash", "arrow", @@ -873,6 +873,7 @@ dependencies = [ "indexmap", "itertools", "lazy_static", + "libc", "md-5", "paste", "petgraph", @@ -885,7 +886,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "22.0.0" +version = "23.0.0" dependencies = [ "async-trait", "datafusion", @@ -901,7 +902,7 @@ dependencies = [ "pyo3", "pyo3-build-config", "rand", - "regex-syntax", + "regex-syntax 0.6.29", "syn 2.0.15", "tokio", "url", @@ -910,9 +911,9 @@ dependencies = [ [[package]] name = "datafusion-row" -version = "22.0.0" +version = "23.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a95d6badab19fd6e9195fdc5209ac0a7e5ce9bcdedc67767b9ffc1b4e645760" +checksum = "b52b486fb3d81bb132e400304be01af5aba0ad6737e3518045bb98944991fe32" dependencies = [ "arrow", "datafusion-common", @@ -922,9 +923,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "22.0.0" +version = "23.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37a78f8fc67123c4357e63bc0c87622a2a663d26f074958d749a633d0ecde90f" +checksum = "773e985c182e41cfd68f7a7b483ab6bfb68beaac241c348cd4b1bf9f9d61b762" dependencies = [ "arrow", "arrow-schema", @@ -936,9 +937,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = 
"22.0.0" +version = "23.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae6ed64a2005f0d78f2b1b3ec3f8148183f4523d5d364e5367115f8d8a82b7df" +checksum = "836e9b1c0ea430199c9bd4b88024cb8d617e3768ffdb412064169e2504a850ed" dependencies = [ "async-recursion", "chrono", @@ -1204,7 +1205,7 @@ checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" dependencies = [ "cfg-if", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", ] [[package]] @@ -1365,7 +1366,7 @@ dependencies = [ "bstr", "itoa", "thiserror", - "time 0.3.20", + "time", ] [[package]] @@ -2123,9 +2124,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b085a4f2cde5781fc4b1717f2e86c62f5cda49de7ba99a7c2eae02b61c9064c" +checksum = "36eb31c1778188ae1e64398743890d0877fef36d11521ac60406b42016e8c2cf" [[package]] name = "lock_api" @@ -2248,7 +2249,7 @@ checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9" dependencies = [ "libc", "log", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", "windows-sys 0.45.0", ] @@ -2444,9 +2445,9 @@ dependencies = [ [[package]] name = "parquet" -version = "36.0.0" +version = "37.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "321a15f8332645759f29875b07f8233d16ed8ec1b3582223de81625a9f8506b7" +checksum = "b5022d98333271f4ca3e87bab760498e61726bf5a6ca919123c80517e20ded29" dependencies = [ "ahash", "arrow-array", @@ -2806,13 +2807,13 @@ dependencies = [ [[package]] name = "regex" -version = "1.7.3" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" +checksum = "af83e617f331cc6ae2da5443c602dfa5af81e517212d9d611a5b3ba1777b5370" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.7.1", ] [[package]] @@ -2827,6 +2828,12 @@ 
version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" +[[package]] +name = "regex-syntax" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c" + [[package]] name = "regress" version = "0.5.0" @@ -2910,9 +2917,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.13" +version = "0.37.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f79bef90eb6d984c72722595b5b1348ab39275a5e5123faca6863bf07d75a4e0" +checksum = "d9b864d3c18a5785a05953adeed93e2dca37ed30f18e69bba9f30079d51f363f" dependencies = [ "bitflags 1.3.2", "errno", @@ -3203,9 +3210,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] name = "sqlparser" -version = "0.32.0" +version = "0.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0366f270dbabb5cc2e4c88427dc4c08bba144f81e32fbd459a013f26a4d16aa0" +checksum = "355dc4d4b6207ca8a3434fc587db0a8016130a574dbcdbfb93d7f7b5bc5b211a" dependencies = [ "log", "sqlparser_derive", @@ -3357,17 +3364,6 @@ dependencies = [ "ordered-float", ] -[[package]] -name = "time" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" -dependencies = [ - "libc", - "wasi 0.10.0+wasi-snapshot-preview1", - "winapi", -] - [[package]] name = "time" version = "0.3.20" @@ -3506,13 +3502,13 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a" +checksum = "0f57e3ca2a01450b1a921183a9c9cbfda207fd822cef4ccb00a65402cbba7a74" dependencies = [ "proc-macro2", "quote", - "syn 
1.0.109", + "syn 2.0.15", ] [[package]] @@ -3705,12 +3701,6 @@ dependencies = [ "try-lock", ] -[[package]] -name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" diff --git a/Cargo.toml b/Cargo.toml index 22a2989ae..8c205a4cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "22.0.0" +version = "23.0.0" homepage = "https://github.com/apache/arrow-datafusion-python" repository = "https://github.com/apache/arrow-datafusion-python" authors = ["Apache Arrow "] @@ -36,12 +36,12 @@ protoc = [ "datafusion-substrait/protoc" ] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.18.1", features = ["extension-module", "abi3", "abi3-py37"] } -datafusion = { version = "22.0.0", features = ["pyarrow", "avro"] } -datafusion-common = { version = "22.0.0", features = ["pyarrow"] } -datafusion-expr = { version = "22.0.0" } -datafusion-optimizer = { version = "22.0.0" } -datafusion-sql = { version = "22.0.0" } -datafusion-substrait = { version = "22.0.0" } +datafusion = { version = "23.0.0" , features = ["pyarrow", "avro"] } +datafusion-common = { version = "23.0.0", features = ["pyarrow"] } +datafusion-expr = "23.0.0" +datafusion-optimizer = "23.0.0" +datafusion-sql = "23.0.0" +datafusion-substrait = "23.0.0" uuid = { version = "1.2", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false } async-trait = "0.1" diff --git a/dev/release/README.md b/dev/release/README.md index 84058a606..1ba44fb50 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -145,14 +145,22 @@ Pushing an `rc` tag to the release branch will cause a GitHub Workflow to run th Go to 
https://github.com/apache/arrow-datafusion-python/actions and look for an action named "Python Release Build" that has run against the pushed tag. -Click on the action and scroll down to the bottom of the page titled "Artifacts". Download `dist.zip`. +Click on the action and scroll down to the bottom of the page titled "Artifacts". Download `dist.zip`. It should +contain files such as: + +```text +datafusion-22.0.0-cp37-abi3-macosx_10_7_x86_64.whl +datafusion-22.0.0-cp37-abi3-macosx_11_0_arm64.whl +datafusion-22.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl +datafusion-22.0.0-cp37-abi3-win_amd64.whl +``` Upload the wheels to testpypi. ```bash unzip dist.zip python3 -m pip install --upgrade setuptools twine build -python3 -m twine upload --repository testpypi datafusion-0.7.0-cp37-abi3-*.whl +python3 -m twine upload --repository testpypi datafusion-22.0.0-cp37-abi3-*.whl ``` When prompted for username, enter `__token__`. When prompted for a password, enter a valid GitHub Personal Access Token diff --git a/src/common/data_type.rs b/src/common/data_type.rs index e07805c52..a7b79f49f 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -180,7 +180,7 @@ impl DataTypeMap { DataType::Struct(_) => Err(py_datafusion_err(DataFusionError::NotImplemented( format!("{:?}", arrow_type), ))), - DataType::Union(_, _, _) => Err(py_datafusion_err(DataFusionError::NotImplemented( + DataType::Union(_, _) => Err(py_datafusion_err(DataFusionError::NotImplemented( format!("{:?}", arrow_type), ))), DataType::Dictionary(_, _) => Err(py_datafusion_err(DataFusionError::NotImplemented( diff --git a/src/expr/literal.rs b/src/expr/literal.rs index f99216957..076f89a66 100644 --- a/src/expr/literal.rs +++ b/src/expr/literal.rs @@ -117,7 +117,9 @@ impl PyLiteral { ScalarValue::TimestampNanosecond(iv, tz) | ScalarValue::TimestampMicrosecond(iv, tz) | ScalarValue::TimestampMillisecond(iv, tz) - | ScalarValue::TimestampSecond(iv, tz) => Ok((*iv, tz.clone())), + | 
ScalarValue::TimestampSecond(iv, tz) => { + Ok((*iv, tz.as_ref().map(|s| s.as_ref().to_string()))) + } other => Err(unexpected_literal_value(other)), } } diff --git a/src/expr/subquery_alias.rs b/src/expr/subquery_alias.rs index 5360bbbc4..d3abd2e8c 100644 --- a/src/expr/subquery_alias.rs +++ b/src/expr/subquery_alias.rs @@ -68,7 +68,7 @@ impl PySubqueryAlias { } fn alias(&self) -> PyResult { - Ok(self.subquery_alias.alias.clone()) + Ok(self.subquery_alias.alias.to_string()) } fn __repr__(&self) -> PyResult { diff --git a/src/udaf.rs b/src/udaf.rs index d5866f840..756fa5659 100644 --- a/src/udaf.rs +++ b/src/udaf.rs @@ -56,7 +56,7 @@ impl Accumulator for RustAccumulator { // 1. cast args to Pyarrow array let py_args = values .iter() - .map(|arg| arg.data().to_owned().to_pyarrow(py).unwrap()) + .map(|arg| arg.into_data().to_pyarrow(py).unwrap()) .collect::>(); let py_args = PyTuple::new(py, py_args); @@ -76,7 +76,7 @@ impl Accumulator for RustAccumulator { // 1. cast states to Pyarrow array let state = state - .data() + .into_data() .to_pyarrow(py) .map_err(|e| DataFusionError::Execution(format!("{e}")))?; diff --git a/src/udf.rs b/src/udf.rs index f3e6cfb58..dcb9cd572 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -41,7 +41,7 @@ fn to_rust_function(func: PyObject) -> ScalarFunctionImplementation { // 1. cast args to Pyarrow arrays let py_args = args .iter() - .map(|arg| arg.data().to_owned().to_pyarrow(py).unwrap()) + .map(|arg| arg.into_data().to_pyarrow(py).unwrap()) .collect::>(); let py_args = PyTuple::new(py, py_args); From 545e93e1739a71ef77cee1afe9196c318aabb667 Mon Sep 17 00:00:00 2001 From: Kyle Brooks <84413234+kylebrooks-8451@users.noreply.github.com> Date: Mon, 24 Apr 2023 19:48:04 -0400 Subject: [PATCH 024/413] Store the Tokio Runtime in an _internal module level attribute and reuse it. 
(#341) --- src/context.rs | 5 ++--- src/lib.rs | 9 +++++++++ src/utils.rs | 11 +++++++++-- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/context.rs b/src/context.rs index 0ba120022..3dc8a8fba 100644 --- a/src/context.rs +++ b/src/context.rs @@ -36,7 +36,7 @@ use crate::sql::logical::PyLogicalPlan; use crate::store::StorageContexts; use crate::udaf::PyAggregateUDF; use crate::udf::PyScalarUDF; -use crate::utils::wait_for_future; +use crate::utils::{get_tokio_runtime, wait_for_future}; use datafusion::arrow::datatypes::{DataType, Schema}; use datafusion::arrow::pyarrow::PyArrowType; use datafusion::arrow::record_batch::RecordBatch; @@ -52,7 +52,6 @@ use datafusion::prelude::{ }; use datafusion_common::ScalarValue; use pyo3::types::PyTuple; -use tokio::runtime::Runtime; use tokio::task::JoinHandle; /// Configuration options for a SessionContext @@ -722,7 +721,7 @@ impl PySessionContext { Arc::new(RuntimeEnv::default()), ); // create a Tokio runtime to run the async code - let rt = Runtime::new().unwrap(); + let rt = &get_tokio_runtime(py).0; let plan = plan.plan.clone(); let fut: JoinHandle> = rt.spawn(async move { plan.execute(part, Arc::new(ctx)) }); diff --git a/src/lib.rs b/src/lib.rs index 4a6574c16..0bb4d9a12 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -59,12 +59,21 @@ pub mod utils; #[global_allocator] static GLOBAL: MiMalloc = MiMalloc; +// Used to define Tokio Runtime as a Python module attribute +#[pyclass] +pub(crate) struct TokioRuntime(tokio::runtime::Runtime); + /// Low-level DataFusion internal package. /// /// The higher-level public API is defined in pure python files under the /// datafusion directory. 
#[pymodule] fn _internal(py: Python, m: &PyModule) -> PyResult<()> { + // Register the Tokio Runtime as a module attribute so we can reuse it + m.add( + "runtime", + TokioRuntime(tokio::runtime::Runtime::new().unwrap()), + )?; // Register the python classes m.add_class::()?; m.add_class::()?; diff --git a/src/utils.rs b/src/utils.rs index 4158b7485..427a8a064 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -16,19 +16,26 @@ // under the License. use crate::errors::DataFusionError; +use crate::TokioRuntime; use datafusion_expr::Volatility; use pyo3::prelude::*; use std::future::Future; use tokio::runtime::Runtime; +/// Utility to get the Tokio Runtime from Python +pub(crate) fn get_tokio_runtime(py: Python) -> PyRef { + let datafusion = py.import("datafusion._internal").unwrap(); + datafusion.getattr("runtime").unwrap().extract().unwrap() +} + /// Utility to collect rust futures with GIL released pub fn wait_for_future(py: Python, f: F) -> F::Output where F: Send, F::Output: Send, { - let rt = Runtime::new().unwrap(); - py.allow_threads(|| rt.block_on(f)) + let runtime: &Runtime = &get_tokio_runtime(py).0; + py.allow_threads(|| runtime.block_on(f)) } pub(crate) fn parse_volatility(value: &str) -> Result { From 14f48408ee56694ca17e6fa67408be626bb56fe0 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 24 Apr 2023 18:27:41 -0600 Subject: [PATCH 025/413] update changelog (#342) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b8942f944..26c4eb18e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,8 +25,15 @@ **Merged pull requests:** +**Merged pull requests:** + - Improve API docs, README, and examples for configuring context [#321](https://github.com/apache/arrow-datafusion-python/pull/321) (andygrove) - Osx build linker args [#330](https://github.com/apache/arrow-datafusion-python/pull/330) (jdye64) +- Add requirements file for python 3.11 [#332](https://github.com/apache/arrow-datafusion-python/pull/332) 
(r4ntix) +- mac arm64 build [#338](https://github.com/apache/arrow-datafusion-python/pull/338) (andygrove) +- Add conda.yaml baseline workflow file [#281](https://github.com/apache/arrow-datafusion-python/pull/281) (jdye64) +- Prepare for 23.0.0 release [#335](https://github.com/apache/arrow-datafusion-python/pull/335) (andygrove) +- Reuse the Tokio Runtime [#341](https://github.com/apache/arrow-datafusion-python/pull/341) (kylebrooks-8451) ## [22.0.0](https://github.com/apache/arrow-datafusion-python/tree/22.0.0) (2023-04-10) From b6c21150d84d390418bf7514983771e483375828 Mon Sep 17 00:00:00 2001 From: Kyle Brooks <84413234+kylebrooks-8451@users.noreply.github.com> Date: Tue, 25 Apr 2023 19:53:55 -0400 Subject: [PATCH 026/413] Add interface to directly serialize Substrait plans to Python Bytes. (#344) Clean up existing substrait bindings to return bytes instead of List[int]. --- Cargo.lock | 2 ++ Cargo.toml | 2 ++ datafusion/tests/test_substrait.py | 2 ++ examples/substrait.py | 7 ++++++- src/errors.rs | 3 +++ src/substrait.rs | 24 ++++++++++++++++++++---- 6 files changed, 35 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2622625c7..db7c9a768 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -899,6 +899,8 @@ dependencies = [ "mimalloc", "object_store", "parking_lot", + "prost", + "prost-types", "pyo3", "pyo3-build-config", "rand", diff --git a/Cargo.toml b/Cargo.toml index 8c205a4cd..4c5203d69 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,8 @@ datafusion-expr = "23.0.0" datafusion-optimizer = "23.0.0" datafusion-sql = "23.0.0" datafusion-substrait = "23.0.0" +prost = "0.11" +prost-types = "0.11" uuid = { version = "1.2", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false } async-trait = "0.1" diff --git a/datafusion/tests/test_substrait.py b/datafusion/tests/test_substrait.py index 9a08b760e..01df2d746 100644 --- a/datafusion/tests/test_substrait.py +++ b/datafusion/tests/test_substrait.py @@ 
-41,6 +41,8 @@ def test_substrait_serialization(ctx): substrait_plan = ss.substrait.serde.serialize_to_plan( "SELECT * FROM t", ctx ) + substrait_bytes = substrait_plan.encode() + assert type(substrait_bytes) is bytes substrait_bytes = ss.substrait.serde.serialize_bytes( "SELECT * FROM t", ctx ) diff --git a/examples/substrait.py b/examples/substrait.py index c167f7d90..515311d85 100644 --- a/examples/substrait.py +++ b/examples/substrait.py @@ -32,8 +32,13 @@ ) # type(substrait_plan) -> +# Encode it to bytes +substrait_bytes = substrait_plan.encode() +# type(substrait_bytes) -> , at this point the bytes can be distributed to file, network, etc safely +# where they could subsequently be deserialized on the receiving end. + # Alternative serialization approaches -# type(substrait_bytes) -> , at this point the bytes can be distributed to file, network, etc safely +# type(substrait_bytes) -> , at this point the bytes can be distributed to file, network, etc safely # where they could subsequently be deserialized on the receiving end. 
substrait_bytes = ss.substrait.serde.serialize_bytes( "SELECT * FROM aggregate_test_data", ctx diff --git a/src/errors.rs b/src/errors.rs index ce6b3c28a..d12b6ade1 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -21,6 +21,7 @@ use std::fmt::Debug; use datafusion::arrow::error::ArrowError; use datafusion::error::DataFusionError as InnerDataFusionError; +use prost::EncodeError; use pyo3::{exceptions::PyException, PyErr}; pub type Result = std::result::Result; @@ -31,6 +32,7 @@ pub enum DataFusionError { ArrowError(ArrowError), Common(String), PythonError(PyErr), + EncodeError(EncodeError), } impl fmt::Display for DataFusionError { @@ -40,6 +42,7 @@ impl fmt::Display for DataFusionError { DataFusionError::ArrowError(e) => write!(f, "Arrow error: {e:?}"), DataFusionError::PythonError(e) => write!(f, "Python error {e:?}"), DataFusionError::Common(e) => write!(f, "{e}"), + DataFusionError::EncodeError(e) => write!(f, "Failed to encode substrait plan: {e}"), } } } diff --git a/src/substrait.rs b/src/substrait.rs index 2bde01123..361e9ac4b 100644 --- a/src/substrait.rs +++ b/src/substrait.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-use pyo3::prelude::*; +use pyo3::{prelude::*, types::PyBytes}; use crate::context::PySessionContext; use crate::errors::{py_datafusion_err, DataFusionError}; @@ -25,6 +25,7 @@ use crate::utils::wait_for_future; use datafusion_substrait::logical_plan::{consumer, producer}; use datafusion_substrait::serializer; use datafusion_substrait::substrait::proto::Plan; +use prost::Message; #[pyclass(name = "plan", module = "datafusion.substrait", subclass, unsendable)] #[derive(Debug, Clone)] @@ -32,6 +33,17 @@ pub(crate) struct PyPlan { pub(crate) plan: Plan, } +#[pymethods] +impl PyPlan { + fn encode(&self, py: Python) -> PyResult { + let mut proto_bytes = Vec::::new(); + self.plan + .encode(&mut proto_bytes) + .map_err(|e| DataFusionError::EncodeError(e))?; + Ok(PyBytes::new(py, &proto_bytes).into()) + } +} + impl From for Plan { fn from(plan: PyPlan) -> Plan { plan.plan @@ -63,16 +75,19 @@ impl PySubstraitSerializer { #[staticmethod] pub fn serialize_to_plan(sql: &str, ctx: PySessionContext, py: Python) -> PyResult { match PySubstraitSerializer::serialize_bytes(sql, ctx, py) { - Ok(proto_bytes) => PySubstraitSerializer::deserialize_bytes(proto_bytes, py), + Ok(proto_bytes) => { + let proto_bytes: &PyBytes = proto_bytes.as_ref(py).downcast().unwrap(); + PySubstraitSerializer::deserialize_bytes(proto_bytes.as_bytes().to_vec(), py) + } Err(e) => Err(py_datafusion_err(e)), } } #[staticmethod] - pub fn serialize_bytes(sql: &str, ctx: PySessionContext, py: Python) -> PyResult> { + pub fn serialize_bytes(sql: &str, ctx: PySessionContext, py: Python) -> PyResult { let proto_bytes: Vec = wait_for_future(py, serializer::serialize_bytes(sql, &ctx.ctx)) .map_err(DataFusionError::from)?; - Ok(proto_bytes) + Ok(PyBytes::new(py, &proto_bytes).into()) } #[staticmethod] @@ -136,6 +151,7 @@ impl PySubstraitConsumer { } pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; From 
af1a8ebade3d537ce24f6f01a037a56177a917fa Mon Sep 17 00:00:00 2001 From: Kyle Brooks <84413234+kylebrooks-8451@users.noreply.github.com> Date: Tue, 25 Apr 2023 19:54:40 -0400 Subject: [PATCH 027/413] Add partition_count property to ExecutionPlan. (#346) --- datafusion/tests/test_dataframe.py | 3 +++ examples/substrait.py | 2 +- src/physical_plan.rs | 5 +++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py index cd78f3c88..221b0cc09 100644 --- a/datafusion/tests/test_dataframe.py +++ b/datafusion/tests/test_dataframe.py @@ -370,6 +370,9 @@ def test_execution_plan(aggregate_df): assert expected == plan.display() + # Check the number of partitions is as expected. + assert type(plan.partition_count) is int + expected = ( "ProjectionExec: expr=[c1@0 as c1, SUM(test.c2)@1 as SUM(test.c2)]\n" " Aggregate: groupBy=[[test.c1]], aggr=[[SUM(test.c2)]]\n" diff --git a/examples/substrait.py b/examples/substrait.py index 515311d85..c579751d2 100644 --- a/examples/substrait.py +++ b/examples/substrait.py @@ -23,7 +23,7 @@ ctx = SessionContext() # Register table with context -ctx.register_parquet( +ctx.register_csv( "aggregate_test_data", "./testing/data/csv/aggregate_test_100.csv" ) diff --git a/src/physical_plan.rs b/src/physical_plan.rs index 340d527fa..4c35f3e60 100644 --- a/src/physical_plan.rs +++ b/src/physical_plan.rs @@ -53,6 +53,11 @@ impl PyExecutionPlan { let d = displayable(self.plan.as_ref()); format!("{}", d.indent()) } + + #[getter] + pub fn partition_count(&self) -> usize { + self.plan.output_partitioning().partition_count() + } } impl From for Arc { From a1e2b8ba8a233c7ea2edde3918334200a5727226 Mon Sep 17 00:00:00 2001 From: Kyle Brooks <84413234+kylebrooks-8451@users.noreply.github.com> Date: Thu, 27 Apr 2023 17:59:03 -0400 Subject: [PATCH 028/413] Remove unsendable from all Rust pyclass types. 
(#348) --- src/context.rs | 6 +++--- src/store.rs | 23 ++++------------------- src/substrait.rs | 18 ++++-------------- 3 files changed, 11 insertions(+), 36 deletions(-) diff --git a/src/context.rs b/src/context.rs index 3dc8a8fba..0733a58f4 100644 --- a/src/context.rs +++ b/src/context.rs @@ -55,7 +55,7 @@ use pyo3::types::PyTuple; use tokio::task::JoinHandle; /// Configuration options for a SessionContext -#[pyclass(name = "SessionConfig", module = "datafusion", subclass, unsendable)] +#[pyclass(name = "SessionConfig", module = "datafusion", subclass)] #[derive(Clone, Default)] pub(crate) struct PySessionConfig { pub(crate) config: SessionConfig, @@ -148,7 +148,7 @@ impl PySessionConfig { } /// Runtime options for a SessionContext -#[pyclass(name = "RuntimeConfig", module = "datafusion", subclass, unsendable)] +#[pyclass(name = "RuntimeConfig", module = "datafusion", subclass)] #[derive(Clone)] pub(crate) struct PyRuntimeConfig { pub(crate) config: RuntimeConfig, @@ -210,7 +210,7 @@ impl PyRuntimeConfig { /// `PySessionContext` is able to plan and execute DataFusion plans. /// It has a powerful optimizer, a physical planner for local execution, and a /// multi-threaded execution engine to perform the execution. 
-#[pyclass(name = "SessionContext", module = "datafusion", subclass, unsendable)] +#[pyclass(name = "SessionContext", module = "datafusion", subclass)] #[derive(Clone)] pub(crate) struct PySessionContext { pub(crate) ctx: SessionContext, diff --git a/src/store.rs b/src/store.rs index 7d9bb7518..542cfa925 100644 --- a/src/store.rs +++ b/src/store.rs @@ -32,12 +32,7 @@ pub enum StorageContexts { LocalFileSystem(PyLocalFileSystemContext), } -#[pyclass( - name = "LocalFileSystem", - module = "datafusion.store", - subclass, - unsendable -)] +#[pyclass(name = "LocalFileSystem", module = "datafusion.store", subclass)] #[derive(Debug, Clone)] pub struct PyLocalFileSystemContext { pub inner: Arc, @@ -63,12 +58,7 @@ impl PyLocalFileSystemContext { } } -#[pyclass( - name = "MicrosoftAzure", - module = "datafusion.store", - subclass, - unsendable -)] +#[pyclass(name = "MicrosoftAzure", module = "datafusion.store", subclass)] #[derive(Debug, Clone)] pub struct PyMicrosoftAzureContext { pub inner: Arc, @@ -140,12 +130,7 @@ impl PyMicrosoftAzureContext { } } -#[pyclass( - name = "GoogleCloud", - module = "datafusion.store", - subclass, - unsendable -)] +#[pyclass(name = "GoogleCloud", module = "datafusion.store", subclass)] #[derive(Debug, Clone)] pub struct PyGoogleCloudContext { pub inner: Arc, @@ -175,7 +160,7 @@ impl PyGoogleCloudContext { } } -#[pyclass(name = "AmazonS3", module = "datafusion.store", subclass, unsendable)] +#[pyclass(name = "AmazonS3", module = "datafusion.store", subclass)] #[derive(Debug, Clone)] pub struct PyAmazonS3Context { pub inner: Arc, diff --git a/src/substrait.rs b/src/substrait.rs index 361e9ac4b..5d2e7a485 100644 --- a/src/substrait.rs +++ b/src/substrait.rs @@ -27,7 +27,7 @@ use datafusion_substrait::serializer; use datafusion_substrait::substrait::proto::Plan; use prost::Message; -#[pyclass(name = "plan", module = "datafusion.substrait", subclass, unsendable)] +#[pyclass(name = "plan", module = "datafusion.substrait", subclass)] 
#[derive(Debug, Clone)] pub(crate) struct PyPlan { pub(crate) plan: Plan, @@ -59,7 +59,7 @@ impl From for PyPlan { /// A PySubstraitSerializer is a representation of a Serializer that is capable of both serializing /// a `LogicalPlan` instance to Substrait Protobuf bytes and also deserialize Substrait Protobuf bytes /// to a valid `LogicalPlan` instance. -#[pyclass(name = "serde", module = "datafusion.substrait", subclass, unsendable)] +#[pyclass(name = "serde", module = "datafusion.substrait", subclass)] #[derive(Debug, Clone)] pub(crate) struct PySubstraitSerializer; @@ -105,12 +105,7 @@ impl PySubstraitSerializer { } } -#[pyclass( - name = "producer", - module = "datafusion.substrait", - subclass, - unsendable -)] +#[pyclass(name = "producer", module = "datafusion.substrait", subclass)] #[derive(Debug, Clone)] pub(crate) struct PySubstraitProducer; @@ -126,12 +121,7 @@ impl PySubstraitProducer { } } -#[pyclass( - name = "consumer", - module = "datafusion.substrait", - subclass, - unsendable -)] +#[pyclass(name = "consumer", module = "datafusion.substrait", subclass)] #[derive(Debug, Clone)] pub(crate) struct PySubstraitConsumer; From e1a8a3c623110b5214ce417c12431d2ae4238c60 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 27 Apr 2023 17:30:23 -0600 Subject: [PATCH 029/413] Fix link to user guide (#354) --- .asf.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.asf.yaml b/.asf.yaml index f27975c84..de776871c 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -22,7 +22,7 @@ notifications: jira_options: link label worklog github: description: "Apache Arrow DataFusion Python Bindings" - homepage: https://arrow.apache.org/datafusion + homepage: https://arrow.apache.org/datafusion-python enabled_merge_buttons: squash: true merge: false From 6642660dcfa4ab9e7c743b6537e0e3499f0923d5 Mon Sep 17 00:00:00 2001 From: Kyle Brooks <84413234+kylebrooks-8451@users.noreply.github.com> Date: Thu, 27 Apr 2023 19:31:05 -0400 Subject: [PATCH 030/413] Fix 
SessionContext execute. (#353) --- src/context.rs | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/context.rs b/src/context.rs index 0733a58f4..b7f82230f 100644 --- a/src/context.rs +++ b/src/context.rs @@ -712,14 +712,7 @@ impl PySessionContext { part: usize, py: Python, ) -> PyResult { - let ctx = TaskContext::new( - None, - "session_id".to_string(), - SessionConfig::new(), - HashMap::new(), - HashMap::new(), - Arc::new(RuntimeEnv::default()), - ); + let ctx: TaskContext = TaskContext::from(&self.ctx.state()); // create a Tokio runtime to run the async code let rt = &get_tokio_runtime(py).0; let plan = plan.plan.clone(); From befd967558a77ab6048e5130354d29063c843b9c Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Fri, 28 Apr 2023 19:17:31 -0400 Subject: [PATCH 031/413] Make expr module public for library consumption (#357) --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 0bb4d9a12..2512aefa4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,7 +40,7 @@ mod dataset; mod dataset_exec; pub mod errors; #[allow(clippy::borrow_deref_ref)] -mod expr; +pub mod expr; #[allow(clippy::borrow_deref_ref)] mod functions; pub mod physical_plan; From 0911bf31b85624e76e12eb2f93c71ad1caa6da68 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 3 May 2023 11:50:32 -0600 Subject: [PATCH 032/413] Add benchmark derived from TPC-H (#355) --- benchmarks/tpch/.gitignore | 2 + benchmarks/tpch/README.md | 78 ++++++++++++++++++++ benchmarks/tpch/create_tables.sql | 119 ++++++++++++++++++++++++++++++ benchmarks/tpch/queries/q1.sql | 23 ++++++ benchmarks/tpch/queries/q10.sql | 33 +++++++++ benchmarks/tpch/queries/q11.sql | 29 ++++++++ benchmarks/tpch/queries/q12.sql | 30 ++++++++ benchmarks/tpch/queries/q13.sql | 22 ++++++ benchmarks/tpch/queries/q14.sql | 15 ++++ benchmarks/tpch/queries/q15.sql | 33 +++++++++ benchmarks/tpch/queries/q16.sql | 32 ++++++++ benchmarks/tpch/queries/q17.sql | 19 +++++ 
benchmarks/tpch/queries/q18.sql | 34 +++++++++ benchmarks/tpch/queries/q19.sql | 37 ++++++++++ benchmarks/tpch/queries/q2.sql | 45 +++++++++++ benchmarks/tpch/queries/q20.sql | 39 ++++++++++ benchmarks/tpch/queries/q21.sql | 41 ++++++++++ benchmarks/tpch/queries/q22.sql | 39 ++++++++++ benchmarks/tpch/queries/q3.sql | 24 ++++++ benchmarks/tpch/queries/q4.sql | 23 ++++++ benchmarks/tpch/queries/q5.sql | 26 +++++++ benchmarks/tpch/queries/q6.sql | 11 +++ benchmarks/tpch/queries/q7.sql | 41 ++++++++++ benchmarks/tpch/queries/q8.sql | 39 ++++++++++ benchmarks/tpch/queries/q9.sql | 34 +++++++++ benchmarks/tpch/tpch-gen.sh | 53 +++++++++++++ benchmarks/tpch/tpch.py | 102 +++++++++++++++++++++++++ dev/release/rat_exclude_files.txt | 4 +- src/context.rs | 2 +- 29 files changed, 1027 insertions(+), 2 deletions(-) create mode 100644 benchmarks/tpch/.gitignore create mode 100644 benchmarks/tpch/README.md create mode 100644 benchmarks/tpch/create_tables.sql create mode 100644 benchmarks/tpch/queries/q1.sql create mode 100644 benchmarks/tpch/queries/q10.sql create mode 100644 benchmarks/tpch/queries/q11.sql create mode 100644 benchmarks/tpch/queries/q12.sql create mode 100644 benchmarks/tpch/queries/q13.sql create mode 100644 benchmarks/tpch/queries/q14.sql create mode 100644 benchmarks/tpch/queries/q15.sql create mode 100644 benchmarks/tpch/queries/q16.sql create mode 100644 benchmarks/tpch/queries/q17.sql create mode 100644 benchmarks/tpch/queries/q18.sql create mode 100644 benchmarks/tpch/queries/q19.sql create mode 100644 benchmarks/tpch/queries/q2.sql create mode 100644 benchmarks/tpch/queries/q20.sql create mode 100644 benchmarks/tpch/queries/q21.sql create mode 100644 benchmarks/tpch/queries/q22.sql create mode 100644 benchmarks/tpch/queries/q3.sql create mode 100644 benchmarks/tpch/queries/q4.sql create mode 100644 benchmarks/tpch/queries/q5.sql create mode 100644 benchmarks/tpch/queries/q6.sql create mode 100644 benchmarks/tpch/queries/q7.sql create mode 100644 
benchmarks/tpch/queries/q8.sql create mode 100644 benchmarks/tpch/queries/q9.sql create mode 100755 benchmarks/tpch/tpch-gen.sh create mode 100644 benchmarks/tpch/tpch.py diff --git a/benchmarks/tpch/.gitignore b/benchmarks/tpch/.gitignore new file mode 100644 index 000000000..4471c6d15 --- /dev/null +++ b/benchmarks/tpch/.gitignore @@ -0,0 +1,2 @@ +data +results.csv \ No newline at end of file diff --git a/benchmarks/tpch/README.md b/benchmarks/tpch/README.md new file mode 100644 index 000000000..a118a7449 --- /dev/null +++ b/benchmarks/tpch/README.md @@ -0,0 +1,78 @@ + + +# DataFusion Python Benchmarks Derived from TPC-H + +## Create Release Build + +From repo root: + +```bash +maturin develop --release +``` + +Note that release builds take a really long time, so you may want to temporarily comment out this section of the +root Cargo.toml when frequently building. + +```toml +[profile.release] +lto = true +codegen-units = 1 +``` + +## Generate Data + +```bash +./tpch-gen.sh 1 +``` + +## Run Benchmarks + +```bash +python tpch.py ./data ./queries +``` + +A summary of the benchmark timings will be written to `results.csv`. For example: + +```csv +setup,1.4 +q1,2978.6 +q2,679.7 +q3,2943.7 +q4,2894.9 +q5,3592.3 +q6,1691.4 +q7,3003.9 +q8,3818.7 +q9,4237.9 +q10,2344.7 +q11,526.1 +q12,2284.6 +q13,1009.2 +q14,1738.4 +q15,1942.1 +q16,499.8 +q17,5178.9 +q18,4127.7 +q19,2056.6 +q20,2162.5 +q21,8046.5 +q22,754.9 +total,58513.2 +``` \ No newline at end of file diff --git a/benchmarks/tpch/create_tables.sql b/benchmarks/tpch/create_tables.sql new file mode 100644 index 000000000..4b2209c4b --- /dev/null +++ b/benchmarks/tpch/create_tables.sql @@ -0,0 +1,119 @@ +-- Schema derived from TPC-H schema under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
+ +CREATE EXTERNAL TABLE customer ( + c_custkey INT NOT NULL, + c_name VARCHAR NOT NULL, + c_address VARCHAR NOT NULL, + c_nationkey INT NOT NULL, + c_phone VARCHAR NOT NULL, + c_acctbal DECIMAL(15, 2) NOT NULL, + c_mktsegment VARCHAR NOT NULL, + c_comment VARCHAR NOT NULL, + c_extra VARCHAR NOT NULL, +) +STORED AS CSV +WITH HEADER ROW DELIMITER '|' +LOCATION '$PATH/customer.csv'; + +CREATE EXTERNAL TABLE lineitem ( + l_orderkey INT NOT NULL, + l_partkey INT NOT NULL, + l_suppkey INT NOT NULL, + l_linenumber INT NOT NULL, + l_quantity DECIMAL(15, 2) NOT NULL, + l_extendedprice DECIMAL(15, 2) NOT NULL, + l_discount DECIMAL(15, 2) NOT NULL, + l_tax DECIMAL(15, 2) NOT NULL, + l_returnflag VARCHAR NOT NULL, + l_linestatus VARCHAR NOT NULL, + l_shipdate DATE NOT NULL, + l_commitdate DATE NOT NULL, + l_receiptdate DATE NOT NULL, + l_shipinstruct VARCHAR NOT NULL, + l_shipmode VARCHAR NOT NULL, + l_comment VARCHAR NOT NULL, + l_extra VARCHAR NOT NULL, +) +STORED AS CSV +WITH HEADER ROW DELIMITER '|' +LOCATION '$PATH/lineitem.csv'; + +CREATE EXTERNAL TABLE nation ( + n_nationkey INT NOT NULL, + n_name VARCHAR NOT NULL, + n_regionkey INT NOT NULL, + n_comment VARCHAR NOT NULL, + n_extra VARCHAR NOT NULL, +) +STORED AS CSV +WITH HEADER ROW DELIMITER '|' +LOCATION '$PATH/nation.csv'; + +CREATE EXTERNAL TABLE orders ( + o_orderkey INT NOT NULL, + o_custkey INT NOT NULL, + o_orderstatus VARCHAR NOT NULL, + o_totalprice DECIMAL(15, 2) NOT NULL, + o_orderdate DATE NOT NULL, + o_orderpriority VARCHAR NOT NULL, + o_clerk VARCHAR NOT NULL, + o_shippriority INT NULL, + o_comment VARCHAR NOT NULL, + o_extra VARCHAR NOT NULL, +) +STORED AS CSV +WITH HEADER ROW DELIMITER '|' +LOCATION '$PATH/orders.csv'; + +CREATE EXTERNAL TABLE part ( + p_partkey INT NOT NULL, + p_name VARCHAR NOT NULL, + p_mfgr VARCHAR NOT NULL, + p_brand VARCHAR NOT NULL, + p_type VARCHAR NOT NULL, + p_size INT NULL, + p_container VARCHAR NOT NULL, + p_retailprice DECIMAL(15, 2) NOT NULL, + p_comment VARCHAR NOT 
NULL, + p_extra VARCHAR NOT NULL, +) +STORED AS CSV +WITH HEADER ROW DELIMITER '|' +LOCATION '$PATH/part.csv'; + +CREATE EXTERNAL TABLE partsupp ( + ps_partkey INT NOT NULL, + ps_suppkey INT NOT NULL, + ps_availqty INT NOT NULL, + ps_supplycost DECIMAL(15, 2) NOT NULL, + ps_comment VARCHAR NOT NULL, + ps_extra VARCHAR NOT NULL, +) +STORED AS CSV +WITH HEADER ROW DELIMITER '|' +LOCATION '$PATH/partsupp.csv'; + +CREATE EXTERNAL TABLE region ( + r_regionkey INT NOT NULL, + r_name VARCHAR NOT NULL, + r_comment VARCHAR NOT NULL, + r_extra VARCHAR NOT NULL, +) +STORED AS CSV +WITH HEADER ROW DELIMITER '|' +LOCATION '$PATH/region.csv'; + +CREATE EXTERNAL TABLE supplier ( + s_suppkey INT NOT NULL, + s_name VARCHAR NOT NULL, + s_address VARCHAR NOT NULL, + s_nationkey INT NOT NULL, + s_phone VARCHAR NOT NULL, + s_acctbal DECIMAL(15, 2) NOT NULL, + s_comment VARCHAR NOT NULL, + s_extra VARCHAR NOT NULL, +) +STORED AS CSV +WITH HEADER ROW DELIMITER '|' +LOCATION '$PATH/supplier.csv'; \ No newline at end of file diff --git a/benchmarks/tpch/queries/q1.sql b/benchmarks/tpch/queries/q1.sql new file mode 100644 index 000000000..e7e8e32b8 --- /dev/null +++ b/benchmarks/tpch/queries/q1.sql @@ -0,0 +1,23 @@ +-- Benchmark Query 1 derived from TPC-H query 1 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
+select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +from + lineitem +where + l_shipdate <= date '1998-12-01' - interval '68 days' +group by + l_returnflag, + l_linestatus +order by + l_returnflag, + l_linestatus; diff --git a/benchmarks/tpch/queries/q10.sql b/benchmarks/tpch/queries/q10.sql new file mode 100644 index 000000000..8391f6277 --- /dev/null +++ b/benchmarks/tpch/queries/q10.sql @@ -0,0 +1,33 @@ +-- Benchmark Query 10 derived from TPC-H query 10 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + c_custkey, + c_name, + sum(l_extendedprice * (1 - l_discount)) as revenue, + c_acctbal, + n_name, + c_address, + c_phone, + c_comment +from + customer, + orders, + lineitem, + nation +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate >= date '1993-07-01' + and o_orderdate < date '1993-07-01' + interval '3' month + and l_returnflag = 'R' + and c_nationkey = n_nationkey +group by + c_custkey, + c_name, + c_acctbal, + c_phone, + n_name, + c_address, + c_comment +order by + revenue desc limit 20; diff --git a/benchmarks/tpch/queries/q11.sql b/benchmarks/tpch/queries/q11.sql new file mode 100644 index 000000000..58776d369 --- /dev/null +++ b/benchmarks/tpch/queries/q11.sql @@ -0,0 +1,29 @@ +-- Benchmark Query 11 derived from TPC-H query 11 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
+select + ps_partkey, + sum(ps_supplycost * ps_availqty) as value +from + partsupp, + supplier, + nation +where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'ALGERIA' +group by + ps_partkey having + sum(ps_supplycost * ps_availqty) > ( + select + sum(ps_supplycost * ps_availqty) * 0.0001000000 + from + partsupp, + supplier, + nation + where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'ALGERIA' + ) +order by + value desc; diff --git a/benchmarks/tpch/queries/q12.sql b/benchmarks/tpch/queries/q12.sql new file mode 100644 index 000000000..0b973de98 --- /dev/null +++ b/benchmarks/tpch/queries/q12.sql @@ -0,0 +1,30 @@ +-- Benchmark Query 12 derived from TPC-H query 12 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + l_shipmode, + sum(case + when o_orderpriority = '1-URGENT' + or o_orderpriority = '2-HIGH' + then 1 + else 0 + end) as high_line_count, + sum(case + when o_orderpriority <> '1-URGENT' + and o_orderpriority <> '2-HIGH' + then 1 + else 0 + end) as low_line_count +from + orders, + lineitem +where + o_orderkey = l_orderkey + and l_shipmode in ('FOB', 'SHIP') + and l_commitdate < l_receiptdate + and l_shipdate < l_commitdate + and l_receiptdate >= date '1995-01-01' + and l_receiptdate < date '1995-01-01' + interval '1' year +group by + l_shipmode +order by + l_shipmode; diff --git a/benchmarks/tpch/queries/q13.sql b/benchmarks/tpch/queries/q13.sql new file mode 100644 index 000000000..145dd6f10 --- /dev/null +++ b/benchmarks/tpch/queries/q13.sql @@ -0,0 +1,22 @@ +-- Benchmark Query 13 derived from TPC-H query 13 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
+select + c_count, + count(*) as custdist +from + ( + select + c_custkey, + count(o_orderkey) + from + customer left outer join orders on + c_custkey = o_custkey + and o_comment not like '%express%requests%' + group by + c_custkey + ) as c_orders (c_custkey, c_count) +group by + c_count +order by + custdist desc, + c_count desc; diff --git a/benchmarks/tpch/queries/q14.sql b/benchmarks/tpch/queries/q14.sql new file mode 100644 index 000000000..1a91a04df --- /dev/null +++ b/benchmarks/tpch/queries/q14.sql @@ -0,0 +1,15 @@ +-- Benchmark Query 14 derived from TPC-H query 14 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + 100.00 * sum(case + when p_type like 'PROMO%' + then l_extendedprice * (1 - l_discount) + else 0 + end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue +from + lineitem, + part +where + l_partkey = p_partkey + and l_shipdate >= date '1995-02-01' + and l_shipdate < date '1995-02-01' + interval '1' month; diff --git a/benchmarks/tpch/queries/q15.sql b/benchmarks/tpch/queries/q15.sql new file mode 100644 index 000000000..68cc32cb7 --- /dev/null +++ b/benchmarks/tpch/queries/q15.sql @@ -0,0 +1,33 @@ +-- Benchmark Query 15 derived from TPC-H query 15 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
+create view revenue0 (supplier_no, total_revenue) as + select + l_suppkey, + sum(l_extendedprice * (1 - l_discount)) + from + lineitem + where + l_shipdate >= date '1996-08-01' + and l_shipdate < date '1996-08-01' + interval '3' month + group by + l_suppkey; +select + s_suppkey, + s_name, + s_address, + s_phone, + total_revenue +from + supplier, + revenue0 +where + s_suppkey = supplier_no + and total_revenue = ( + select + max(total_revenue) + from + revenue0 + ) +order by + s_suppkey; +drop view revenue0; diff --git a/benchmarks/tpch/queries/q16.sql b/benchmarks/tpch/queries/q16.sql new file mode 100644 index 000000000..098b4f3b3 --- /dev/null +++ b/benchmarks/tpch/queries/q16.sql @@ -0,0 +1,32 @@ +-- Benchmark Query 16 derived from TPC-H query 16 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt +from + partsupp, + part +where + p_partkey = ps_partkey + and p_brand <> 'Brand#14' + and p_type not like 'SMALL PLATED%' + and p_size in (14, 6, 5, 31, 49, 15, 41, 47) + and ps_suppkey not in ( + select + s_suppkey + from + supplier + where + s_comment like '%Customer%Complaints%' + ) +group by + p_brand, + p_type, + p_size +order by + supplier_cnt desc, + p_brand, + p_type, + p_size; diff --git a/benchmarks/tpch/queries/q17.sql b/benchmarks/tpch/queries/q17.sql new file mode 100644 index 000000000..ed02d7b77 --- /dev/null +++ b/benchmarks/tpch/queries/q17.sql @@ -0,0 +1,19 @@ +-- Benchmark Query 17 derived from TPC-H query 17 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
+select + sum(l_extendedprice) / 7.0 as avg_yearly +from + lineitem, + part +where + p_partkey = l_partkey + and p_brand = 'Brand#42' + and p_container = 'LG BAG' + and l_quantity < ( + select + 0.2 * avg(l_quantity) + from + lineitem + where + l_partkey = p_partkey + ); diff --git a/benchmarks/tpch/queries/q18.sql b/benchmarks/tpch/queries/q18.sql new file mode 100644 index 000000000..cf1f8c89a --- /dev/null +++ b/benchmarks/tpch/queries/q18.sql @@ -0,0 +1,34 @@ +-- Benchmark Query 18 derived from TPC-H query 18 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice, + sum(l_quantity) +from + customer, + orders, + lineitem +where + o_orderkey in ( + select + l_orderkey + from + lineitem + group by + l_orderkey having + sum(l_quantity) > 313 + ) + and c_custkey = o_custkey + and o_orderkey = l_orderkey +group by + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice +order by + o_totalprice desc, + o_orderdate limit 100; diff --git a/benchmarks/tpch/queries/q19.sql b/benchmarks/tpch/queries/q19.sql new file mode 100644 index 000000000..3968f0d24 --- /dev/null +++ b/benchmarks/tpch/queries/q19.sql @@ -0,0 +1,37 @@ +-- Benchmark Query 19 derived from TPC-H query 19 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
+select + sum(l_extendedprice* (1 - l_discount)) as revenue +from + lineitem, + part +where + ( + p_partkey = l_partkey + and p_brand = 'Brand#21' + and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + and l_quantity >= 8 and l_quantity <= 8 + 10 + and p_size between 1 and 5 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#13' + and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + and l_quantity >= 20 and l_quantity <= 20 + 10 + and p_size between 1 and 10 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#52' + and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + and l_quantity >= 30 and l_quantity <= 30 + 10 + and p_size between 1 and 15 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ); diff --git a/benchmarks/tpch/queries/q2.sql b/benchmarks/tpch/queries/q2.sql new file mode 100644 index 000000000..46ec5d239 --- /dev/null +++ b/benchmarks/tpch/queries/q2.sql @@ -0,0 +1,45 @@ +-- Benchmark Query 2 derived from TPC-H query 2 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
+select + s_acctbal, + s_name, + n_name, + p_partkey, + p_mfgr, + s_address, + s_phone, + s_comment +from + part, + supplier, + partsupp, + nation, + region +where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and p_size = 48 + and p_type like '%TIN' + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'ASIA' + and ps_supplycost = ( + select + min(ps_supplycost) + from + partsupp, + supplier, + nation, + region + where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'ASIA' + ) +order by + s_acctbal desc, + n_name, + s_name, + p_partkey limit 100; diff --git a/benchmarks/tpch/queries/q20.sql b/benchmarks/tpch/queries/q20.sql new file mode 100644 index 000000000..5bb16563b --- /dev/null +++ b/benchmarks/tpch/queries/q20.sql @@ -0,0 +1,39 @@ +-- Benchmark Query 20 derived from TPC-H query 20 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + s_name, + s_address +from + supplier, + nation +where + s_suppkey in ( + select + ps_suppkey + from + partsupp + where + ps_partkey in ( + select + p_partkey + from + part + where + p_name like 'blanched%' + ) + and ps_availqty > ( + select + 0.5 * sum(l_quantity) + from + lineitem + where + l_partkey = ps_partkey + and l_suppkey = ps_suppkey + and l_shipdate >= date '1993-01-01' + and l_shipdate < date '1993-01-01' + interval '1' year + ) + ) + and s_nationkey = n_nationkey + and n_name = 'KENYA' +order by + s_name; diff --git a/benchmarks/tpch/queries/q21.sql b/benchmarks/tpch/queries/q21.sql new file mode 100644 index 000000000..6f84b876e --- /dev/null +++ b/benchmarks/tpch/queries/q21.sql @@ -0,0 +1,41 @@ +-- Benchmark Query 21 derived from TPC-H query 21 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
+select + s_name, + count(*) as numwait +from + supplier, + lineitem l1, + orders, + nation +where + s_suppkey = l1.l_suppkey + and o_orderkey = l1.l_orderkey + and o_orderstatus = 'F' + and l1.l_receiptdate > l1.l_commitdate + and exists ( + select + * + from + lineitem l2 + where + l2.l_orderkey = l1.l_orderkey + and l2.l_suppkey <> l1.l_suppkey + ) + and not exists ( + select + * + from + lineitem l3 + where + l3.l_orderkey = l1.l_orderkey + and l3.l_suppkey <> l1.l_suppkey + and l3.l_receiptdate > l3.l_commitdate + ) + and s_nationkey = n_nationkey + and n_name = 'ARGENTINA' +group by + s_name +order by + numwait desc, + s_name limit 100; diff --git a/benchmarks/tpch/queries/q22.sql b/benchmarks/tpch/queries/q22.sql new file mode 100644 index 000000000..65ea49b04 --- /dev/null +++ b/benchmarks/tpch/queries/q22.sql @@ -0,0 +1,39 @@ +-- Benchmark Query 22 derived from TPC-H query 22 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + cntrycode, + count(*) as numcust, + sum(c_acctbal) as totacctbal +from + ( + select + substring(c_phone from 1 for 2) as cntrycode, + c_acctbal + from + customer + where + substring(c_phone from 1 for 2) in + ('24', '34', '16', '30', '33', '14', '13') + and c_acctbal > ( + select + avg(c_acctbal) + from + customer + where + c_acctbal > 0.00 + and substring(c_phone from 1 for 2) in + ('24', '34', '16', '30', '33', '14', '13') + ) + and not exists ( + select + * + from + orders + where + o_custkey = c_custkey + ) + ) as custsale +group by + cntrycode +order by + cntrycode; diff --git a/benchmarks/tpch/queries/q3.sql b/benchmarks/tpch/queries/q3.sql new file mode 100644 index 000000000..161f2e1e4 --- /dev/null +++ b/benchmarks/tpch/queries/q3.sql @@ -0,0 +1,24 @@ +-- Benchmark Query 3 derived from TPC-H query 3 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
+select + l_orderkey, + sum(l_extendedprice * (1 - l_discount)) as revenue, + o_orderdate, + o_shippriority +from + customer, + orders, + lineitem +where + c_mktsegment = 'BUILDING' + and c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate < date '1995-03-15' + and l_shipdate > date '1995-03-15' +group by + l_orderkey, + o_orderdate, + o_shippriority +order by + revenue desc, + o_orderdate limit 10; diff --git a/benchmarks/tpch/queries/q4.sql b/benchmarks/tpch/queries/q4.sql new file mode 100644 index 000000000..e444dbfce --- /dev/null +++ b/benchmarks/tpch/queries/q4.sql @@ -0,0 +1,23 @@ +-- Benchmark Query 4 derived from TPC-H query 4 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + o_orderpriority, + count(*) as order_count +from + orders +where + o_orderdate >= date '1995-04-01' + and o_orderdate < date '1995-04-01' + interval '3' month + and exists ( + select + * + from + lineitem + where + l_orderkey = o_orderkey + and l_commitdate < l_receiptdate + ) +group by + o_orderpriority +order by + o_orderpriority; diff --git a/benchmarks/tpch/queries/q5.sql b/benchmarks/tpch/queries/q5.sql new file mode 100644 index 000000000..4426bd245 --- /dev/null +++ b/benchmarks/tpch/queries/q5.sql @@ -0,0 +1,26 @@ +-- Benchmark Query 5 derived from TPC-H query 5 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
+select + n_name, + sum(l_extendedprice * (1 - l_discount)) as revenue +from + customer, + orders, + lineitem, + supplier, + nation, + region +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and l_suppkey = s_suppkey + and c_nationkey = s_nationkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'AFRICA' + and o_orderdate >= date '1994-01-01' + and o_orderdate < date '1994-01-01' + interval '1' year +group by + n_name +order by + revenue desc; diff --git a/benchmarks/tpch/queries/q6.sql b/benchmarks/tpch/queries/q6.sql new file mode 100644 index 000000000..3d6e51cfe --- /dev/null +++ b/benchmarks/tpch/queries/q6.sql @@ -0,0 +1,11 @@ +-- Benchmark Query 6 derived from TPC-H query 6 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + sum(l_extendedprice * l_discount) as revenue +from + lineitem +where + l_shipdate >= date '1994-01-01' + and l_shipdate < date '1994-01-01' + interval '1' year + and l_discount between 0.04 - 0.01 and 0.04 + 0.01 + and l_quantity < 24; diff --git a/benchmarks/tpch/queries/q7.sql b/benchmarks/tpch/queries/q7.sql new file mode 100644 index 000000000..6e36ad616 --- /dev/null +++ b/benchmarks/tpch/queries/q7.sql @@ -0,0 +1,41 @@ +-- Benchmark Query 7 derived from TPC-H query 7 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
+select + supp_nation, + cust_nation, + l_year, + sum(volume) as revenue +from + ( + select + n1.n_name as supp_nation, + n2.n_name as cust_nation, + extract(year from l_shipdate) as l_year, + l_extendedprice * (1 - l_discount) as volume + from + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2 + where + s_suppkey = l_suppkey + and o_orderkey = l_orderkey + and c_custkey = o_custkey + and s_nationkey = n1.n_nationkey + and c_nationkey = n2.n_nationkey + and ( + (n1.n_name = 'GERMANY' and n2.n_name = 'IRAQ') + or (n1.n_name = 'IRAQ' and n2.n_name = 'GERMANY') + ) + and l_shipdate between date '1995-01-01' and date '1996-12-31' + ) as shipping +group by + supp_nation, + cust_nation, + l_year +order by + supp_nation, + cust_nation, + l_year; diff --git a/benchmarks/tpch/queries/q8.sql b/benchmarks/tpch/queries/q8.sql new file mode 100644 index 000000000..e28235ed4 --- /dev/null +++ b/benchmarks/tpch/queries/q8.sql @@ -0,0 +1,39 @@ +-- Benchmark Query 8 derived from TPC-H query 8 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
+select + o_year, + sum(case + when nation = 'IRAQ' then volume + else 0 + end) / sum(volume) as mkt_share +from + ( + select + extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) as volume, + n2.n_name as nation + from + part, + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2, + region + where + p_partkey = l_partkey + and s_suppkey = l_suppkey + and l_orderkey = o_orderkey + and o_custkey = c_custkey + and c_nationkey = n1.n_nationkey + and n1.n_regionkey = r_regionkey + and r_name = 'MIDDLE EAST' + and s_nationkey = n2.n_nationkey + and o_orderdate between date '1995-01-01' and date '1996-12-31' + and p_type = 'LARGE PLATED STEEL' + ) as all_nations +group by + o_year +order by + o_year; diff --git a/benchmarks/tpch/queries/q9.sql b/benchmarks/tpch/queries/q9.sql new file mode 100644 index 000000000..86ae02482 --- /dev/null +++ b/benchmarks/tpch/queries/q9.sql @@ -0,0 +1,34 @@ +-- Benchmark Query 9 derived from TPC-H query 9 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + nation, + o_year, + sum(amount) as sum_profit +from + ( + select + n_name as nation, + extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount + from + part, + supplier, + lineitem, + partsupp, + orders, + nation + where + s_suppkey = l_suppkey + and ps_suppkey = l_suppkey + and ps_partkey = l_partkey + and p_partkey = l_partkey + and o_orderkey = l_orderkey + and s_nationkey = n_nationkey + and p_name like '%moccasin%' + ) as profit +group by + nation, + o_year +order by + nation, + o_year desc; diff --git a/benchmarks/tpch/tpch-gen.sh b/benchmarks/tpch/tpch-gen.sh new file mode 100755 index 000000000..e27472a3d --- /dev/null +++ b/benchmarks/tpch/tpch-gen.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +mkdir -p data/answers 2>/dev/null + +set -e + +#pushd .. +#. ./dev/build-set-env.sh +#popd + +# Generate data into ./data unless it already exists (check the renamed .csv, since the .tbl files are moved below) +FILE=./data/supplier.csv +if test -f "$FILE"; then + echo "$FILE exists." +else + docker run -v `pwd`/data:/data -it --rm ghcr.io/databloom-ai/tpch-docker:main -vf -s $1 + + # workaround for https://github.com/apache/arrow-datafusion/issues/6147 + mv data/customer.tbl data/customer.csv + mv data/lineitem.tbl data/lineitem.csv + mv data/nation.tbl data/nation.csv + mv data/orders.tbl data/orders.csv + mv data/part.tbl data/part.csv + mv data/partsupp.tbl data/partsupp.csv + mv data/region.tbl data/region.csv + mv data/supplier.tbl data/supplier.csv + + ls -l data +fi + +# Copy expected answers (at SF=1) into the ./data/answers directory if it does not already exist +FILE=./data/answers/q1.out +if test -f "$FILE"; then + echo "$FILE exists." 
+else + docker run -v `pwd`/data:/data -it --entrypoint /bin/bash --rm ghcr.io/databloom-ai/tpch-docker:main -c "cp /opt/tpch/2.18.0_rc2/dbgen/answers/* /data/answers/" +fi \ No newline at end of file diff --git a/benchmarks/tpch/tpch.py b/benchmarks/tpch/tpch.py new file mode 100644 index 000000000..ea830a1ff --- /dev/null +++ b/benchmarks/tpch/tpch.py @@ -0,0 +1,102 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import argparse +from datafusion import SessionContext +import time + + +def bench(data_path, query_path): + with open("results.csv", "w") as results: + # start timing the setup phase (context creation and table registration) + start = time.time() + total_time_millis = 0 + + # create context + # runtime = ( + # RuntimeConfig() + # .with_disk_manager_os() + # .with_fair_spill_pool(10000000) + # ) + # config = ( + # SessionConfig() + # .with_create_default_catalog_and_schema(True) + # .with_default_catalog_and_schema("datafusion", "tpch") + # .with_information_schema(True) + # ) + # ctx = SessionContext(config, runtime) + + ctx = SessionContext() + print("Configuration:\n", ctx) + + # register tables + with open("create_tables.sql") as f: + sql = "" + for line in f.readlines(): + if line.startswith("--"): + continue + sql = sql + line + if sql.strip().endswith(";"): + sql = sql.strip().replace("$PATH", data_path) + ctx.sql(sql) + sql = "" + + end = time.time() + time_millis = (end - start) * 1000 + total_time_millis += time_millis + print("setup,{}".format(round(time_millis, 1))) + results.write("setup,{}\n".format(round(time_millis, 1))) + results.flush() + + # run queries + for query in range(1, 23): + with open("{}/q{}.sql".format(query_path, query)) as f: + text = f.read() + tmp = text.split(";") + queries = [] + for stmt in tmp: + if len(stmt.strip()) > 0: + queries.append(stmt.strip()) + + try: + start = time.time() + for sql in queries: + print(sql) + df = ctx.sql(sql) + # result_set = df.collect() + df.show() + end = time.time() + time_millis = (end - start) * 1000 + total_time_millis += time_millis + print("q{},{}".format(query, round(time_millis, 1))) + results.write( + "q{},{}\n".format(query, round(time_millis, 1)) + ) + results.flush() + except Exception as e: + print("query", query, "failed", e) + + print("total,{}".format(round(total_time_millis, 1))) + results.write("total,{}\n".format(round(total_time_millis, 1))) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + 
parser.add_argument("data_path") + parser.add_argument("query_path") + args = parser.parse_args() + bench(args.data_path, args.query_path) diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index c7754f350..6d0fee185 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -42,4 +42,6 @@ Cargo.lock .history *rat.txt */.git -.github/* \ No newline at end of file +.github/* +benchmarks/tpch/queries/q*.sql +benchmarks/tpch/create_tables.sql \ No newline at end of file diff --git a/src/context.rs b/src/context.rs index b7f82230f..b603c010e 100644 --- a/src/context.rs +++ b/src/context.rs @@ -224,7 +224,7 @@ impl PySessionContext { let config = if let Some(c) = config { c.config } else { - SessionConfig::default() + SessionConfig::default().with_information_schema(true) }; let runtime_config = if let Some(c) = runtime { c.config From 31a86eefddbb00600c95fc4b0a73cb82704d5d35 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 3 May 2023 19:47:15 -0600 Subject: [PATCH 033/413] Add db-benchmark (#365) * Add db-benchmark * python lint --- benchmarks/db-benchmark/README.md | 31 ++ .../db-benchmark/db-benchmark.dockerfile | 106 +++++++ benchmarks/db-benchmark/groupby-datafusion.py | 288 ++++++++++++++++++ benchmarks/db-benchmark/join-datafusion.py | 238 +++++++++++++++ benchmarks/db-benchmark/run-bench.sh | 21 ++ 5 files changed, 684 insertions(+) create mode 100644 benchmarks/db-benchmark/README.md create mode 100644 benchmarks/db-benchmark/db-benchmark.dockerfile create mode 100644 benchmarks/db-benchmark/groupby-datafusion.py create mode 100755 benchmarks/db-benchmark/join-datafusion.py create mode 100644 benchmarks/db-benchmark/run-bench.sh diff --git a/benchmarks/db-benchmark/README.md b/benchmarks/db-benchmark/README.md new file mode 100644 index 000000000..fe268199f --- /dev/null +++ b/benchmarks/db-benchmark/README.md @@ -0,0 +1,31 @@ + + +# Run db-benchmark + +This directory contains scripts for 
running DataFusion with the h2oai db-benchmark suite (https://github.com/h2oai/db-benchmark). + +## Directions + +Run the following from the root directory of this repository (the Docker build copies it into the image as `arrow-datafusion`): + +```bash +$ docker buildx build -t db-benchmark -f benchmarks/db-benchmark/db-benchmark.dockerfile . +$ docker run --privileged db-benchmark +``` diff --git a/benchmarks/db-benchmark/db-benchmark.dockerfile b/benchmarks/db-benchmark/db-benchmark.dockerfile new file mode 100644 index 000000000..b21d3a0d1 --- /dev/null +++ b/benchmarks/db-benchmark/db-benchmark.dockerfile @@ -0,0 +1,106 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +FROM ubuntu +ARG DEBIAN_FRONTEND=noninteractive +ARG TARGETPLATFORM + +RUN apt-get update && \ + apt-get install -y git build-essential + +# Install R, curl, and python deps +RUN apt-get -y install --no-install-recommends --no-install-suggests \ + ca-certificates software-properties-common gnupg2 gnupg1 \ + && apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 \ + && add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/' \ + && apt-get -y install r-base \ + && apt-get -y install curl \ + && apt-get -y install python3.8 \ + && apt-get -y install python3-pip + +# Install R libraries +RUN R -e "install.packages('data.table',dependencies=TRUE, repos='http://cran.rstudio.com/')" \ + && R -e "install.packages('dplyr',dependencies=TRUE, repos='http://cran.rstudio.com/')" + +# Install Rust +RUN curl https://sh.rustup.rs -sSf | bash -s -- -y +ENV PATH="/root/.cargo/bin:${PATH}" + +# Clone db-benchmark and download data +RUN git clone https://github.com/h2oai/db-benchmark \ + && cd db-benchmark \ + && Rscript _data/groupby-datagen.R 1e7 1e2 0 0 \ + && Rscript _data/join-datagen.R 1e7 0 0 0 \ + && mkdir data \ + && mv G1_1e7_1e2_0_0.csv data \ + && mv J1_1e7_1e1_0_0.csv data \ + && mv J1_1e7_1e4_0_0.csv data \ + && mv J1_1e7_1e7_0_0.csv data \ + && mv J1_1e7_NA_0_0.csv data \ + && cd .. + +# Clone datafusion-python and build python library +# Not sure if the wheel will be the same on all computers +RUN git clone https://github.com/datafusion-contrib/datafusion-python \ + && cd datafusion-python && git reset --hard 368b50ed9662d5e93c70b539f94cceace685265e \ + && python3 -m pip install pip \ + && python3 -m pip install pandas \ + && python3 -m pip install -r requirements.txt \ + && cd .. + +# Copy local arrow-datafusion +COPY . arrow-datafusion + +# 1. 
datafusion-python that builds from datafusion version referenced by datafusion-python +RUN cd datafusion-python \ + && maturin build --release \ + && case "${TARGETPLATFORM}" in \ + */amd64) CPUARCH=x86_64 ;; \ + */arm64) CPUARCH=aarch64 ;; \ + *) exit 1 ;; \ + esac \ + # Version will need to be updated in conjunction with datafusion-python version + && python3 -m pip install target/wheels/datafusion-0.4.0-cp36-abi3-linux_${CPUARCH}.whl \ + && cd .. + +# 2. datafusion-python that builds from local datafusion. Use this when making local changes to datafusion. +# Currently, as of March 5th 2022, this does not build (I think) because datafusion is being split into multiple crates +# and datafusion-python has not yet been updated to reflect this. +# RUN cd datafusion-python \ +# && sed -i '/datafusion =/c\datafusion = { path = "../arrow-datafusion/datafusion", features = ["pyarrow"] }' Cargo.toml \ +# && sed -i '/fuzz-utils/d' ../arrow-datafusion/datafusion/Cargo.toml \ +# && maturin build --release \ +# && case "${TARGETPLATFORM}" in \ +# */amd64) CPUARCH=x86_64 ;; \ +# */arm64) CPUARCH=aarch64 ;; \ +# *) exit 1 ;; \ +# esac \ +# && python3 -m pip install target/wheels/datafusion-0.4.0-cp36-abi3-linux_${CPUARCH}.whl \ +# && cd .. 
+ +# Make datafusion directory in db-benchmark +RUN mkdir db-benchmark/datafusion \ + && cp ../arrow-datafusion/benchmarks/db-benchmark/groupby-datafusion.py db-benchmark/datafusion \ + && cp ../arrow-datafusion/benchmarks/db-benchmark/join-datafusion.py db-benchmark/datafusion \ + && cp ../arrow-datafusion/benchmarks/db-benchmark/run-bench.sh db-benchmark/ \ + && chmod +x db-benchmark/run-bench.sh + +WORKDIR /db-benchmark + +RUN ls && ls -al data/ + +ENTRYPOINT ./run-bench.sh \ No newline at end of file diff --git a/benchmarks/db-benchmark/groupby-datafusion.py b/benchmarks/db-benchmark/groupby-datafusion.py new file mode 100644 index 000000000..7268cc872 --- /dev/null +++ b/benchmarks/db-benchmark/groupby-datafusion.py @@ -0,0 +1,288 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import os +import gc +import timeit +import datafusion as df +from datafusion import functions as f +from datafusion import col +from pyarrow import csv as pacsv + + +print("# groupby-datafusion.py", flush=True) + +# exec(open("./_helpers/helpers.py").read()) + + +def ans_shape(batches): + rows, cols = 0, 0 + for batch in batches: + rows += batch.num_rows + if cols == 0: + cols = batch.num_columns + else: + assert cols == batch.num_columns + return rows, cols + + +# ver = df.__version__ +ver = "7.0.0" +git = "" +task = "groupby" +solution = "datafusion" +fun = ".groupby" +cache = "TRUE" +on_disk = "FALSE" + +data_name = os.environ["SRC_DATANAME"] +src_grp = os.path.join("data", data_name + ".csv") +print("loading dataset %s" % src_grp, flush=True) + +data = pacsv.read_csv( + src_grp, convert_options=pacsv.ConvertOptions(auto_dict_encode=True) +) +print("dataset loaded") + +ctx = df.ExecutionContext() +ctx.register_record_batches("x", [data.to_batches()]) +print("registered record batches") +# cols = ctx.sql("SHOW columns from x") +# ans.show() + +in_rows = data.num_rows +# print(in_rows, flush=True) + +task_init = timeit.default_timer() + +question = "sum v1 by id1" # q1 +gc.collect() +t_start = timeit.default_timer() +ans = ctx.sql("SELECT id1, SUM(v1) AS v1 FROM x GROUP BY id1").collect() +shape = ans_shape(ans) +# print(shape, flush=True) +t = timeit.default_timer() - t_start +print(f"q1: {t}") +# m = memory_usage() +t_start = timeit.default_timer() +df = ctx.create_dataframe([ans]) +chk = df.aggregate([], [f.sum(col("v1"))]).collect()[0].column(0)[0] +chkt = timeit.default_timer() - t_start +# write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +del ans +gc.collect() + +question = "sum v1 by id1:id2" # q2 +gc.collect() +t_start = timeit.default_timer() 
+ans = ctx.sql( + "SELECT id1, id2, SUM(v1) AS v1 FROM x GROUP BY id1, id2" +).collect() +shape = ans_shape(ans) +# print(shape, flush=True) +t = timeit.default_timer() - t_start +print(f"q2: {t}") +# m = memory_usage() +t_start = timeit.default_timer() +df = ctx.create_dataframe([ans]) +chk = df.aggregate([], [f.sum(col("v1"))]).collect()[0].column(0)[0] +chkt = timeit.default_timer() - t_start +# write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +del ans +gc.collect() + +question = "sum v1 mean v3 by id3" # q3 +gc.collect() +t_start = timeit.default_timer() +ans = ctx.sql( + "SELECT id3, SUM(v1) AS v1, AVG(v3) AS v3 FROM x GROUP BY id3" +).collect() +shape = ans_shape(ans) +# print(shape, flush=True) +t = timeit.default_timer() - t_start +print(f"q3: {t}") +# m = memory_usage() +t_start = timeit.default_timer() +df = ctx.create_dataframe([ans]) +chk = ( + df.aggregate([], [f.sum(col("v1")), f.sum(col("v3"))]) + .collect()[0] + .to_pandas() + .to_numpy()[0] +) +chkt = timeit.default_timer() - t_start +# write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +del ans +gc.collect() + +question = "mean v1:v3 by id4" # q4 +gc.collect() +t_start = timeit.default_timer() +ans = ctx.sql( + "SELECT id4, AVG(v1) AS v1, AVG(v2) AS v2, AVG(v3) AS v3 FROM x GROUP BY id4" +).collect() +shape = ans_shape(ans) +# print(shape, flush=True) +t = timeit.default_timer() - t_start +print(f"q4: {t}") +# m = memory_usage() +t_start = timeit.default_timer() +df = ctx.create_dataframe([ans]) +chk = ( + df.aggregate([], [f.sum(col("v1")), f.sum(col("v2")), f.sum(col("v3"))]) + 
.collect()[0] + .to_pandas() + .to_numpy()[0] +) +chkt = timeit.default_timer() - t_start +# write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +del ans +gc.collect() + +question = "sum v1:v3 by id6" # q5 +gc.collect() +t_start = timeit.default_timer() +ans = ctx.sql( + "SELECT id6, SUM(v1) AS v1, SUM(v2) AS v2, SUM(v3) AS v3 FROM x GROUP BY id6" +).collect() +shape = ans_shape(ans) +# print(shape, flush=True) +t = timeit.default_timer() - t_start +print(f"q5: {t}") +# m = memory_usage() +t_start = timeit.default_timer() +df = ctx.create_dataframe([ans]) +chk = ( + df.aggregate([], [f.sum(col("v1")), f.sum(col("v2")), f.sum(col("v3"))]) + .collect()[0] + .to_pandas() + .to_numpy()[0] +) +chkt = timeit.default_timer() - t_start +# write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +del ans +gc.collect() + +question = "median v3 sd v3 by id4 id5" # q6 +gc.collect() +t_start = timeit.default_timer() +ans = ctx.sql( + "SELECT id4, id5, approx_percentile_cont(v3, .5) AS median_v3, stddev(v3) AS stddev_v3 FROM x GROUP BY id4, id5" +).collect() +shape = ans_shape(ans) +# print(shape, flush=True) +t = timeit.default_timer() - t_start +print(f"q6: {t}") +# m = memory_usage() +t_start = timeit.default_timer() +df = ctx.create_dataframe([ans]) +chk = ( + df.aggregate([], [f.sum(col("median_v3")), f.sum(col("stddev_v3"))]) + .collect()[0] + .to_pandas() + .to_numpy()[0] +) +chkt = timeit.default_timer() - t_start +# write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, 
fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +del ans +gc.collect() + +question = "max v1 - min v2 by id3" # q7 +gc.collect() +t_start = timeit.default_timer() +ans = ctx.sql( + "SELECT id3, MAX(v1) - MIN(v2) AS range_v1_v2 FROM x GROUP BY id3" +).collect() +shape = ans_shape(ans) +# print(shape, flush=True) +t = timeit.default_timer() - t_start +print(f"q7: {t}") +# m = memory_usage() +t_start = timeit.default_timer() +df = ctx.create_dataframe([ans]) +chk = df.aggregate([], [f.sum(col("range_v1_v2"))]).collect()[0].column(0)[0] +chkt = timeit.default_timer() - t_start +# write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +del ans +gc.collect() + +question = "largest two v3 by id6" # q8 +gc.collect() +t_start = timeit.default_timer() +ans = ctx.sql( + "SELECT id6, v3 from (SELECT id6, v3, row_number() OVER (PARTITION BY id6 ORDER BY v3 DESC) AS row FROM x) t WHERE row <= 2" +).collect() +shape = ans_shape(ans) +# print(shape, flush=True) +t = timeit.default_timer() - t_start +print(f"q8: {t}") +# m = memory_usage() +t_start = timeit.default_timer() +df = ctx.create_dataframe([ans]) +chk = df.aggregate([], [f.sum(col("v3"))]).collect()[0].column(0)[0] +chkt = timeit.default_timer() - t_start +# write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +del ans +gc.collect() + +question = "regression v1 v2 by id2 id4" # q9 +gc.collect() +t_start = timeit.default_timer() +ans = ctx.sql("SELECT corr(v1, v2) as corr FROM x GROUP BY id2, id4").collect() +shape = ans_shape(ans) +# print(shape, flush=True) 
+t = timeit.default_timer() - t_start +print(f"q9: {t}") +# m = memory_usage() +t_start = timeit.default_timer() +df = ctx.create_dataframe([ans]) +chk = df.aggregate([], [f.sum(col("corr"))]).collect()[0].column(0)[0] +chkt = timeit.default_timer() - t_start +# write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +del ans +gc.collect() + +question = "sum v3 count by id1:id6" # q10 +gc.collect() +t_start = timeit.default_timer() +ans = ctx.sql( + "SELECT id1, id2, id3, id4, id5, id6, SUM(v3) as v3, COUNT(*) AS cnt FROM x GROUP BY id1, id2, id3, id4, id5, id6" +).collect() +shape = ans_shape(ans) +# print(shape, flush=True) +t = timeit.default_timer() - t_start +print(f"q10: {t}") +# m = memory_usage() +t_start = timeit.default_timer() +df = ctx.create_dataframe([ans]) +chk = ( + df.aggregate([], [f.sum(col("v3")), f.sum(col("cnt"))]) + .collect()[0] + .to_pandas() + .to_numpy()[0] +) +chkt = timeit.default_timer() - t_start +# write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +del ans +gc.collect() + +print( + "grouping finished, took %0.fs" % (timeit.default_timer() - task_init), + flush=True, +) + +exit(0) diff --git a/benchmarks/db-benchmark/join-datafusion.py b/benchmarks/db-benchmark/join-datafusion.py new file mode 100755 index 000000000..1993a5c83 --- /dev/null +++ b/benchmarks/db-benchmark/join-datafusion.py @@ -0,0 +1,238 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import os +import gc +import timeit +import datafusion as df +from datafusion import functions as f +from datafusion import col +from pyarrow import csv as pacsv + + +print("# join-datafusion.py", flush=True) + +# exec(open("./_helpers/helpers.py").read()) + + +def join_to_tbls(data_name): + x_n = int(float(data_name.split("_")[1])) + y_n = [ + "{:.0e}".format(x_n / 1e6), + "{:.0e}".format(x_n / 1e3), + "{:.0e}".format(x_n), + ] + y_n = [ + y_n[0].replace("+0", ""), + y_n[1].replace("+0", ""), + y_n[2].replace("+0", ""), + ] + return [ + data_name.replace("NA", y_n[0]), + data_name.replace("NA", y_n[1]), + data_name.replace("NA", y_n[2]), + ] + + +def ans_shape(batches): + rows, cols = 0, 0 + for batch in batches: + rows += batch.num_rows + if cols == 0: + cols = batch.num_columns + else: + assert cols == batch.num_columns + return rows, cols + + +ver = "6.0.0" +task = "join" +git = "" +solution = "datafusion" +fun = ".join" +cache = "TRUE" +on_disk = "FALSE" + +data_name = os.environ["SRC_DATANAME"] +src_jn_x = os.path.join("data", data_name + ".csv") +y_data_name = join_to_tbls(data_name) +src_jn_y = [ + os.path.join("data", y_data_name[0] + ".csv"), + os.path.join("data", y_data_name[1] + ".csv"), + os.path.join("data", y_data_name[2] + ".csv"), +] +if len(src_jn_y) != 3: + raise Exception("Something went wrong in preparing files used for join") + +print( + "loading datasets " + + data_name 
+ + ", " + + y_data_name[0] + + ", " + + y_data_name[1] + + ", " + + y_data_name[2], + flush=True, +) + +ctx = df.ExecutionContext() + +x_data = pacsv.read_csv( + src_jn_x, convert_options=pacsv.ConvertOptions(auto_dict_encode=True) +) +ctx.register_record_batches("x", [x_data.to_batches()]) +small_data = pacsv.read_csv( + src_jn_y[0], convert_options=pacsv.ConvertOptions(auto_dict_encode=True) +) +ctx.register_record_batches("small", [small_data.to_batches()]) +medium_data = pacsv.read_csv( + src_jn_y[1], convert_options=pacsv.ConvertOptions(auto_dict_encode=True) +) +ctx.register_record_batches("medium", [medium_data.to_batches()]) +large_data = pacsv.read_csv( + src_jn_y[2], convert_options=pacsv.ConvertOptions(auto_dict_encode=True) +) +ctx.register_record_batches("large", [large_data.to_batches()]) + +print(x_data.num_rows, flush=True) +print(small_data.num_rows, flush=True) +print(medium_data.num_rows, flush=True) +print(large_data.num_rows, flush=True) + +task_init = timeit.default_timer() +print("joining...", flush=True) + +question = "small inner on int" # q1 +gc.collect() +t_start = timeit.default_timer() +ans = ctx.sql( + "SELECT x.id1, x.id2, x.id3, x.id4 as xid4, small.id4 as smallid4, x.id5, x.id6, x.v1, small.v2 FROM x INNER JOIN small ON x.id1 = small.id1" +).collect() +# ans = ctx.sql("SELECT * FROM x INNER JOIN small ON x.id1 = small.id1").collect() +# print(set([b.schema for b in ans])) +shape = ans_shape(ans) +# print(shape, flush=True) +t = timeit.default_timer() - t_start +print(f"q1: {t}") +t_start = timeit.default_timer() +df = ctx.create_dataframe([ans]) +chk = df.aggregate([], [f.sum(col("v1"))]).collect()[0].column(0)[0] +chkt = timeit.default_timer() - t_start +# m = memory_usage() +# write_log(task=task, data=data_name, in_rows=x_data.num_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, 
on_disk=on_disk) +del ans +gc.collect() + +question = "medium inner on int" # q2 +gc.collect() +t_start = timeit.default_timer() +ans = ctx.sql( + "SELECT x.id1 as xid1, medium.id1 as mediumid1, x.id2, x.id3, x.id4 as xid4, medium.id4 as mediumid4, x.id5 as xid5, medium.id5 as mediumid5, x.id6, x.v1, medium.v2 FROM x INNER JOIN medium ON x.id2 = medium.id2" +).collect() +shape = ans_shape(ans) +# print(shape, flush=True) +t = timeit.default_timer() - t_start +print(f"q2: {t}") +t_start = timeit.default_timer() +df = ctx.create_dataframe([ans]) +chk = ( + df.aggregate([], [f.sum(col("v1")), f.sum(col("v2"))]) + .collect()[0] + .column(0)[0] +) +chkt = timeit.default_timer() - t_start +# m = memory_usage() +# write_log(task=task, data=data_name, in_rows=x_data.num_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +del ans +gc.collect() + +question = "medium outer on int" # q3 +gc.collect() +t_start = timeit.default_timer() +ans = ctx.sql( + "SELECT x.id1 as xid1, medium.id1 as mediumid1, x.id2, x.id3, x.id4 as xid4, medium.id4 as mediumid4, x.id5 as xid5, medium.id5 as mediumid5, x.id6, x.v1, medium.v2 FROM x LEFT JOIN medium ON x.id2 = medium.id2" +).collect() +shape = ans_shape(ans) +# print(shape, flush=True) +t = timeit.default_timer() - t_start +print(f"q3: {t}") +t_start = timeit.default_timer() +df = ctx.create_dataframe([ans]) +chk = ( + df.aggregate([], [f.sum(col("v1")), f.sum(col("v2"))]) + .collect()[0] + .column(0)[0] +) +chkt = timeit.default_timer() - t_start +# m = memory_usage() +# write_log(task=task, data=data_name, in_rows=x_data.num_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +del ans +gc.collect() + +question = "medium 
inner on factor" # q4 +gc.collect() +t_start = timeit.default_timer() +ans = ctx.sql( + "SELECT x.id1 as xid1, medium.id1 as mediumid1, x.id2, x.id3, x.id4 as xid4, medium.id4 as mediumid4, x.id5 as xid5, medium.id5 as mediumid5, x.id6, x.v1, medium.v2 FROM x INNER JOIN medium ON x.id5 = medium.id5" +).collect() +shape = ans_shape(ans) +# print(shape) +t = timeit.default_timer() - t_start +print(f"q4: {t}") +t_start = timeit.default_timer() +df = ctx.create_dataframe([ans]) +chk = ( + df.aggregate([], [f.sum(col("v1")), f.sum(col("v2"))]) + .collect()[0] + .column(0)[0] +) +chkt = timeit.default_timer() - t_start +# m = memory_usage() +# write_log(task=task, data=data_name, in_rows=x_data.num_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +del ans +gc.collect() + +question = "big inner on int" # q5 +gc.collect() +t_start = timeit.default_timer() +ans = ctx.sql( + "SELECT x.id1 as xid1, large.id1 as largeid1, x.id2 as xid2, large.id2 as largeid2, x.id3, x.id4 as xid4, large.id4 as largeid4, x.id5 as xid5, large.id5 as largeid5, x.id6 as xid6, large.id6 as largeid6, x.v1, large.v2 FROM x INNER JOIN large ON x.id3 = large.id3" +).collect() +shape = ans_shape(ans) +# print(shape) +t = timeit.default_timer() - t_start +print(f"q5: {t}") +t_start = timeit.default_timer() +df = ctx.create_dataframe([ans]) +chk = ( + df.aggregate([], [f.sum(col("v1")), f.sum(col("v2"))]) + .collect()[0] + .column(0)[0] +) +chkt = timeit.default_timer() - t_start +# m = memory_usage() +# write_log(task=task, data=data_name, in_rows=x_data.num_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +del ans +gc.collect() + +print( + "joining finished, took %0.fs" % 
(timeit.default_timer() - task_init), + flush=True, +) + +exit(0) diff --git a/benchmarks/db-benchmark/run-bench.sh b/benchmarks/db-benchmark/run-bench.sh new file mode 100644 index 000000000..9ccc26804 --- /dev/null +++ b/benchmarks/db-benchmark/run-bench.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+set -e + +SRC_DATANAME=G1_1e7_1e2_0_0 python3 datafusion/groupby-datafusion.py +SRC_DATANAME=J1_1e7_NA_0_0 python3 datafusion/join-datafusion.py From 4175134cdf0ed5168282eb88feeb35a7540ac312 Mon Sep 17 00:00:00 2001 From: Matthew Powers Date: Thu, 4 May 2023 10:43:29 -0300 Subject: [PATCH 034/413] First pass of documentation in mdBook (#364) * First pass of documentation in mdBook * Address code review comments * Add missing ASF headers --- .gitignore | 4 +- docs/mdbook/README.md | 33 +++++ docs/mdbook/book.toml | 23 ++++ docs/mdbook/src/SUMMARY.md | 24 ++++ .../src/images/datafusion-jupyterlab.png | Bin 0 -> 86143 bytes docs/mdbook/src/index.md | 43 ++++++ docs/mdbook/src/installation.md | 62 +++++++++ docs/mdbook/src/quickstart.md | 77 +++++++++++ docs/mdbook/src/usage/create-table.md | 59 +++++++++ docs/mdbook/src/usage/index.md | 25 ++++ docs/mdbook/src/usage/query-table.md | 125 ++++++++++++++++++ 11 files changed, 474 insertions(+), 1 deletion(-) create mode 100644 docs/mdbook/README.md create mode 100644 docs/mdbook/book.toml create mode 100644 docs/mdbook/src/SUMMARY.md create mode 100644 docs/mdbook/src/images/datafusion-jupyterlab.png create mode 100644 docs/mdbook/src/index.md create mode 100644 docs/mdbook/src/installation.md create mode 100644 docs/mdbook/src/quickstart.md create mode 100644 docs/mdbook/src/usage/create-table.md create mode 100644 docs/mdbook/src/usage/index.md create mode 100644 docs/mdbook/src/usage/query-table.md diff --git a/.gitignore b/.gitignore index 4e4450082..1d0a84a43 100644 --- a/.gitignore +++ b/.gitignore @@ -22,4 +22,6 @@ venv apache-rat-*.jar *rat.txt .env -CHANGELOG.md.bak \ No newline at end of file +CHANGELOG.md.bak + +docs/mdbook/book \ No newline at end of file diff --git a/docs/mdbook/README.md b/docs/mdbook/README.md new file mode 100644 index 000000000..664b4b4ce --- /dev/null +++ b/docs/mdbook/README.md @@ -0,0 +1,33 @@ + +# DataFusion Book + +This folder builds a DataFusion user guide using 
[mdBook](https://github.com/rust-lang/mdBook). + +## Build and run book locally + +Build the latest files with `mdbook build`. + +Open the book locally by running `open book/index.html`. + +## Install mdBook + +Download the `mdbook` binary. + +Then manually open it once, so you have permission to run it on your Mac. + +Move it into a directory on your `PATH` so you can easily run the commands, for example: `mv ~/Downloads/mdbook ~/.local/bin`. diff --git a/docs/mdbook/book.toml b/docs/mdbook/book.toml new file mode 100644 index 000000000..089cb9a97 --- /dev/null +++ b/docs/mdbook/book.toml @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +[book] +authors = ["Apache Arrow "] +language = "en" +multilingual = false +src = "src" +title = "DataFusion Book" diff --git a/docs/mdbook/src/SUMMARY.md b/docs/mdbook/src/SUMMARY.md new file mode 100644 index 000000000..b4908994d --- /dev/null +++ b/docs/mdbook/src/SUMMARY.md @@ -0,0 +1,24 @@ + +# Summary + +- [Index](./index.md) +- [Installation](./installation.md) +- [Quickstart](./quickstart.md) +- [Usage](./usage/index.md) + - [Create a table](./usage/create-table.md) + - [Query a table](./usage/query-table.md) \ No newline at end of file diff --git a/docs/mdbook/src/images/datafusion-jupyterlab.png b/docs/mdbook/src/images/datafusion-jupyterlab.png new file mode 100644 index 0000000000000000000000000000000000000000..c4d46884e93a077c7ae8e3a13f4077854709b2d6 GIT binary patch literal 86143 zcmeFZXIN89_dXmzKtaR;qI4?)(xgcVMd?T{p%)S9gx;$t2uM*t5Tr=20ci<@fQSl$ z5PIkV=_T|KAe47=p5k--9l!oRy6=|p#sX!nQjk1!Q zHV8!C1_F`Ro~8iK_#uMjK%lcSb`KwFDnES4uIUDSYUgMT0x3l%K`0G$zF*ETeiJYA z;0!zat?k<{`$5_dY)Nt%niZ5tpI>~;UO3bvPmbZeT=AjgdKAlfiN`itjzbL8r0w+v z72Hs58BE};|5l*;eADs!J@h+wn^hE9$UY~`;C+}qD4v@u{!92N{%^Mn_?1jSl*J(w z??Wh)llh*?D=LBvC`Ox-z2Tti4&@SU)knj}=Fzv!0i@ zfMpg}WSkPe|8-bd$?Id)s>Esd)nh|Astc9RS=qSq!!<$F~# zY>@I}!b?6nlB}+fAyHA$I9gY(FCyIvXFop5e76&({tYYC7cNvc_Uc+`N_0cvDXOin zVqe#JJnXUbFCUQ7?HgFu-@LDHXP%lSec#;CJwN_Z(HBap zd(#i;_nK3EO?(erc!%^tUncPv-e_ZE61dOM$j+}M;9Y9WuJTf6Dd0TAsV`jLe$u(B zSNF1tUYdE{<|Fvhbc=dA(Q4xhDT1AT-=`kIE^c;#!z}SDO|<@(Fg~&iAvB~UWJ;E5 zWWMawr;YuW{io8iIj*>bo_YczanZaNcH_+q__|LFnf#>2%Tw&Zwk9DB5D?Y1W6zg* zCM+`b4RB4V>r9ei_ZgmtG5hc(#a*QFRXwF~?Z!cbH*cxr73jIr%A;U3Z6)^VVfGOuU#xvGmbu0KhLMSh=kG;Ej z_w51Q?3Hd3d{|PQ5>bEwLErDuLgvC&crMA+bPRNV64p&iO$f^vXYbJe#?wJH&4Fv` zoQx>=P<=zD`1qbzya|x;b4S z`Lm)*Zy}5M$O7f(SKpDJy?k)}^cCg{XxCFM!{3L=LBj<3jeWmMH;$dz>v^zszQLeL zcQ*@*$pvrTm75?UcydR2Grh|4vql6Lh#_`8_ss+Hhz{OUyKglkB-Gfhkhxos%}<{l z3l&|blx06F7wQv#_V(um!71_N5Kaq<6lxWjs|FPMVR(y6LsVYRz>}at%GOVIF6T@r zAkAtnw6F~Ib<*un?@7UD>^GlBbzcbla_Xbp$_1O}k-OJrl}=rZzImfaiQ@W$+_tk% 
zShd4TWd?5;$J;-Vb~_~&VOrKMl?%u5)uBI|#!7<|U#0|%_)AOXXP)aD1!>oOJyctbpcoKziSc!mY z&V%Wf3P^>f+otY`PZOWQ<{o#NrZNa4Ym|)ijbx1i%Jz(mYr1T~AFMpw#nZZS&^geY z%sg-VOEqORVk3ql)+7Ga#x+EbY7Z07i=O#Qx(m#h#qO^jWgcSnV)d$?v_Q>tf?~sxL5t=Uhs+leFcc%A=($oG1KH;hVxWg~vIb3O29&VmxCGW2#<3u4eMD z3VcZ0O+Brq|L(nzZ&$Zv#TBtvHLsp3WEOp}D=g~wh_)+Pv=MZuI#ueM10Ddic1n6u^Qm~Ec+?1;YGwMw)G81g zZa-}!IGo=;7tYi3)-KHn*}PNCVn6>h*$!gq{gt)G8_qBSt$;GgUYQbmAe3z{hYU+4 z>-f<9;oH>f9GKN}tB+Qw>5kWzZ|ErSviSzhh;oP)Fc&zf6BO)XsF?I5i(UI2Y$R+l z++S?bFchm+bT}qcBtu|3Fj7nm3_iZ#Y&CWnAzT;eNpML}6Tc zv+%OCVV%zepE*1uJ{WtDAMu{**7NWPURtO5E3Q88?bXdw6o-CP%(>L8Bi8X#aW_`3 z-@FiG)h%FYr5{_zW_Oh{sx*#N>wRI87HxJ}4u>LJ%(ZWFt-4<%qgm)BZtMgwgPpa< zUjMWc+r;xwjn_44r> zLk+?W3<1GMTtRW46e}KdC#@zlUPno^a9CL@Xylte^?h0%&tt+DkVKza!?_&jrMnZm z68b*(J*tkYuBc8?&M4ba%RR$&qQ61pBMKomCNS)2RsHjOy8!pHzJPub%n#|;yA(YOn@tB=zhA#3<3HJ=oqZ#r{ z>Q-JqQ%Sc--=d>R;%xn8_}i8y=Dcy+_)p98&MwX- zjaPlEaQdIiMsd@l63$ytZ|wKFttIEFaWr9WjAM*1Fl!_0;KNi)v9#my(CE~J>w(r} z&f8YkxHa$;*mpzdcrm$a4_%0EM?R`AMy4a<@TUkSfl{l!^VA#BXw`y<1>-nrQi zO$W#Ji%g@KJ(W%SN&HXIeW+K1XW(c*+xYRi=_(SvrYgM3AmCb{ zA#q^GVH&;;B~ZFiQhYHv13BUjX^zS-AY1pN1ob}wEvyG9TrIeN{GB@cepUO0T@3?Q zg+vB#kp0xL|0BKwq#dONR>bGv+5XZr$;*7w^J`}`8InJU`{*ZRa$CPRjV4L6YRsR_ z4-M9#v*}{{c~nP!PY<&0PUL%L9S|q;UEUQ4DWa?ml%J}pfw+M3Y0xPWMi4nrA^|>9 zBuxJ;E0Ej)k^Oa^6a6Udt!Y$JV?)PA(_U0ZICZ14Som4@-6*Cr4*@aUbyYziNmB<&)EV*V+H7 z;^6?kZlI>g{t)VB%`U>r&&z*Zii(|`UDEBzQ*muMg@4};9D%RfdU&{q^YM9md-Hk= z@?D}uS(Gr2o>X? 
zqj!HG|LO{Vmc{3g%*S2V=UzURk>{trn20?7%xLe2w6S?=W8-Cmt&e-8(Yj<#*q&I^ z#TQYyb46*LmjRKS`p2(A8&aCn@t7*^uEpyiAkx$9|L}F6gmh-?^gmuYsgBnU0r{dD zR^AK!^X)(#61UYqyk7c=S)(qLxhrOx=LIw2r2!XMhk9wLLHW0x`Gjw+bo$WOp)JfNB_$(!#F&Ni+SIftJ?Hs%X;wvAfxTdGK+G zQa=2XbrkXdEp_Nf%3}Rvcjf5;6V-J-7yd_y8J_}-=qbL<;sV58IL3VTj}ogr z4Hz;HXO8ze{5i)N|ru(+i`~Q_|YMO&88HEh+5 za9p~$^VV(EQIw)&`hM4`f7D6d#XEK(2ZY6@5!3RsH3Dps_Q3~-X@^_pE+$u~Vx_%S z9P-?XbXxT+8Ul$sur8DzzOt%IyhMFK8q*$hh{t#h=s|{=pbeJN#|Q5ATdZKep%4-> zZLGue7cE5=Ypd4Sn%TF;T^T+np?o6Ljuz8cj!+I8?XH8@o?Q{am?szt^g z_LKZw^jXfQ6NO}g+IYZ@pEWYSs_^h*pY43vfPUk(dRi~@k#vTt_AHag50-$Evf9d; zK_hO|b@^Qp>vp=)$I(2G<8&EFXMk?jFqru?zcur-BVvz_82njhMG`>1tB~y~Ht9;m zi8{U-%QS0DtrKQM7>~-oO-IKjWE+p#?p2M97;>!YT-jeP*Dx{TrCu6=dX7}vdCk4E z8A;HPy6S)CvZ;p_a`G8BYE9;;zlc* z&Q`3{zOBScMGK(y4mqdY(KqGtrZE4;`5#_?vu>HXGl_R)j@l5!m8gO)RbLTP9{~wC zk$%wrBmG#z9%H9XG*6|Rz1XGiq~g*F&`|7 zp)T66<2b4PHs`w8%l?2)mgGYkUGt!W}wH)_-}qG%j-1Isi3Vb2=ZV$B|oJMsqyp8Hiv?RT9fy7c-G zXCi1F=T_d;Ds+g{&fRku9t#v!H8#hi@L!unY`T~o^tCu}5VGhX;aTd@CwLy*r_R_) zKJLvzhYY{(ueP`M84f=5%8|+dYb=eCV~N;|ZD{MHYi^jhzi=+0*^V z&B5i^K!RzSklh=mF}JVukZR#UgBJV~eV0<%Fp8w&B9GfuHfXOc&jbH86h@7kc0+u* zG@-VUluyO?BF~b*VLNyE&~kZ`b1^)&sb5>waCxxI2|oaVJHf|AK5`)ZO1MB%9l{us z>Dx)-GC*h{53Ur&25j}0VuO#zS$!r+2!Sm$qOWit%K|1dgOBQ{DRx%JC2E}RZ=j`* zhjcW+vj>fPb4-2gKE+KGO~%28c&9!!QBQ1((?RvKy(QFD9>xV1jA>N|Cy(f!-6X0= zr(lGur1bGIA^3QHOg93t`JE#Z(JWK*$WD-s*?+Zeeq{h+E}R}#TJMERO+ka5GwXgJ z*YKk&1=6=$ohv>1-qsTLiLJa$M@u|ATjn}rmAH{L`x_I8mhDp8QG1rr;Di>Z9`dvy zHMFVqTEJmFPYXD>iopeyGfm{)D*3793b97P4B?$`(9^zYi&Q7oMzf8XXk2 zUxc#~-fs}gx}a)AO{m@B9R59V6}jd(_v0Newuz!>>`8)JsZN1|jgDeB!Ir5%>V(J~B@*K*zBdJDrZTACZTNIu#0Bo)lpe*7 z;7bTTZr*8VEIGE1VH8o3h|o(dcfM4hu9Q~bxhm;c%`+|pzMnaw%z9~`Z3~!l5!8-~ zqS8NK-dL$$hPhYK3rQ`PR`u#@X&xr(s4J&29A;Y-nRP%WMeh2UN{?oP68$p5<(r2P zS-yx^Z4wi}H`ey5aKES;uU;zKx6#QC^Wd7yW1Z(JNcT#`p$HR6Uhdzn+6)zH<%}I5 z8rm5{)MU9c8?$=PaXo#)Sk3=kDos}0rZd$VV&)P2o*TcvLxr5LG1l&&mkdT_Pu;DL(eJL3MZdJ& z1J+1RlQ%dZj*vRbmU90m$qA0|SmIu}U8Z-VakYbPEMq@yLnQ6e77lXYQUmqn5HWTpQ@*sdr8B6F 
zZ%hUHdga#mi@F+)wB!T0*#@4E za()xO4ywcf|`iN}lmGzG9IWBrmQy6CXkLhO0YgBA&;n+6Am@J=+mT7`Jg6sopRjx4FVj zpr+e3@tsY=0jB7J-`UCx+!@w|*-4kzXTCTcZ(|(c0_w00X($dfX~gi@(L8paJ8&SL z8=h!9#vzMEY9`WHwKi&%uqw;)WHU+nr=Rw}1=abDheL(Nf{zbZ;f%U9?aKIxabWV% zUS7PKXOon{wsh-ou^_YF+HYCDdBU~`@NJ>Jyr%WJjG^1_`;2b{g=m2tw*A37Lsq!S zFxptwkw7#7&WQV3Ha|LO48_HkWlSFJ<=MQQkc-^887D%z1czaZ2Qh17$i=;!i49BI z1Hi(D&w;l!A;$=t;FU-UK~+O<;VZiH&}j7=4K_3RS%iBXcdahne#Cz`z9qki*cRo! zyTk%(qUK^;_wCM-E&A$btU^O;@7NQ><~wtNu!85N%U%sK5BLq9NHohTB-x~919u*! zZWKr?qk|6k%OC~Xr>hOUaEWb?qwhL%IC?uTAv0g@uoJchMm>_|98$Ea?i_yamkxGA zJq)DXtYTfU;S1V%Wj-G0toyn*PwT5iR(XJw2#-tS*hTIUTp;U53i@LPns^7p)+CWy zEhw!9P8Buo?0XVx=WV)2+O!VzG?n=wQM7LgF*A+C)wGd7 zlXy84BYkd>F&O^prPI$Z7OO0O!PXI0f= zXDJNTZ)jI;|HDHD`DahGu)g!@yeu)&;f>}VJ@Tt1z0)Rv_<^9;#8(t;{gX+81w-` zl(D;DbT!nxm}smcev!reR#CGGS|!QW{*I{NT&81i3Dx6e9#(o`z;p+z?Q{sH^JVij z&?kK!yA6XXmLVo+o5R|`3>TVMzqV@;LkQRDv7>AnhN(3ZRJz{0i3;a3>!4s}6OaA& zI+U!?6|x9F$f_KHAI>)^k|)O+sl9^*zjyUN&02sqFYi!D^7P)Ni1R zeZrT_gB=Y2{M^MV<898Bz}QPBn90A|J0? z1Wk4#^WWP299xA9J#}!z%?0jED|!t%E$)>bZrX+%yh^7S)~63Ga^hPyc-<5bK5Eu_ z-)cTujf9tcRkxqa6x>r{Oy{Gmp_C~i1BTJ_(?lOdpwKj|4D7U+m^6K~a%RxX^GoV3 zobo>hOCn`Lo?W2gDk{VjXwYBs4(Q~58e+278#rm)EQwWmtqXUYvtjbki8aQvLzI{B z#S?F&@X>Bk+~`U^%gMXE*5Q6i`?6P)phmt04#fcnhyGq)Do{E|w1`Zj8q#!ShNTPM zr@(Ai9wY#74d{EpP^Hz{U=bI?W|<5YfRm_&E(vgS(FA=XZzFn2jC&b+Yj_`wa(%m` zsghqI*}ry6G&XeZMUz7y263v@$-VYA4p==_g*mUTGGfPGMFoGJ4Kf309`b$Gj_X0* zBX44icL&C+f=$Cz#cQ%4* z?@pFx+-kIOMm!#}pr?5ZA;9y;ut%)we#SRTRYm%SkMwCJR`^AwLU)QLy@WvJ3J+%M z^fyqS$CHnb&XCQh#Lb4)2c>gl_k~yVU^ip!2zhy`9(_b zW|997=ad(JN0c-lgo?AV)B0Q77L(dLdML8+QBXS?V&G_9-8N4^Vw$;MP90Ed;#TbvQDn@6DrWNf*Vx`x5>?IsBH+*A$qAK;c zDFN#V4Ui+7S@J!!m0#PWpkcHZP7_#$j&8Km=s^8!C*sLg-N~M+7hTb<#&~Y|2|E~G zk;D-A`ODWB!zS~?di$8alDDy~9#;y(u2=|-YDaOQp6CVSxHC~UD{RJ<@;4h+UL zs96ir`1~Hnn3P}O^^#5c-vFz(RQ-FPz2-DFE23kfZ+x?DcPXS`g1&jJ4!n8b8A;J2 zYU1gRv>-GmkIxHPPc|U_<%{|J<}vj9R0acpDwX^ zgpf#(?-h0kyQyc)Wj5vxp*CBOw1CEWCU&fU%fprUuESo&i~yPaB)*KQKE)qQKh}G! 
z%st6`6oQvka)ew6b)m-Q-YA}IxI%zfdFw#!_a);IMASBSE7tvGCJWO+EJe>W#idw^ zwk_YZTwM~k;7!IGGR^XNx3nxo#Y4&_4D3d0%gn(%YD)G`=`SsLHUgR|7%T=>QFuMF zY({K=R!9{P5yCRsUrgjG@6gbVD2g-<@M|*R=+~A%0OoSIduA!G)RjE|a1PV0apj2= zGXPwmPTrUU-L-X5$-As223Um?uom4!mTgw`d(v{JEMwoUJ3V)iewQzZ;>-z69k{a? z&FI4Fx9~wFhCax5_npIdi_^Q`auRH=)XJ5$Tm3N(*XFZA{Py8fi|(j`}!l(tWP!QI>uGLA6O<}r7igM zQE>zUt4;#SEY~B;D}ofLz@`_5r9qK>0XHrMwyDW(#Hwl9?g?zFz-nq(8i!L}Nku#X^JG*86vqaDzo@7Q9=p@shEdwOIoVJ5(bD0(M_iZ&NuWXHk+y(L_cBtfqF znKe8zFqLJ|Vr;R>cH>!h!={7Hjv`sE@piEzopEgH1VS?>i^A~LQT~8IfzXxVM8}{K zswHUsV!pHYJ8gQ-!&kkHg>vqe5DppEbGq+1W!BCy|4Ro6fDT+-mL5UDFIwA#%INfZ zLSq$Uncs=!+=+bhwjIsk?C$ck5~ebg>wodK}Ibk6ncH~mOsGGVU!*?{p$6IZH=s_!mcD9CW_ z$;RvtAhmBTUNL&>z|MYltfaNb@3E+Dw+vDx$tAw)0cBfDMsiR8sYX(&A)-lJk436E z$9~aPZCE3^A6@YWAOXTjMNbK4KT;Kaj5*1&D8}MYM*qh=9V=}EpFPi0QoAfd}%)@B7cUd5b$3}R~o;C4+ z>0EJ=%?bttBTwQvj#1XlVpYy%*fjCYGWIfldyt8r%YGfT{+ap^=5}FV}QSa6yO zp+fu2cVV6HHLLG-zTW#A01gW6J>4G-@)a_7%?XcLnF``dqgdB@r?V04Z-x;TUdW%r zF9ww2YNljwt;w>^MXI&;fFFN3t#i&k$u`8~n9Q70Mt3{)Sb@oDCch>4Gv-*3ysagq ziUP0eBBpbOjFL)(#dqd)l0GdZhT9|L0FDh3%#+a)n+zcvn+&xir+beD@D82w0UIWE z0r=!!^g=uw=#GD*6)sv3IqeI9<9b>w(eNTZ1ctXjeq#6&~- zPG~oO=fgK;VeR5C_zcK?EX)*{znK%^Jd!U?;^wrrT(qdlhw4uZ=Ni>ssweu*?Zf3q zPf}g5P;HFjwA|f_PozQKhQaoq?1lI5Ou}Rkt2Dy*c6G)4tbS%k^R^?J0+updBks!m zHj?UPP^aV3DR?*@QCp>!DaJ$I0D$&hgS^Qj&$f7MF;d{R4B}0&`7SuH)gU^bN0pTw z@*>ID=s3qzzOF)8CYO(D)80aatvZULttFH>seFGXL1QPa9t|Mb^(2!48Vip|+-RO- zK3XZlP4hiSk3g1uO^8YO5{wlW#NUqaJZLaa zJRuf}TY7XuS0%hkc^cL~uOg2)twRo4u^hu#!GeON&Ec6i+%rXWCPB?TG;Ly9s_1#Slcsttk*3Sf(Bx5!Ybz z9@+p7E;V3K_{NEr00hPD{;U3l4p~|>6-;mEGyHi6?iXT9d&{2Z)dSPY_{$7##s*Nm$!kokUu|TqJ96srN;Ywev;#e+&pS5?Bt^zOw%HC@y4eQ+kW1M(w@JEP zc~vK55AISO1f-X=q3d@DN6yGwaqe$4#{0NFGyFEV-%taCyDl+@Y~A|J+CUMUrE&I* zbCPy}f!3|hW`k5xoP{5|#FtZ^Y45Y)skZk4hZ}(!rK`~9SCCDWzijwrwol(eHcWQ^ z4Up0E3-y3h0>Z{>lR)eTE#R=&Olpx7`>5e2<&>=V;V_)UEaEqP3q zUWxwH>Lb)~(!A(hsx}7~HB&|W0j+AnZ8dFKXL7qlzG2aT6#xt3P00z>qhHY)Pm+3Y zi}wzs9dKeVB1@joA8#*?HT{f<33Iq9ld;&{vvKpWR>+wogO`6M#%a{~TZ%oV#W7*%LoC 
zr_+~U)pAS2WddZ?kK{>a3NnMl%RVY^m24T)FqKsEq13YH#}2m8P(IZ58zzYdTYCL{ z69vNWBntMyZ8e$eVg>UDKyqbupEaJdBDYpgZ*fE<08Gh}F9g?s+>EGk8J?y>A zc_pt|J|XIvJe9(oH3v1oHE&LKKctNTH$3a_;6*yxvp3m-zEa{Ii=KPRHSkKiOD@{4 zhs!73No=Vw$K?E2dAUrz-~Iip+)TEhc8$>Q6$HfFOYE5?xvYbYihc|Ei(fAres*5l zdMRvJXg87#AdT>nR+D%UzI}Nj%5BZqDKAeynwLu{#6&_5iYJ<7jy|mImF0TcuNxw{ zYA_`!Tp^FO?RF!7C%oV^WWnIfpk5Nn!7IZGyCj;vZA`7B{MBiGz9>0hzAOC@({JM$ zT;f-yNE7rf%I|(j54+Km4`w*79-H{N7cuX9ntf_a;)A4TN z$^2W+7(|-KaL4ekq_Jn{O^tY`iUO=D3WtJXchZLI#>*=@2ZzM*sNzTtY;YUQXHzmw z%wJ)oxLtL^KcTS}{Vl>MEb4g6dMPuZY<`mWsKoT&Sp&x=Chv;1puLe!2+VEvZqv?U z^hz^)?p$x}qbhwb%#=oMZOQ6$KRT>S3r!I#G$@#gi+dr5)sbsSw#dTI;OUQ)sm(YC zVnt!W2ft9R)1yT?9Pm555N||G{y>=T?)nt(`O6fyh0YiF&B{kVlU8v(4=eHPN1B8Y zi)`wiH7kn^wjAzd__;Rkx}aRd&t4|(x#`fl2s7K(sa%oi@y5R&a5!Mp{dq-C}&58QZ`F8=ehsK&A6$T7Wf?se1l7_0IMf3q}Eb}KZVAWD-fwq3`K z%M3nNE^7kPsUx@i$5K+V_O{|3n5JsWqp0I z<8ybLd0{#`*rjJlbCpTM%6wKSEi@QN$8CG5W(uUyhjq&g8w{m=n9SA^~8%l zK;;b>Ok#OAI<||#X}}ISMGDoW{hkmOUAM#0Y$uPk!VC9{TxEij>php>s)PzVjR+CK zp9kl2fZ1x+Dw2KC8DJbhgBe^I0%k2M?=O#jo3*m30aAw9wqOjiOFLVFnry}Jao0Wa zj<<%6b=9AIp5TqPJO@h+^Gzy&39S2qn%$eC?DoDA3@*t#AW2W(wMhLZDe~RU>$Q92 zleg91FZe~l9fy1Mjh$*5DD*lAyY07N9@LD8DyPxf+L<@1Pf^@7l?AM;0DhFvF;$Xy zL7WoRjs=pSr39UH`oKf_v)UF~$MnlX14Z5+;W5fz8=gNNp&gP!Od9eme^E$}q{)Giqbph*a8F~n8BEzbr0q?JCH)V|O85&^Io z>H?jJ2#*~*e@nbnf$^fK&Iz{jZAGMnm*0l%(A z!`F-N9kR{D2^!Le{iyUDcT5$R-Q5@KyTD9f8R+&-U)>d4fcI7)6m=OxPZxzgd-iuc zGeqWgs2>wM`uS%d4k%0`RVLZ2_Z8{?C#Gmt{s|AWnxCi$3e7$PaP}C5DIffOd3wSc zY<~bS4-HKe)JaKB-2{lC#Q$66ybL=7Kntw1{m4sty5IhUA-KP(DfV}*pCD^lgTQ;r zJ(HhPT?>(EBxUp=970?3M4<4o`oVhZ(SGmS|H$S$4Ky@@@zEdYqWw(3EAeyM?%BUN z&tHch0P3S=*q_Osem4NJ;Dm|2I^Ew9z+atZ2UxSL_WXb3%xXeU7>Q}BosqvK-hMHJ zp98$dhcx-Wr8Rzac_{{HrJs9|nAZOl-M<e4?uN*`2K2&kCbi|4 z9)2tJ^PX7jvY#f?AH{zCWPDr{iuetG`qi64Q=p{|7<`t^#DZYUvQEEs^4DP$FajHtx|u2dWTjbNCnL-h6E^Z&Cx0E%DFZh2I!o;AA0>u6 zX(s6kKX>;gAa?m@bNs)_>DOV6D$r6~J!}7; zyZishKtk=w+c?zU8z8&F|14Pc_&h)=cRv`={82j%r>HKn1C&o+-y)YkTZ%OwAiEVl z0gjh{H;`YX?DHHlA(HAZe1FO6x6=Rrh+hKve=Q3c67ryf4aGEm%V)y&{cYl&OS-ce 
ztuahC0Luhk*09+=SZUqCg|Q+3hrtti0Qi9U`EU9_^JNA}DVbeDPKZ3U-*2Z4#fVNA z%z7!Z`sT&}e3?O0Z&T!Sti%b^QqQ@C>Q{e%mFlR^v!lmL)u9_x5p4Atr2u)jrZZKz z!>HQUvV#Zyp8CrPp<6xExHbujfu$SQK+4U$1`Tbjk%r$fC#=qY(Yt|mWAJyK$LoQu z#e+t-%9-euF8cr+-Iax$I4|zAY*J$*aA;=3X0WQ%?j%{?VUgS3Z1SkcsN)v)qSPt~vfU0D*To16`h9{fDoB#&uD^s)fcK|jXO#?8=^(+A< zd9hCO!Gr$5ot5F*mU9wA07U7K25t0JZz{Kn75jEXXgVGScqC35QhT|b08KuxA(5Hs z{|?|gs-faQ+RP%1W(A0}-N4*@$d$^yi|yv^KqmVO!A6g*O6{5bN~nCBhMq(Ne@80Gzg3?0&*U zXZ2svGA`D)22*JfDhRtkH0=mbR?9XKK`$WYfdl8Hb}a}=W;13lH2$yoWK{sO5cRSl zH=!1I=+N%QwCQ-igV7>uswi}|5uGB?BDKG80|`+6A2m1ZpJz}0`d-Z4@%ydhYJk4o zsBb2yr$Q#-wb*-0Pg>ZG8OWH`{i@;U%`&iJhlA7&r5-w0jrEQNS@_?+DG!_#CJ&)h$9D2NY+I>f03+p z_y)sin7(Di`K6Xue5U%?kzyDS3J|>SCuKZ2J|w^|X-EoqReS^XB{W|)d;UX+j9!)U_F0=i;c^qy+v{LUVnKkFAaM$Dtft(33o`-Qj=vn} ze@p$>L*J0M4TKSfZ8C*v6bfYezeHXd&k8Fkt&Hm6WMyevjpWF1S@uCX!0NuKaUE}} zOW^%j6>rrz}EMUWxS)kR{`X`T#em3h0EOMyPV_EkBEBEp%WMdpRcEm14in=TO)f; z@6G`U{UVNSaM1zqrkzh@v_r_XMu(G3O6PmAzS*bxwCidj!(RX=^W=n!zcL5pLQ|2g z+0+nYC*e^be9x)k4#b3{P^mWpWLxKuN3%(Tfw}G&6ukQZwcdz=;0b_~%xFEE1gQlu z?-5|*3KZI~5xywp(3=~{InY5u$!r1mjFRd@AS8KW>|AGU-`MLkBlyyua|sp#d!^ei z)Z5J>%x%&=$EuGNY3?aoo!+&14y&7Ri&st)aT*IouGW`0(5(|UGR5DnmH}LY zp$z}ktE0AA{`0XCt1h1XRS)bQ1>|UbSc2^rdLLi4c|TR`HSUS2!$y|d)MoNE?q;(9 zdncUn?pxIGN06Q_Odrk)mwGtHxgjth#+Q>3qs z{Pi*|i{lsj3Xq?AfzdQ8f&_pVQy0Oph-Xby^_=B(z1Ji++inbI%SXE;iHnuV9yR3n zRn4W99|Bt`M9CLsEl@-qpsWi0<8 zlzv+^$FmANmdmpqKHDn>Oav0Mi8>e8^k^=Ym^IoLMqbY3Ft^jL9wVGOhtc8P1ynmq zPHfq;KidtJlNW11ORP1xxatGUQY3mKL+#c8;scy9Ti#9{0Cn#&Nq9MWye9IZfyn=} zzIhIpTBz~qd$R?xgX zrf}x?;Nau11A~x~L8>HjT-M!jkLOLY4N?v@PrpmJ1#|txr@G&;)<|^x6hcz;m8zeU zzZA5R6DPIgcGKYl>4qgbuqpHz=EQ3gi*fx{2B`cbD`E?+boI&hAUtUtbz&DV)6Gup zkZC&udjMb)jFy8%}wZY{q%|xR$W!>>*7IY4XD>_CXhD&aAlt&hzF@sA9e&F>RyECxn+U` zfTyXTzTn5M0Qj=LYm7Kx2gtNR>800Y;mq^{iglwc4U+?9$kC19%mfUbB{Zng*gy&drK0JZpvb;+k1(uPLwqdpC3TyOsFA?HvL z9T!)9*HRCw8hv&mx5{X?(9NDrU>nw`EG5g#Qx3d{KCOF_?fyA{Z^&kTTueHn5}7T^ zpgU;h?}R+sU)E_hAYUD*8lyZCMgjJa&(ev_NiC``X{~a5QS!WQ3U6vB7m!LDh&k%t 
z&B~tWn&|Fq`%w28bCcxKB%M$5<%#!yve3fB}Q;11j=75fg z{N9%wl3jicgdc(H4$I}>IQtAno5U^2yDDivx7ASnT#{KDbK~Bm6^vxlc1;^^-&LzJ2%^3nTEV^m#BsvxG`Gb ztZXlH8hb^HEL@|{9iYh?93LGXI$x4@FY`y-KwX=pG}Kf10OXg2Oy2i-q;!}-ZD;)W zW<2=y{{f|0dUwKtrRv^i{Q@jyk${i)^K~aqS_!eIb)IYalx!}Y#+n!O|FQR$VO4GI zyEq_?N`s^zAPoxAwG?Ry=|-ibySo&T?pz?fXz4~IC8d#Wq`T{kwcm4od!McQ{q+A| z=gYq2!^CSX=A2`W@r>uWpZmV&bnWFCkfPQJ_DRd6Q})ll%I0T38b5+jA5IIH@h7Fv z)Ox;p)^rVWn|MqkQ|Mr=IUo>@HhQ=@VH;qc!WvySAbjy2FTdzC*r~{8QJR2UKred4 zm>Y3Ffzw#?{auQw>qy1Ax1UgD56^aEO7#IQD>c$QxyEVf-S@~l-@91Q9fRy)baB1I z#;E7ZnAEHyH}&S-s_d%U(L+K?FFl>Jbh@(OTe*8u^z#AQQq4Um8J$gHJ6^4MIlD)GNkX) zJdwkk+CI{Epz%+>&kJ8{F3!9_KTy8V@9;1JO)F4G$t-@_;;PQ?ccGZp8~>T1`~>WS zuT2TP!X9SD0&1Jnv_o9C-8PTvBaMbGIG0m=ENw1M4%&BfLr%<0HE?wjhwmro2WXl!X4iy5>V-0ryh%LN=PlY|id%DOBnQU-#x~X`T8pVvdcBx0{ zXYa^w7+P3s*PIt&BSHE~$&WaXVL2E(mU*lU^`7!T#eN8xr8wmc!~Ggpjrs!Fqzn*oI;^r4B_SY!_PL4xoml=p&7sp#kT>?cO zxe*e6$nRm+k;f@!(-{{pU+FHgoz5yZ`dDyjG71Hg5?rN~4CwDs{94*Us6^nSKp0Q@ z=#_Uq4Cy0jJ=^SpUs!@~oXrK4-ysjnpLs6xON=YU#Z>Faorw;LV(#06Bx=P!>){nC z==QD~F^7mQwuelYyUKg*wGkKbSlQ!r{S>x6D{8(+xT*9=S!hJXc8#$xt+8kf&H>^t zK)R^SR*5l3TAFyTCm({0>DdvpTZo?3jPBNa;r1jI;S8SY%-DW^+@gDB;}Dpn1`jXa zz*NxQxjH|xtIM^_7y-56W};b<&4uDC?UUdxP?;Ezrd8!d!tY<{D#lJv?8H^rs^=2O z-m)(w#QfIIS96k@MgGJG{yZ8X`eU`I-3iZ0uOIaE{_5WM75iB{qlmqI)A~!-@z?Kv zFc;S^5HY6s?dzSak92#e;mE>pN-;{sPf`vWijX@Eci|Yrkb?XcDCBC_K*L&i#Ow&j zkw3?%>GG{9A0!sLe+Ow{!sKxa+-K}f zm$q5<_iIz(8q2OQ%3718b>z+**`yUzm%|V=(&W#E-LVrEs#(#1?s+epm+yb8Z|)MS zC92Yd6nzYuF|GcY{*kFLs`qw-VVdAJlk_M1)fA@*SPnF?HZDe05^?nfU=&K6aJe47 zRl4>d%tBYx*DVqoo94O4klS?7C+1p6I~G(b{OgiYkU$9{r3$-SeNR?u{!Yb=KL)<` ztM2}fp4%WAv#}TN6EFvse?cOYs>^2}WsnydQy~acfv7HnDn}A!BcH6=N#OQ%0#`4f zZu*jF$Z;1*_BVcC&?riN&2<#2mKSvEF7KD<`+`OOjK)s_KMA_K9RDzu^+V2MxK{wt zQ_Mp@`mTb#{>Y_aL|Q#N99v2?KjnGnk$KpQI(C6hciZ)hewlcTTezvr zRVP0pBQFvp0;}t8xy;{x)H&^x>xh8h=C~9gQS0JL_H|avGYgDxMcAY1PQdgs0Qx^e z^TiO?V`ec~K<&T$H0{7-IPD>JsI@)*IpZR+caqZ7{=3x>&nR3tRk@OAc0Qfu(FfoK zf>vD+podoE6FAyXeq1F{V=BYT`D`~SljuwDVim63CC!s^X=V3;NhaZ0m&S&se!f-m 
zD4(Di1$Ec8tm!8=f8x}C9ubHG>5{FA##1OjY#yq(p}?x9Y-NbmRbI)|WQ~+*pQU;r z2vpcU!bKG)bXHQZfC0Ncp;W>zDi9VH>85$NVy5`qwDo6LtV=*kxt*I4%8~l)`BNC6 zaR^P-_sPKt9WZEqmp-c*N<pvH}#7PN7|0i-qnpuQA+Am=z|Qyu66RQ-GjP zmXV5}NBNxmsAnw4b@O~-NSUE9AhcL}1>hCcjQ7v9tQrjA#XVD7$e@D%Q{64mF)1HY z9X%c65uGtv6TM+O?QUfPUS(0KC(gxl^{)Ew4FODLt5?wkdFe_(>=>pt5D8)kINO=D?sOwukKZ(0MD`Kft0_vOKUW4=x5 zMd@{E!=rUl=e6>D?BhidLIct!zwtEMyi0pwzu>PJ6@t$cN)TYN5ymKr!ninMO5Qt7 zxdQf2DZm%oSZxo!J|*`$U!#wcYd&G10QFE|<%Frpq-g?DwI{`$dp|(Sx!hrW0q}}- zRY2=jwQuEZxW5pI=`V-y3Y!{ zz-FtS^9@b{ki`tBlyg)x0Z-0$OTovGi$TTkeqmK5tHZPkS_~YC0c*CpM(@GR!z@Wm zrJd*8QWtq&fD-sF8jnh1g3m3q?80Jp0Ag}wsVV*dWBtMpjXuVGopM_Srh9n&E{N1_ zm?6qyz`NWuSn(xIYyrcc;r7p?073>SZ|$74a!O41JbLSL2AoXYZXo-Wvwt~p@-2$K zjxr6?=%O6MGvE=VXl5Qbb~t?O6TUFt0Fa{|>lWvxVbM&Mbnl#Ia-&B+FH338y2>cs=gy4KgsZ`{8RE_P%@Sf~k@C-BPTySH4-#l^&ZWj1<2agzpZa+>kSejMVit zVaqArlhFXz+6x}C^o`$d{$6kZ&?APBC5hK2U8d#}LMoD$bj&lxdT+%L2D%_pq9r!t zJBsT{Lk4!wQ7(sR^!OGCt5y1JwnU#EzYu#Wb@Xnu5)L?%?JD}oF=a_}`!s%{Vs z-oyo24robAKh7Y(SQpzyi|JJsopsZ0Sc*9g{Ovb6;qA-zmSA|8_Ji1loNLJBcs#e-jh_ zNXmSWfFBP#!h^a$+p2#%%5?*mxiQ0)(f;QG`r8o-yzz|i-0`2imQLVZ)&?{2kbmrN zV^V?s8$#eFU_AWUv-s;#+BUq)>NXZk`j4Ch0to0`BI5i&`ORScr$Cv;2UmJhM8fr( zBJV#ZWAFz3L?o(Gtbe!+1fN=frPpukaGD1Hrk?%-<5oZb!;S(mApYAp{rRhdJh;;7 zI-}ux|9HCy@V0_?C);1g;7?`*I9i87p2DAQIqdO2-meESxZl|wgueU#aGu;8q!G;U zF(pp9-}GPpGZWHogNGN_EVe}OkC$Q#P6MqY(fZr;`}5amA26yM)q%-K|9Gj};C}6R z^#6I>IpDJll{iA_Zxi&-UpsZcsBRmX|MNUV1t;mAdtm$9&fOo6=RfVcVAg-U*Dm-Z zBhh{?_cx=ZKc5Yi6L|bJNuU3D8j`>lo!`e!#y>~sZ%0dPpkd?0S^4M9{q2b17C4Ea zH7?)pI-37k+EakSvufE+^yjqu+mRuB&D~~ukn=yCnZI2HRd^kEZd1ME|GX;yog9B% zS^rLszpk5qBgdaX0UZ51IsTm-e;t9pO^&oa9qju zE}+h`rr`bOQ2zjHcmn9$=Yh#hi80irz43gVIf>uFlqJM>30U71S^ydAC?JfR4d=ex zbnST#D05Rb9TdlaG_k5215)2{xZw$KfGNp})18n7XwW|2mS0R13J^4tCkcDF?qj;J ztbPCFw*~C=T+F{c{M%0Q$FFS!xekc?HjC{EzJVFyAzeRS4~P+0fS%N}>(RO)oEzfe z#ti`!Sm5fzKqlmBUx7VTq$A*zx>M3}w-U&d)!e*}T+rZ>C}5+gheD^&G7WqbUw-!} z<@`-H=JOS6I1%9t&`0#uv~2=(0H8Y}+zr}@{it`z-2~V3Kb&IjTAt zG(fbk@1XZf2YS1lW^a2R%4?+?6)+(t-r0OU_guKv^a^YKoJggHX 
zZSUl#+)>+XE5E54mgy8q5zc^zoyl~qgI$CEqU6bh89Nz3qzwglmn80c7{IMB0riOA z(&_kgF-!|s<>|*gwu3^z2=(P#Oef{Zc?$|^P4@$Wm;o=dg{GB`Hh5wt2hw|4|9Lbv z#@2sh;{Wlq9pM&yQ#~9De0||iQX?1OU^%)B_5iIxJHOL5dEIH6-`%e9Zb8S4 zSJ{I)mw?5hfwt)<3*-;iHmidruUo7gn{ynSpy8n$*J)g&`5kHt~~QC%gdQna`o z4uk}gz@wqDdR>*z3i!xvpTuk&lyy;M+-X-0Jg1r&0DXY-(M)=F`kU6urFT(MTnS`AmIHh1KlL(ctXcj1cqr0OHW; ztuT@6pNhk$0f6c9##|0AF`JInFd}J6WCigr+eIY??ymDEv6ry2MZek~#n*$q%i;(px zN?L+RJ9x?8itCEn7Ec0XBDg#Xj(N2uAkyj8!y)%Tx#RAGRpIDXasMBPRu#8_oS zsblOh=m;2rI9^JI_3Qxn#I8X_!XKVFR-M0BodIl?oUDh>9_Pdz1;@EWH z4yu*~7+aiU`)$N}<<7^eTO4G4XzeVnh&rB=(fp5R?*?#L6&=06RJ#OLV0gE7;6^V*U?)(kmRczHHF!_q7%vm0q=Lh4Lr+3PFu~gL ziGz*F-dh`O=v8Ew3?(ba2ts9o>jUD6tn6)iuIvO5zrc!c2K$%43=h+z)dWTiM6eL3V|oE zRK9w)Yu@K&fbKdrZM0G-vxHZTZj67{H|63={z;N)Y@m|mb#u+;p-|b6$fdi!^A5WD zg8-!q%u8n_(g&Eo*2)Tz2ri1Qghz)U%8V7%!XychN%?FIvhwq(r0OUElL>u zSpE^c3 z`*5)a_PjE9BYpd@G%-iFP0GssEn3#?@Qss#Dv`s7lG-1z!t+-vXYO`ZF;u7wYM~_n z(_!`eRp-jKSvOZob3i!#VYxD@ByTIEl+PqDxaUt{(~7H7NK^-JqB;5Oo8Q{|Eb#i_ zFQYN+;u8r(jvNpZymYY!K$|O3rvsW=NQ3-GJT(AT7u7F;wp}-j-gruwa9AzuC^P+> zfmNol*`{e0tif4$ZgVq|MZ;i#ZF{6X`q(`vaM;H-UIAO4%lZk>SG_M>P%C7tI?YfK zs(J{hB&Ld8>6ud(T+50-5zcyfX3cM*-7O&HZXsQlDpr=3F2hxmCZQ@`*TBCMo*!3J zVaURs>XkbU(L_azR-lYrGqFWSPF7A(RXM}y?T9l)~0yH+`vA6dU zdewf3skstJbQt0JJg>D$@laZ#MXKRe zY03+k<&)i0(J|{{weK{n%|=?PrH}EY)r>ZJE>x0{m935GB4S8tKb9Ic=)%y{xPN8L z)MZsz^=v_!@z#>f5yol&qHP0g5NKb6SMz2^@nl$DgC;}QxMbF98=w^HGKwiGdJ7gP zu??Lek6v9@6t>$1Zp;@Nn`nE_TII`UMGwpscF96v&2lE6;e7&YrC)VBX_Oezv=B<- z@W>ih36Oj(h%NR%gcKo8bC18e6<4L^!;7L7jrzQ%htPk}Ui;;a)bhL0S3bgDG7w6A zhcGYOQOrm6ygYa$^`5l7$X;aF&7n#}ojX{Dn<#K`=f=I&{*cWcFYHkZr)(GIO*NCo zKhYf4QW|4rAq`~jG4TZZYlmgN#o_1|U_|oM_tf(~X32&#DKb~l&v>b`rNz;(zI>Ca zpL?Zex=M()8to`&aMHLNFuEMfQypRps7;OZJ;mIH+7C)uLOR+*$m;Bd=BhDI*Q=J% zhGhlTg4hA&)83qYzj`&ow+ygC#qEh*y!s#R!gswo0tK+~Kv{vcb~Y-~onW{I)JglO zeRPt^W*JC*9jq*KBYNUK;M3O*Kaq0jLc2q%4C8U-J{nd>lvc?y4qa!U&$9TI&2UmR zh{)(%C>{dtTVS!$rXPWe;3r9}eaeVJ=JNH-_jA(k>U@)nq(%-RFdM~NWJg2_V*u$NX|}bhQfFLmQ|FhqG5XNY6m82zrxv46_UE&MKnH 
z_brrK2H9c-SayPrHj4acg#`p${AehL8Db&$PO~0P_Ku^oAJ<|nvP_vE&?mbV+G$ns zY-cm_`@@h%8!HlqxjK+qv`6OSN(+5lRK#V3OysK-OHTqfts}Az7Zl19yhh?+KW1vc zP*j=IFi|=b2;>e8sFNoRuE<-BMr}}T(Jp`c$RMN}CpK?A0odUAY`<7ju)lp3dz2I! z3%a*$(Ynk^MU=qoVQzcLw8Jz8(xY4S1OR(dyP}SC&!!I{p6!?G@1E# zu+1BCh0oyXEf@#UmDLLc9umuIs%%@C>5wK@*Y+diF)CN-t8kd$*;3SKmDQ|t{@dO9 zw^luw&>t#C)oR6>S>u5lT(FtAk1VBEFB5J?vW+-lYW zF#5*g3=sE80TmcL33&6JP40sKjtHimOt+jRjW9=4P4FuJN1=yzjOUrxNs)P=>Uvdsl+PnQbW%L1}K zmIz~K9uK-Z<)!&XLb+}c+u4-7R7aoVHE;#D07e?H^4h_8h8@T9Xnu%p=LWtI&yCJ% z5s%zgyf{yKuq&@dKuHp$2d#LMbhklL9xB^}=kX4nPQ@q|<;vAO$z(J>hs!Y5l-U{% z6*mSlB(lzW{)Tyj7liB+!B+?)0?i5r{zC#4EyPs6yZpI>k7FpQ*{-Cj(_=f1oWroh z;nu+X*PRhGEgF`ZLX86UKNUQsEKHm|`cw84xeL>PpeY&r*OnnNF}y+7-iqIWIM{ghM%~$=Mve#hY|x>j*#* zIZV2aScpggD$ld=`t$4LNMSo*Vg^JjCTb+B`FkuAUt$QHN9Upe;YYXqs*P2-&n4@L z;sm}B+AuT!Sfq@%;G}E2&?H!OdZ4_9#Eg8L#r7r9O!RCSVnPC7|INCdgcoc)Wn=Qxc6O(H*3Nc*Ppkd+!)Hen2{Qa|I zjB3#vaq3%3wU!{{x)VzDSlMu&&E-$nJ)X+T^V0(x^FEkgb(bsGyw^%kXFiF(KL;My zZPqYLZ@4wE4WlpSKsSFJf{XSp?;Z9I_5jR3s^ab14K^kIaCg%7ucjR-`VJc;#7k@j z-r7P<0Ajk{eS*o|aL^~vahE`|pIuS+%tp2soYm}!S~Twh&GKJFk_dXft`Kv*eN zyn-f23@Ty0CA27{Ee^H`;Ll!yim~W3G;ZWhIB_?t)Z3F3 z&!XO(}#SXTd+-!5@;ttaz6}n`LwjKR<U_O>e$w`ZMkP-e zy-8};eT|8AJ35!bRCoD8kR`o;`JuAJ4J7g*|7-`?ASWBNr+pK0&*!bfs7p4doZs2MAB&pWX`yB4l>Go%0=pY?x|c zV$cSZQRyMwT}|wd&gquxvF1 z>GY?|O``B59WI{r(v>3dE$r%RIlYsM1CHS`&Auoq=^E(Gdkbxx`42GkBOZwL7z*J> zp#X9z50jOv2iqv4zomU%43moV{?fM$ehfTX$ybHx{Af5W)25GK{$X;C$*wPi-9vNN ztf3Q?!R&b z4AV$jDZDZ&93S$>Rk&`Gmd$ccg6{flPuERiXVe?oT$1zIRu_~J)dPhUhvTNYSwtds zHl)#NRv5T&g?2ywZs}y<1iOcV@@ZvdgArMaN5w=m@g{SHJmflK&yr5-8DPxy(j+Q|O6Q3pMHP)!;?U3o|O`hRejnGlaKbQH~8* zoPB|;xG0gNJ1vq9}_{34~vb81F^U5BFRwnkvR)5qg1Kx;BkU$~#lWTH* zd&#*E8-16Aop5qmbBzm!ywn4F*+kkHX0_w;1P#xv2`(S!;PD|HKIXP+?#}YL| zVUs8d#nwG^&!v(~+{(EOBsDs&hd<{PSs!99Ic-n9q|Vt)NedJS4SnG5<;v+iI4MCw zZ7CEP1tqo8{afZ z>sOb9l@CH}R$$GCF0z>;b^&vA3X`!S3L~KjP{k>$M%^nH`5dSY@tsOV+r~u*6kzx-Ns#`O0s&bh*nP7#3#opa@|n>VE`qK7k1+O}rpCUqG3fF6JRaxmvVZQIa>oXaFcb@js-G;bZWLW!woya`r%X()U52lqT 
zYDL=!McwMt)cN}{hV8l*->@{?!2b!Hx3v?)$ zJd3S@0U>#MO^u|~xC2*+J``toQ&*}i>Lrk6+eh;@?|uo8V4+aS;RLGskn7IErDYPh z#A*nihydv@yS}3EE85pbU)=CSw?%-8whAe{(HEayPWf^FQ7IIaDZE8sVyMK^SsA1v zZFD9NL+=#pIoE8@HtF%SC4;xpOiI0d$Kwiil-?s(RLUq$qn_}o8oP}LcCK?@5T8n- zI(?9Kpp!>B1EYyzQ9>rDKHbJKb~J~m+u-U@mCowl3;GkPm;?-I`AJIXjUbHHxPuhN zC;{5c2XvdNk@WIKAKjtv5|fJW9MFdQ6$L2UpT*b9~+o{f@J*D2})@hCFoOxEopC(%HVQ41!|oOjBt(BK_fT`xqN>9W+-| zzdk}1LSFzn(*1j!q!->)$&dBL?VO-^uhubl@3QJO*5fTySxi0z9g(P!e6@`c${Z-N zt$CQ8tuapO_fI~@AZ7wjpwIMBS3UFur8-9oLR7>tYhdlwRX0V+_H^w1JyYy}v@w?? zFnU$TKaaq-8-%|z77YM{HH%P4$qqZ1j&33@(!L)Lo=S8B`|%vXoB;c4Z&()lHLDjL~MO%L)mjPHlN6s7pb! zQ{+d5Ssc}N1HC93rLe%0-|bL7)e=+_D7I7*ibCi=wgts+)zHE-%iF+Tv~5y(XDBLd zgEcvDhEmDOH>0Ac%EkGmD5O#=M=r$|x;YpHxjRqlTgP7Map4$uZw$I1a#1Q`M$szp z-q?IFQ>w7dv-dqnNA7%-_94$8()Bzu3|pwS)Mu!9e(d>-7uTN)r|U>01V;45Q0w?2X?hSgkLRUQ>tPng+6QwBRm41!$OyOp_B>%CK@fM_m(Kf&&j$(AKqbhAF0O|C@ny?I3tAVY&B z@zyQLF6405A@uko8&ulVxU_=0G-l}eoZ!uHQh*_9;Pj~yi;G8(l#CFhD}go*ro*}O zGWgLs4lk-xaa`%s!x^D+L=?>1wg|1@hE{`|7q|Pm@zHcb${V> z120MPbLv;-EQ6ev;P1d26C~cQG$i>=1{n_7q{87w;&tE(yeA2eeccUlGImxz5(G^Ou1M4y}yH`^@s7%-M}iX z=SY0@D)S9>ZYP!(MsnTT$hJC&9bz0pUUw+&?E1>Zv86O6)fV2DwzMRB#e6#(5_&p> zKJRB1qJVd;{>KBWbvr2vqQ3%=)TzDcPX>qtK0js<0pmic>Ica?)VN>?g(YpI!k&6&afwDKdprVBT# zMW;caemhldqbsJ_APZ;#BjIl7y^WZZfhQsA6^?#rrWK|mn{B#rxKZvP9_#~@xeLTP zngZbJ)X@ogWA19m>oI6wf+cAHk-MS+yWH6x(J>&xf4sLF^uouCW8DSByme zX&|!g=B*EOUZCGgHz=`mb@wwSW$#=Bs}{F8sWKKFkJX*`48$*444fwYoE`BMO_B%VMn#;)lvmpj3v(rkd#7b^{TBub=Zl<-?pcs<`xcJ4ugbR3m;z$r8sZy z%?THHhkqO(cWe=w&5*!$v+?i#8QijpHyZ(cR+Kh`3|*8A#}ZxjW=C>K+p>3(uQVAF zN5!TjpXYm>Rc8^Q{~BcCx>aqRH16zj@sFs#)>aq&^!KCpG>E(rueK|q{Y?gn7Z`#3 zRgr3pc4xYdz9W=E>CL0()IFk?zJXC7#s3z~7-i6xtRWrEfU8ho>QVnnu0K_zxW0?p zU{R#iE-)FVz;>x~9EsFJqt{S$fVS!fC{ z{T~MI;s%rqY<;P@FYK;v14*J5&z4WsrPD^i*6vmS^C)1!-Pa-bkI@26-{kd2qHp?u zz9p|;q0#+pxM9aEN4+FIpv4oX5B-Bn^R0Iq=#-KM$+S|D)^Fc*PI}LxUg`MlOat3J zDNrx;>?FSO5_xxt>;n01XiLLKH}etvY1~Z_`Ad2Be@(tG9!Lb8F2A3U|MS6CDe$4i 
zCBs#z--e_A`J4jPs?e|5(O>@uhaN+LDl954gqwPitWs;-{fq6Ih#!+0?IzG%MX{q|Nc2JP}u)-ufIL`ccA_l)_(`;-x&p8 zfd4MNe`6FpApCz8%pnk@vt~ausqo*GKYu~w-q0dqELb@F? z*}Nt7mp||un1?D8nIZf=0S~tLf8Duo*x(=Qj(H{Uo4@gwGdz>_Msks~Ie7KouWz2*P^F3!{ULhAo`0sOZc|9^g=|Bo*24XY2pDLNZHE@+?nA}cn!yEOG5 z>@Rnhx*nPGd7PI6LaMmsOg*EJ>*21O$kZ1eB*4Im1ZZomG8=&548BFSWhJA~;+P`Q z%R{5LCadbmD*<8(Fi6rkTcTEBB&|yGmrZD+oHCKLACwE!2Q*t<@B+D!;Pp75Q}R)W zfS#OUsex1u$HiBGRTu#A-g+GiX!AybES^y*JJ|JI>DV#{JV`=+HU}CQu^VH>GQcJA zEciaUq@@hfXMcz~#?s05H0S+Ha9XCl#f}p!&c>qK$tA_;UC8(FVW+m^gs)yXlzQY?NgUfFj-8 zT2+<~jQUTl(dS;*0Lyna0W$E~@N)3t^Z$M8IAP$b7j4JEN^I5j8Fm)0x0|M}Y?DuZ zzCc0|YV$)EymMQl#x@Jzmw-_b{kaq=hW{;}i3&3;a^wQlT6>cO-~v(6Y3R88slqKxHoxOjoJA|>l_@h4v(UX4c zUuvUfHav=rilouUOb{xEj&V+x-XsfhT7Sqpk&3e&OnY;;c+y`cKpm=)`h`Ov$4eNeSt?-S)zO_SDiB1 zO@;=$ws8TH3t)uCoRChyo5wJXvg|RV`$ArlY_rvw>&udH5|Q+B^A1Tr3rbFg<$ky< z=<(TJi|~0J6&T1QDcNRqGiTYUL`O?0iWv_*5InBXQZww>^D^v*QP{2z+SI&x)St|s zwDe>`_n{h{R1}Ma|K$52X|ElUV1_e5p&MpQ=68_3dtbO$ zvQ)X)ME9M`k!k$F2>sOqDNaKUK+`qZq12e!CQ(tXck=pn_M9{4Xol$cMo`1i>!czt z_TU43)-LmOuk5vFU2(EznKUc$jsT=|G*7<>Ym6ix5=!iLPaON_#n(8;`@&8TInq=L z_4P~>TCeP{jpjsy{HN}E^^P+ek7bq^m@yXV4>j3qnWh@uw2nz;D<;FNRA)85;nBqv znAl3zOsUXG)J_eIuvB$i?M!(t3XEusJ8JEDzRp(Gn10OHRmy*%%6M(f-xTI@acp5S zoEw2xoTpaI24n&IX7>TA7zz5)`4*41W}BKiw)!ouB~+cZGDqqAV|7fgFY7d%X0h=N zI&$YtNswIjgsl=bd>`NPad$kzTq!cW;cmgH;>ce<(7K6HNfF3(LD#MDN-`P!&r_RC z2gKL&;bE_9EqZ-557=|9hNs1@JJQo%<$t!7Bwja~lwQ`qPEKyz)J2y{rutx?J1CnN z=Q_HZi%~qlH(PY?VbkSNg9H>~fj&H^yOFA2#aZgn{UV}ZHzUw^*$4A5PwBRkkGsW5 zk?l(HQC?vdY9srP^I51_>P!-UC z-~O#{fJUW4yZ4&D%~g2!i{4FpzOCt&X+_bJcXQ;`9_oxE@zzEKBvek&zV+NJ&PqF4 zb#^o&$%#A^700Bb@@i;y`nk&$@xv$h=#?|(Uk}P?W!5)m^2EB&W#5m<+|256YfO?2 zz%;R4@?HqI*_5+lC-2o3Ol({TsnjWmt|{HLpif@3;>&l=G`M--bruRd3ydY@GG&ke zz`iYdeYeTyFo^kuXXqv&f2DP!_7l$yJX$HHvq?T^3QZlmUZucW^d+rZIGyq=dQ4tN zTWiM(FImIU4xr7P1kkKqx1gQ)=PwyH>jK)IfiPJ@jgx_5dw2{Qy|T@l!>}k!J@0YH zN39=ZL3cKBH5Cnc#(DA1QCryS>fOdL@UrjKLJr3jwvo+uF}Bivn0&Mn*-NH;vO%qp 
zk1Yn{9i?qb8y-KUnzcJkf3nl0pp#lP5fhnXcCo7BbEj}-5xk~8M7dCNq=I|(SZ9E+%@{x-zjHiG89>EWfvr?Tjq?)UHlfw6z(6X;HgY+XI zjYhqsE`YhNT(>4>*_5aK(5G6AsvY#Fg5x##zjXui8Yb|*zA!B{wn$^LA(Z{NcSVsg zH~jS!VDl*~fSfsfOs$cj|y} zbjiWBFko_IwPL*hST^X58-gS#w(?aabv(9fr4ii?;sWZY12u0rbLhkM~wYQ~4 zx|3i@anO+87L;k?M846`%3fEB#4YrU_D%Sh*gYV0aJ7zGdUI$Adzl7}?oxR=NXN4< zNcuRhitDyj+C7@64wh!OUmbi=##iH}Tlu+lACqsD0nS30G$Lgy73tx5?{>Vi~^uBPzCu- z0VFiuaUu!tvvb5NpOq8{0D{Sy`1I~lHnC)33DV-vU`ea=kHy4>^AP{PR|xI`%I_L5yLL!F-5 zmF60Bjx@Sf&mVVE8``Hld;$Jvb)f+>A@+SlP=>Ys|m+ccsj z4=dChb5VD^a(WtjZKpkICBkZ=M90mW8P~yV5Ld5}g1I@tx0{6OKSCgH}`EyJ=Bpt=_c##nxp5 zTS7ga=^gs1+~Vu`?<^zlUd?jas8IFqzUAYqboNQUa?6;ib=MdqWM(7jQV9EYbxizd?Wxtv^1{l-q#fhAZfA##$xHvhs$uv%?r@Ca+a$%;Ib z9OuN##l~iQ**0*K1UrETvSQ`bn!htUhYx$XYg1eo#EiuEY-4_!MJkeZ5SRfH;u$S> z#Q{us7=myj>-rgKgkxAUq)+$Kdo zD=+HIm;$lX{GO+3mNX$;uSu0~0KAn|N|g&FCc_V>Q>Hm}4T#uvS)6C58a?VJDkUQx z_}&1nd(@KnY1k_*s@Z;t)!vBwj!?;UB$VZM^N8;D*V-S5MH;1%Qu1A}_!=xjE_oKQ zYfO2IGFdfWVA{J&;gMm4ymlTH;&6&hlvk_I6jaVGNOU?uyWnf`$k&X?7{HHw;^(%C zxHZY;6f*sgX8nN2Sfr+iS3J*g{V^K60Sak{j!-_kIE8@64&$(N> zD)2j&TTAI|ZBFEB41SV*7|P--GN%5foRv3MTU`d(CULrGF)p$*mY{zqUI-_kZs&at z1{8!?eDoT~U7t!Ty^}o%uEZr^o2ztA)2$UKeDY-scdKLj{8);fG92f!pk+>YWzJ zUFW}k(UjVtC7ejY-#e9)4wM~0{Ec=M+4vSAW1j|mr@s~@Vz{qK5orpH8c^@z`QVUsI|gXJ_a({8bd zbI|Kh&-QefFwWyA(!~+mjAf$kvP00p>BQXXH&FM}dtL6m*K9EOhW9Og;0Dd4zGtrP zHqi*>lEg}1tn5x>Vd(Ojd~9Mx*7H=v$nUYU)bU7X0IyJ%NU6935Q(rzVvdc+1qavY zOQTzd?&i3i#H2`P9VaNZ{v2zw)`7xpC8?T=*W8SI<@Wi@IOi=D^FC9YHZQ6AubN_Q z4UTWucpT+*)Af#JHxRgg{)i`8!0dO%aVMhN6ll#&dhWkK>0p)3+T8^j|IRw^&ZnC>Iy2)BIQVaGLA9ne zpn@(2<0Y@3Iz7W$y7-ZJ`D4Ziz3aU(Kfm1}KYCG&IG_00Qyl2*+~W$&KB|%So7{Xp z`4Bn3tU|5$g`-8RSSjDkZ^;|a-@^S-vQ+!HbS8=# z>H_oGk;?8G(auvx*+=Skik@C1*WM)TuD|(!*R{2LyxfVtBthI0#hO~bb9_R6AyKzq zwZpK7M{CWmO?47H6uQkmH`8H^GMf962Iv*!w~Ocf!5U-3MTbE`p%#a4%v$2)L4#9y zhu?Loa7B*lJ@JZRiwwHs-h!A?bpV5Q_6w4YmBDHo{gI(9<$E6j>Z-(69D?XilI>z$ zrj@uwd6>@zg^G{d3%zID50CZVe&SBnzrH*p{0PkP)Tkab`F>?!0ZhGK$j#&!OQHbcirp!^FvER>~Z-Dj%>NMVr(`i_4Vr@ZI#T)j2 
z^t!fq&ZLW@r_BuqY;a88<3sQ})tjnTD?nv;ya3D+SAVsq@iBna+6~an!hCnA#7~B+ zNDg3S6*eP0{yL3Xbr`Gh-}N%&#S1bPKOygOs-2v*&BdSBLMqEXJ)aPq4ms#_V@;{_?);B^A%R zCu*$5y6OL^xP_gGnuBEAoVxcN3w!zFeZEJ0au=(ySt%z!ziRG``dWM(UeB6plSkGQ zw+2{hvz+Y%Psv4`jf$5%8B&8S5AwAeJs-EDFIC59*{O!ptYS+A(K6Cmz4KLkq=ajt zTpvIy6+gVD`M%|=m6v>EaU35%VFVhrM*32zd%wFznH6lR)~&nqY&S5v>!ln{oFH_M zJ$rnUwdq?)fSnk2L#)If&&9tRJ>GN^U}VtaO(NJvYZ6(aslL{qJ3m>xW56w=li|{-`TuEXN{V*X|>1n>U+xBP#BndSMf9 zSj*dv+Lu2H-!TS%>Sfq2twg#^{_@-J>T*RIbUYyu5ha>UmnN<%R@bk>$)U^lFmxO? zX{B^Z!X1>ju;q7DQYc8pw^vv*f09|%4u5#joX~W`Ay^rgkn~15nyBtTcd(@X0fGw(V#9HvPv3pf& z_R_ecw;l{L;=z`+-i>4J9M^1w<}UN3vQk^DFDo6W`qZl?6zm(G1a(r-%bxQtH1bJvxQAv(P;Wx8AV#fm6Sj1M zP5!^wd&{7@x;0%pAtbnl0Kr{@h2ZY)?!kTGZowhA2M-q9-4op1Z3TB0EZo1D-M#ne zx4Yk6->>uQR3%j@n5pS=jPX48ecjotwE--wt=EKT zUcaxR95R)uWQyjpx&8FAp6#2zlE(x9A|a>HK4jjd^+nWks#q|)@Xn{h#tB~WarNFU zt~{T!O}-lf19ss?Z!VYg-~1(}L9-6#{(U6#Fo^L8&o_B+F<5SJ9R?veO>_Af`0O@{ z**psAE3B9!f)oM0GT$WQahZ&lPINn|8YAV!G-0|ED3pd&J(IY4(Z`W^8fC^pF;EA9-!+*bRkmM_a!b$G?y5KP-%jYt>{!~Oj^1dT$upKdX!2>d ztNwiIpmk|T20beE=D}t#V@azj@=AQo?q?-F^-%BO?rVgzxnlzm;-;ov?Ps6;`QN{4 z#hU6C*P@%qV!@xdfeuJhg}i{nk~5}&gVEdIOCE}F7H2|{t#UJ*0Vlpm-LF?i#&fMz z#;s*XdwVziZkopig*?a4WA&wuhf|dGGWXZt>-EB?@{?f2-Gjnp{W=f1S$+k2F9`?? 
z6cwPxG{ud}#yzAWMzA4EHhINb&t1`WrCiwEgkz2$ALcr}-{2H=#1%Dh=4`2b2vOv} z31^}dk=~v3J~;>x(b^@}E`lAj7#^4tKv;rA0Obs03Tr?BJ8PKb4VChj@Mua?BMBV{ zne>AXL}+!`41qrM(XipJ6&@MNpSFcD(*o>pHA8LH-h02XYyqa<@Cq8Z1mn(4Z%YNA z03$IyH4CTg<-!M+1%Pt0%}_yy>n9$CNBKuWrXA#jHTF8ksE*WT z7{FFkVq;)lj&cx^orOn{Md*8+MwTCXr6+rxXKmRX6uld`uEic$r`0pxfeFv1iL<%A zFy{I+Iw_?1pLKg7++UOogpk_*sI+o&YnP!?p%);zd94=g>x81j1$I7_4Xsnan8y`W z4fK$JpSI1#Y2vM-Pnnb52uuPZVkuq{PjqfxIR~z__g_aScN`in7O<8eNV2P zD7Ux8eyBIt*##lo&S`b?N>Zb`Q8ukZB`u9 z1fG9)6%RVkaY+m*wAGXcO6slfL9+kQ!+)|pxOpe~PzR83xZ;9BKY6yUJ0Ro z9fZ%SeLt@4_I_t*6L0N|0jyOv_k8tato$=9#g|A`260y^!f3k1yJhHNPyfO6)nM$7|8?>uZ~yy#25SPaRT#pW^_Hg^ z7NGv@ns2+z%$t6xe@@(~I_K%0y$ei-*Vk2mo~KJ$8k=<*69EPx!I=%vv7!V1#sd@Y zp+6X0g}ec(+sHllrwX1<+lVsHGmm<$b&VF8HFevE)nJ87+SvOb+9CSheArAm33W%z zUbYO51JwBb2u}SF7p*4w&98z)@P4g=`fgXkQi(&i98UMAhBeRB5}_3OTW7+=@35y~ zq0=b}Lr$9qrKpuH!P)2(Xd%56fX;k#acT?bo5l|*bw@lLy^{#7>gX_DdSV2^eP$yj z`3XO5x@a}K#aT%N+#q1x%NExl#%t?0J4mq;=vul8^2Zl6dEeHF$i`fSNKAYQG{2TA z3?EfP$r#Q@bs{S|ZleQi9a*F3?)B!ZM(*ACcsYl^R|({~3d?|%ev)8NLz-G$AC0cN z#l7*|L@Ih&1P)ULdn4l3DSBvk{N*aX;j?z5t2+g`D3Um+yOt2unt97_a-H&bSa5z( zIC9RSl^e1-f;;5!@O#W#Wd9XU0>9mQ!Kc``l-w>H^4T4A!}$wD4t-xUH7DM599G_+ z7}>h1N)_FOoT68^Ap=j}&#$5&_qjGs6T zoo0oyX*SFqsrJ>VktpcYHK{(CefXMbqm`cQc>B{=P2FqhLpMU7(}1KOEHu-=kJ5|U zr}FA>|N8E9WWR53?pMhexn9d)_c%BAFEt^ohhOHe3=cm8-K_Xe*T%CFzjB&nfbIY@ zCHhBy0)d{JDSy(lmqehWD?m(Jgu6_GhU5RRrWeQ8xvX>G!)?x8Nn@`w5s?}fjC7lt*9l!Bl=$D7}5SI#q zu9N3mMRcJ`o%ow%DEA&!qyhNh^!2uE!ee!KO`;6BtS>VWm*nvV>508Y&y<5EXtP~P zziSj**7_ouK;mQ$cuBK#_!^a7u4@gs+^Nh)A^JZAhNqiA1AMHWJ3BOVF2Y`@{qD~e zgoP%Ee-M0373-ROj8fv@Z7M*Z%UZFDrll z*jk&q{v;pR%*KbyX*-{megBGt|D;teG&Iw^7?kZI?Y|=3{n(~Pa=kU^dIBD;X&FYc zf_u&5s_r1O3Sp?40!mPvl>4I51YRG&Kcj#9So;~pUZ>7!u@hmvcal~oK z5~-29xOjW&e<{h}^RBRzynnMx8+6xZcA-Y-d%EzW)HBoU37BOq__k)EOCEpcZy&JI8>!9F}&sHBl>bp^x?eIyU9vn;>p7n0`kWqumt=g9g zp3GQ&r^lUc7l=&B%o8t)|4QD$($$n$UFMQ?DM4bed`u}6_Fgl1-9X0M_~P)DQyrm) z_+01uL=5^}!Nu<|z6D|%;$|-ECmL_xT2Dz3UURz>J@8_F6C^Q#;qp~H+eGe;aR=&= zdN+T{KnlamV6KO(#o}U=OPc!YuXg8Pp#iK 
z3YZ8dCr+27zo)UUNxtB|@P$#d9&ud-W$t0FtNFUOhA_IO9l&NNk;Zm%niQm}UP7wn zC=CM!5WIlZ*xV`29`95&YbMD8Haa^Z7hYBQ)tJz`AP!;NVp(zC)1romR#!SZRIelP zdk13BV+QZk?AcE%104M1%^E?F+z5id$AwcTo_@M`j2&{7mw3!ob%cb;fx)I}UuWkx z2BN16G{%jz=8v*wG_hWt=ztZc>kTYws~!gdA~u_w^QRp^h4J;;0iNQ@v)XV`!{7V% z)A*+P=SH?b^8?IuMG!+3a-HH{*@bd`%+qdF7Lq zIHN-kxYq(F(i1_W)n_+3E~EDAN}H1in}gMELBJ_{hxT_EF30BItLXZZOR;VUFdm$g z)4)7nsav}dXBx0aq>iD(`vEy;dT^wQ$&7k*f8y@m)9Y5&Dqx_B@hDpO>-KX?=#^?T zvDMKM%%W!#9HFPZ4IxlDb-(V9#lm7q&s(k&%z8WvQFi>Y$?9x$i+NjW$242F)QPV@ zF22C{P2M84a}`OsYFefQ*$;i})dm;xy7Z9mLuy)oHcGoeN2DE z^ww$VI%2!%YOSB;kAX~FiguoAdxphj0b>?Gyg0P^Qc^Gwc(+2Q_Tc52S6hkUd z)`nm(2CD7&V2P!ISX5_;TTOZfd)-JdIe-1m|sYDx|#>i}PP{lX( z@Rg1|c0nFrCyUhk0tA3&h=Jed;EU&OQf@Me7eS^yw9(-qO6%db`u)MxiVj~1H20`o?l!dgn`qi+g1NQ*fYgLI#6r(>(_g}Hs1J8BBa!ZsN zNk7jbuxzo@E8hqrXG)nR@jP@Zj}Yg7&lfO{m)n~u==&?|EK>0kv$bB2eZLcJmO`Pd z!%CiJW&2=xe{DlIe~gnDnNnm!2iVT25_8=#phvs~FBBpdblFxu1LF}L5>+7D>89%9 zE(@4OL(?i*eJWCQ;8+IEJ{*E5^05o6=C6hgO_3Foq?(&wu?Ap0| zd9j9Va{Be)HEJt&7gGtv=a<0WhW`#nDP=XzB6$vF2fpsK4cuL|B(g{&I^W(JymqXE zgK1``(6}_ieflWyTq)VYgFz*JI@z#YswYq}ct*&o}ISQ7BS!WMCw`+$BnNr+zTzc%5x6=h&i(F2V zL*|{E5Vl(MuHIHrEj|21ho-OKKwa7+G(b{G6{yv%o`WBtYBAC^z3}bOk)M<$Loz_a zA|;(C!hFsth9<5AwHxytO!MryPZY)#RSk9#;1i;SPY=L-l$1Y$h+xNaCDERLXN6l+ zakmUd6u}&Cb}313WxVrb8Z@KWuckV$>!461;ZSPML=Zi4ZtX_L0`c9&v^~l2oJ*n$ zkOcIYSTnapzIQd%zm_xfp#v&KVJXET3%@XOjC#Ks`c%Z(W&msBU1HuB$F7G{VyQqC zU5()D`oh&_sN*8=EMIKDo$`!46Z8Z#)AfLs$Z@wlA9>_Lq?%Vy4M+6_R+7uJGf|Ta zlGL|#ArX5PwQQA=5>8|fgG~2AGxLO+A$`fPSPLS3HUpQ4`xeea{%~&aftSNUiT^zhe8gKBS8w1 znGp9(Zuwfqb9NL!(dK%CNw<|*jlbPk0H;=q<2xz%u|N*b73MajQY}%tptj|^SO%!) 
zFeGYUMQsRYY)=_(kP+kS>xR?wJ3w~?x;5-Dc0U|ZIFPCsq1A1eBwA5d@bD`5nprrU zbl8~J6WF$^acFYF9a?z)H7<$*&?Q}U-`5co38JDA9;XI|e&DsawRWFO0{~)6$lw^U zuUKbpIQ+8$)kdigv;AjX+k-r?YqX`@Zi@GLfWpp;qBvL5*o}d!uZ3l?*mn5boz0RWV(XDzxI$ zWpA~iIP`{E_lLo`-^AG77s_PX({O@umIG)4l#lQu+YoH`Sk7q-taa6v-)EsgG)v4HR2W; zNW6!FQKzAdZXkoJRysPK(_u?g!2P>OQHbqYn-;)MsN89Ptfo=~x=7{D$4hEy?6y{Y z(&BsXO!#63oCgr}s6XaV3)9A0Gnv=9Z4dOta|b((WB{p8Qtcc&L*cdaGqkpN^CQ3C zGabP~T~QDv$JNIM%O^gU#`10^w8Zl`NxKlu3E;bB#<@b6RK!lPCHAo4m%}WhV?Umh zt-=kPY>u&7%_@_V(g|m3+JYx#rs^OtK=p-aYi=tKDCTMdMO|aQw(;{&dEGei_TA#U z%Ek|Lgm_}!e#>sW{}5aclm7l~`bH||fvqPj2o3|8+)g@$@fgDBUE*i4q`C;*?=PvK zMoaytKryojAWI}JrPa{^x`dBF|3E5jhDCLz@GWAMvU9NJFQjTzu@IFaKmrVA5qjji z?A}EkkP)c+v^jKD#Htu8J7y3O-5@a|UTRKt@HpvGFSy`fzCLlahdwZAxC-?33Ztzf zuK%mj0L zsWWdf^k#g$xniyDL+A5(o)ukNzYgyKu9$MA{oBFhIhO%1&NWshSqrM?%;n0lm!#6@ zB|VwJEywCg%_h-~AGiF#&7Z9A&qk*n3wKpLg4<@dSE)l z4-H+v_fcZ(6dHE#Atv)~;G$Uzw59FAC!z_BXa?6dH1+3o9H~^En8FfLS_x(ORjHuK zvzxKpIKWTL_h)`U3MGph05}>mfhb+Yn#A_81d*o{vh-QyWgT6|8dsg2kfA58Hoy7i6OW?!Qjj0A-UmrTFmW%97=n%6+ z3@@fG*(Bu%o*4y!QEZ`Pnj$uCb(ad27qjU3*6krw(@g@~Gi-(x#h+lKV-?-2Q;Jw( z#Nn_-p(Qcl&AU}VPqT9rN1E`gHVbABy1fb!w5wN$?D_F__e4nym>28tb&`&edsrEi@TBJDLpt*8kM%dmf#5xOKYom1ZRRa;5AXUP=|9Kd518DZo1O?&tI<(Z&)R zG>EcaIfdPN=CQ!IQudCmS-*L+AL!*Rs75Tj=1BPbz?@0b+Z29HQkQ%{ z_kRF-Y}kZ;D|Ehx4cbmFIV0NCifE3JSmHNnw#%ne^Z`l{!ALxzN~-jc0`M=xG!O0w zF&q0{_y1Pb>n*gD-ee;yKIH+x4bLiuDT7bQy{`?C9_*4RzKcIJDoVx>31tOgY7j99c9+128A1isCgy&qD)~LtP z7Jw#!aZsQR5-K7H(wJlivbBWFoHYR*y{hB;_A&nYPQ{Q`xZ8cC`GUDpRE+n#Q=PU_ zmYOVu6wQ7!>vi9LF32<|XMjnI;9_v)Gc|KuLPx^%)wN8HC&=PE1V#GpbcqCE;18l%CSRRX~`d5jddz%R{dx8Oh~ z;~J}0eF{~j{uMW^F?;Fd*G2VChsgHa#Ipxxt~;>9@k}DRV&bJnA&+$Pi#q%s$$BPQqG!# zZKdOL{Vk*2QJd}MTHbgf#KJau+MAo4SE}2HA!DcvDaYpi7pqsnbUrnuSbXBt_q~Jw zii}g&C_8WFm<%48P8$Lyo@jxaF(|exX`=1?NPx@uXTLa1 z$BN7KNg%b^v?Mj-Ezgl+L2Dh|@~_Q~yiMOf0$}fk&)==yidS7&pH?RhY3Wa}oK1%~ zK>L3TS`o&Rks~J-Mt|9}XnoUc1vYxm`N&oeZ%6>+VFJ9M1mye7xu`pkT2)!h0U&ci zVSm0dn_J;AIwS*=!)^`By|Lr5LBw74U8Dd&o2niM7M+6t$-LC9GD~2+EqM~1YW?>c 
zC4x>4`vpPh4S*br+S_;-bA{(qROSrj~ zh^A&0UA_q?;(XL>Z~fDV^FXFfsS}GBMCq0?|D#ZPrtXI)^(R{|rRpD1AU7V7+)`6cqGDZJY6Fvr94VsvGP@>afC zi9i)7UIv@eXb2&?_-ZNd3|eOxlQp|SgLGG#MNl8l(lhis)k`8XX)F=>b2(5Bu)x*! zR$V^P%=GJyc5s&z&8z@WAsaqOl|*9a7_hX_3$5rJ(!{o6)=-%_=pEAdQ@a5o8$xp0 zHz5;55N^F6%i#uz^(op={h#lZ+P?Fl3R@YgDMZoTA^)s3;VB+!Nm;nKmJ9 zS#+|=H$Hk_Kv`Xgl6}YdaD}m>VVXguKH5JfM4+~ilK2JmN4~|y2#$Plj?ld?+zRt! z^mg_1+r=$(&px+8?@w0)Nz1e7yZ}2#S6!iP`L*+MRe?eQ@ z%JxinNPYs`8y%AV>au8sD`TA$(u~lEFkBm8MgsKYZ42OhNVJF}T6XPx2xQ~h96egi z6FF@e=FgMFqgX_DFg=wjh}PkwC>pa$HVT;-BEAuHk~yEgzFTsM zXqp_)Tj(}pVQ5nhA&+a~p8!MvsP}V|;4BVfeEbRh3K+Si!!e_Y*8msD0Cl!^E_>7f z&jXGj z<<1e!%@$PNVp1%QiX-oeFgbmwM`-A1Xy$O*Vd!JqEc{}POxYlPd5(O?v7Q(m_GrEv z@dQ?NmjEDOJ>Cp$v>qxHc(m?&JJajL!B}%mD?^?Fl zOgrrJIkxXhMSFf`pFuC69$hYNYx*-cVQok~>)LuS@}3Vv1Co73qU@Q%(m@;i z=0S}xTc{`Kg3azrW&gzG39g9QwfuuVy1;~$`{%LQwqPua=YWxN9g*^(<&r2Y--6b9 z?TH;X$lCD~MyO`SV7!=jX29GFV~M8_&D6Rp{M0J5d<8l8>u?NZQaiRrmot2LG)7yc zRoaah!7P5UH>pz&IiR%RPl>jj?vHJenh!ZPymjKtkTH@yxALo;O<@*7clFQ898HAh^boEm_*qS^5#_UPUrcRFYN@oMirTXIS^ zHFGGi{WI{AMfdpsg0D7`!`zQ>voC_AYU%hUK{R4w&o#zd+JjDqe0$Mt5Aj&0+qHmY zlkNDoB!hvvPCsTRzCD-Xgda&Sqe2jk5+zI~XI&2;{jVM;O_L~Oj=Uc14!OkCNhF$D z0KaU}btqtSEWO4#^d(R0_d?(=RgXBBK;WK#D=cpXM}ByAMpOv4WASVOz{Yd40oOV~ z+LL5W`l&!1(f)E{WxiaxIuK=&{}sSik;3=B>2^cUdXJ6`(}}?BT&7xapPLK<0W6*o-}0>ywR%c7q>Du(C4C+d)Oi^X*SxF~2ymgXnti zImPv|bnAuLFMRdba>hn(<2mKD**Y&$p9hnkQKg=)2n#;)4NfMd%O+spQjwo-~pM0P-?d$LGQX&bt^`ovVs$!Ic zN#n3Cu`qnYbn~j8aX6TsBZ(wy+9HCDs`W7=u-WFF4q*1ME><6H=R7$1h6|mqG8AEb z&&a=ddeVcWtZJ&!>9U_oGDo}-O>QXfNFGwzNFj8xM3gXGHb3G?b&I=R)UJ*lkr(LulBf*DLXMCB^ocsXbhLdu~KqwhD){S9sFH~rgOt;%9TgS zt25ZKm#!l58pwm!y(HgAUN*9aF#w>gsR$OMuhH!hAz$qK_ihyQ_Apdn!!e@Js^YXs z8c*JIVz?TPw(w8f?zNc9>v1zuGZLv08l`}AKXs{^Cg|=HV;czUazgg3T9ddi-N@RM zYkCH^*O6ZXLM|}3V^_}c-1(`Ce#FMyT?c3X&*B%TpLWNyaY{wdjY7#onkqvk%WpC) z0&#&1E4y~wITiSAc)qW@KY@w&vAqkBYH8(X9&fLZxQ@8;ev|G6g7`h z*h(LYI$*oW!Dkr~DZX+&5};(LWI2Z_8M#r%A0e^r&};{Ej87@hZDBmzH$In2lDMMV zQ^!hNjreM{8NBg(j#+B6OUImQ0d)+V(2arMNRe{IOu7))f1xlrbEAC;frmJ1&!C@D 
z#CGSEh#`%4|9)jT3h-|8op#~YWF_aqd*)bvEnH0)6aeU7q!8%xC}4;K5QXe~u;i>= zy}mY#p~+u({0Y`FCCiA^uoEEf8~9325XbL>3t1d#w16(uiXI$E{BIyRSyfr5&7l0= zg##_-06;s=>DD{%D-Xv0Y8+E>A|LDFg7NEI1?nf--^W@H7Np`^KId@ulQ3phT@m$P z&z`QIh;;W*67gzah8+SlkN!)ZFT2F*u;?MCI|t8Xt3RrJN@(rQKErRpzpM4_x7>f~ z#bd1-_|H0V9LPX-6`kCSSr%91>bK8krD4xNG$9Y{dgLkbFg%h_1Rl(9FF@g&lDXN7 zNA8;21=yH>gh(2LJr|s0-(&PNNc0k7obhAOp-9yoCsWE`7m6-lU3%V570AeE?L;;g z5B6lx`XW{6w;#-Wc6TLP^i;zZk3%OgI6W_l0AG;~dS4g@ig16X1ew1*CBWm- ztwdgaM~%?NXO<_c%YPi$PvLhpgBb=GCXdVCFeW>tfDnZfEWFxHmG&o!zUyx# z0)Bxo*s}1Rhcg&dqqk)3^bLGdd%3nEK@9Hu1t~LUK4*K84I$8oC4*+OBdEUcxBYBUp6hZEy_ONh(qc=vqh_l+?AiG`D>p+;lw8xPO2*qeNp3v(>j- zQ%1AalKlrK1YHlfuM#*OL@BT<=-(Qj!faL{{QDW{2aICJ_**H?#qqEdu?8KkI&|%y zRxQDqX;|?ZI#>>Ln=n9c;}rgAsa}SXRAj`#J+`01c-eSr*0Js#QN3hXgc2g|?8ayw z>rN)A4uK5}%Wan7a-a`K(DTy?9!sjrly#7)5T+3xLKC^Y)hL}7c-q5RXikGLF_8cv+xk6ngM z6Skz@-g*D#aXT3tVS65IZzV%3kRqdm0#}aK$W53id%cS6uY@$gG7|g83tGV9RUQ}1 zpG7gAr80$!kiAK5#yP0z;NXGz2TAjr8rYvNjIqoA#+UmSY|no|2;kEkHAV9x{oCzc z3IN=<>$kY2{~jRMg9?D=jQd@-wEkDT+P{8yBlQKQIs2z(=D&yHxT1d-B;u(c9}oDq z+x=7k_WB6hckt)D|Nc*PAb^Qma3J)+{x=%|zw*WWE_g!CtWN!}tMtG9BaHCRmnHge zp!R=*XaApzd71!Vv;_RCx&L|({}12pA3q0pnZVoUsw3?Ee zz1V0;l{875g6lQI{q6MK4{xb#ITlSg(np_lQ_@X#WbY|k5BR9ZrQ9a1;(HnZDg^~- z(CmNYi2W;ySX7;a@TG?d_(gQ|-{owhzxR)YfevJRlo0z%mgavAt08fI2?L!#S$y?B z;-mgEenk)q5j|F7J-v_ie@O`ZkHZUM6ci+CA@%wnaTNcID+m(?c0%Jl&1w2SsQ>@@ zfBYw2!w-mXn4tab2`kY6I|;VKEBxD7arBlKFFp1`j{ZJ)8WGq@-l^PX;@`%s3*(TG zpsmL8{g3RKe_mtJ9AGC{9hMm%|Mr6I!N6|i;Mc(Y?YYumk&wi6aYU*7ZOS^l-j^@D z&LUU;y4(N#c+i7^ohaS>a2WdA3zma~9vcc8Ao{O&`!A0{t|G7#MDQp1_y4?4|NRF3 zUwSzHJf#de4WjFmPJe$o@L}k&=d}O*2>*3LVN(88uv>taJxYp~5$|tL$n(z=aNWK+ z^7l^wI`9O%jFJ1>4RSSr%jOB@86)`Hy}$yGJNoGY*57XM{|@87+(?rDzgL54jzh1_ zy*h)(O&(aV(gK|17#GwE;(uIa|8M9Ejds~(2A%pJO$JEVtY*T(t#`=&>s=%{2EJs4 ztJibKL$1WU`&Uc84x?(RTVIj)k5gU>FlyIH0zC)wJ2ZTDSvrjhg42e{nX{3cK*#kX ztxA;+GB4C-GHdn9?GKWVFuMwwn9c6LGA!<7Qz`ZCehWMoKhtq_Ct^0aKeg#{IxP_} ziX`Vh%5m1a#f(pnCi2AOw{9S2A1V)bS@2)a7~w|kR$I-|SVeL}j*=+4yao%{^_71+@W>I;rLXRFOK_>Y= 
z-rXM3rHen19Cm~o802dq(SnO3N+DAm1_?e$Pt28e?&YmmHVZxVjP<;!kEf5Ajkcxp z`-#5L_q3}O?emV<`SSj+WUz-@gkEj!wfJqXgukswXsf>GakQQ0(XI3d5UhgH>5ql&CrjUr9Ph;eJ+UKi7V||?FdwtsHm52(Ljx6Vs6)m%|3|au9 z7GG`LpEkMGaaWD}m$y&y7)A;IACVREnnD_YHGM0Z2fhhl`xA1zY9F{D8VK_HKDc`T zv5_OBf8rF1HLG*M-FCRp@`vST`r0+tQ_Y7dry9F;?^?$CH%U%JK8n$HYg;ZplUT9V zBsJ!1Hclv%MDCIw^)r_bJ*S{B?Fh9u+ru1Y8S`n{0&ZhYTORf>t-hIz5*%d-$a9Ua zD!d-Tf3wRALm1QrJGg_sJdCW1eB3-)_9o)0hua!-IB^A1`;Jlcz^D81x=0+p0;*59 z2QszyA>{os$pNq5@+IQ!MSZrtwJT<7uBeRNW6yC0p0-Mt+R9Fw`)%{i*2(lyQy$mk zve!r+my?PeYy)H|I z&f`C`MW}cQW)bWX*}lB3j>0zqrH3Mm^`<=0i$SdS2Qla75mrx~h$QWT=7{-~D@IbM zn-?Ov(KG}rcziD=B~16H3Y7n_3OF5d^JP-Tj@{e-%a;ilU2=thuRR{%IL$Vuc?pz* zhYNt0Z6~tD4Z?`VheeEh!y^Xc==YJfkvI93V>udM^V8@Xp6jTj?VaaK1upk!EDT^Q z2&+mD3?W|;X!9&!6ZmnK;Gd93qbt-06@*1 zc!-$(GTQrF0E1afh#T$5F0QoT6A)WGv^SYI`hGN%N5fHvR9raf3omJAqE-qX476Mz z27}>Jd8@)}>p+^LU?f;aV)Cc8**VYXS)Ey1^h05REpR3S?VaoJ1LSU3Qap~*a9h-_ zHaL&FX$Ne*cooMkQ`(raGJ%W7(OgKYRAi`bg1$hFWga#Yh?a}-zA=1gOgh>;|7+s4 zXrN#Z3=}A`4FUBADj;$&8MwTIKs7lZpxON4GpeYx0Sp_EFA!YI;&T+-?$tj(_(8v| zY416aO9$WwY!?@9Slb-@(Jc)F6bY-ALe0BNh~U_7N+b#y?9Hrqo& z-tmH)u=v;#1-={6Tc516uNU#xR;B7E^;`vOQLnc`fYs_-J&$LF%B-1v)?L$~E&wI6 zDWui8S(M0>CR3vI)`q=ATajuM%orMz9p^pQ0V?$^-z{ivN2Mg0m*PUxP8ibjd%XMz zB$GEj-g#nFe0j)FlQ&-ua!?;=8IqNDT_4dJ%;2beZ}_coXEeT0nr$$aE22_`d075r zk-=+c(Mlw+Aa^8esc8@Nouxyta`TwaW74;iuA({!Yi4yWNdo zBuBMo=uT5G9{7ND%brLhJA0*lxP+WvB3j2_SD?kPy*}nV_ifpBg>&8>%`f^Ib2nSZ zyPv*|cXgra74~u=B`&{bM;D>0#h}cHdq}#I4`2;ag2#n?7ep=(aAhn8sVf+NKs}xiBSAQ{wVE9DVFsesHMenxMH9?;E8-tsiCW&4*_X#R)rZOZ7j` z@&{H&DMXUXqA63dLAW5Fwed_=B>7{ooiE3w*jl=^*CWHIr!5BUOpjaC?XVXzq_#Px zkv-dZ)jE*4UvjQXOcxH`4fr5}fayq?cD-~BP~Jwu(f0S|20x>0awBB@b@fp!OG5VY z7SNoIYB=tW$7Fw>ObLIpqSvCErCDj*AN>)4BO2)WFit;f)h+{4r)?mGEp{}WBV7#d zD4EG%P2ZNt?Rbe2_z8#0=%pW&O7q9m=R+&(RGoV@i_th#BH{ zzo_GV-WJwsIr%bwg~rB6d#(XVVrY$#*`D$+8Nk16yW_;6U?TyyxRhE8t1{G1mFsZq zf_(VVB!B@FvXR2Ale3GE-9Hh$&CO3usUxuC^f>l@+C;7L`T-ofp%-3Q`pJLqY-)B05`-`4=myVi~H=Cgzje;WLDXjLmMVfB3G 
ztYfh1%}BqTlaT1S<&b^YRnJWM3ToLndws9(r61EN#XX+QOR8d5Zr74xF{|weR@F*w zyZXAjv*U89(82ucTRU_>z}jT+kSRj`$8PC6n;B7%dwu${9Ev$DuE8=^$2n+`e;=n& ztsvW7T3X<&Zn-AELvx`Z&q8s()be|q&xU)Wnx{&!I@nHk_{n_-!ReI{&Zku7S%_#A zcWS|#p(W>gK|A?Wl!u3fi%n)={)Hr9-HtPVz%xPLqkC(p(3md0rW2m%Q4&aN8OP}3 z**H7)MfW+m?Eb1IqSiR++B5b0O7qD4d4ABy`W^o6p?82*a-x%g<)iHy`x8_&?&=IP zjl%Kqw@{6%ph0Wta$d+xKYNAS{bMUISuTFw)KqEcUDUGUeVVIDb&5Om5KeGja+%W0 zeCr6{R4#sn6WH4xzES>uLv#vvcwhqgpltp z=({&T+6*xGVtV0ppf- z`I&ti?tq8w6a!brkIQRbb`JxyUBO7{A4&0otaa^+Q!S(ofS5G}Q1dpn4_-rq z>oP|3b5v@@W4(={?hsb9^jHQT)AChcc*q=H0Gdx;$BOC_A4%g!Wy{%34z1eTs>Kfb zHJhm_X(2ApjmQzrHkA$eOs8WGu2)d01gb&$%AU|5fG6CskW~?Q?~Z8&cr5UWd1`xC z9d{;#YOlTP?O5^o#Z!Hc?9V->Eth1RNy0q~YeEdqK0M0SZag+?NaGRf zGs}8PjEp@4B0Gsdi*M$LHNHWmo?C7n>A7kJ8O~T{3ZIjCtN6`+*g9VJr2}FAt;y>5 zmo=b??UqH)NA9~7JXgm4xk@(juk`?ykPBF?SkKf|O#jrC(6yS5i%f;;8fPsF*r?pQ z(8O?eJ%c-Z^1V}1C;*(6#K;}efXk<|AH8352tR(Z-5)!nBC}|^ea;&WJ3wRFRH`$$X@Vm$ZE*F=Tqm0jd zQ!g~Z*G>-oVj{Bl=RfksN_|hmiH1Inf6*1`sO^t#7B)Z5Cp8c{e5~yjl&rpIN^!5c&LIcS ziW6SqUUoJHK$#;y@l`p8vl-n(M^zqW7dfaec0$bQpMHwO7YGV9_6~!6WL#x4O(JLc zbtX6F_O=KdhwgU5*S^=-jaiPGKkBU8jD1EZS{3UWz;#~j`g%g`b2X5&|c-1|LbCLvtO@MqyrSP4-WNv za^q^_Z`-k(2b=EhF0U0n-Y9_ADloS0q6i;X=YDiTfcgGsH`bWNuh*A4bM$r(&z zTJyZeS0Ov-9Z%vE5`tNY&+L-&udbs0S+@Ge7aW#8cH`wO52THpTs)F_*JEDKEqt!7 zA<#Wt-`|0P`9vc-OL0)`rPXQykH8LQ&1PSQ&&{4xl zjGEVWYK$Mpa7p4Qxk0_9e$P)8EqP}C z*~ECCtR@N>0^gnLrSgTA?xvDUY<2Cc>|7>kY{w&~er%pwRJgFdK*~P+tbd0eUPwMt z?~#l!`8e+RJr$wxlW6g+n89T13WC@HInS6M0W`5mOwIH~5F(VT|IvvK7VzP7omdg=X$C*Lz2pTNfg_K^S?0eKrR zRcV=0Gv3o)zH3Jst6(Y54IF(_>zGDk)d!u0TdjGv6!%lZ<}o~_Qd4x^SI|uz_F-_g zAK5())2Duh`J7q{%OHD>cCGImnsu)o-}dl#u#z;UcqE5TDtg`LBT%8&4x8_dzGUe9 zE+?{aSdRbtD$KY%w1VGZxgqV+<&C!Ceu4ek``Felr$g3Htu8Ve&z{{9(pN^kaH`B5m{@O8Hl;c}-N{Y7kG-c8k zK0t|h#<%0vzsnIin>wjX)!63YW1=MT>}lq<89yFR(;v2-HiN4JGvrBVlHWN=`oD>4 zdqUj@V-Ut!gI6rLeHxZW3C6zwrlD1C-?B@T2!D+ zqn+6PmC@Sivy@l+3T5CB8IMjQiPk)+It|oOo_AkQT{ttv)mh)xzrs8TReoo1asa6^ 
zyX}L74hb0a!tEfu25{>6&M1z%)joXlodS1mlId$Cbl}_9ji9?maSH5&#!}J)O||*J0%VOLFpy?qQ@gO@*9Omdj;$zfUfMT=a^LkL^;1inP*4 zLE>%GibFWV?@k$?&;8C#$Dpe}p3(U5BzrM6T5WkuhCFX?LX*t29N%pHiOWOAW&TMZ zVdtn?^qXIK>^Q&c!e}?LxTZ|vfdk#2DF+?TH_Pz(9W+TvT zWTQuT=`#$}XHIgtm#IGme(JmG_-d^spBai^{Vn3NM%#6RP4+{CmK|K;j##XK@<)=G zTA2I#$4*SIi|UZXclvjg#e-!pi{Ip#M-wEYh;%yX1t&Mso+(;4rc+lWk@6#G2cN!qHk6a$`{0?}uGyBV91R83-~L)W6z~RxXkA zUZ^XR;F#Z=;n`*UNL~?B_9)@&N7j*{*aUK4c-b87T6rt>Fe+?b@Qf{^e8N|G z6>kTRUK>(6N8OlS>;08bd+G2($&dKrT383zVcBQK7!RToVkv}yw>+Uy0?4)Ecz%X0 zvnPOqfyNN{YOu{(`|%A}yMZW~`m`yTcz0gfgV6uuc^f;I96J9zwhDdYj5YyX+p z?_00mW?^2l4bp0pI!1Od?fFwuN&G@0G)FOufiz| zl{59kh{}(Bc>q$^6MzZB8i7GM5E8KM!nm$)Z-1+-b(&o`3UW!TNUoKn(RCyObEP$# zL44!a@EF{h^*$-oJ9llQWU-Fde-Pewdx$wfrXn$TUgfB{O0OLC9dghiduzID|hzCou7#z^`l0Q>>`;j14o^J2V=cgqd+qP zZp4w$qfDqiL{EEJC;hzdYo+JX50%*euf4a9in8tch6OQDQ4ldeV3ZOhl$0J72`L4o z!=O`I8pJ{*q#Grrr5R#S5ov}V8ikP>LR$LWb6wB(+aJ*Ja<(N|I=nH}a@|^8 znI9%?jUjhY{bZH4G7}QDI%ujV5_muN*!aWuB&hHwZE_6C+QXOHR&T_3gxqMPH|=m~ zce&nZ1@?I{)qAj3IAe88LSI{8CbK8Ag29J8s(MSP1#j>~hq4K^(eLnqWG!y-WQ^`4 zb4t~qXP~pkD4jU2uYc6~YyS_s;~a*D3G)00a3TxZo;yp!Y<)}85ko(3`i{jWNR7Kr z!ocjCXTu-<87bYTCg>CyM$h>hn-|2mu(yF4!>lh0!Xf?*%CB+gjafTUNARjrtOm=h z8b7Z+7+gtePwdaND$jCyNRqC#LYuC^8^P{x`-5h3up57vt5RjnH=%h!L()O42bo2io6mg6C@=FP$%j{b1&Z-_R9!wv0&4u?sUx4Y#(+ zOf4y}@LObl5|wqkWKo^=^N{f+fVP~53vb?6C>Ela$gBS{8QjJpkTf%bvg1WvbvrnI zwu`PcKuqMuc|?iw{o&7RYMTBzIkYWhI>VA&>WM$D1(!c#LSR%;850!YU!uc1+CHji z_a5D5lPHp*7PWqLWv{KVaJ)dJB;PVlv3ZtJI>ud;?PS)xXJuu>Y^2iF?GJkw%wvX3 z!|=}i7L~@coY*hPEwrnA+5?6jjJd9xa^@oqzwkxJ(vs92PGq9e7aQ&6JSXX{wfOSK z3G~m*d|rzT_0o~IncjLBzh>mq9i#mIE`HfFICtfTr#d{`6FbJa^Ih{v&7#I92xPYD z#+|u8p^uIYY|V&9dJQ;_PW+9&{-UdEu|Cop;B2jzwKl4xo^za(ZU#7%A@C@JA*hLszEydQTV0IWV zqKA%99Otpdf_AHcYLK}Mzxd8Ya)a{TM#n~DCbjzK^tBab=8E&`TEmD{EYEDQ71o>D zw%>1_NoKAT^kC(?3cqG{KPzhFyMmpxi?dg5 zOzOEG%iZzjJ1s-6;@+=cw7Sg?$52geukWJ{+S%?LO-y=UWXNTo%}(;ox@u)XYioZQ z=>m1RqtvQ{GF%0+ei&f?fs;V?`U|HKd^1K6AHiSlu$UyX@}vg|pP(*6kK_(Dy!(QoqLAa^C?sx=v%Z#o=(XxY{%KIK@5t-x 
z)t{J@Rx~KGrp|ZU@KSAO{i@MG*B6W9W^%7WkJe!rB#oYx4Qqe@C8QUT^)88?e`;}I z$GJt}eFXRQ{kiDx8kC^yu~JUx7E4TpaDFmg-YBeF7fZJX-N%R#JGXM-hv*n5f5>AI z$=tTHK?&kU&qAHPya-)08$RFV;%?-jzWn@+AbnRxl6FYJKDr=AEtJlf(@RFn%!GP6 zJ$J>0uCkiwk3SD#O-0yFD_%fUp_Dx{-EJCU9apI7~$c_Jfd^qm!74fTK4`>}l5 zq;7h%=_%ei7F$n-LbTc`o0O2@$R|s4JFF!pbZA6VzwQWK^LC-glDj9V&`o!1)|hg~ zY`P~ehrD&UYH_xL4;>qvi{`xk$I(?uBJ83_-l*u@7v+~dY08YIt#7XeD+bd-58Ou| z$-W;r#=!4T3u9iH*LAcJ@3wB6hemg+I2A)?fX(!C!(lbcP(%FDhLVu(hOCtNkVk49 zVcO5DoSkt6E2uon&E&+p4P$RU%BH7`aBJXr~{)5u)Z>lg{$;r{VX?^ zV)ju;f7!7Tt!^sT7cu1=#!vfk z=Id@&gRN(*lw{a?I#tfbEAF&xc9fK8#@<5e)f{3w%>TOT>bHSfN4#~{t79WOdQ*yf zE1kdP3SS@la&zM8hIMR}-64DDOXIWF31ql!1F>-J9m@~vi^>L#6I4dex;1iF+ICRx zAq@}x3|!N9bXOr}joMO<6A?(ZKET>9m;dmOv?_vx;I%EAm(KLp26mR?bINf^fg@}_ z^4~T$b6^TfJb5v`Go4A_bE|0BqlM;FR+}0|Z1Htd0;6QUI{w`$PK{d(Za=SfWIvuC z8S`AQA5rqOgdAFXe#%B&Yg&TGUxS6LL!=@YC7qR`#Uy)Q@4>2EJd6%VFlie4IlH42 z0qA8+gx>-Fkft+yJ(*2O&!+>IV=dzk91q%A!+4mke$wfvYTQecyfx?@h_mi1Ot%jLGIHH5V$P|Hp<>V!w$uOHkw?H5IE>Qoz)bIoi=Pt++}Ei^0nPD=K z4W_}PL{PM_kc*#kXr&R8b7@dBpn6FC3WF*y!l@g%qCVFn{3cCl={<`|VM>bEX6q|M z-l|d6u`>I@U#X1Gqv=@Rb+-$N9{Khn;QgS4u*BOEnF+SSsKq&{jx%=d;ahp~trH%{ z57qgVSg>Fvck;2{V}&Thc9fhQB}%7xG+WuX$8Po-XZhY@G{^VWEcZFje*Z-`-e+?> zfy{1p`zRIU`Fv;k4Z5$-4~kQxd3jE;8M7h#oKrKTtURdL zygMt(qUXa^+wm4IyYa6(B{yV64gMhx&-SVCBP0zKnNvQM21s=a!d#RDYH_nFm`nAl zd;cny!FBqwW|@8Z8e{$$J|o$$@*$tr@VSjeR>c82*cIqzjY1z6v!A%7u92Ye2<(d6 z@OM>PU>3NC@9Vlf(p=VZ>D=Sl*eC->>(yMg)pd`QBXMOzv8x9#cS6d|%L6~UjmEGn zEDmHFxg!8p-(pLil%IEKHrGt5-Ox^|+iMAZaBait<$T{f(8@n|J+;Y0rlxn<5M zSz%6*dM!vSPTv@_>HlSuNZn-GsGgQ{$9?Bvf_|Lj0*1EU%DcPzqvt0u1lwuygvS{`9+A(+ZMR`+EtJ_M+>w|LQ5^@JwuxIX-Qg5&BvcGJ1*&) zSjj2OfTh^f>UQ7o?{ygbB&i|PoFo79Qj4|g6?64|x*7dK%vbW5^d=e%x?a^-A z_a~b`mz1402%mz11_|-5v%(Rd>8vZxQP2s7x~V<-;JM?WS!5PQo@osbvbxvI)b!)T zAL7%wOH=QSC@OMMW3+M!5|v@zsUe9s2H6aCVE(iD@pMRYMp^P1w^*#aXKEC6m}0gY zYr33Ugf1SxV6)LGM^UH0K74-72~Xwx-bOCn^`QrQC|jmZp<|iZ=(#gOPJXZB3Tn!u#!<%#`%I8Sf zTT6WnN?7|9toM!83R&_q)z0J}pcNCKwe+Vg8gezZ3%G>+AP0TECdQW3_1M`~XWu04 
zY%^({lwwoN;xl=#b2l~C&MtS{MXWr_FTCzTS87}jwU-h5tyV6aN4Sa?$=qU z4NOqsqcKAD0qoR@!mse<6$Y$y4h*-hcSy#Y;ScH94QpsNIJxNMM{pWSjM5^l68NtQ zZY@>v=d(&Qxhd+6kL8TF1ZnIQ6IW|#H)Qcy*3tf|gUYHuXji(n?M^-Y#+{M!ysxkhYKci47Pr9k~$%5f zVprr#WL!tIbl(q0*J&pix#{DpWbDsB@brusbUgm;@Hh@t=Ic?7up4soC~48o3@|8? z*s1S=@+Rg4kM;E4t@pQ+q~u%L{=r+ImYw&~@m}&wMMIK@EIXE|!rcLiq0$B%`z_IG zQHHdI{gx0n+wN?wmYewp(g`U+Cf5DVyY6$=Llvk@4A2}>EPC@-xOD=-1@AaH^Ju0P zOU;$bn_()Cdi_CG4*euuYL(gqww(ix-W)f2Wxg$9M z38AnWzf)|^eBUuj*GN#a#>>Wx@YP6QEO~MY8wdM5yquU8Bn+jN(Hk%G8*vsf%JaepR!^t`xaqc&}@T zm~Qhamy(+5_fOQ_E7wtldk!7^fS+@n9iO7J3Z+fZG`z$;7`d!7=jQ&XS0%Hlp`+zS z&bWZYPJ)hV_H(>L>CRV_W9%0;P>{O8?5)=a2gc3Kk9Tt1C$EbW!jKB1!~HUPMYs1y z81D>LZE<)y%uS(sDjdUA7JARsJ=4PM%<+uuokR*QZ&vKjSG?qIf2{?WoU%2qXmou} zKIx4|{|U01QNDzgKD7;Xv(a)44pRBX9c3f*BKAQsR%P{eVg2|@*VR`68ts?*x%(eD zJpxZ$#0?7ljk>h0D_M?-{q!1ao@ouWnZCJ?IVH0G3uqZ8bGGcu8Q3gEd==hg2WR$T zM~?+xMQ7sQ6gR#|Nh=c8MwJj@rn@nfOw#lGzK?EEkod;L`*~7-TKT*&u0d%dmz4tj z1Yz4Rul2+}802Z6A*UC9e-35baZgdEBR*duH(pT&eagZ<^UU4oW(@8b5DY_$wl*3d z#(ivMB|R4Yc!G+3*+d(AP%~Glo0PscJ>@*VPFvYn3uW$rXPIpM3&!Ol&7SZ{vhcK( zxsX_F7**w1sF{m`tCSh)Xj}@8$&ys0u)yM)?+b<^>DdF9YrSZKtr?a>FqHCDmXA8L z#eZbj&L(j#Z$;_krV5wPl~*xg1uLt?c1o-ndoQ_tSnKxbeuo{aV8wjsw3q!Hc!S5D zcQ~h>R6x3RtOKNuoTjL>2&5! 
zt~%1aC}ezG;2Xu>iT8`Bp2Y??m{J0xIVwE&#I*mzZMaKHb-2#**%kc7Zjp6ayvA64 zhzx&Zw+g#y5xuC5TJfA;$X*$x#@@wK$sfBklWz@W+1@Q~)L&Dc=qt)<=(ChD#K(`S zls!^H&GyBrX-3&UP{y56K*qP)9M?|UQO(-1%S!s{u{kfJy1cWF=38vd*rB3S($8&!+2%7z1FctB{6SrSs&`$xo_Wa_R5!}i2;)JjT9kS+i><6T>I(ua*tng zU1RPT%`ydK(%0)^%fFhLHifu3u^*KZrIF?VJo;#z47F^tG1j&PNB%#9INE*Z8G>Y( zw9LfD0lbJ~nsiv`D0AQme6Qdy5lc;aFge!C?oLtn5YJHl-3IztrgL7%{KHpd0s~>% z3k*K>A2m~TN+TW}}kzDneiYPiKyGW*WW zPbuH2=P771RZIm?oO4mWdPQ?tGn>UQVro(G$ogxsPr(-2*qQzDc1-z}S>hHcw)QPHg(hMuK`n3FS zw?x{*BFkVmYI@KA`_um3j;C_KxKs$5-v7rc{l>~fl#v06AHOwX@jJWw4;be&GZc%k z$~yY|k^lY;oTt=WcOslKI<< zcwd6AeL)@!JNo;be|w}<1Z@7_8}-{yf4>|V313@HR&Vv&lYd(bcv_-7ELhnPPs_<& zS4;uE_WlzCp+j6 z_ANoI@F^W%`s;4|_QT(90c%8d5jHOC^wzFB+x`?5Ec!7W>fo-6eh+Fqd}xv+cYV`n zV8tp^TQi;V-ep%1lv(hU4-b}Pe%s7H7FU)UR@8LF5_f*rZFxci@}u^8d)GBLy$1WH zSmCcjw(FaIfTw(V)%JIZ!yhm3w+J8<;ee+3mb~%%QzU+Sse@|p6phR4yH;-{lYOD&vp1DXOPSCRK3%%d&IZpxGOf-%(}DKZES4X(^c7==W9+;Ph)Np7lfA--X{*% z9fJ=2F+$dks?7Txvk0ODCFqSx-7>f_pg!IbXE^f3R|4zIodrYTk1$FM*j>)$ z@&?`$5cCuc2<=OtiNcvHkM1G4*_7k?lbT`XEn;=hDZvd!Bav9juo3@$*>ci!NRa;3 z9)i(*>CJ%ic-%^&fph0ilVx{5sS~O_u3l*j$p{!WheVKW9twLd$+Nm{8ATRtUlI+lEC#xs+A;jNs^dhq@vL zVG81>_W?|dBUvMaV1bg`rSe;wYm2-v&UUi5AWc)P(B$?S#ke7~OA}D9WaR{jHp_`W1pi-3$W#?^ zmkcL3!xSiXesLTl#N)a=#RAb~p?Xy9;Zxjo!=5`_1;&_gNU+H`&JM6^X_qNP@g_Pn zLF-bZ1Zu(A!E$x!*QdYtVFAi#DKKj0&0UTZ(R^Rfm7}YjaVLF5#~|K(&^+$jZ5i&l zp1kXY@ea^wusQGIo%aanp_WOg2Qe4b{l=LW49UX1BS6^coz`z`J#kU3sZ(VKSfmCp zKP(t}4NzAQ8U^Sbl@nfto)doE0oc@Kv?sXXeC<_cT^Xs4v#4=~wwUw20><>N+2T3lCoDF8jRvMz52K9=lF`RL zF@)@5OoBxAsoX`*X7S9JlYY(7EicD9LHyQr2G1j(ov2RZRo zfI;M8K#ALIm6Y%C)aNDc*Q;mw0rrON4uD8HUffm9Wp$QIOY5t8hb=p;PNx2~nJ{&| zH^7XD;U;Jm=!C@5IXGSh6-20^{?%bm{UsyP-9!%dejo0|QTpTbaaoFcHMj~!ENDFf z=|#Ic6RcX_dZ&-NcimNwdV`bPS_=76Qxi3-HL6p6r+fxl4A_6k8|1%KVjhp5_)+1! 
z*q7P#5cVlWEnCZ~Q99r(`_9&!XEQIwoWqixH|ajy&+pZ|HX~BKg{C2z>gg&hZOFcY zzqz_B8V^{2ha4XhgKhDufY(lBdRsI(;Y@!ccRUfBauisn>Ep9!g77ar=t**z8fQ}H zI^ZZ+AF=NRlH>(KTl~sCF_B!|>PPgZdeF0rKuexL^(rRx&s9}6t-YT^1w6&JO}SXf znXYI1Xj`FbEHbc3!*|~Po8SDG)kInxO3nb9!+Ra4L5CctYkTh)*9V`w{#+IsBsR;A zHbwE3uTN4_U>!)Mxm*(!@e0Px(OqJzn{PsCaUgc(Svn{chR5GXs9Qx=(*-(39j8qi z;wEhMujcYGJ6hrKsBroDu2>RO4%SOg#!$5J6AZkU0*;C(-%SyyzcCV$6L&I34pq83 zsjA(mw&A*(r08QtU!U!b9ujNCLlo<{Mmv*)2P=}Te8ZYyX>NGf5?6v&);gsVvww@4 zRpAlcmbIGB`h8^!Env`LWo^gTk@0ww6;O)NIY9j6Q}ecvVcbQzx{{`IAk>GT&vp4V zE3R|4%>r1mEQxy`_pE*MIi}!AZ|X7XFSI@&$WDU7&D?EMRr$?fGRO7{`jNjX_Q553pgWegSIR zg!!9x6c#Y;b_OP zLPGksVwvjPyG%+pSe8yVp+tc-5-f15JC&)&{2cIpT4^>Ds*46O?}1GXeYH^|lp(Bw(uOJJ+W%Fb;4 z=5GGxha&vRlgqpY7~-;8gVN@vWtfMCw-}F+dmZ`<@~n;h1xD0>y{${Qm5e!URL>bG0-mpy5L}4P~{y z83khH`c z)om#%O%=Th+_!Sjmfk%#>8I(xtj_QMqKqNP9QMsOk=|s%xTZGEjO!7T6Av)=?%?m4 z>)Kk%1Tr{;8Mh&{&4V}0fSs785H&)3&T38-b6FqlHkK05)J@I|bWG=J`XDGOO|yle zAyK0&D@>p^j?@+9ly;xOIzrp}2FabxQOCHd%>_R(D3&v-L|dCBvD_t;BZmuTH<3z8 z7Bc&3t(iG~n6W;8N5B6uH{^#xqwd^^U~@Qu9+z<5CFE*j56U35H9kXuS#O1rKbNTu zI0~MR%vG+&pE>7IL!(}bD>@>ZrWDtN2dEYLuRh9L&Q1azPLYqZ_}j$+)$<7DiFF-B zcgU?4@lLNzI7+r*e)9quL(cKuVkM1pCOv2vwOvB4v$t1o!(ynSce(iqbXoe+sd&s@ zdwmxrMoay{>}r~9f!Q5>&fAGk*T%u!TXb%Ecl|}#`sTRgjt%~yKZ8?t!4vfohn2$@ z`5l+@8X|lADj8l-(2Y$Q`#)XLnnT@#(!ZQ-mm~U$n0mB?dH&5@ZnZ~qT2+oSTDi;K zoRNIS0p^V4x(Rmr?_$dZsWu($DE0O!+o zFrf9#cadv4G2I<4!7P5}aa&R{WS9iW;R*$lr^}G+z;&tX3v4YphjHAU*8SyXA8wF$ z&5d9~)vLgby(zJVqb?8=@JAf`F>sy!YR+j#Mdvt5)yBdJ=43fbwSzpmW%ie2Rwp}Z zLtV%n^;>wwTkx_ZSwdAY$P)12Giv@q*1F^!*EXv&UI z*Wj4RV->b|=~G5dhlF+Ia`>5nIgIA4`yhbm){pkRsY+K@u%CcB-*Y2k=;AJgn8S}J$8|1DI?m1{q zA^YOUnFrQWEAM}(6Y?A-hI18*0UAXegW0ihj9D`aKi zH18tjxm{tRd6J?s0s@nhL+F)0^9AVYin6qu>SS+g=Y~{|qc&t;AX2*BMm6H-90Yx1 z8}YeSv)ywL!a41m;4^9p0{6Ns`xx$70l&fn?D&Ab3P$%j4(fxWbv3D=05&;~XVeE!H34 z*Mxu%5Q%y1k#l^#sR!iNUX}nh>#di0-(MZ2+9ei}LDY~gJPI}wrsk@B_h}CNJsv1P zJ2pN`J%D|~B%^SKGW&%t47z8BB4t8+^T|@Bv7z(Ptjoh|jTsXgX;v;FQjOU?KP*Y} 
z+UaN=gCtB^5!5+s#e>EP?nYKt6E^(QP+XHq-T!oXmjm*3Um`C;d326E;*^(HfExBT ze!bZThH0{t)_kvi0X+~nQk7R{2UDQl;lCC27saCjMleC=d@`5(s{A@{dNsjxMnLk*!c4Su!?@KZO3JT~M9e(0o5BwpCa02n=g-3Wt658X`m&Ez~NwECa z(*eF4=lhq&_{YB}LjW|pyjE=2MtuF?ax$FT{@-r*o6iDR)dLj(%vgsVckd$OSofW$4N8C(D{5I9_rEoUvNQD-7zaiP-3L!AADAt6%`P)}ooe8( zr70(bUwd{4rW@>gFxifRPJhHcO7;dR6-8Z%^)NTV!9QF5oFWALnzZxLIPowDRU2MX zF(pHc3^nt0IB^Y*{&F-1=7PY4wX#T0mEK7k5#nD;$ppJd<&s2RM_$LY)HeEo_+^g> z$%Jc+`$%m`7Y@2FUEw4yk~)C(2e!(1d&Ls27C-7oe0w7Rzh+vxjXp45>Mf^^%j}%LQE4Hu>3^K6#1{1CCZ0ARAlq~yJUVs5B zow@q@W0(XUXoIbgRgPT;@{#A(~1%%~JuAcbFxP)aNf3BUp#Z_4I6#L2cg?{U*dI%CL%Z>r$#n~YN zCiSVx~Pa0KI+* zgTE_)ssXsrx3?t;0_Nc-xU}woCE$z^v^-?CA?CagiP!yb&)cMx!2%D!Q7?D!xCAug zob1Va_#uru!EM2}(4_4edUHX34ZL`tA&;rfbk=*vT&@celPbc`o=_5A3y?Lvq*6cz z#ty{`yle@(7}1|T-V~+!{K%Q}BJDadk+n57sBGow8}K5@;IOkY&tY@*9}HG@V`7Aq z57BTKNI%Zi$op*We(kWmec}E}LK6xo$8dHSglHY_0#*-KgoXit?S-ZtFXPJtwylQI zl~@!@G16xqTvu%Zq_ARQGcH)1GPH;NZbqnGnDY=2huL*XKm1pVI)Ie}bZ3xkxXh~O z!+n)p+ZC;ySe5rY1 z5`!HFU`^qG7S=Y=6_~W0*R1lWL>X1CXqW*O5Cp9Pl~ zb_~wl4^TeAqE1%zFj%Llf(dd8`NkLq0v8e>RR!qO8exIWK?6L`S6c^uUp_rsD?MR= zVE}_Thh~9M80o%4yQ2+zygwY05<}$bRhFH-@(4-rlbx5l)T>==(w&WA9*C{(9KcM> z0CUtkmO;C%V8Ht2iYjh;Y?tS$2W^xB0?F&K`3otO#7JBw5tM2maUX2= zqxlGS<)b9mk`z&c>Ww308jp7ES&<_g!jH92xrxhQ&}%q^^)F*ua|ref8k2rFO58sR z)`z3N=I_oxFjWKN2Yhymq8^Z5rU_z{aMKtAl$j6!Knwdw&m7>MvJ>3-i|R_V6^lys zFn|2jHyI9ue%^y;3b9w(PIGGJOTB`g*RVz5fS<|6i#e;{`l6EgZbS+nB)%NX=PCK1 zy;HV!3XH2Gwnk%CS>(eNKImUjljcBJLt~L6wET7WRuc2YogRvH;U z@OgTd)Fvs_%P2252(H?b<)|MfGNlqg`5S&G!yt#!@MPa%gYC^V?p%PR*)~k3WXW(V z`-~dH6=5_ZH?#l@1CON<#HpD>7Y%9$Iso|%3TQHr*M&R_iz9BckwK)Fse_<^PJMlK zjz{E4ANIl|$=>~S2N>P%{nCS350kC$t_OvFatOSG%sKZ9Y_Ou-8Szm_OEhNYqB%g9 z37jG{&}x8W#qog}vfdakG1*IH#9csS{=qLm+KG(cU*q^_=s!|T4`QAcN!L+%WOtM&}zKVZdUdmZKva#c!(e#QmD?8Tb{@|ANI`9dkS8 zhAPVm`87`LTLNeAzjNm(FCDhY3YZDP0;RLfnUHJ9!Gh`L#T0cmz_1_P#*k6DWKvs) zpIwlpWLfd|f&7A>l0mc%^s+)3^zsmwQ8FfwQD?Z1!BBn$LcUCbC#)EoN73r_>%5zIOqJgUh1b)&_EP^?TkGN$3|_MB%_mNnn*P2)vv3 
z;KmtA54ZfYKD$Wg9#UWOFH*>y5TzlTTKcaesPXs^z(R%^4IyFGo^khLff&~ToWA>V z*nr2zj1@Ll=)hEWwmBQEpAY}<8d82{rKBcV^WJ+P8J2W^v-+=W5*zR- zZ(O&vM6XBE+~1^1r^Nxd6$}{NO#t3>)%idkokx7xn(z>czv^aEZh_MrTQ*HcTwq9P zUiy0JTn2*WTOI8PZe1L3=HGytjcRKbcUcO8_&LU5oe4g{#r>GXueWDzK>T54HwaqV zR#8vHdBK1kTw_Q1Vt9x};q;&MssHp2pHPCz&|!<8hRmpbG(^Gm5DgQ2V^a|XCWI{z zFYV3tHAGp0eXz`7QRnOm7AP@nhA(ze$YkD;oMt(x*_1*duUa2@`5`VnEuL41H1<>~H>9NOxNjDZ zgMycC3?S|yAP#y*9;)m50Ro=yyl6y7>7IZxqXvk+pycGp6HYN*sJ>Zs_zF3jAaJ{7 zzkhz#{BrHc8Qx!0;hc>J{|n7_D(Jvuh{YfieGps!vwa^Pk}m8Cns`E7i_}9i-}vb- z=wnEvgc^rh)&)RGS1G=z?BZJ?qJV7^a;9f^mSDNI+yymufxFn5;J&;qdNSuAdRkq- z^?|6xBq39|1G?u1iihzvSh&rFG;d?#J48wD{qg_~y#Fvz!tw!Ug_&Jfrr%t8S$7#! z8~cl6jNuUS5MtBwwnyKHOaL^41GhAoP?Z@c>XfYHGrEft=rk}vUPjB4-z7TgYmn>u^=B(1oBY+_E~!3=lXwWDUeco7l3?G4;5}r zDNr{nO>hg(Jr9+f2B*SsXM2kf>OM@*)~nQllo!{tLo~tQ3Vs3gnoV3bf?kmUhyjt6 zE@TUhufV7PRssX$27kRP2cTkMR7}#cpr{RTb#LljKinyHa!-J{Gh75{eJ44;W7K{Z zB`^I~{4Y)YKNKp#{=Ew%{hx~B*iQxlP34xL_0aTwPw)W1AO~xvhB%yLSNUS;4(t#d z*s}=LG^KiQWqv@8&k9fNdb`*{GhM$5-IO4y4^})%!zp(e2JnSL(!Y_AtoZiHM-NY} z6~rWS)cM!<7tI3rBG+O-(i>oBSRkd+Ug2zq>1xl?_HDmoB;_p026mL zuKal|t;Jk_i87Fp&U6;{p`br{d!{;%^Ttex;m>cMu38gZBzCRB_xx4^8juh?(YfNh zfV+$>hpO$ix8gnBkjX62WyZLJPHl%CvGsR!WLSD` z@efS#KmU{L9O1P9U9FV1VH9<2q7gHvi~}^iL=_Xomj?P+%03wta4FEw&rUm`$L~Kf zlM1p!!14J~=9--#WF3Ek<4)b{pL*bNW7nUaoR~17a3Q2W31(@k4ALFgW$@MN&D|mU zLg=!zmmC{G||Eons z<%SHQ-OI_*1c0MD89EZ6%TnqMI}vI7oufDJ+1)03rlj5w6xI;u|GPjMD6oU>Cl3Bs z7#|`LYC@^m48U5C{RKLKM*)zf5#&_zJ+@r_hAatv2BAG%2zUl^;GjPIuOLg+H+$1| zv5}x#AA$+Eyz<-*MRc_YNZLGH;dcBVV*RiG{J#=r-WeVmtaTCI@!aDXcHjPHT-ud`)@!qNs#aROc~+b+*l&D4 z(cFx6*b*_0n-g}hT6fFAj63Dj8Yf{KDu=NfWAh%Xn1a=uVzaT}OpbQ$f?NyBa?dsqD9)d>Vzc`3f0avBbP{{PRtN(hJ zzpuaGjMP(CKLs4efQE*K2?#c=)RNI?3BbhZ!MgZENOS|7TwA_XFgjmh;Nx+C30?2p zz5Byxi1j9sk3c$;hl(K>#U4M((k|iV1{y%*rTf=XMSv}!%Y4;g0j7Cb)Akq%c!44n zY;gjqhL|TXtITTJcx!z*4;<+w5%P(^D8s>ev2 z+k72}EjE&l%$6nyvfPqT82{-PBz$Aqa9jw27yp`0D96{wtAhN+VPdN(fO z)~{mzD;cEL42CSz-eH10*GVhR?sGLj6!ID5+VS(l5#IGHcz}_2oW*zzJm}VahAu& zm3X-=jy2m%rKf#u2)j7?Co 
z!gVXoX~-8$EP0*imc9)&Gm&k$r%g(_5KD#w&TAmVIl_YnoQ9l@U(*V3Y|WCb6>poB zDieE{G2vkd#13@38m(Atfoa31zCu!e6f9z3oMRvRU_a?X%eMN!=5I#;&gI`9-4hND z;sKI1JB;`#pjo~k5JnBIiB&71|8nH*Vkdfiy#1Q%gMv6!1Mfj)i_zqPZ zcnYo|T(Byizds?iqG!nu)`>bJJ%IQ11>BPk#xXZA{z>`y`SmL&gM_|6x=QR(0e?SA zSt&4A2gN%G!Q84nx34@;W#PQXyDH$&N(8G{-ceVVv9z>&7lUOW zCLv^f%m@VLHgYJHe%?9-vaU67cKW-gW^&jN)Li4rSEC>SraFzlW4#CnV1ypFvoo13jqZ@L$Yk;akc6uZKkdW{+Zi#l6^CGe z@tFvzk$rPXToi}u`V*6n6213r!o42`ik~bQbkvUNwgD*MvO!&tVAqT+3kiyGa=H#Z ze}vdyKBKO?i`<6fe+@*;eaWlS1(>gSwG92KW%TKHxJlL^2lxQoyvY-2$b9zzYg&{y z&QW}n9yc?h67`Q|AS{gcd2$>Ff+zU?i)gM0uZrtF!=GM|;Y1Hk*bq|^ht4AqJO>|c z&`|}s*4*51@@FHuNy)IhLD4fN@<@=uTF2XAL?3+cV4a^=UpVVN9O**wG_?cKTgAhz zVD_h_CfsUxcK>bJf9@s0uC;+(yTN9K+Dn4+s`wH29jb}`&A5?j5-bDZoNR(pY6IbY z|NecQ*$~T|G3c^9urw2}*I-)0+{84G-h)J69*AU*16CK#1#e@{@(m(S-s3Ip9ee=i z&ouvx$R`rs!r%hPIHk7wzrPRRC$}U(&n##jW;*w8GWyr%9Dxrz^8o*f_{|$+kVriV z^ZU%iZyxjx6v}lAU8;XS0E9(bI|%EMo0;hc?W1K34Got-6Sx_?5({2mU)O-S^!77- z0?-Ti6ohi@wt+u7HX+wM3REHAL8oEyjEsy1<9pIXRZYg>hC!;;Hhf;;uCLuc-LfJ{b`o9=_3vi{r(Hj2?p(y-s5xek4JnppaH&L0PK1I zG^yc+L}OI^lvSPziM7$0AVPZ^AdJVW$w(SDgq{|((8yeNEPUeSCbLhRsGdJZW=kCe zncoV{EAg?XmGvC*m&c(^+-pA`^kcmCjWuqB)02t|bn;2WUaU|P0R|9&t&t`^%xhpb zUvWRM0xkPbf*AU}U~qX)H3NM3fa}M^3+KyD>))gaBC!7RuzIdVS5naK8`3v!-0(Ae zRxwHDd`_TQ? zl^)*4?YDf2oPwOBFnq*b`NU0v2{yk6v^N(Wm&p{wj?O^ ztm)SM3zvu;J5f#qV))vxKJ(lf!wB7QJ%0vq5t#p5SY6t0jwBrFq3v7JM2;469%Ntp zH584+qW#2BvAl*ikI)Hbydya)?{}c7B6W8y? 
z5c`HgbtG~-w+DjhkR3Nbkc?J{k;&dmrka}B&MdOri?rp zrVct4d&h^^?d*|oJ07j;_hNn-&hL$rlS~2Zj`0HhrF+O}5T)DT)h0mRj5qWv1|nf5 z;rn{i%04BGlbpqcE5uAvPdsvV?*gX5A{Ky_{xR4vn5?Gz1*~UXQ_!C`k)kK&fbM= z&qzLDU0rQ$x{c`qW!7!Y{kZ;^RdW>(5oV{tdCgK z@ivLLUlZAUx-dTZJhp36oTFHb-{d-23DpJC8d$aWC=ffTKU7ej@aP=XF zh1yPP9SwngGwzK)w&vG)1kdvfL1MyVl1or^xa418!Bi8Vs-X6+ zx^9tTCJ@cP0x(gmsrT{;JOuK2H7pT2$Jyc^KM+YH#8$jJgt!Wt#GimN2=!6>U}5Db z-jw+=4Sh0PO>h@cJ32#f>^)kq9w_Qff!@-Won{9Fv2(IDS$h|{zdruveIPa09XKrN z=69V+b&%-lr?f;w5&3oxSiX~#$3&+;0K-`6Fw3*@u)C1>h^~U;_W&^Y{29JX8d_GUuw0WSbN-gY7Szca#6MJF@+sKy+x3MR97qL;S9M_ltO&J!nD z%X*)a6VTV!C-^Fzm^$0z{rS+ry2JS1v2HT(cLc3Z(h%Fp3$MW6xsa7D#DO>!RDzc8 zB)WEy@Yn}aR%w>t@3@KfUx&ds|E{CX5bXOu{hfOaKiX=5Z2JJB-8MMu?=vzu040px6c~BI0&X8YD7B(7$L1)G$2N&8CqQrH^V?9!)&t085^SBas*pOd zM>fd5O(t>H)zu}~4EEZk$&QD0LFWvmR0rPaz*L?20Xt01+p9hgzP>(FGn3V%rQE#+ zf#yrdzl&_;Ha#~cx<)uDSir<*CxZ^x$rmo$%M&r$A5X%E`W8R&(t#_p)3k}bTQT_k z@`W^nQ*hi;eDF9C3IA||@T6=6oPh`v{A4(hHTUKsjoOC`lvp=$TWl6m{05ba;{-+G z!n2mw0(QMhZVjq2PYMk@y1u!+F*m=|pF4^0mRE)Ea*L)l|3gisMJ!ciML9z!sdOQXo_u06h+3yLAL8I12SGcxkN2-iTpD1SSYUfXeqJFjhE1%!G8Xx;Z`ncxm72=RsvLh-XxPn8 z3>?E5kL~P|p;vp4UO_xRN>2-HM+@vC__<2s8xR^xA1^Ffwd=PmNx_8KPUluE+|;qD zp_!p~8Gw(u3`EgVaIHtdB{yGPgiM0s-W5m538Dx8VBvK{ftMRp`!*}t>;UJ||C8Nu zsZ|IX>RVlxCa9Ffm+vsMHHT1-fx=a6&@MkM?qUyQO?HZU=<40AwQTeQycE4@0PrTs z=YVb^L|9>*z5Vee4=9DUfr>N1%rYo!{JM7J?3E9iYo#`$BLJbXPxgd#3Nvwb8j7x{wo}Aue$@!u?^_4|NFJornQHILF>k52(f!0 z)ch6R$yOa&JZd|KmSfBq!)Z7!~ZQG`BVRy;F|PI1CRe6_@CSjC8?Zi_n!PO>7;pG literal 0 HcmV?d00001 diff --git a/docs/mdbook/src/index.md b/docs/mdbook/src/index.md new file mode 100644 index 000000000..3cd0fec1d --- /dev/null +++ b/docs/mdbook/src/index.md @@ -0,0 +1,43 @@ + +# DataFusion Book + +DataFusion is a blazing fast query engine that lets you run data analyses quickly and reliably. + +DataFusion is written in Rust, but also exposes Python and SQL bindings, so you can easily query data in your langauge of choice. You don't need to know any Rust to be a happy and productive user of DataFusion. 
+ +DataFusion lets you run queries faster than pandas. Let's compare query runtimes for a 5GB CSV file with 100 million rows of data. + +Take a look at a few rows of the data: + +``` ++-------+-------+--------------+-----+-----+-------+----+----+-----------+ +| id1 | id2 | id3 | id4 | id5 | id6 | v1 | v2 | v3 | ++-------+-------+--------------+-----+-----+-------+----+----+-----------+ +| id016 | id016 | id0000042202 | 15 | 24 | 5971 | 5 | 11 | 37.211254 | +| id039 | id045 | id0000029558 | 40 | 49 | 39457 | 5 | 4 | 48.951141 | +| id047 | id023 | id0000071286 | 68 | 20 | 74463 | 2 | 14 | 60.469241 | ++-------+-------+--------------+-----+-----+-------+----+----+-----------+ +``` + +Suppose you'd like to run the following query: `SELECT id1, sum(v1) AS v1 from the_table GROUP BY id1`. + +If you use pandas, then this query will take 43.6 seconds to execute. + +It only takes DataFusion 9.8 seconds to execute the same query. + +DataFusion is easy to use, powerful, and fast. Let's learn more! diff --git a/docs/mdbook/src/installation.md b/docs/mdbook/src/installation.md new file mode 100644 index 000000000..ba00c8b80 --- /dev/null +++ b/docs/mdbook/src/installation.md @@ -0,0 +1,62 @@ + +# Installation + +DataFusion is easy to install, just like any other Python library. + +## Using pip + +``` bash +pip install datafusion +``` + +## Conda & JupyterLab setup + +This section explains how to install DataFusion in a conda environment with other libraries that allow for a nice Jupyter workflow. This setup is completely optional. 
These steps are only needed if you'd like to run DataFusion in a Jupyter notebook and have an interface like this: + +![DataFusion in Jupyter](https://github.com/MrPowers/datafusion-book/raw/main/src/images/datafusion-jupyterlab.png) + +Create a conda environment with DataFusion, Jupyter, and other useful dependencies in the `datafusion-env.yml` file: + +``` +name: datafusion-env +channels: + - conda-forge + - defaults +dependencies: + - python=3.9 + - ipykernel + - nb_conda + - jupyterlab + - jupyterlab_code_formatter + - isort + - black + - pip + - pip: + - datafusion + +``` + +Create the environment with `conda env create -f datafusion-env.yml`. + +Activate the environment with `conda activate datafusion-env`. + +Run `jupyter lab` or open the [JupyterLab Desktop application](https://github.com/jupyterlab/jupyterlab-desktop) to start running DataFusion in a Jupyter notebook. + +## Examples + +See the [DataFusion Python Examples](https://github.com/apache/arrow-datafusion-python/tree/main/examples) for a variety of Python scripts that show DataFusion in action! diff --git a/docs/mdbook/src/quickstart.md b/docs/mdbook/src/quickstart.md new file mode 100644 index 000000000..bba0b36ae --- /dev/null +++ b/docs/mdbook/src/quickstart.md @@ -0,0 +1,77 @@ + +# DataFusion Quickstart + +You can easily query a DataFusion table with the Python API or with pure SQL. + +Let's create a small DataFrame and then run some queries with both APIs. + +Start by creating a DataFrame with four rows of data and two columns: `a` and `b`. + +```python +from datafusion import SessionContext + +ctx = SessionContext() + +df = ctx.from_pydict({"a": [1, 2, 3, 1], "b": [4, 5, 6, 7]}, name="my_table") +``` + +Let's append a column to this DataFrame that adds columns `a` and `b` with the SQL API. 
+ +``` +ctx.sql("select a, b, a + b as sum_a_b from my_table") + ++---+---+---------+ +| a | b | sum_a_b | ++---+---+---------+ +| 1 | 4 | 5 | +| 2 | 5 | 7 | +| 3 | 6 | 9 | +| 1 | 7 | 8 | ++---+---+---------+ +``` + +DataFusion makes it easy to run SQL queries on DataFrames. + +Now let's run the same query with the DataFusion Python API: + +```python +from datafusion import col + +df.select( + col("a"), + col("b"), + col("a") + col("b"), +) +``` + +We get the same result as before: + +``` ++---+---+-------------------------+ +| a | b | my_table.a + my_table.b | ++---+---+-------------------------+ +| 1 | 4 | 5 | +| 2 | 5 | 7 | +| 3 | 6 | 9 | +| 1 | 7 | 8 | ++---+---+-------------------------+ +``` + +DataFusion also allows you to query data with a well-designed Python interface. + +Python users have two great ways to query DataFusion tables. diff --git a/docs/mdbook/src/usage/create-table.md b/docs/mdbook/src/usage/create-table.md new file mode 100644 index 000000000..332863a16 --- /dev/null +++ b/docs/mdbook/src/usage/create-table.md @@ -0,0 +1,59 @@ + +# DataFusion Create Table + +It's easy to create DataFusion tables from a variety of data sources. + +## Create Table from Python Dictionary + +Here's how to create a DataFusion table from a Python dictionary: + +```python +from datafusion import SessionContext + +ctx = SessionContext() + +df = ctx.from_pydict({"a": [1, 2, 3, 1], "b": [4, 5, 6, 7]}, name="my_table") +``` + +Supplying the `name` parameter is optional. You only need to name the table if you'd like to query it with the SQL API. + +You can also create a DataFrame without a name that can be queried with the Python API: + +```python +from datafusion import SessionContext + +ctx = SessionContext() + +df = ctx.from_pydict({"a": [1, 2, 3, 1], "b": [4, 5, 6, 7]}) +``` + +## Create Table from CSV + +You can read a CSV into a DataFusion DataFrame. 
Here's how to read the `G1_1e8_1e2_0_0.csv` file into a table named `csv_1e8`: + +```python +ctx.register_csv("csv_1e8", "G1_1e8_1e2_0_0.csv") +``` + +## Create Table from Parquet + +You can read a Parquet file into a DataFusion DataFrame. Here's how to read the `yellow_tripdata_2021-01.parquet` file into a table named `taxi`. + +```python +ctx.register_parquet("taxi", "yellow_tripdata_2021-01.parquet") +``` diff --git a/docs/mdbook/src/usage/index.md b/docs/mdbook/src/usage/index.md new file mode 100644 index 000000000..1ef4406f7 --- /dev/null +++ b/docs/mdbook/src/usage/index.md @@ -0,0 +1,25 @@ + +# Usage + +This section shows how to create DataFusion DataFrames from a variety of data sources like CSV files and Parquet files. + +You'll learn more about the SQL statements that are supported by DataFusion. + +You'll also learn about the DataFusion's Python API for querying data. + +The documentation will wrap up with a variety of real-world data processing tasks that are well suited for DataFusion. The lightning-fast speed and reliable execution makes DataFusion the best technology for a variety of data processing tasks. diff --git a/docs/mdbook/src/usage/query-table.md b/docs/mdbook/src/usage/query-table.md new file mode 100644 index 000000000..5e4e38001 --- /dev/null +++ b/docs/mdbook/src/usage/query-table.md @@ -0,0 +1,125 @@ + +# DataFusion Query Table + +DataFusion tables can be queried with SQL or with the Python API. + +Let's create a small table and show the different types of queries that can be run. 
+ +```python +df = ctx.from_pydict( + { + "first_name": ["li", "wang", "ron", "amanda"], + "age": [25, 75, 68, 18], + "country": ["china", "china", "us", "us"], + }, + name="some_people", +) +``` + +Here's the data in the table: + +``` ++------------+-----+---------+ +| first_name | age | country | ++------------+-----+---------+ +| li | 25 | china | +| wang | 75 | china | +| ron | 68 | us | +| amanda | 18 | us | ++------------+-----+---------+ +``` + +## DataFusion Filter DataFrame + +Here's how to find all individuals that are older than 65 years old in the data with SQL: + +``` +ctx.sql("select * from some_people where age > 65") + ++------------+-----+---------+ +| first_name | age | country | ++------------+-----+---------+ +| wang | 75 | china | +| ron | 68 | us | ++------------+-----+---------+ +``` + +Here's how to run the same query with Python: + +```python +df.filter(col("age") > lit(65)) +``` + +``` ++------------+-----+---------+ +| first_name | age | country | ++------------+-----+---------+ +| wang | 75 | china | +| ron | 68 | us | ++------------+-----+---------+ +``` + +## DataFusion Select Columns from DataFrame + +Here's how to select the `first_name` and `country` columns from the DataFrame with SQL: + +``` +ctx.sql("select first_name, country from some_people") + + ++------------+---------+ +| first_name | country | ++------------+---------+ +| li | china | +| wang | china | +| ron | us | +| amanda | us | ++------------+---------+ +``` + +Here's how to run the same query with Python: + +```python +df.select(col("first_name"), col("country")) +``` + +``` ++------------+---------+ +| first_name | country | ++------------+---------+ +| li | china | +| wang | china | +| ron | us | +| amanda | us | ++------------+---------+ +``` + +## DataFusion Aggregation Query + +Here's how to run a group by aggregation query: + +``` +ctx.sql("select country, count(*) as num_people from some_people group by country") + ++---------+------------+ +| country | 
num_people | ++---------+------------+ +| china | 2 | +| us | 2 | ++---------+------------+ +``` From 0f2cd2e1be1406f0a462f2b106c41a111f90f37c Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 4 May 2023 20:54:10 -0400 Subject: [PATCH 035/413] Add 'pub' and '#[pyo3(get, set)]' to DataTypeMap (#371) --- src/common/data_type.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/common/data_type.rs b/src/common/data_type.rs index a7b79f49f..a9b0d17d5 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -33,12 +33,12 @@ use crate::errors::py_datafusion_err; #[derive(Debug, Clone)] #[pyclass(name = "DataTypeMap", module = "datafusion.common", subclass)] pub struct DataTypeMap { - #[allow(dead_code)] - arrow_type: PyDataType, - #[allow(dead_code)] - python_type: PythonType, - #[allow(dead_code)] - sql_type: SqlType, + #[pyo3(get, set)] + pub arrow_type: PyDataType, + #[pyo3(get, set)] + pub python_type: PythonType, + #[pyo3(get, set)] + pub sql_type: SqlType, } impl DataTypeMap { @@ -421,7 +421,7 @@ impl DataTypeMap { #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] #[pyclass(name = "DataType", module = "datafusion.common")] pub struct PyDataType { - data_type: DataType, + pub data_type: DataType, } impl From for DataType { From e16af72a3eff4a13f3419e09faf49847debd7ddd Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 6 May 2023 10:42:02 -0600 Subject: [PATCH 036/413] Fix db-benchmark (#369) --- .dockerignore | 12 ++ benchmarks/db-benchmark/README.md | 11 +- .../db-benchmark/db-benchmark.dockerfile | 177 ++++++++++-------- benchmarks/db-benchmark/groupby-datafusion.py | 2 +- benchmarks/db-benchmark/join-datafusion.py | 2 +- benchmarks/db-benchmark/run-bench.sh | 4 +- 6 files changed, 117 insertions(+), 91 deletions(-) create mode 100644 .dockerignore mode change 100644 => 100755 benchmarks/db-benchmark/run-bench.sh diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 
000000000..411e60291 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,12 @@ +.cargo +.github +.pytest_cache +ci +conda +dev +docs +examples +parquet +target +testing +venv \ No newline at end of file diff --git a/benchmarks/db-benchmark/README.md b/benchmarks/db-benchmark/README.md index fe268199f..93293b0dc 100644 --- a/benchmarks/db-benchmark/README.md +++ b/benchmarks/db-benchmark/README.md @@ -17,15 +17,16 @@ under the License. --> -# Run db-benchmark +# DataFusion Implementation of db-benchmark -This directory contains scripts for running DataFusion with the https://github.com/h2oai/db-benchmark +This directory contains scripts for running [db-benchmark](https://github.com/duckdblabs/db-benchmark) with +DataFusion's Python bindings. ## Directions -Run the following from root `arrow-datafusion` directory +Run the following from root of this project. ```bash -$ docker buildx build -t db-benchmark -f benchmarks/db-benchmark/db-benchmark.dockerfile . -$ docker run --privileged db-benchmark +$ docker build -t db-benchmark -f benchmarks/db-benchmark/db-benchmark.dockerfile . +$ docker run --privileged -it db-benchmark ``` diff --git a/benchmarks/db-benchmark/db-benchmark.dockerfile b/benchmarks/db-benchmark/db-benchmark.dockerfile index b21d3a0d1..2876b5b63 100644 --- a/benchmarks/db-benchmark/db-benchmark.dockerfile +++ b/benchmarks/db-benchmark/db-benchmark.dockerfile @@ -15,92 +15,105 @@ # specific language governing permissions and limitations # under the License. 
-FROM ubuntu +FROM ubuntu:22.04 ARG DEBIAN_FRONTEND=noninteractive ARG TARGETPLATFORM -RUN apt-get update && \ - apt-get install -y git build-essential - -# Install R, curl, and python deps -RUN apt-get -y install --no-install-recommends --no-install-suggests \ - ca-certificates software-properties-common gnupg2 gnupg1 \ - && apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 \ - && add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/' \ - && apt-get -y install r-base \ - && apt-get -y install curl \ - && apt-get -y install python3.8 \ - && apt-get -y install python3-pip - -# Install R libraries -RUN R -e "install.packages('data.table',dependencies=TRUE, repos='http://cran.rstudio.com/')" \ - && R -e "install.packages('dplyr',dependencies=TRUE, repos='http://cran.rstudio.com/')" - -# Install Rust -RUN curl https://sh.rustup.rs -sSf | bash -s -- -y -ENV PATH="/root/.cargo/bin:${PATH}" - -# Clone db-benchmark and download data -RUN git clone https://github.com/h2oai/db-benchmark \ - && cd db-benchmark \ - && Rscript _data/groupby-datagen.R 1e7 1e2 0 0 \ - && Rscript _data/join-datagen.R 1e7 0 0 0 \ - && mkdir data \ - && mv G1_1e7_1e2_0_0.csv data \ - && mv J1_1e7_1e1_0_0.csv data \ - && mv J1_1e7_1e4_0_0.csv data \ - && mv J1_1e7_1e7_0_0.csv data \ - && mv J1_1e7_NA_0_0.csv data \ - && cd .. - -# Clone datafusion-python and build python library -# Not sure if the wheel will be the same on all computers -RUN git clone https://github.com/datafusion-contrib/datafusion-python \ - && cd datafusion-python && git reset --hard 368b50ed9662d5e93c70b539f94cceace685265e \ - && python3 -m pip install pip \ - && python3 -m pip install pandas \ - && python3 -m pip install -r requirements.txt \ - && cd .. - -# Copy local arrow-datafusion -COPY . arrow-datafusion - -# 1. 
datafusion-python that builds from datafusion version referenced datafusion-python -RUN cd datafusion-python \ - && maturin build --release \ - && case "${TARGETPLATFORM}" in \ - */amd64) CPUARCH=x86_64 ;; \ - */arm64) CPUARCH=aarch64 ;; \ - *) exit 1 ;; \ - esac \ - # Version will need to be updated in conjunction with datafusion-python version - && python3 -m pip install target/wheels/datafusion-0.4.0-cp36-abi3-linux_${CPUARCH}.whl \ - && cd .. - -# 2. datafusion-python that builds from local datafusion. use this when making local changes to datafusion. -# Currently, as of March 5th 2022, this done not build (i think) because datafusion is being split into multiple crates -# and datafusion-python has not yet been updated to reflect this. -# RUN cd datafusion-python \ -# && sed -i '/datafusion =/c\datafusion = { path = "../arrow-datafusion/datafusion", features = ["pyarrow"] }' Cargo.toml \ -# && sed -i '/fuzz-utils/d' ../arrow-datafusion/datafusion/Cargo.toml \ -# && maturin build --release \ -# && case "${TARGETPLATFORM}" in \ -# */amd64) CPUARCH=x86_64 ;; \ -# */amd64) CPUARCH=aarch64 ;; \ -# *) exit 1 ;; \ -# esac \ -# && python3 -m pip install target/wheels/datafusion-0.4.0-cp36-abi3-linux_${CPUARCH}.whl \ -# && cd .. 
- -# Make datafusion directory in db-benchmark -RUN mkdir db-benchmark/datafusion \ - && cp ../arrow-datafusion/benchmarks/db-benchmark/groupby-datafusion.py db-benchmark/datafusion \ - && cp ../arrow-datafusion/benchmarks/db-benchmark/join-datafusion.py db-benchmark/datafusion \ - && cp ../arrow-datafusion/benchmarks/db-benchmark/run-bench.sh db-benchmark/ \ - && chmod +x db-benchmark/run-bench.sh +# This section is based on https://github.com/duckdblabs/db-benchmark/blob/master/_utils/repro.sh + +RUN apt-get -qq update +RUN apt-get -qq -y upgrade +RUN apt-get -qq install -y apt-utils + +RUN apt-get -qq install -y lsb-release software-properties-common wget curl vim htop git byobu libcurl4-openssl-dev libssl-dev +RUN apt-get -qq install -y libfreetype6-dev +RUN apt-get -qq install -y libfribidi-dev +RUN apt-get -qq install -y libharfbuzz-dev +RUN apt-get -qq install -y git +RUN apt-get -qq install -y libxml2-dev +RUN apt-get -qq install -y make +RUN apt-get -qq install -y libfontconfig1-dev +RUN apt-get -qq install -y libicu-dev pandoc zlib1g-dev libgit2-dev libcurl4-openssl-dev libssl-dev libjpeg-dev libpng-dev libtiff-dev +# apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 +RUN add-apt-repository "deb [arch=amd64,i386] https://cloud.r-project.org/bin/linux/ubuntu $(lsb_release -cs)-cran40/" + +RUN apt-get -qq install -y r-base-dev virtualenv + +RUN cd /usr/local/lib/R && \ + chmod o+w site-library + +RUN cd / && \ + git clone https://github.com/duckdblabs/db-benchmark.git WORKDIR /db-benchmark -RUN ls && ls -al data/ +RUN mkdir -p .R && \ + echo 'CFLAGS=-O3 -mtune=native' >> .R/Makevars && \ + echo 'CXXFLAGS=-O3 -mtune=native' >> .R/Makevars + +RUN cd pydatatable && \ + virtualenv py-pydatatable --python=/usr/bin/python3.10 +RUN cd pandas && \ + virtualenv py-pandas --python=/usr/bin/python3.10 +RUN cd modin && \ + virtualenv py-modin --python=/usr/bin/python3.10 + +RUN Rscript -e 
'install.packages(c("jsonlite","bit64","devtools","rmarkdown"), dependecies=TRUE, repos="https://cloud.r-project.org")' + +SHELL ["/bin/bash", "-c"] + +RUN source ./pandas/py-pandas/bin/activate && \ + python3 -m pip install --upgrade psutil && \ + python3 -m pip install --upgrade pandas && \ + deactivate + +RUN source ./modin/py-modin/bin/activate && \ + python3 -m pip install --upgrade modin && \ + deactivate + +RUN source ./pydatatable/py-pydatatable/bin/activate && \ + python3 -m pip install --upgrade git+https://github.com/h2oai/datatable && \ + deactivate + +## install dplyr +#RUN Rscript -e 'devtools::install_github(c("tidyverse/readr","tidyverse/dplyr"))' + +# install data.table +RUN Rscript -e 'install.packages("data.table", repos="https://rdatatable.gitlab.io/data.table/")' + +## generate data for groupby 0.5GB +RUN Rscript _data/groupby-datagen.R 1e7 1e2 0 0 +RUN #Rscript _data/groupby-datagen.R 1e8 1e2 0 0 +RUN #Rscript _data/groupby-datagen.R 1e9 1e2 0 0 + +RUN mkdir data && \ + mv G1_1e7_1e2_0_0.csv data/ + +# set only groupby task +RUN echo "Changing run.conf and _control/data.csv to run only groupby at 0.5GB" && \ + cp run.conf run.conf.original && \ + sed -i 's/groupby join groupby2014/groupby/g' run.conf && \ + sed -i 's/data.table dplyr pandas pydatatable spark dask clickhouse polars arrow duckdb/data.table dplyr duckdb/g' run.conf && \ + sed -i 's/DO_PUBLISH=true/DO_PUBLISH=false/g' run.conf + +## set sizes +RUN mv _control/data.csv _control/data.csv.original && \ + echo "task,data,nrow,k,na,sort,active" > _control/data.csv && \ + echo "groupby,G1_1e7_1e2_0_0,1e7,1e2,0,0,1" >> _control/data.csv + +RUN #./dplyr/setup-dplyr.sh +RUN #./datatable/setup-datatable.sh +RUN #./duckdb/setup-duckdb.sh + +# END OF SETUP + +RUN python3 -m pip install --upgrade pandas +RUN python3 -m pip install --upgrade datafusion + +# Now add our solution +RUN rm -rf datafusion-python 2>/dev/null && \ + mkdir datafusion-python +ADD benchmarks/db-benchmark/*.py 
datafusion-python/ +ADD benchmarks/db-benchmark/run-bench.sh . -ENTRYPOINT ./run-bench.sh \ No newline at end of file +ENTRYPOINT [ "/db-benchmark/run-bench.sh" ] \ No newline at end of file diff --git a/benchmarks/db-benchmark/groupby-datafusion.py b/benchmarks/db-benchmark/groupby-datafusion.py index 7268cc872..76dd38fee 100644 --- a/benchmarks/db-benchmark/groupby-datafusion.py +++ b/benchmarks/db-benchmark/groupby-datafusion.py @@ -58,7 +58,7 @@ def ans_shape(batches): ) print("dataset loaded") -ctx = df.ExecutionContext() +ctx = df.SessionContext() ctx.register_record_batches("x", [data.to_batches()]) print("registered record batches") # cols = ctx.sql("SHOW columns from x") diff --git a/benchmarks/db-benchmark/join-datafusion.py b/benchmarks/db-benchmark/join-datafusion.py index 1993a5c83..8843b55c0 100755 --- a/benchmarks/db-benchmark/join-datafusion.py +++ b/benchmarks/db-benchmark/join-datafusion.py @@ -90,7 +90,7 @@ def ans_shape(batches): flush=True, ) -ctx = df.ExecutionContext() +ctx = df.SessionContext() x_data = pacsv.read_csv( src_jn_x, convert_options=pacsv.ConvertOptions(auto_dict_encode=True) diff --git a/benchmarks/db-benchmark/run-bench.sh b/benchmarks/db-benchmark/run-bench.sh old mode 100644 new mode 100755 index 9ccc26804..2c3080929 --- a/benchmarks/db-benchmark/run-bench.sh +++ b/benchmarks/db-benchmark/run-bench.sh @@ -17,5 +17,5 @@ # under the License. 
set -e -SRC_DATANAME=G1_1e7_1e2_0_0 python3 datafusion/groupby-datafusion.py -SRC_DATANAME=J1_1e7_NA_0_0 python3 datafusion/join-datafusion.py +SRC_DATANAME=G1_1e7_1e2_0_0 python3 /db-benchmark/datafusion-python/groupby-datafusion.py +#SRC_DATANAME=J1_1e7_NA_0_0 python3 /db-benchmark/datafusion-python/join-datafusion.py From b8c74a7f50cdc74b163bf4ba071662d93cabfbfd Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 8 May 2023 07:49:54 -0600 Subject: [PATCH 037/413] Docs explaining how to view query plans (#373) * Docs explaining how to view query plans * diagram --- docs/mdbook/README.md | 2 +- docs/mdbook/src/SUMMARY.md | 3 +- docs/mdbook/src/images/plan.svg | 111 +++++++++++++++++ docs/mdbook/src/usage/query-plans.md | 170 +++++++++++++++++++++++++++ src/physical_plan.rs | 4 + src/sql/logical.rs | 2 +- 6 files changed, 289 insertions(+), 3 deletions(-) create mode 100644 docs/mdbook/src/images/plan.svg create mode 100644 docs/mdbook/src/usage/query-plans.md diff --git a/docs/mdbook/README.md b/docs/mdbook/README.md index 664b4b4ce..6dae6bc62 100644 --- a/docs/mdbook/README.md +++ b/docs/mdbook/README.md @@ -26,7 +26,7 @@ Open the book locally by running `open book/index.html`. ## Install mdBook -Download the `mdbook` binary. +Download the `mdbook` binary or run `cargo install mdbook`. Then manually open it, so you have permissions to run it on your Mac. 
diff --git a/docs/mdbook/src/SUMMARY.md b/docs/mdbook/src/SUMMARY.md index b4908994d..23467ed4c 100644 --- a/docs/mdbook/src/SUMMARY.md +++ b/docs/mdbook/src/SUMMARY.md @@ -21,4 +21,5 @@ - [Quickstart](./quickstart.md) - [Usage](./usage/index.md) - [Create a table](./usage/create-table.md) - - [Query a table](./usage/query-table.md) \ No newline at end of file + - [Query a table](./usage/query-table.md) + - [Viewing Query Plans](./usage/query-plans.md) \ No newline at end of file diff --git a/docs/mdbook/src/images/plan.svg b/docs/mdbook/src/images/plan.svg new file mode 100644 index 000000000..927147985 --- /dev/null +++ b/docs/mdbook/src/images/plan.svg @@ -0,0 +1,111 @@ + + + + + + +%3 + + +cluster_1 + +LogicalPlan + + +cluster_6 + +Detailed LogicalPlan + + + +2 + +Projection: my_table.a, SUM(my_table.b) + + + +3 + +Aggregate: groupBy=[[my_table.a]], aggr=[[SUM(my_table.b)]] + + + +2->3 + + + + + +4 + +Filter: my_table.a < Int64(3) + + + +3->4 + + + + + +5 + +TableScan: my_table + + + +4->5 + + + + + +7 + +Projection: my_table.a, SUM(my_table.b) +Schema: [a:Int64;N, SUM(my_table.b):Int64;N] + + + +8 + +Aggregate: groupBy=[[my_table.a]], aggr=[[SUM(my_table.b)]] +Schema: [a:Int64;N, SUM(my_table.b):Int64;N] + + + +7->8 + + + + + +9 + +Filter: my_table.a < Int64(3) +Schema: [a:Int64;N, b:Int64;N] + + + +8->9 + + + + + +10 + +TableScan: my_table +Schema: [a:Int64;N, b:Int64;N] + + + +9->10 + + + + + diff --git a/docs/mdbook/src/usage/query-plans.md b/docs/mdbook/src/usage/query-plans.md new file mode 100644 index 000000000..a39aa9e42 --- /dev/null +++ b/docs/mdbook/src/usage/query-plans.md @@ -0,0 +1,170 @@ + + +# DataFusion Query Plans + +DataFusion's `DataFrame` is a wrapper around a query plan. In this chapter we will learn how to view +logical and physical query plans for DataFrames. + +## Sample Data + +Let's go ahead and create a simple DataFrame. You can do this in the Python shell or in a notebook. 
+ +```python +from datafusion import SessionContext + +ctx = SessionContext() + +df = ctx.from_pydict({"a": [1, 2, 3, 1], "b": [4, 5, 6, 7]}, name="my_table") +``` + +## Logical Plan + +Next, let's look at the logical plan for this dataframe. + +```python +>>> df.logical_plan() +TableScan: my_table +``` + +The logical plan here consists of a single `TableScan` operator. Let's make a more interesting plan by creating a new +`DataFrame` representing an aggregate query with a filter. + +```python +>>> df = ctx.sql("SELECT a, sum(b) FROM my_table WHERE a < 3 GROUP BY a") +``` + +When we view the plan for this `DataFrame` we can see that there are now four operators in the plan, each +representing a logical transformation of the data. We start with a `TableScan` to read the data, followed by +a `Filter` to filter out rows that do not match the filter expression, then an `Aggregate` is performed. Finally, +a `Projection` is applied to ensure that the order of the final columns matches the `SELECT` part of the SQL query. + +```python +>>> df.logical_plan() +Projection: my_table.a, SUM(my_table.b) + Aggregate: groupBy=[[my_table.a]], aggr=[[SUM(my_table.b)]] + Filter: my_table.a < Int64(3) + TableScan: my_table +``` + +## Optimized Logical Plan + +DataFusion has a powerful query optimizer which will rewrite query plans to make them more efficient before they are +executed. We can view the output of the optimizer by viewing the optimized logical plan. + +```python +>>> df.optimized_logical_plan() +Aggregate: groupBy=[[my_table.a]], aggr=[[SUM(my_table.b)]] + Filter: my_table.a < Int64(3) + TableScan: my_table projection=[a, b] +``` + +We can see that there are two key differences compared to the unoptimized logical plan: + +- The `Projection` has been removed because it was redundant in this case (the output of the `Aggregate` plan + already had the columns in the correct order).
+- The `TableScan` now has a projection pushed down so that it only reads the columns required to be able to execute + the query. In this case the table only has two columns and we referenced them both in the query, but this optimization + can be very effective in real-world queries against large tables. + +## Physical Plan + +Logical plans provide a representation of "what" the query should do. Physical plans explain "how" the query +should be executed. + +We can view the physical plan (also known as an execution plan) using the `execution_plan` method. + +```python +>>> df.execution_plan() +AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[SUM(my_table.b)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([Column { name: "a", index: 0 }], 48), input_partitions=48 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[SUM(my_table.b)] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: a@0 < 3 + RepartitionExec: partitioning=RoundRobinBatch(48), input_partitions=1 + MemoryExec: partitions=1, partition_sizes=[1] +``` + +The `TableScan` has now been replaced by a more specific `MemoryExec` for scanning the in-memory data. If we were +querying a CSV file on disk then we would expect to see a `CsvExec` instead. + +This plan has additional operators that were not in the logical plan: + +- `RepartitionExec` has been added so that the data can be split into partitions and processed in parallel using + multiple cores. +- `CoalesceBatchesExec` will combine small batches into larger batches to ensure that processing remains efficient. + +The `Aggregate` operator now appears twice. This is because aggregates are performed in a two-step process. Data is +aggregated within each partition in parallel and then those results (which could contain duplicate grouping keys) are +combined and the aggregate operation is applied again.
+ +## Creating Query Plan Diagrams + +DataFusion supports generating query plan diagrams in [DOT format](<https://en.wikipedia.org/wiki/DOT_(graph_description_language)>). + +DOT is a language for describing graphs and there are open source tools such as GraphViz that can render diagrams +from DOT files. + +We can use the following code to generate a DOT file for a logical query plan. + +```python +>>> diagram = df.logical_plan().display_graphviz() +>>> with open('plan.dot', 'w') as f: +>>> f.write(diagram) +``` + +If we view the file, we will see the following content. + +``` +// Begin DataFusion GraphViz Plan (see https://graphviz.org) +digraph { + subgraph cluster_1 + { + graph[label="LogicalPlan"] + 2[shape=box label="Projection: my_table.a, SUM(my_table.b)"] + 3[shape=box label="Aggregate: groupBy=[[my_table.a]], aggr=[[SUM(my_table.b)]]"] + 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back] + 4[shape=box label="Filter: my_table.a < Int64(3)"] + 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back] + 5[shape=box label="TableScan: my_table"] + 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back] + } + subgraph cluster_6 + { + graph[label="Detailed LogicalPlan"] + 7[shape=box label="Projection: my_table.a, SUM(my_table.b)\nSchema: [a:Int64;N, SUM(my_table.b):Int64;N]"] + 8[shape=box label="Aggregate: groupBy=[[my_table.a]], aggr=[[SUM(my_table.b)]]\nSchema: [a:Int64;N, SUM(my_table.b):Int64;N]"] + 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back] + 9[shape=box label="Filter: my_table.a < Int64(3)\nSchema: [a:Int64;N, b:Int64;N]"] + 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back] + 10[shape=box label="TableScan: my_table\nSchema: [a:Int64;N, b:Int64;N]"] + 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back] + } +} +// End DataFusion GraphViz Plan +``` + +We can use GraphViz from the command-line to convert this DOT file into an image.
+ +```bash +dot -Tsvg plan.dot > plan.svg +``` + +This generates the following diagram: + +![Query Plan Diagram](../images/plan.svg) diff --git a/src/physical_plan.rs b/src/physical_plan.rs index 4c35f3e60..6f02cefaa 100644 --- a/src/physical_plan.rs +++ b/src/physical_plan.rs @@ -54,6 +54,10 @@ impl PyExecutionPlan { format!("{}", d.indent()) } + fn __repr__(&self) -> String { + self.display_indent() + } + #[getter] pub fn partition_count(&self) -> usize { self.plan.output_partitioning().partition_count() diff --git a/src/sql/logical.rs b/src/sql/logical.rs index a22f269f1..a75315d36 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -100,7 +100,7 @@ impl PyLogicalPlan { } fn display_graphviz(&self) -> String { - format!("{}", self.plan.display_indent_schema()) + format!("{}", self.plan.display_graphviz()) } } From 6a2df777508ff335f5bad4a97e8af9da6014eaa4 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 8 May 2023 13:06:34 -0600 Subject: [PATCH 038/413] Improve db-benchmark (#372) --- .github/workflows/test.yaml | 2 +- benchmarks/db-benchmark/README.md | 4 +- .../db-benchmark/db-benchmark.dockerfile | 1 + benchmarks/db-benchmark/groupby-datafusion.py | 330 +++++++++++++++--- benchmarks/db-benchmark/join-datafusion.py | 138 ++++++-- benchmarks/db-benchmark/run-bench.sh | 8 +- dev/python_lint.sh | 2 +- 7 files changed, 404 insertions(+), 81 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index c69205be3..f672c8129 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -101,7 +101,7 @@ jobs: if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }} run: | source venv/bin/activate - flake8 --exclude venv --ignore=E501,W503 + flake8 --exclude venv,benchmarks/db-benchmark --ignore=E501,W503 black --line-length 79 --diff --check . 
- name: Run tests diff --git a/benchmarks/db-benchmark/README.md b/benchmarks/db-benchmark/README.md index 93293b0dc..8ce45344d 100644 --- a/benchmarks/db-benchmark/README.md +++ b/benchmarks/db-benchmark/README.md @@ -27,6 +27,6 @@ DataFusion's Python bindings. Run the following from root of this project. ```bash -$ docker build -t db-benchmark -f benchmarks/db-benchmark/db-benchmark.dockerfile . -$ docker run --privileged -it db-benchmark +docker build -t db-benchmark -f benchmarks/db-benchmark/db-benchmark.dockerfile . +docker run --privileged -it db-benchmark ``` diff --git a/benchmarks/db-benchmark/db-benchmark.dockerfile b/benchmarks/db-benchmark/db-benchmark.dockerfile index 2876b5b63..d8842b250 100644 --- a/benchmarks/db-benchmark/db-benchmark.dockerfile +++ b/benchmarks/db-benchmark/db-benchmark.dockerfile @@ -108,6 +108,7 @@ RUN #./duckdb/setup-duckdb.sh # END OF SETUP RUN python3 -m pip install --upgrade pandas +RUN python3 -m pip install --upgrade polars psutil RUN python3 -m pip install --upgrade datafusion # Now add our solution diff --git a/benchmarks/db-benchmark/groupby-datafusion.py b/benchmarks/db-benchmark/groupby-datafusion.py index 76dd38fee..2c35259e8 100644 --- a/benchmarks/db-benchmark/groupby-datafusion.py +++ b/benchmarks/db-benchmark/groupby-datafusion.py @@ -19,14 +19,20 @@ import gc import timeit import datafusion as df -from datafusion import functions as f -from datafusion import col +from datafusion import ( + col, + functions as f, + RuntimeConfig, + SessionConfig, + SessionContext, +) +import pyarrow from pyarrow import csv as pacsv print("# groupby-datafusion.py", flush=True) -# exec(open("./_helpers/helpers.py").read()) +exec(open("./_helpers/helpers.py").read()) def ans_shape(batches): @@ -40,8 +46,12 @@ def ans_shape(batches): return rows, cols -# ver = df.__version__ -ver = "7.0.0" +def execute(df): + print(df.execution_plan().display_indent()) + return df.collect() + + +ver = df.__version__ git = "" task = "groupby" solution 
= "datafusion" @@ -49,16 +59,47 @@ def ans_shape(batches): cache = "TRUE" on_disk = "FALSE" +# experimental - support running with both DataFrame and SQL APIs +sql = True + data_name = os.environ["SRC_DATANAME"] src_grp = os.path.join("data", data_name + ".csv") print("loading dataset %s" % src_grp, flush=True) +schema = pyarrow.schema( + [ + ("id4", pyarrow.int32()), + ("id5", pyarrow.int32()), + ("id6", pyarrow.int32()), + ("v1", pyarrow.int32()), + ("v2", pyarrow.int32()), + ("v3", pyarrow.float64()), + ] +) + data = pacsv.read_csv( - src_grp, convert_options=pacsv.ConvertOptions(auto_dict_encode=True) + src_grp, + convert_options=pacsv.ConvertOptions( + auto_dict_encode=True, column_types=schema + ), ) print("dataset loaded") -ctx = df.SessionContext() +# create a session context with explicit runtime and config settings +runtime = ( + RuntimeConfig() + .with_disk_manager_os() + .with_fair_spill_pool(64 * 1024 * 1024 * 1024) +) +config = ( + SessionConfig() + .with_repartition_joins(False) + .with_repartition_aggregations(False) + .set("datafusion.execution.coalesce_batches", "false") +) +ctx = SessionContext(config, runtime) +print(ctx) + ctx.register_record_batches("x", [data.to_batches()]) print("registered record batches") # cols = ctx.sql("SHOW columns from x") @@ -72,50 +113,107 @@ def ans_shape(batches): question = "sum v1 by id1" # q1 gc.collect() t_start = timeit.default_timer() -ans = ctx.sql("SELECT id1, SUM(v1) AS v1 FROM x GROUP BY id1").collect() +if sql: + df = ctx.sql("SELECT id1, SUM(v1) AS v1 FROM x GROUP BY id1") +else: + df = ctx.table("x").aggregate( + [f.col("id1")], [f.sum(f.col("v1")).alias("v1")] + ) +ans = execute(df) + shape = ans_shape(ans) -# print(shape, flush=True) +print(shape, flush=True) t = timeit.default_timer() - t_start print(f"q1: {t}") -# m = memory_usage() +m = memory_usage() t_start = timeit.default_timer() df = ctx.create_dataframe([ans]) chk = df.aggregate([], [f.sum(col("v1"))]).collect()[0].column(0)[0] chkt = 
timeit.default_timer() - t_start -# write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +write_log( + task=task, + data=data_name, + in_rows=in_rows, + question=question, + out_rows=shape[0], + out_cols=shape[1], + solution=solution, + version=ver, + git=git, + fun=fun, + run=1, + time_sec=t, + mem_gb=m, + cache=cache, + chk=make_chk([chk]), + chk_time_sec=chkt, + on_disk=on_disk, +) del ans gc.collect() question = "sum v1 by id1:id2" # q2 gc.collect() t_start = timeit.default_timer() -ans = ctx.sql( - "SELECT id1, id2, SUM(v1) AS v1 FROM x GROUP BY id1, id2" -).collect() +if sql: + df = ctx.sql("SELECT id1, id2, SUM(v1) AS v1 FROM x GROUP BY id1, id2") +else: + df = ctx.table("x").aggregate( + [f.col("id1"), f.col("id2")], [f.sum(f.col("v1")).alias("v1")] + ) +ans = execute(df) shape = ans_shape(ans) -# print(shape, flush=True) +print(shape, flush=True) t = timeit.default_timer() - t_start print(f"q2: {t}") -# m = memory_usage() +m = memory_usage() t_start = timeit.default_timer() df = ctx.create_dataframe([ans]) chk = df.aggregate([], [f.sum(col("v1"))]).collect()[0].column(0)[0] chkt = timeit.default_timer() - t_start -# write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +write_log( + task=task, + data=data_name, + in_rows=in_rows, + question=question, + out_rows=shape[0], + out_cols=shape[1], + solution=solution, + version=ver, + git=git, + fun=fun, + run=1, + time_sec=t, + mem_gb=m, + cache=cache, + chk=make_chk([chk]), + chk_time_sec=chkt, + on_disk=on_disk, +) del ans gc.collect() question = "sum v1 mean v3 by id3" # q3 gc.collect() t_start = 
timeit.default_timer() -ans = ctx.sql( - "SELECT id3, SUM(v1) AS v1, AVG(v3) AS v3 FROM x GROUP BY id3" -).collect() +if sql: + df = ctx.sql( + "SELECT id3, SUM(v1) AS v1, AVG(v3) AS v3 FROM x GROUP BY id3" + ) +else: + df = ctx.table("x").aggregate( + [f.col("id3")], + [ + f.sum(f.col("v1")).alias("v1"), + f.avg(f.col("v3")).alias("v3"), + ], + ) +ans = execute(df) shape = ans_shape(ans) -# print(shape, flush=True) +print(shape, flush=True) t = timeit.default_timer() - t_start print(f"q3: {t}") -# m = memory_usage() +m = memory_usage() t_start = timeit.default_timer() df = ctx.create_dataframe([ans]) chk = ( @@ -125,7 +223,25 @@ def ans_shape(batches): .to_numpy()[0] ) chkt = timeit.default_timer() - t_start -# write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +write_log( + task=task, + data=data_name, + in_rows=in_rows, + question=question, + out_rows=shape[0], + out_cols=shape[1], + solution=solution, + version=ver, + git=git, + fun=fun, + run=1, + time_sec=t, + mem_gb=m, + cache=cache, + chk=make_chk([chk]), + chk_time_sec=chkt, + on_disk=on_disk, +) del ans gc.collect() @@ -136,10 +252,10 @@ def ans_shape(batches): "SELECT id4, AVG(v1) AS v1, AVG(v2) AS v2, AVG(v3) AS v3 FROM x GROUP BY id4" ).collect() shape = ans_shape(ans) -# print(shape, flush=True) +print(shape, flush=True) t = timeit.default_timer() - t_start print(f"q4: {t}") -# m = memory_usage() +m = memory_usage() t_start = timeit.default_timer() df = ctx.create_dataframe([ans]) chk = ( @@ -149,7 +265,25 @@ def ans_shape(batches): .to_numpy()[0] ) chkt = timeit.default_timer() - t_start -# write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, 
chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +write_log( + task=task, + data=data_name, + in_rows=in_rows, + question=question, + out_rows=shape[0], + out_cols=shape[1], + solution=solution, + version=ver, + git=git, + fun=fun, + run=1, + time_sec=t, + mem_gb=m, + cache=cache, + chk=make_chk([chk]), + chk_time_sec=chkt, + on_disk=on_disk, +) del ans gc.collect() @@ -160,10 +294,10 @@ def ans_shape(batches): "SELECT id6, SUM(v1) AS v1, SUM(v2) AS v2, SUM(v3) AS v3 FROM x GROUP BY id6" ).collect() shape = ans_shape(ans) -# print(shape, flush=True) +print(shape, flush=True) t = timeit.default_timer() - t_start print(f"q5: {t}") -# m = memory_usage() +m = memory_usage() t_start = timeit.default_timer() df = ctx.create_dataframe([ans]) chk = ( @@ -173,7 +307,25 @@ def ans_shape(batches): .to_numpy()[0] ) chkt = timeit.default_timer() - t_start -# write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +write_log( + task=task, + data=data_name, + in_rows=in_rows, + question=question, + out_rows=shape[0], + out_cols=shape[1], + solution=solution, + version=ver, + git=git, + fun=fun, + run=1, + time_sec=t, + mem_gb=m, + cache=cache, + chk=make_chk([chk]), + chk_time_sec=chkt, + on_disk=on_disk, +) del ans gc.collect() @@ -184,10 +336,10 @@ def ans_shape(batches): "SELECT id4, id5, approx_percentile_cont(v3, .5) AS median_v3, stddev(v3) AS stddev_v3 FROM x GROUP BY id4, id5" ).collect() shape = ans_shape(ans) -# print(shape, flush=True) +print(shape, flush=True) t = timeit.default_timer() - t_start print(f"q6: {t}") -# m = memory_usage() +m = memory_usage() t_start = timeit.default_timer() df = ctx.create_dataframe([ans]) chk = ( @@ -197,7 +349,25 @@ def ans_shape(batches): .to_numpy()[0] ) chkt = timeit.default_timer() - t_start -# write_log(task=task, 
data=data_name, in_rows=in_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +write_log( + task=task, + data=data_name, + in_rows=in_rows, + question=question, + out_rows=shape[0], + out_cols=shape[1], + solution=solution, + version=ver, + git=git, + fun=fun, + run=1, + time_sec=t, + mem_gb=m, + cache=cache, + chk=make_chk([chk]), + chk_time_sec=chkt, + on_disk=on_disk, +) del ans gc.collect() @@ -208,15 +378,33 @@ def ans_shape(batches): "SELECT id3, MAX(v1) - MIN(v2) AS range_v1_v2 FROM x GROUP BY id3" ).collect() shape = ans_shape(ans) -# print(shape, flush=True) +print(shape, flush=True) t = timeit.default_timer() - t_start print(f"q7: {t}") -# m = memory_usage() +m = memory_usage() t_start = timeit.default_timer() df = ctx.create_dataframe([ans]) chk = df.aggregate([], [f.sum(col("range_v1_v2"))]).collect()[0].column(0)[0] chkt = timeit.default_timer() - t_start -# write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +write_log( + task=task, + data=data_name, + in_rows=in_rows, + question=question, + out_rows=shape[0], + out_cols=shape[1], + solution=solution, + version=ver, + git=git, + fun=fun, + run=1, + time_sec=t, + mem_gb=m, + cache=cache, + chk=make_chk([chk]), + chk_time_sec=chkt, + on_disk=on_disk, +) del ans gc.collect() @@ -227,15 +415,33 @@ def ans_shape(batches): "SELECT id6, v3 from (SELECT id6, v3, row_number() OVER (PARTITION BY id6 ORDER BY v3 DESC) AS row FROM x) t WHERE row <= 2" ).collect() shape = ans_shape(ans) -# print(shape, flush=True) +print(shape, flush=True) t = timeit.default_timer() - t_start print(f"q8: {t}") -# m = memory_usage() +m = memory_usage() t_start = 
timeit.default_timer() df = ctx.create_dataframe([ans]) chk = df.aggregate([], [f.sum(col("v3"))]).collect()[0].column(0)[0] chkt = timeit.default_timer() - t_start -# write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +write_log( + task=task, + data=data_name, + in_rows=in_rows, + question=question, + out_rows=shape[0], + out_cols=shape[1], + solution=solution, + version=ver, + git=git, + fun=fun, + run=1, + time_sec=t, + mem_gb=m, + cache=cache, + chk=make_chk([chk]), + chk_time_sec=chkt, + on_disk=on_disk, +) del ans gc.collect() @@ -244,15 +450,33 @@ def ans_shape(batches): t_start = timeit.default_timer() ans = ctx.sql("SELECT corr(v1, v2) as corr FROM x GROUP BY id2, id4").collect() shape = ans_shape(ans) -# print(shape, flush=True) +print(shape, flush=True) t = timeit.default_timer() - t_start print(f"q9: {t}") -# m = memory_usage() +m = memory_usage() t_start = timeit.default_timer() df = ctx.create_dataframe([ans]) chk = df.aggregate([], [f.sum(col("corr"))]).collect()[0].column(0)[0] chkt = timeit.default_timer() - t_start -# write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +write_log( + task=task, + data=data_name, + in_rows=in_rows, + question=question, + out_rows=shape[0], + out_cols=shape[1], + solution=solution, + version=ver, + git=git, + fun=fun, + run=1, + time_sec=t, + mem_gb=m, + cache=cache, + chk=make_chk([chk]), + chk_time_sec=chkt, + on_disk=on_disk, +) del ans gc.collect() @@ -263,10 +487,10 @@ def ans_shape(batches): "SELECT id1, id2, id3, id4, id5, id6, SUM(v3) as v3, COUNT(*) AS cnt FROM x GROUP BY id1, id2, id3, id4, id5, 
id6" ).collect() shape = ans_shape(ans) -# print(shape, flush=True) +print(shape, flush=True) t = timeit.default_timer() - t_start print(f"q10: {t}") -# m = memory_usage() +m = memory_usage() t_start = timeit.default_timer() df = ctx.create_dataframe([ans]) chk = ( @@ -276,7 +500,25 @@ def ans_shape(batches): .to_numpy()[0] ) chkt = timeit.default_timer() - t_start -# write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +write_log( + task=task, + data=data_name, + in_rows=in_rows, + question=question, + out_rows=shape[0], + out_cols=shape[1], + solution=solution, + version=ver, + git=git, + fun=fun, + run=1, + time_sec=t, + mem_gb=m, + cache=cache, + chk=make_chk([chk]), + chk_time_sec=chkt, + on_disk=on_disk, +) del ans gc.collect() diff --git a/benchmarks/db-benchmark/join-datafusion.py b/benchmarks/db-benchmark/join-datafusion.py index 8843b55c0..602cee697 100755 --- a/benchmarks/db-benchmark/join-datafusion.py +++ b/benchmarks/db-benchmark/join-datafusion.py @@ -26,26 +26,7 @@ print("# join-datafusion.py", flush=True) -# exec(open("./_helpers/helpers.py").read()) - - -def join_to_tbls(data_name): - x_n = int(float(data_name.split("_")[1])) - y_n = [ - "{:.0e}".format(x_n / 1e6), - "{:.0e}".format(x_n / 1e3), - "{:.0e}".format(x_n), - ] - y_n = [ - y_n[0].replace("+0", ""), - y_n[1].replace("+0", ""), - y_n[2].replace("+0", ""), - ] - return [ - data_name.replace("NA", y_n[0]), - data_name.replace("NA", y_n[1]), - data_name.replace("NA", y_n[2]), - ] +exec(open("./_helpers/helpers.py").read()) def ans_shape(batches): @@ -59,7 +40,7 @@ def ans_shape(batches): return rows, cols -ver = "6.0.0" +ver = df.__version__ task = "join" git = "" solution = "datafusion" @@ -84,13 +65,16 @@ def ans_shape(batches): + ", " + y_data_name[0] + ", " - + y_data_name[2] + + 
y_data_name[1] + ", " + y_data_name[2], flush=True, ) ctx = df.SessionContext() +print(ctx) + +# TODO we should be applying projections to these table reads to crete relations of different sizes x_data = pacsv.read_csv( src_jn_x, convert_options=pacsv.ConvertOptions(auto_dict_encode=True) @@ -133,8 +117,26 @@ def ans_shape(batches): df = ctx.create_dataframe([ans]) chk = df.aggregate([], [f.sum(col("v1"))]).collect()[0].column(0)[0] chkt = timeit.default_timer() - t_start -# m = memory_usage() -# write_log(task=task, data=data_name, in_rows=x_data.num_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +m = memory_usage() +write_log( + task=task, + data=data_name, + in_rows=x_data.num_rows, + question=question, + out_rows=shape[0], + out_cols=shape[1], + solution=solution, + version=ver, + git=git, + fun=fun, + run=1, + time_sec=t, + mem_gb=m, + cache=cache, + chk=make_chk([chk]), + chk_time_sec=chkt, + on_disk=on_disk, +) del ans gc.collect() @@ -156,8 +158,26 @@ def ans_shape(batches): .column(0)[0] ) chkt = timeit.default_timer() - t_start -# m = memory_usage() -# write_log(task=task, data=data_name, in_rows=x_data.num_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +m = memory_usage() +write_log( + task=task, + data=data_name, + in_rows=x_data.num_rows, + question=question, + out_rows=shape[0], + out_cols=shape[1], + solution=solution, + version=ver, + git=git, + fun=fun, + run=1, + time_sec=t, + mem_gb=m, + cache=cache, + chk=make_chk([chk]), + chk_time_sec=chkt, + on_disk=on_disk, +) del ans gc.collect() @@ -179,8 +199,26 @@ def ans_shape(batches): .column(0)[0] ) chkt = timeit.default_timer() - t_start -# m = memory_usage() -# 
write_log(task=task, data=data_name, in_rows=x_data.num_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +m = memory_usage() +write_log( + task=task, + data=data_name, + in_rows=x_data.num_rows, + question=question, + out_rows=shape[0], + out_cols=shape[1], + solution=solution, + version=ver, + git=git, + fun=fun, + run=1, + time_sec=t, + mem_gb=m, + cache=cache, + chk=make_chk([chk]), + chk_time_sec=chkt, + on_disk=on_disk, +) del ans gc.collect() @@ -202,8 +240,26 @@ def ans_shape(batches): .column(0)[0] ) chkt = timeit.default_timer() - t_start -# m = memory_usage() -# write_log(task=task, data=data_name, in_rows=x_data.num_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +m = memory_usage() +write_log( + task=task, + data=data_name, + in_rows=x_data.num_rows, + question=question, + out_rows=shape[0], + out_cols=shape[1], + solution=solution, + version=ver, + git=git, + fun=fun, + run=1, + time_sec=t, + mem_gb=m, + cache=cache, + chk=make_chk([chk]), + chk_time_sec=chkt, + on_disk=on_disk, +) del ans gc.collect() @@ -225,8 +281,26 @@ def ans_shape(batches): .column(0)[0] ) chkt = timeit.default_timer() - t_start -# m = memory_usage() -# write_log(task=task, data=data_name, in_rows=x_data.num_rows, question=question, out_rows=shape[0], out_cols=shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk([chk]), chk_time_sec=chkt, on_disk=on_disk) +m = memory_usage() +write_log( + task=task, + data=data_name, + in_rows=x_data.num_rows, + question=question, + out_rows=shape[0], + out_cols=shape[1], + solution=solution, + version=ver, + git=git, + fun=fun, + run=1, + time_sec=t, + mem_gb=m, + 
cache=cache, + chk=make_chk([chk]), + chk_time_sec=chkt, + on_disk=on_disk, +) del ans gc.collect() diff --git a/benchmarks/db-benchmark/run-bench.sh b/benchmarks/db-benchmark/run-bench.sh index 2c3080929..36a6087d9 100755 --- a/benchmarks/db-benchmark/run-bench.sh +++ b/benchmarks/db-benchmark/run-bench.sh @@ -17,5 +17,11 @@ # under the License. set -e +#SRC_DATANAME=G1_1e7_1e2_0_0 python3 /db-benchmark/polars/groupby-polars.py SRC_DATANAME=G1_1e7_1e2_0_0 python3 /db-benchmark/datafusion-python/groupby-datafusion.py -#SRC_DATANAME=J1_1e7_NA_0_0 python3 /db-benchmark/datafusion-python/join-datafusion.py + +# joins need more work still +#SRC_DATANAME=G1_1e7_1e2_0_0 python3 /db-benchmark/datafusion-python/join-datafusion.py +#SRC_DATANAME=G1_1e7_1e2_0_0 python3 /db-benchmark/polars/join-polars.py + +cat time.csv diff --git a/dev/python_lint.sh b/dev/python_lint.sh index 949346294..3bc67fb12 100755 --- a/dev/python_lint.sh +++ b/dev/python_lint.sh @@ -22,5 +22,5 @@ set -e source venv/bin/activate -flake8 --exclude venv --ignore=E501,W503 +flake8 --exclude venv,benchmarks/db-benchmark --ignore=E501,W503 black --line-length 79 . 
From bdcbb9f12e59ee863c418fdf21f64182aab66b11 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Tue, 9 May 2023 08:39:50 -0400 Subject: [PATCH 039/413] Make expr member of PyExpr public (#375) --- src/common/data_type.rs | 11 ++++++++++ src/expr.rs | 48 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/src/common/data_type.rs b/src/common/data_type.rs index a9b0d17d5..d55a0e86d 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -21,6 +21,17 @@ use pyo3::prelude::*; use crate::errors::py_datafusion_err; +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[pyclass(name = "RexType", module = "datafusion.common")] +pub enum RexType { + Alias, + Literal, + Call, + Reference, + ScalarSubquery, + Other, +} + /// These bindings are tying together several disparate systems. /// You have SQL types for the SQL strings and RDBMS systems itself. /// Rust types for the DataFusion code diff --git a/src/expr.rs b/src/expr.rs index 7c80d0d82..4ada4c16d 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -22,6 +22,7 @@ use datafusion::arrow::datatypes::DataType; use datafusion::arrow::pyarrow::PyArrowType; use datafusion_expr::{col, lit, Cast, Expr, GetIndexedField}; +use crate::common::data_type::RexType; use crate::errors::py_runtime_err; use crate::expr::aggregate_expr::PyAggregateFunction; use crate::expr::binary_expr::PyBinaryExpr; @@ -83,7 +84,7 @@ pub mod union; #[pyclass(name = "Expr", module = "datafusion.expr", subclass)] #[derive(Debug, Clone)] pub struct PyExpr { - pub(crate) expr: Expr, + pub expr: Expr, } impl From for Expr { @@ -228,6 +229,51 @@ impl PyExpr { let expr = Expr::Cast(Cast::new(Box::new(self.expr.clone()), to.0)); expr.into() } + + /// A Rex (Row Expression) specifies a single row of data. That specification + /// could include user defined functions or types. RexType identifies the row + /// as one of the possible valid `RexTypes`. 
+ pub fn rex_type(&self) -> PyResult { + Ok(match self.expr { + Expr::Alias(..) => RexType::Alias, + Expr::Column(..) | Expr::QualifiedWildcard { .. } | Expr::GetIndexedField { .. } => { + RexType::Reference + } + Expr::ScalarVariable(..) | Expr::Literal(..) => RexType::Literal, + Expr::BinaryExpr { .. } + | Expr::Not(..) + | Expr::IsNotNull(..) + | Expr::Negative(..) + | Expr::IsNull(..) + | Expr::Like { .. } + | Expr::ILike { .. } + | Expr::SimilarTo { .. } + | Expr::Between { .. } + | Expr::Case { .. } + | Expr::Cast { .. } + | Expr::TryCast { .. } + | Expr::Sort { .. } + | Expr::ScalarFunction { .. } + | Expr::AggregateFunction { .. } + | Expr::WindowFunction { .. } + | Expr::AggregateUDF { .. } + | Expr::InList { .. } + | Expr::Wildcard + | Expr::ScalarUDF { .. } + | Expr::Exists { .. } + | Expr::InSubquery { .. } + | Expr::GroupingSet(..) + | Expr::IsTrue(..) + | Expr::IsFalse(..) + | Expr::IsUnknown(_) + | Expr::IsNotTrue(..) + | Expr::IsNotFalse(..) + | Expr::Placeholder { .. } + | Expr::OuterReferenceColumn(_, _) + | Expr::IsNotUnknown(_) => RexType::Call, + Expr::ScalarSubquery(..) 
=> RexType::ScalarSubquery, + }) + } } /// Initializes the `expr` module to match the pattern of `datafusion-expr` https://docs.rs/datafusion-expr/latest/datafusion_expr/ From 9c75d03d7fa315251e0df2fbc97f081a8257c9bc Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 9 May 2023 08:26:46 -0600 Subject: [PATCH 040/413] Prepare 24.0.0 Release (#376) * bump version * update changelog --- CHANGELOG.md | 27 +++- Cargo.lock | 348 +++++++++++++++++++++++++-------------------------- Cargo.toml | 14 +-- 3 files changed, 203 insertions(+), 186 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 26c4eb18e..22d7c0f61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,12 +19,35 @@ # Changelog -## [23.0.0](https://github.com/apache/arrow-datafusion-python/tree/23.0.0) (2023-04-23) +## [24.0.0](https://github.com/apache/arrow-datafusion-python/tree/24.0.0) (2023-05-09) -[Full Changelog](https://github.com/apache/arrow-datafusion-python/compare/22.0.0...23.0.0) +[Full Changelog](https://github.com/apache/arrow-datafusion-python/compare/23.0.0...24.0.0) + +**Documentation updates:** + +- Fix link to user guide [#354](https://github.com/apache/arrow-datafusion-python/pull/354) (andygrove) **Merged pull requests:** +- Add interface to serialize Substrait plans to Python Bytes. [#344](https://github.com/apache/arrow-datafusion-python/pull/344) (kylebrooks-8451) +- Add partition_count property to ExecutionPlan. [#346](https://github.com/apache/arrow-datafusion-python/pull/346) (kylebrooks-8451) +- Remove unsendable from all Rust pyclass types. [#348](https://github.com/apache/arrow-datafusion-python/pull/348) (kylebrooks-8451) +- Fix link to user guide [#354](https://github.com/apache/arrow-datafusion-python/pull/354) (andygrove) +- Fix SessionContext execute. 
[#353](https://github.com/apache/arrow-datafusion-python/pull/353) (kylebrooks-8451) +- Pub mod expr in lib.rs [#357](https://github.com/apache/arrow-datafusion-python/pull/357) (jdye64) +- Add benchmark derived from TPC-H [#355](https://github.com/apache/arrow-datafusion-python/pull/355) (andygrove) +- Add db-benchmark [#365](https://github.com/apache/arrow-datafusion-python/pull/365) (andygrove) +- First pass of documentation in mdBook [#364](https://github.com/apache/arrow-datafusion-python/pull/364) (MrPowers) +- Add 'pub' and '#[pyo3(get, set)]' to DataTypeMap [#371](https://github.com/apache/arrow-datafusion-python/pull/371) (jdye64) +- Fix db-benchmark [#369](https://github.com/apache/arrow-datafusion-python/pull/369) (andygrove) +- Docs explaining how to view query plans [#373](https://github.com/apache/arrow-datafusion-python/pull/373) (andygrove) +- Improve db-benchmark [#372](https://github.com/apache/arrow-datafusion-python/pull/372) (andygrove) +- Make expr member of PyExpr public [#375](https://github.com/apache/arrow-datafusion-python/pull/375) (jdye64) + +## [23.0.0](https://github.com/apache/arrow-datafusion-python/tree/23.0.0) (2023-04-23) + +[Full Changelog](https://github.com/apache/arrow-datafusion-python/compare/22.0.0...23.0.0) + **Merged pull requests:** - Improve API docs, README, and examples for configuring context [#321](https://github.com/apache/arrow-datafusion-python/pull/321) (andygrove) diff --git a/Cargo.lock b/Cargo.lock index db7c9a768..17acaf7aa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -113,9 +113,9 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" [[package]] name = "arrow" -version = "37.0.0" +version = "38.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aea9fcb25bbb70f7f922f95b99ca29c1013dab47f6df61a6f24861842dd7f2e" +checksum = "c107a57b5913d852da9d5a40e280e4695f2258b5b87733c13b770c63a7117287" dependencies = [ "ahash", "arrow-arith", @@ -136,9 +136,9 @@ 
dependencies = [ [[package]] name = "arrow-arith" -version = "37.0.0" +version = "38.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d967b42f7b12c91fd78acd396b20c2973b184c8866846674abbb00c963e93ab" +checksum = "ace6aa3d5617c5d03041a05e01c6819428a8ddf49dd0b055df9b40fef9d96094" dependencies = [ "arrow-array", "arrow-buffer", @@ -151,9 +151,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "37.0.0" +version = "38.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3190f208ee7aa0f3596fa0098d42911dec5e123ca88c002a08b24877ad14c71e" +checksum = "104a04520692cc674e6afd7682f213ca41f9b13ff1873f63a5a2857a590b87b3" dependencies = [ "ahash", "arrow-buffer", @@ -168,9 +168,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "37.0.0" +version = "38.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d33c733c5b6c44a0fc526f29c09546e04eb56772a7a21e48e602f368be381f6" +checksum = "72c875bcb9530ec403998fb0b2dc6d180a7c64563ca4bc22b90eafb84b113143" dependencies = [ "half", "num", @@ -178,9 +178,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "37.0.0" +version = "38.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abd349520b6a1ed4924ae2afc9d23330a3044319e4ec3d5b124c09e4d440ae87" +checksum = "d6d6e18281636c8fc0b93be59834da6bf9a72bb70fd0c98ddfdaf124da466c28" dependencies = [ "arrow-array", "arrow-buffer", @@ -195,9 +195,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "37.0.0" +version = "38.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c80af3c3e290a2a7e1cc518f1471dff331878cb4af9a5b088bf030b89debf649" +checksum = "3197dab0963a236ff8e7c82e2272535745955ac1321eb740c29f2f88b353f54e" dependencies = [ "arrow-array", "arrow-buffer", @@ -214,9 +214,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "37.0.0" +version = "38.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c8361947aaa96d331da9df3f7a08bdd8ab805a449994c97f5c4d24c4b7e2cf" +checksum = "eb68113d6ecdbe8bba48b2c4042c151bf9e1c61244e45072a50250a6fc59bafe" dependencies = [ "arrow-buffer", "arrow-schema", @@ -226,9 +226,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "37.0.0" +version = "38.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a46ee000b9fbd1e8db6e8b26acb8c760838512b39d8c9f9d73892cb55351d50" +checksum = "eab4bbf2dd3078facb5ce0a9641316a64f42bfd8cf357e6775c8a5e6708e3a8d" dependencies = [ "arrow-array", "arrow-buffer", @@ -240,9 +240,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "37.0.0" +version = "38.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bf2366607be867ced681ad7f272371a5cf1fc2941328eef7b4fee14565166fb" +checksum = "48c5b650d23746a494665d914a7fa3d21d939153cff9d53bdebe39bffa88f263" dependencies = [ "arrow-array", "arrow-buffer", @@ -260,9 +260,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "37.0.0" +version = "38.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "304069901c867200e21ec868ae7521165875470ef2f1f6d58f979a443d63997e" +checksum = "68c6fce28e5011e30acc7466b5efcb8ed0197c396240bd2b10e167f275a3c208" dependencies = [ "arrow-array", "arrow-buffer", @@ -275,9 +275,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "37.0.0" +version = "38.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d57fe8ceef3392fdd493269d8a2d589de17bafce151aacbffbddac7a57f441a" +checksum = "f20a421f19799d8b93eb8edde5217e910fa1e2d6ceb3c529f000e57b6db144c0" dependencies = [ "ahash", "arrow-array", @@ -290,18 +290,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "37.0.0" +version = "38.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a16b88a93ac8350f0200b1cd336a1f887315925b8dd7aa145a37b8bdbd8497a4" +checksum = "bc85923d8d6662cc66ac6602c7d1876872e671002d60993dfdf492a6badeae92" dependencies = [ "bitflags 2.2.1", ] [[package]] name = "arrow-select" -version = "37.0.0" +version = "38.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98e8a4d6ca37d5212439b24caad4d80743fcbb706706200dd174bb98e68fe9d8" +checksum = "f6ab6613ce65b61d85a3410241744e84e48fbab0fe06e1251b4429d21b3470fd" dependencies = [ "arrow-array", "arrow-buffer", @@ -312,9 +312,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "37.0.0" +version = "38.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbb594efa397eb6a546f42b1f8df3d242ea84dbfda5232e06035dc2b2e2c8459" +checksum = "f3008641239e884aefba66d8b8532da6af40d14296349fcc85935de4ba67b89e" dependencies = [ "arrow-array", "arrow-buffer", @@ -736,9 +736,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "23.0.0" +version = "24.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8a7d4b334f4512ff2fdbce87f511f570ae895af1ac7c729e77c12583253b22a" +checksum = "0404a559d5a6d8320369bb0a290b43bbc4f8622d0ef6f04bd095ace9a663f439" dependencies = [ "ahash", "apache-avro", @@ -788,9 +788,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "23.0.0" +version = "24.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80abfcb1dbc6390f952f21de9069e6177ad6318fcae5fbceabb50666d96533dd" +checksum = "4653b79a55161852973760db69ea6dcd05c9966a1b588fd83028f625536a1d7f" dependencies = [ "apache-avro", "arrow", @@ -805,9 +805,9 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "23.0.0" +version = "24.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df2524f1b4b58319895b112809d2a59e54fa662d0e46330a455f22882c2cb7b9" +checksum = 
"53481c334b73c6759697919d1d05690392381145fa1890849a65b5a71a24a1ec" dependencies = [ "dashmap", "datafusion-common", @@ -823,9 +823,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "23.0.0" +version = "24.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af8040b7a75b04685f4db0a1b11ffa93cd163c1bc13751df3f5cf76baabaf5a1" +checksum = "a8ecd7c6605d0b4269346d03289e2ced1715a303e75e6d313dba0bafb1f823f2" dependencies = [ "ahash", "arrow", @@ -835,9 +835,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "23.0.0" +version = "24.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74ceae25accc0f640a4238283f55f3a9fd181d55398703a4330fb2c46261e6a2" +checksum = "70a7c04e94cb4aa9c323993856e18b91f690dda0358a34ab07a3fe0f14bc6600" dependencies = [ "arrow", "async-trait", @@ -848,14 +848,14 @@ dependencies = [ "hashbrown 0.13.2", "itertools", "log", - "regex-syntax 0.6.29", + "regex-syntax 0.7.1", ] [[package]] name = "datafusion-physical-expr" -version = "23.0.0" +version = "24.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df4cf228b312f2758cb78e93fe3d2dc602345028efdf7cfa5b338cb370d0a347" +checksum = "9e34eb8668fee1443965fff41ba73b2956d50a07ed8dd929cfa2e839ab91da5a" dependencies = [ "ahash", "arrow", @@ -886,7 +886,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "23.0.0" +version = "24.0.0" dependencies = [ "async-trait", "datafusion", @@ -913,9 +913,9 @@ dependencies = [ [[package]] name = "datafusion-row" -version = "23.0.0" +version = "24.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b52b486fb3d81bb132e400304be01af5aba0ad6737e3518045bb98944991fe32" +checksum = "efa800ae88dfd62ea6c58c24a1154d92937c755672f522b84e8ea6539fad369b" dependencies = [ "arrow", "datafusion-common", @@ -925,9 +925,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "23.0.0" +version = 
"24.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773e985c182e41cfd68f7a7b483ab6bfb68beaac241c348cd4b1bf9f9d61b762" +checksum = "556642ef90073e39af721362353ccce4e1f418da7a8e31c23510ed9de6eb71f2" dependencies = [ "arrow", "arrow-schema", @@ -939,9 +939,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "23.0.0" +version = "24.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "836e9b1c0ea430199c9bd4b88024cb8d617e3768ffdb412064169e2504a850ed" +checksum = "0d7643a77bb446047095ec21b913adb900b71c7a2ae600f8062906dd2e5642b9" dependencies = [ "async-recursion", "chrono", @@ -964,26 +964,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "dirs" -version = "4.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" -dependencies = [ - "dirs-sys", -] - -[[package]] -name = "dirs-sys" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" -dependencies = [ - "libc", - "redox_users", - "winapi", -] - [[package]] name = "doc-comment" version = "0.3.3" @@ -1212,9 +1192,9 @@ dependencies = [ [[package]] name = "gix" -version = "0.43.1" +version = "0.44.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c256ea71cc1967faaefdaad15f334146b7c806f12460dcafd3afed845c8c78dd" +checksum = "6bf41b61f7df395284f7a579c0fa1a7e012c5aede655174d4e91299ef1cac643" dependencies = [ "gix-actor", "gix-attributes", @@ -1224,9 +1204,11 @@ dependencies = [ "gix-diff", "gix-discover", "gix-features", + "gix-fs", "gix-glob", "gix-hash", "gix-hashtable", + "gix-ignore", "gix-index", "gix-lock", "gix-mailmap", @@ -1242,6 +1224,7 @@ dependencies = [ "gix-tempfile", "gix-traverse", "gix-url", + "gix-utils", "gix-validate", "gix-worktree", "log", @@ -1254,9 +1237,9 @@ dependencies = [ 
[[package]] name = "gix-actor" -version = "0.19.0" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc22b0cdc52237667c301dd7cdc6ead8f8f73c9f824e9942c8ebd6b764f6c0bf" +checksum = "848efa0f1210cea8638f95691c82a46f98a74b9e3524f01d4955ebc25a8f84f3" dependencies = [ "bstr", "btoi", @@ -1268,24 +1251,26 @@ dependencies = [ [[package]] name = "gix-attributes" -version = "0.10.0" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2231a25934a240d0a4b6f4478401c73ee81d8be52de0293eedbc172334abf3e1" +checksum = "3015baa01ad2122fbcaab7863c857a603eb7b7ec12ac8141207c42c6439805e2" dependencies = [ "bstr", - "gix-features", "gix-glob", "gix-path", "gix-quote", + "kstring", + "log", + "smallvec", "thiserror", "unicode-bom", ] [[package]] name = "gix-bitmap" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "024bca0c7187517bda5ea24ab148c9ca8208dd0c3e2bea88cdb2008f91791a6d" +checksum = "55a95f4942360766c3880bdb2b4b57f1ef73b190fc424755e7fdf480430af618" dependencies = [ "thiserror", ] @@ -1310,9 +1295,9 @@ dependencies = [ [[package]] name = "gix-config" -version = "0.20.1" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fbad5ce54a8fc997acc50febd89ec80fa6e97cb7f8d0654cb229936407489d8" +checksum = "1d252a0eddb6df74600d3d8872dc9fe98835a7da43110411d705b682f49d4ac1" dependencies = [ "bstr", "gix-config-value", @@ -1332,11 +1317,11 @@ dependencies = [ [[package]] name = "gix-config-value" -version = "0.10.2" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d09154c0c8677e4da0ec35e896f56ee3e338e741b9599fae06075edd83a4081c" +checksum = "786861e84a5793ad5f863d846de5eb064cd23b87e61ad708c8c402608202e7be" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.2.1", "bstr", "gix-path", "libc", @@ -1345,9 +1330,9 @@ dependencies = [ [[package]] name 
= "gix-credentials" -version = "0.12.0" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "750b684197374518ea057e0a0594713e07683faa0a3f43c0f93d97f64130ad8d" +checksum = "4874a4fc11ffa844a3c2b87a66957bda30a73b577ef1acf15ac34df5745de5ff" dependencies = [ "bstr", "gix-command", @@ -1361,9 +1346,9 @@ dependencies = [ [[package]] name = "gix-date" -version = "0.4.3" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b96271912ce39822501616f177dea7218784e6c63be90d5f36322ff3a722aae2" +checksum = "99056f37270715f5c7584fd8b46899a2296af9cae92463bf58b8bd1f5a78e553" dependencies = [ "bstr", "itoa", @@ -1373,9 +1358,9 @@ dependencies = [ [[package]] name = "gix-diff" -version = "0.28.1" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "103a0fa79b0d438f5ecb662502f052e530ace4fe1fe8e1c83c0c6da76d728e67" +checksum = "644a0f2768bc42d7a69289ada80c9e15c589caefc6a315d2307202df83ed1186" dependencies = [ "gix-hash", "gix-object", @@ -1385,9 +1370,9 @@ dependencies = [ [[package]] name = "gix-discover" -version = "0.16.2" +version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eba8ba458cb8f4a6c33409b0fe650b1258655175a7ffd1d24fafd3ed31d880b" +checksum = "1a6b61363e63e7cdaa3e6f96acb0257ebdb3d8883e21eba5930c99f07f0a5fc0" dependencies = [ "bstr", "dunce", @@ -1400,9 +1385,9 @@ dependencies = [ [[package]] name = "gix-features" -version = "0.28.1" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b76f9a80f6dd7be66442ae86e1f534effad9546676a392acc95e269d0c21c22" +checksum = "cf69b0f5c701cc3ae22d3204b671907668f6437ca88862d355eaf9bc47a4f897" dependencies = [ "crc32fast", "flate2", @@ -1415,21 +1400,32 @@ dependencies = [ "walkdir", ] +[[package]] +name = "gix-fs" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9b37a1832f691fdc09910bd267f9a2e413737c1f9ec68c6e31f9e802616278a9" +dependencies = [ + "gix-features", +] + [[package]] name = "gix-glob" -version = "0.5.5" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93e43efd776bc543f46f0fd0ca3d920c37af71a764a16f2aebd89765e9ff2993" +checksum = "c07c98204529ac3f24b34754540a852593d2a4c7349008df389240266627a72a" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.2.1", "bstr", + "gix-features", + "gix-path", ] [[package]] name = "gix-hash" -version = "0.10.4" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a258595457bc192d1f1c59d0d168a1e34e2be9b97a614e14995416185de41a7" +checksum = "078eec3ac2808cc03f0bddd2704cb661da5c5dc33b41a9d7947b141d499c7c42" dependencies = [ "hex", "thiserror", @@ -1437,22 +1433,34 @@ dependencies = [ [[package]] name = "gix-hashtable" -version = "0.1.3" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4e55e40dfd694884f0eb78796c5bddcf2f8b295dace47039099dd7e76534973" +checksum = "afebb85691c6a085b114e01a27f4a61364519298c5826cb87a45c304802299bc" dependencies = [ "gix-hash", "hashbrown 0.13.2", "parking_lot", ] +[[package]] +name = "gix-ignore" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba205b6df563e2906768bb22834c82eb46c5fdfcd86ba2c347270bc8309a05b2" +dependencies = [ + "bstr", + "gix-glob", + "gix-path", + "unicode-bom", +] + [[package]] name = "gix-index" -version = "0.15.1" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "717ab601ece7921f59fe86849dbe27d44a46ebb883b5885732c4f30df4996177" +checksum = "f39c1ccc8f1912cbbd5191efc28dbc5f0d0598042aa56bc09427b7c34efab3ba" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.2.1", "bstr", "btoi", "filetime", @@ -1481,9 +1489,9 @@ dependencies = [ [[package]] name = "gix-mailmap" -version = "0.11.0" +version = "0.12.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b66aea5e52875cd4915f4957a6f4b75831a36981e2ec3f5fad9e370e444fe1a" +checksum = "e8856cec3bdc3610c06970d28b6cb20a0c6621621cf9a8ec48cbd23f2630f362" dependencies = [ "bstr", "gix-actor", @@ -1492,9 +1500,9 @@ dependencies = [ [[package]] name = "gix-object" -version = "0.28.0" +version = "0.29.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8df068db9180ee935fbb70504848369e270bdcb576b05c0faa8b9fd3b86fc017" +checksum = "c9bb30ce0818d37096daa29efe361a4bc6dd0b51a5726598898be7e9a40a01e1" dependencies = [ "bstr", "btoi", @@ -1511,9 +1519,9 @@ dependencies = [ [[package]] name = "gix-odb" -version = "0.43.1" +version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e83af2e3e36005bfe010927f0dff41fb5acc3e3d89c6f1174135b3a34086bda2" +checksum = "bca2f324aa67672b6d0f2c0fa93f96eb6a7029d260e4c1df5dce3c015f5e5add" dependencies = [ "arc-swap", "gix-features", @@ -1529,9 +1537,9 @@ dependencies = [ [[package]] name = "gix-pack" -version = "0.33.2" +version = "0.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9401911c7fe032ad7b31c6a6b5be59cb283d1d6c999417a8215056efe6d635f3" +checksum = "164a515900a83257ae4aa80e741655bee7a2e39113fb535d7a5ac623b445ff20" dependencies = [ "clru", "gix-chunk", @@ -1551,24 +1559,26 @@ dependencies = [ [[package]] name = "gix-path" -version = "0.7.3" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32370dce200bb951df013e03dff35b4233fc7a89458642b047629b91734a7e19" +checksum = "4fc78f47095a0c15aea0e66103838f0748f4494bf7a9555dfe0f00425400396c" dependencies = [ "bstr", + "home", + "once_cell", "thiserror", ] [[package]] name = "gix-prompt" -version = "0.3.3" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f3034d4d935aef2c7bf719aaa54b88c520e82413118d886ae880a31d5bdee57" +checksum = 
"330d11fdf88fff3366c2491efde2f3e454958efe7d5ddf60272e8fb1d944bb01" dependencies = [ "gix-command", "gix-config-value", - "nix", "parking_lot", + "rustix", "thiserror", ] @@ -1585,12 +1595,13 @@ dependencies = [ [[package]] name = "gix-ref" -version = "0.27.2" +version = "0.29.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4e909396ed3b176823991ccc391c276ae2a015e54edaafa3566d35123cfac9d" +checksum = "1e03989e9d49954368e1b526578230fc7189d1634acdfbe79e9ba1de717e15d5" dependencies = [ "gix-actor", "gix-features", + "gix-fs", "gix-hash", "gix-lock", "gix-object", @@ -1604,9 +1615,9 @@ dependencies = [ [[package]] name = "gix-refspec" -version = "0.9.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aba332462bda2e8efeae4302b39a6ed01ad56ef772fd5b7ef197cf2798294d65" +checksum = "0a6ea733820df67e4cd7797deb12727905824d8f5b7c59d943c456d314475892" dependencies = [ "bstr", "gix-hash", @@ -1618,9 +1629,9 @@ dependencies = [ [[package]] name = "gix-revision" -version = "0.12.2" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c6f6ff53f888858afc24bf12628446a14279ceec148df6194481f306f553ad2" +checksum = "810f35e9afeccca999d5d348b239f9c162353127d2e13ff3240e31b919e35476" dependencies = [ "bstr", "gix-date", @@ -1632,15 +1643,14 @@ dependencies = [ [[package]] name = "gix-sec" -version = "0.6.2" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8ffa5bf0772f9b01de501c035b6b084cf9b8bb07dec41e3afc6a17336a65f47" +checksum = "794520043d5a024dfeac335c6e520cb616f6963e30dab995892382e998c12897" dependencies = [ - "bitflags 1.3.2", - "dirs", + "bitflags 2.2.1", "gix-path", "libc", - "windows 0.43.0", + "windows", ] [[package]] @@ -1659,9 +1669,9 @@ dependencies = [ [[package]] name = "gix-traverse" -version = "0.24.0" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"dd9a4a07bb22168dc79c60e1a6a41919d198187ca83d8a5940ad8d7122a45df3" +checksum = "a5be1e807f288c33bb005075111886cceb43ed8a167b3182a0f62c186e2a0dd1" dependencies = [ "gix-hash", "gix-hashtable", @@ -1671,9 +1681,9 @@ dependencies = [ [[package]] name = "gix-url" -version = "0.16.0" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6a22b4b32ad14d68f7b7fb6458fa58d44b01797d94c1b8f4db2d9c7b3c366b5" +checksum = "dfc77f89054297cc81491e31f1bab4027e554b5ef742a44bd7035db9a0f78b76" dependencies = [ "bstr", "gix-features", @@ -1683,6 +1693,15 @@ dependencies = [ "url", ] +[[package]] +name = "gix-utils" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c10b69beac219acb8df673187a1f07dde2d74092f974fb3f9eb385aeb667c909" +dependencies = [ + "fastrand", +] + [[package]] name = "gix-validate" version = "0.7.4" @@ -1695,15 +1714,18 @@ dependencies = [ [[package]] name = "gix-worktree" -version = "0.15.2" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54ec9a000b4f24af706c3cc680c7cda235656cbe3216336522f5692773b8a301" +checksum = "a69eaff0ae973a9d37c40f02ae5ae50fa726c8fc2fd3ab79d0a19eb61975aafa" dependencies = [ "bstr", + "filetime", "gix-attributes", "gix-features", + "gix-fs", "gix-glob", "gix-hash", + "gix-ignore", "gix-index", "gix-object", "gix-path", @@ -1879,7 +1901,7 @@ dependencies = [ "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "windows 0.48.0", + "windows", ] [[package]] @@ -2003,6 +2025,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "kstring" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3066350882a1cd6d950d055997f379ac37fd39f81cd4d8ed186032eb3c5747" +dependencies = [ + "static_assertions", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -2261,18 +2292,6 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" -[[package]] -name = "nix" -version = "0.26.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" -dependencies = [ - "bitflags 1.3.2", - "cfg-if", - "libc", - "static_assertions", -] - [[package]] name = "nom" version = "7.1.3" @@ -2447,9 +2466,9 @@ dependencies = [ [[package]] name = "parquet" -version = "37.0.0" +version = "38.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5022d98333271f4ca3e87bab760498e61726bf5a6ca919123c80517e20ded29" +checksum = "4cbd51311f8d9ff3d2697b1522b18a588782e097d313a1a278b0faf2ccf2d3f6" dependencies = [ "ahash", "arrow-array", @@ -2469,6 +2488,7 @@ dependencies = [ "lz4", "num", "num-bigint", + "object_store", "paste", "seq-macro", "snap", @@ -2796,17 +2816,6 @@ dependencies = [ "bitflags 1.3.2", ] -[[package]] -name = "redox_users" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" -dependencies = [ - "getrandom", - "redox_syscall 0.2.16", - "thiserror", -] - [[package]] name = "regex" version = "1.8.1" @@ -3258,9 +3267,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.7.5" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3ae64fb7ad0670c7d6d53d57b1b91beb2212afc30e164cc8edb02d6b2cff32a" +checksum = "54cd43d44620f716d55d46b998b3cf1baab2935aaa8adc14e3d3d9a465ddae15" dependencies = [ "gix", "heck", @@ -3607,9 +3616,9 @@ checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" [[package]] name = "unicode-bom" -version = "1.1.4" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63ec69f541d875b783ca40184d655f2927c95f0bffd486faa83cd3ac3529ec32" +checksum = 
"98e90c70c9f0d4d1ee6d0a7d04aa06cb9bbd53d8cfbdd62a0269a7c2eb640552" [[package]] name = "unicode-ident" @@ -3859,21 +3868,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -[[package]] -name = "windows" -version = "0.43.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04662ed0e3e5630dfa9b26e4cb823b817f1a9addda855d973a9458c236556244" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - [[package]] name = "windows" version = "0.48.0" diff --git a/Cargo.toml b/Cargo.toml index 4c5203d69..4c6248a6f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "23.0.0" +version = "24.0.0" homepage = "https://github.com/apache/arrow-datafusion-python" repository = "https://github.com/apache/arrow-datafusion-python" authors = ["Apache Arrow "] @@ -36,12 +36,12 @@ protoc = [ "datafusion-substrait/protoc" ] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.18.1", features = ["extension-module", "abi3", "abi3-py37"] } -datafusion = { version = "23.0.0" , features = ["pyarrow", "avro"] } -datafusion-common = { version = "23.0.0", features = ["pyarrow"] } -datafusion-expr = "23.0.0" -datafusion-optimizer = "23.0.0" -datafusion-sql = "23.0.0" -datafusion-substrait = "23.0.0" +datafusion = { version = "24.0.0" , features = ["pyarrow", "avro"] } +datafusion-common = { version = "24.0.0", features = ["pyarrow"] } +datafusion-expr = "24.0.0" +datafusion-optimizer = "24.0.0" +datafusion-sql = "24.0.0" +datafusion-substrait = "24.0.0" prost = "0.11" prost-types = "0.11" uuid = { version = "1.2", features = ["v4"] } From 
228b6e515b1b3569fbffc7c34d84ba29d3a98e75 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 10 May 2023 09:03:37 +0800 Subject: [PATCH 041/413] build(deps): bump uuid from 1.3.1 to 1.3.2 (#359) Bumps [uuid](https://github.com/uuid-rs/uuid) from 1.3.1 to 1.3.2. - [Release notes](https://github.com/uuid-rs/uuid/releases) - [Commits](https://github.com/uuid-rs/uuid/compare/1.3.1...1.3.2) --- updated-dependencies: - dependency-name: uuid dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 17acaf7aa..65340664a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3678,9 +3678,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.3.1" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b55a3fef2a1e3b3a00ce878640918820d3c51081576ac657d23af9fc7928fdb" +checksum = "4dad5567ad0cf5b760e5665964bec1b47dfd077ba8a2544b513f3556d3d239a2" dependencies = [ "getrandom", "serde", diff --git a/Cargo.toml b/Cargo.toml index 4c6248a6f..4d9ab90d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,7 +44,7 @@ datafusion-sql = "24.0.0" datafusion-substrait = "24.0.0" prost = "0.11" prost-types = "0.11" -uuid = { version = "1.2", features = ["v4"] } +uuid = { version = "1.3", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false } async-trait = "0.1" futures = "0.3" From 5984bc7db420f688e0989b26662c24d8b2f5768e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 10 May 2023 09:03:51 +0800 Subject: [PATCH 042/413] build(deps): bump mimalloc from 0.1.36 to 0.1.37 (#361) Bumps 
[mimalloc](https://github.com/purpleprotocol/mimalloc_rust) from 0.1.36 to 0.1.37. - [Release notes](https://github.com/purpleprotocol/mimalloc_rust/releases) - [Commits](https://github.com/purpleprotocol/mimalloc_rust/compare/v0.1.36...v0.1.37) --- updated-dependencies: - dependency-name: mimalloc dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 65340664a..9ef4aaf62 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2138,9 +2138,9 @@ checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" [[package]] name = "libmimalloc-sys" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a558e3d911bc3c7bfc8c78bc580b404d6e51c1cefbf656e176a94b49b0df40" +checksum = "f4ac0e912c8ef1b735e92369695618dc5b1819f5a7bf3f167301a3ba1cea515e" dependencies = [ "cc", "libc", @@ -2246,9 +2246,9 @@ dependencies = [ [[package]] name = "mimalloc" -version = "0.1.36" +version = "0.1.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d88dad3f985ec267a3fcb7a1726f5cb1a7e8cad8b646e70a84f967210df23da" +checksum = "4e2894987a3459f3ffb755608bd82188f8ed00d0ae077f1edea29c068d639d98" dependencies = [ "libmimalloc-sys", ] From 21ad90f2bebebd4732db9477dd5311721ae6d70f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 10 May 2023 09:04:08 +0800 Subject: [PATCH 043/413] build(deps): bump regex-syntax from 0.6.29 to 0.7.1 (#334) Bumps [regex-syntax](https://github.com/rust-lang/regex) from 0.6.29 to 0.7.1. 
- [Release notes](https://github.com/rust-lang/regex/releases) - [Changelog](https://github.com/rust-lang/regex/blob/master/CHANGELOG.md) - [Commits](https://github.com/rust-lang/regex/commits) --- updated-dependencies: - dependency-name: regex-syntax dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9ef4aaf62..080f506f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -904,7 +904,7 @@ dependencies = [ "pyo3", "pyo3-build-config", "rand", - "regex-syntax 0.6.29", + "regex-syntax 0.7.1", "syn 2.0.15", "tokio", "url", diff --git a/Cargo.toml b/Cargo.toml index 4d9ab90d2..63481c0eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,7 +50,7 @@ async-trait = "0.1" futures = "0.3" object_store = { version = "0.5.3", features = ["aws", "gcp", "azure"] } parking_lot = "0.12" -regex-syntax = "0.6.28" +regex-syntax = "0.7.1" syn = "2.0.11" url = "2.2" From 43b3105cf859e95905b85f3efae69f7c4a4c0bee Mon Sep 17 00:00:00 2001 From: Jiayu Liu Date: Wed, 10 May 2023 20:53:03 +0800 Subject: [PATCH 044/413] upgrade maturin to 0.15.1 (#379) * upgrade maturin to 0.15.1 * migrate maturin meta --- .github/workflows/build.yml | 8 +- Cargo.toml | 3 - conda/environments/datafusion-dev.yaml | 48 ++-- conda/recipes/meta.yaml | 6 +- docs/README.md | 15 +- pyproject.toml | 3 +- requirements-310.txt | 340 ++++++++++++++----------- requirements.in | 4 +- requirements.txt | 284 --------------------- 9 files changed, 232 insertions(+), 479 deletions(-) delete mode 100644 requirements.txt diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c667dab80..fe06b9c86 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -64,7 +64,7 @@ jobs: run: python -m pip install --upgrade pip - name: 
Install maturin - run: pip install maturin==0.14.2 + run: pip install maturin==0.15.1 - run: rm LICENSE.txt - name: Download LICENSE.txt @@ -76,7 +76,7 @@ jobs: - name: Install Protoc uses: arduino/setup-protoc@v1 with: - version: '3.x' + version: "3.x" repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Build Python package @@ -125,7 +125,7 @@ jobs: run: python -m pip install --upgrade pip - name: Install maturin - run: pip install maturin==0.14.2 + run: pip install maturin==0.15.1 - run: rm LICENSE.txt - name: Download LICENSE.txt @@ -137,7 +137,7 @@ jobs: - name: Install Protoc uses: arduino/setup-protoc@v1 with: - version: '3.x' + version: "3.x" repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Build Python package diff --git a/Cargo.toml b/Cargo.toml index 63481c0eb..cad16cc86 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -61,9 +61,6 @@ pyo3-build-config = "0.18.3" name = "datafusion_python" crate-type = ["cdylib", "rlib"] -[package.metadata.maturin] -name = "datafusion._internal" - [profile.release] lto = true codegen-units = 1 diff --git a/conda/environments/datafusion-dev.yaml b/conda/environments/datafusion-dev.yaml index d9405e4fe..ceab504c8 100644 --- a/conda/environments/datafusion-dev.yaml +++ b/conda/environments/datafusion-dev.yaml @@ -16,29 +16,29 @@ # under the License. 
channels: -- conda-forge + - conda-forge dependencies: -- black -- flake8 -- isort -- maturin -- mypy -- numpy -- pyarrow -- pytest -- toml -- importlib_metadata -- python>=3.10 -# Packages useful for building distributions and releasing -- mamba -- conda-build -- anaconda-client -# Packages for documentation building -- sphinx -- pydata-sphinx-theme==0.8.0 -- myst-parser -- jinja2 -# GPU packages -- cudf -- cudatoolkit=11.8 + - black + - flake8 + - isort + - maturin>=0.15 + - mypy + - numpy + - pyarrow>=11.0.0 + - pytest + - toml + - importlib_metadata + - python>=3.10 + # Packages useful for building distributions and releasing + - mamba + - conda-build + - anaconda-client + # Packages for documentation building + - sphinx + - pydata-sphinx-theme==0.8.0 + - myst-parser + - jinja2 + # GPU packages + - cudf + - cudatoolkit=11.8 name: datafusion-dev diff --git a/conda/recipes/meta.yaml b/conda/recipes/meta.yaml index 48e95eb08..e2bb8bee3 100644 --- a/conda/recipes/meta.yaml +++ b/conda/recipes/meta.yaml @@ -35,12 +35,12 @@ build: requirements: host: - - python >=3.6 - - maturin >=0.14,<0.15 + - python >=3.7 + - maturin >=0.15,<0.16 - libprotobuf =3 - pip run: - - python >=3.6 + - python >=3.7 - pyarrow >=11.0.0 test: diff --git a/docs/README.md b/docs/README.md index 04f46a907..85278588e 100644 --- a/docs/README.md +++ b/docs/README.md @@ -19,17 +19,18 @@ # DataFusion Documentation -This folder contains the source content of the [python api](./source/api). +This folder contains the source content of the [Python API](./source/api). This is published to https://arrow.apache.org/datafusion-python/ by a GitHub action when changes are merged to the main branch. ## Dependencies It's recommended to install build dependencies and build the documentation -inside a Python virtualenv. +inside a Python `venv`. 
-- Python -- `pip3 install -r requirements.txt` +```bash +python -m pip install -r requirements-310.txt +``` ## Build & Preview @@ -57,8 +58,6 @@ version of the docs, follow these steps: 2. Clone the arrow-site repo 3. Checkout to the `asf-site` branch (NOT `master`) 4. Copy build artifacts into `arrow-site` repo's `datafusion` folder with a command such as - -- `cp -rT ./build/html/ ../../arrow-site/datafusion/` (doesn't work on mac) -- `rsync -avzr ./build/html/ ../../arrow-site/datafusion/` - + - `cp -rT ./build/html/ ../../arrow-site/datafusion/` (doesn't work on mac) + - `rsync -avzr ./build/html/ ../../arrow-site/datafusion/` 5. Commit changes in `arrow-site` and send a PR. diff --git a/pyproject.toml b/pyproject.toml index cf68a33c3..4fdc4586f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ # under the License. [build-system] -requires = ["maturin>=0.14,<0.15"] +requires = ["maturin>=0.15,<0.16"] build-backend = "maturin" [project] @@ -55,6 +55,7 @@ repository = "https://github.com/apache/arrow-datafusion-python" profile = "black" [tool.maturin] +module-name = "datafusion._internal" include = [ { path = "Cargo.lock", format = "sdist" } ] diff --git a/requirements-310.txt b/requirements-310.txt index 332abdbcb..517c31905 100644 --- a/requirements-310.txt +++ b/requirements-310.txt @@ -1,41 +1,20 @@ # -# This file is autogenerated by pip-compile with python 3.10 -# To update, run: +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: # -# pip-compile --generate-hashes +# pip-compile --generate-hashes --resolver=backtracking requirements.in # -attrs==21.4.0 \ - --hash=sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4 \ - --hash=sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd +attrs==21.2.0 \ + --hash=sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1 \ + --hash=sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb # via 
pytest -black==22.3.0 \ - --hash=sha256:06f9d8846f2340dfac80ceb20200ea5d1b3f181dd0556b47af4e8e0b24fa0a6b \ - --hash=sha256:10dbe6e6d2988049b4655b2b739f98785a884d4d6b85bc35133a8fb9a2233176 \ - --hash=sha256:2497f9c2386572e28921fa8bec7be3e51de6801f7459dffd6e62492531c47e09 \ - --hash=sha256:30d78ba6bf080eeaf0b7b875d924b15cd46fec5fd044ddfbad38c8ea9171043a \ - --hash=sha256:328efc0cc70ccb23429d6be184a15ce613f676bdfc85e5fe8ea2a9354b4e9015 \ - --hash=sha256:35020b8886c022ced9282b51b5a875b6d1ab0c387b31a065b84db7c33085ca79 \ - --hash=sha256:5795a0375eb87bfe902e80e0c8cfaedf8af4d49694d69161e5bd3206c18618bb \ - --hash=sha256:5891ef8abc06576985de8fa88e95ab70641de6c1fca97e2a15820a9b69e51b20 \ - --hash=sha256:637a4014c63fbf42a692d22b55d8ad6968a946b4a6ebc385c5505d9625b6a464 \ - --hash=sha256:67c8301ec94e3bcc8906740fe071391bce40a862b7be0b86fb5382beefecd968 \ - --hash=sha256:6d2fc92002d44746d3e7db7cf9313cf4452f43e9ea77a2c939defce3b10b5c82 \ - --hash=sha256:6ee227b696ca60dd1c507be80a6bc849a5a6ab57ac7352aad1ffec9e8b805f21 \ - --hash=sha256:863714200ada56cbc366dc9ae5291ceb936573155f8bf8e9de92aef51f3ad0f0 \ - --hash=sha256:9b542ced1ec0ceeff5b37d69838106a6348e60db7b8fdd245294dc1d26136265 \ - --hash=sha256:a6342964b43a99dbc72f72812bf88cad8f0217ae9acb47c0d4f141a6416d2d7b \ - --hash=sha256:ad4efa5fad66b903b4a5f96d91461d90b9507a812b3c5de657d544215bb7877a \ - --hash=sha256:bc58025940a896d7e5356952228b68f793cf5fcb342be703c3a2669a1488cb72 \ - --hash=sha256:cc1e1de68c8e5444e8f94c3670bb48a2beef0e91dddfd4fcc29595ebd90bb9ce \ - --hash=sha256:cee3e11161dde1b2a33a904b850b0899e0424cc331b7295f2a9698e79f9a69a0 \ - --hash=sha256:e3556168e2e5c49629f7b0f377070240bd5511e45e25a4497bb0073d9dda776a \ - --hash=sha256:e8477ec6bbfe0312c128e74644ac8a02ca06bcdb8982d4ee06f209be28cdf163 \ - --hash=sha256:ee8f1f7228cce7dffc2b464f07ce769f478968bfb3dd1254a4c2eeed84928aad \ - --hash=sha256:fd57160949179ec517d32ac2ac898b5f20d68ed1a9c977346efbac9c2f1e779d +black==21.9b0 \ + 
--hash=sha256:380f1b5da05e5a1429225676655dddb96f5ae8c75bdf91e53d798871b902a115 \ + --hash=sha256:7de4cfc7eb6b710de325712d40125689101d21d25283eed7e9998722cf10eb91 # via -r requirements.in -click==8.1.3 \ - --hash=sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e \ - --hash=sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48 +click==8.0.3 \ + --hash=sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3 \ + --hash=sha256:410e932b050f5eed773c4cda94de75971c89cdb3155a72a0831139a79e5ecb5b # via black flake8==4.0.1 \ --hash=sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d \ @@ -45,53 +24,53 @@ iniconfig==1.1.1 \ --hash=sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3 \ --hash=sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32 # via pytest -isort==5.10.1 \ - --hash=sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7 \ - --hash=sha256:e8443a5e7a020e9d7f97f1d7d9cd17c88bcb3bc7e218bf9cf5095fe550be2951 +isort==5.9.3 \ + --hash=sha256:9c2ea1e62d871267b78307fe511c0838ba0da28698c5732d54e2790bf3ba9899 \ + --hash=sha256:e17d6e2b81095c9db0a03a8025a957f334d6ea30b26f9ec70805411e5c7c81f2 # via -r requirements.in -maturin==0.14.2 \ - --hash=sha256:065a0b48259624af1330bbc2d9bd447615ff07603710ad4a839478aa8e9e98cd \ - --hash=sha256:2966ed9b2e66760ffa5e476dab20c9563dd14efa93ed40b706cbe0a03907d824 \ - --hash=sha256:50a7a15d02f0b407eb5cf4cbc072d27e302993d5734af71bb6d6c567da8c6396 \ - --hash=sha256:539545c666506d34474c1133a11e90b4e6b4b6a7e590f5727ae18a49d3b647d2 \ - --hash=sha256:9f843736fc1c065ffba2dcfc581811c2eb7032c7119ff23012ccde68607e549c \ - --hash=sha256:ace60890f19a87e4a421fd9753e42e5fb46b11e770feacd8ff6749d3bf114764 \ - --hash=sha256:ae3a3a754c3b935420eeb1ec34d98d1693251537331feb43277bae7892b7c51a \ - --hash=sha256:bbaa0689a87124c99adaff99e93dcf415e36f7068d5442f311b49ede4bb7b6c3 \ - 
--hash=sha256:c65b00f4a4773bc56ca690d93abd09c85cc59b161a313fba3d03aca1ffbba7ca \ - --hash=sha256:daefa75e3f03e914924cb5913b40ada47ff646562c3b1874ddaf5b1e4b0f41ef \ - --hash=sha256:e0b808d1a0b78532c1bcbeb2d01d90281585dc460a966f1949fd337a3fb614c0 \ - --hash=sha256:ebc722e9661a40c3d36a06ecc0d1c637ae1af3335ea0ab69230f46a71e344088 \ - --hash=sha256:fa01eeacfd1798cde3ba1c7a0b8d7a889d2ed0e207f4860beb819c5ddd351f43 +maturin==0.15.1 \ + --hash=sha256:0e89a87549d671056f9358832c8aa1bb522abcf13eeca83327a58b091d4f5a98 \ + --hash=sha256:229d7eb6e14455a3c69a10a4546f082c7bd5490b8ec7f50d5d10edcea600dc64 \ + --hash=sha256:247bec13d82021972e5cb4eb38e7a7aea0e7a034beab60f0e0464ffe7423f24b \ + --hash=sha256:37ddbc261db778c73829173f43624b6178dd4244fae8e4b278323e050e899a14 \ + --hash=sha256:3b4c296b12736756d1d084c04b5a60281011e77b763eb9c69c39444d2421ad31 \ + --hash=sha256:616220c8c875526b22605bbb0be189ff859956abad8fe6e49c9ad2caa761ebca \ + --hash=sha256:61eda19c37394660c3b0b9c6dbfa7d411a5dca1a93c9dcf7baa718f3a03fda04 \ + --hash=sha256:7a7e02f265c30bac9f16e65c9d14184b3228c26b03dbe83a15220ab09fceabd7 \ + --hash=sha256:a40c96dc135e1571159155bdbd8c06509dea14a9317ca0e5b7cae4c8d2016b93 \ + --hash=sha256:ab5f8d8858440f35d833bce8bbf8233e595afa23fe00c513f7b671183946ea02 \ + --hash=sha256:b54421ae71e1e3dde92105d300187b9ac3b257a5b33c1d670ff9ee5266a1bd81 \ + --hash=sha256:dabb8ff46461c6fb1d68e8972a172cf1dede3c9825a41e4a6caecc95c26ca3b4 \ + --hash=sha256:f604b65fd9f0b94856e88cf8b345e21a27276297e6df4ad9305937887feda13b # via -r requirements.in mccabe==0.6.1 \ --hash=sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42 \ --hash=sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f # via flake8 -mypy==0.950 \ - --hash=sha256:0112752a6ff07230f9ec2f71b0d3d4e088a910fdce454fdb6553e83ed0eced7d \ - --hash=sha256:0384d9f3af49837baa92f559d3fa673e6d2652a16550a9ee07fc08c736f5e6f8 \ - --hash=sha256:1b333cfbca1762ff15808a0ef4f71b5d3eed8528b23ea1c3fb50543c867d68de \ - 
--hash=sha256:1fdeb0a0f64f2a874a4c1f5271f06e40e1e9779bf55f9567f149466fc7a55038 \ - --hash=sha256:4c653e4846f287051599ed8f4b3c044b80e540e88feec76b11044ddc5612ffed \ - --hash=sha256:563514c7dc504698fb66bb1cf897657a173a496406f1866afae73ab5b3cdb334 \ - --hash=sha256:5b231afd6a6e951381b9ef09a1223b1feabe13625388db48a8690f8daa9b71ff \ - --hash=sha256:5ce6a09042b6da16d773d2110e44f169683d8cc8687e79ec6d1181a72cb028d2 \ - --hash=sha256:5e7647df0f8fc947388e6251d728189cfadb3b1e558407f93254e35abc026e22 \ - --hash=sha256:6003de687c13196e8a1243a5e4bcce617d79b88f83ee6625437e335d89dfebe2 \ - --hash=sha256:61504b9a5ae166ba5ecfed9e93357fd51aa693d3d434b582a925338a2ff57fd2 \ - --hash=sha256:77423570c04aca807508a492037abbd72b12a1fb25a385847d191cd50b2c9605 \ - --hash=sha256:a4d9898f46446bfb6405383b57b96737dcfd0a7f25b748e78ef3e8c576bba3cb \ - --hash=sha256:a952b8bc0ae278fc6316e6384f67bb9a396eb30aced6ad034d3a76120ebcc519 \ - --hash=sha256:b5b5bd0ffb11b4aba2bb6d31b8643902c48f990cc92fda4e21afac658044f0c0 \ - --hash=sha256:ca75ecf2783395ca3016a5e455cb322ba26b6d33b4b413fcdedfc632e67941dc \ - --hash=sha256:cf9c261958a769a3bd38c3e133801ebcd284ffb734ea12d01457cb09eacf7d7b \ - --hash=sha256:dd4d670eee9610bf61c25c940e9ade2d0ed05eb44227275cce88701fee014b1f \ - --hash=sha256:e19736af56947addedce4674c0971e5dceef1b5ec7d667fe86bcd2b07f8f9075 \ - --hash=sha256:eaea21d150fb26d7b4856766e7addcf929119dd19fc832b22e71d942835201ef \ - --hash=sha256:eaff8156016487c1af5ffa5304c3e3fd183edcb412f3e9c72db349faf3f6e0eb \ - --hash=sha256:ee0a36edd332ed2c5208565ae6e3a7afc0eabb53f5327e281f2ef03a6bc7687a \ - --hash=sha256:ef7beb2a3582eb7a9f37beaf38a28acfd801988cde688760aea9e6cc4832b10b +mypy==0.910 \ + --hash=sha256:088cd9c7904b4ad80bec811053272986611b84221835e079be5bcad029e79dd9 \ + --hash=sha256:0aadfb2d3935988ec3815952e44058a3100499f5be5b28c34ac9d79f002a4a9a \ + --hash=sha256:119bed3832d961f3a880787bf621634ba042cb8dc850a7429f643508eeac97b9 \ + 
--hash=sha256:1a85e280d4d217150ce8cb1a6dddffd14e753a4e0c3cf90baabb32cefa41b59e \ + --hash=sha256:3c4b8ca36877fc75339253721f69603a9c7fdb5d4d5a95a1a1b899d8b86a4de2 \ + --hash=sha256:3e382b29f8e0ccf19a2df2b29a167591245df90c0b5a2542249873b5c1d78212 \ + --hash=sha256:42c266ced41b65ed40a282c575705325fa7991af370036d3f134518336636f5b \ + --hash=sha256:53fd2eb27a8ee2892614370896956af2ff61254c275aaee4c230ae771cadd885 \ + --hash=sha256:704098302473cb31a218f1775a873b376b30b4c18229421e9e9dc8916fd16150 \ + --hash=sha256:7df1ead20c81371ccd6091fa3e2878559b5c4d4caadaf1a484cf88d93ca06703 \ + --hash=sha256:866c41f28cee548475f146aa4d39a51cf3b6a84246969f3759cb3e9c742fc072 \ + --hash=sha256:a155d80ea6cee511a3694b108c4494a39f42de11ee4e61e72bc424c490e46457 \ + --hash=sha256:adaeee09bfde366d2c13fe6093a7df5df83c9a2ba98638c7d76b010694db760e \ + --hash=sha256:b6fb13123aeef4a3abbcfd7e71773ff3ff1526a7d3dc538f3929a49b42be03f0 \ + --hash=sha256:b94e4b785e304a04ea0828759172a15add27088520dc7e49ceade7834275bedb \ + --hash=sha256:c0df2d30ed496a08de5daed2a9ea807d07c21ae0ab23acf541ab88c24b26ab97 \ + --hash=sha256:c6c2602dffb74867498f86e6129fd52a2770c48b7cd3ece77ada4fa38f94eba8 \ + --hash=sha256:ceb6e0a6e27fb364fb3853389607cf7eb3a126ad335790fa1e14ed02fba50811 \ + --hash=sha256:d9dd839eb0dc1bbe866a288ba3c1afc33a202015d2ad83b31e875b5905a079b6 \ + --hash=sha256:e4dab234478e3bd3ce83bac4193b2ecd9cf94e720ddd95ce69840273bf44f6de \ + --hash=sha256:ec4e0cd079db280b6bdabdc807047ff3e199f334050db5cbb91ba3e959a67504 \ + --hash=sha256:ecd2c3fe726758037234c93df7e98deb257fd15c24c9180dacf1ef829da5f921 \ + --hash=sha256:ef565033fa5a958e62796867b1df10c40263ea9ded87164d67572834e57a174d # via -r requirements.in mypy-extensions==0.4.3 \ --hash=sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d \ @@ -99,81 +78,100 @@ mypy-extensions==0.4.3 \ # via # black # mypy -numpy==1.22.3 \ - --hash=sha256:07a8c89a04997625236c5ecb7afe35a02af3896c8aa01890a849913a2309c676 \ - 
--hash=sha256:08d9b008d0156c70dc392bb3ab3abb6e7a711383c3247b410b39962263576cd4 \ - --hash=sha256:201b4d0552831f7250a08d3b38de0d989d6f6e4658b709a02a73c524ccc6ffce \ - --hash=sha256:2c10a93606e0b4b95c9b04b77dc349b398fdfbda382d2a39ba5a822f669a0123 \ - --hash=sha256:3ca688e1b9b95d80250bca34b11a05e389b1420d00e87a0d12dc45f131f704a1 \ - --hash=sha256:48a3aecd3b997bf452a2dedb11f4e79bc5bfd21a1d4cc760e703c31d57c84b3e \ - --hash=sha256:568dfd16224abddafb1cbcce2ff14f522abe037268514dd7e42c6776a1c3f8e5 \ - --hash=sha256:5bfb1bb598e8229c2d5d48db1860bcf4311337864ea3efdbe1171fb0c5da515d \ - --hash=sha256:639b54cdf6aa4f82fe37ebf70401bbb74b8508fddcf4797f9fe59615b8c5813a \ - --hash=sha256:8251ed96f38b47b4295b1ae51631de7ffa8260b5b087808ef09a39a9d66c97ab \ - --hash=sha256:92bfa69cfbdf7dfc3040978ad09a48091143cffb778ec3b03fa170c494118d75 \ - --hash=sha256:97098b95aa4e418529099c26558eeb8486e66bd1e53a6b606d684d0c3616b168 \ - --hash=sha256:a3bae1a2ed00e90b3ba5f7bd0a7c7999b55d609e0c54ceb2b076a25e345fa9f4 \ - --hash=sha256:c34ea7e9d13a70bf2ab64a2532fe149a9aced424cd05a2c4ba662fd989e3e45f \ - --hash=sha256:dbc7601a3b7472d559dc7b933b18b4b66f9aa7452c120e87dfb33d02008c8a18 \ - --hash=sha256:e7927a589df200c5e23c57970bafbd0cd322459aa7b1ff73b7c2e84d6e3eae62 \ - --hash=sha256:f8c1f39caad2c896bc0018f699882b345b2a63708008be29b1f355ebf6f933fe \ - --hash=sha256:f950f8845b480cffe522913d35567e29dd381b0dc7e4ce6a4a9f9156417d2430 \ - --hash=sha256:fade0d4f4d292b6f39951b6836d7a3c7ef5b2347f3c420cd9820a1d90d794802 \ - --hash=sha256:fdf3c08bce27132395d3c3ba1503cac12e17282358cb4bddc25cc46b0aca07aa +numpy==1.21.3 \ + --hash=sha256:043e83bfc274649c82a6f09836943e4a4aebe5e33656271c7dbf9621dd58b8ec \ + --hash=sha256:160ccc1bed3a8371bf0d760971f09bfe80a3e18646620e9ded0ad159d9749baa \ + --hash=sha256:188031f833bbb623637e66006cf75e933e00e7231f67e2b45cf8189612bb5dc3 \ + --hash=sha256:28f15209fb535dd4c504a7762d3bc440779b0e37d50ed810ced209e5cea60d96 \ + 
--hash=sha256:29fb3dcd0468b7715f8ce2c0c2d9bbbaf5ae686334951343a41bd8d155c6ea27 \ + --hash=sha256:2a6ee9620061b2a722749b391c0d80a0e2ae97290f1b32e28d5a362e21941ee4 \ + --hash=sha256:300321e3985c968e3ae7fbda187237b225f3ffe6528395a5b7a5407f73cf093e \ + --hash=sha256:32437f0b275c1d09d9c3add782516413e98cd7c09e6baf4715cbce781fc29912 \ + --hash=sha256:3c09418a14471c7ae69ba682e2428cae5b4420a766659605566c0fa6987f6b7e \ + --hash=sha256:49c6249260890e05b8111ebfc391ed58b3cb4b33e63197b2ec7f776e45330721 \ + --hash=sha256:4cc9b512e9fb590797474f58b7f6d1f1b654b3a94f4fa8558b48ca8b3cfc97cf \ + --hash=sha256:508b0b513fa1266875524ba8a9ecc27b02ad771fe1704a16314dc1a816a68737 \ + --hash=sha256:50cd26b0cf6664cb3b3dd161ba0a09c9c1343db064e7c69f9f8b551f5104d654 \ + --hash=sha256:5c4193f70f8069550a1788bd0cd3268ab7d3a2b70583dfe3b2e7f421e9aace06 \ + --hash=sha256:5dfe9d6a4c39b8b6edd7990091fea4f852888e41919d0e6722fe78dd421db0eb \ + --hash=sha256:63571bb7897a584ca3249c86dd01c10bcb5fe4296e3568b2e9c1a55356b6410e \ + --hash=sha256:75621882d2230ab77fb6a03d4cbccd2038511491076e7964ef87306623aa5272 \ + --hash=sha256:75eb7cadc8da49302f5b659d40ba4f6d94d5045fbd9569c9d058e77b0514c9e4 \ + --hash=sha256:88a5d6b268e9ad18f3533e184744acdaa2e913b13148160b1152300c949bbb5f \ + --hash=sha256:8a10968963640e75cc0193e1847616ab4c718e83b6938ae74dea44953950f6b7 \ + --hash=sha256:90bec6a86b348b4559b6482e2b684db4a9a7eed1fa054b86115a48d58fbbf62a \ + --hash=sha256:98339aa9911853f131de11010f6dd94c8cec254d3d1f7261528c3b3e3219f139 \ + --hash=sha256:a99a6b067e5190ac6d12005a4d85aa6227c5606fa93211f86b1dafb16233e57d \ + --hash=sha256:bffa2eee3b87376cc6b31eee36d05349571c236d1de1175b804b348dc0941e3f \ + --hash=sha256:c6c2d535a7beb1f8790aaa98fd089ceab2e3dd7ca48aca0af7dc60e6ef93ffe1 \ + --hash=sha256:cc14e7519fab2a4ed87d31f99c31a3796e4e1fe63a86ebdd1c5a1ea78ebd5896 \ + --hash=sha256:dd0482f3fc547f1b1b5d6a8b8e08f63fdc250c58ce688dedd8851e6e26cff0f3 \ + --hash=sha256:dde972a1e11bb7b702ed0e447953e7617723760f420decb97305e66fb4afc54f \ + 
--hash=sha256:e54af82d68ef8255535a6cdb353f55d6b8cf418a83e2be3569243787a4f4866f \ + --hash=sha256:e606e6316911471c8d9b4618e082635cfe98876007556e89ce03d52ff5e8fcf0 \ + --hash=sha256:f41b018f126aac18583956c54544db437f25c7ee4794bcb23eb38bef8e5e192a \ + --hash=sha256:f8f4625536926a155b80ad2bbff44f8cc59e9f2ad14cdda7acf4c135b4dc8ff2 \ + --hash=sha256:fe52dbe47d9deb69b05084abd4b0df7abb39a3c51957c09f635520abd49b29dd # via # -r requirements.in # pyarrow -packaging==21.3 \ - --hash=sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb \ - --hash=sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522 +packaging==21.0 \ + --hash=sha256:7dc96269f53a4ccec5c0670940a4281106dd0bb343f47b7471f779df49c2fbe7 \ + --hash=sha256:c86254f9220d55e31cc94d69bade760f0847da8000def4dfe1c6b872fd14ff14 # via pytest pathspec==0.9.0 \ --hash=sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a \ --hash=sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1 # via black -platformdirs==2.5.2 \ - --hash=sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788 \ - --hash=sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19 +platformdirs==2.4.0 \ + --hash=sha256:367a5e80b3d04d2428ffa76d33f124cf11e8fff2acdaa9b43d545f5c7d661ef2 \ + --hash=sha256:8868bbe3c3c80d42f20156f22e7131d2fb321f5bc86a2a345375c6481a67021d # via black pluggy==1.0.0 \ --hash=sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159 \ --hash=sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3 # via pytest -py==1.11.0 \ - --hash=sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719 \ - --hash=sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378 +py==1.10.0 \ + --hash=sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3 \ + --hash=sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a # via pytest -pyarrow==8.0.0 \ - 
--hash=sha256:03a10daad957970e914920b793f6a49416699e791f4c827927fd4e4d892a5d16 \ - --hash=sha256:15511ce2f50343f3fd5e9f7c30e4d004da9134e9597e93e9c96c3985928cbe82 \ - --hash=sha256:1dd482ccb07c96188947ad94d7536ab696afde23ad172df8e18944ec79f55055 \ - --hash=sha256:25a5f7c7f36df520b0b7363ba9f51c3070799d4b05d587c60c0adaba57763479 \ - --hash=sha256:3bd201af6e01f475f02be88cf1f6ee9856ab98c11d8bbb6f58347c58cd07be00 \ - --hash=sha256:3fee786259d986f8c046100ced54d63b0c8c9f7cdb7d1bbe07dc69e0f928141c \ - --hash=sha256:42b7982301a9ccd06e1dd4fabd2e8e5df74b93ce4c6b87b81eb9e2d86dc79871 \ - --hash=sha256:4a18a211ed888f1ac0b0ebcb99e2d9a3e913a481120ee9b1fe33d3fedb945d4e \ - --hash=sha256:51e58778fcb8829fca37fbfaea7f208d5ce7ea89ea133dd13d8ce745278ee6f0 \ - --hash=sha256:541e7845ce5f27a861eb5b88ee165d931943347eec17b9ff1e308663531c9647 \ - --hash=sha256:65c7f4cc2be195e3db09296d31a654bb6d8786deebcab00f0e2455fd109d7456 \ - --hash=sha256:69b043a3fce064ebd9fbae6abc30e885680296e5bd5e6f7353e6a87966cf2ad7 \ - --hash=sha256:6ea2c54e6b5ecd64e8299d2abb40770fe83a718f5ddc3825ddd5cd28e352cce1 \ - --hash=sha256:78a6ac39cd793582998dac88ab5c1c1dd1e6503df6672f064f33a21937ec1d8d \ - --hash=sha256:81b87b782a1366279411f7b235deab07c8c016e13f9af9f7c7b0ee564fedcc8f \ - --hash=sha256:8392b9a1e837230090fe916415ed4c3433b2ddb1a798e3f6438303c70fbabcfc \ - --hash=sha256:863be6bad6c53797129610930794a3e797cb7d41c0a30e6794a2ac0e42ce41b8 \ - --hash=sha256:8cd86e04a899bef43e25184f4b934584861d787cf7519851a8c031803d45c6d8 \ - --hash=sha256:95c7822eb37663e073da9892f3499fe28e84f3464711a3e555e0c5463fd53a19 \ - --hash=sha256:98c13b2e28a91b0fbf24b483df54a8d7814c074c2623ecef40dce1fa52f6539b \ - --hash=sha256:ba2b7aa7efb59156b87987a06f5241932914e4d5bbb74a465306b00a6c808849 \ - --hash=sha256:c9c97c8e288847e091dfbcdf8ce51160e638346f51919a9e74fe038b2e8aee62 \ - --hash=sha256:cb06cacc19f3b426681f2f6803cc06ff481e7fe5b3a533b406bc5b2138843d4f \ - --hash=sha256:ce64bc1da3109ef5ab9e4c60316945a7239c798098a631358e9ab39f6e5529e9 \ - 
--hash=sha256:d5ef4372559b191cafe7db8932801eee252bfc35e983304e7d60b6954576a071 \ - --hash=sha256:d6f1e1040413651819074ef5b500835c6c42e6c446532a1ddef8bc5054e8dba5 \ - --hash=sha256:deb400df8f19a90b662babceb6dd12daddda6bb357c216e558b207c0770c7654 \ - --hash=sha256:ea132067ec712d1b1116a841db1c95861508862b21eddbcafefbce8e4b96b867 \ - --hash=sha256:ece333706a94c1221ced8b299042f85fd88b5db802d71be70024433ddf3aecab \ - --hash=sha256:edad25522ad509e534400d6ab98cf1872d30c31bc5e947712bfd57def7af15bb +pyarrow==6.0.0 \ + --hash=sha256:004185e0babc6f3c3fba6ba4f106e406a0113d0f82bb9ad9a8571a1978c45d04 \ + --hash=sha256:0204e80777ab8f4e9abd3a765a8ec07ed1e3c4630bacda50d2ce212ef0f3826f \ + --hash=sha256:072c1a0fca4509eefd7d018b78542fb7e5c63aaf5698f1c0a6e45628ae17ba44 \ + --hash=sha256:15dc0d673d3f865ca63c877bd7a2eced70b0a08969fb733a28247134b8a1f18b \ + --hash=sha256:1c38263ea438a1666b13372e7565450cfeec32dbcd1c2595749476a58465eaec \ + --hash=sha256:281ce5fa03621d786a9beb514abb09846db7f0221b50eabf543caa24037eaacd \ + --hash=sha256:2d2c681659396c745e4f1988d5dd41dcc3ad557bb8d4a8c2e44030edafc08a91 \ + --hash=sha256:376c4b5f248ae63df21fe15c194e9013753164be2d38f4b3fb8bde63ac5a1958 \ + --hash=sha256:465f87fa0be0b2928b2beeba22b5813a0203fb05d90fd8563eea48e08ecc030e \ + --hash=sha256:477c746ef42c039348a288584800e299456c80c5691401bb9b19aa9c02a427b7 \ + --hash=sha256:5144bd9db2920c7cb566c96462d62443cc239104f94771d110f74393f2fb42a2 \ + --hash=sha256:5408fa8d623e66a0445f3fb0e4027fd219bf99bfb57422d543d7b7876e2c5b55 \ + --hash=sha256:5be62679201c441356d3f2a739895dcc8d4d299f2a6eabcd2163bfb6a898abba \ + --hash=sha256:5c666bc6a1cebf01206e2dc1ab05f25f39f35d3a499e0ef5cd635225e07306ca \ + --hash=sha256:6163d82cca7541774b00503c295fe86a1722820eddb958b57f091bb6f5b0a6db \ + --hash=sha256:6a1d9a2f4ee812ed0bd4182cabef99ea914ac297274f0de086f2488093d284ef \ + --hash=sha256:7a683f71b848eb6310b4ec48c0def55dac839e9994c1ac874c9b2d3d5625def1 \ + 
--hash=sha256:82fe80309e01acf29e3943a1f6d3c98ec109fe1d356bc1ac37d639bcaadcf684 \ + --hash=sha256:8c23f8cdecd3d9e49f9b0f9a651ae5549d1d32fd4901fb1bdc2d327edfba844f \ + --hash=sha256:8d41dfb09ba9236cca6245f33088eb42f3c54023da281139241e0f9f3b4b754e \ + --hash=sha256:a19e58dfb04e451cd8b7bdec3ac8848373b95dfc53492c9a69789aa9074a3c1b \ + --hash=sha256:a50d2f77b86af38ceabf45617208b9105d20e7a5eebc584e7c8c0acededd82ce \ + --hash=sha256:a5bed4f948c032c40597302e9bdfa65f62295240306976ecbe43a54924c6f94f \ + --hash=sha256:ac941a147d14993987cc8b605b721735a34b3e54d167302501fb4db1ad7382c7 \ + --hash=sha256:b86d175262db1eb46afdceb36d459409eb6f8e532d3dec162f8bf572c7f57623 \ + --hash=sha256:bf3400780c4d3c9cb43b1e8a1aaf2e1b7199a0572d0a645529d2784e4d0d8497 \ + --hash=sha256:c7a6e7e0bf8779e9c3428ced85507541f3da9a0675e2f4781d4eb2c7042cbf81 \ + --hash=sha256:cc1d4a70efd583befe92d4ea6f74ed2e0aa31ccdde767cd5cae8e77c65a1c2d4 \ + --hash=sha256:d046dc78a9337baa6415be915c5a16222505233e238a1017f368243c89817eea \ + --hash=sha256:da7860688c33ca88ac05f1a487d32d96d9caa091412496c35f3d1d832145675a \ + --hash=sha256:ddf2e6e3b321adaaf716f2d5af8e92d205a9671e0cb7c0779710a567fd1dd580 \ + --hash=sha256:e81508239a71943759cee272ce625ae208092dd36ef2c6713fccee30bbcf52bb \ + --hash=sha256:ea64a48a85c631eb2a0ea13ccdec5143c85b5897836b16331ee4289d27a57247 \ + --hash=sha256:ed0be080cf595ea15ff1c9ff4097bbf1fcc4b50847d98c0a3c0412fbc6ede7e9 \ + --hash=sha256:fb701ec4a94b92102606d4e88f0b8eba34f09a5ad8e014eaa4af76f42b7f62ae \ + --hash=sha256:fbda7595f24a639bcef3419ecfac17216efacb09f7b0f1b4c4c97f900d65ca0e # via -r requirements.in pycodestyle==2.8.0 \ --hash=sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20 \ @@ -183,27 +181,69 @@ pyflakes==2.4.0 \ --hash=sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c \ --hash=sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e # via flake8 -pyparsing==3.0.9 \ - 
--hash=sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb \ - --hash=sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc +pyparsing==3.0.3 \ + --hash=sha256:9e3511118010f112a4b4b435ae50e1eaa610cda191acb9e421d60cf5fde83455 \ + --hash=sha256:f8d3fe9fc404576c5164f0f0c4e382c96b85265e023c409c43d48f65da9d60d0 # via packaging -pytest==7.1.2 \ - --hash=sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c \ - --hash=sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45 +pytest==6.2.5 \ + --hash=sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89 \ + --hash=sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134 # via -r requirements.in +regex==2021.10.23 \ + --hash=sha256:0c186691a7995ef1db61205e00545bf161fb7b59cdb8c1201c89b333141c438a \ + --hash=sha256:0dcc0e71118be8c69252c207630faf13ca5e1b8583d57012aae191e7d6d28b84 \ + --hash=sha256:0f7552429dd39f70057ac5d0e897e5bfe211629652399a21671e53f2a9693a4e \ + --hash=sha256:129472cd06062fb13e7b4670a102951a3e655e9b91634432cfbdb7810af9d710 \ + --hash=sha256:13ec99df95003f56edcd307db44f06fbeb708c4ccdcf940478067dd62353181e \ + --hash=sha256:1f2b59c28afc53973d22e7bc18428721ee8ca6079becf1b36571c42627321c65 \ + --hash=sha256:2b20f544cbbeffe171911f6ce90388ad36fe3fad26b7c7a35d4762817e9ea69c \ + --hash=sha256:2fb698037c35109d3c2e30f2beb499e5ebae6e4bb8ff2e60c50b9a805a716f79 \ + --hash=sha256:34d870f9f27f2161709054d73646fc9aca49480617a65533fc2b4611c518e455 \ + --hash=sha256:391703a2abf8013d95bae39145d26b4e21531ab82e22f26cd3a181ee2644c234 \ + --hash=sha256:450dc27483548214314640c89a0f275dbc557968ed088da40bde7ef8fb52829e \ + --hash=sha256:45b65d6a275a478ac2cbd7fdbf7cc93c1982d613de4574b56fd6972ceadb8395 \ + --hash=sha256:5095a411c8479e715784a0c9236568ae72509450ee2226b649083730f3fadfc6 \ + --hash=sha256:530fc2bbb3dc1ebb17f70f7b234f90a1dd43b1b489ea38cea7be95fb21cdb5c7 \ + 
--hash=sha256:56f0c81c44638dfd0e2367df1a331b4ddf2e771366c4b9c5d9a473de75e3e1c7 \ + --hash=sha256:5e9c9e0ce92f27cef79e28e877c6b6988c48b16942258f3bc55d39b5f911df4f \ + --hash=sha256:6d7722136c6ed75caf84e1788df36397efdc5dbadab95e59c2bba82d4d808a4c \ + --hash=sha256:74d071dbe4b53c602edd87a7476ab23015a991374ddb228d941929ad7c8c922e \ + --hash=sha256:7b568809dca44cb75c8ebb260844ea98252c8c88396f9d203f5094e50a70355f \ + --hash=sha256:80bb5d2e92b2258188e7dcae5b188c7bf868eafdf800ea6edd0fbfc029984a88 \ + --hash=sha256:8d1cdcda6bd16268316d5db1038965acf948f2a6f43acc2e0b1641ceab443623 \ + --hash=sha256:9f665677e46c5a4d288ece12fdedf4f4204a422bb28ff05f0e6b08b7447796d1 \ + --hash=sha256:a30513828180264294953cecd942202dfda64e85195ae36c265daf4052af0464 \ + --hash=sha256:a7a986c45d1099a5de766a15de7bee3840b1e0e1a344430926af08e5297cf666 \ + --hash=sha256:a940ca7e7189d23da2bfbb38973832813eab6bd83f3bf89a977668c2f813deae \ + --hash=sha256:ab7c5684ff3538b67df3f93d66bd3369b749087871ae3786e70ef39e601345b0 \ + --hash=sha256:be04739a27be55631069b348dda0c81d8ea9822b5da10b8019b789e42d1fe452 \ + --hash=sha256:c0938ddd60cc04e8f1faf7a14a166ac939aac703745bfcd8e8f20322a7373019 \ + --hash=sha256:cb46b542133999580ffb691baf67410306833ee1e4f58ed06b6a7aaf4e046952 \ + --hash=sha256:d134757a37d8640f3c0abb41f5e68b7cf66c644f54ef1cb0573b7ea1c63e1509 \ + --hash=sha256:de557502c3bec8e634246588a94e82f1ee1b9dfcfdc453267c4fb652ff531570 \ + --hash=sha256:ded0c4a3eee56b57fcb2315e40812b173cafe79d2f992d50015f4387445737fa \ + --hash=sha256:e1dae12321b31059a1a72aaa0e6ba30156fe7e633355e445451e4021b8e122b6 \ + --hash=sha256:eb672217f7bd640411cfc69756ce721d00ae600814708d35c930930f18e8029f \ + --hash=sha256:ee684f139c91e69fe09b8e83d18b4d63bf87d9440c1eb2eeb52ee851883b1b29 \ + --hash=sha256:f3f9a91d3cc5e5b0ddf1043c0ae5fa4852f18a1c0050318baf5fc7930ecc1f9c + # via black toml==0.10.2 \ --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ 
--hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f - # via -r requirements.in -tomli==2.0.1 \ - --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ - --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f + # via + # -r requirements.in + # mypy + # pytest +tomli==1.2.2 \ + --hash=sha256:c6ce0015eb38820eaf32b5db832dbc26deb3dd427bd5f6556cf0acac2c214fee \ + --hash=sha256:f04066f68f5554911363063a30b108d2b5a5b1a010aa8b6132af78489fe3aade # via # black # maturin +typing-extensions==3.10.0.2 \ + --hash=sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e \ + --hash=sha256:d8226d10bc02a29bcc81df19a26e56a9647f8b0a6d4a83924139f4a8b01f17b7 \ + --hash=sha256:f1d25edafde516b146ecd0613dabcc61409817af4766fbbcfb8d1ad4ec441a34 + # via + # black # mypy - # pytest -typing-extensions==4.2.0 \ - --hash=sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708 \ - --hash=sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376 - # via mypy diff --git a/requirements.in b/requirements.in index 098e9f809..09872b03b 100644 --- a/requirements.in +++ b/requirements.in @@ -18,10 +18,10 @@ black flake8 isort -maturin>=0.14 +maturin>=0.15 mypy numpy -pyarrow +pyarrow>=11.0.0 pytest toml importlib_metadata; python_version < "3.8" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index ce2404f39..000000000 --- a/requirements.txt +++ /dev/null @@ -1,284 +0,0 @@ -# -# This file is autogenerated by pip-compile with python 3.10 -# To update, run: -# -# pip-compile --generate-hashes -# -attrs==21.2.0 \ - --hash=sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1 \ - --hash=sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb - # via pytest -black==21.9b0 \ - --hash=sha256:380f1b5da05e5a1429225676655dddb96f5ae8c75bdf91e53d798871b902a115 \ - 
--hash=sha256:7de4cfc7eb6b710de325712d40125689101d21d25283eed7e9998722cf10eb91 - # via -r requirements.in -click==8.0.3 \ - --hash=sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3 \ - --hash=sha256:410e932b050f5eed773c4cda94de75971c89cdb3155a72a0831139a79e5ecb5b - # via black -flake8==4.0.1 \ - --hash=sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d \ - --hash=sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d - # via -r requirements.in -iniconfig==1.1.1 \ - --hash=sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3 \ - --hash=sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32 - # via pytest -isort==5.9.3 \ - --hash=sha256:9c2ea1e62d871267b78307fe511c0838ba0da28698c5732d54e2790bf3ba9899 \ - --hash=sha256:e17d6e2b81095c9db0a03a8025a957f334d6ea30b26f9ec70805411e5c7c81f2 - # via -r requirements.in -maturin==0.14.2 \ - --hash=sha256:065a0b48259624af1330bbc2d9bd447615ff07603710ad4a839478aa8e9e98cd \ - --hash=sha256:2966ed9b2e66760ffa5e476dab20c9563dd14efa93ed40b706cbe0a03907d824 \ - --hash=sha256:50a7a15d02f0b407eb5cf4cbc072d27e302993d5734af71bb6d6c567da8c6396 \ - --hash=sha256:539545c666506d34474c1133a11e90b4e6b4b6a7e590f5727ae18a49d3b647d2 \ - --hash=sha256:9f843736fc1c065ffba2dcfc581811c2eb7032c7119ff23012ccde68607e549c \ - --hash=sha256:ace60890f19a87e4a421fd9753e42e5fb46b11e770feacd8ff6749d3bf114764 \ - --hash=sha256:ae3a3a754c3b935420eeb1ec34d98d1693251537331feb43277bae7892b7c51a \ - --hash=sha256:bbaa0689a87124c99adaff99e93dcf415e36f7068d5442f311b49ede4bb7b6c3 \ - --hash=sha256:c65b00f4a4773bc56ca690d93abd09c85cc59b161a313fba3d03aca1ffbba7ca \ - --hash=sha256:daefa75e3f03e914924cb5913b40ada47ff646562c3b1874ddaf5b1e4b0f41ef \ - --hash=sha256:e0b808d1a0b78532c1bcbeb2d01d90281585dc460a966f1949fd337a3fb614c0 \ - --hash=sha256:ebc722e9661a40c3d36a06ecc0d1c637ae1af3335ea0ab69230f46a71e344088 \ - 
--hash=sha256:fa01eeacfd1798cde3ba1c7a0b8d7a889d2ed0e207f4860beb819c5ddd351f43 - # via -r requirements.in -mccabe==0.6.1 \ - --hash=sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42 \ - --hash=sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f - # via flake8 -mypy==0.910 \ - --hash=sha256:088cd9c7904b4ad80bec811053272986611b84221835e079be5bcad029e79dd9 \ - --hash=sha256:0aadfb2d3935988ec3815952e44058a3100499f5be5b28c34ac9d79f002a4a9a \ - --hash=sha256:119bed3832d961f3a880787bf621634ba042cb8dc850a7429f643508eeac97b9 \ - --hash=sha256:1a85e280d4d217150ce8cb1a6dddffd14e753a4e0c3cf90baabb32cefa41b59e \ - --hash=sha256:3c4b8ca36877fc75339253721f69603a9c7fdb5d4d5a95a1a1b899d8b86a4de2 \ - --hash=sha256:3e382b29f8e0ccf19a2df2b29a167591245df90c0b5a2542249873b5c1d78212 \ - --hash=sha256:42c266ced41b65ed40a282c575705325fa7991af370036d3f134518336636f5b \ - --hash=sha256:53fd2eb27a8ee2892614370896956af2ff61254c275aaee4c230ae771cadd885 \ - --hash=sha256:704098302473cb31a218f1775a873b376b30b4c18229421e9e9dc8916fd16150 \ - --hash=sha256:7df1ead20c81371ccd6091fa3e2878559b5c4d4caadaf1a484cf88d93ca06703 \ - --hash=sha256:866c41f28cee548475f146aa4d39a51cf3b6a84246969f3759cb3e9c742fc072 \ - --hash=sha256:a155d80ea6cee511a3694b108c4494a39f42de11ee4e61e72bc424c490e46457 \ - --hash=sha256:adaeee09bfde366d2c13fe6093a7df5df83c9a2ba98638c7d76b010694db760e \ - --hash=sha256:b6fb13123aeef4a3abbcfd7e71773ff3ff1526a7d3dc538f3929a49b42be03f0 \ - --hash=sha256:b94e4b785e304a04ea0828759172a15add27088520dc7e49ceade7834275bedb \ - --hash=sha256:c0df2d30ed496a08de5daed2a9ea807d07c21ae0ab23acf541ab88c24b26ab97 \ - --hash=sha256:c6c2602dffb74867498f86e6129fd52a2770c48b7cd3ece77ada4fa38f94eba8 \ - --hash=sha256:ceb6e0a6e27fb364fb3853389607cf7eb3a126ad335790fa1e14ed02fba50811 \ - --hash=sha256:d9dd839eb0dc1bbe866a288ba3c1afc33a202015d2ad83b31e875b5905a079b6 \ - --hash=sha256:e4dab234478e3bd3ce83bac4193b2ecd9cf94e720ddd95ce69840273bf44f6de \ - 
--hash=sha256:ec4e0cd079db280b6bdabdc807047ff3e199f334050db5cbb91ba3e959a67504 \ - --hash=sha256:ecd2c3fe726758037234c93df7e98deb257fd15c24c9180dacf1ef829da5f921 \ - --hash=sha256:ef565033fa5a958e62796867b1df10c40263ea9ded87164d67572834e57a174d - # via -r requirements.in -mypy-extensions==0.4.3 \ - --hash=sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d \ - --hash=sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8 - # via - # black - # mypy -numpy==1.21.3 \ - --hash=sha256:043e83bfc274649c82a6f09836943e4a4aebe5e33656271c7dbf9621dd58b8ec \ - --hash=sha256:160ccc1bed3a8371bf0d760971f09bfe80a3e18646620e9ded0ad159d9749baa \ - --hash=sha256:188031f833bbb623637e66006cf75e933e00e7231f67e2b45cf8189612bb5dc3 \ - --hash=sha256:28f15209fb535dd4c504a7762d3bc440779b0e37d50ed810ced209e5cea60d96 \ - --hash=sha256:29fb3dcd0468b7715f8ce2c0c2d9bbbaf5ae686334951343a41bd8d155c6ea27 \ - --hash=sha256:2a6ee9620061b2a722749b391c0d80a0e2ae97290f1b32e28d5a362e21941ee4 \ - --hash=sha256:300321e3985c968e3ae7fbda187237b225f3ffe6528395a5b7a5407f73cf093e \ - --hash=sha256:32437f0b275c1d09d9c3add782516413e98cd7c09e6baf4715cbce781fc29912 \ - --hash=sha256:3c09418a14471c7ae69ba682e2428cae5b4420a766659605566c0fa6987f6b7e \ - --hash=sha256:49c6249260890e05b8111ebfc391ed58b3cb4b33e63197b2ec7f776e45330721 \ - --hash=sha256:4cc9b512e9fb590797474f58b7f6d1f1b654b3a94f4fa8558b48ca8b3cfc97cf \ - --hash=sha256:508b0b513fa1266875524ba8a9ecc27b02ad771fe1704a16314dc1a816a68737 \ - --hash=sha256:50cd26b0cf6664cb3b3dd161ba0a09c9c1343db064e7c69f9f8b551f5104d654 \ - --hash=sha256:5c4193f70f8069550a1788bd0cd3268ab7d3a2b70583dfe3b2e7f421e9aace06 \ - --hash=sha256:5dfe9d6a4c39b8b6edd7990091fea4f852888e41919d0e6722fe78dd421db0eb \ - --hash=sha256:63571bb7897a584ca3249c86dd01c10bcb5fe4296e3568b2e9c1a55356b6410e \ - --hash=sha256:75621882d2230ab77fb6a03d4cbccd2038511491076e7964ef87306623aa5272 \ - --hash=sha256:75eb7cadc8da49302f5b659d40ba4f6d94d5045fbd9569c9d058e77b0514c9e4 \ 
- --hash=sha256:88a5d6b268e9ad18f3533e184744acdaa2e913b13148160b1152300c949bbb5f \ - --hash=sha256:8a10968963640e75cc0193e1847616ab4c718e83b6938ae74dea44953950f6b7 \ - --hash=sha256:90bec6a86b348b4559b6482e2b684db4a9a7eed1fa054b86115a48d58fbbf62a \ - --hash=sha256:98339aa9911853f131de11010f6dd94c8cec254d3d1f7261528c3b3e3219f139 \ - --hash=sha256:a99a6b067e5190ac6d12005a4d85aa6227c5606fa93211f86b1dafb16233e57d \ - --hash=sha256:bffa2eee3b87376cc6b31eee36d05349571c236d1de1175b804b348dc0941e3f \ - --hash=sha256:c6c2d535a7beb1f8790aaa98fd089ceab2e3dd7ca48aca0af7dc60e6ef93ffe1 \ - --hash=sha256:cc14e7519fab2a4ed87d31f99c31a3796e4e1fe63a86ebdd1c5a1ea78ebd5896 \ - --hash=sha256:dd0482f3fc547f1b1b5d6a8b8e08f63fdc250c58ce688dedd8851e6e26cff0f3 \ - --hash=sha256:dde972a1e11bb7b702ed0e447953e7617723760f420decb97305e66fb4afc54f \ - --hash=sha256:e54af82d68ef8255535a6cdb353f55d6b8cf418a83e2be3569243787a4f4866f \ - --hash=sha256:e606e6316911471c8d9b4618e082635cfe98876007556e89ce03d52ff5e8fcf0 \ - --hash=sha256:f41b018f126aac18583956c54544db437f25c7ee4794bcb23eb38bef8e5e192a \ - --hash=sha256:f8f4625536926a155b80ad2bbff44f8cc59e9f2ad14cdda7acf4c135b4dc8ff2 \ - --hash=sha256:fe52dbe47d9deb69b05084abd4b0df7abb39a3c51957c09f635520abd49b29dd - # via - # -r requirements.in - # pandas - # pyarrow -packaging==21.0 \ - --hash=sha256:7dc96269f53a4ccec5c0670940a4281106dd0bb343f47b7471f779df49c2fbe7 \ - --hash=sha256:c86254f9220d55e31cc94d69bade760f0847da8000def4dfe1c6b872fd14ff14 - # via pytest -pandas==1.3.4 \ - --hash=sha256:003ba92db58b71a5f8add604a17a059f3068ef4e8c0c365b088468d0d64935fd \ - --hash=sha256:10e10a2527db79af6e830c3d5842a4d60383b162885270f8cffc15abca4ba4a9 \ - --hash=sha256:22808afb8f96e2269dcc5b846decacb2f526dd0b47baebc63d913bf847317c8f \ - --hash=sha256:2d1dc09c0013d8faa7474574d61b575f9af6257ab95c93dcf33a14fd8d2c1bab \ - --hash=sha256:35c77609acd2e4d517da41bae0c11c70d31c87aae8dd1aabd2670906c6d2c143 \ - 
--hash=sha256:372d72a3d8a5f2dbaf566a5fa5fa7f230842ac80f29a931fb4b071502cf86b9a \ - --hash=sha256:42493f8ae67918bf129869abea8204df899902287a7f5eaf596c8e54e0ac7ff4 \ - --hash=sha256:5298a733e5bfbb761181fd4672c36d0c627320eb999c59c65156c6a90c7e1b4f \ - --hash=sha256:5ba0aac1397e1d7b654fccf263a4798a9e84ef749866060d19e577e927d66e1b \ - --hash=sha256:a2aa18d3f0b7d538e21932f637fbfe8518d085238b429e4790a35e1e44a96ffc \ - --hash=sha256:a388960f979665b447f0847626e40f99af8cf191bce9dc571d716433130cb3a7 \ - --hash=sha256:a51528192755f7429c5bcc9e80832c517340317c861318fea9cea081b57c9afd \ - --hash=sha256:b528e126c13816a4374e56b7b18bfe91f7a7f6576d1aadba5dee6a87a7f479ae \ - --hash=sha256:c1aa4de4919358c5ef119f6377bc5964b3a7023c23e845d9db7d9016fa0c5b1c \ - --hash=sha256:c2646458e1dce44df9f71a01dc65f7e8fa4307f29e5c0f2f92c97f47a5bf22f5 \ - --hash=sha256:d47750cf07dee6b55d8423471be70d627314277976ff2edd1381f02d52dbadf9 \ - --hash=sha256:d99d2350adb7b6c3f7f8f0e5dfb7d34ff8dd4bc0a53e62c445b7e43e163fce63 \ - --hash=sha256:dd324f8ee05925ee85de0ea3f0d66e1362e8c80799eb4eb04927d32335a3e44a \ - --hash=sha256:eaca36a80acaacb8183930e2e5ad7f71539a66805d6204ea88736570b2876a7b \ - --hash=sha256:f567e972dce3bbc3a8076e0b675273b4a9e8576ac629149cf8286ee13c259ae5 \ - --hash=sha256:fe48e4925455c964db914b958f6e7032d285848b7538a5e1b19aeb26ffaea3ec - # via -r requirements.in -pathspec==0.9.0 \ - --hash=sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a \ - --hash=sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1 - # via black -platformdirs==2.4.0 \ - --hash=sha256:367a5e80b3d04d2428ffa76d33f124cf11e8fff2acdaa9b43d545f5c7d661ef2 \ - --hash=sha256:8868bbe3c3c80d42f20156f22e7131d2fb321f5bc86a2a345375c6481a67021d - # via black -pluggy==1.0.0 \ - --hash=sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159 \ - --hash=sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3 - # via pytest -py==1.10.0 \ - 
--hash=sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3 \ - --hash=sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a - # via pytest -pyarrow==6.0.0 \ - --hash=sha256:004185e0babc6f3c3fba6ba4f106e406a0113d0f82bb9ad9a8571a1978c45d04 \ - --hash=sha256:0204e80777ab8f4e9abd3a765a8ec07ed1e3c4630bacda50d2ce212ef0f3826f \ - --hash=sha256:072c1a0fca4509eefd7d018b78542fb7e5c63aaf5698f1c0a6e45628ae17ba44 \ - --hash=sha256:15dc0d673d3f865ca63c877bd7a2eced70b0a08969fb733a28247134b8a1f18b \ - --hash=sha256:1c38263ea438a1666b13372e7565450cfeec32dbcd1c2595749476a58465eaec \ - --hash=sha256:281ce5fa03621d786a9beb514abb09846db7f0221b50eabf543caa24037eaacd \ - --hash=sha256:2d2c681659396c745e4f1988d5dd41dcc3ad557bb8d4a8c2e44030edafc08a91 \ - --hash=sha256:376c4b5f248ae63df21fe15c194e9013753164be2d38f4b3fb8bde63ac5a1958 \ - --hash=sha256:465f87fa0be0b2928b2beeba22b5813a0203fb05d90fd8563eea48e08ecc030e \ - --hash=sha256:477c746ef42c039348a288584800e299456c80c5691401bb9b19aa9c02a427b7 \ - --hash=sha256:5144bd9db2920c7cb566c96462d62443cc239104f94771d110f74393f2fb42a2 \ - --hash=sha256:5408fa8d623e66a0445f3fb0e4027fd219bf99bfb57422d543d7b7876e2c5b55 \ - --hash=sha256:5be62679201c441356d3f2a739895dcc8d4d299f2a6eabcd2163bfb6a898abba \ - --hash=sha256:5c666bc6a1cebf01206e2dc1ab05f25f39f35d3a499e0ef5cd635225e07306ca \ - --hash=sha256:6163d82cca7541774b00503c295fe86a1722820eddb958b57f091bb6f5b0a6db \ - --hash=sha256:6a1d9a2f4ee812ed0bd4182cabef99ea914ac297274f0de086f2488093d284ef \ - --hash=sha256:7a683f71b848eb6310b4ec48c0def55dac839e9994c1ac874c9b2d3d5625def1 \ - --hash=sha256:82fe80309e01acf29e3943a1f6d3c98ec109fe1d356bc1ac37d639bcaadcf684 \ - --hash=sha256:8c23f8cdecd3d9e49f9b0f9a651ae5549d1d32fd4901fb1bdc2d327edfba844f \ - --hash=sha256:8d41dfb09ba9236cca6245f33088eb42f3c54023da281139241e0f9f3b4b754e \ - --hash=sha256:a19e58dfb04e451cd8b7bdec3ac8848373b95dfc53492c9a69789aa9074a3c1b \ - 
--hash=sha256:a50d2f77b86af38ceabf45617208b9105d20e7a5eebc584e7c8c0acededd82ce \ - --hash=sha256:a5bed4f948c032c40597302e9bdfa65f62295240306976ecbe43a54924c6f94f \ - --hash=sha256:ac941a147d14993987cc8b605b721735a34b3e54d167302501fb4db1ad7382c7 \ - --hash=sha256:b86d175262db1eb46afdceb36d459409eb6f8e532d3dec162f8bf572c7f57623 \ - --hash=sha256:bf3400780c4d3c9cb43b1e8a1aaf2e1b7199a0572d0a645529d2784e4d0d8497 \ - --hash=sha256:c7a6e7e0bf8779e9c3428ced85507541f3da9a0675e2f4781d4eb2c7042cbf81 \ - --hash=sha256:cc1d4a70efd583befe92d4ea6f74ed2e0aa31ccdde767cd5cae8e77c65a1c2d4 \ - --hash=sha256:d046dc78a9337baa6415be915c5a16222505233e238a1017f368243c89817eea \ - --hash=sha256:da7860688c33ca88ac05f1a487d32d96d9caa091412496c35f3d1d832145675a \ - --hash=sha256:ddf2e6e3b321adaaf716f2d5af8e92d205a9671e0cb7c0779710a567fd1dd580 \ - --hash=sha256:e81508239a71943759cee272ce625ae208092dd36ef2c6713fccee30bbcf52bb \ - --hash=sha256:ea64a48a85c631eb2a0ea13ccdec5143c85b5897836b16331ee4289d27a57247 \ - --hash=sha256:ed0be080cf595ea15ff1c9ff4097bbf1fcc4b50847d98c0a3c0412fbc6ede7e9 \ - --hash=sha256:fb701ec4a94b92102606d4e88f0b8eba34f09a5ad8e014eaa4af76f42b7f62ae \ - --hash=sha256:fbda7595f24a639bcef3419ecfac17216efacb09f7b0f1b4c4c97f900d65ca0e - # via -r requirements.in -pycodestyle==2.8.0 \ - --hash=sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20 \ - --hash=sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f - # via flake8 -pyflakes==2.4.0 \ - --hash=sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c \ - --hash=sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e - # via flake8 -pyparsing==3.0.3 \ - --hash=sha256:9e3511118010f112a4b4b435ae50e1eaa610cda191acb9e421d60cf5fde83455 \ - --hash=sha256:f8d3fe9fc404576c5164f0f0c4e382c96b85265e023c409c43d48f65da9d60d0 - # via packaging -pytest==6.2.5 \ - --hash=sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89 \ - 
--hash=sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134 - # via -r requirements.in -python-dateutil==2.8.2 \ - --hash=sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86 \ - --hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 - # via pandas -pytz==2021.3 \ - --hash=sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c \ - --hash=sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326 - # via pandas -regex==2021.10.23 \ - --hash=sha256:0c186691a7995ef1db61205e00545bf161fb7b59cdb8c1201c89b333141c438a \ - --hash=sha256:0dcc0e71118be8c69252c207630faf13ca5e1b8583d57012aae191e7d6d28b84 \ - --hash=sha256:0f7552429dd39f70057ac5d0e897e5bfe211629652399a21671e53f2a9693a4e \ - --hash=sha256:129472cd06062fb13e7b4670a102951a3e655e9b91634432cfbdb7810af9d710 \ - --hash=sha256:13ec99df95003f56edcd307db44f06fbeb708c4ccdcf940478067dd62353181e \ - --hash=sha256:1f2b59c28afc53973d22e7bc18428721ee8ca6079becf1b36571c42627321c65 \ - --hash=sha256:2b20f544cbbeffe171911f6ce90388ad36fe3fad26b7c7a35d4762817e9ea69c \ - --hash=sha256:2fb698037c35109d3c2e30f2beb499e5ebae6e4bb8ff2e60c50b9a805a716f79 \ - --hash=sha256:34d870f9f27f2161709054d73646fc9aca49480617a65533fc2b4611c518e455 \ - --hash=sha256:391703a2abf8013d95bae39145d26b4e21531ab82e22f26cd3a181ee2644c234 \ - --hash=sha256:450dc27483548214314640c89a0f275dbc557968ed088da40bde7ef8fb52829e \ - --hash=sha256:45b65d6a275a478ac2cbd7fdbf7cc93c1982d613de4574b56fd6972ceadb8395 \ - --hash=sha256:5095a411c8479e715784a0c9236568ae72509450ee2226b649083730f3fadfc6 \ - --hash=sha256:530fc2bbb3dc1ebb17f70f7b234f90a1dd43b1b489ea38cea7be95fb21cdb5c7 \ - --hash=sha256:56f0c81c44638dfd0e2367df1a331b4ddf2e771366c4b9c5d9a473de75e3e1c7 \ - --hash=sha256:5e9c9e0ce92f27cef79e28e877c6b6988c48b16942258f3bc55d39b5f911df4f \ - --hash=sha256:6d7722136c6ed75caf84e1788df36397efdc5dbadab95e59c2bba82d4d808a4c \ - 
--hash=sha256:74d071dbe4b53c602edd87a7476ab23015a991374ddb228d941929ad7c8c922e \ - --hash=sha256:7b568809dca44cb75c8ebb260844ea98252c8c88396f9d203f5094e50a70355f \ - --hash=sha256:80bb5d2e92b2258188e7dcae5b188c7bf868eafdf800ea6edd0fbfc029984a88 \ - --hash=sha256:8d1cdcda6bd16268316d5db1038965acf948f2a6f43acc2e0b1641ceab443623 \ - --hash=sha256:9f665677e46c5a4d288ece12fdedf4f4204a422bb28ff05f0e6b08b7447796d1 \ - --hash=sha256:a30513828180264294953cecd942202dfda64e85195ae36c265daf4052af0464 \ - --hash=sha256:a7a986c45d1099a5de766a15de7bee3840b1e0e1a344430926af08e5297cf666 \ - --hash=sha256:a940ca7e7189d23da2bfbb38973832813eab6bd83f3bf89a977668c2f813deae \ - --hash=sha256:ab7c5684ff3538b67df3f93d66bd3369b749087871ae3786e70ef39e601345b0 \ - --hash=sha256:be04739a27be55631069b348dda0c81d8ea9822b5da10b8019b789e42d1fe452 \ - --hash=sha256:c0938ddd60cc04e8f1faf7a14a166ac939aac703745bfcd8e8f20322a7373019 \ - --hash=sha256:cb46b542133999580ffb691baf67410306833ee1e4f58ed06b6a7aaf4e046952 \ - --hash=sha256:d134757a37d8640f3c0abb41f5e68b7cf66c644f54ef1cb0573b7ea1c63e1509 \ - --hash=sha256:de557502c3bec8e634246588a94e82f1ee1b9dfcfdc453267c4fb652ff531570 \ - --hash=sha256:ded0c4a3eee56b57fcb2315e40812b173cafe79d2f992d50015f4387445737fa \ - --hash=sha256:e1dae12321b31059a1a72aaa0e6ba30156fe7e633355e445451e4021b8e122b6 \ - --hash=sha256:eb672217f7bd640411cfc69756ce721d00ae600814708d35c930930f18e8029f \ - --hash=sha256:ee684f139c91e69fe09b8e83d18b4d63bf87d9440c1eb2eeb52ee851883b1b29 \ - --hash=sha256:f3f9a91d3cc5e5b0ddf1043c0ae5fa4852f18a1c0050318baf5fc7930ecc1f9c - # via black -six==1.16.0 \ - --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ - --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 - # via python-dateutil -toml==0.10.2 \ - --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ - --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f - # via - # -r 
requirements.in - # maturin - # mypy - # pytest -tomli==1.2.2 \ - --hash=sha256:c6ce0015eb38820eaf32b5db832dbc26deb3dd427bd5f6556cf0acac2c214fee \ - --hash=sha256:f04066f68f5554911363063a30b108d2b5a5b1a010aa8b6132af78489fe3aade - # via black -typing-extensions==3.10.0.2 \ - --hash=sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e \ - --hash=sha256:d8226d10bc02a29bcc81df19a26e56a9647f8b0a6d4a83924139f4a8b01f17b7 \ - --hash=sha256:f1d25edafde516b146ecd0613dabcc61409817af4766fbbcfb8d1ad4ec441a34 - # via - # black - # mypy From 433dbca009a25a78e698d2c703ef9f1e4177dae0 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Wed, 10 May 2023 08:53:55 -0400 Subject: [PATCH 045/413] Expand Expr to include RexType basic support (#378) * Make expr member of PyExpr public * Add RexType to Expr * Add utility functions for mapping ScalarValue instances to DataTypeMap instances * Add function to get python_value from Expr instance * Fix syntax problems * Add function to get the operands for a Rex::Call * Add function to get operator for RexType::Call * expand types function to include variant support for BinaryExpr * Add variant coverage for Decimal128 and Decimal256 * add function for getting the column name of an Expr from a LogicalPlan * Make PyProjection::projection member public * Add projected_expressions to projection node * Adjust function signature * Add Distinct variant to to_variant function in PyLogicalPlan * Fill in variants for DataType::Timestamp * Address syntax issues * Refactor types() function to extend support for CAST * Update CAST variant handling * Cargo fmt * Cargo clippy * Coverage for INTERVAL in DataType * More cargo fmt changes --- src/common/data_type.rs | 119 +++++++++++++--- src/expr.rs | 300 +++++++++++++++++++++++++++++++++++++++- src/expr/projection.rs | 18 ++- src/sql/logical.rs | 2 + 4 files changed, 414 insertions(+), 25 deletions(-) diff --git a/src/common/data_type.rs b/src/common/data_type.rs index d55a0e86d..622e1aa46 
100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -use datafusion::arrow::datatypes::DataType; -use datafusion_common::DataFusionError; +use datafusion::arrow::datatypes::{DataType, IntervalUnit, TimeUnit}; +use datafusion_common::{DataFusionError, ScalarValue}; use pyo3::prelude::*; use crate::errors::py_datafusion_err; @@ -130,9 +130,11 @@ impl DataTypeMap { PythonType::Float, SqlType::FLOAT, )), - DataType::Timestamp(_, _) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), - ))), + DataType::Timestamp(unit, tz) => Ok(DataTypeMap::new( + DataType::Timestamp(unit.clone(), tz.clone()), + PythonType::Datetime, + SqlType::DATE, + )), DataType::Date32 => Ok(DataTypeMap::new( DataType::Date32, PythonType::Datetime, @@ -143,18 +145,28 @@ impl DataTypeMap { PythonType::Datetime, SqlType::DATE, )), - DataType::Time32(_) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), - ))), - DataType::Time64(_) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), - ))), + DataType::Time32(unit) => Ok(DataTypeMap::new( + DataType::Time32(unit.clone()), + PythonType::Datetime, + SqlType::DATE, + )), + DataType::Time64(unit) => Ok(DataTypeMap::new( + DataType::Time64(unit.clone()), + PythonType::Datetime, + SqlType::DATE, + )), DataType::Duration(_) => Err(py_datafusion_err(DataFusionError::NotImplemented( format!("{:?}", arrow_type), ))), - DataType::Interval(_) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), - ))), + DataType::Interval(interval_unit) => Ok(DataTypeMap::new( + DataType::Interval(interval_unit.clone()), + PythonType::Datetime, + match interval_unit { + IntervalUnit::DayTime => SqlType::INTERVAL_DAY, + IntervalUnit::MonthDayNano => SqlType::INTERVAL_MONTH, + IntervalUnit::YearMonth => 
SqlType::INTERVAL_YEAR_MONTH, + }, + )), DataType::Binary => Ok(DataTypeMap::new( DataType::Binary, PythonType::Bytes, @@ -197,12 +209,16 @@ impl DataTypeMap { DataType::Dictionary(_, _) => Err(py_datafusion_err(DataFusionError::NotImplemented( format!("{:?}", arrow_type), ))), - DataType::Decimal128(_, _) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), - ))), - DataType::Decimal256(_, _) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), - ))), + DataType::Decimal128(precision, scale) => Ok(DataTypeMap::new( + DataType::Decimal128(*precision, *scale), + PythonType::Float, + SqlType::DECIMAL, + )), + DataType::Decimal256(precision, scale) => Ok(DataTypeMap::new( + DataType::Decimal256(*precision, *scale), + PythonType::Float, + SqlType::DECIMAL, + )), DataType::Map(_, _) => Err(py_datafusion_err(DataFusionError::NotImplemented( format!("{:?}", arrow_type), ))), @@ -211,6 +227,69 @@ impl DataTypeMap { )), } } + + /// Generate the `DataTypeMap` from a `ScalarValue` instance + pub fn map_from_scalar_value(scalar_val: &ScalarValue) -> Result { + DataTypeMap::map_from_arrow_type(&DataTypeMap::map_from_scalar_to_arrow(scalar_val)?) 
+ } + + /// Maps a `ScalarValue` to an Arrow `DataType` + pub fn map_from_scalar_to_arrow(scalar_val: &ScalarValue) -> Result { + match scalar_val { + ScalarValue::Boolean(_) => Ok(DataType::Boolean), + ScalarValue::Float32(_) => Ok(DataType::Float32), + ScalarValue::Float64(_) => Ok(DataType::Float64), + ScalarValue::Decimal128(_, precision, scale) => { + Ok(DataType::Decimal128(*precision, *scale)) + } + ScalarValue::Dictionary(data_type, scalar_type) => { + // Call this function again to map the dictionary scalar_value to an Arrow type + Ok(DataType::Dictionary( + Box::new(*data_type.clone()), + Box::new(DataTypeMap::map_from_scalar_to_arrow(scalar_type)?), + )) + } + ScalarValue::Int8(_) => Ok(DataType::Int8), + ScalarValue::Int16(_) => Ok(DataType::Int16), + ScalarValue::Int32(_) => Ok(DataType::Int32), + ScalarValue::Int64(_) => Ok(DataType::Int64), + ScalarValue::UInt8(_) => Ok(DataType::UInt8), + ScalarValue::UInt16(_) => Ok(DataType::UInt16), + ScalarValue::UInt32(_) => Ok(DataType::UInt32), + ScalarValue::UInt64(_) => Ok(DataType::UInt64), + ScalarValue::Utf8(_) => Ok(DataType::Utf8), + ScalarValue::LargeUtf8(_) => Ok(DataType::LargeUtf8), + ScalarValue::Binary(_) => Ok(DataType::Binary), + ScalarValue::LargeBinary(_) => Ok(DataType::LargeBinary), + ScalarValue::Date32(_) => Ok(DataType::Date32), + ScalarValue::Date64(_) => Ok(DataType::Date64), + ScalarValue::Time32Second(_) => Ok(DataType::Time32(TimeUnit::Second)), + ScalarValue::Time32Millisecond(_) => Ok(DataType::Time32(TimeUnit::Millisecond)), + ScalarValue::Time64Microsecond(_) => Ok(DataType::Time64(TimeUnit::Microsecond)), + ScalarValue::Time64Nanosecond(_) => Ok(DataType::Time64(TimeUnit::Nanosecond)), + ScalarValue::Null => Ok(DataType::Null), + ScalarValue::TimestampSecond(_, tz) => { + Ok(DataType::Timestamp(TimeUnit::Second, tz.to_owned())) + } + ScalarValue::TimestampMillisecond(_, tz) => { + Ok(DataType::Timestamp(TimeUnit::Millisecond, tz.to_owned())) + } + 
ScalarValue::TimestampMicrosecond(_, tz) => { + Ok(DataType::Timestamp(TimeUnit::Microsecond, tz.to_owned())) + } + ScalarValue::TimestampNanosecond(_, tz) => { + Ok(DataType::Timestamp(TimeUnit::Nanosecond, tz.to_owned())) + } + ScalarValue::IntervalYearMonth(..) => Ok(DataType::Interval(IntervalUnit::YearMonth)), + ScalarValue::IntervalDayTime(..) => Ok(DataType::Interval(IntervalUnit::DayTime)), + ScalarValue::IntervalMonthDayNano(..) => { + Ok(DataType::Interval(IntervalUnit::MonthDayNano)) + } + ScalarValue::List(_val, field_ref) => Ok(DataType::List(field_ref.to_owned())), + ScalarValue::Struct(_, fields) => Ok(DataType::Struct(fields.to_owned())), + ScalarValue::FixedSizeBinary(size, _) => Ok(DataType::FixedSizeBinary(*size)), + } + } } #[pymethods] diff --git a/src/expr.rs b/src/expr.rs index 4ada4c16d..c002b3291 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -15,19 +15,26 @@ // specific language governing permissions and limitations // under the License. +use datafusion_common::DFField; +use datafusion_expr::expr::{AggregateFunction, Sort, WindowFunction}; +use datafusion_expr::utils::exprlist_to_fields; use pyo3::{basic::CompareOp, prelude::*}; use std::convert::{From, Into}; use datafusion::arrow::datatypes::DataType; use datafusion::arrow::pyarrow::PyArrowType; -use datafusion_expr::{col, lit, Cast, Expr, GetIndexedField}; +use datafusion_expr::{ + col, lit, Between, BinaryExpr, Case, Cast, Expr, GetIndexedField, Like, LogicalPlan, Operator, + TryCast, +}; -use crate::common::data_type::RexType; -use crate::errors::py_runtime_err; +use crate::common::data_type::{DataTypeMap, RexType}; +use crate::errors::{py_runtime_err, py_type_err, DataFusionError}; use crate::expr::aggregate_expr::PyAggregateFunction; use crate::expr::binary_expr::PyBinaryExpr; use crate::expr::column::PyColumn; use crate::expr::literal::PyLiteral; +use crate::sql::logical::PyLogicalPlan; use datafusion::scalar::ScalarValue; use self::alias::PyAlias; @@ -274,11 +281,296 @@ impl PyExpr 
{ Expr::ScalarSubquery(..) => RexType::ScalarSubquery, }) } + + /// Given the current `Expr` return the DataTypeMap which represents the + /// PythonType, Arrow DataType, and SqlType Enum which represents + pub fn types(&self) -> PyResult { + Self::_types(&self.expr) + } + + /// Extracts the Expr value into a PyObject that can be shared with Python + pub fn python_value(&self, py: Python) -> PyResult { + match &self.expr { + Expr::Literal(scalar_value) => Ok(match scalar_value { + ScalarValue::Null => todo!(), + ScalarValue::Boolean(v) => v.into_py(py), + ScalarValue::Float32(v) => v.into_py(py), + ScalarValue::Float64(v) => v.into_py(py), + ScalarValue::Decimal128(_, _, _) => todo!(), + ScalarValue::Int8(v) => v.into_py(py), + ScalarValue::Int16(v) => v.into_py(py), + ScalarValue::Int32(v) => v.into_py(py), + ScalarValue::Int64(v) => v.into_py(py), + ScalarValue::UInt8(v) => v.into_py(py), + ScalarValue::UInt16(v) => v.into_py(py), + ScalarValue::UInt32(v) => v.into_py(py), + ScalarValue::UInt64(v) => v.into_py(py), + ScalarValue::Utf8(v) => v.clone().into_py(py), + ScalarValue::LargeUtf8(v) => v.clone().into_py(py), + ScalarValue::Binary(v) => v.clone().into_py(py), + ScalarValue::FixedSizeBinary(_, _) => todo!(), + ScalarValue::LargeBinary(v) => v.clone().into_py(py), + ScalarValue::List(_, _) => todo!(), + ScalarValue::Date32(v) => v.into_py(py), + ScalarValue::Date64(v) => v.into_py(py), + ScalarValue::Time32Second(v) => v.into_py(py), + ScalarValue::Time32Millisecond(v) => v.into_py(py), + ScalarValue::Time64Microsecond(v) => v.into_py(py), + ScalarValue::Time64Nanosecond(v) => v.into_py(py), + ScalarValue::TimestampSecond(_, _) => todo!(), + ScalarValue::TimestampMillisecond(_, _) => todo!(), + ScalarValue::TimestampMicrosecond(_, _) => todo!(), + ScalarValue::TimestampNanosecond(_, _) => todo!(), + ScalarValue::IntervalYearMonth(v) => v.into_py(py), + ScalarValue::IntervalDayTime(v) => v.into_py(py), + ScalarValue::IntervalMonthDayNano(v) => v.into_py(py), 
+ ScalarValue::Struct(_, _) => todo!(), + ScalarValue::Dictionary(_, _) => todo!(), + }), + _ => Err(py_type_err(format!( + "Non Expr::Literal encountered in types: {:?}", + &self.expr + ))), + } + } + + /// Row expressions, Rex(s), operate on the concept of operands. Different variants of Expressions, Expr(s), + /// store those operands in different datastructures. This function examines the Expr variant and returns + /// the operands to the calling logic as a Vec of PyExpr instances. + pub fn rex_call_operands(&self) -> PyResult> { + match &self.expr { + // Expr variants that are themselves the operand to return + Expr::Column(..) | Expr::ScalarVariable(..) | Expr::Literal(..) => { + Ok(vec![PyExpr::from(self.expr.clone())]) + } + + // Expr(s) that house the Expr instance to return in their bounded params + Expr::Alias(expr, ..) + | Expr::Not(expr) + | Expr::IsNull(expr) + | Expr::IsNotNull(expr) + | Expr::IsTrue(expr) + | Expr::IsFalse(expr) + | Expr::IsUnknown(expr) + | Expr::IsNotTrue(expr) + | Expr::IsNotFalse(expr) + | Expr::IsNotUnknown(expr) + | Expr::Negative(expr) + | Expr::GetIndexedField(GetIndexedField { expr, .. }) + | Expr::Cast(Cast { expr, .. }) + | Expr::TryCast(TryCast { expr, .. }) + | Expr::Sort(Sort { expr, .. }) + | Expr::InSubquery { expr, .. } => Ok(vec![PyExpr::from(*expr.clone())]), + + // Expr variants containing a collection of Expr(s) for operands + Expr::AggregateFunction(AggregateFunction { args, .. }) + | Expr::AggregateUDF { args, .. } + | Expr::ScalarFunction { args, .. } + | Expr::ScalarUDF { args, .. } + | Expr::WindowFunction(WindowFunction { args, .. 
}) => { + Ok(args.iter().map(|arg| PyExpr::from(arg.clone())).collect()) + } + + // Expr(s) that require more specific processing + Expr::Case(Case { + expr, + when_then_expr, + else_expr, + }) => { + let mut operands: Vec = Vec::new(); + + if let Some(e) = expr { + operands.push(PyExpr::from(*e.clone())); + }; + + for (when, then) in when_then_expr { + operands.push(PyExpr::from(*when.clone())); + operands.push(PyExpr::from(*then.clone())); + } + + if let Some(e) = else_expr { + operands.push(PyExpr::from(*e.clone())); + }; + + Ok(operands) + } + Expr::InList { expr, list, .. } => { + let mut operands: Vec = vec![PyExpr::from(*expr.clone())]; + for list_elem in list { + operands.push(PyExpr::from(list_elem.clone())); + } + + Ok(operands) + } + Expr::BinaryExpr(BinaryExpr { left, right, .. }) => Ok(vec![ + PyExpr::from(*left.clone()), + PyExpr::from(*right.clone()), + ]), + Expr::Like(Like { expr, pattern, .. }) => Ok(vec![ + PyExpr::from(*expr.clone()), + PyExpr::from(*pattern.clone()), + ]), + Expr::ILike(Like { expr, pattern, .. }) => Ok(vec![ + PyExpr::from(*expr.clone()), + PyExpr::from(*pattern.clone()), + ]), + Expr::SimilarTo(Like { expr, pattern, .. }) => Ok(vec![ + PyExpr::from(*expr.clone()), + PyExpr::from(*pattern.clone()), + ]), + Expr::Between(Between { + expr, + negated: _, + low, + high, + }) => Ok(vec![ + PyExpr::from(*expr.clone()), + PyExpr::from(*low.clone()), + PyExpr::from(*high.clone()), + ]), + + // Currently un-support/implemented Expr types for Rex Call operations + Expr::GroupingSet(..) + | Expr::OuterReferenceColumn(_, _) + | Expr::Wildcard + | Expr::QualifiedWildcard { .. } + | Expr::ScalarSubquery(..) + | Expr::Placeholder { .. } + | Expr::Exists { .. 
} => Err(py_runtime_err(format!( + "Unimplemented Expr type: {}", + self.expr + ))), + } + } + + /// Extracts the operator associated with a RexType::Call + pub fn rex_call_operator(&self) -> PyResult { + Ok(match &self.expr { + Expr::BinaryExpr(BinaryExpr { + left: _, + op, + right: _, + }) => format!("{op}"), + Expr::ScalarFunction { fun, args: _ } => format!("{fun}"), + Expr::ScalarUDF { fun, .. } => fun.name.clone(), + Expr::Cast { .. } => "cast".to_string(), + Expr::Between { .. } => "between".to_string(), + Expr::Case { .. } => "case".to_string(), + Expr::IsNull(..) => "is null".to_string(), + Expr::IsNotNull(..) => "is not null".to_string(), + Expr::IsTrue(_) => "is true".to_string(), + Expr::IsFalse(_) => "is false".to_string(), + Expr::IsUnknown(_) => "is unknown".to_string(), + Expr::IsNotTrue(_) => "is not true".to_string(), + Expr::IsNotFalse(_) => "is not false".to_string(), + Expr::IsNotUnknown(_) => "is not unknown".to_string(), + Expr::InList { .. } => "in list".to_string(), + Expr::Negative(..) => "negative".to_string(), + Expr::Not(..) => "not".to_string(), + Expr::Like(Like { negated, .. }) => { + if *negated { + "not like".to_string() + } else { + "like".to_string() + } + } + Expr::ILike(Like { negated, .. }) => { + if *negated { + "not ilike".to_string() + } else { + "ilike".to_string() + } + } + Expr::SimilarTo(Like { negated, .. 
}) => { + if *negated { + "not similar to".to_string() + } else { + "similar to".to_string() + } + } + _ => { + return Err(py_type_err(format!( + "Catch all triggered in get_operator_name: {:?}", + &self.expr + ))) + } + }) + } + + pub fn column_name(&self, plan: PyLogicalPlan) -> PyResult { + self._column_name(&plan.plan()).map_err(py_runtime_err) + } +} + +impl PyExpr { + pub fn _column_name(&self, plan: &LogicalPlan) -> Result { + let field = Self::expr_to_field(&self.expr, plan)?; + Ok(field.qualified_column().flat_name()) + } + + /// Create a [DFField] representing an [Expr], given an input [LogicalPlan] to resolve against + pub fn expr_to_field( + expr: &Expr, + input_plan: &LogicalPlan, + ) -> Result { + match expr { + Expr::Sort(Sort { expr, .. }) => { + // DataFusion does not support create_name for sort expressions (since they never + // appear in projections) so we just delegate to the contained expression instead + Self::expr_to_field(expr, input_plan) + } + _ => { + let fields = + exprlist_to_fields(&[expr.clone()], input_plan).map_err(PyErr::from)?; + Ok(fields[0].clone()) + } + } + } + + fn _types(expr: &Expr) -> PyResult { + match expr { + Expr::BinaryExpr(BinaryExpr { + left: _, + op, + right: _, + }) => match op { + Operator::Eq + | Operator::NotEq + | Operator::Lt + | Operator::LtEq + | Operator::Gt + | Operator::GtEq + | Operator::And + | Operator::Or + | Operator::IsDistinctFrom + | Operator::IsNotDistinctFrom + | Operator::RegexMatch + | Operator::RegexIMatch + | Operator::RegexNotMatch + | Operator::RegexNotIMatch => DataTypeMap::map_from_arrow_type(&DataType::Boolean), + Operator::Plus | Operator::Minus | Operator::Multiply | Operator::Modulo => { + DataTypeMap::map_from_arrow_type(&DataType::Int64) + } + Operator::Divide => DataTypeMap::map_from_arrow_type(&DataType::Float64), + Operator::StringConcat => DataTypeMap::map_from_arrow_type(&DataType::Utf8), + Operator::BitwiseShiftLeft + | Operator::BitwiseShiftRight + | Operator::BitwiseXor + 
| Operator::BitwiseAnd + | Operator::BitwiseOr => DataTypeMap::map_from_arrow_type(&DataType::Binary), + }, + Expr::Cast(Cast { expr: _, data_type }) => DataTypeMap::map_from_arrow_type(data_type), + Expr::Literal(scalar_value) => DataTypeMap::map_from_scalar_value(scalar_value), + _ => Err(py_type_err(format!( + "Non Expr::Literal encountered in types: {:?}", + expr + ))), + } + } } /// Initializes the `expr` module to match the pattern of `datafusion-expr` https://docs.rs/datafusion-expr/latest/datafusion_expr/ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { - // expressions m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/src/expr/projection.rs b/src/expr/projection.rs index f5ba12db2..b3296618f 100644 --- a/src/expr/projection.rs +++ b/src/expr/projection.rs @@ -16,6 +16,7 @@ // under the License. use datafusion_expr::logical_plan::Projection; +use datafusion_expr::Expr; use pyo3::prelude::*; use std::fmt::{self, Display, Formatter}; @@ -27,7 +28,7 @@ use crate::sql::logical::PyLogicalPlan; #[pyclass(name = "Projection", module = "datafusion.expr", subclass)] #[derive(Clone)] pub struct PyProjection { - projection: Projection, + pub projection: Projection, } impl PyProjection { @@ -92,6 +93,21 @@ impl PyProjection { } } +impl PyProjection { + /// Projection: Gets the names of the fields that should be projected + pub fn projected_expressions(local_expr: &PyExpr) -> Vec { + let mut projs: Vec = Vec::new(); + match &local_expr.expr { + Expr::Alias(expr, _name) => { + let py_expr: PyExpr = PyExpr::from(*expr.clone()); + projs.extend_from_slice(Self::projected_expressions(&py_expr).as_slice()); + } + _ => projs.push(local_expr.clone()), + } + projs + } +} + impl LogicalNode for PyProjection { fn inputs(&self) -> Vec { vec![PyLogicalPlan::from((*self.projection.input).clone())] diff --git a/src/sql/logical.rs b/src/sql/logical.rs index a75315d36..07a3f65b1 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -20,6 +20,7 @@ use 
std::sync::Arc; use crate::errors::py_unsupported_variant_err; use crate::expr::aggregate::PyAggregate; use crate::expr::analyze::PyAnalyze; +use crate::expr::distinct::PyDistinct; use crate::expr::empty_relation::PyEmptyRelation; use crate::expr::explain::PyExplain; use crate::expr::extension::PyExtension; @@ -62,6 +63,7 @@ impl PyLogicalPlan { LogicalPlan::EmptyRelation(plan) => PyEmptyRelation::from(plan.clone()).to_variant(py), LogicalPlan::Explain(plan) => PyExplain::from(plan.clone()).to_variant(py), LogicalPlan::Extension(plan) => PyExtension::from(plan.clone()).to_variant(py), + LogicalPlan::Distinct(plan) => PyDistinct::from(plan.clone()).to_variant(py), LogicalPlan::Filter(plan) => PyFilter::from(plan.clone()).to_variant(py), LogicalPlan::Limit(plan) => PyLimit::from(plan.clone()).to_variant(py), LogicalPlan::Projection(plan) => PyProjection::from(plan.clone()).to_variant(py), From 3711c73f2bb2bf01cd1c6fd50d0264825d2fd821 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 19 May 2023 08:57:53 -0600 Subject: [PATCH 046/413] Add Python script for generating changelog (#383) --- dev/release/README.md | 39 ++++++---- dev/release/generate-changelog.md | 114 ++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+), 15 deletions(-) create mode 100644 dev/release/generate-changelog.md diff --git a/dev/release/README.md b/dev/release/README.md index 1ba44fb50..e85b288f4 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -56,28 +56,37 @@ Before creating a new release: - a PR should be created and merged to update the major version number of the project - A new release branch should be created, such as `branch-0.8` -### Update CHANGELOG.md +### Change Log -Define release branch (e.g. `branch-0.8`), base version tag (e.g. `0.7.0`) and future version tag (e.g. `0.9.0`). Commits -between the base version tag and the release branch will be used to populate the changelog content. 
+We maintain a `CHANGELOG.md` so our users know what has been changed between releases. + +The changelog is generated using a Python script: ```bash -# create the changelog -CHANGELOG_GITHUB_TOKEN= ./dev/release/update_change_log-datafusion-python.sh main 0.8.0 0.7.0 -# review change log / edit issues and labels if needed, rerun until you are happy with the result -git commit -a -m 'Create changelog for release' +$ GITHUB_TOKEN= ./dev/release/generate-changelog.py apache/arrow-datafusion-python 24.0.0 HEAD > dev/changelog/25.0.0.md +``` + +This script creates a changelog from GitHub PRs based on the labels associated with them as well as looking for +titles starting with `feat:`, `fix:`, or `docs:` . The script will produce output similar to: + ``` +Fetching list of commits between 24.0.0 and HEAD +Fetching pull requests +Categorizing pull requests +Generating changelog content +``` + +This process is not fully automated, so there are some additional manual steps: -_If you see the error `"You have exceeded a secondary rate limit"` when running this script, try reducing the CPU -allocation to slow the process down and throttle the number of GitHub requests made per minute, by modifying the -value of the `--cpus` argument in the `update_change_log.sh` script._ +- Add the ASF header to the generated file +- Add a link to this changelog from the top-level `/datafusion/CHANGELOG.md` +- Add the following content (copy from the previous version's changelog and update as appropriate: -You can add `invalid` or `development-process` label to exclude items from -release notes. +``` +## [24.0.0](https://github.com/apache/arrow-datafusion-python/tree/24.0.0) (2023-05-06) -Send a PR to get these changes merged into the release branch (e.g. `branch-0.8`). If new commits that could change the -change log content landed in the release branch before you could merge the PR, you need to rerun the changelog update -script to regenerate the changelog and update the PR accordingly. 
+[Full Changelog](https://github.com/apache/arrow-datafusion-python/compare/23.0.0...24.0.0) +``` ### Preparing a Release Candidate diff --git a/dev/release/generate-changelog.md b/dev/release/generate-changelog.md new file mode 100644 index 000000000..caa6ae647 --- /dev/null +++ b/dev/release/generate-changelog.md @@ -0,0 +1,114 @@ +#!/usr/bin/env python + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import sys +from github import Github +import os +import re + + +def print_pulls(repo_name, title, pulls): +if len(pulls) > 0: +print("**{}:**".format(title)) +print() +for (pull, commit) in pulls: +url = "https://github.com/{}/pull/{}".format(repo_name, pull.number) +print("- {} [#{}]({}) ({})".format(pull.title, pull.number, url, commit.author.login)) +print() + + +def generate_changelog(repo, repo_name, tag1, tag2): + + # get a list of commits between two tags + print(f"Fetching list of commits between {tag1} and {tag2}", file=sys.stderr) + comparison = repo.compare(tag1, tag2) + + # get the pull requests for these commits + print("Fetching pull requests", file=sys.stderr) + unique_pulls = [] + all_pulls = [] + for commit in comparison.commits: + pulls = commit.get_pulls() + for pull in pulls: + # there can be multiple commits per PR if squash merge is not being used and + # in this case we should get all the author names, but for now just pick one + if pull.number not in unique_pulls: + unique_pulls.append(pull.number) + all_pulls.append((pull, commit)) + + # we split the pulls into categories + #TODO: make categories configurable + breaking = [] + bugs = [] + docs = [] + enhancements = [] + + # categorize the pull requests based on GitHub labels + print("Categorizing pull requests", file=sys.stderr) + for (pull, commit) in all_pulls: + + # see if PR title uses Conventional Commits + cc_type = '' + cc_scope = '' + cc_breaking = '' + parts = re.findall(r'^([a-z]+)(\([a-z]+\))?(!)?:', pull.title) + if len(parts) == 1: + parts_tuple = parts[0] + cc_type = parts_tuple[0] # fix, feat, docs, chore + cc_scope = parts_tuple[1] # component within project + cc_breaking = parts_tuple[2] == '!' 
+ + labels = [label.name for label in pull.labels] + #print(pull.number, labels, parts, file=sys.stderr) + if 'api change' in labels or cc_breaking: + breaking.append((pull, commit)) + elif 'bug' in labels or cc_type == 'fix': + bugs.append((pull, commit)) + elif 'enhancement' in labels or cc_type == 'feat': + enhancements.append((pull, commit)) + elif 'documentation' in labels or cc_type == 'docs': + docs.append((pull, commit)) + + # produce the changelog content + print("Generating changelog content", file=sys.stderr) + print_pulls(repo_name, "Breaking changes", breaking) + print_pulls(repo_name, "Implemented enhancements", enhancements) + print_pulls(repo_name, "Fixed bugs", bugs) + print_pulls(repo_name, "Documentation updates", docs) + print_pulls(repo_name, "Merged pull requests", all_pulls) + + +def cli(args=None): +"""Process command line arguments.""" +if not args: +args = sys.argv[1:] + + parser = argparse.ArgumentParser() + parser.add_argument("project", help="The project name e.g. 
apache/arrow-datafusion-python") + parser.add_argument("tag1", help="The previous release tag") + parser.add_argument("tag2", help="The current release tag") + args = parser.parse_args() + + token = os.getenv("GITHUB_TOKEN") + + g = Github(token) + repo = g.get_repo(args.project) + generate_changelog(repo, args.project, args.tag1, args.tag2) + +if __name__ == "__main__": +cli() \ No newline at end of file From 82b4a9553ed56cae664a7c637c7ad4d88fb21d35 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 23 May 2023 10:27:44 -0600 Subject: [PATCH 047/413] Update for DataFusion 25.0.0 (#386) * Update for DataFusion 25.0.0 * update changelog * lint --- CHANGELOG.md | 16 +- Cargo.lock | 1200 +++-------------- Cargo.toml | 14 +- ...ate-changelog.md => generate-changelog.py} | 63 +- src/expr.rs | 30 +- src/functions.rs | 17 +- src/pyarrow_filter_expression.rs | 6 +- 7 files changed, 302 insertions(+), 1044 deletions(-) rename dev/release/{generate-changelog.md => generate-changelog.py} (69%) mode change 100644 => 100755 diff --git a/CHANGELOG.md b/CHANGELOG.md index 22d7c0f61..b6d58e4a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,21 @@ under the License. 
--> -# Changelog +# DataFusion Python Changelog + +## [25.0.0](https://github.com/apache/arrow-datafusion-python/tree/25.0.0) (2023-05-23) + +[Full Changelog](https://github.com/apache/arrow-datafusion-python/compare/24.0.0...25.0.0) + +**Merged pull requests:** + +- Prepare 24.0.0 Release [#376](https://github.com/apache/arrow-datafusion-python/pull/376) (andygrove) +- build(deps): bump uuid from 1.3.1 to 1.3.2 [#359](https://github.com/apache/arrow-datafusion-python/pull/359) (dependabot[bot]) +- build(deps): bump mimalloc from 0.1.36 to 0.1.37 [#361](https://github.com/apache/arrow-datafusion-python/pull/361) (dependabot[bot]) +- build(deps): bump regex-syntax from 0.6.29 to 0.7.1 [#334](https://github.com/apache/arrow-datafusion-python/pull/334) (dependabot[bot]) +- upgrade maturin to 0.15.1 [#379](https://github.com/apache/arrow-datafusion-python/pull/379) (Jimexist) +- Expand Expr to include RexType basic support [#378](https://github.com/apache/arrow-datafusion-python/pull/378) (jdye64) +- Add Python script for generating changelog [#383](https://github.com/apache/arrow-datafusion-python/pull/383) (andygrove) ## [24.0.0](https://github.com/apache/arrow-datafusion-python/tree/24.0.0) (2023-05-09) diff --git a/Cargo.lock b/Cargo.lock index 080f506f8..d8fa65e64 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -62,9 +62,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.70" +version = "1.0.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7de8ce5e0f9f8d88245311066a578d72b7af3e7088f32783804676302df237e4" +checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" [[package]] name = "apache-avro" @@ -93,12 +93,6 @@ dependencies = [ "zerocopy", ] -[[package]] -name = "arc-swap" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" - [[package]] name = "arrayref" version = "0.3.7" @@ -113,9 +107,9 
@@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" [[package]] name = "arrow" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c107a57b5913d852da9d5a40e280e4695f2258b5b87733c13b770c63a7117287" +checksum = "218ca81dd088b102c0fd6687c72e73fad1ba93d2ef7b3cf9a1043b04b2c39dbf" dependencies = [ "ahash", "arrow-arith", @@ -136,9 +130,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace6aa3d5617c5d03041a05e01c6819428a8ddf49dd0b055df9b40fef9d96094" +checksum = "d49309fa2299ec34a709cfc9f487c41ecaead96d1ab70e21857466346bbbd690" dependencies = [ "arrow-array", "arrow-buffer", @@ -151,9 +145,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "104a04520692cc674e6afd7682f213ca41f9b13ff1873f63a5a2857a590b87b3" +checksum = "e7a27466d897d99654357a6d95dc0a26931d9e4306e60c14fc31a894edb86579" dependencies = [ "ahash", "arrow-buffer", @@ -168,9 +162,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72c875bcb9530ec403998fb0b2dc6d180a7c64563ca4bc22b90eafb84b113143" +checksum = "9405b78106a9d767c7b97c78a70ee1b23ee51a74f5188a821a716d9a85d1af2b" dependencies = [ "half", "num", @@ -178,9 +172,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6d6e18281636c8fc0b93be59834da6bf9a72bb70fd0c98ddfdaf124da466c28" +checksum = "be0ec5a79a87783dc828b7ff8f89f62880b3f553bc5f5b932a82f4a1035024b4" dependencies = [ "arrow-array", "arrow-buffer", @@ -195,9 +189,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = 
"38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3197dab0963a236ff8e7c82e2272535745955ac1321eb740c29f2f88b353f54e" +checksum = "350d8e55c3b2d602a0a04389bcc1da40167657143a9922a7103190603e7b7692" dependencies = [ "arrow-array", "arrow-buffer", @@ -214,9 +208,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb68113d6ecdbe8bba48b2c4042c151bf9e1c61244e45072a50250a6fc59bafe" +checksum = "c6f710d98964d2c069b8baf566130045e79e11baa105623f038a6c942f805681" dependencies = [ "arrow-buffer", "arrow-schema", @@ -226,9 +220,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eab4bbf2dd3078facb5ce0a9641316a64f42bfd8cf357e6775c8a5e6708e3a8d" +checksum = "9c99787cb8fabc187285da9e7182d22f2b80ecfac61ca0a42c4299e9eecdf903" dependencies = [ "arrow-array", "arrow-buffer", @@ -240,9 +234,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c5b650d23746a494665d914a7fa3d21d939153cff9d53bdebe39bffa88f263" +checksum = "91c95a58ce63f60d80d7a3a1222d65df0bc060b71d31353c34a8118c2a6eae7b" dependencies = [ "arrow-array", "arrow-buffer", @@ -260,9 +254,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68c6fce28e5011e30acc7466b5efcb8ed0197c396240bd2b10e167f275a3c208" +checksum = "4141e6488610cc144e841da3de5f5371488f3cf5bc6bc7b3e752c64e7639c31b" dependencies = [ "arrow-array", "arrow-buffer", @@ -275,9 +269,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "f20a421f19799d8b93eb8edde5217e910fa1e2d6ceb3c529f000e57b6db144c0" +checksum = "940191a3c636c111c41e816325b0941484bf904c46de72cd9553acd1afd24d33" dependencies = [ "ahash", "arrow-array", @@ -290,18 +284,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc85923d8d6662cc66ac6602c7d1876872e671002d60993dfdf492a6badeae92" +checksum = "18c41d058b2895a12f46dfafc306ee3529ad9660406be0ab8a7967d5e27c417e" dependencies = [ - "bitflags 2.2.1", + "bitflags 2.3.1", ] [[package]] name = "arrow-select" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ab6613ce65b61d85a3410241744e84e48fbab0fe06e1251b4429d21b3470fd" +checksum = "9fcbdda2772b7e712e77444f3a71f4ee517095aceb993b35de71de41c70d9b4f" dependencies = [ "arrow-array", "arrow-buffer", @@ -312,9 +306,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3008641239e884aefba66d8b8532da6af40d14296349fcc85935de4ba67b89e" +checksum = "7081c34f4b534ad320a03db79d58e38972041bb7c65686b98bbcc2f9a67a9cee" dependencies = [ "arrow-array", "arrow-buffer", @@ -322,7 +316,7 @@ dependencies = [ "arrow-schema", "arrow-select", "regex", - "regex-syntax 0.6.29", + "regex-syntax", ] [[package]] @@ -351,7 +345,7 @@ checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.16", ] [[package]] @@ -362,7 +356,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.16", ] [[package]] @@ -382,9 +376,9 @@ dependencies = [ [[package]] name = "base64" -version = "0.21.0" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" +checksum = "3f1e31e207a6b8fb791a38ea3105e6cb541f55e4d029902d3039a4ad07cc4105" [[package]] name = "bitflags" @@ -394,9 +388,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.2.1" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24a6904aef64d73cf10ab17ebace7befb918b82164785cb89907993be7f83813" +checksum = "6776fc96284a0bb647b615056fc496d1fe1644a7ab01829818a6d91cae888b84" [[package]] name = "blake2" @@ -451,32 +445,11 @@ dependencies = [ "alloc-stdlib", ] -[[package]] -name = "bstr" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d4260bcc2e8fc9df1eac4919a720effeb63a3f0952f5bf4944adfa18897f09" -dependencies = [ - "memchr", - "once_cell", - "regex-automata", - "serde", -] - -[[package]] -name = "btoi" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dd6407f73a9b8b6162d8a2ef999fe6afd7cc15902ebf42c5cd296addf17e0ad" -dependencies = [ - "num-traits", -] - [[package]] name = "bumpalo" -version = "3.12.1" +version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b1ce199063694f33ffb7dd4e0ee620741495c32833cde5aa08f02a0bf96f0c8" +checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" [[package]] name = "byteorder" @@ -561,22 +534,6 @@ dependencies = [ "phf_codegen", ] -[[package]] -name = "clru" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8191fa7302e03607ff0e237d4246cc043ff5b3cb9409d995172ba3bea16b807" - -[[package]] -name = "codespan-reporting" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" -dependencies = [ - "termcolor", - "unicode-width", -] 
- [[package]] name = "comfy-table" version = "6.1.4" @@ -677,50 +634,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "cxx" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f61f1b6389c3fe1c316bf8a4dccc90a38208354b330925bce1f74a6c4756eb93" -dependencies = [ - "cc", - "cxxbridge-flags", - "cxxbridge-macro", - "link-cplusplus", -] - -[[package]] -name = "cxx-build" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12cee708e8962df2aeb38f594aae5d827c022b6460ac71a7a3e2c3c2aae5a07b" -dependencies = [ - "cc", - "codespan-reporting", - "once_cell", - "proc-macro2", - "quote", - "scratch", - "syn 2.0.15", -] - -[[package]] -name = "cxxbridge-flags" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7944172ae7e4068c533afbb984114a56c46e9ccddda550499caa222902c7f7bb" - -[[package]] -name = "cxxbridge-macro" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2345488264226bf682893e25de0769f3360aac9957980ec49361b083ddaa5bc5" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.15", -] - [[package]] name = "dashmap" version = "5.4.0" @@ -736,9 +649,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "24.0.0" +version = "25.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0404a559d5a6d8320369bb0a290b43bbc4f8622d0ef6f04bd095ace9a663f439" +checksum = "9a99e1dc003c0b310b203e4dc2aadc6531dccb37fa681630b588c5154d1b7637" dependencies = [ "ahash", "apache-avro", @@ -788,9 +701,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "24.0.0" +version = "25.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4653b79a55161852973760db69ea6dcd05c9966a1b588fd83028f625536a1d7f" +checksum = "362811fd9cb830877f5a538a19e5f9a1e5519486dcd6dc4f39a3d6192d2f4ba6" dependencies = [ "apache-avro", "arrow", @@ -805,9 
+718,9 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "24.0.0" +version = "25.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53481c334b73c6759697919d1d05690392381145fa1890849a65b5a71a24a1ec" +checksum = "b052014fe38299019dd2e7dec6c2dd44c7ad9a5375ab4a36e5ed800700d7eb7a" dependencies = [ "dashmap", "datafusion-common", @@ -823,9 +736,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "24.0.0" +version = "25.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8ecd7c6605d0b4269346d03289e2ced1715a303e75e6d313dba0bafb1f823f2" +checksum = "55b590b184c6f59a65ee3ada666475ff8173bfd409c618c3c29b5abf7ef4a92c" dependencies = [ "ahash", "arrow", @@ -835,9 +748,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "24.0.0" +version = "25.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70a7c04e94cb4aa9c323993856e18b91f690dda0358a34ab07a3fe0f14bc6600" +checksum = "543311151a4c068b2b663f07905dd4d4a605279dacb24ab9434f8f29a1917cd3" dependencies = [ "arrow", "async-trait", @@ -848,14 +761,14 @@ dependencies = [ "hashbrown 0.13.2", "itertools", "log", - "regex-syntax 0.7.1", + "regex-syntax", ] [[package]] name = "datafusion-physical-expr" -version = "24.0.0" +version = "25.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e34eb8668fee1443965fff41ba73b2956d50a07ed8dd929cfa2e839ab91da5a" +checksum = "b49cfd92e225d948a2199650e8e292ef1f0a3d62ab9b540795c3d3ba9edc2b2e" dependencies = [ "ahash", "arrow", @@ -886,7 +799,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "24.0.0" +version = "25.0.0" dependencies = [ "async-trait", "datafusion", @@ -904,8 +817,8 @@ dependencies = [ "pyo3", "pyo3-build-config", "rand", - "regex-syntax 0.7.1", - "syn 2.0.15", + "regex-syntax", + "syn 2.0.16", "tokio", "url", "uuid", @@ -913,9 +826,9 @@ dependencies = [ [[package]] name = 
"datafusion-row" -version = "24.0.0" +version = "25.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efa800ae88dfd62ea6c58c24a1154d92937c755672f522b84e8ea6539fad369b" +checksum = "e98cdd6f0d005bce0035bebe03a35ccb7eca3cf40a9bce27e24b1e2a56941ffc" dependencies = [ "arrow", "datafusion-common", @@ -925,9 +838,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "24.0.0" +version = "25.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "556642ef90073e39af721362353ccce4e1f418da7a8e31c23510ed9de6eb71f2" +checksum = "9585219bbbad5287c762ad9ede044ab6c0b778fde980c9858b15d6dd5bd80f35" dependencies = [ "arrow", "arrow-schema", @@ -939,9 +852,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "24.0.0" +version = "25.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d7643a77bb446047095ec21b913adb900b71c7a2ae600f8062906dd2e5642b9" +checksum = "6f0f1a60b63cca4fb2efbf572c3f2875bccbb2dfe448dd562f103ca5d9830216" dependencies = [ "async-recursion", "chrono", @@ -955,9 +868,9 @@ dependencies = [ [[package]] name = "digest" -version = "0.10.6" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", "crypto-common", @@ -970,12 +883,6 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" -[[package]] -name = "dunce" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56ce8c6da7551ec6c462cbaf3bfbc75131ebbfa1c944aeaa9dab51ca1c5f0c3b" - [[package]] name = "dyn-clone" version = "1.0.11" @@ -1027,18 +934,6 @@ dependencies = [ "instant", ] -[[package]] -name = "filetime" -version = 
"0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cbc844cecaee9d4443931972e1289c8ff485cb4cc2767cb03ca139ed6885153" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall 0.2.16", - "windows-sys 0.48.0", -] - [[package]] name = "fixedbitset" version = "0.4.2" @@ -1057,9 +952,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.25" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841" +checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743" dependencies = [ "crc32fast", "miniz_oxide", @@ -1136,7 +1031,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.16", ] [[package]] @@ -1191,548 +1086,18 @@ dependencies = [ ] [[package]] -name = "gix" -version = "0.44.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bf41b61f7df395284f7a579c0fa1a7e012c5aede655174d4e91299ef1cac643" -dependencies = [ - "gix-actor", - "gix-attributes", - "gix-config", - "gix-credentials", - "gix-date", - "gix-diff", - "gix-discover", - "gix-features", - "gix-fs", - "gix-glob", - "gix-hash", - "gix-hashtable", - "gix-ignore", - "gix-index", - "gix-lock", - "gix-mailmap", - "gix-object", - "gix-odb", - "gix-pack", - "gix-path", - "gix-prompt", - "gix-ref", - "gix-refspec", - "gix-revision", - "gix-sec", - "gix-tempfile", - "gix-traverse", - "gix-url", - "gix-utils", - "gix-validate", - "gix-worktree", - "log", - "once_cell", - "signal-hook", - "smallvec", - "thiserror", - "unicode-normalization", -] - -[[package]] -name = "gix-actor" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "848efa0f1210cea8638f95691c82a46f98a74b9e3524f01d4955ebc25a8f84f3" -dependencies = [ - "bstr", - "btoi", - "gix-date", - "itoa", - "nom", - "thiserror", -] - 
-[[package]] -name = "gix-attributes" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3015baa01ad2122fbcaab7863c857a603eb7b7ec12ac8141207c42c6439805e2" -dependencies = [ - "bstr", - "gix-glob", - "gix-path", - "gix-quote", - "kstring", - "log", - "smallvec", - "thiserror", - "unicode-bom", -] - -[[package]] -name = "gix-bitmap" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55a95f4942360766c3880bdb2b4b57f1ef73b190fc424755e7fdf480430af618" -dependencies = [ - "thiserror", -] - -[[package]] -name = "gix-chunk" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0d39583cab06464b8bf73b3f1707458270f0e7383cb24c3c9c1a16e6f792978" -dependencies = [ - "thiserror", -] - -[[package]] -name = "gix-command" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2c6f75c1e0f924de39e750880a6e21307194bb1ab773efe3c7d2d787277f8ab" -dependencies = [ - "bstr", -] - -[[package]] -name = "gix-config" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d252a0eddb6df74600d3d8872dc9fe98835a7da43110411d705b682f49d4ac1" -dependencies = [ - "bstr", - "gix-config-value", - "gix-features", - "gix-glob", - "gix-path", - "gix-ref", - "gix-sec", - "log", - "memchr", - "nom", - "once_cell", - "smallvec", - "thiserror", - "unicode-bom", -] - -[[package]] -name = "gix-config-value" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "786861e84a5793ad5f863d846de5eb064cd23b87e61ad708c8c402608202e7be" -dependencies = [ - "bitflags 2.2.1", - "bstr", - "gix-path", - "libc", - "thiserror", -] - -[[package]] -name = "gix-credentials" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4874a4fc11ffa844a3c2b87a66957bda30a73b577ef1acf15ac34df5745de5ff" -dependencies = [ - "bstr", - 
"gix-command", - "gix-config-value", - "gix-path", - "gix-prompt", - "gix-sec", - "gix-url", - "thiserror", -] - -[[package]] -name = "gix-date" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99056f37270715f5c7584fd8b46899a2296af9cae92463bf58b8bd1f5a78e553" -dependencies = [ - "bstr", - "itoa", - "thiserror", - "time", -] - -[[package]] -name = "gix-diff" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "644a0f2768bc42d7a69289ada80c9e15c589caefc6a315d2307202df83ed1186" -dependencies = [ - "gix-hash", - "gix-object", - "imara-diff", - "thiserror", -] - -[[package]] -name = "gix-discover" -version = "0.18.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a6b61363e63e7cdaa3e6f96acb0257ebdb3d8883e21eba5930c99f07f0a5fc0" -dependencies = [ - "bstr", - "dunce", - "gix-hash", - "gix-path", - "gix-ref", - "gix-sec", - "thiserror", -] - -[[package]] -name = "gix-features" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf69b0f5c701cc3ae22d3204b671907668f6437ca88862d355eaf9bc47a4f897" -dependencies = [ - "crc32fast", - "flate2", - "gix-hash", - "libc", - "once_cell", - "prodash", - "sha1_smol", - "thiserror", - "walkdir", -] - -[[package]] -name = "gix-fs" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b37a1832f691fdc09910bd267f9a2e413737c1f9ec68c6e31f9e802616278a9" -dependencies = [ - "gix-features", -] - -[[package]] -name = "gix-glob" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c07c98204529ac3f24b34754540a852593d2a4c7349008df389240266627a72a" -dependencies = [ - "bitflags 2.2.1", - "bstr", - "gix-features", - "gix-path", -] - -[[package]] -name = "gix-hash" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"078eec3ac2808cc03f0bddd2704cb661da5c5dc33b41a9d7947b141d499c7c42" -dependencies = [ - "hex", - "thiserror", -] - -[[package]] -name = "gix-hashtable" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afebb85691c6a085b114e01a27f4a61364519298c5826cb87a45c304802299bc" -dependencies = [ - "gix-hash", - "hashbrown 0.13.2", - "parking_lot", -] - -[[package]] -name = "gix-ignore" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba205b6df563e2906768bb22834c82eb46c5fdfcd86ba2c347270bc8309a05b2" -dependencies = [ - "bstr", - "gix-glob", - "gix-path", - "unicode-bom", -] - -[[package]] -name = "gix-index" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f39c1ccc8f1912cbbd5191efc28dbc5f0d0598042aa56bc09427b7c34efab3ba" -dependencies = [ - "bitflags 2.2.1", - "bstr", - "btoi", - "filetime", - "gix-bitmap", - "gix-features", - "gix-hash", - "gix-lock", - "gix-object", - "gix-traverse", - "itoa", - "memmap2", - "smallvec", - "thiserror", -] - -[[package]] -name = "gix-lock" -version = "5.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41b80172055c5d8017a48ddac5cc7a95421c00211047db0165c97853c4f05194" -dependencies = [ - "fastrand", - "gix-tempfile", - "thiserror", -] - -[[package]] -name = "gix-mailmap" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8856cec3bdc3610c06970d28b6cb20a0c6621621cf9a8ec48cbd23f2630f362" -dependencies = [ - "bstr", - "gix-actor", - "thiserror", -] - -[[package]] -name = "gix-object" -version = "0.29.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9bb30ce0818d37096daa29efe361a4bc6dd0b51a5726598898be7e9a40a01e1" -dependencies = [ - "bstr", - "btoi", - "gix-actor", - "gix-features", - "gix-hash", - "gix-validate", - "hex", - "itoa", - "nom", - "smallvec", - "thiserror", -] - -[[package]] 
-name = "gix-odb" -version = "0.45.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bca2f324aa67672b6d0f2c0fa93f96eb6a7029d260e4c1df5dce3c015f5e5add" -dependencies = [ - "arc-swap", - "gix-features", - "gix-hash", - "gix-object", - "gix-pack", - "gix-path", - "gix-quote", - "parking_lot", - "tempfile", - "thiserror", -] - -[[package]] -name = "gix-pack" -version = "0.35.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "164a515900a83257ae4aa80e741655bee7a2e39113fb535d7a5ac623b445ff20" -dependencies = [ - "clru", - "gix-chunk", - "gix-diff", - "gix-features", - "gix-hash", - "gix-hashtable", - "gix-object", - "gix-path", - "gix-tempfile", - "gix-traverse", - "memmap2", - "parking_lot", - "smallvec", - "thiserror", -] - -[[package]] -name = "gix-path" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fc78f47095a0c15aea0e66103838f0748f4494bf7a9555dfe0f00425400396c" -dependencies = [ - "bstr", - "home", - "once_cell", - "thiserror", -] - -[[package]] -name = "gix-prompt" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "330d11fdf88fff3366c2491efde2f3e454958efe7d5ddf60272e8fb1d944bb01" -dependencies = [ - "gix-command", - "gix-config-value", - "parking_lot", - "rustix", - "thiserror", -] - -[[package]] -name = "gix-quote" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a282f5a8d9ee0b09ec47390ac727350c48f2f5c76d803cd8da6b3e7ad56e0bcb" -dependencies = [ - "bstr", - "btoi", - "thiserror", -] - -[[package]] -name = "gix-ref" -version = "0.29.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e03989e9d49954368e1b526578230fc7189d1634acdfbe79e9ba1de717e15d5" -dependencies = [ - "gix-actor", - "gix-features", - "gix-fs", - "gix-hash", - "gix-lock", - "gix-object", - "gix-path", - "gix-tempfile", - "gix-validate", - "memmap2", - "nom", - 
"thiserror", -] - -[[package]] -name = "gix-refspec" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a6ea733820df67e4cd7797deb12727905824d8f5b7c59d943c456d314475892" -dependencies = [ - "bstr", - "gix-hash", - "gix-revision", - "gix-validate", - "smallvec", - "thiserror", -] - -[[package]] -name = "gix-revision" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "810f35e9afeccca999d5d348b239f9c162353127d2e13ff3240e31b919e35476" -dependencies = [ - "bstr", - "gix-date", - "gix-hash", - "gix-hashtable", - "gix-object", - "thiserror", -] - -[[package]] -name = "gix-sec" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "794520043d5a024dfeac335c6e520cb616f6963e30dab995892382e998c12897" -dependencies = [ - "bitflags 2.2.1", - "gix-path", - "libc", - "windows", -] - -[[package]] -name = "gix-tempfile" -version = "5.0.2" +name = "git2" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2ceb30a610e3f5f2d5f9a5114689fde507ba9417705a8cf3429604275b2153c" +checksum = "8b7905cdfe33d31a88bb2e8419ddd054451f5432d1da9eaf2ac7804ee1ea12d5" dependencies = [ + "bitflags 1.3.2", "libc", - "once_cell", - "parking_lot", - "signal-hook", - "signal-hook-registry", - "tempfile", -] - -[[package]] -name = "gix-traverse" -version = "0.25.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5be1e807f288c33bb005075111886cceb43ed8a167b3182a0f62c186e2a0dd1" -dependencies = [ - "gix-hash", - "gix-hashtable", - "gix-object", - "thiserror", -] - -[[package]] -name = "gix-url" -version = "0.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfc77f89054297cc81491e31f1bab4027e554b5ef742a44bd7035db9a0f78b76" -dependencies = [ - "bstr", - "gix-features", - "gix-path", - "home", - "thiserror", + "libgit2-sys", + "log", "url", ] -[[package]] -name = 
"gix-utils" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c10b69beac219acb8df673187a1f07dde2d74092f974fb3f9eb385aeb667c909" -dependencies = [ - "fastrand", -] - -[[package]] -name = "gix-validate" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bd629d3680773e1785e585d76fd4295b740b559cad9141517300d99a0c8c049" -dependencies = [ - "bstr", - "thiserror", -] - -[[package]] -name = "gix-worktree" -version = "0.17.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a69eaff0ae973a9d37c40f02ae5ae50fa726c8fc2fd3ab79d0a19eb61975aafa" -dependencies = [ - "bstr", - "filetime", - "gix-attributes", - "gix-features", - "gix-fs", - "gix-glob", - "gix-hash", - "gix-ignore", - "gix-index", - "gix-object", - "gix-path", - "io-close", - "thiserror", -] - [[package]] name = "glob" version = "0.3.1" @@ -1741,9 +1106,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.18" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17f8a914c2987b688368b5138aa05321db91f4090cf26118185672ad588bce21" +checksum = "d357c7ae988e7d2182f7d7871d0b963962420b0678b0997ce7de72001aeab782" dependencies = [ "bytes", "fnv", @@ -1804,21 +1169,6 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - -[[package]] -name = "home" -version = "0.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "747309b4b440c06d57b0b25f2aee03ee9b5e5397d288c60e21fc709bb98a7408" -dependencies = [ - "winapi", -] - [[package]] name = "http" version = "0.2.9" @@ -1879,9 
+1229,9 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.23.2" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1788965e61b367cd03a62950836d5cd41560c3577d90e40e0819373194d1661c" +checksum = "0646026eb1b3eea4cd9ba47912ea5ce9cc07713d105b1a14698f4e6433d348b7" dependencies = [ "http", "hyper", @@ -1906,12 +1256,11 @@ dependencies = [ [[package]] name = "iana-time-zone-haiku" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" dependencies = [ - "cxx", - "cxx-build", + "cc", ] [[package]] @@ -1924,16 +1273,6 @@ dependencies = [ "unicode-normalization", ] -[[package]] -name = "imara-diff" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e98c1d0ad70fc91b8b9654b1f33db55e59579d3b3de2bffdced0fdb810570cb8" -dependencies = [ - "ahash", - "hashbrown 0.12.3", -] - [[package]] name = "indexmap" version = "1.9.3" @@ -1965,16 +1304,6 @@ version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" -[[package]] -name = "io-close" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cadcf447f06744f8ce713d2d6239bb5bde2c357a452397a9ed90c625da390bc" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "io-lifetimes" version = "1.0.10" @@ -2018,22 +1347,13 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.61" +version = "0.3.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" +checksum = "2f37a4a5928311ac501dee68b3c7613a1037d0edb30c8e5427bd832d55d1b790" dependencies = [ "wasm-bindgen", ] -[[package]] -name 
= "kstring" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec3066350882a1cd6d950d055997f379ac37fd39f81cd4d8ed186032eb3c5747" -dependencies = [ - "static_assertions", -] - [[package]] name = "lazy_static" version = "1.4.0" @@ -2106,15 +1426,15 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.142" +version = "0.2.144" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317" +checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" [[package]] name = "libflate" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97822bf791bd4d5b403713886a5fbe8bf49520fe78e323b0dc480ca1a03e50b0" +checksum = "5ff4ae71b685bbad2f2f391fe74f6b7659a34871c08b210fdc039e43bee07d18" dependencies = [ "adler32", "crc32fast", @@ -2130,11 +1450,23 @@ dependencies = [ "rle-decode-fast", ] +[[package]] +name = "libgit2-sys" +version = "0.15.1+1.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb4577bde8cdfc7d6a2a4bcb7b049598597de33ffd337276e9c7db6cd4a2cee7" +dependencies = [ + "cc", + "libc", + "libz-sys", + "pkg-config", +] + [[package]] name = "libm" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" +checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" [[package]] name = "libmimalloc-sys" @@ -2147,19 +1479,22 @@ dependencies = [ ] [[package]] -name = "link-cplusplus" -version = "1.0.8" +name = "libz-sys" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" +checksum = "56ee889ecc9568871456d42f603d6a0ce59ff328d291063a45cbdf0036baf6db" dependencies = [ "cc", + "libc", + 
"pkg-config", + "vcpkg", ] [[package]] name = "linux-raw-sys" -version = "0.3.4" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36eb31c1778188ae1e64398743890d0877fef36d11521ac60406b42016e8c2cf" +checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" [[package]] name = "lock_api" @@ -2226,15 +1561,6 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" -[[package]] -name = "memmap2" -version = "0.5.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" -dependencies = [ - "libc", -] - [[package]] name = "memoffset" version = "0.8.0" @@ -2259,17 +1585,11 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - [[package]] name = "miniz_oxide" -version = "0.6.2" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" dependencies = [ "adler", ] @@ -2292,16 +1612,6 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - [[package]] name = "num" version = 
"0.4.0" @@ -2389,15 +1699,6 @@ dependencies = [ "libc", ] -[[package]] -name = "num_threads" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" -dependencies = [ - "libc", -] - [[package]] name = "object_store" version = "0.5.6" @@ -2466,9 +1767,9 @@ dependencies = [ [[package]] name = "parquet" -version = "38.0.0" +version = "39.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cbd51311f8d9ff3d2697b1522b18a588782e097d313a1a278b0faf2ccf2d3f6" +checksum = "b0a1e6fa27f09ebddba280f5966ef435f3ac4d74cfc3ffe370fd3fd59c2e004d" dependencies = [ "ahash", "arrow-array", @@ -2581,9 +1882,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" +checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" [[package]] name = "ppv-lite86" @@ -2593,12 +1894,12 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "prettyplease" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ceca8aaf45b5c46ec7ed39fff75f57290368c1846d33d24a122ca81416ab058" +checksum = "617feabb81566b593beb4886fb8c1f38064169dae4dccad0e3220160c3b37203" dependencies = [ "proc-macro2", - "syn 2.0.15", + "syn 2.0.16", ] [[package]] @@ -2609,19 +1910,13 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.56" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" +checksum = 
"fa1fb82fc0c281dd9671101b66b771ebbe1eaf967b96ac8740dcba4b70005ca8" dependencies = [ "unicode-ident", ] -[[package]] -name = "prodash" -version = "23.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9516b775656bc3e8985e19cd4b8c0c0de045095074e453d2c0a513b5f978392d" - [[package]] name = "prost" version = "0.11.9" @@ -2761,9 +2056,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.26" +version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" +checksum = "8f4f29d145265ec1c483c7c654450edde0bfe043d3938d6972630663356d9500" dependencies = [ "proc-macro2", ] @@ -2818,38 +2113,26 @@ dependencies = [ [[package]] name = "regex" -version = "1.8.1" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af83e617f331cc6ae2da5443c602dfa5af81e517212d9d611a5b3ba1777b5370" +checksum = "d1a59b5d8e97dee33696bf13c5ba8ab85341c002922fba050069326b9c498974" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.7.1", + "regex-syntax", ] -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" - -[[package]] -name = "regex-syntax" -version = "0.6.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" - [[package]] name = "regex-syntax" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c" +checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" [[package]] name = "regress" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d995d590bd8ec096d1893f414bf3f5e8b0ee4c9eed9a5642b9766ef2c8e2e8e9" +checksum = "82a9ecfa0cb04d0b04dddb99b8ccf4f66bc8dfd23df694b398570bd8ae3a50fb" dependencies = [ "hashbrown 0.13.2", "memchr", @@ -2857,9 +2140,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.16" +version = "0.11.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27b71749df584b7f4cac2c426c127a7c785a5106cc98f7a8feb044115f0fa254" +checksum = "cde824a14b7c14f85caff81225f411faacc04a2013f41670f41443742b1c1c55" dependencies = [ "base64", "bytes", @@ -2928,9 +2211,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.14" +version = "0.37.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b864d3c18a5785a05953adeed93e2dca37ed30f18e69bba9f30079d51f363f" +checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d" dependencies = [ "bitflags 1.3.2", "errno", @@ -2942,14 +2225,14 @@ dependencies = [ [[package]] name = "rustls" -version = "0.20.8" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fff78fc74d175294f4e83b28343315ffcfb114b156f0185e9741cb5570f50e2f" +checksum = "c911ba11bc8433e811ce56fde130ccf32f5127cab0e0194e9c68c5a5b671791e" dependencies = [ "log", "ring", + "rustls-webpki", "sct", - "webpki", ] [[package]] @@ -2961,6 +2244,16 @@ dependencies = [ "base64", ] +[[package]] +name = "rustls-webpki" +version = "0.100.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6207cd5ed3d8dca7816f8f3725513a34609c0c765bf652b8c3cb4cfd87db46b" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "rustversion" version = "1.0.12" @@ -3012,12 +2305,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" -[[package]] -name = "scratch" -version = "1.0.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1792db035ce95be60c3f8853017b3999209281c24e2ba5bc8e59bf97a0c590c1" - [[package]] name = "sct" version = "0.7.0" @@ -3042,22 +2329,22 @@ checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc" [[package]] name = "serde" -version = "1.0.160" +version = "1.0.163" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c" +checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.160" +version = "1.0.163" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df" +checksum = "8c805777e3930c8883389c602315a24224bcc738b63905ef87cd1420353ea93e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.16", ] [[package]] @@ -3084,13 +2371,14 @@ dependencies = [ [[package]] name = "serde_tokenstream" -version = "0.1.7" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "797ba1d80299b264f3aac68ab5d12e5825a561749db4df7cd7c8083900c5d4e9" +checksum = "8a00ffd23fd882d096f09fcaae2a9de8329a328628e86027e049ee051dc1621f" dependencies = [ "proc-macro2", + "quote", "serde", - "syn 1.0.109", + "syn 2.0.16", ] [[package]] @@ -3118,12 +2406,6 @@ dependencies = [ "unsafe-libyaml", ] -[[package]] -name = "sha1_smol" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae1a47186c03a32177042e55dbc5fd5aee900b8e0069a8d70fba96a9375cd012" - [[package]] name = "sha2" version = "0.10.6" @@ -3135,25 +2417,6 @@ dependencies = [ "digest", ] -[[package]] -name = "signal-hook" -version = "0.3.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "732768f1176d21d09e076c23a93123d40bba92d50c4058da34d45c8de8e682b9" 
-dependencies = [ - "libc", - "signal-hook-registry", -] - -[[package]] -name = "signal-hook-registry" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" -dependencies = [ - "libc", -] - [[package]] name = "siphasher" version = "0.3.10" @@ -3267,11 +2530,11 @@ dependencies = [ [[package]] name = "substrait" -version = "0.8.1" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54cd43d44620f716d55d46b998b3cf1baab2935aaa8adc14e3d3d9a465ddae15" +checksum = "9df5d9e071804204172dc77e707c363f187e7f6566f9c78e5100c9a8f5ea434e" dependencies = [ - "gix", + "git2", "heck", "prettyplease", "prost", @@ -3283,16 +2546,16 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.15", + "syn 2.0.16", "typify", "walkdir", ] [[package]] name = "subtle" -version = "2.4.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" +checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" [[package]] name = "syn" @@ -3307,9 +2570,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.15" +version = "2.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" +checksum = "a6f671d4b5ffdb8eadec19c0ae67fe2639df8684bd7bc4b83d986b8db549cf01" dependencies = [ "proc-macro2", "quote", @@ -3318,9 +2581,9 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.12.6" +version = "0.12.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae9980cab1db3fceee2f6c6f643d5d8de2997c58ee8d25fb0cc8a9e9e7348e5" +checksum = "fd1ba337640d60c3e96bc6f0638a939b9c9a7f2c316a1598c279828b3d1dc8c5" [[package]] name = "tempfile" @@ -3335,15 +2598,6 @@ dependencies = [ "windows-sys 0.45.0", ] 
-[[package]] -name = "termcolor" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" -dependencies = [ - "winapi-util", -] - [[package]] name = "thiserror" version = "1.0.40" @@ -3361,7 +2615,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.16", ] [[package]] @@ -3375,35 +2629,6 @@ dependencies = [ "ordered-float", ] -[[package]] -name = "time" -version = "0.3.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" -dependencies = [ - "itoa", - "libc", - "num_threads", - "serde", - "time-core", - "time-macros", -] - -[[package]] -name = "time-core" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" - -[[package]] -name = "time-macros" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd80a657e71da814b8e5d60d3374fc6d35045062245d80224748ae522dd76f36" -dependencies = [ - "time-core", -] - [[package]] name = "tiny-keccak" version = "2.0.2" @@ -3430,9 +2655,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.27.0" +version = "1.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0de47a4eecbe11f498978a9b29d792f0d2692d1dd003650c24c76510e3bc001" +checksum = "0aa32867d44e6f2ce3385e89dceb990188b8bb0fb25b0cf576647a6f98ac5105" dependencies = [ "autocfg", "bytes", @@ -3443,36 +2668,35 @@ dependencies = [ "pin-project-lite", "socket2", "tokio-macros", - "windows-sys 0.45.0", + "windows-sys 0.48.0", ] [[package]] name = "tokio-macros" -version = "2.0.0" +version = "2.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "61a573bdc87985e9d6ddeed1b3d864e8a302c847e40d647746df2f1de209d1ce" +checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.16", ] [[package]] name = "tokio-rustls" -version = "0.23.4" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" +checksum = "e0d409377ff5b1e3ca6437aa86c1eb7d40c134bfec254e44c830defa92669db5" dependencies = [ "rustls", "tokio", - "webpki", ] [[package]] name = "tokio-stream" -version = "0.1.12" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fb52b74f05dbf495a8fba459fdc331812b96aa086d9eb78101fa0d4569c3313" +checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842" dependencies = [ "futures-core", "pin-project-lite", @@ -3481,9 +2705,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.7" +version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5427d89453009325de0d8f342c9490009f76e999cb7672d77e46267448f7e6b2" +checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d" dependencies = [ "bytes", "futures-core", @@ -3519,14 +2743,14 @@ checksum = "0f57e3ca2a01450b1a921183a9c9cbfda207fd822cef4ccb00a65402cbba7a74" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.16", ] [[package]] name = "tracing-core" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a" +checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" dependencies = [ "once_cell", ] @@ -3566,9 +2790,9 @@ checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" [[package]] name = "typify" 
-version = "0.0.11" +version = "0.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30bfde96849e25d7feef1bbf652e9cfc51deb63203fdc07b115b8bc3bcfe20b9" +checksum = "a6658d09e71bfe59e7987dc95ee7f71809fdb5793ab0cdc1503cc0073990484d" dependencies = [ "typify-impl", "typify-macro", @@ -3576,9 +2800,9 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.0.11" +version = "0.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95d27d749378ceab6ec22188ed7ad102205c89ddb92ab662371c850ffc71aa1a" +checksum = "34d3bb47587b13edf526d6ed02bf360ecefe083ab47a4ef29fc43112828b2bef" dependencies = [ "heck", "log", @@ -3587,16 +2811,16 @@ dependencies = [ "regress", "schemars", "serde_json", - "syn 1.0.109", + "syn 2.0.16", "thiserror", "unicode-ident", ] [[package]] name = "typify-macro" -version = "0.0.11" +version = "0.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35db6fc2bd9220ecdac6eeb88158824b83610de3dda0c6d0f2142b49efd858b0" +checksum = "d3f7e627c18be12d53bc1f261830b9c2763437b6a86ac57293b9085af2d32ffe" dependencies = [ "proc-macro2", "quote", @@ -3604,7 +2828,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 1.0.109", + "syn 2.0.16", "typify-impl", ] @@ -3614,12 +2838,6 @@ version = "0.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" -[[package]] -name = "unicode-bom" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98e90c70c9f0d4d1ee6d0a7d04aa06cb9bbd53d8cfbdd62a0269a7c2eb640552" - [[package]] name = "unicode-ident" version = "1.0.8" @@ -3678,14 +2896,20 @@ dependencies = [ [[package]] name = "uuid" -version = "1.3.2" +version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dad5567ad0cf5b760e5665964bec1b47dfd077ba8a2544b513f3556d3d239a2" +checksum = 
"345444e32442451b267fc254ae85a209c64be56d2890e601a0c37ff0c3c5ecd2" dependencies = [ "getrandom", "serde", ] +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" version = "0.9.4" @@ -3720,9 +2944,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.84" +version = "0.2.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" +checksum = "5bba0e8cb82ba49ff4e229459ff22a191bbe9a1cb3a341610c9c33efc27ddf73" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -3730,24 +2954,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.84" +version = "0.2.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" +checksum = "19b04bc93f9d6bdee709f6bd2118f57dd6679cf1176a1af464fca3ab0d66d8fb" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.16", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.34" +version = "0.4.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f219e0d211ba40266969f6dbdd90636da12f75bee4fc9d6c23d1260dadb51454" +checksum = "2d1985d03709c53167ce907ff394f5316aa22cb4e12761295c5dc57dacb6297e" dependencies = [ "cfg-if", "js-sys", @@ -3757,9 +2981,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.84" +version = "0.2.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" +checksum = "14d6b024f1a526bb0234f52840389927257beb670610081360e5a03c5df9c258" dependencies = [ "quote", 
"wasm-bindgen-macro-support", @@ -3767,22 +2991,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.84" +version = "0.2.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" +checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.16", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.84" +version = "0.2.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" +checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93" [[package]] name = "wasm-streams" @@ -3799,9 +3023,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.61" +version = "0.3.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97" +checksum = "3bdd9ef4e984da1187bf8110c5cf5b845fbc87a23602cdf912386a76fcd3a7c2" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/Cargo.toml b/Cargo.toml index cad16cc86..8745d9263 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "24.0.0" +version = "25.0.0" homepage = "https://github.com/apache/arrow-datafusion-python" repository = "https://github.com/apache/arrow-datafusion-python" authors = ["Apache Arrow "] @@ -36,12 +36,12 @@ protoc = [ "datafusion-substrait/protoc" ] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.18.1", features = ["extension-module", "abi3", "abi3-py37"] } -datafusion = { version = "24.0.0" , features = ["pyarrow", "avro"] } -datafusion-common = { version = "24.0.0", features = ["pyarrow"] } -datafusion-expr = "24.0.0" 
-datafusion-optimizer = "24.0.0" -datafusion-sql = "24.0.0" -datafusion-substrait = "24.0.0" +datafusion = { version = "25.0.0" , features = ["pyarrow", "avro"] } +datafusion-common = { version = "25.0.0", features = ["pyarrow"] } +datafusion-expr = "25.0.0" +datafusion-optimizer = "25.0.0" +datafusion-sql = "25.0.0" +datafusion-substrait = "25.0.0" prost = "0.11" prost-types = "0.11" uuid = { version = "1.3", features = ["v4"] } diff --git a/dev/release/generate-changelog.md b/dev/release/generate-changelog.py old mode 100644 new mode 100755 similarity index 69% rename from dev/release/generate-changelog.md rename to dev/release/generate-changelog.py index caa6ae647..58e6781a3 --- a/dev/release/generate-changelog.md +++ b/dev/release/generate-changelog.py @@ -23,19 +23,27 @@ def print_pulls(repo_name, title, pulls): -if len(pulls) > 0: -print("**{}:**".format(title)) -print() -for (pull, commit) in pulls: -url = "https://github.com/{}/pull/{}".format(repo_name, pull.number) -print("- {} [#{}]({}) ({})".format(pull.title, pull.number, url, commit.author.login)) -print() + if len(pulls) > 0: + print("**{}:**".format(title)) + print() + for (pull, commit) in pulls: + url = "https://github.com/{}/pull/{}".format( + repo_name, pull.number + ) + print( + "- {} [#{}]({}) ({})".format( + pull.title, pull.number, url, commit.author.login + ) + ) + print() def generate_changelog(repo, repo_name, tag1, tag2): # get a list of commits between two tags - print(f"Fetching list of commits between {tag1} and {tag2}", file=sys.stderr) + print( + f"Fetching list of commits between {tag1} and {tag2}", file=sys.stderr + ) comparison = repo.compare(tag1, tag2) # get the pull requests for these commits @@ -52,7 +60,7 @@ def generate_changelog(repo, repo_name, tag1, tag2): all_pulls.append((pull, commit)) # we split the pulls into categories - #TODO: make categories configurable + # TODO: make categories configurable breaking = [] bugs = [] docs = [] @@ -63,25 +71,25 @@ def 
generate_changelog(repo, repo_name, tag1, tag2): for (pull, commit) in all_pulls: # see if PR title uses Conventional Commits - cc_type = '' - cc_scope = '' - cc_breaking = '' - parts = re.findall(r'^([a-z]+)(\([a-z]+\))?(!)?:', pull.title) + cc_type = "" + # cc_scope = '' + cc_breaking = "" + parts = re.findall(r"^([a-z]+)(\([a-z]+\))?(!)?:", pull.title) if len(parts) == 1: parts_tuple = parts[0] - cc_type = parts_tuple[0] # fix, feat, docs, chore - cc_scope = parts_tuple[1] # component within project - cc_breaking = parts_tuple[2] == '!' + cc_type = parts_tuple[0] # fix, feat, docs, chore + # cc_scope = parts_tuple[1] # component within project + cc_breaking = parts_tuple[2] == "!" labels = [label.name for label in pull.labels] - #print(pull.number, labels, parts, file=sys.stderr) - if 'api change' in labels or cc_breaking: + # print(pull.number, labels, parts, file=sys.stderr) + if "api change" in labels or cc_breaking: breaking.append((pull, commit)) - elif 'bug' in labels or cc_type == 'fix': + elif "bug" in labels or cc_type == "fix": bugs.append((pull, commit)) - elif 'enhancement' in labels or cc_type == 'feat': + elif "enhancement" in labels or cc_type == "feat": enhancements.append((pull, commit)) - elif 'documentation' in labels or cc_type == 'docs': + elif "documentation" in labels or cc_type == "docs": docs.append((pull, commit)) # produce the changelog content @@ -94,12 +102,14 @@ def generate_changelog(repo, repo_name, tag1, tag2): def cli(args=None): -"""Process command line arguments.""" -if not args: -args = sys.argv[1:] + """Process command line arguments.""" + if not args: + args = sys.argv[1:] parser = argparse.ArgumentParser() - parser.add_argument("project", help="The project name e.g. apache/arrow-datafusion-python") + parser.add_argument( + "project", help="The project name e.g. 
apache/arrow-datafusion-python" + ) parser.add_argument("tag1", help="The previous release tag") parser.add_argument("tag2", help="The current release tag") args = parser.parse_args() @@ -110,5 +120,6 @@ def cli(args=None): repo = g.get_repo(args.project) generate_changelog(repo, args.project, args.tag1, args.tag2) + if __name__ == "__main__": -cli() \ No newline at end of file + cli() diff --git a/src/expr.rs b/src/expr.rs index c002b3291..1a9adf892 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -15,17 +15,22 @@ // specific language governing permissions and limitations // under the License. -use datafusion_common::DFField; -use datafusion_expr::expr::{AggregateFunction, Sort, WindowFunction}; -use datafusion_expr::utils::exprlist_to_fields; use pyo3::{basic::CompareOp, prelude::*}; use std::convert::{From, Into}; use datafusion::arrow::datatypes::DataType; use datafusion::arrow::pyarrow::PyArrowType; +use datafusion::scalar::ScalarValue; +use datafusion_common::DFField; use datafusion_expr::{ - col, lit, Between, BinaryExpr, Case, Cast, Expr, GetIndexedField, Like, LogicalPlan, Operator, - TryCast, + col, + expr::{ + AggregateFunction, AggregateUDF, InList, InSubquery, ScalarFunction, ScalarUDF, Sort, + WindowFunction, + }, + lit, + utils::exprlist_to_fields, + Between, BinaryExpr, Case, Cast, Expr, GetIndexedField, Like, LogicalPlan, Operator, TryCast, }; use crate::common::data_type::{DataTypeMap, RexType}; @@ -35,7 +40,6 @@ use crate::expr::binary_expr::PyBinaryExpr; use crate::expr::column::PyColumn; use crate::expr::literal::PyLiteral; use crate::sql::logical::PyLogicalPlan; -use datafusion::scalar::ScalarValue; use self::alias::PyAlias; use self::bool_expr::{ @@ -360,13 +364,13 @@ impl PyExpr { | Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) | Expr::Sort(Sort { expr, .. }) - | Expr::InSubquery { expr, .. } => Ok(vec![PyExpr::from(*expr.clone())]), + | Expr::InSubquery(InSubquery { expr, .. 
}) => Ok(vec![PyExpr::from(*expr.clone())]), // Expr variants containing a collection of Expr(s) for operands Expr::AggregateFunction(AggregateFunction { args, .. }) - | Expr::AggregateUDF { args, .. } - | Expr::ScalarFunction { args, .. } - | Expr::ScalarUDF { args, .. } + | Expr::AggregateUDF(AggregateUDF { args, .. }) + | Expr::ScalarFunction(ScalarFunction { args, .. }) + | Expr::ScalarUDF(ScalarUDF { args, .. }) | Expr::WindowFunction(WindowFunction { args, .. }) => { Ok(args.iter().map(|arg| PyExpr::from(arg.clone())).collect()) } @@ -394,7 +398,7 @@ impl PyExpr { Ok(operands) } - Expr::InList { expr, list, .. } => { + Expr::InList(InList { expr, list, .. }) => { let mut operands: Vec = vec![PyExpr::from(*expr.clone())]; for list_elem in list { operands.push(PyExpr::from(list_elem.clone())); @@ -451,8 +455,8 @@ impl PyExpr { op, right: _, }) => format!("{op}"), - Expr::ScalarFunction { fun, args: _ } => format!("{fun}"), - Expr::ScalarUDF { fun, .. } => fun.name.clone(), + Expr::ScalarFunction(ScalarFunction { fun, args: _ }) => format!("{fun}"), + Expr::ScalarUDF(ScalarUDF { fun, .. }) => fun.name.clone(), Expr::Cast { .. } => "cast".to_string(), Expr::Between { .. } => "between".to_string(), Expr::Case { .. 
} => "case".to_string(), diff --git a/src/functions.rs b/src/functions.rs index 8acffeb55..c82361a3b 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -18,10 +18,13 @@ use pyo3::{prelude::*, wrap_pyfunction}; use datafusion_common::Column; -use datafusion_expr::expr::AggregateFunction; -use datafusion_expr::expr::{Sort, WindowFunction}; -use datafusion_expr::window_function::find_df_window_func; -use datafusion_expr::{aggregate_function, lit, BuiltinScalarFunction, Expr, WindowFrame}; +use datafusion_expr::{ + aggregate_function, + expr::{AggregateFunction, ScalarFunction, Sort, WindowFunction}, + lit, + window_function::find_df_window_func, + BuiltinScalarFunction, Expr, WindowFrame, +}; use crate::errors::DataFusionError; use crate::expr::PyExpr; @@ -106,6 +109,7 @@ fn count_star() -> PyResult { args: vec![lit(1)], distinct: false, filter: None, + order_by: None, }), }) } @@ -153,10 +157,10 @@ macro_rules! scalar_function { #[pyfunction] #[pyo3(signature = (*args))] fn $NAME(args: Vec) -> PyExpr { - let expr = datafusion_expr::Expr::ScalarFunction { + let expr = datafusion_expr::Expr::ScalarFunction(ScalarFunction { fun: BuiltinScalarFunction::$FUNC, args: args.into_iter().map(|e| e.into()).collect(), - }; + }); expr.into() } }; @@ -176,6 +180,7 @@ macro_rules! 
aggregate_function { args: args.into_iter().map(|e| e.into()).collect(), distinct, filter: None, + order_by: None, }); expr.into() } diff --git a/src/pyarrow_filter_expression.rs b/src/pyarrow_filter_expression.rs index d35101237..64124fb12 100644 --- a/src/pyarrow_filter_expression.rs +++ b/src/pyarrow_filter_expression.rs @@ -22,7 +22,7 @@ use std::convert::TryFrom; use std::result::Result; use datafusion_common::{Column, ScalarValue}; -use datafusion_expr::{Between, BinaryExpr, Expr, Operator}; +use datafusion_expr::{expr::InList, Between, BinaryExpr, Expr, Operator}; use crate::errors::DataFusionError; @@ -161,11 +161,11 @@ impl TryFrom<&Expr> for PyArrowFilterExpression { Ok(if *negated { invert.call1((ret,))? } else { ret }) } - Expr::InList { + Expr::InList(InList { expr, list, negated, - } => { + }) => { let expr = PyArrowFilterExpression::try_from(expr.as_ref())? .0 .into_ref(py); From d912db5ea6ba5fc373a8412e505399bfe21c9a90 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Fri, 26 May 2023 18:44:34 -0400 Subject: [PATCH 048/413] Add Expr::Case when_then_else support to rex_call_operands function (#388) * Add Expr::Case when_then_else support to rex_call_operands function * Update gitignore and formatting * Update gitignore and formatting --- .gitignore | 5 ++++- src/expr.rs | 19 +++++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 1d0a84a43..365b89d5c 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,9 @@ __pycache__/ *.py[cod] *$py.class +# Python dist ignore +dist + # C extensions *.so @@ -24,4 +27,4 @@ apache-rat-*.jar .env CHANGELOG.md.bak -docs/mdbook/book \ No newline at end of file +docs/mdbook/book diff --git a/src/expr.rs b/src/expr.rs index 1a9adf892..819edff2a 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -384,14 +384,21 @@ impl PyExpr { let mut operands: Vec = Vec::new(); if let Some(e) = expr { - operands.push(PyExpr::from(*e.clone())); + for (when, then) in when_then_expr { + 
operands.push(PyExpr::from(Expr::BinaryExpr(BinaryExpr::new( + Box::new(*e.clone()), + Operator::Eq, + Box::new(*when.clone()), + )))); + operands.push(PyExpr::from(*then.clone())); + } + } else { + for (when, then) in when_then_expr { + operands.push(PyExpr::from(*when.clone())); + operands.push(PyExpr::from(*then.clone())); + } }; - for (when, then) in when_then_expr { - operands.push(PyExpr::from(*when.clone())); - operands.push(PyExpr::from(*then.clone())); - } - if let Some(e) = else_expr { operands.push(PyExpr::from(*e.clone())); }; From 5664a1e38f8b45af15afd60bcef841df01da655e Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Fri, 26 May 2023 18:45:05 -0400 Subject: [PATCH 049/413] Introduce BaseSessionContext abstract class (#390) --- datafusion/context.py | 45 +++++++++++++++++++++++++++ datafusion/cudf.py | 14 ++++++--- datafusion/pandas.py | 14 ++++++--- datafusion/polars.py | 14 ++++++--- docs/mdbook/src/usage/create-table.md | 2 +- examples/sql-on-cudf.py | 2 +- examples/sql-on-pandas.py | 2 +- examples/sql-on-polars.py | 2 +- 8 files changed, 76 insertions(+), 19 deletions(-) create mode 100644 datafusion/context.py diff --git a/datafusion/context.py b/datafusion/context.py new file mode 100644 index 000000000..aa9c9a8af --- /dev/null +++ b/datafusion/context.py @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from abc import ABC, abstractmethod + + +class BaseSessionContext(ABC): + """ + Abstraction defining all methods, properties, and common functionality + shared amongst implementations using DataFusion as their SQL Parser/Engine + """ + + @abstractmethod + def register_table( + self, + table_name: str, + path: str, + **kwargs, + ): + pass + + # TODO: Remove abstraction, this functionality can be shared + # between all implementing classes since it just prints the + # logical plan from DataFusion + @abstractmethod + def explain(self, sql): + pass + + @abstractmethod + def sql(self, sql): + pass diff --git a/datafusion/cudf.py b/datafusion/cudf.py index d5f02156f..594e5efea 100644 --- a/datafusion/cudf.py +++ b/datafusion/cudf.py @@ -17,18 +17,15 @@ import cudf import datafusion +from datafusion.context import BaseSessionContext from datafusion.expr import Projection, TableScan, Column -class SessionContext: +class SessionContext(BaseSessionContext): def __init__(self): self.datafusion_ctx = datafusion.SessionContext() self.parquet_tables = {} - def register_parquet(self, name, path): - self.parquet_tables[name] = path - self.datafusion_ctx.register_parquet(name, path) - def to_cudf_expr(self, expr): # get Python wrapper for logical expression expr = expr.to_variant() @@ -55,6 +52,13 @@ def to_cudf_df(self, plan): "unsupported logical operator: {}".format(type(node)) ) + def register_table(self, name, path, **kwargs): + self.parquet_tables[name] = path + self.datafusion_ctx.register_parquet(name, path) + + def explain(self, sql): + super.explain() + def 
sql(self, sql): datafusion_df = self.datafusion_ctx.sql(sql) plan = datafusion_df.logical_plan() diff --git a/datafusion/pandas.py b/datafusion/pandas.py index f8e56512b..935d9619b 100644 --- a/datafusion/pandas.py +++ b/datafusion/pandas.py @@ -17,18 +17,15 @@ import pandas as pd import datafusion +from datafusion.context import BaseSessionContext from datafusion.expr import Projection, TableScan, Column -class SessionContext: +class SessionContext(BaseSessionContext): def __init__(self): self.datafusion_ctx = datafusion.SessionContext() self.parquet_tables = {} - def register_parquet(self, name, path): - self.parquet_tables[name] = path - self.datafusion_ctx.register_parquet(name, path) - def to_pandas_expr(self, expr): # get Python wrapper for logical expression expr = expr.to_variant() @@ -55,6 +52,13 @@ def to_pandas_df(self, plan): "unsupported logical operator: {}".format(type(node)) ) + def register_table(self, name, path, **kwargs): + self.parquet_tables[name] = path + self.datafusion_ctx.register_parquet(name, path) + + def explain(self, sql): + super.explain() + def sql(self, sql): datafusion_df = self.datafusion_ctx.sql(sql) plan = datafusion_df.logical_plan() diff --git a/datafusion/polars.py b/datafusion/polars.py index a1bafbef8..bbc1fd7c2 100644 --- a/datafusion/polars.py +++ b/datafusion/polars.py @@ -17,19 +17,16 @@ import polars import datafusion +from datafusion.context import BaseSessionContext from datafusion.expr import Projection, TableScan, Aggregate from datafusion.expr import Column, AggregateFunction -class SessionContext: +class SessionContext(BaseSessionContext): def __init__(self): self.datafusion_ctx = datafusion.SessionContext() self.parquet_tables = {} - def register_parquet(self, name, path): - self.parquet_tables[name] = path - self.datafusion_ctx.register_parquet(name, path) - def to_polars_expr(self, expr): # get Python wrapper for logical expression expr = expr.to_variant() @@ -78,6 +75,13 @@ def to_polars_df(self, plan): 
"unsupported logical operator: {}".format(type(node)) ) + def register_table(self, name, path, **kwargs): + self.parquet_tables[name] = path + self.datafusion_ctx.register_parquet(name, path) + + def explain(self, sql): + super.explain() + def sql(self, sql): datafusion_df = self.datafusion_ctx.sql(sql) plan = datafusion_df.logical_plan() diff --git a/docs/mdbook/src/usage/create-table.md b/docs/mdbook/src/usage/create-table.md index 332863a16..98870fac0 100644 --- a/docs/mdbook/src/usage/create-table.md +++ b/docs/mdbook/src/usage/create-table.md @@ -55,5 +55,5 @@ ctx.register_csv("csv_1e8", "G1_1e8_1e2_0_0.csv") You can read a Parquet file into a DataFusion DataFrame. Here's how to read the `yellow_tripdata_2021-01.parquet` file into a table named `taxi`. ```python -ctx.register_parquet("taxi", "yellow_tripdata_2021-01.parquet") +ctx.register_table("taxi", "yellow_tripdata_2021-01.parquet") ``` diff --git a/examples/sql-on-cudf.py b/examples/sql-on-cudf.py index 999756fc8..b64d8f046 100644 --- a/examples/sql-on-cudf.py +++ b/examples/sql-on-cudf.py @@ -19,6 +19,6 @@ ctx = SessionContext() -ctx.register_parquet("taxi", "yellow_tripdata_2021-01.parquet") +ctx.register_table("taxi", "yellow_tripdata_2021-01.parquet") df = ctx.sql("select passenger_count from taxi") print(df) diff --git a/examples/sql-on-pandas.py b/examples/sql-on-pandas.py index 0efd77631..e3312a201 100644 --- a/examples/sql-on-pandas.py +++ b/examples/sql-on-pandas.py @@ -19,6 +19,6 @@ ctx = SessionContext() -ctx.register_parquet("taxi", "yellow_tripdata_2021-01.parquet") +ctx.register_table("taxi", "yellow_tripdata_2021-01.parquet") df = ctx.sql("select passenger_count from taxi") print(df) diff --git a/examples/sql-on-polars.py b/examples/sql-on-polars.py index c208114c1..dd7a9e021 100644 --- a/examples/sql-on-polars.py +++ b/examples/sql-on-polars.py @@ -19,7 +19,7 @@ ctx = SessionContext() -ctx.register_parquet("taxi", "yellow_tripdata_2021-01.parquet") +ctx.register_table("taxi", 
"yellow_tripdata_2021-01.parquet") df = ctx.sql( "select passenger_count, count(*) from taxi group by passenger_count" ) From 931cabc629a28bb47c030e7e17de59a19feb38dd Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Tue, 30 May 2023 14:51:27 -0400 Subject: [PATCH 050/413] CRUD Schema support for `BaseSessionContext` (#392) * checkpoint commit * Introduce BaseSessionContext abstract class * Introduce abstract methods for CRUD schema operations * Clean up schema.rs file --- .gitignore | 3 + datafusion/context.py | 52 ++++++++++ datafusion/cudf.py | 42 +++++++- datafusion/pandas.py | 32 +++++- datafusion/polars.py | 22 +++- src/common.rs | 7 ++ src/common/function.rs | 55 ++++++++++ src/common/schema.rs | 222 +++++++++++++++++++++++++++++++++++++++++ 8 files changed, 428 insertions(+), 7 deletions(-) create mode 100644 src/common/function.rs create mode 100644 src/common/schema.rs diff --git a/.gitignore b/.gitignore index 365b89d5c..0030b907b 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,9 @@ dist # C extensions *.so +# Python dist +dist + # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: diff --git a/datafusion/context.py b/datafusion/context.py index aa9c9a8af..a36462123 100644 --- a/datafusion/context.py +++ b/datafusion/context.py @@ -16,6 +16,9 @@ # under the License. from abc import ABC, abstractmethod +from typing import Dict + +from datafusion.common import SqlSchema class BaseSessionContext(ABC): @@ -24,6 +27,55 @@ class BaseSessionContext(ABC): shared amongst implementations using DataFusion as their SQL Parser/Engine """ + DEFAULT_CATALOG_NAME = "root" + DEFAULT_SCHEMA_NAME = "datafusion" + + @abstractmethod + def create_schema( + self, + schema_name: str, + **kwargs, + ): + """ + Creates/Registers a logical container that holds database + objects such as tables, views, indexes, and other + related objects. 
It provides a way to group related database + objects together. A schema can be owned by a database + user and can be used to separate objects in different + logical groups for easy management. + """ + pass + + @abstractmethod + def update_schema( + self, + schema_name: str, + new_schema: SqlSchema, + **kwargs, + ): + """ + Updates an existing schema in the SessionContext + """ + pass + + @abstractmethod + def drop_schema( + self, + schema_name: str, + **kwargs, + ): + """ + Drops the specified Schema, based on name, from the current context + """ + pass + + @abstractmethod + def show_schemas(self, **kwargs) -> Dict[str, SqlSchema]: + """ + Return all schemas in the current SessionContext impl. + """ + pass + @abstractmethod def register_table( self, diff --git a/datafusion/cudf.py b/datafusion/cudf.py index 594e5efea..e39daea31 100644 --- a/datafusion/cudf.py +++ b/datafusion/cudf.py @@ -15,16 +15,36 @@ # specific language governing permissions and limitations # under the License. +import logging import cudf -import datafusion from datafusion.context import BaseSessionContext from datafusion.expr import Projection, TableScan, Column +from datafusion.common import SqlSchema + +logger = logging.getLogger(__name__) + class SessionContext(BaseSessionContext): - def __init__(self): - self.datafusion_ctx = datafusion.SessionContext() - self.parquet_tables = {} + def __init__(self, context, logging_level=logging.INFO): + """ + Create a new Session. 
+ """ + # Cudf requires a provided context + self.context = context + + # Set the logging level for this SQL context + logging.basicConfig(level=logging_level) + + # Name of the root catalog + self.catalog_name = self.DEFAULT_CATALOG_NAME + # Name of the root schema + self.schema_name = self.DEFAULT_SCHEMA_NAME + # Add the schema to the context + sch = SqlSchema(self.schema_name) + self.schemas = {} + self.schemas[self.schema_name] = sch + self.context.register_schema(self.schema_name, sch) def to_cudf_expr(self, expr): # get Python wrapper for logical expression @@ -52,6 +72,20 @@ def to_cudf_df(self, plan): "unsupported logical operator: {}".format(type(node)) ) + def create_schema(self, schema_name: str, **kwargs): + logger.debug(f"Creating schema: {schema_name}") + self.schemas[schema_name] = SqlSchema(schema_name) + self.context.register_schema(schema_name, SqlSchema(schema_name)) + + def update_schema(self, schema_name: str, new_schema: SqlSchema, **kwargs): + self.schemas[schema_name] = new_schema + + def drop_schema(self, schema_name, **kwargs): + del self.schemas[schema_name] + + def show_schemas(self, **kwargs): + return self.schemas + def register_table(self, name, path, **kwargs): self.parquet_tables[name] = path self.datafusion_ctx.register_parquet(name, path) diff --git a/datafusion/pandas.py b/datafusion/pandas.py index 935d9619b..c2da83ff6 100644 --- a/datafusion/pandas.py +++ b/datafusion/pandas.py @@ -15,17 +15,33 @@ # specific language governing permissions and limitations # under the License. 
+import logging import pandas as pd import datafusion +from datafusion.common import SqlSchema from datafusion.context import BaseSessionContext from datafusion.expr import Projection, TableScan, Column +logger = logging.getLogger(__name__) + class SessionContext(BaseSessionContext): - def __init__(self): + def __init__(self, logging_level=logging.INFO): self.datafusion_ctx = datafusion.SessionContext() self.parquet_tables = {} + # Set the logging level for this SQL context + logging.basicConfig(level=logging_level) + + # Name of the root catalog + self.catalog_name = self.DEFAULT_CATALOG_NAME + # Name of the root schema + self.schema_name = self.DEFAULT_SCHEMA_NAME + # Add the schema to the context + sch = SqlSchema(self.schema_name) + self.schemas[self.schema_name] = sch + self.context.register_schema(self.schema_name, sch) + def to_pandas_expr(self, expr): # get Python wrapper for logical expression expr = expr.to_variant() @@ -52,6 +68,20 @@ def to_pandas_df(self, plan): "unsupported logical operator: {}".format(type(node)) ) + def create_schema(self, schema_name: str, **kwargs): + logger.debug(f"Creating schema: {schema_name}") + self.schemas[schema_name] = SqlSchema(schema_name) + self.context.register_schema(schema_name, SqlSchema(schema_name)) + + def update_schema(self, schema_name: str, new_schema: SqlSchema, **kwargs): + self.schemas[schema_name] = new_schema + + def drop_schema(self, schema_name, **kwargs): + del self.schemas[schema_name] + + def show_schemas(self, **kwargs): + return self.schemas + def register_table(self, name, path, **kwargs): self.parquet_tables[name] = path self.datafusion_ctx.register_parquet(name, path) diff --git a/datafusion/polars.py b/datafusion/polars.py index bbc1fd7c2..e4eb966fc 100644 --- a/datafusion/polars.py +++ b/datafusion/polars.py @@ -14,16 +14,20 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
- +import logging import polars import datafusion from datafusion.context import BaseSessionContext from datafusion.expr import Projection, TableScan, Aggregate from datafusion.expr import Column, AggregateFunction +from datafusion.common import SqlSchema + +logger = logging.getLogger(__name__) + class SessionContext(BaseSessionContext): - def __init__(self): + def __init__(self, logging_level=logging.INFO): self.datafusion_ctx = datafusion.SessionContext() self.parquet_tables = {} @@ -75,6 +79,20 @@ def to_polars_df(self, plan): "unsupported logical operator: {}".format(type(node)) ) + def create_schema(self, schema_name: str, **kwargs): + logger.debug(f"Creating schema: {schema_name}") + self.schemas[schema_name] = SqlSchema(schema_name) + self.context.register_schema(schema_name, SqlSchema(schema_name)) + + def update_schema(self, schema_name: str, new_schema: SqlSchema, **kwargs): + self.schemas[schema_name] = new_schema + + def drop_schema(self, schema_name, **kwargs): + del self.schemas[schema_name] + + def show_schemas(self, **kwargs): + return self.schemas + def register_table(self, name, path, **kwargs): self.parquet_tables[name] = path self.datafusion_ctx.register_parquet(name, path) diff --git a/src/common.rs b/src/common.rs index 8a8e2adf5..45523173c 100644 --- a/src/common.rs +++ b/src/common.rs @@ -20,6 +20,8 @@ use pyo3::prelude::*; pub mod data_type; pub mod df_field; pub mod df_schema; +pub mod function; +pub mod schema; /// Initializes the `common` module to match the pattern of `datafusion-common` https://docs.rs/datafusion-common/18.0.0/datafusion_common/index.html pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { @@ -29,5 +31,10 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/src/common/function.rs b/src/common/function.rs new file mode 100644 index 
000000000..a8d752f16 --- /dev/null +++ b/src/common/function.rs @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::collections::HashMap; + +use datafusion::arrow::datatypes::DataType; +use pyo3::prelude::*; + +use super::data_type::PyDataType; + +#[pyclass(name = "SqlFunction", module = "datafusion.common", subclass)] +#[derive(Debug, Clone)] +pub struct SqlFunction { + pub name: String, + pub return_types: HashMap, DataType>, + pub aggregation: bool, +} + +impl SqlFunction { + pub fn new( + function_name: String, + input_types: Vec, + return_type: PyDataType, + aggregation_bool: bool, + ) -> Self { + let mut func = Self { + name: function_name, + return_types: HashMap::new(), + aggregation: aggregation_bool, + }; + func.add_type_mapping(input_types, return_type); + func + } + + pub fn add_type_mapping(&mut self, input_types: Vec, return_type: PyDataType) { + self.return_types.insert( + input_types.iter().map(|t| t.clone().into()).collect(), + return_type.into(), + ); + } +} diff --git a/src/common/schema.rs b/src/common/schema.rs new file mode 100644 index 000000000..304319369 --- /dev/null +++ b/src/common/schema.rs @@ -0,0 +1,222 @@ +// Licensed to the Apache Software Foundation (ASF) 
under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; + +use datafusion::arrow::datatypes::SchemaRef; +use datafusion_expr::{Expr, TableProviderFilterPushDown, TableSource}; +use pyo3::prelude::*; + +use datafusion_optimizer::utils::split_conjunction; + +use super::{data_type::DataTypeMap, function::SqlFunction}; + +#[pyclass(name = "SqlSchema", module = "datafusion.common", subclass)] +#[derive(Debug, Clone)] +pub struct SqlSchema { + #[pyo3(get, set)] + pub name: String, + #[pyo3(get, set)] + pub tables: Vec, + #[pyo3(get, set)] + pub views: Vec, + #[pyo3(get, set)] + pub functions: Vec, +} + +#[pyclass(name = "SqlTable", module = "datafusion.common", subclass)] +#[derive(Debug, Clone)] +pub struct SqlTable { + #[pyo3(get, set)] + pub name: String, + #[pyo3(get, set)] + pub columns: Vec<(String, DataTypeMap)>, + #[pyo3(get, set)] + pub primary_key: Option, + #[pyo3(get, set)] + pub foreign_keys: Vec, + #[pyo3(get, set)] + pub indexes: Vec, + #[pyo3(get, set)] + pub constraints: Vec, + #[pyo3(get, set)] + pub statistics: SqlStatistics, + #[pyo3(get, set)] + pub filepath: Option, +} + +#[pymethods] +impl SqlTable { + #[new] + pub fn new( + _schema_name: String, + table_name: String, + columns: Vec<(String, DataTypeMap)>, + row_count: f64, + filepath: 
Option, + ) -> Self { + Self { + name: table_name, + columns, + primary_key: None, + foreign_keys: Vec::new(), + indexes: Vec::new(), + constraints: Vec::new(), + statistics: SqlStatistics::new(row_count), + filepath, + } + } +} + +#[pyclass(name = "SqlView", module = "datafusion.common", subclass)] +#[derive(Debug, Clone)] +pub struct SqlView { + #[pyo3(get, set)] + pub name: String, + #[pyo3(get, set)] + pub definition: String, // SQL code that defines the view +} + +#[pymethods] +impl SqlSchema { + #[new] + pub fn new(schema_name: &str) -> Self { + Self { + name: schema_name.to_owned(), + tables: Vec::new(), + views: Vec::new(), + functions: Vec::new(), + } + } + + pub fn table_by_name(&self, table_name: &str) -> Option { + for tbl in &self.tables { + if tbl.name.eq(table_name) { + return Some(tbl.clone()); + } + } + None + } + + pub fn add_table(&mut self, table: SqlTable) { + self.tables.push(table); + } +} + +/// SqlTable wrapper that is compatible with DataFusion logical query plans +pub struct SqlTableSource { + schema: SchemaRef, + statistics: Option, + filepath: Option, +} + +impl SqlTableSource { + /// Initialize a new `EmptyTable` from a schema + pub fn new( + schema: SchemaRef, + statistics: Option, + filepath: Option, + ) -> Self { + Self { + schema, + statistics, + filepath, + } + } + + /// Access optional statistics associated with this table source + pub fn statistics(&self) -> Option<&SqlStatistics> { + self.statistics.as_ref() + } + + /// Access optional filepath associated with this table source + #[allow(dead_code)] + pub fn filepath(&self) -> Option<&String> { + self.filepath.as_ref() + } +} + +/// Implement TableSource, used in the logical query plan and in logical query optimizations +impl TableSource for SqlTableSource { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn supports_filter_pushdown( + &self, + filter: &Expr, + ) -> datafusion_common::Result { + let filters = 
split_conjunction(filter); + if filters.iter().all(|f| is_supported_push_down_expr(f)) { + // Push down filters to the tablescan operation if all are supported + Ok(TableProviderFilterPushDown::Exact) + } else if filters.iter().any(|f| is_supported_push_down_expr(f)) { + // Partially apply the filter in the TableScan but retain + // the Filter operator in the plan as well + Ok(TableProviderFilterPushDown::Inexact) + } else { + Ok(TableProviderFilterPushDown::Unsupported) + } + } + + fn table_type(&self) -> datafusion_expr::TableType { + datafusion_expr::TableType::Base + } + + #[allow(deprecated)] + fn supports_filters_pushdown( + &self, + filters: &[&Expr], + ) -> datafusion_common::Result> { + filters + .iter() + .map(|f| self.supports_filter_pushdown(f)) + .collect() + } + + fn get_logical_plan(&self) -> Option<&datafusion_expr::LogicalPlan> { + None + } +} + +fn is_supported_push_down_expr(_expr: &Expr) -> bool { + // For now we support all kinds of expr's at this level + true +} + +#[pyclass(name = "SqlStatistics", module = "datafusion.common", subclass)] +#[derive(Debug, Clone)] +pub struct SqlStatistics { + row_count: f64, +} + +#[pymethods] +impl SqlStatistics { + #[new] + pub fn new(row_count: f64) -> Self { + Self { row_count } + } + + #[pyo3(name = "getRowCount")] + pub fn get_row_count(&self) -> f64 { + self.row_count + } +} From 51158bd495d215d2260c091af7c0109dbe3b9432 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Tue, 30 May 2023 18:21:22 -0400 Subject: [PATCH 051/413] CRUD Table support for `BaseSessionContext` (#394) * checkpoint commit * Introduce BaseSessionContext abstract class * Introduce abstract methods for CRUD schema operations * Clean up schema.rs file * Introduce CRUD methods for table instances * Add function to drop_table * Add schema_name to drop_table function * remove unused parameter in SqlTable new * Update function to allow for modifying existing tables --- datafusion/context.py | 48 +++++++++++++++++++++++++++++++++++++++++-- 
src/common/schema.rs | 5 ++++- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/datafusion/context.py b/datafusion/context.py index a36462123..6292177f7 100644 --- a/datafusion/context.py +++ b/datafusion/context.py @@ -16,9 +16,9 @@ # under the License. from abc import ABC, abstractmethod -from typing import Dict +from typing import Dict, List -from datafusion.common import SqlSchema +from datafusion.common import SqlSchema, SqlTable class BaseSessionContext(ABC): @@ -76,6 +76,50 @@ def show_schemas(self, **kwargs) -> Dict[str, SqlSchema]: """ pass + @abstractmethod + def create_table( + self, + table_name: str, + schema_name: str = None, + **kwargs, + ): + """ + Creates/Registers a table in the specied schema instance + """ + pass + + @abstractmethod + def update_table( + self, + schema_name: str, + table_name: str, + new_table: SqlTable, + **kwargs, + ): + """ + Updates an existing table in the SessionContext + """ + pass + + @abstractmethod + def drop_table( + self, + schema_name: str, + table_name: str, + **kwargs, + ): + """ + Drops the specified table, based on name, from the current context + """ + pass + + @abstractmethod + def show_tables(self, **kwargs) -> List[SqlTable]: + """ + Return all tables in the current SessionContext impl. 
+ """ + pass + @abstractmethod def register_table( self, diff --git a/src/common/schema.rs b/src/common/schema.rs index 304319369..a003d0ca1 100644 --- a/src/common/schema.rs +++ b/src/common/schema.rs @@ -63,7 +63,6 @@ pub struct SqlTable { impl SqlTable { #[new] pub fn new( - _schema_name: String, table_name: String, columns: Vec<(String, DataTypeMap)>, row_count: f64, @@ -115,6 +114,10 @@ impl SqlSchema { pub fn add_table(&mut self, table: SqlTable) { self.tables.push(table); } + + pub fn drop_table(&mut self, table_name: String) { + self.tables.retain(|x| !x.name.eq(&table_name)); + } } /// SqlTable wrapper that is compatible with DataFusion logical query plans From 1174969f70ae938f12a1a0372770b994f85b0014 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sun, 11 Jun 2023 10:31:34 -0600 Subject: [PATCH 052/413] Prepare for 26.0.0 release (#410) * Upgrade to DF 26 * cargo update * changelog * python lint --- CHANGELOG.md | 11 + Cargo.lock | 436 ++++++++++++----------------- Cargo.toml | 16 +- datafusion/tests/test_dataframe.py | 1 - datafusion/tests/test_substrait.py | 4 +- dev/release/generate-changelog.py | 6 +- src/common/df_field.rs | 2 +- src/expr.rs | 6 +- src/expr/scalar_function.rs | 2 +- src/substrait.rs | 6 +- 10 files changed, 209 insertions(+), 281 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b6d58e4a8..d2f2de219 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,17 @@ # DataFusion Python Changelog +## [26.0.0](https://github.com/apache/arrow-datafusion-python/tree/26.0.0) (2023-06-11) + +[Full Changelog](https://github.com/apache/arrow-datafusion-python/compare/25.0.0...26.0.0) + +**Merged pull requests:** + +- Add Expr::Case when_then_else support to rex_call_operands function [#388](https://github.com/apache/arrow-datafusion-python/pull/388) (jdye64) +- Introduce BaseSessionContext abstract class [#390](https://github.com/apache/arrow-datafusion-python/pull/390) (jdye64) +- CRUD Schema support for `BaseSessionContext` 
[#392](https://github.com/apache/arrow-datafusion-python/pull/392) (jdye64) +- CRUD Table support for `BaseSessionContext` [#394](https://github.com/apache/arrow-datafusion-python/pull/394) (jdye64) + ## [25.0.0](https://github.com/apache/arrow-datafusion-python/tree/25.0.0) (2023-05-23) [Full Changelog](https://github.com/apache/arrow-datafusion-python/compare/24.0.0...25.0.0) diff --git a/Cargo.lock b/Cargo.lock index d8fa65e64..2392f5796 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -29,9 +29,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04" +checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" dependencies = [ "memchr", ] @@ -51,6 +51,12 @@ dependencies = [ "alloc-no-stdlib", ] +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -107,9 +113,9 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" [[package]] name = "arrow" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "218ca81dd088b102c0fd6687c72e73fad1ba93d2ef7b3cf9a1043b04b2c39dbf" +checksum = "6619cab21a0cdd8c9b9f1d9e09bfaa9b1974e5ef809a6566aef0b998caf38ace" dependencies = [ "ahash", "arrow-arith", @@ -130,9 +136,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d49309fa2299ec34a709cfc9f487c41ecaead96d1ab70e21857466346bbbd690" +checksum = "e0dc95485623a76e00929bda8caa40c1f838190952365c4f43a7b9ae86d03e94" dependencies = [ "arrow-array", "arrow-buffer", @@ -145,9 +151,9 
@@ dependencies = [ [[package]] name = "arrow-array" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7a27466d897d99654357a6d95dc0a26931d9e4306e60c14fc31a894edb86579" +checksum = "3267847f53d3042473cfd2c769afd8d74a6d7d201fc3a34f5cb84c0282ef47a7" dependencies = [ "ahash", "arrow-buffer", @@ -162,9 +168,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9405b78106a9d767c7b97c78a70ee1b23ee51a74f5188a821a716d9a85d1af2b" +checksum = "c5f66553e66e120ac4b21570368ee9ebf35ff3f5399f872b0667699e145678f5" dependencies = [ "half", "num", @@ -172,9 +178,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be0ec5a79a87783dc828b7ff8f89f62880b3f553bc5f5b932a82f4a1035024b4" +checksum = "65e6f3579dbf0d97c683d451b2550062b0f0e62a3169bf74238b5f59f44ad6d8" dependencies = [ "arrow-array", "arrow-buffer", @@ -189,9 +195,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "350d8e55c3b2d602a0a04389bcc1da40167657143a9922a7103190603e7b7692" +checksum = "373579c4c1a8f5307d3125b7a89c700fcf8caf85821c77eb4baab3855ae0aba5" dependencies = [ "arrow-array", "arrow-buffer", @@ -208,9 +214,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6f710d98964d2c069b8baf566130045e79e11baa105623f038a6c942f805681" +checksum = "61bc8df9912cca6642665fdf989d6fa0de2570f18a7f709bcf59d29de96d2097" dependencies = [ "arrow-buffer", "arrow-schema", @@ -220,9 +226,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "39.0.0" +version = "40.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c99787cb8fabc187285da9e7182d22f2b80ecfac61ca0a42c4299e9eecdf903" +checksum = "0105dcf5f91daa7182d87b713ee0b32b3bfc88e0c48e7dc3e9d6f1277a07d1ae" dependencies = [ "arrow-array", "arrow-buffer", @@ -234,9 +240,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91c95a58ce63f60d80d7a3a1222d65df0bc060b71d31353c34a8118c2a6eae7b" +checksum = "e73134fb5b5ec8770f8cbb214c2c487b2d350081e403ca4eeeb6f8f5e19846ac" dependencies = [ "arrow-array", "arrow-buffer", @@ -254,9 +260,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4141e6488610cc144e841da3de5f5371488f3cf5bc6bc7b3e752c64e7639c31b" +checksum = "89f25bc66e18d4c2aa1fe2f9bb03e2269da60e636213210385ae41a107f9965a" dependencies = [ "arrow-array", "arrow-buffer", @@ -269,9 +275,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "940191a3c636c111c41e816325b0941484bf904c46de72cd9553acd1afd24d33" +checksum = "1095ff85ea4f5ff02d17b30b089de31b51a50be01c6b674f0a0509ab771232f1" dependencies = [ "ahash", "arrow-array", @@ -284,18 +290,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18c41d058b2895a12f46dfafc306ee3529ad9660406be0ab8a7967d5e27c417e" +checksum = "25187bbef474151a2e4ddec67b9e34bda5cbfba292dc571392fa3a1f71ff5a82" dependencies = [ "bitflags 2.3.1", ] [[package]] name = "arrow-select" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fcbdda2772b7e712e77444f3a71f4ee517095aceb993b35de71de41c70d9b4f" +checksum = 
"fd0d4ee884aec3aa05e41478e3cd312bf609de9babb5d187a43fb45931da4da4" dependencies = [ "arrow-array", "arrow-buffer", @@ -306,9 +312,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7081c34f4b534ad320a03db79d58e38972041bb7c65686b98bbcc2f9a67a9cee" +checksum = "d6d71c3ffe4c07e66ce8fdc6aed5b00e0e60c5144911879b10546f5b72d8fa1c" dependencies = [ "arrow-array", "arrow-buffer", @@ -321,9 +327,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.3.15" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "942c7cd7ae39e91bde4820d74132e9862e62c2f386c3aa90ccf55949f5bad63a" +checksum = "5b0122885821398cc923ece939e24d1056a2384ee719432397fa9db87230ff11" dependencies = [ "bzip2", "flate2", @@ -333,8 +339,8 @@ dependencies = [ "pin-project-lite", "tokio", "xz2", - "zstd 0.11.2+zstd.1.5.2", - "zstd-safe 5.0.2+zstd.1.5.2", + "zstd", + "zstd-safe", ] [[package]] @@ -345,7 +351,7 @@ checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.18", ] [[package]] @@ -356,7 +362,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.18", ] [[package]] @@ -376,9 +382,9 @@ dependencies = [ [[package]] name = "base64" -version = "0.21.1" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f1e31e207a6b8fb791a38ea3105e6cb541f55e4d029902d3039a4ad07cc4105" +checksum = "604178f6c5c21f02dc555784810edfb88d34ac2c73b2eae109655649ee73ce3d" [[package]] name = "bitflags" @@ -403,9 +409,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.3.3" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"42ae2468a89544a466886840aa467a25b766499f4f04bf7d9fcd10ecee9fccef" +checksum = "729b71f35bd3fa1a4c86b85d32c8b9069ea7fe14f7a53cfabb65f62d4265b888" dependencies = [ "arrayref", "arrayvec", @@ -501,12 +507,12 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.24" +version = "0.4.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e3c5919066adf22df73762e50cffcde3a758f2a848b113b586d1f86728b673b" +checksum = "ec837a71355b28f6556dbd569b37b3f363091c0bd4b2e735674521b4c5fd9bc5" dependencies = [ + "android-tzdata", "iana-time-zone", - "num-integer", "num-traits", "serde", "winapi", @@ -536,9 +542,9 @@ dependencies = [ [[package]] name = "comfy-table" -version = "6.1.4" +version = "6.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e7b787b0dc42e8111badfdbe4c3059158ccb2db8780352fa1b01e8ccf45cc4d" +checksum = "7e959d788268e3bf9d35ace83e81b124190378e4c91c9067524675e33394b8ba" dependencies = [ "strum", "strum_macros", @@ -569,9 +575,9 @@ dependencies = [ [[package]] name = "constant_time_eq" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13418e745008f7349ec7e449155f419a61b92b58a99cc3616942b926825ec76b" +checksum = "21a53c0a4d288377e7415b53dcfc3c04da5cdc2cc95c8d5ac178b58f0b861ad6" [[package]] name = "core-foundation-sys" @@ -615,9 +621,9 @@ dependencies = [ [[package]] name = "csv" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b015497079b9a9d69c02ad25de6c0a6edef051ea6360a327d0bd05802ef64ad" +checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" dependencies = [ "csv-core", "itoa", @@ -649,9 +655,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "25.0.0" +version = "26.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9a99e1dc003c0b310b203e4dc2aadc6531dccb37fa681630b588c5154d1b7637" +checksum = "9992c267436551d40b52d65289b144712e7b0ebdc62c8c859fd1574e5f73efbb" dependencies = [ "ahash", "apache-avro", @@ -696,14 +702,14 @@ dependencies = [ "url", "uuid", "xz2", - "zstd 0.12.3+zstd.1.5.2", + "zstd", ] [[package]] name = "datafusion-common" -version = "25.0.0" +version = "26.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "362811fd9cb830877f5a538a19e5f9a1e5519486dcd6dc4f39a3d6192d2f4ba6" +checksum = "c3be97f7a7c720cdbb71e9eeabf814fa6ad8102b9022390f6cac74d3b4af6392" dependencies = [ "apache-avro", "arrow", @@ -718,9 +724,9 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "25.0.0" +version = "26.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b052014fe38299019dd2e7dec6c2dd44c7ad9a5375ab4a36e5ed800700d7eb7a" +checksum = "c77c4b14b809b0e4c5bb101b6834504f06cdbb0d3c643400c61d0d844b33264e" dependencies = [ "dashmap", "datafusion-common", @@ -736,21 +742,24 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "25.0.0" +version = "26.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55b590b184c6f59a65ee3ada666475ff8173bfd409c618c3c29b5abf7ef4a92c" +checksum = "e6ec7409bd45cf4fae6395d7d1024c8a97e543cadc88363e405d2aad5330e5e7" dependencies = [ "ahash", "arrow", "datafusion-common", + "lazy_static", "sqlparser", + "strum", + "strum_macros", ] [[package]] name = "datafusion-optimizer" -version = "25.0.0" +version = "26.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "543311151a4c068b2b663f07905dd4d4a605279dacb24ab9434f8f29a1917cd3" +checksum = "64b537c93f87989c212db92a448a0f5eb4f0995e27199bb7687ae94f8b64a7a8" dependencies = [ "arrow", "async-trait", @@ -766,9 +775,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "25.0.0" +version = "26.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b49cfd92e225d948a2199650e8e292ef1f0a3d62ab9b540795c3d3ba9edc2b2e" +checksum = "f60ee3f53340fdef36ee54d9e12d446ae2718b1d0196ac581f791d34808ec876" dependencies = [ "ahash", "arrow", @@ -799,7 +808,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "25.0.0" +version = "26.0.0" dependencies = [ "async-trait", "datafusion", @@ -818,7 +827,7 @@ dependencies = [ "pyo3-build-config", "rand", "regex-syntax", - "syn 2.0.16", + "syn 2.0.18", "tokio", "url", "uuid", @@ -826,9 +835,9 @@ dependencies = [ [[package]] name = "datafusion-row" -version = "25.0.0" +version = "26.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e98cdd6f0d005bce0035bebe03a35ccb7eca3cf40a9bce27e24b1e2a56941ffc" +checksum = "d58fc64058aa3bcb00077a0d19474a0d584d31dec8c7ac3406868f485f659af9" dependencies = [ "arrow", "datafusion-common", @@ -838,9 +847,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "25.0.0" +version = "26.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9585219bbbad5287c762ad9ede044ab6c0b778fde980c9858b15d6dd5bd80f35" +checksum = "1531f0314151a34bf6c0a83c7261525688b7c729876f53e7896b8f4ca8f57d07" dependencies = [ "arrow", "arrow-schema", @@ -852,9 +861,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "25.0.0" +version = "26.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f0f1a60b63cca4fb2efbf572c3f2875bccbb2dfe448dd562f103ca5d9830216" +checksum = "079d5be5ec59580777bfa16d79187fea99b6498e3e8e07eb36d504a5fe708f13" dependencies = [ "async-recursion", "chrono", @@ -862,6 +871,7 @@ dependencies = [ "itertools", "object_store", "prost", + "prost-types", "substrait", "tokio", ] @@ -912,7 +922,7 @@ checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" dependencies = [ "errno-dragonfly", "libc", - "windows-sys 0.48.0", + "windows-sys", ] 
[[package]] @@ -942,9 +952,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flatbuffers" -version = "23.1.21" +version = "23.5.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77f5399c2c9c50ae9418e522842ad362f61ee48b346ac106807bd355a8a7c619" +checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" dependencies = [ "bitflags 1.3.2", "rustc_version", @@ -968,9 +978,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "form_urlencoded" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8" +checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" dependencies = [ "percent-encoding", ] @@ -1031,7 +1041,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.18", ] [[package]] @@ -1076,9 +1086,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.9" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ "cfg-if", "libc", @@ -1087,9 +1097,9 @@ dependencies = [ [[package]] name = "git2" -version = "0.17.1" +version = "0.17.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b7905cdfe33d31a88bb2e8419ddd054451f5432d1da9eaf2ac7804ee1ea12d5" +checksum = "7b989d6a7ca95a362cf2cfc5ad688b3a467be1f87e480b8dad07fee8c79b0044" dependencies = [ "bitflags 1.3.2", "libc", @@ -1242,9 +1252,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.56" +version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "0722cd7114b7de04316e7ea5456a0bbb20e4adb46fd27a3697adb812cff0f37c" +checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -1265,9 +1275,9 @@ dependencies = [ [[package]] name = "idna" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" dependencies = [ "unicode-bidi", "unicode-normalization", @@ -1306,13 +1316,13 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "io-lifetimes" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" +checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" dependencies = [ "hermit-abi 0.3.1", "libc", - "windows-sys 0.48.0", + "windows-sys", ] [[package]] @@ -1426,9 +1436,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.144" +version = "0.2.146" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" +checksum = "f92be4933c13fd498862a9e02a3055f8a8d9c039ce33db97306fd5a6caa7f29b" [[package]] name = "libflate" @@ -1452,9 +1462,9 @@ dependencies = [ [[package]] name = "libgit2-sys" -version = "0.15.1+1.6.4" +version = "0.15.2+1.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb4577bde8cdfc7d6a2a4bcb7b049598597de33ffd337276e9c7db6cd4a2cee7" +checksum = "a80df2e11fb4a61f4ba2ab42dbe7f74468da143f1a75c74e11dee7c813f694fa" dependencies = [ "cc", "libc", @@ -1498,9 +1508,9 @@ checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" [[package]] name = "lock_api" -version = "0.4.9" 
+version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" +checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" dependencies = [ "autocfg", "scopeguard", @@ -1508,12 +1518,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.17" +version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if", -] +checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" [[package]] name = "lz4" @@ -1596,14 +1603,13 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.6" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9" +checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2" dependencies = [ "libc", - "log", "wasi", - "windows-sys 0.45.0", + "windows-sys", ] [[package]] @@ -1729,9 +1735,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.17.1" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "ordered-float" @@ -1754,22 +1760,22 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.7" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521" +checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.2.16", + "redox_syscall", "smallvec", - "windows-sys 0.45.0", + "windows-targets", ] [[package]] name = "parquet" -version = "39.0.0" +version 
= "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0a1e6fa27f09ebddba280f5966ef435f3ac4d74cfc3ffe370fd3fd59c2e004d" +checksum = "d6a656fcc17e641657c955742c689732684e096f790ff30865d9f8dcc39f7c4a" dependencies = [ "ahash", "arrow-array", @@ -1796,7 +1802,7 @@ dependencies = [ "thrift", "tokio", "twox-hash", - "zstd 0.12.3+zstd.1.5.2", + "zstd", ] [[package]] @@ -1816,9 +1822,9 @@ checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79" [[package]] name = "percent-encoding" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" +checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" [[package]] name = "petgraph" @@ -1894,12 +1900,12 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "prettyplease" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "617feabb81566b593beb4886fb8c1f38064169dae4dccad0e3220160c3b37203" +checksum = "3b69d39aab54d069e7f2fe8cb970493e7834601ca2d8c65fd7bbd183578080d1" dependencies = [ "proc-macro2", - "syn 2.0.16", + "syn 2.0.18", ] [[package]] @@ -1910,9 +1916,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.58" +version = "1.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa1fb82fc0c281dd9671101b66b771ebbe1eaf967b96ac8740dcba4b70005ca8" +checksum = "dec2b086b7a862cf4de201096214fa870344cf922b2b30c167badb3af3195406" dependencies = [ "unicode-ident", ] @@ -2056,9 +2062,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.27" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f4f29d145265ec1c483c7c654450edde0bfe043d3938d6972630663356d9500" +checksum = 
"1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488" dependencies = [ "proc-macro2", ] @@ -2093,15 +2099,6 @@ dependencies = [ "getrandom", ] -[[package]] -name = "redox_syscall" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" -dependencies = [ - "bitflags 1.3.2", -] - [[package]] name = "redox_syscall" version = "0.3.5" @@ -2113,9 +2110,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.8.2" +version = "1.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1a59b5d8e97dee33696bf13c5ba8ab85341c002922fba050069326b9c498974" +checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f" dependencies = [ "aho-corasick", "memchr", @@ -2211,16 +2208,16 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.19" +version = "0.37.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d" +checksum = "b96e891d04aa506a6d1f318d2771bcb1c7dfda84e126660ace067c9b474bb2c0" dependencies = [ "bitflags 1.3.2", "errno", "io-lifetimes", "libc", "linux-raw-sys", - "windows-sys 0.48.0", + "windows-sys", ] [[package]] @@ -2329,22 +2326,22 @@ checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc" [[package]] name = "serde" -version = "1.0.163" +version = "1.0.164" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2" +checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.163" +version = "1.0.164" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c805777e3930c8883389c602315a24224bcc738b63905ef87cd1420353ea93e" +checksum = 
"d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.18", ] [[package]] @@ -2378,7 +2375,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.16", + "syn 2.0.18", ] [[package]] @@ -2484,9 +2481,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] name = "sqlparser" -version = "0.33.0" +version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "355dc4d4b6207ca8a3434fc587db0a8016130a574dbcdbfb93d7f7b5bc5b211a" +checksum = "37d3706eefb17039056234df6b566b0014f303f867f2656108334a55b8096f59" dependencies = [ "log", "sqlparser_derive", @@ -2514,6 +2511,9 @@ name = "strum" version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" +dependencies = [ + "strum_macros", +] [[package]] name = "strum_macros" @@ -2546,7 +2546,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.16", + "syn 2.0.18", "typify", "walkdir", ] @@ -2570,9 +2570,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.16" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6f671d4b5ffdb8eadec19c0ae67fe2639df8684bd7bc4b83d986b8db549cf01" +checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e" dependencies = [ "proc-macro2", "quote", @@ -2587,15 +2587,16 @@ checksum = "fd1ba337640d60c3e96bc6f0638a939b9c9a7f2c316a1598c279828b3d1dc8c5" [[package]] name = "tempfile" -version = "3.5.0" +version = "3.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" +checksum = "31c0432476357e58790aaa47a8efb0c5138f137343f3b5f23bd36a27e3b0a6d6" dependencies = [ + "autocfg", "cfg-if", "fastrand", - "redox_syscall 0.3.5", + "redox_syscall", "rustix", - "windows-sys 
0.45.0", + "windows-sys", ] [[package]] @@ -2615,7 +2616,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.18", ] [[package]] @@ -2655,9 +2656,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.28.1" +version = "1.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0aa32867d44e6f2ce3385e89dceb990188b8bb0fb25b0cf576647a6f98ac5105" +checksum = "94d7b1cfd2aa4011f2de74c2c4c63665e27a71006b0a192dcd2710272e73dfa2" dependencies = [ "autocfg", "bytes", @@ -2668,7 +2669,7 @@ dependencies = [ "pin-project-lite", "socket2", "tokio-macros", - "windows-sys 0.48.0", + "windows-sys", ] [[package]] @@ -2679,14 +2680,14 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.18", ] [[package]] name = "tokio-rustls" -version = "0.24.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0d409377ff5b1e3ca6437aa86c1eb7d40c134bfec254e44c830defa92669db5" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" dependencies = [ "rustls", "tokio", @@ -2743,7 +2744,7 @@ checksum = "0f57e3ca2a01450b1a921183a9c9cbfda207fd822cef4ccb00a65402cbba7a74" dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.18", ] [[package]] @@ -2811,7 +2812,7 @@ dependencies = [ "regress", "schemars", "serde_json", - "syn 2.0.16", + "syn 2.0.18", "thiserror", "unicode-ident", ] @@ -2828,7 +2829,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.16", + "syn 2.0.18", "typify-impl", ] @@ -2840,9 +2841,9 @@ checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" [[package]] name = "unicode-ident" -version = "1.0.8" +version = "1.0.9" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" +checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" [[package]] name = "unicode-normalization" @@ -2885,9 +2886,9 @@ checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" [[package]] name = "url" -version = "2.3.1" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643" +checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb" dependencies = [ "form_urlencoded", "idna", @@ -2963,7 +2964,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.18", "wasm-bindgen-shared", ] @@ -2997,7 +2998,7 @@ checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.18", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3098,16 +3099,7 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" dependencies = [ - "windows-targets 0.48.0", -] - -[[package]] -name = "windows-sys" -version = "0.45.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" -dependencies = [ - "windows-targets 0.42.2", + "windows-targets", ] [[package]] @@ -3116,22 +3108,7 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - "windows-targets 0.48.0", -] - -[[package]] -name = "windows-targets" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" 
-dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", + "windows-targets", ] [[package]] @@ -3140,93 +3117,51 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" dependencies = [ - "windows_aarch64_gnullvm 0.48.0", - "windows_aarch64_msvc 0.48.0", - "windows_i686_gnu 0.48.0", - "windows_i686_msvc 0.48.0", - "windows_x86_64_gnu 0.48.0", - "windows_x86_64_gnullvm 0.48.0", - "windows_x86_64_msvc 0.48.0", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", ] -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" -[[package]] -name = "windows_aarch64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" - [[package]] name = "windows_aarch64_msvc" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" -[[package]] -name = "windows_i686_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" - [[package]] name = "windows_i686_gnu" version = "0.48.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" -[[package]] -name = "windows_i686_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" - [[package]] name = "windows_i686_msvc" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" -[[package]] -name = "windows_x86_64_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" - [[package]] name = "windows_x86_64_gnu" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" - [[package]] name = "windows_x86_64_gnullvm" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" -[[package]] -name = "windows_x86_64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" - [[package]] name = "windows_x86_64_msvc" version = "0.48.0" @@ -3272,32 +3207,13 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "zstd" -version = "0.11.2+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" -dependencies = [ - "zstd-safe 5.0.2+zstd.1.5.2", -] - [[package]] name = "zstd" version = 
"0.12.3+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806" dependencies = [ - "zstd-safe 6.0.5+zstd.1.5.4", -] - -[[package]] -name = "zstd-safe" -version = "5.0.2+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" -dependencies = [ - "libc", - "zstd-sys", + "zstd-safe", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 8745d9263..b7f0db42e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "25.0.0" +version = "26.0.0" homepage = "https://github.com/apache/arrow-datafusion-python" repository = "https://github.com/apache/arrow-datafusion-python" authors = ["Apache Arrow "] @@ -36,19 +36,19 @@ protoc = [ "datafusion-substrait/protoc" ] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.18.1", features = ["extension-module", "abi3", "abi3-py37"] } -datafusion = { version = "25.0.0" , features = ["pyarrow", "avro"] } -datafusion-common = { version = "25.0.0", features = ["pyarrow"] } -datafusion-expr = "25.0.0" -datafusion-optimizer = "25.0.0" -datafusion-sql = "25.0.0" -datafusion-substrait = "25.0.0" +datafusion = { version = "26.0.0" , features = ["pyarrow", "avro"] } +datafusion-common = { version = "26.0.0", features = ["pyarrow"] } +datafusion-expr = "26.0.0" +datafusion-optimizer = "26.0.0" +datafusion-sql = "26.0.0" +datafusion-substrait = "26.0.0" prost = "0.11" prost-types = "0.11" uuid = { version = "1.3", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false } async-trait = "0.1" futures = "0.3" -object_store = { version = "0.5.3", features = ["aws", "gcp", "azure"] } +object_store = { version = "0.5.4", features = ["aws", "gcp", "azure"] } parking_lot = "0.12" regex-syntax = "0.7.1" 
syn = "2.0.11" diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py index 221b0cc09..4df2061e1 100644 --- a/datafusion/tests/test_dataframe.py +++ b/datafusion/tests/test_dataframe.py @@ -614,7 +614,6 @@ def test_to_pydict(df): def test_describe(df): - # Calculate statistics df = df.describe() diff --git a/datafusion/tests/test_substrait.py b/datafusion/tests/test_substrait.py index 01df2d746..7c7a2c1f2 100644 --- a/datafusion/tests/test_substrait.py +++ b/datafusion/tests/test_substrait.py @@ -54,4 +54,6 @@ def test_substrait_serialization(ctx): # demonstrate how to create a DataFrame from a deserialized logical plan df = ctx.create_dataframe_from_logical_plan(logical_plan) - substrait_plan = ss.substrait.producer.to_substrait_plan(df.logical_plan()) + substrait_plan = ss.substrait.producer.to_substrait_plan( + df.logical_plan(), ctx + ) diff --git a/dev/release/generate-changelog.py b/dev/release/generate-changelog.py index 58e6781a3..e97f00304 100755 --- a/dev/release/generate-changelog.py +++ b/dev/release/generate-changelog.py @@ -26,7 +26,7 @@ def print_pulls(repo_name, title, pulls): if len(pulls) > 0: print("**{}:**".format(title)) print() - for (pull, commit) in pulls: + for pull, commit in pulls: url = "https://github.com/{}/pull/{}".format( repo_name, pull.number ) @@ -39,7 +39,6 @@ def print_pulls(repo_name, title, pulls): def generate_changelog(repo, repo_name, tag1, tag2): - # get a list of commits between two tags print( f"Fetching list of commits between {tag1} and {tag2}", file=sys.stderr @@ -68,8 +67,7 @@ def generate_changelog(repo, repo_name, tag1, tag2): # categorize the pull requests based on GitHub labels print("Categorizing pull requests", file=sys.stderr) - for (pull, commit) in all_pulls: - + for pull, commit in all_pulls: # see if PR title uses Conventional Commits cc_type = "" # cc_scope = '' diff --git a/src/common/df_field.rs b/src/common/df_field.rs index d7745bfb5..703af0aa2 100644 --- 
a/src/common/df_field.rs +++ b/src/common/df_field.rs @@ -49,7 +49,7 @@ impl PyDFField { fn new(qualifier: Option, name: &str, data_type: PyDataType, nullable: bool) -> Self { PyDFField { field: DFField::new( - qualifier.map(|q| OwnedTableReference::from(q)), + qualifier.map(OwnedTableReference::from), name, data_type.into(), nullable, diff --git a/src/expr.rs b/src/expr.rs index 819edff2a..d519d0c1c 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -186,7 +186,8 @@ impl PyExpr { } fn __mod__(&self, rhs: PyExpr) -> PyResult { - Ok(self.expr.clone().modulus(rhs.expr).into()) + let expr = self.expr.clone() % rhs.expr; + Ok(expr.into()) } fn __and__(&self, rhs: PyExpr) -> PyResult { @@ -198,7 +199,8 @@ impl PyExpr { } fn __invert__(&self) -> PyResult { - Ok(self.expr.clone().not().into()) + let expr = !self.expr.clone(); + Ok(expr.into()) } fn __getitem__(&self, key: &str) -> PyResult { diff --git a/src/expr/scalar_function.rs b/src/expr/scalar_function.rs index 1a71d5692..776ca3297 100644 --- a/src/expr/scalar_function.rs +++ b/src/expr/scalar_function.rs @@ -56,7 +56,7 @@ impl From for BuiltinScalarFunction { #[pymethods] impl PyScalarFunction { fn fun(&self) -> PyResult { - Ok(self.scalar_function.clone().into()) + Ok(self.scalar_function.into()) } fn args(&self) -> PyResult> { diff --git a/src/substrait.rs b/src/substrait.rs index 5d2e7a485..d027ad21f 100644 --- a/src/substrait.rs +++ b/src/substrait.rs @@ -39,7 +39,7 @@ impl PyPlan { let mut proto_bytes = Vec::::new(); self.plan .encode(&mut proto_bytes) - .map_err(|e| DataFusionError::EncodeError(e))?; + .map_err(DataFusionError::EncodeError)?; Ok(PyBytes::new(py, &proto_bytes).into()) } } @@ -113,8 +113,8 @@ pub(crate) struct PySubstraitProducer; impl PySubstraitProducer { /// Convert DataFusion LogicalPlan to Substrait Plan #[staticmethod] - pub fn to_substrait_plan(plan: PyLogicalPlan) -> PyResult { - match producer::to_substrait_plan(&plan.plan) { + pub fn to_substrait_plan(plan: PyLogicalPlan, ctx: 
&PySessionContext) -> PyResult { + match producer::to_substrait_plan(&plan.plan, &ctx.ctx) { Ok(plan) => Ok(PyPlan { plan: *plan }), Err(e) => Err(py_datafusion_err(e)), } From c0be61bd575ae717ec8c949693abd6cdec39a50c Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 3 Jul 2023 11:59:59 -0400 Subject: [PATCH 053/413] LogicalPlan.to_variant() make public (#412) * Make to_variant public * Add to_variant() coverage for Subquery and SubqueryAlias * Add variant_name() function to Expr * Add function for friendly display of arrow DataType to Python layer since DataType enum values cannot be directly accessed * Change signature * Updated to include Decimal128 variant coverage * Add try_from to PyCreateView * Cargo lint fixes --- src/common/data_type.rs | 44 +++++++++++++++++++++++++++++++++++++++++ src/expr.rs | 16 ++++++++++----- src/expr/create_view.rs | 15 ++++++++++++-- src/expr/subquery.rs | 44 +++++++++++++++++++++++++++++++++++++++++ src/sql/logical.rs | 6 +++++- 5 files changed, 117 insertions(+), 8 deletions(-) diff --git a/src/common/data_type.rs b/src/common/data_type.rs index 622e1aa46..85d2febb5 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -503,6 +503,50 @@ impl DataTypeMap { )), } } + + /// Unfortunately PyO3 does not allow for us to expose the DataType as an enum since + /// we cannot directly annotate the Enum instance of dependency code. Therefore, here + /// we provide an enum to mimic it.
+ #[pyo3(name = "friendly_arrow_type_name")] + pub fn friendly_arrow_type_name(&self) -> PyResult<&str> { + Ok(match &self.arrow_type.data_type { + DataType::Null => "Null", + DataType::Boolean => "Boolean", + DataType::Int8 => "Int8", + DataType::Int16 => "Int16", + DataType::Int32 => "Int32", + DataType::Int64 => "Int64", + DataType::UInt8 => "UInt8", + DataType::UInt16 => "UInt16", + DataType::UInt32 => "UInt32", + DataType::UInt64 => "UInt64", + DataType::Float16 => "Float16", + DataType::Float32 => "Float32", + DataType::Float64 => "Float64", + DataType::Timestamp(_, _) => "Timestamp", + DataType::Date32 => "Date32", + DataType::Date64 => "Date64", + DataType::Time32(_) => "Time32", + DataType::Time64(_) => "Time64", + DataType::Duration(_) => "Duration", + DataType::Interval(_) => "Interval", + DataType::Binary => "Binary", + DataType::FixedSizeBinary(_) => "FixedSizeBinary", + DataType::LargeBinary => "LargeBinary", + DataType::Utf8 => "Utf8", + DataType::LargeUtf8 => "LargeUtf8", + DataType::List(_) => "List", + DataType::FixedSizeList(_, _) => "FixedSizeList", + DataType::LargeList(_) => "LargeList", + DataType::Struct(_) => "Struct", + DataType::Union(_, _) => "Union", + DataType::Dictionary(_, _) => "Dictionary", + DataType::Decimal128(_, _) => "Decimal128", + DataType::Decimal256(_, _) => "Decimal256", + DataType::Map(_, _) => "Map", + DataType::RunEndEncoded(_, _) => "RunEndEncoded", + }) + } } /// PyO3 requires that objects passed between Rust and Python implement the trait `PyClass` diff --git a/src/expr.rs b/src/expr.rs index d519d0c1c..17b6c34f5 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -153,6 +153,12 @@ impl PyExpr { Ok(self.expr.canonical_name()) } + /// Returns the name of the Expr variant. 
+ /// Ex: 'IsNotNull', 'Literal', 'BinaryExpr', etc + fn variant_name(&self) -> PyResult<&str> { + Ok(self.expr.variant_name()) + } + fn __richcmp__(&self, other: PyExpr, op: CompareOp) -> PyExpr { let expr = match op { CompareOp::Lt => self.expr.clone().lt(other.expr), @@ -302,7 +308,7 @@ impl PyExpr { ScalarValue::Boolean(v) => v.into_py(py), ScalarValue::Float32(v) => v.into_py(py), ScalarValue::Float64(v) => v.into_py(py), - ScalarValue::Decimal128(_, _, _) => todo!(), + ScalarValue::Decimal128(v, _, _) => v.into_py(py), ScalarValue::Int8(v) => v.into_py(py), ScalarValue::Int16(v) => v.into_py(py), ScalarValue::Int32(v) => v.into_py(py), @@ -323,10 +329,10 @@ impl PyExpr { ScalarValue::Time32Millisecond(v) => v.into_py(py), ScalarValue::Time64Microsecond(v) => v.into_py(py), ScalarValue::Time64Nanosecond(v) => v.into_py(py), - ScalarValue::TimestampSecond(_, _) => todo!(), - ScalarValue::TimestampMillisecond(_, _) => todo!(), - ScalarValue::TimestampMicrosecond(_, _) => todo!(), - ScalarValue::TimestampNanosecond(_, _) => todo!(), + ScalarValue::TimestampSecond(v, _) => v.into_py(py), + ScalarValue::TimestampMillisecond(v, _) => v.into_py(py), + ScalarValue::TimestampMicrosecond(v, _) => v.into_py(py), + ScalarValue::TimestampNanosecond(v, _) => v.into_py(py), ScalarValue::IntervalYearMonth(v) => v.into_py(py), ScalarValue::IntervalDayTime(v) => v.into_py(py), ScalarValue::IntervalMonthDayNano(v) => v.into_py(py), diff --git a/src/expr/create_view.rs b/src/expr/create_view.rs index 9d06239ea..febd723c5 100644 --- a/src/expr/create_view.rs +++ b/src/expr/create_view.rs @@ -17,10 +17,10 @@ use std::fmt::{self, Display, Formatter}; -use datafusion_expr::CreateView; +use datafusion_expr::{CreateView, DdlStatement, LogicalPlan}; use pyo3::prelude::*; -use crate::sql::logical::PyLogicalPlan; +use crate::{errors::py_type_err, sql::logical::PyLogicalPlan}; use super::logical_node::LogicalNode; @@ -92,3 +92,14 @@ impl LogicalNode for PyCreateView { 
Ok(self.clone().into_py(py)) } } + +impl TryFrom for PyCreateView { + type Error = PyErr; + + fn try_from(logical_plan: LogicalPlan) -> Result { + match logical_plan { + LogicalPlan::Ddl(DdlStatement::CreateView(create)) => Ok(PyCreateView { create }), + _ => Err(py_type_err("unexpected plan")), + } + } +} diff --git a/src/expr/subquery.rs b/src/expr/subquery.rs index 93ff244f6..f6f7b7fe5 100644 --- a/src/expr/subquery.rs +++ b/src/expr/subquery.rs @@ -15,9 +15,15 @@ // specific language governing permissions and limitations // under the License. +use std::fmt::{self, Display, Formatter}; + use datafusion_expr::Subquery; use pyo3::prelude::*; +use crate::sql::logical::PyLogicalPlan; + +use super::logical_node::LogicalNode; + #[pyclass(name = "Subquery", module = "datafusion.expr", subclass)] #[derive(Clone)] pub struct PySubquery { @@ -35,3 +41,41 @@ impl From for PySubquery { PySubquery { subquery } } } + +impl Display for PySubquery { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!( + f, + "Subquery + Subquery: {:?} + outer_ref_columns: {:?}", + self.subquery.subquery, self.subquery.outer_ref_columns, + ) + } +} + +#[pymethods] +impl PySubquery { + /// Retrieves the input `LogicalPlan` to this `Subquery` node + fn input(&self) -> PyResult> { + Ok(Self::inputs(self)) + } + + fn __repr__(&self) -> PyResult { + Ok(format!("Subquery({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("Subquery".to_string()) + } +} + +impl LogicalNode for PySubquery { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant(&self, py: Python) -> PyResult { + Ok(self.clone().into_py(py)) + } +} diff --git a/src/sql/logical.rs b/src/sql/logical.rs index 07a3f65b1..2183155bf 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -28,6 +28,8 @@ use crate::expr::filter::PyFilter; use crate::expr::limit::PyLimit; use crate::expr::projection::PyProjection; use crate::expr::sort::PySort; +use crate::expr::subquery::PySubquery; +use
crate::expr::subquery_alias::PySubqueryAlias; use crate::expr::table_scan::PyTableScan; use datafusion_expr::LogicalPlan; use pyo3::prelude::*; @@ -56,7 +58,7 @@ impl PyLogicalPlan { #[pymethods] impl PyLogicalPlan { /// Return the specific logical operator - fn to_variant(&self, py: Python) -> PyResult { + pub fn to_variant(&self, py: Python) -> PyResult { Python::with_gil(|_| match self.plan.as_ref() { LogicalPlan::Aggregate(plan) => PyAggregate::from(plan.clone()).to_variant(py), LogicalPlan::Analyze(plan) => PyAnalyze::from(plan.clone()).to_variant(py), @@ -69,6 +71,8 @@ impl PyLogicalPlan { LogicalPlan::Projection(plan) => PyProjection::from(plan.clone()).to_variant(py), LogicalPlan::Sort(plan) => PySort::from(plan.clone()).to_variant(py), LogicalPlan::TableScan(plan) => PyTableScan::from(plan.clone()).to_variant(py), + LogicalPlan::Subquery(plan) => PySubquery::from(plan.clone()).to_variant(py), + LogicalPlan::SubqueryAlias(plan) => PySubqueryAlias::from(plan.clone()).to_variant(py), other => Err(py_unsupported_variant_err(format!( "Cannot convert this plan to a LogicalNode: {:?}", other From 3f81513d6c5fd109bdf8c509f81c0a587924d354 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 3 Jul 2023 11:26:58 -0600 Subject: [PATCH 054/413] Prepare 27.0.0 release (#423) --- Cargo.lock | 534 +++++++++++++++++++-------------- Cargo.toml | 20 +- datafusion/tests/test_store.py | 2 +- src/catalog.rs | 4 +- src/context.rs | 2 +- src/dataframe.rs | 2 +- src/dataset.rs | 2 +- src/dataset_exec.rs | 2 +- src/physical_plan.rs | 2 +- src/record_batch.rs | 2 +- src/udaf.rs | 6 +- src/udf.rs | 2 +- 12 files changed, 337 insertions(+), 243 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2392f5796..21124d4fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "addr2line" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3" +dependencies = [ + "gimli", +] + [[package]] name = "adler" version = "1.0.2" @@ -51,6 +60,12 @@ dependencies = [ "alloc-no-stdlib", ] +[[package]] +name = "allocator-api2" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56fc6cf8dc8c4158eed8649f9b8b0ea1518eb62b544fe9490d66fa0b349eafe9" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -91,8 +106,8 @@ dependencies = [ "serde", "serde_json", "snap", - "strum", - "strum_macros", + "strum 0.24.1", + "strum_macros 0.24.3", "thiserror", "typed-builder", "uuid", @@ -107,15 +122,15 @@ checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" [[package]] name = "arrayvec" -version = "0.7.2" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "40.0.0" +version = "42.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6619cab21a0cdd8c9b9f1d9e09bfaa9b1974e5ef809a6566aef0b998caf38ace" +checksum = "773d18d72cd290f3f9e2149a714c8ac404b6c3fd614c684f0015449940fca899" dependencies = [ "ahash", "arrow-arith", @@ -136,9 +151,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "40.0.0" +version = "42.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0dc95485623a76e00929bda8caa40c1f838190952365c4f43a7b9ae86d03e94" +checksum = "93bc0da4b22ba63807fa2a74998e21209179c93c67856ae65d9218b81f3ef918" dependencies = [ "arrow-array", "arrow-buffer", @@ -151,9 +166,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "40.0.0" +version = "42.0.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3267847f53d3042473cfd2c769afd8d74a6d7d201fc3a34f5cb84c0282ef47a7" +checksum = "ea9a0fd21121304cad96f307c938d861cb1e7f0c151b93047462cd9817d760fb" dependencies = [ "ahash", "arrow-buffer", @@ -162,15 +177,15 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.13.2", + "hashbrown 0.14.0", "num", ] [[package]] name = "arrow-buffer" -version = "40.0.0" +version = "42.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f66553e66e120ac4b21570368ee9ebf35ff3f5399f872b0667699e145678f5" +checksum = "30ce342ecf5971004e23cef8b5fb3bacd2bbc48a381464144925074e1472e9eb" dependencies = [ "half", "num", @@ -178,9 +193,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "40.0.0" +version = "42.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65e6f3579dbf0d97c683d451b2550062b0f0e62a3169bf74238b5f59f44ad6d8" +checksum = "4b94a0ce7d27abbb02e2ee4db770f593127610f57b32625b0bc6a1a90d65f085" dependencies = [ "arrow-array", "arrow-buffer", @@ -189,15 +204,16 @@ dependencies = [ "arrow-select", "chrono", "comfy-table", + "half", "lexical-core", "num", ] [[package]] name = "arrow-csv" -version = "40.0.0" +version = "42.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "373579c4c1a8f5307d3125b7a89c700fcf8caf85821c77eb4baab3855ae0aba5" +checksum = "0f3be10a00a43c4bf0d243c070754ebdde17c5d576b4928d9c3efbe3005a3853" dependencies = [ "arrow-array", "arrow-buffer", @@ -214,9 +230,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "40.0.0" +version = "42.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61bc8df9912cca6642665fdf989d6fa0de2570f18a7f709bcf59d29de96d2097" +checksum = "1d9a83dad6a53d6907765106d3bc61d6d9d313cfe1751701b3ef0948e7283dc2" dependencies = [ "arrow-buffer", "arrow-schema", @@ -226,9 +242,9 @@ dependencies = [ [[package]] name = 
"arrow-ipc" -version = "40.0.0" +version = "42.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0105dcf5f91daa7182d87b713ee0b32b3bfc88e0c48e7dc3e9d6f1277a07d1ae" +checksum = "a46da5e438a854e0386b38774da88a98782c0973c6dbc5c949ca4e02faf9b016" dependencies = [ "arrow-array", "arrow-buffer", @@ -240,9 +256,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "40.0.0" +version = "42.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e73134fb5b5ec8770f8cbb214c2c487b2d350081e403ca4eeeb6f8f5e19846ac" +checksum = "d5f27a1fbc76553ad92dc1a9583e56b7058d8c418c4089b0b689f5b87e2da5e1" dependencies = [ "arrow-array", "arrow-buffer", @@ -251,7 +267,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap", + "indexmap 1.9.3", "lexical-core", "num", "serde", @@ -260,9 +276,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "40.0.0" +version = "42.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89f25bc66e18d4c2aa1fe2f9bb03e2269da60e636213210385ae41a107f9965a" +checksum = "f2373661f6c2233e18f6fa69c40999a9440231d1e8899be8bbbe73c7e24aa3b4" dependencies = [ "arrow-array", "arrow-buffer", @@ -275,9 +291,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "40.0.0" +version = "42.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1095ff85ea4f5ff02d17b30b089de31b51a50be01c6b674f0a0509ab771232f1" +checksum = "377cd5158b7de4034a175e296726c40c3236e65d71d90a5dab2fb4fab526a8f4" dependencies = [ "ahash", "arrow-array", @@ -285,23 +301,23 @@ dependencies = [ "arrow-data", "arrow-schema", "half", - "hashbrown 0.13.2", + "hashbrown 0.14.0", ] [[package]] name = "arrow-schema" -version = "40.0.0" +version = "42.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25187bbef474151a2e4ddec67b9e34bda5cbfba292dc571392fa3a1f71ff5a82" +checksum = 
"ba9ed245bd2d7d97ad1457cb281d4296e8b593588758b8fec6d67b2b2b0f2265" dependencies = [ - "bitflags 2.3.1", + "bitflags 2.3.3", ] [[package]] name = "arrow-select" -version = "40.0.0" +version = "42.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd0d4ee884aec3aa05e41478e3cd312bf609de9babb5d187a43fb45931da4da4" +checksum = "0dc9bd6aebc565b1d04bae64a0f4dda3abc677190eb7d960471b1b20e1cebed0" dependencies = [ "arrow-array", "arrow-buffer", @@ -312,9 +328,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "40.0.0" +version = "42.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6d71c3ffe4c07e66ce8fdc6aed5b00e0e60c5144911879b10546f5b72d8fa1c" +checksum = "23cf2baea2ef53787332050decf7d71aca836a352e188c8ad062892405955d2b" dependencies = [ "arrow-array", "arrow-buffer", @@ -351,7 +367,7 @@ checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" dependencies = [ "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.22", ] [[package]] @@ -362,7 +378,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.22", ] [[package]] @@ -380,6 +396,21 @@ dependencies = [ "cc", ] +[[package]] +name = "backtrace" +version = "0.3.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + [[package]] name = "base64" version = "0.21.2" @@ -394,9 +425,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.3.1" +version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6776fc96284a0bb647b615056fc496d1fe1644a7ab01829818a6d91cae888b84" +checksum = 
"630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" [[package]] name = "blake2" @@ -520,9 +551,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9cc2b23599e6d7479755f3594285efb3f74a1bdca7a7374948bc831e23a552" +checksum = "f1369bc6b9e9a7dfdae2055f6ec151fe9c554a9d23d357c0237cee2e25eaabb7" dependencies = [ "chrono", "chrono-tz-build", @@ -531,9 +562,9 @@ dependencies = [ [[package]] name = "chrono-tz-build" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9998fb9f7e9b2111641485bf8beb32f92945f97f92a3d061f744cfef335f751" +checksum = "e2f5ebdc942f57ed96d560a6d1a459bae5851102a25d5bf89dc04ae453e31ecf" dependencies = [ "parse-zoneinfo", "phf", @@ -542,12 +573,12 @@ dependencies = [ [[package]] name = "comfy-table" -version = "6.2.0" +version = "7.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e959d788268e3bf9d35ace83e81b124190378e4c91c9067524675e33394b8ba" +checksum = "9ab77dbd8adecaf3f0db40581631b995f312a8a5ae3aa9993188bb8f23d83a5b" dependencies = [ - "strum", - "strum_macros", + "strum 0.24.1", + "strum_macros 0.24.3", "unicode-width", ] @@ -587,9 +618,9 @@ checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "cpufeatures" -version = "0.2.7" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e4c1eaa2012c47becbbad2ab175484c2a84d1185b566fb2cc5b8707343dfe58" +checksum = "03e69e28e9f7f77debdedbaafa2866e1de9ba56df55a8bd7cfc724c25a09987c" dependencies = [ "libc", ] @@ -655,9 +686,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "26.0.0" +version = "27.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9992c267436551d40b52d65289b144712e7b0ebdc62c8c859fd1574e5f73efbb" +checksum = 
"e96f6e4eb10bd3e6b709686858246466983e8c5354a928ff77ee34919aa60d00" dependencies = [ "ahash", "apache-avro", @@ -680,9 +711,9 @@ dependencies = [ "flate2", "futures", "glob", - "hashbrown 0.13.2", - "indexmap", - "itertools", + "hashbrown 0.14.0", + "indexmap 1.9.3", + "itertools 0.11.0", "lazy_static", "log", "num-traits", @@ -697,7 +728,6 @@ dependencies = [ "sqlparser", "tempfile", "tokio", - "tokio-stream", "tokio-util", "url", "uuid", @@ -707,9 +737,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "26.0.0" +version = "27.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3be97f7a7c720cdbb71e9eeabf814fa6ad8102b9022390f6cac74d3b4af6392" +checksum = "00e5fddcc0dd49bbe199e43aa406f39c46c790bb2a43c7b36a478e5f3f971235" dependencies = [ "apache-avro", "arrow", @@ -724,14 +754,14 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "26.0.0" +version = "27.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c77c4b14b809b0e4c5bb101b6834504f06cdbb0d3c643400c61d0d844b33264e" +checksum = "cfd50b6cb17acc78d2473c0d28014b8fd4e2e0a2c067c07645d6547b33b0aeeb" dependencies = [ "dashmap", "datafusion-common", "datafusion-expr", - "hashbrown 0.13.2", + "hashbrown 0.14.0", "log", "object_store", "parking_lot", @@ -742,24 +772,24 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "26.0.0" +version = "27.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6ec7409bd45cf4fae6395d7d1024c8a97e543cadc88363e405d2aad5330e5e7" +checksum = "e1a35dc2cd9eac18063d636f7ddf4f090fe1f34284d80192ac7ade38cc3c6991" dependencies = [ "ahash", "arrow", "datafusion-common", "lazy_static", "sqlparser", - "strum", - "strum_macros", + "strum 0.25.0", + "strum_macros 0.25.0", ] [[package]] name = "datafusion-optimizer" -version = "26.0.0" +version = "27.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"64b537c93f87989c212db92a448a0f5eb4f0995e27199bb7687ae94f8b64a7a8" +checksum = "5f5043afeb45ec1c0f45519e1eed6a477f2d30732e8f975d9cf9a75fba0ca716" dependencies = [ "arrow", "async-trait", @@ -767,17 +797,17 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "hashbrown 0.13.2", - "itertools", + "hashbrown 0.14.0", + "itertools 0.11.0", "log", "regex-syntax", ] [[package]] name = "datafusion-physical-expr" -version = "26.0.0" +version = "27.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f60ee3f53340fdef36ee54d9e12d446ae2718b1d0196ac581f791d34808ec876" +checksum = "6cc892a24f4b829ee7718ad3950884c0346dbdf1517f3df153af4bcf54d8ca4d" dependencies = [ "ahash", "arrow", @@ -791,9 +821,9 @@ dependencies = [ "datafusion-expr", "datafusion-row", "half", - "hashbrown 0.13.2", - "indexmap", - "itertools", + "hashbrown 0.14.0", + "indexmap 1.9.3", + "itertools 0.11.0", "lazy_static", "libc", "md-5", @@ -808,7 +838,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "26.0.0" +version = "27.0.0" dependencies = [ "async-trait", "datafusion", @@ -827,7 +857,7 @@ dependencies = [ "pyo3-build-config", "rand", "regex-syntax", - "syn 2.0.18", + "syn 2.0.22", "tokio", "url", "uuid", @@ -835,9 +865,9 @@ dependencies = [ [[package]] name = "datafusion-row" -version = "26.0.0" +version = "27.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d58fc64058aa3bcb00077a0d19474a0d584d31dec8c7ac3406868f485f659af9" +checksum = "ce75c660bbddfdd254109e668e5b5bd69df31ea26e3768e15cef0c68015e650e" dependencies = [ "arrow", "datafusion-common", @@ -847,9 +877,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "26.0.0" +version = "27.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1531f0314151a34bf6c0a83c7261525688b7c729876f53e7896b8f4ca8f57d07" +checksum = "49cab87e4933a452e0b7b3f0cbd0e760daf7d33fb54d09d70d3ffba229eaa652" 
dependencies = [ "arrow", "arrow-schema", @@ -861,14 +891,14 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "26.0.0" +version = "27.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "079d5be5ec59580777bfa16d79187fea99b6498e3e8e07eb36d504a5fe708f13" +checksum = "ba77d22232053f6cdd98bd6f5328940850844450253f25b8c50bfc5199c505d4" dependencies = [ "async-recursion", "chrono", "datafusion", - "itertools", + "itertools 0.11.0", "object_store", "prost", "prost-types", @@ -914,6 +944,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "equivalent" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88bffebc5d80432c9b140ee17875ff173a8ab62faad5b257da912bd2f6c1c0a1" + [[package]] name = "errno" version = "0.3.1" @@ -1041,7 +1077,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.22", ] [[package]] @@ -1095,6 +1131,12 @@ dependencies = [ "wasi", ] +[[package]] +name = "gimli" +version = "0.27.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e" + [[package]] name = "git2" version = "0.17.2" @@ -1116,9 +1158,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.19" +version = "0.3.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d357c7ae988e7d2182f7d7871d0b963962420b0678b0997ce7de72001aeab782" +checksum = "97ec8491ebaf99c8eaa73058b045fe58073cd6be7f596ac993ced0b0a0c01049" dependencies = [ "bytes", "fnv", @@ -1126,7 +1168,7 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap", + "indexmap 1.9.3", "slab", "tokio", "tokio-util", @@ -1135,10 +1177,11 @@ dependencies = [ [[package]] name = "half" -version = "2.2.1" +version = "2.3.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b4af3693f1b705df946e9fe5631932443781d0aabb423b62fcd4d73f6d2fd0" +checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" dependencies = [ + "cfg-if", "crunchy", "num-traits", ] @@ -1159,19 +1202,20 @@ dependencies = [ ] [[package]] -name = "heck" -version = "0.4.1" +name = "hashbrown" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" +dependencies = [ + "ahash", + "allocator-api2", +] [[package]] -name = "hermit-abi" -version = "0.2.6" +name = "heck" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" -dependencies = [ - "libc", -] +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" @@ -1213,11 +1257,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + [[package]] name = "hyper" -version = "0.14.26" +version = "0.14.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab302d72a6f11a3b910431ff93aae7e773078c769f0a3ef15fb9ec692ed147d4" +checksum = "ffb1cfd654a8219eaef89881fdb3bb3b1cdc5fa75ded05d6933b2b382e395468" dependencies = [ "bytes", "futures-channel", @@ -1293,6 +1343,16 @@ dependencies = [ "hashbrown 0.12.3", ] +[[package]] +name = "indexmap" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" +dependencies = [ + "equivalent", + "hashbrown 0.14.0", +] + [[package]] name = "indoc" version = "1.0.9" @@ -1320,16 +1380,16 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" dependencies = [ - "hermit-abi 0.3.1", + "hermit-abi", "libc", "windows-sys", ] [[package]] name = "ipnet" -version = "2.7.2" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12b6ee2129af8d4fb011108c73d99a1b83a85977f23b82460c0ae2e25bb4b57f" +checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6" [[package]] name = "itertools" @@ -1340,6 +1400,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.6" @@ -1357,9 +1426,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.63" +version = "0.3.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f37a4a5928311ac501dee68b3c7613a1037d0edb30c8e5427bd832d55d1b790" +checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" dependencies = [ "wasm-bindgen", ] @@ -1436,9 +1505,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.146" +version = "0.2.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f92be4933c13fd498862a9e02a3055f8a8d9c039ce33db97306fd5a6caa7f29b" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" [[package]] name = "libflate" @@ -1570,9 +1639,9 @@ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "memoffset" -version = "0.8.0" +version = "0.9.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" dependencies = [ "autocfg", ] @@ -1697,26 +1766,37 @@ dependencies = [ [[package]] name = "num_cpus" -version = "1.15.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi 0.2.6", + "hermit-abi", "libc", ] +[[package]] +name = "object" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1" +dependencies = [ + "memchr", +] + [[package]] name = "object_store" -version = "0.5.6" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec9cd6ca25e796a49fa242876d1c4de36a24a6da5258e9f0bc062dbf5e81c53b" +checksum = "27c776db4f332b571958444982ff641d2531417a326ca368995073b639205d58" dependencies = [ "async-trait", "base64", "bytes", "chrono", "futures", - "itertools", + "humantime", + "hyper", + "itertools 0.10.5", "parking_lot", "percent-encoding", "quick-xml", @@ -1773,9 +1853,9 @@ dependencies = [ [[package]] name = "parquet" -version = "40.0.0" +version = "42.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6a656fcc17e641657c955742c689732684e096f790ff30865d9f8dcc39f7c4a" +checksum = "baab9c36b1c8300b81b4d577d306a0a733f9d34021363098d3548e37757ed6c8" dependencies = [ "ahash", "arrow-array", @@ -1791,7 +1871,7 @@ dependencies = [ "chrono", "flate2", "futures", - "hashbrown 0.13.2", + "hashbrown 0.14.0", "lz4", "num", "num-bigint", @@ -1833,23 +1913,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4" dependencies = [ "fixedbitset", - "indexmap", + "indexmap 1.9.3", ] [[package]] name = "phf" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" dependencies = [ "phf_shared", ] [[package]] name = "phf_codegen" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56ac890c5e3ca598bbdeaa99964edb5b0258a583a9eb6ef4e89fc85d9224770" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" dependencies = [ "phf_generator", "phf_shared", @@ -1857,9 +1937,9 @@ dependencies = [ [[package]] name = "phf_generator" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" dependencies = [ "phf_shared", "rand", @@ -1867,9 +1947,9 @@ dependencies = [ [[package]] name = "phf_shared" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1fb5f6f826b772a8d4c0394209441e7d37cbbb967ae9c7e0e8134365c9ee676" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" dependencies = [ "siphasher", ] @@ -1900,12 +1980,12 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "prettyplease" -version = "0.2.6" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b69d39aab54d069e7f2fe8cb970493e7834601ca2d8c65fd7bbd183578080d1" +checksum = "9825a04601d60621feed79c4e6b56d65db77cdca55cef43b46b0de1096d1c282" dependencies = [ "proc-macro2", - "syn 2.0.18", + "syn 
2.0.22", ] [[package]] @@ -1916,9 +1996,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.60" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dec2b086b7a862cf4de201096214fa870344cf922b2b30c167badb3af3195406" +checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb" dependencies = [ "unicode-ident", ] @@ -1941,7 +2021,7 @@ checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270" dependencies = [ "bytes", "heck", - "itertools", + "itertools 0.10.5", "lazy_static", "log", "multimap", @@ -1960,7 +2040,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4" dependencies = [ "anyhow", - "itertools", + "itertools 0.10.5", "proc-macro2", "quote", "syn 1.0.109", @@ -1986,9 +2066,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.18.3" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3b1ac5b3731ba34fdaa9785f8d74d17448cd18f30cf19e0c7e7b1fdb5272109" +checksum = "cffef52f74ec3b1a1baf295d9b8fcc3070327aefc39a6d00656b13c1d0b8885c" dependencies = [ "cfg-if", "indoc", @@ -2003,9 +2083,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.18.3" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cb946f5ac61bb61a5014924910d936ebd2b23b705f7a4a3c40b05c720b079a3" +checksum = "713eccf888fb05f1a96eb78c0dbc51907fee42b3377272dc902eb38985f418d5" dependencies = [ "once_cell", "target-lexicon", @@ -2013,9 +2093,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.18.3" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd4d7c5337821916ea2a1d21d1092e8443cf34879e53a0ac653fbb98f44ff65c" +checksum = 
"5b2ecbdcfb01cbbf56e179ce969a048fd7305a66d4cdf3303e0da09d69afe4c3" dependencies = [ "libc", "pyo3-build-config", @@ -2023,9 +2103,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.18.3" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9d39c55dab3fc5a4b25bbd1ac10a2da452c4aca13bb450f22818a002e29648d" +checksum = "b78fdc0899f2ea781c463679b20cb08af9247febc8d052de941951024cd8aea0" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -2035,9 +2115,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.18.3" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97daff08a4c48320587b5224cc98d609e3c27b6d437315bd40b605c98eeb5918" +checksum = "60da7b84f1227c3e2fe7593505de274dcf4c8928b4e0a1c23d551a14e4e80a0f" dependencies = [ "proc-macro2", "quote", @@ -2062,9 +2142,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.28" +version = "1.0.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488" +checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105" dependencies = [ "proc-macro2", ] @@ -2197,6 +2277,12 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + [[package]] name = "rustc_version" version = "0.4.0" @@ -2208,9 +2294,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.20" +version = "0.37.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b96e891d04aa506a6d1f318d2771bcb1c7dfda84e126660ace067c9b474bb2c0" +checksum = 
"62f25693a73057a1b4cb56179dd3c7ea21a7c6c5ee7d85781f5749b46f34b79c" dependencies = [ "bitflags 1.3.2", "errno", @@ -2222,9 +2308,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.21.1" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c911ba11bc8433e811ce56fde130ccf32f5127cab0e0194e9c68c5a5b671791e" +checksum = "e32ca28af694bc1bbf399c33a516dbdf1c90090b8ab23c2bc24f834aa2247f5f" dependencies = [ "log", "ring", @@ -2234,9 +2320,9 @@ dependencies = [ [[package]] name = "rustls-pemfile" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d194b56d58803a43635bdc398cd17e383d6f71f9182b9a192c127ca42494a59b" +checksum = "2d3987094b1d07b653b7dfdc3f70ce9a1da9c51ac18c1b06b662e4f9a0e9f4b2" dependencies = [ "base64", ] @@ -2341,7 +2427,7 @@ checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.22", ] [[package]] @@ -2357,9 +2443,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.96" +version = "1.0.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1" +checksum = "46266871c240a00b8f503b877622fe33430b3c7d963bdc0f2adc511e54a1eae3" dependencies = [ "itoa", "ryu", @@ -2375,7 +2461,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.18", + "syn 2.0.22", ] [[package]] @@ -2392,11 +2478,11 @@ dependencies = [ [[package]] name = "serde_yaml" -version = "0.9.21" +version = "0.9.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9d684e3ec7de3bf5466b32bd75303ac16f0736426e5a4e0d6e489559ce1249c" +checksum = "452e67b9c20c37fa79df53201dc03839651086ed9bbe92b3ca585ca9fdaa7d85" dependencies = [ - "indexmap", + "indexmap 2.0.0", "itoa", "ryu", "serde", @@ -2405,9 +2491,9 @@ dependencies = [ [[package]] name = "sha2" -version = 
"0.10.6" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0" +checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8" dependencies = [ "cfg-if", "cpufeatures", @@ -2481,9 +2567,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] name = "sqlparser" -version = "0.34.0" +version = "0.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37d3706eefb17039056234df6b566b0014f303f867f2656108334a55b8096f59" +checksum = "ca597d77c98894be1f965f2e4e2d2a61575d4998088e655476c73715c54b2b43" dependencies = [ "log", "sqlparser_derive", @@ -2511,8 +2597,14 @@ name = "strum" version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" + +[[package]] +name = "strum" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" dependencies = [ - "strum_macros", + "strum_macros 0.25.0", ] [[package]] @@ -2528,11 +2620,24 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "strum_macros" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe9f3bd7d2e45dcc5e265fbb88d6513e4747d8ef9444cf01a533119bce28a157" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.22", +] + [[package]] name = "substrait" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9df5d9e071804204172dc77e707c363f187e7f6566f9c78e5100c9a8f5ea434e" +checksum = "7d3b77ddddd080d1bb5ebfe6b62d1c4e2f33c9f6a4586d5eac5306a08f3d4585" dependencies = [ "git2", "heck", @@ -2546,7 +2651,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.18", + "syn 2.0.22", "typify", 
"walkdir", ] @@ -2570,9 +2675,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.18" +version = "2.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e" +checksum = "2efbeae7acf4eabd6bcdcbd11c92f45231ddda7539edc7806bd1a04a03b24616" dependencies = [ "proc-macro2", "quote", @@ -2581,9 +2686,9 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.12.7" +version = "0.12.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd1ba337640d60c3e96bc6f0638a939b9c9a7f2c316a1598c279828b3d1dc8c5" +checksum = "1b1c7f239eb94671427157bd93b3694320f3668d4e1eff08c7285366fd777fac" [[package]] name = "tempfile" @@ -2616,7 +2721,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.22", ] [[package]] @@ -2656,11 +2761,12 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.28.2" +version = "1.29.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94d7b1cfd2aa4011f2de74c2c4c63665e27a71006b0a192dcd2710272e73dfa2" +checksum = "532826ff75199d5833b9d2c5fe410f29235e25704ee5f0ef599fb51c21f4a4da" dependencies = [ "autocfg", + "backtrace", "bytes", "libc", "mio", @@ -2680,7 +2786,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.22", ] [[package]] @@ -2693,17 +2799,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "tokio-stream" -version = "0.1.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842" -dependencies = [ - "futures-core", - "pin-project-lite", - "tokio", -] - [[package]] name = "tokio-util" version = "0.7.8" @@ -2738,13 +2833,13 @@ dependencies = [ 
[[package]] name = "tracing-attributes" -version = "0.1.24" +version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f57e3ca2a01450b1a921183a9c9cbfda207fd822cef4ccb00a65402cbba7a74" +checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.22", ] [[package]] @@ -2791,9 +2886,9 @@ checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" [[package]] name = "typify" -version = "0.0.12" +version = "0.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6658d09e71bfe59e7987dc95ee7f71809fdb5793ab0cdc1503cc0073990484d" +checksum = "be9bb640c0eece20cac2028ebbc2ca1a3d17e3b1ddd98540309c309ed178d158" dependencies = [ "typify-impl", "typify-macro", @@ -2801,9 +2896,9 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.0.12" +version = "0.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34d3bb47587b13edf526d6ed02bf360ecefe083ab47a4ef29fc43112828b2bef" +checksum = "5c8d9ecedde2fd77e975c38eeb9ca40b34ad0247b2259c6e6bbd2a8d6cc2444f" dependencies = [ "heck", "log", @@ -2812,16 +2907,16 @@ dependencies = [ "regress", "schemars", "serde_json", - "syn 2.0.18", + "syn 2.0.22", "thiserror", "unicode-ident", ] [[package]] name = "typify-macro" -version = "0.0.12" +version = "0.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3f7e627c18be12d53bc1f261830b9c2763437b6a86ac57293b9085af2d32ffe" +checksum = "c08942cd65d458d2da15777a649cb6400cb545f17964f1ca965583f22e9cc3a9" dependencies = [ "proc-macro2", "quote", @@ -2829,7 +2924,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.18", + "syn 2.0.22", "typify-impl", ] @@ -2897,9 +2992,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.3.3" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"345444e32442451b267fc254ae85a209c64be56d2890e601a0c37ff0c3c5ecd2" +checksum = "d023da39d1fde5a8a3fe1f3e01ca9632ada0a63e9797de55a879d6e2236277be" dependencies = [ "getrandom", "serde", @@ -2929,11 +3024,10 @@ dependencies = [ [[package]] name = "want" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" dependencies = [ - "log", "try-lock", ] @@ -2945,9 +3039,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.86" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bba0e8cb82ba49ff4e229459ff22a191bbe9a1cb3a341610c9c33efc27ddf73" +checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -2955,24 +3049,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.86" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19b04bc93f9d6bdee709f6bd2118f57dd6679cf1176a1af464fca3ab0d66d8fb" +checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.22", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.36" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d1985d03709c53167ce907ff394f5316aa22cb4e12761295c5dc57dacb6297e" +checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03" dependencies = [ "cfg-if", "js-sys", @@ -2982,9 +3076,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.86" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "14d6b024f1a526bb0234f52840389927257beb670610081360e5a03c5df9c258" +checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2992,22 +3086,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.86" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8" +checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.22", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.86" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93" +checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" [[package]] name = "wasm-streams" @@ -3024,9 +3118,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.63" +version = "0.3.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bdd9ef4e984da1187bf8110c5cf5b845fbc87a23602cdf912386a76fcd3a7c2" +checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" dependencies = [ "js-sys", "wasm-bindgen", @@ -3113,9 +3207,9 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.48.0" +version = "0.48.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" +checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", diff --git a/Cargo.toml b/Cargo.toml index b7f0db42e..5ca57c68d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "26.0.0" +version = 
"27.0.0" homepage = "https://github.com/apache/arrow-datafusion-python" repository = "https://github.com/apache/arrow-datafusion-python" authors = ["Apache Arrow "] @@ -35,27 +35,27 @@ protoc = [ "datafusion-substrait/protoc" ] [dependencies] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" -pyo3 = { version = "0.18.1", features = ["extension-module", "abi3", "abi3-py37"] } -datafusion = { version = "26.0.0" , features = ["pyarrow", "avro"] } -datafusion-common = { version = "26.0.0", features = ["pyarrow"] } -datafusion-expr = "26.0.0" -datafusion-optimizer = "26.0.0" -datafusion-sql = "26.0.0" -datafusion-substrait = "26.0.0" +pyo3 = { version = "0.19", features = ["extension-module", "abi3", "abi3-py38"] } +datafusion = { version = "27.0.0" , features = ["pyarrow", "avro"] } +datafusion-common = { version = "27.0.0", features = ["pyarrow"] } +datafusion-expr = "27.0.0" +datafusion-optimizer = "27.0.0" +datafusion-sql = "27.0.0" +datafusion-substrait = "27.0.0" prost = "0.11" prost-types = "0.11" uuid = { version = "1.3", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false } async-trait = "0.1" futures = "0.3" -object_store = { version = "0.5.4", features = ["aws", "gcp", "azure"] } +object_store = { version = "0.6.1", features = ["aws", "gcp", "azure"] } parking_lot = "0.12" regex-syntax = "0.7.1" syn = "2.0.11" url = "2.2" [build-dependencies] -pyo3-build-config = "0.18.3" +pyo3-build-config = "0.19.0" [lib] name = "datafusion_python" diff --git a/datafusion/tests/test_store.py b/datafusion/tests/test_store.py index 9174c246f..3ffd9ee49 100644 --- a/datafusion/tests/test_store.py +++ b/datafusion/tests/test_store.py @@ -37,7 +37,7 @@ def ctx(local): def test_read_parquet(ctx): ctx.register_parquet( "test", - f"file://{os.getcwd()}/testing/data/parquet", + f"file://{os.getcwd()}/parquet/data/alltypes_plain.parquet", [], True, ".parquet", diff --git a/src/catalog.rs 
b/src/catalog.rs index 76521e9af..94faea067 100644 --- a/src/catalog.rs +++ b/src/catalog.rs @@ -24,8 +24,8 @@ use pyo3::prelude::*; use crate::errors::DataFusionError; use crate::utils::wait_for_future; use datafusion::{ - arrow::pyarrow::PyArrowConvert, - catalog::{catalog::CatalogProvider, schema::SchemaProvider}, + arrow::pyarrow::ToPyArrow, + catalog::{schema::SchemaProvider, CatalogProvider}, datasource::{TableProvider, TableType}, }; diff --git a/src/context.rs b/src/context.rs index b603c010e..cf133d79a 100644 --- a/src/context.rs +++ b/src/context.rs @@ -40,8 +40,8 @@ use crate::utils::{get_tokio_runtime, wait_for_future}; use datafusion::arrow::datatypes::{DataType, Schema}; use datafusion::arrow::pyarrow::PyArrowType; use datafusion::arrow::record_batch::RecordBatch; -use datafusion::datasource::datasource::TableProvider; use datafusion::datasource::MemTable; +use datafusion::datasource::TableProvider; use datafusion::execution::context::{SessionConfig, SessionContext, TaskContext}; use datafusion::execution::disk_manager::DiskManagerConfig; use datafusion::execution::memory_pool::{FairSpillPool, GreedyMemoryPool, UnboundedMemoryPool}; diff --git a/src/dataframe.rs b/src/dataframe.rs index 7e1ce03b5..00c12e854 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -20,7 +20,7 @@ use crate::sql::logical::PyLogicalPlan; use crate::utils::wait_for_future; use crate::{errors::DataFusionError, expr::PyExpr}; use datafusion::arrow::datatypes::Schema; -use datafusion::arrow::pyarrow::{PyArrowConvert, PyArrowType}; +use datafusion::arrow::pyarrow::{PyArrowType, ToPyArrow}; use datafusion::arrow::util::pretty; use datafusion::dataframe::DataFrame; use datafusion::prelude::*; diff --git a/src/dataset.rs b/src/dataset.rs index 0a2c7f50f..713610c51 100644 --- a/src/dataset.rs +++ b/src/dataset.rs @@ -28,10 +28,10 @@ use async_trait::async_trait; use datafusion::arrow::datatypes::SchemaRef; use datafusion::arrow::pyarrow::PyArrowType; -use 
datafusion::datasource::datasource::TableProviderFilterPushDown; use datafusion::datasource::{TableProvider, TableType}; use datafusion::error::{DataFusionError, Result as DFResult}; use datafusion::execution::context::SessionState; +use datafusion::logical_expr::TableProviderFilterPushDown; use datafusion::physical_plan::ExecutionPlan; use datafusion_expr::Expr; diff --git a/src/dataset_exec.rs b/src/dataset_exec.rs index 859678856..b9d348f79 100644 --- a/src/dataset_exec.rs +++ b/src/dataset_exec.rs @@ -239,7 +239,7 @@ impl ExecutionPlan for DatasetExec { Python::with_gil(|py| { let number_of_fragments = self.fragments.as_ref(py).len(); match t { - DisplayFormatType::Default => { + DisplayFormatType::Default | DisplayFormatType::Verbose => { let projected_columns: Vec = self .schema .fields() diff --git a/src/physical_plan.rs b/src/physical_plan.rs index 6f02cefaa..ab783221b 100644 --- a/src/physical_plan.rs +++ b/src/physical_plan.rs @@ -51,7 +51,7 @@ impl PyExecutionPlan { pub fn display_indent(&self) -> String { let d = displayable(self.plan.as_ref()); - format!("{}", d.indent()) + format!("{}", d.indent(false)) } fn __repr__(&self) -> String { diff --git a/src/record_batch.rs b/src/record_batch.rs index 15b70e8ce..aa3a392da 100644 --- a/src/record_batch.rs +++ b/src/record_batch.rs @@ -16,7 +16,7 @@ // under the License. 
use crate::utils::wait_for_future; -use datafusion::arrow::pyarrow::PyArrowConvert; +use datafusion::arrow::pyarrow::ToPyArrow; use datafusion::arrow::record_batch::RecordBatch; use datafusion::physical_plan::SendableRecordBatchStream; use futures::StreamExt; diff --git a/src/udaf.rs b/src/udaf.rs index 756fa5659..ae2e81a08 100644 --- a/src/udaf.rs +++ b/src/udaf.rs @@ -21,10 +21,10 @@ use pyo3::{prelude::*, types::PyTuple}; use datafusion::arrow::array::{Array, ArrayRef}; use datafusion::arrow::datatypes::DataType; -use datafusion::arrow::pyarrow::{PyArrowConvert, PyArrowType}; +use datafusion::arrow::pyarrow::{PyArrowType, ToPyArrow}; use datafusion::common::ScalarValue; use datafusion::error::{DataFusionError, Result}; -use datafusion_expr::{create_udaf, Accumulator, AccumulatorFunctionImplementation, AggregateUDF}; +use datafusion_expr::{create_udaf, Accumulator, AccumulatorFactoryFunction, AggregateUDF}; use crate::expr::PyExpr; use crate::utils::parse_volatility; @@ -95,7 +95,7 @@ impl Accumulator for RustAccumulator { } } -pub fn to_rust_accumulator(accum: PyObject) -> AccumulatorFunctionImplementation { +pub fn to_rust_accumulator(accum: PyObject) -> AccumulatorFactoryFunction { Arc::new(move |_| -> Result> { let accum = Python::with_gil(|py| { accum diff --git a/src/udf.rs b/src/udf.rs index dcb9cd572..417e5f327 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -21,7 +21,7 @@ use pyo3::{prelude::*, types::PyTuple}; use datafusion::arrow::array::{make_array, Array, ArrayData, ArrayRef}; use datafusion::arrow::datatypes::DataType; -use datafusion::arrow::pyarrow::{PyArrowConvert, PyArrowType}; +use datafusion::arrow::pyarrow::{FromPyArrow, PyArrowType, ToPyArrow}; use datafusion::error::DataFusionError; use datafusion::physical_plan::functions::make_scalar_function; use datafusion::physical_plan::udf::ScalarUDF; From 58bdbd88d52a57b98a50eb5f5155b7ee3ecce47e Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Sat, 22 Jul 2023 10:05:06 -0400 Subject: [PATCH 055/413] 
File based input utils (#433) * checkpoint commit * Introduce BaseSessionContext abstract class * Introduce abstract methods for CRUD schema operations * Clean up schema.rs file * Introduce CRUD methods for table instances * Add function to drop_table * Add schema_name to drop_table function * remove unused parameter in SqlTable new * Update function to allow for modifying existing tables * Add functionality for generating SqlTable information from input sources * Add functionality for generating SqlTable information from input sources * Adding a utility method to convert arrow type strings to DataType instances * Add method to DataTypeMap for getting the DataType from an Arrow type string instance * Add function fqn which retrieves the table catalog, schema, and name from the TableReference * Add datatype mapping for parquet int96 -> nanosecond timestamp * Cargo check fixes --- Cargo.lock | 255 +++++++++++++++++---------------- datafusion/context.py | 5 +- datafusion/input/__init__.py | 22 +++ datafusion/input/base.py | 43 ++++++ datafusion/input/location.py | 88 ++++++++++++ datafusion/tests/test_input.py | 33 +++++ src/common/data_type.rs | 56 +++++++- src/expr/table_scan.rs | 26 +++- 8 files changed, 398 insertions(+), 130 deletions(-) create mode 100644 datafusion/input/__init__.py create mode 100644 datafusion/input/base.py create mode 100644 datafusion/input/location.py create mode 100644 datafusion/tests/test_input.py diff --git a/Cargo.lock b/Cargo.lock index 21124d4fd..a6c80f61e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -62,9 +62,9 @@ dependencies = [ [[package]] name = "allocator-api2" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56fc6cf8dc8c4158eed8649f9b8b0ea1518eb62b544fe9490d66fa0b349eafe9" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" [[package]] name = "android-tzdata" @@ -83,9 +83,9 @@ dependencies = [ [[package]] name = "anyhow" -version = 
"1.0.71" +version = "1.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" +checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854" [[package]] name = "apache-avro" @@ -343,9 +343,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0122885821398cc923ece939e24d1056a2384ee719432397fa9db87230ff11" +checksum = "62b74f44609f0f91493e3082d3734d98497e094777144380ea4db9f9905dd5b6" dependencies = [ "bzip2", "flate2", @@ -367,18 +367,18 @@ checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" dependencies = [ "proc-macro2", "quote", - "syn 2.0.22", + "syn 2.0.26", ] [[package]] name = "async-trait" -version = "0.1.68" +version = "0.1.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" +checksum = "a564d521dd56509c4c47480d00b80ee55f7e385ae48db5744c67ad50c92d2ebf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.22", + "syn 2.0.26", ] [[package]] @@ -440,9 +440,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "729b71f35bd3fa1a4c86b85d32c8b9069ea7fe14f7a53cfabb65f62d4265b888" +checksum = "199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5" dependencies = [ "arrayref", "arrayvec", @@ -606,9 +606,9 @@ dependencies = [ [[package]] name = "constant_time_eq" -version = "0.2.6" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21a53c0a4d288377e7415b53dcfc3c04da5cdc2cc95c8d5ac178b58f0b861ad6" +checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" [[package]] name = "core-foundation-sys" @@ -618,9 +618,9 @@ checksum = 
"e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "cpufeatures" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03e69e28e9f7f77debdedbaafa2866e1de9ba56df55a8bd7cfc724c25a09987c" +checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" dependencies = [ "libc", ] @@ -673,12 +673,12 @@ dependencies = [ [[package]] name = "dashmap" -version = "5.4.0" +version = "5.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc" +checksum = "6943ae99c34386c84a470c499d3414f66502a41340aa895406e0d2e4a207b91d" dependencies = [ "cfg-if", - "hashbrown 0.12.3", + "hashbrown 0.14.0", "lock_api", "once_cell", "parking_lot_core", @@ -782,7 +782,7 @@ dependencies = [ "lazy_static", "sqlparser", "strum 0.25.0", - "strum_macros 0.25.0", + "strum_macros 0.25.1", ] [[package]] @@ -857,7 +857,7 @@ dependencies = [ "pyo3-build-config", "rand", "regex-syntax", - "syn 2.0.22", + "syn 2.0.26", "tokio", "url", "uuid", @@ -925,9 +925,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "dyn-clone" -version = "1.0.11" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68b0cf012f1230e43cd00ebb729c6bb58707ecfa8ad08b52ef3a4ccd2697fc30" +checksum = "304e6508efa593091e97a9abbc10f90aa7ca635b6d2784feff3c89d41dd12272" [[package]] name = "either" @@ -946,9 +946,9 @@ dependencies = [ [[package]] name = "equivalent" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88bffebc5d80432c9b140ee17875ff173a8ab62faad5b257da912bd2f6c1c0a1" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" @@ -1077,7 +1077,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" 
dependencies = [ "proc-macro2", "quote", - "syn 2.0.22", + "syn 2.0.26", ] [[package]] @@ -1219,9 +1219,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" +checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" [[package]] name = "http" @@ -1289,10 +1289,11 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.24.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0646026eb1b3eea4cd9ba47912ea5ce9cc07713d105b1a14698f4e6433d348b7" +checksum = "8d78e1e73ec14cf7375674f74d7dde185c8206fd9dea6fb6295e8a98098aaa97" dependencies = [ + "futures-util", "http", "hyper", "rustls", @@ -1411,9 +1412,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.6" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" [[package]] name = "jobserver" @@ -1689,9 +1690,9 @@ checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" [[package]] name = "num" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" +checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" dependencies = [ "num-bigint", "num-complex", @@ -1896,9 +1897,9 @@ dependencies = [ [[package]] name = "paste" -version = "1.0.12" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79" +checksum = 
"de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" [[package]] name = "percent-encoding" @@ -1956,9 +1957,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.9" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" +checksum = "4c40d25201921e5ff0c862a505c6557ea88568a4e3ace775ab55e93f2f4f9d57" [[package]] name = "pin-utils" @@ -1980,12 +1981,12 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "prettyplease" -version = "0.2.9" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9825a04601d60621feed79c4e6b56d65db77cdca55cef43b46b0de1096d1c282" +checksum = "92139198957b410250d43fad93e630d956499a625c527eda65175c8680f83387" dependencies = [ "proc-macro2", - "syn 2.0.22", + "syn 2.0.26", ] [[package]] @@ -1996,9 +1997,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.63" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" dependencies = [ "unicode-ident", ] @@ -2066,9 +2067,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.19.0" +version = "0.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cffef52f74ec3b1a1baf295d9b8fcc3070327aefc39a6d00656b13c1d0b8885c" +checksum = "ffb88ae05f306b4bfcde40ac4a51dc0b05936a9207a4b75b798c7729c4258a59" dependencies = [ "cfg-if", "indoc", @@ -2083,9 +2084,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.19.0" +version = "0.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"713eccf888fb05f1a96eb78c0dbc51907fee42b3377272dc902eb38985f418d5" +checksum = "554db24f0b3c180a9c0b1268f91287ab3f17c162e15b54caaae5a6b3773396b0" dependencies = [ "once_cell", "target-lexicon", @@ -2093,9 +2094,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.19.0" +version = "0.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b2ecbdcfb01cbbf56e179ce969a048fd7305a66d4cdf3303e0da09d69afe4c3" +checksum = "922ede8759e8600ad4da3195ae41259654b9c55da4f7eec84a0ccc7d067a70a4" dependencies = [ "libc", "pyo3-build-config", @@ -2103,9 +2104,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.19.0" +version = "0.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b78fdc0899f2ea781c463679b20cb08af9247febc8d052de941951024cd8aea0" +checksum = "8a5caec6a1dd355964a841fcbeeb1b89fe4146c87295573f94228911af3cc5a2" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -2115,9 +2116,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.19.0" +version = "0.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60da7b84f1227c3e2fe7593505de274dcf4c8928b4e0a1c23d551a14e4e80a0f" +checksum = "e0b78ccbb160db1556cdb6fd96c50334c5d4ec44dc5e0a968d0a1208fa0efa8b" dependencies = [ "proc-macro2", "quote", @@ -2142,9 +2143,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.29" +version = "1.0.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105" +checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0" dependencies = [ "proc-macro2", ] @@ -2190,9 +2191,21 @@ dependencies = [ [[package]] name = "regex" -version = "1.8.4" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" +dependencies = [ + "aho-corasick", + 
"memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f" +checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310" dependencies = [ "aho-corasick", "memchr", @@ -2201,9 +2214,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.7.2" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" +checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" [[package]] name = "regress" @@ -2294,9 +2307,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.21" +version = "0.37.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f25693a73057a1b4cb56179dd3c7ea21a7c6c5ee7d85781f5749b46f34b79c" +checksum = "4d69718bf81c6127a49dc64e44a742e8bb9213c0ff8869a22c308f84c1d4ab06" dependencies = [ "bitflags 1.3.2", "errno", @@ -2308,9 +2321,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.21.2" +version = "0.21.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e32ca28af694bc1bbf399c33a516dbdf1c90090b8ab23c2bc24f834aa2247f5f" +checksum = "79ea77c539259495ce8ca47f53e66ae0330a8819f67e23ac96ca02f50e7b7d36" dependencies = [ "log", "ring", @@ -2329,9 +2342,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.100.1" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6207cd5ed3d8dca7816f8f3725513a34609c0c765bf652b8c3cb4cfd87db46b" +checksum = "15f36a6828982f422756984e47912a7a51dcbc2a197aa791158f8ca61cd8204e" dependencies = [ "ring", "untrusted", @@ -2339,15 +2352,15 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.12" +version = "1.0.14" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06" +checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" [[package]] name = "ryu" -version = "1.0.13" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" [[package]] name = "same-file" @@ -2384,9 +2397,9 @@ dependencies = [ [[package]] name = "scopeguard" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "sct" @@ -2400,34 +2413,34 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" +checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" [[package]] name = "seq-macro" -version = "0.3.3" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc" +checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.164" +version = "1.0.173" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d" +checksum = "e91f70896d6720bc714a4a57d22fc91f1db634680e65c8efe13323f1fa38d53f" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.164" +version = "1.0.173" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68" +checksum = "a6250dde8342e0232232be9ca3db7aa40aceb5a3e5dd9bddbc00d99a007cde49" dependencies = [ "proc-macro2", "quote", - "syn 2.0.22", + "syn 2.0.26", ] [[package]] @@ -2443,9 +2456,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.99" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46266871c240a00b8f503b877622fe33430b3c7d963bdc0f2adc511e54a1eae3" +checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b" dependencies = [ "itoa", "ryu", @@ -2461,7 +2474,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.22", + "syn 2.0.26", ] [[package]] @@ -2478,9 +2491,9 @@ dependencies = [ [[package]] name = "serde_yaml" -version = "0.9.22" +version = "0.9.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "452e67b9c20c37fa79df53201dc03839651086ed9bbe92b3ca585ca9fdaa7d85" +checksum = "bd5f51e3fdb5b9cdd1577e1cb7a733474191b1aca6a72c2e50913241632c1180" dependencies = [ "indexmap 2.0.0", "itoa", @@ -2517,15 +2530,15 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" +checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" [[package]] name = "snafu" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb0656e7e3ffb70f6c39b3c2a86332bb74aa3c679da781642590f3c1118c5045" +checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" dependencies = [ "doc-comment", "snafu-derive", @@ -2533,9 +2546,9 @@ dependencies = [ [[package]] name = "snafu-derive" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"475b3bbe5245c26f2d8a6f62d67c1f30eb9fffeccee721c45d162c3ebbdf81b2" +checksum = "990079665f075b699031e9c08fd3ab99be5029b96f3b78dc0709e8f77e4efebf" dependencies = [ "heck", "proc-macro2", @@ -2604,7 +2617,7 @@ version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" dependencies = [ - "strum_macros 0.25.0", + "strum_macros 0.25.1", ] [[package]] @@ -2622,15 +2635,15 @@ dependencies = [ [[package]] name = "strum_macros" -version = "0.25.0" +version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe9f3bd7d2e45dcc5e265fbb88d6513e4747d8ef9444cf01a533119bce28a157" +checksum = "6069ca09d878a33f883cc06aaa9718ede171841d3832450354410b718b097232" dependencies = [ "heck", "proc-macro2", "quote", "rustversion", - "syn 2.0.22", + "syn 2.0.26", ] [[package]] @@ -2651,7 +2664,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.22", + "syn 2.0.26", "typify", "walkdir", ] @@ -2675,9 +2688,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.22" +version = "2.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2efbeae7acf4eabd6bcdcbd11c92f45231ddda7539edc7806bd1a04a03b24616" +checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970" dependencies = [ "proc-macro2", "quote", @@ -2686,9 +2699,9 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.12.8" +version = "0.12.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b1c7f239eb94671427157bd93b3694320f3668d4e1eff08c7285366fd777fac" +checksum = "1d2faeef5759ab89935255b1a4cd98e0baf99d1085e37d36599c625dac49ae8e" [[package]] name = "tempfile" @@ -2706,22 +2719,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.40" +version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" +checksum = "a35fc5b8971143ca348fa6df4f024d4d55264f3468c71ad1c2f365b0a4d58c42" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.40" +version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" +checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.22", + "syn 2.0.26", ] [[package]] @@ -2786,7 +2799,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.22", + "syn 2.0.26", ] [[package]] @@ -2839,7 +2852,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2", "quote", - "syn 2.0.22", + "syn 2.0.26", ] [[package]] @@ -2907,7 +2920,7 @@ dependencies = [ "regress", "schemars", "serde_json", - "syn 2.0.22", + "syn 2.0.26", "thiserror", "unicode-ident", ] @@ -2924,7 +2937,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.22", + "syn 2.0.26", "typify-impl", ] @@ -2936,9 +2949,9 @@ checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" [[package]] name = "unicode-ident" -version = "1.0.9" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" [[package]] name = "unicode-normalization" @@ -2969,9 +2982,9 @@ checksum = "e1766d682d402817b5ac4490b3c3002d91dfa0d22812f341609f97b08757359c" [[package]] name = "unsafe-libyaml" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1865806a559042e51ab5414598446a5871b561d21b6764f2eabb0dd481d880a6" +checksum = 
"f28467d3e1d3c6586d8f25fa243f544f5800fec42d97032474e17222c2b75cfa" [[package]] name = "untrusted" @@ -2992,9 +3005,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d023da39d1fde5a8a3fe1f3e01ca9632ada0a63e9797de55a879d6e2236277be" +checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d" dependencies = [ "getrandom", "serde", @@ -3058,7 +3071,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.22", + "syn 2.0.26", "wasm-bindgen-shared", ] @@ -3092,7 +3105,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.22", + "syn 2.0.26", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3303,18 +3316,18 @@ dependencies = [ [[package]] name = "zstd" -version = "0.12.3+zstd.1.5.2" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806" +checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "6.0.5+zstd.1.5.4" +version = "6.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d56d9e60b4b1758206c238a10165fbcae3ca37b01744e394c463463f6529d23b" +checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" dependencies = [ "libc", "zstd-sys", diff --git a/datafusion/context.py b/datafusion/context.py index 6292177f7..30602402c 100644 --- a/datafusion/context.py +++ b/datafusion/context.py @@ -16,7 +16,7 @@ # under the License. 
from abc import ABC, abstractmethod -from typing import Dict, List +from typing import Any, Dict, List from datafusion.common import SqlSchema, SqlTable @@ -79,8 +79,9 @@ def show_schemas(self, **kwargs) -> Dict[str, SqlSchema]: @abstractmethod def create_table( self, + schema_name: str, table_name: str, - schema_name: str = None, + input_source: Any, **kwargs, ): """ diff --git a/datafusion/input/__init__.py b/datafusion/input/__init__.py new file mode 100644 index 000000000..27e39b8ca --- /dev/null +++ b/datafusion/input/__init__.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from .location import LocationInputPlugin + +__all__ = [ + LocationInputPlugin, +] diff --git a/datafusion/input/base.py b/datafusion/input/base.py new file mode 100644 index 000000000..fb1207896 --- /dev/null +++ b/datafusion/input/base.py @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from abc import ABC, abstractmethod +from typing import Any + +from datafusion.common import SqlTable + + +class BaseInputSource(ABC): + """ + If a consuming library would like to provider their own InputSource + this is the class they should extend to write their own. Once + completed the Plugin InputSource can be registered with the + SessionContext to ensure that it will be used in order + to obtain the SqlTable information from the custom datasource. + """ + + @abstractmethod + def is_correct_input( + self, input_item: Any, table_name: str, **kwargs + ) -> bool: + pass + + @abstractmethod + def build_table( + self, input_item: Any, table_name: str, **kwarg + ) -> SqlTable: + pass diff --git a/datafusion/input/location.py b/datafusion/input/location.py new file mode 100644 index 000000000..121407581 --- /dev/null +++ b/datafusion/input/location.py @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +import os +from typing import Any + +from datafusion.common import DataTypeMap, SqlTable +from datafusion.input.base import BaseInputSource + + +class LocationInputPlugin(BaseInputSource): + """ + Input Plugin for everything, which can be read + in from a file (on disk, remote etc.) + """ + + def is_correct_input(self, input_item: Any, table_name: str, **kwargs): + return isinstance(input_item, str) + + def build_table( + self, + input_file: str, + table_name: str, + **kwargs, + ) -> SqlTable: + _, extension = os.path.splitext(input_file) + format = extension.lstrip(".").lower() + num_rows = 0 # Total number of rows in the file. Used for statistics + columns = [] + + if format == "parquet": + import pyarrow.parquet as pq + + # Read the Parquet metadata + metadata = pq.read_metadata(input_file) + num_rows = metadata.num_rows + + # Iterate through the schema and build the SqlTable + print(f"Metadata Schema: {metadata.schema}") + for col in metadata.schema: + columns.append( + ( + col.name, + DataTypeMap.from_parquet_type_str(col.physical_type), + ) + ) + + elif format == "csv": + import csv + + # Consume header row and count number of rows for statistics. + # TODO: Possibly makes sense to have the eager number of rows + # calculated as a configuration since you must read the entire file + # to get that information. However, this should only be occuring + # at table creation time and therefore shouldn't + # slow down query performance. + with open(input_file, "r") as file: + reader = csv.reader(file) + header_row = next(reader) + print(header_row) + for _ in reader: + num_rows += 1 + + # TODO: Need to actually consume this row into resonable columns + raise RuntimeError( + "TODO: Currently unable to support CSV input files." + ) + else: + raise RuntimeError( + f"Input of format: `{format}` is currently not supported.\ + Only Parquet and CSV." 
+ ) + + return SqlTable(table_name, columns, num_rows, input_file) diff --git a/datafusion/tests/test_input.py b/datafusion/tests/test_input.py new file mode 100644 index 000000000..1e2ef4166 --- /dev/null +++ b/datafusion/tests/test_input.py @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import os +from datafusion.input.location import LocationInputPlugin + + +def test_location_input(): + location_input = LocationInputPlugin() + + cwd = os.getcwd() + input_file = ( + cwd + "/testing/data/parquet/generated_simple_numerics/blogs.parquet" + ) + table_name = "blog" + tbl = location_input.build_table(input_file, table_name) + assert "blog" == tbl.name + assert 3 == len(tbl.columns) + assert "blogs.parquet" in tbl.filepath diff --git a/src/common/data_type.rs b/src/common/data_type.rs index 85d2febb5..d7db138a4 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -17,7 +17,7 @@ use datafusion::arrow::datatypes::{DataType, IntervalUnit, TimeUnit}; use datafusion_common::{DataFusionError, ScalarValue}; -use pyo3::prelude::*; +use pyo3::{exceptions::PyValueError, prelude::*}; use crate::errors::py_datafusion_err; @@ -303,12 +303,43 @@ impl DataTypeMap { } } + #[staticmethod] + #[pyo3(name = "from_parquet_type_str")] + /// When using pyarrow.parquet.read_metadata().schema.column(x).physical_type you are presented + /// with a String type for schema rather than an object type. Here we make a best effort + /// to convert that to a physical type. + pub fn py_map_from_parquet_type_str(parquet_str_type: String) -> PyResult { + let arrow_dtype = match parquet_str_type.to_lowercase().as_str() { + "boolean" => Ok(DataType::Boolean), + "int32" => Ok(DataType::Int32), + "int64" => Ok(DataType::Int64), + "int96" => { + // Int96 is an old datatype that is now deprecated. We convert to nanosecond timestamp + Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)) + } + "float" => Ok(DataType::Float32), + "double" => Ok(DataType::Float64), + _ => Err(PyValueError::new_err(format!( + "Unable to determine Arrow Data Type from Parquet String type: {:?}", + parquet_str_type + ))), + }; + DataTypeMap::map_from_arrow_type(&arrow_dtype?) 
+ } + #[staticmethod] #[pyo3(name = "arrow")] pub fn py_map_from_arrow_type(arrow_type: &PyDataType) -> PyResult { DataTypeMap::map_from_arrow_type(&arrow_type.data_type) } + #[staticmethod] + #[pyo3(name = "arrow_str")] + pub fn py_map_from_arrow_type_str(arrow_type_str: String) -> PyResult { + let data_type = PyDataType::py_map_from_arrow_type_str(arrow_type_str); + DataTypeMap::map_from_arrow_type(&data_type?.data_type) + } + #[staticmethod] #[pyo3(name = "sql")] pub fn py_map_from_sql_type(sql_type: &SqlType) -> PyResult { @@ -558,6 +589,29 @@ pub struct PyDataType { pub data_type: DataType, } +impl PyDataType { + /// There are situations when obtaining dtypes on the Python side where the Arrow type + /// is presented as a String rather than an actual DataType. This function is used to + /// convert that String to a DataType for the Python side to use. + pub fn py_map_from_arrow_type_str(arrow_str_type: String) -> PyResult { + let arrow_dtype = match arrow_str_type.to_lowercase().as_str() { + "boolean" => Ok(DataType::Boolean), + "int32" => Ok(DataType::Int32), + "int64" => Ok(DataType::Int64), + "float" => Ok(DataType::Float32), + "double" => Ok(DataType::Float64), + "float64" => Ok(DataType::Float64), + _ => Err(PyValueError::new_err(format!( + "Unable to determine Arrow Data Type from Arrow String type: {:?}", + arrow_str_type + ))), + }; + Ok(PyDataType { + data_type: arrow_dtype?, + }) + } +} + impl From for DataType { fn from(data_type: PyDataType) -> DataType { data_type.data_type diff --git a/src/expr/table_scan.rs b/src/expr/table_scan.rs index ac848d9eb..8fafd12ee 100644 --- a/src/expr/table_scan.rs +++ b/src/expr/table_scan.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. 
+use datafusion_common::TableReference; use datafusion_expr::logical_plan::TableScan; use pyo3::prelude::*; use std::fmt::{self, Display, Formatter}; @@ -71,12 +72,25 @@ impl PyTableScan { Ok(format!("{}", self.table_scan.table_name)) } - /// TODO: Bindings for `TableSource` need to exist first. Left as a - /// placeholder to display intention to add when able to. - // #[pyo3(name = "source")] - // fn py_source(&self) -> PyResult> { - // Ok(self.table_scan.source) - // } + #[pyo3(name = "fqn")] + fn fqn(&self) -> PyResult<(Option, Option, String)> { + let table_ref: TableReference = self.table_scan.table_name.clone(); + Ok(match table_ref { + TableReference::Bare { table } => (None, None, table.to_string()), + TableReference::Partial { schema, table } => { + (None, Some(schema.to_string()), table.to_string()) + } + TableReference::Full { + catalog, + schema, + table, + } => ( + Some(catalog.to_string()), + Some(schema.to_string()), + table.to_string(), + ), + }) + } /// The column indexes that should be. Note if this is empty then /// all columns should be read by the `TableProvider`. 
This function From 5793db39bd785479209bcbbce6e1740fc7e08215 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 22 Jul 2023 19:43:04 +0100 Subject: [PATCH 056/413] Upgrade to 28.0.0-rc1 (#434) --- Cargo.lock | 252 +++++++++++++---------------- Cargo.toml | 14 +- datafusion/tests/test_functions.py | 4 +- src/common/data_type.rs | 7 + src/dataset_exec.rs | 13 +- src/expr.rs | 35 ++-- src/expr/projection.rs | 4 +- src/functions.rs | 3 +- 8 files changed, 153 insertions(+), 179 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a6c80f61e..a491051f4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -128,9 +128,9 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773d18d72cd290f3f9e2149a714c8ac404b6c3fd614c684f0015449940fca899" +checksum = "2feeebd77b34b0bc88f224e06d01c27da4733997cc4789a4e056196656cdc59a" dependencies = [ "ahash", "arrow-arith", @@ -151,9 +151,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93bc0da4b22ba63807fa2a74998e21209179c93c67856ae65d9218b81f3ef918" +checksum = "7173f5dc49c0ecb5135f52565af33afd3fdc9a12d13bd6f9973e8b96305e4b2e" dependencies = [ "arrow-array", "arrow-buffer", @@ -166,9 +166,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea9a0fd21121304cad96f307c938d861cb1e7f0c151b93047462cd9817d760fb" +checksum = "63d7ea725f7d1f8bb2cffc53ef538557e95fc802e217d5be25122d402e22f3d0" dependencies = [ "ahash", "arrow-buffer", @@ -183,9 +183,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"30ce342ecf5971004e23cef8b5fb3bacd2bbc48a381464144925074e1472e9eb" +checksum = "bdbe439e077f484e5000b9e1d47b5e4c0d15f2b311a8f5bcc682553d5d67a722" dependencies = [ "half", "num", @@ -193,9 +193,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b94a0ce7d27abbb02e2ee4db770f593127610f57b32625b0bc6a1a90d65f085" +checksum = "93913cc14875770aa1eef5e310765e855effa352c094cb1c7c00607d0f37b4e1" dependencies = [ "arrow-array", "arrow-buffer", @@ -211,9 +211,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f3be10a00a43c4bf0d243c070754ebdde17c5d576b4928d9c3efbe3005a3853" +checksum = "ef55b67c55ed877e6fe7b923121c19dae5e31ca70249ea2779a17b58fb0fbd9a" dependencies = [ "arrow-array", "arrow-buffer", @@ -230,9 +230,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d9a83dad6a53d6907765106d3bc61d6d9d313cfe1751701b3ef0948e7283dc2" +checksum = "d4f4f4a3c54614126a71ab91f6631c9743eb4643d6e9318b74191da9dc6e028b" dependencies = [ "arrow-buffer", "arrow-schema", @@ -242,9 +242,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a46da5e438a854e0386b38774da88a98782c0973c6dbc5c949ca4e02faf9b016" +checksum = "d41a3659f984a524ef1c2981d43747b24d8eec78e2425267fcd0ef34ce71cd18" dependencies = [ "arrow-array", "arrow-buffer", @@ -256,9 +256,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5f27a1fbc76553ad92dc1a9583e56b7058d8c418c4089b0b689f5b87e2da5e1" +checksum = 
"10b95faa95a378f56ef32d84cc0104ea998c39ef7cd1faaa6b4cebf8ea92846d" dependencies = [ "arrow-array", "arrow-buffer", @@ -267,7 +267,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 1.9.3", + "indexmap 2.0.0", "lexical-core", "num", "serde", @@ -276,9 +276,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2373661f6c2233e18f6fa69c40999a9440231d1e8899be8bbbe73c7e24aa3b4" +checksum = "c68549a4284d9f8b39586afb8d5ff8158b8f0286353a4844deb1d11cf1ba1f26" dependencies = [ "arrow-array", "arrow-buffer", @@ -291,9 +291,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "377cd5158b7de4034a175e296726c40c3236e65d71d90a5dab2fb4fab526a8f4" +checksum = "0a75a4a757afc301ce010adadff54d79d66140c4282ed3de565f6ccb716a5cf3" dependencies = [ "ahash", "arrow-array", @@ -306,18 +306,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba9ed245bd2d7d97ad1457cb281d4296e8b593588758b8fec6d67b2b2b0f2265" +checksum = "2bebcb57eef570b15afbcf2d07d813eb476fde9f6dd69c81004d6476c197e87e" dependencies = [ "bitflags 2.3.3", ] [[package]] name = "arrow-select" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dc9bd6aebc565b1d04bae64a0f4dda3abc677190eb7d960471b1b20e1cebed0" +checksum = "f6e2943fa433a48921e914417173816af64eef61c0a3d448280e6c40a62df221" dependencies = [ "arrow-array", "arrow-buffer", @@ -328,15 +328,16 @@ dependencies = [ [[package]] name = "arrow-string" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23cf2baea2ef53787332050decf7d71aca836a352e188c8ad062892405955d2b" +checksum = 
"bbc92ed638851774f6d7af1ad900b92bc1486746497511868b4298fcbcfa35af" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", + "num", "regex", "regex-syntax", ] @@ -367,18 +368,18 @@ checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" dependencies = [ "proc-macro2", "quote", - "syn 2.0.26", + "syn 2.0.27", ] [[package]] name = "async-trait" -version = "0.1.71" +version = "0.1.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a564d521dd56509c4c47480d00b80ee55f7e385ae48db5744c67ad50c92d2ebf" +checksum = "cc6dde6e4ed435a4c1ee4e73592f5ba9da2151af10076cc04858746af9352d09" dependencies = [ "proc-macro2", "quote", - "syn 2.0.26", + "syn 2.0.27", ] [[package]] @@ -686,9 +687,8 @@ dependencies = [ [[package]] name = "datafusion" -version = "27.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e96f6e4eb10bd3e6b709686858246466983e8c5354a928ff77ee34919aa60d00" +version = "28.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=28.0.0-rc1#51b4392577554becf637a8adcefa0e7fdc79e41f" dependencies = [ "ahash", "apache-avro", @@ -706,13 +706,13 @@ dependencies = [ "datafusion-expr", "datafusion-optimizer", "datafusion-physical-expr", - "datafusion-row", "datafusion-sql", "flate2", "futures", "glob", + "half", "hashbrown 0.14.0", - "indexmap 1.9.3", + "indexmap 2.0.0", "itertools 0.11.0", "lazy_static", "log", @@ -737,9 +737,8 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "27.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00e5fddcc0dd49bbe199e43aa406f39c46c790bb2a43c7b36a478e5f3f971235" +version = "28.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=28.0.0-rc1#51b4392577554becf637a8adcefa0e7fdc79e41f" dependencies = [ "apache-avro", "arrow", @@ -754,9 +753,8 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "27.0.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfd50b6cb17acc78d2473c0d28014b8fd4e2e0a2c067c07645d6547b33b0aeeb" +version = "28.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=28.0.0-rc1#51b4392577554becf637a8adcefa0e7fdc79e41f" dependencies = [ "dashmap", "datafusion-common", @@ -772,9 +770,8 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "27.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1a35dc2cd9eac18063d636f7ddf4f090fe1f34284d80192ac7ade38cc3c6991" +version = "28.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=28.0.0-rc1#51b4392577554becf637a8adcefa0e7fdc79e41f" dependencies = [ "ahash", "arrow", @@ -787,9 +784,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "27.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f5043afeb45ec1c0f45519e1eed6a477f2d30732e8f975d9cf9a75fba0ca716" +version = "28.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=28.0.0-rc1#51b4392577554becf637a8adcefa0e7fdc79e41f" dependencies = [ "arrow", "async-trait", @@ -805,27 +801,28 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "27.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6cc892a24f4b829ee7718ad3950884c0346dbdf1517f3df153af4bcf54d8ca4d" +version = "28.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=28.0.0-rc1#51b4392577554becf637a8adcefa0e7fdc79e41f" dependencies = [ "ahash", "arrow", "arrow-array", "arrow-buffer", "arrow-schema", + "base64", "blake2", "blake3", "chrono", "datafusion-common", "datafusion-expr", - "datafusion-row", "half", "hashbrown 0.14.0", - "indexmap 1.9.3", + "hex", + "indexmap 2.0.0", "itertools 0.11.0", "lazy_static", "libc", + "log", "md-5", "paste", "petgraph", @@ -838,7 +835,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "27.0.0" +version = 
"28.0.0" dependencies = [ "async-trait", "datafusion", @@ -857,29 +854,16 @@ dependencies = [ "pyo3-build-config", "rand", "regex-syntax", - "syn 2.0.26", + "syn 2.0.27", "tokio", "url", "uuid", ] -[[package]] -name = "datafusion-row" -version = "27.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce75c660bbddfdd254109e668e5b5bd69df31ea26e3768e15cef0c68015e650e" -dependencies = [ - "arrow", - "datafusion-common", - "paste", - "rand", -] - [[package]] name = "datafusion-sql" -version = "27.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49cab87e4933a452e0b7b3f0cbd0e760daf7d33fb54d09d70d3ffba229eaa652" +version = "28.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=28.0.0-rc1#51b4392577554becf637a8adcefa0e7fdc79e41f" dependencies = [ "arrow", "arrow-schema", @@ -891,9 +875,8 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "27.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba77d22232053f6cdd98bd6f5328940850844450253f25b8c50bfc5199c505d4" +version = "28.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=28.0.0-rc1#51b4392577554becf637a8adcefa0e7fdc79e41f" dependencies = [ "async-recursion", "chrono", @@ -931,9 +914,9 @@ checksum = "304e6508efa593091e97a9abbc10f90aa7ca635b6d2784feff3c89d41dd12272" [[package]] name = "either" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" [[package]] name = "encoding_rs" @@ -973,12 +956,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "1.9.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" -dependencies = [ - "instant", -] 
+checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764" [[package]] name = "fixedbitset" @@ -1077,7 +1057,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.26", + "syn 2.0.27", ] [[package]] @@ -1223,6 +1203,12 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "http" version = "0.2.9" @@ -1360,32 +1346,12 @@ version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfa799dd5ed20a7e349f3b4639aa80d74549c81716d9ec4f994c9b5815598306" -[[package]] -name = "instant" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" -dependencies = [ - "cfg-if", -] - [[package]] name = "integer-encoding" version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" -[[package]] -name = "io-lifetimes" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" -dependencies = [ - "hermit-abi", - "libc", - "windows-sys", -] - [[package]] name = "ipnet" version = "2.8.0" @@ -1572,9 +1538,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.3.8" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" +checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0" [[package]] name 
= "lock_api" @@ -1757,9 +1723,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" dependencies = [ "autocfg", "libm", @@ -1854,9 +1820,9 @@ dependencies = [ [[package]] name = "parquet" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baab9c36b1c8300b81b4d577d306a0a733f9d34021363098d3548e37757ed6c8" +checksum = "ec7267a9607c3f955d4d0ac41b88a67cecc0d8d009173ad3da390699a6cb3750" dependencies = [ "ahash", "arrow-array", @@ -1981,12 +1947,12 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "prettyplease" -version = "0.2.10" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92139198957b410250d43fad93e630d956499a625c527eda65175c8680f83387" +checksum = "6c64d9ba0963cdcea2e1b2230fbae2bab30eb25a174be395c41e764bfb65dd62" dependencies = [ "proc-macro2", - "syn 2.0.26", + "syn 2.0.27", ] [[package]] @@ -2307,13 +2273,12 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.23" +version = "0.38.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d69718bf81c6127a49dc64e44a742e8bb9213c0ff8869a22c308f84c1d4ab06" +checksum = "0a962918ea88d644592894bc6dc55acc6c0956488adcebbfb6e273506b7fd6e5" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.3.3", "errno", - "io-lifetimes", "libc", "linux-raw-sys", "windows-sys", @@ -2425,22 +2390,22 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.173" +version = "1.0.174" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e91f70896d6720bc714a4a57d22fc91f1db634680e65c8efe13323f1fa38d53f" +checksum = "3b88756493a5bd5e5395d53baa70b194b05764ab85b59e43e4b8f4e1192fa9b1" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.173" +version = "1.0.174" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6250dde8342e0232232be9ca3db7aa40aceb5a3e5dd9bddbc00d99a007cde49" +checksum = "6e5c3a298c7f978e53536f95a63bdc4c4a64550582f31a0359a9afda6aede62e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.26", + "syn 2.0.27", ] [[package]] @@ -2474,7 +2439,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.26", + "syn 2.0.27", ] [[package]] @@ -2491,9 +2456,9 @@ dependencies = [ [[package]] name = "serde_yaml" -version = "0.9.24" +version = "0.9.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd5f51e3fdb5b9cdd1577e1cb7a733474191b1aca6a72c2e50913241632c1180" +checksum = "1a49e178e4452f45cb61d0cd8cebc1b0fafd3e41929e996cef79aa3aca91f574" dependencies = [ "indexmap 2.0.0", "itoa", @@ -2643,14 +2608,14 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.26", + "syn 2.0.27", ] [[package]] name = "substrait" -version = "0.11.0" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d3b77ddddd080d1bb5ebfe6b62d1c4e2f33c9f6a4586d5eac5306a08f3d4585" +checksum = "2ac1ce8315086b127ca0abf162c62279550942bb26ebf7946fe17fe114446472" dependencies = [ "git2", "heck", @@ -2664,7 +2629,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.26", + "syn 2.0.27", "typify", "walkdir", ] @@ -2688,9 +2653,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.26" +version = "2.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970" +checksum = "b60f673f44a8255b9c8c657daf66a596d435f2da81a555b06dc644d080ba45e0" dependencies = [ "proc-macro2", 
"quote", @@ -2705,11 +2670,10 @@ checksum = "1d2faeef5759ab89935255b1a4cd98e0baf99d1085e37d36599c625dac49ae8e" [[package]] name = "tempfile" -version = "3.6.0" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31c0432476357e58790aaa47a8efb0c5138f137343f3b5f23bd36a27e3b0a6d6" +checksum = "5486094ee78b2e5038a6382ed7645bc084dc2ec433426ca4c3cb61e2007b8998" dependencies = [ - "autocfg", "cfg-if", "fastrand", "redox_syscall", @@ -2719,22 +2683,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.43" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a35fc5b8971143ca348fa6df4f024d4d55264f3468c71ad1c2f365b0a4d58c42" +checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.43" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f" +checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96" dependencies = [ "proc-macro2", "quote", - "syn 2.0.26", + "syn 2.0.27", ] [[package]] @@ -2799,7 +2763,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.26", + "syn 2.0.27", ] [[package]] @@ -2852,7 +2816,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2", "quote", - "syn 2.0.26", + "syn 2.0.27", ] [[package]] @@ -2920,7 +2884,7 @@ dependencies = [ "regress", "schemars", "serde_json", - "syn 2.0.26", + "syn 2.0.27", "thiserror", "unicode-ident", ] @@ -2937,7 +2901,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.26", + "syn 2.0.27", "typify-impl", ] @@ -3071,7 +3035,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.26", + "syn 2.0.27", 
"wasm-bindgen-shared", ] @@ -3105,7 +3069,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.26", + "syn 2.0.27", "wasm-bindgen-backend", "wasm-bindgen-shared", ] diff --git a/Cargo.toml b/Cargo.toml index 5ca57c68d..a460d2ccc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "27.0.0" +version = "28.0.0" homepage = "https://github.com/apache/arrow-datafusion-python" repository = "https://github.com/apache/arrow-datafusion-python" authors = ["Apache Arrow "] @@ -36,12 +36,12 @@ protoc = [ "datafusion-substrait/protoc" ] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.19", features = ["extension-module", "abi3", "abi3-py38"] } -datafusion = { version = "27.0.0" , features = ["pyarrow", "avro"] } -datafusion-common = { version = "27.0.0", features = ["pyarrow"] } -datafusion-expr = "27.0.0" -datafusion-optimizer = "27.0.0" -datafusion-sql = "27.0.0" -datafusion-substrait = "27.0.0" +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "28.0.0-rc1", features = ["pyarrow", "avro"] } +datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "28.0.0-rc1", features = ["pyarrow"] } +datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "28.0.0-rc1" } +datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "28.0.0-rc1" } +datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "28.0.0-rc1" } +datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "28.0.0-rc1" } prost = "0.11" prost-types = "0.11" uuid = { version = "1.3", features = ["v4"] } diff --git a/datafusion/tests/test_functions.py b/datafusion/tests/test_functions.py index bea580859..ec334747c 100644 --- a/datafusion/tests/test_functions.py 
+++ b/datafusion/tests/test_functions.py @@ -382,11 +382,11 @@ def test_temporal_functions(df): assert result.column(1) == pa.array([2022, 2027, 2020], type=pa.float64()) assert result.column(2) == pa.array( [datetime(2022, 12, 1), datetime(2027, 6, 1), datetime(2020, 7, 1)], - type=pa.timestamp("ns"), + type=pa.timestamp("us"), ) assert result.column(3) == pa.array( [datetime(2022, 12, 31), datetime(2027, 6, 26), datetime(2020, 7, 2)], - type=pa.timestamp("ns"), + type=pa.timestamp("us"), ) assert result.column(4) == pa.array( [ diff --git a/src/common/data_type.rs b/src/common/data_type.rs index d7db138a4..fe5f6f060 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -288,6 +288,13 @@ impl DataTypeMap { ScalarValue::List(_val, field_ref) => Ok(DataType::List(field_ref.to_owned())), ScalarValue::Struct(_, fields) => Ok(DataType::Struct(fields.to_owned())), ScalarValue::FixedSizeBinary(size, _) => Ok(DataType::FixedSizeBinary(*size)), + ScalarValue::Fixedsizelist(_, field_ref, size) => { + Ok(DataType::FixedSizeList(field_ref.to_owned(), *size)) + } + ScalarValue::DurationSecond(_) => Ok(DataType::Duration(TimeUnit::Second)), + ScalarValue::DurationMillisecond(_) => Ok(DataType::Duration(TimeUnit::Millisecond)), + ScalarValue::DurationMicrosecond(_) => Ok(DataType::Duration(TimeUnit::Microsecond)), + ScalarValue::DurationNanosecond(_) => Ok(DataType::Duration(TimeUnit::Nanosecond)), } } } diff --git a/src/dataset_exec.rs b/src/dataset_exec.rs index b9d348f79..686bbf74c 100644 --- a/src/dataset_exec.rs +++ b/src/dataset_exec.rs @@ -35,7 +35,8 @@ use datafusion::execution::context::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, Statistics, + DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, + Statistics, }; use 
datafusion_expr::Expr; use datafusion_optimizer::utils::conjunction; @@ -235,6 +236,12 @@ impl ExecutionPlan for DatasetExec { }) } + fn statistics(&self) -> Statistics { + self.projected_statistics.clone() + } +} + +impl DisplayAs for DatasetExec { fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter) -> std::fmt::Result { Python::with_gil(|py| { let number_of_fragments = self.fragments.as_ref(py).len(); @@ -267,8 +274,4 @@ impl ExecutionPlan for DatasetExec { } }) } - - fn statistics(&self) -> Statistics { - self.projected_statistics.clone() - } } diff --git a/src/expr.rs b/src/expr.rs index 17b6c34f5..2fd638a13 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -115,7 +115,7 @@ impl PyExpr { /// Return the specific expression fn to_variant(&self, py: Python) -> PyResult { Python::with_gil(|_| match &self.expr { - Expr::Alias(alias, name) => Ok(PyAlias::new(alias, name).into_py(py)), + Expr::Alias(alias) => Ok(PyAlias::new(&alias.expr, &alias.name).into_py(py)), Expr::Column(col) => Ok(PyColumn::from(col.clone()).into_py(py)), Expr::ScalarVariable(data_type, variables) => { Ok(PyScalarVariable::new(data_type, variables).into_py(py)) @@ -265,7 +265,6 @@ impl PyExpr { | Expr::Negative(..) | Expr::IsNull(..) | Expr::Like { .. } - | Expr::ILike { .. } | Expr::SimilarTo { .. } | Expr::Between { .. } | Expr::Case { .. 
} @@ -336,8 +335,13 @@ impl PyExpr { ScalarValue::IntervalYearMonth(v) => v.into_py(py), ScalarValue::IntervalDayTime(v) => v.into_py(py), ScalarValue::IntervalMonthDayNano(v) => v.into_py(py), + ScalarValue::DurationSecond(v) => v.into_py(py), + ScalarValue::DurationMicrosecond(v) => v.into_py(py), + ScalarValue::DurationNanosecond(v) => v.into_py(py), + ScalarValue::DurationMillisecond(v) => v.into_py(py), ScalarValue::Struct(_, _) => todo!(), ScalarValue::Dictionary(_, _) => todo!(), + ScalarValue::Fixedsizelist(_, _, _) => todo!(), }), _ => Err(py_type_err(format!( "Non Expr::Literal encountered in types: {:?}", @@ -356,9 +360,10 @@ impl PyExpr { Ok(vec![PyExpr::from(self.expr.clone())]) } + Expr::Alias(alias) => Ok(vec![PyExpr::from(*alias.expr.clone())]), + // Expr(s) that house the Expr instance to return in their bounded params - Expr::Alias(expr, ..) - | Expr::Not(expr) + Expr::Not(expr) | Expr::IsNull(expr) | Expr::IsNotNull(expr) | Expr::IsTrue(expr) @@ -429,10 +434,6 @@ impl PyExpr { PyExpr::from(*expr.clone()), PyExpr::from(*pattern.clone()), ]), - Expr::ILike(Like { expr, pattern, .. }) => Ok(vec![ - PyExpr::from(*expr.clone()), - PyExpr::from(*pattern.clone()), - ]), Expr::SimilarTo(Like { expr, pattern, .. }) => Ok(vec![ PyExpr::from(*expr.clone()), PyExpr::from(*pattern.clone()), @@ -486,18 +487,16 @@ impl PyExpr { Expr::InList { .. } => "in list".to_string(), Expr::Negative(..) => "negative".to_string(), Expr::Not(..) => "not".to_string(), - Expr::Like(Like { negated, .. }) => { - if *negated { - "not like".to_string() - } else { - "like".to_string() - } - } - Expr::ILike(Like { negated, .. }) => { + Expr::Like(Like { + negated, + case_insensitive, + .. + }) => { + let name = if *case_insensitive { "ilike" } else { "like" }; if *negated { - "not ilike".to_string() + format!("not {name}") } else { - "ilike".to_string() + name.to_string() } } Expr::SimilarTo(Like { negated, .. 
}) => { diff --git a/src/expr/projection.rs b/src/expr/projection.rs index b3296618f..8c1423df4 100644 --- a/src/expr/projection.rs +++ b/src/expr/projection.rs @@ -98,8 +98,8 @@ impl PyProjection { pub fn projected_expressions(local_expr: &PyExpr) -> Vec { let mut projs: Vec = Vec::new(); match &local_expr.expr { - Expr::Alias(expr, _name) => { - let py_expr: PyExpr = PyExpr::from(*expr.clone()); + Expr::Alias(alias) => { + let py_expr: PyExpr = PyExpr::from(*alias.expr.clone()); projs.extend_from_slice(Self::projected_expressions(&py_expr).as_slice()); } _ => projs.push(local_expr.clone()), diff --git a/src/functions.rs b/src/functions.rs index c82361a3b..8b60e6433 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -18,6 +18,7 @@ use pyo3::{prelude::*, wrap_pyfunction}; use datafusion_common::Column; +use datafusion_expr::expr::Alias; use datafusion_expr::{ aggregate_function, expr::{AggregateFunction, ScalarFunction, Sort, WindowFunction}, @@ -85,7 +86,7 @@ fn order_by(expr: PyExpr, asc: Option, nulls_first: Option) -> PyRes #[pyfunction] fn alias(expr: PyExpr, name: &str) -> PyResult { Ok(PyExpr { - expr: datafusion_expr::Expr::Alias(Box::new(expr.expr), String::from(name)), + expr: datafusion_expr::Expr::Alias(Alias::new(expr.expr, name)), }) } From 93f8063f5a0d6058b3a3406ab87c84426b37a3b7 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 24 Jul 2023 08:46:26 -0400 Subject: [PATCH 057/413] Introduces utility for obtaining SqlTable information from a file like location (#398) * checkpoint commit * Introduce BaseSessionContext abstract class * Introduce abstract methods for CRUD schema operations * Clean up schema.rs file * Introduce CRUD methods for table instances * Add function to drop_table * Add schema_name to drop_table function * remove unused parameter in SqlTable new * Update function to allow for modifying existing tables * Add functionality for generating SqlTable information from input sources * Add functionality for generating SqlTable 
information from input sources * Adding a utility method to convert arrow type strings to DataType instances * Add method to DataTypeMap for getting the DataType from an Arrow type string instance * Adjust pytests * Add back deprecated int96 parquet datatype --- datafusion/input/location.py | 1 - src/common/data_type.rs | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/datafusion/input/location.py b/datafusion/input/location.py index 121407581..efbc82f23 100644 --- a/datafusion/input/location.py +++ b/datafusion/input/location.py @@ -50,7 +50,6 @@ def build_table( num_rows = metadata.num_rows # Iterate through the schema and build the SqlTable - print(f"Metadata Schema: {metadata.schema}") for col in metadata.schema: columns.append( ( diff --git a/src/common/data_type.rs b/src/common/data_type.rs index fe5f6f060..199fb616e 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -321,7 +321,7 @@ impl DataTypeMap { "int32" => Ok(DataType::Int32), "int64" => Ok(DataType::Int64), "int96" => { - // Int96 is an old datatype that is now deprecated. We convert to nanosecond timestamp + // Int96 is an old parquet datatype that is now deprecated. 
We convert to nanosecond timestamp Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)) } "float" => Ok(DataType::Float32), From 309fc486c47d86776aeec07d86cd04b5d70d97a1 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 24 Jul 2023 05:47:17 -0700 Subject: [PATCH 058/413] feat: expose offset in python API (#437) --- datafusion/tests/test_dataframe.py | 11 +++++++++++ datafusion/tests/test_expr.py | 8 ++++++++ src/dataframe.rs | 5 +++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py index 4df2061e1..78cb50f12 100644 --- a/datafusion/tests/test_dataframe.py +++ b/datafusion/tests/test_dataframe.py @@ -115,6 +115,17 @@ def test_limit(df): assert len(result.column(1)) == 1 +def test_limit_with_offset(df): + # only 3 rows, but limit past the end to ensure that offset is working + df = df.limit(5, offset=2) + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert len(result.column(0)) == 1 + assert len(result.column(1)) == 1 + + def test_with_column(df): df = df.with_column("c", column("a") + column("b")) diff --git a/datafusion/tests/test_expr.py b/datafusion/tests/test_expr.py index 0c4869f27..73f7d087a 100644 --- a/datafusion/tests/test_expr.py +++ b/datafusion/tests/test_expr.py @@ -81,6 +81,14 @@ def test_limit(test_ctx): plan = plan.to_variant() assert isinstance(plan, Limit) + assert plan.skip() == 0 + + df = test_ctx.sql("select c1 from test LIMIT 10 OFFSET 5") + plan = df.logical_plan() + + plan = plan.to_variant() + assert isinstance(plan, Limit) + assert plan.skip() == 5 def test_aggregate_query(test_ctx): diff --git a/src/dataframe.rs b/src/dataframe.rs index 00c12e854..b8d8ddc3c 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -136,8 +136,9 @@ impl PyDataFrame { Ok(Self::new(df)) } - fn limit(&self, count: usize) -> PyResult { - let df = self.df.as_ref().clone().limit(0, Some(count))?; 
+ #[pyo3(signature = (count, offset=0))] + fn limit(&self, count: usize, offset: usize) -> PyResult { + let df = self.df.as_ref().clone().limit(offset, Some(count))?; Ok(Self::new(df)) } From ffd15410c01868f5ed62c5fb2db2a460b42e06b3 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 25 Jul 2023 17:01:34 +0100 Subject: [PATCH 059/413] Use DataFusion 28 (#439) --- Cargo.lock | 40 ++++++++++++++++++++++++---------------- Cargo.toml | 12 ++++++------ 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a491051f4..85634fd11 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -688,7 +688,8 @@ dependencies = [ [[package]] name = "datafusion" version = "28.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=28.0.0-rc1#51b4392577554becf637a8adcefa0e7fdc79e41f" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ddbcb2dda5b5033537457992ebde78938014390b2b19f9f4282e3be0e18b0c3" dependencies = [ "ahash", "apache-avro", @@ -738,7 +739,8 @@ dependencies = [ [[package]] name = "datafusion-common" version = "28.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=28.0.0-rc1#51b4392577554becf637a8adcefa0e7fdc79e41f" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85fbb7b4da925031311743ab96662d55f0f7342d3692744f184f99b2257ef435" dependencies = [ "apache-avro", "arrow", @@ -754,7 +756,8 @@ dependencies = [ [[package]] name = "datafusion-execution" version = "28.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=28.0.0-rc1#51b4392577554becf637a8adcefa0e7fdc79e41f" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bb3617466d894eb0ad11d06bab1e6e89c571c0a27d660685d327d0c6e1e1ccd" dependencies = [ "dashmap", "datafusion-common", @@ -771,7 +774,8 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "28.0.0" -source = 
"git+https://github.com/apache/arrow-datafusion.git?rev=28.0.0-rc1#51b4392577554becf637a8adcefa0e7fdc79e41f" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bd8220a0dfcdfddcc785cd7e71770ef1ce54fbe1e08984e5adf537027ecb6de" dependencies = [ "ahash", "arrow", @@ -785,7 +789,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "28.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=28.0.0-rc1#51b4392577554becf637a8adcefa0e7fdc79e41f" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d685a100c66952aaadd0cbe766df46d1887d58fc8bcf3589e6387787f18492b" dependencies = [ "arrow", "async-trait", @@ -802,7 +807,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "28.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=28.0.0-rc1#51b4392577554becf637a8adcefa0e7fdc79e41f" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f2c635da9b05b4b4c6c8d935f46fd99f9b6225f834091cf4e3c8a045b68beab" dependencies = [ "ahash", "arrow", @@ -863,7 +869,8 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "28.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=28.0.0-rc1#51b4392577554becf637a8adcefa0e7fdc79e41f" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3ef8abf4dd84d3f20c910822b52779c035ab7f4f2d5e7125ede3bae618e9de8" dependencies = [ "arrow", "arrow-schema", @@ -876,7 +883,8 @@ dependencies = [ [[package]] name = "datafusion-substrait" version = "28.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=28.0.0-rc1#51b4392577554becf637a8adcefa0e7fdc79e41f" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c97d351bbd6bd6497e7c9606ddd3c00cd63e9d185d7ab96fc8a66cf3c449177" dependencies = [ "async-recursion", "chrono", @@ -1526,9 +1534,9 @@ dependencies = [ [[package]] name = "libz-sys" -version = "1.1.9" +version = 
"1.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56ee889ecc9568871456d42f603d6a0ce59ff328d291063a45cbdf0036baf6db" +checksum = "24e6ab01971eb092ffe6a7d42f49f9ff42662f17604681e2843ad65077ba47dc" dependencies = [ "cc", "libc", @@ -2109,9 +2117,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.31" +version = "1.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0" +checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965" dependencies = [ "proc-macro2", ] @@ -2390,18 +2398,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.174" +version = "1.0.175" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b88756493a5bd5e5395d53baa70b194b05764ab85b59e43e4b8f4e1192fa9b1" +checksum = "5d25439cd7397d044e2748a6fe2432b5e85db703d6d097bd014b3c0ad1ebff0b" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.174" +version = "1.0.175" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e5c3a298c7f978e53536f95a63bdc4c4a64550582f31a0359a9afda6aede62e" +checksum = "b23f7ade6f110613c0d63858ddb8b94c1041f550eab58a16b371bdf2c9c80ab4" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index a460d2ccc..8b09bc50c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,12 +36,12 @@ protoc = [ "datafusion-substrait/protoc" ] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.19", features = ["extension-module", "abi3", "abi3-py38"] } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "28.0.0-rc1", features = ["pyarrow", "avro"] } -datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "28.0.0-rc1", features = ["pyarrow"] } 
-datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "28.0.0-rc1" } -datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "28.0.0-rc1" } -datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "28.0.0-rc1" } -datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "28.0.0-rc1" } +datafusion = { version = "28.0.0", features = ["pyarrow", "avro"] } +datafusion-common = { version = "28.0.0", features = ["pyarrow"] } +datafusion-expr = { version = "28.0.0" } +datafusion-optimizer = { version = "28.0.0" } +datafusion-sql = { version = "28.0.0" } +datafusion-substrait = { version = "28.0.0" } prost = "0.11" prost-types = "0.11" uuid = { version = "1.3", features = ["v4"] } From 92ca34bd18f72b533066712dffd51ac23a95fb81 Mon Sep 17 00:00:00 2001 From: Goksel Kabadayi Date: Thu, 3 Aug 2023 06:39:56 +0300 Subject: [PATCH 060/413] Build Linux aarch64 wheel (#443) * Build Linux aarch64 wheel * Remove unnecessary platform tag --- .github/workflows/build.yml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fe06b9c86..e72c4805a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -180,6 +180,36 @@ jobs: name: dist path: target/wheels/* + build-manylinux-aarch64: + needs: [generate-license] + name: Manylinux arm64 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - run: rm LICENSE.txt + - name: Download LICENSE.txt + uses: actions/download-artifact@v3 + with: + name: python-wheel-license + path: . 
+ - run: cat LICENSE.txt + - name: Build wheels + uses: PyO3/maturin-action@v1 + env: + RUST_BACKTRACE: 1 + with: + rust-toolchain: nightly + target: aarch64 + # Use manylinux_2_28-cross because the manylinux2014-cross has GCC 4.8.5, which causes the build to fail + manylinux: 2_28 + rustup-components: rust-std rustfmt # Keep them in one line due to https://github.com/PyO3/maturin-action/issues/153 + args: --release --features protoc + - name: Archive wheels + uses: actions/upload-artifact@v3 + with: + name: dist + path: target/wheels/* + build-sdist: needs: [generate-license] name: Source distribution From 1fde8e47ae6714f02c2d2a088577e92e814dfa34 Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Sat, 5 Aug 2023 20:12:30 +0200 Subject: [PATCH 061/413] feat: add case function (#447) (#448) --- datafusion/tests/test_functions.py | 22 ++++++++++++ src/expr.rs | 1 + src/expr/conditional_expr.rs | 54 ++++++++++++++++++++++++++++++ src/functions.rs | 15 +++++++-- 4 files changed, 89 insertions(+), 3 deletions(-) create mode 100644 src/expr/conditional_expr.rs diff --git a/datafusion/tests/test_functions.py b/datafusion/tests/test_functions.py index ec334747c..c5ddea352 100644 --- a/datafusion/tests/test_functions.py +++ b/datafusion/tests/test_functions.py @@ -411,3 +411,25 @@ def test_temporal_functions(df): assert result.column(9) == pa.array( [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us") ) + + +def test_case(df): + df = df.select( + f.case(column("b")) + .when(literal(4), literal(10)) + .otherwise(literal(8)), + f.case(column("a")) + .when(literal("Hello"), literal("Hola")) + .when(literal("World"), literal("Mundo")) + .otherwise(literal("!!")), + f.case(column("a")) + .when(literal("Hello"), literal("Hola")) + .when(literal("World"), literal("Mundo")) + .end(), + ) + + result = df.collect() + result = result[0] + assert result.column(0) == pa.array([10, 8, 8]) + assert result.column(1) == pa.array(["Hola", "Mundo", "!!"]) + assert 
result.column(2) == pa.array(["Hola", "Mundo", None]) diff --git a/src/expr.rs b/src/expr.rs index 2fd638a13..d1022e905 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -59,6 +59,7 @@ pub mod bool_expr; pub mod case; pub mod cast; pub mod column; +pub mod conditional_expr; pub mod create_memory_table; pub mod create_view; pub mod cross_join; diff --git a/src/expr/conditional_expr.rs b/src/expr/conditional_expr.rs new file mode 100644 index 000000000..96ef58f56 --- /dev/null +++ b/src/expr/conditional_expr.rs @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::expr::PyExpr; +use datafusion_expr::conditional_expressions::CaseBuilder; +use pyo3::prelude::*; + +#[pyclass(name = "CaseBuilder", module = "datafusion.expr", subclass)] +pub struct PyCaseBuilder { + pub case_builder: CaseBuilder, +} + +impl From for CaseBuilder { + fn from(case_builder: PyCaseBuilder) -> Self { + case_builder.case_builder + } +} + +impl From for PyCaseBuilder { + fn from(case_builder: CaseBuilder) -> PyCaseBuilder { + PyCaseBuilder { case_builder } + } +} + +#[pymethods] +impl PyCaseBuilder { + fn when(&mut self, when: PyExpr, then: PyExpr) -> PyCaseBuilder { + PyCaseBuilder { + case_builder: self.case_builder.when(when.expr, then.expr), + } + } + + fn otherwise(&mut self, else_expr: PyExpr) -> PyResult { + Ok(self.case_builder.otherwise(else_expr.expr)?.clone().into()) + } + + fn end(&mut self) -> PyResult { + Ok(self.case_builder.end()?.clone().into()) + } +} diff --git a/src/functions.rs b/src/functions.rs index 8b60e6433..06d35ef3a 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -17,6 +17,9 @@ use pyo3::{prelude::*, wrap_pyfunction}; +use crate::errors::DataFusionError; +use crate::expr::conditional_expr::PyCaseBuilder; +use crate::expr::PyExpr; use datafusion_common::Column; use datafusion_expr::expr::Alias; use datafusion_expr::{ @@ -27,9 +30,6 @@ use datafusion_expr::{ BuiltinScalarFunction, Expr, WindowFrame, }; -use crate::errors::DataFusionError; -use crate::expr::PyExpr; - #[pyfunction] fn in_list(expr: PyExpr, value: Vec, negated: bool) -> PyExpr { datafusion_expr::in_list( @@ -115,6 +115,14 @@ fn count_star() -> PyResult { }) } +/// Create a CASE WHEN statement with literal WHEN expressions for comparison to the base expression. 
+#[pyfunction] +fn case(expr: PyExpr) -> PyResult { + Ok(PyCaseBuilder { + case_builder: datafusion_expr::case(expr.expr), + }) +} + /// Creates a new Window function expression #[pyfunction] fn window( @@ -355,6 +363,7 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(chr))?; m.add_wrapped(wrap_pyfunction!(char_length))?; m.add_wrapped(wrap_pyfunction!(coalesce))?; + m.add_wrapped(wrap_pyfunction!(case))?; m.add_wrapped(wrap_pyfunction!(col))?; m.add_wrapped(wrap_pyfunction!(concat_ws))?; m.add_wrapped(wrap_pyfunction!(concat))?; From e34d203fd63760f8660adee251817df2afb542b0 Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Thu, 10 Aug 2023 02:03:34 +0200 Subject: [PATCH 062/413] enhancement(docs): Add user guide (#432) (#445) --- docs/requirements.txt | 4 +- docs/source/api.rst | 1 - docs/source/conf.py | 1 + .../source/contributor-guide/introduction.rst | 85 ++++++ docs/source/index.rst | 263 +++--------------- docs/source/user-guide/basics.rst | 89 ++++++ .../common-operations/aggregations.rst | 60 ++++ .../common-operations/basic-info.rst | 61 ++++ .../common-operations/functions.rst | 117 ++++++++ .../user-guide/common-operations/index.rst | 30 ++ .../user-guide/common-operations/joins.rst | 104 +++++++ .../common-operations/select-and-filter.rst | 67 +++++ .../common-operations/udf-and-udfa.rst | 85 ++++++ .../user-guide/common-operations/windows.rst | 93 +++++++ docs/source/user-guide/introduction.rst | 43 +++ docs/source/user-guide/io/avro.rst | 30 ++ docs/source/user-guide/io/csv.rst | 36 +++ docs/source/user-guide/io/index.rst | 28 ++ docs/source/user-guide/io/json.rst | 29 ++ docs/source/user-guide/io/parquet.rst | 36 +++ docs/source/user-guide/sql.rst | 39 +++ 21 files changed, 1075 insertions(+), 226 deletions(-) create mode 100644 docs/source/contributor-guide/introduction.rst create mode 100644 docs/source/user-guide/basics.rst create mode 100644 docs/source/user-guide/common-operations/aggregations.rst 
create mode 100644 docs/source/user-guide/common-operations/basic-info.rst create mode 100644 docs/source/user-guide/common-operations/functions.rst create mode 100644 docs/source/user-guide/common-operations/index.rst create mode 100644 docs/source/user-guide/common-operations/joins.rst create mode 100644 docs/source/user-guide/common-operations/select-and-filter.rst create mode 100644 docs/source/user-guide/common-operations/udf-and-udfa.rst create mode 100644 docs/source/user-guide/common-operations/windows.rst create mode 100644 docs/source/user-guide/introduction.rst create mode 100644 docs/source/user-guide/io/avro.rst create mode 100644 docs/source/user-guide/io/csv.rst create mode 100644 docs/source/user-guide/io/index.rst create mode 100644 docs/source/user-guide/io/json.rst create mode 100644 docs/source/user-guide/io/parquet.rst create mode 100644 docs/source/user-guide/sql.rst diff --git a/docs/requirements.txt b/docs/requirements.txt index 8eb744968..eb7f5b7bb 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -19,4 +19,6 @@ sphinx==5.3.0 pydata-sphinx-theme==0.8.0 myst-parser maturin -jinja2 \ No newline at end of file +jinja2 +ipython +pandas \ No newline at end of file diff --git a/docs/source/api.rst b/docs/source/api.rst index a5d65433d..d9f4a09dd 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -24,7 +24,6 @@ API Reference .. 
toctree:: :maxdepth: 2 - api/config api/dataframe api/execution_context api/expression diff --git a/docs/source/conf.py b/docs/source/conf.py index 929c24930..0822e0ab6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -52,6 +52,7 @@ "sphinx.ext.viewcode", "sphinx.ext.napoleon", "myst_parser", + "IPython.sphinxext.ipython_directive", ] source_suffix = { diff --git a/docs/source/contributor-guide/introduction.rst b/docs/source/contributor-guide/introduction.rst new file mode 100644 index 000000000..dd61ad8fc --- /dev/null +++ b/docs/source/contributor-guide/introduction.rst @@ -0,0 +1,85 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Introduction +============ +We welcome and encourage contributions of all kinds, such as: + +1. Tickets with issue reports or feature requests +2. Documentation improvements +3. Code, both PR and (especially) PR Review. + +In addition to submitting new PRs, we have a healthy tradition of community members reviewing each other’s PRs. +Doing so is a great way to help the community as well as get more familiar with Rust and the relevant codebases. + +How to develop +-------------- + +This assumes that you have rust and cargo installed.
We use the workflow recommended by `pyo3 `_ and `maturin `_. + +Bootstrap: + +.. code-block:: shell + + # fetch this repo + git clone git@github.com:apache/arrow-datafusion-python.git + # prepare development environment (used to build wheel / install in development) + python3 -m venv venv + # activate the venv + source venv/bin/activate + # update pip itself if necessary + python -m pip install -U pip + # install dependencies (for Python 3.8+) + python -m pip install -r requirements-310.txt + +The tests rely on test data in git submodules. + +.. code-block:: shell + + git submodule init + git submodule update + + +Whenever rust code changes (your changes or via `git pull`): + +.. code-block:: shell + + # make sure you activate the venv using "source venv/bin/activate" first + maturin develop + python -m pytest + + +Update Dependencies +------------------- + +To change test dependencies, change the `requirements.in` and run + +.. code-block:: shell + + # install pip-tools (this can be done only once), also consider running in venv + python -m pip install pip-tools + python -m piptools compile --generate-hashes -o requirements-310.txt + + +To update dependencies, run with `-U` + +.. code-block:: shell + + python -m piptools compile -U --generate-hashes -o requirements-310.txt + + +More details about pip-tools `here `_ diff --git a/docs/source/index.rst b/docs/source/index.rst index 78f44ea17..155b0cf9f 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -31,12 +31,17 @@ Its query engine, DataFusion, is written in `Rust `_, Technically, zero-copy is achieved via the `c data interface `_. -How to use it -============= +Install +------- -Simple usage: +.. code-block:: shell + + pip install datafusion -.. code-block:: python +Example +------- + +.. 
ipython:: python import datafusion from datafusion import col @@ -50,7 +55,7 @@ Simple usage: [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])], names=["a", "b"], ) - df = ctx.create_dataframe([[batch]]) + df = ctx.create_dataframe([[batch]], name="batch_array") # create a new statement df = df.select( @@ -58,234 +63,44 @@ Simple usage: col("a") - col("b"), ) - # execute and collect the first (and only) batch - result = df.collect()[0] - - assert result.column(0) == pyarrow.array([5, 7, 9]) - assert result.column(1) == pyarrow.array([-3, -3, -3]) - - -We can also execute a query against data stored in CSV - -.. code-block:: bash - - echo "a,b\n1,4\n2,5\n3,6" > example.csv - - -.. code-block:: python - - import datafusion - from datafusion import col - import pyarrow - - # create a context - ctx = datafusion.SessionContext() - - # register a CSV - ctx.register_csv('example', 'example.csv') - - # create a new statement - df = ctx.table('example').select( - col("a") + col("b"), - col("a") - col("b"), - ) - - # execute and collect the first (and only) batch - result = df.collect()[0] - - assert result.column(0) == pyarrow.array([5, 7, 9]) - assert result.column(1) == pyarrow.array([-3, -3, -3]) - - -And how to execute a query against a CSV using SQL: - - -.. code-block:: python - - import datafusion - from datafusion import col - import pyarrow - - # create a context - ctx = datafusion.SessionContext() - - # register a CSV - ctx.register_csv('example', 'example.csv') - - # create a new statement via SQL - df = ctx.sql("SELECT a+b, a-b FROM example") - - # execute and collect the first (and only) batch - result = df.collect()[0] - - assert result.column(0) == pyarrow.array([5, 7, 9]) - assert result.column(1) == pyarrow.array([-3, -3, -3]) - - - -UDFs ----- - -.. 
code-block:: python - - import pyarrow - from datafusion import udf - - def is_null(array: pyarrow.Array) -> pyarrow.Array: - return array.is_null() - - is_null_arr = udf(is_null, [pyarrow.int64()], pyarrow.bool_(), 'stable') - - # create a context - ctx = datafusion.SessionContext() - - # create a RecordBatch and a new DataFrame from it - batch = pyarrow.RecordBatch.from_arrays( - [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])], - names=["a", "b"], - ) - df = ctx.create_dataframe([[batch]]) - - df = df.select(is_null_arr(col("a"))) - - result = df.collect()[0] - - assert result.column(0) == pyarrow.array([False] * 3) - + df -UDAF ----- - -.. code-block:: python - - import pyarrow - import pyarrow.compute - import datafusion - from datafusion import udaf, Accumulator - from datafusion import col - - - class MyAccumulator(Accumulator): - """ - Interface of a user-defined accumulation. - """ - def __init__(self): - self._sum = pyarrow.scalar(0.0) - - def update(self, values: pyarrow.Array) -> None: - # not nice since pyarrow scalars can't be summed yet. This breaks on `None` - self._sum = pyarrow.scalar(self._sum.as_py() + pyarrow.compute.sum(values).as_py()) - - def merge(self, states: pyarrow.Array) -> None: - # not nice since pyarrow scalars can't be summed yet. 
This breaks on `None` - self._sum = pyarrow.scalar(self._sum.as_py() + pyarrow.compute.sum(states).as_py()) - - def state(self) -> pyarrow.Array: - return pyarrow.array([self._sum.as_py()]) - - def evaluate(self) -> pyarrow.Scalar: - return self._sum - - # create a context - ctx = datafusion.SessionContext() - - # create a RecordBatch and a new DataFrame from it - batch = pyarrow.RecordBatch.from_arrays( - [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])], - names=["a", "b"], - ) - df = ctx.create_dataframe([[batch]]) - - my_udaf = udaf(MyAccumulator, pyarrow.float64(), pyarrow.float64(), [pyarrow.float64()], 'stable') - - df = df.aggregate( - [], - [my_udaf(col("a"))] - ) - - result = df.collect()[0] - - assert result.column(0) == pyarrow.array([6.0]) - -How to install (from pip) -========================= - -.. code-block:: shell - - pip install datafusion - -You can verify the installation by running: - -.. code-block:: python - - >>> import datafusion - >>> datafusion.__version__ - '0.6.0' - - -How to develop -============== - -This assumes that you have rust and cargo installed. We use the workflow recommended by `pyo3 `_ and `maturin `_. - -Bootstrap: - -.. code-block:: shell - - # fetch this repo - git clone git@github.com:apache/arrow-datafusion-python.git - # prepare development environment (used to build wheel / install in development) - python3 -m venv venv - # activate the venv - source venv/bin/activate - # update pip itself if necessary - python -m pip install -U pip - # install dependencies (for Python 3.8+) - python -m pip install -r requirements-310.txt - -The tests rely on test data in git submodules. - -.. code-block:: shell - - git submodule init - git submodule update - - -Whenever rust code changes (your changes or via `git pull`): - -.. 
code-block:: shell - - # make sure you activate the venv using "source venv/bin/activate" first - maturin develop - python -m pytest - - -How to update dependencies -========================== - -To change test dependencies, change the `requirements.in` and run - -.. code-block:: shell - - # install pip-tools (this can be done only once), also consider running in venv - python -m pip install pip-tools - python -m piptools compile --generate-hashes -o requirements-310.txt +.. _toc.links: +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: LINKS -To update dependencies, run with `-U` + Github and Issue Tracker + Rust's API Docs + Code of conduct -.. code-block:: shell - - python -m piptools compile -U --generate-hashes -o requirements-310.txt +.. _toc.guide: +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: USER GUIDE + user-guide/introduction + user-guide/basics + user-guide/common-operations/index + user-guide/io/index + user-guide/sql -More details about pip-tools `here `_ +.. _toc.contributor_guide: +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: CONTRIBUTOR GUIDE -API reference -============= + contributor-guide/introduction +.. _toc.api: .. toctree:: - :maxdepth: 2 + :hidden: + :maxdepth: 1 + :caption: API api diff --git a/docs/source/user-guide/basics.rst b/docs/source/user-guide/basics.rst new file mode 100644 index 000000000..438b23199 --- /dev/null +++ b/docs/source/user-guide/basics.rst @@ -0,0 +1,89 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. 
software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Concepts +======== + +In this section, we will cover a basic example to introduce a few key concepts. + +.. code-block:: python + + import datafusion + from datafusion import col + import pyarrow + + # create a context + ctx = datafusion.SessionContext() + + # create a RecordBatch and a new DataFrame from it + batch = pyarrow.RecordBatch.from_arrays( + [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])], + names=["a", "b"], + ) + df = ctx.create_dataframe([[batch]]) + + # create a new statement + df = df.select( + col("a") + col("b"), + col("a") - col("b"), + ) + + # execute and collect the first (and only) batch + result = df.collect()[0] + +The first statement group: + +.. code-block:: python + + # create a context + ctx = datafusion.SessionContext() + +creates a :code:`SessionContext`, that is, the main interface for executing queries with DataFusion. It maintains the state +of the connection between a user and an instance of the DataFusion engine. Additionally it provides the following functionality: + +- Create a DataFrame from a CSV or Parquet data source. +- Register a CSV or Parquet data source as a table that can be referenced from a SQL query. +- Register a custom data source that can be referenced from a SQL query. +- Execute a SQL query + +The second statement group creates a :code:`DataFrame`, + +.. code-block:: python + + # create a RecordBatch and a new DataFrame from it + batch = pyarrow.RecordBatch.from_arrays( + [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])], + names=["a", "b"], + ) + df = ctx.create_dataframe([[batch]]) + +A DataFrame refers to a (logical) set of rows that share the same column names, similar to a `Pandas DataFrame `_. 
+DataFrames are typically created by calling a method on :code:`SessionContext`, such as :code:`read_csv`, and can then be modified by +calling the transformation methods, such as :meth:`.DataFrame.filter`, :meth:`.DataFrame.select`, :meth:`.DataFrame.aggregate`, +and :meth:`.DataFrame.limit` to build up a query definition. + +The third statement uses :code:`Expressions` to build up a query definition. + +.. code-block:: python + + df = df.select( + col("a") + col("b"), + col("a") - col("b"), + ) + +Finally, the :code:`collect` method converts the logical plan represented by the DataFrame into a physical plan and executes it, +collecting all results into a list of `RecordBatch `_. \ No newline at end of file diff --git a/docs/source/user-guide/common-operations/aggregations.rst b/docs/source/user-guide/common-operations/aggregations.rst new file mode 100644 index 000000000..235d644e6 --- /dev/null +++ b/docs/source/user-guide/common-operations/aggregations.rst @@ -0,0 +1,60 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Aggregation +============ + +An aggregate or aggregation is a function where the values of multiple rows are processed together to form a single summary value.
+For performing an aggregation, DataFusion provides the :meth:`.DataFrame.aggregate` method. + +.. ipython:: python + + from datafusion import SessionContext + from datafusion import column, lit + from datafusion import functions as f + import random + + ctx = SessionContext() + df = ctx.from_pydict( + { + "a": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "b": ["one", "one", "two", "three", "two", "two", "one", "three"], + "c": [random.randint(0, 100) for _ in range(8)], + "d": [random.random() for _ in range(8)], + }, + name="foo_bar" + ) + + col_a = column("a") + col_b = column("b") + col_c = column("c") + col_d = column("d") + + df.aggregate([], [f.approx_distinct(col_c), f.approx_median(col_d), f.approx_percentile_cont(col_d, lit(0.5))]) + +When the :code:`group_by` list is empty the aggregation is done over the whole :class:`.DataFrame`. For grouping, +the :code:`group_by` list must contain at least one column. + +.. ipython:: python + + df.aggregate([col_a], [f.sum(col_c), f.max(col_d), f.min(col_d)]) + +More than one column can be used for grouping: + +.. ipython:: python + + df.aggregate([col_a, col_b], [f.sum(col_c), f.max(col_d), f.min(col_d)]) diff --git a/docs/source/user-guide/common-operations/basic-info.rst b/docs/source/user-guide/common-operations/basic-info.rst new file mode 100644 index 000000000..424e1cc92 --- /dev/null +++ b/docs/source/user-guide/common-operations/basic-info.rst @@ -0,0 +1,61 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. 
software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Basic Operations +================ + +In this section, you will learn how to display essential details of DataFrames using specific functions. + +.. ipython:: python + + from datafusion import SessionContext + import random + + ctx = SessionContext() + df = ctx.from_pydict({ + "nrs": [1, 2, 3, 4, 5], + "names": ["python", "ruby", "java", "haskell", "go"], + "random": random.sample(range(1000), 5), + "groups": ["A", "A", "B", "C", "B"], + }) + df + +Use :meth:`.DataFrame.limit` to view the top rows of the frame: + +.. ipython:: python + + df.limit(2) + +Display the columns of the DataFrame using :meth:`.DataFrame.schema`: + +.. ipython:: python + + df.schema() + +The method :meth:`.DataFrame.to_pandas` uses pyarrow to convert to pandas DataFrame, by collecting the batches, +passing them to an Arrow table, and then converting them to a pandas DataFrame. + +.. ipython:: python + + df.to_pandas() + +:meth:`.DataFrame.describe` shows a quick statistic summary of your data: + +.. ipython:: python + + df.describe() + diff --git a/docs/source/user-guide/common-operations/functions.rst b/docs/source/user-guide/common-operations/functions.rst new file mode 100644 index 000000000..7e5c592d8 --- /dev/null +++ b/docs/source/user-guide/common-operations/functions.rst @@ -0,0 +1,117 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. 
http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Functions +========= + +DataFusion provides a large number of built-in functions for performing complex queries without requiring user-defined functions. +Here we will cover some of the more popular use cases. If you want to view all the functions go to the :ref:`Functions` API Reference. + +We'll use the pokemon dataset in the following examples. + +.. ipython:: python + + import urllib.request + from datafusion import SessionContext + + urllib.request.urlretrieve( + "https://gist.githubusercontent.com/ritchie46/cac6b337ea52281aa23c049250a4ff03/raw/89a957ff3919d90e6ef2d34235e6bf22304f3366/pokemon.csv", + "pokemon.csv", + ) + + ctx = SessionContext() + ctx.register_csv("pokemon", "pokemon.csv") + df = ctx.table("pokemon") + +Mathematical +------------ + +DataFusion offers mathematical functions such as :func:`.pow` or :func:`.log` + +.. ipython:: python + + from datafusion import col, literal + from datafusion import functions as f + + df.select( + f.pow(col('"Attack"'), literal(2)) - f.pow(col('"Defense"'), literal(2)) + ).limit(10) + + +Conditional +----------- + +There are 3 conditional functions in DataFusion: :func:`.coalesce`, :func:`.nullif` and :func:`.case` (not available in Python) + +.. ipython:: python + + df.select( + f.coalesce(col('"Type 1"'), col('"Type 2"')).alias("dominant_type") + ).limit(10) + +Temporal +-------- + +For selecting the current time use :func:`.now` + +.. ipython:: python + + df.select(f.now()) + +Convert to timestamps using :func:`.to_timestamp` + +.. 
ipython:: python + + df.select(f.to_timestamp(col('"Total"')).alias("timestamp")) + +String +------ + +In the field of data science, working with textual data is a common task. To make string manipulation easier, +DataFusion offers a range of helpful options. + +.. ipython:: python + + df.select( + f.char_length(col('"Name"')).alias("len"), + f.lower(col('"Name"')).alias("lower"), + f.left(col('"Name"'), literal(4)).alias("code") + ) + +This also includes the functions for regular expressions :func:`.regexp_replace` and :func:`.regexp_match` + +.. ipython:: python + + df.select( + f.regexp_match(col('"Name"'), literal("Char")).alias("dragons"), + f.regexp_replace(col('"Name"'), literal("saur"), literal("fleur")).alias("flowers") + ) + + +Other +----- + +The function :func:`.in_list` allows to check a column for the presence of multiple values: + +.. ipython:: python + + types = [literal("Grass"), literal("Fire"), literal("Water")] + ( + df.select(f.in_list(col('"Type 1"'), types, negated=False).alias("basic_types")) + .limit(20) + .to_pandas() + ) diff --git a/docs/source/user-guide/common-operations/index.rst b/docs/source/user-guide/common-operations/index.rst new file mode 100644 index 000000000..950afb93e --- /dev/null +++ b/docs/source/user-guide/common-operations/index.rst @@ -0,0 +1,30 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. 
KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Common Operations +================= + +.. toctree:: + :maxdepth: 2 + + basic-info + select-and-filter + joins + functions + aggregations + windows + udf-and-udfa diff --git a/docs/source/user-guide/common-operations/joins.rst b/docs/source/user-guide/common-operations/joins.rst new file mode 100644 index 000000000..128203116 --- /dev/null +++ b/docs/source/user-guide/common-operations/joins.rst @@ -0,0 +1,104 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Joins +===== + +DataFusion supports the following join variants via the method :meth:`.DataFrame.join` + +- Inner Join +- Left Join +- Right Join +- Full Join +- Left Semi Join +- Left Anti Join + +For the examples in this section we'll use the following two DataFrames + +.. 
ipython:: python + + from datafusion import SessionContext + + ctx = SessionContext() + + left = ctx.from_pydict( + { + "customer_id": [1, 2, 3], + "customer": ["Alice", "Bob", "Charlie"], + } + ) + + right = ctx.from_pylist([ + {"id": 1, "name": "CityCabs"}, + {"id": 2, "name": "MetroRide"}, + {"id": 5, "name": "UrbanGo"}, + ]) + +Inner Join +---------- + +When using an inner join, only rows containing the common values between the two join columns present in both DataFrames +will be included in the resulting DataFrame. + +.. ipython:: python + + left.join(right, join_keys=(["customer_id"], ["id"]), how="inner") + +The parameter :code:`join_keys` specifies the columns from the left DataFrame and right DataFrame that contain the values +that should match. + +Left Join +--------- + +A left join combines rows from two DataFrames using the key columns. It returns all rows from the left DataFrame and +matching rows from the right DataFrame. If there's no match in the right DataFrame, it returns null +values for the corresponding columns. + +.. ipython:: python + + left.join(right, join_keys=(["customer_id"], ["id"]), how="left") + +Full Join +--------- + +A full join merges rows from two tables based on a related column, returning all rows from both tables, even if there +is no match. Unmatched rows will have null values. + +.. ipython:: python + + left.join(right, join_keys=(["customer_id"], ["id"]), how="full") + +Left Semi Join +-------------- + +A left semi join retrieves matching rows from the left table while +omitting duplicates with multiple matches in the right table. + +.. ipython:: python + + left.join(right, join_keys=(["customer_id"], ["id"]), how="semi") + +Left Anti Join +-------------- + +A left anti join shows all rows from the left table without any matching rows in the right table, +based on the specified matching columns. It excludes rows from the left table that have at least one matching row in +the right table. + +.. 
ipython:: python + + left.join(right, join_keys=(["customer_id"], ["id"]), how="anti") \ No newline at end of file diff --git a/docs/source/user-guide/common-operations/select-and-filter.rst b/docs/source/user-guide/common-operations/select-and-filter.rst new file mode 100644 index 000000000..8ede230e6 --- /dev/null +++ b/docs/source/user-guide/common-operations/select-and-filter.rst @@ -0,0 +1,67 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Column Selections +================= + +Use :meth:`.DataFrame.select_columns` for basic column selection. + +DataFusion can work with several file types, to start simple we can use a subset of the +`TLC Trip Record Data `_ + +.. ipython:: python + + import urllib.request + from datafusion import SessionContext + + urllib.request.urlretrieve("https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet", + "yellow_trip_data.parquet") + + ctx = SessionContext() + df = ctx.read_parquet("yellow_trip_data.parquet") + df.select_columns("trip_distance", "passenger_count") + +For mathematical or logical operations use :func:`.col` to select columns, and give meaningful names to the resulting +operations using :func:`.alias` + + +.. 
ipython:: python + + from datafusion import col, lit + df.select((col("tip_amount") + col("tolls_amount")).alias("tips_plus_tolls")) + +.. warning:: + + Please be aware that all identifiers are effectively made lower-case in SQL, so if your file has capital letters + (ex: Name) you must put your column name in double quotes or the selection won’t work. As an alternative for simple + column selection use :meth:`.DataFrame.select_columns` without double quotes + +For selecting columns with capital letters use ``'"VendorID"'`` + +.. ipython:: python + + df.select(col('"VendorID"')) + + +To combine it with literal values use the :func:`.lit` + +.. ipython:: python + + large_trip_distance = col("trip_distance") > lit(5.0) + low_passenger_count = col("passenger_count") < lit(4) + df.select((large_trip_distance & low_passenger_count).alias("lonely_trips")) + diff --git a/docs/source/user-guide/common-operations/udf-and-udfa.rst b/docs/source/user-guide/common-operations/udf-and-udfa.rst new file mode 100644 index 000000000..62d249c7e --- /dev/null +++ b/docs/source/user-guide/common-operations/udf-and-udfa.rst @@ -0,0 +1,85 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. 
+ +User Defined Functions +====================== + +DataFusion provides powerful expressions and functions, reducing the need for custom Python functions. +However you can still incorporate your own functions, i.e. User-Defined Functions (UDFs), with the :func:`.udf` function. + +.. ipython:: python + + import pyarrow + import datafusion + from datafusion import udf, col + + def is_null(array: pyarrow.Array) -> pyarrow.Array: + return array.is_null() + + is_null_arr = udf(is_null, [pyarrow.int64()], pyarrow.bool_(), 'stable') + + ctx = datafusion.SessionContext() + + batch = pyarrow.RecordBatch.from_arrays( + [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])], + names=["a", "b"], + ) + df = ctx.create_dataframe([[batch]], name="batch_array") + + df.select(is_null_arr(col("a"))).to_pandas() + +Additionally the :func:`.udaf` function allows you to define User-Defined Aggregate Functions (UDAFs) + +.. code-block:: python + + import pyarrow + import pyarrow.compute + import datafusion + from datafusion import col, udaf, Accumulator + + class MyAccumulator(Accumulator): + """ + Interface of a user-defined accumulation. + """ + def __init__(self): + self._sum = pyarrow.scalar(0.0) + + def update(self, values: pyarrow.Array) -> None: + # not nice since pyarrow scalars can't be summed yet. This breaks on `None` + self._sum = pyarrow.scalar(self._sum.as_py() + pyarrow.compute.sum(values).as_py()) + + def merge(self, states: pyarrow.Array) -> None: + # not nice since pyarrow scalars can't be summed yet. 
This breaks on `None` + self._sum = pyarrow.scalar(self._sum.as_py() + pyarrow.compute.sum(states).as_py()) + + def state(self) -> pyarrow.Array: + return pyarrow.array([self._sum.as_py()]) + + def evaluate(self) -> pyarrow.Scalar: + return self._sum + + ctx = datafusion.SessionContext() + df = ctx.from_pydict( + { + "a": [1, 2, 3], + "b": [4, 5, 6], + } + ) + + my_udaf = udaf(MyAccumulator, pyarrow.float64(), pyarrow.float64(), [pyarrow.float64()], 'stable') + + df.aggregate([],[my_udaf(col("a"))]) diff --git a/docs/source/user-guide/common-operations/windows.rst b/docs/source/user-guide/common-operations/windows.rst new file mode 100644 index 000000000..f884c7e0d --- /dev/null +++ b/docs/source/user-guide/common-operations/windows.rst @@ -0,0 +1,93 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Window Functions +================ + +In this section you will learn about window functions. A window function utilizes values from one or multiple rows to +produce a result for each individual row, unlike an aggregate function that provides a single value for multiple rows. + +The functionality of window functions in DataFusion is supported by the dedicated :func:`.window` function. 
+ +We'll use the pokemon dataset (from Ritchie Vink) in the following examples. + +.. ipython:: python + + import urllib.request + from datafusion import SessionContext + from datafusion import col + from datafusion import functions as f + + urllib.request.urlretrieve( + "https://gist.githubusercontent.com/ritchie46/cac6b337ea52281aa23c049250a4ff03/raw/89a957ff3919d90e6ef2d34235e6bf22304f3366/pokemon.csv", + "pokemon.csv", + ) + + ctx = SessionContext() + df = ctx.read_csv("pokemon.csv") + +Here is an example that shows how to compare each pokemon’s attack power with the average attack power in its :code:`"Type 1"` + +.. ipython:: python + + df.select( + col('"Name"'), + col('"Attack"'), + f.alias( + f.window("avg", [col('"Attack"')], partition_by=[col('"Type 1"')]), + "Average Attack", + ) + ) + +You can also control the order in which rows are processed by window functions by providing +a list of :func:`.order_by` functions for the :code:`order_by` parameter. + +.. ipython:: python + + df.select( + col('"Name"'), + col('"Attack"'), + f.alias( + f.window( + "rank", + [], + partition_by=[col('"Type 1"')], + order_by=[f.order_by(col('"Attack"'))], + ), + "rank", + ), + ) + +The possible window functions are: + +1. Rank Functions + - rank + - dense_rank + - row_number + - ntile + +2. Analytical Functions + - cume_dist + - percent_rank + - lag + - lead + - first_value + - last_value + - nth_value + +3. Aggregate Functions + - All aggregate functions can be used as window functions. diff --git a/docs/source/user-guide/introduction.rst b/docs/source/user-guide/introduction.rst new file mode 100644 index 000000000..8abb9113e --- /dev/null +++ b/docs/source/user-guide/introduction.rst @@ -0,0 +1,43 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. 
to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _guide: + +Introduction +============ + +Welcome to the User Guide for the Python bindings of Arrow DataFusion. This guide aims to provide an introduction to +DataFusion through various examples and highlight the most effective ways of using it. + +Installation +------------ + +DataFusion is a Python library and, as such, can be installed via pip from `PyPI `__. + +.. code-block:: shell + + pip install datafusion + +You can verify the installation by running: + +.. ipython:: python + + import datafusion + datafusion.__version__ + + + diff --git a/docs/source/user-guide/io/avro.rst b/docs/source/user-guide/io/avro.rst new file mode 100644 index 000000000..85d546e2a --- /dev/null +++ b/docs/source/user-guide/io/avro.rst @@ -0,0 +1,30 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. 
KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Avro +==== + +`Avro `_ is a serialization format for record data. Reading an avro file is very straightforward +with :meth:`.SessionContext.read_avro` + +.. code-block:: python + + + from datafusion import SessionContext + + ctx = SessionContext() + df = ctx.read_avro("file.avro") \ No newline at end of file diff --git a/docs/source/user-guide/io/csv.rst b/docs/source/user-guide/io/csv.rst new file mode 100644 index 000000000..3f95c54a5 --- /dev/null +++ b/docs/source/user-guide/io/csv.rst @@ -0,0 +1,36 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +CSV +=== + +Reading a csv is very straightforward with :meth:`.SessionContext.read_csv` + +.. code-block:: python + + + from datafusion import SessionContext + + ctx = SessionContext() + df = ctx.read_csv("file.csv") + +An alternative is to use :meth:`.SessionContext.register_csv` + +.. 
code-block:: python + + ctx.register_csv("file", "file.csv") + df = ctx.table("file") \ No newline at end of file diff --git a/docs/source/user-guide/io/index.rst b/docs/source/user-guide/io/index.rst new file mode 100644 index 000000000..af08240ff --- /dev/null +++ b/docs/source/user-guide/io/index.rst @@ -0,0 +1,28 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +IO +== + +.. toctree:: + :maxdepth: 2 + + csv + parquet + json + avro + diff --git a/docs/source/user-guide/io/json.rst b/docs/source/user-guide/io/json.rst new file mode 100644 index 000000000..5949a0380 --- /dev/null +++ b/docs/source/user-guide/io/json.rst @@ -0,0 +1,29 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. 
software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +JSON +==== +`JSON `_ (JavaScript Object Notation) is a lightweight data-interchange format. +When it comes to reading a JSON file, using :meth:`.SessionContext.read_json` is simple and easy + +.. code-block:: python + + + from datafusion import SessionContext + + ctx = SessionContext() + df = ctx.read_json("file.json") \ No newline at end of file diff --git a/docs/source/user-guide/io/parquet.rst b/docs/source/user-guide/io/parquet.rst new file mode 100644 index 000000000..78bba30c5 --- /dev/null +++ b/docs/source/user-guide/io/parquet.rst @@ -0,0 +1,36 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Parquet +======= + +It is quite simple to read a parquet file using the :meth:`.SessionContext.read_parquet` function. + +.. code-block:: python + + + from datafusion import SessionContext + + ctx = SessionContext() + df = ctx.read_parquet("file.parquet") + +An alternative is to use :meth:`.SessionContext.register_parquet` + +.. 
code-block:: python + + ctx.register_parquet("file", "file.parquet") + df = ctx.table("file") \ No newline at end of file diff --git a/docs/source/user-guide/sql.rst b/docs/source/user-guide/sql.rst new file mode 100644 index 000000000..6fa7f0c6a --- /dev/null +++ b/docs/source/user-guide/sql.rst @@ -0,0 +1,39 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +SQL +=== + +DataFusion also offers a SQL API, read the full reference `here `_ + +.. 
ipython:: python + + import datafusion + from datafusion import col + import pyarrow + + # create a context + ctx = datafusion.SessionContext() + + # register a CSV + ctx.register_csv('pokemon', 'pokemon.csv') + + # create a new statement via SQL + df = ctx.sql('SELECT "Attack"+"Defense", "Attack"-"Defense" FROM pokemon') + + # collect and convert to pandas DataFrame + df.to_pandas() \ No newline at end of file From 37c91f4458daa11f50c313e036761f7494ab27f9 Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Fri, 11 Aug 2023 21:45:21 +0200 Subject: [PATCH 063/413] docs: include pre-commit hooks section in contributor guide (#455) --- README.md | 6 +++--- docs/source/contributor-guide/introduction.rst | 9 +++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 506a38227..a682f73d5 100644 --- a/README.md +++ b/README.md @@ -222,11 +222,11 @@ python -m pytest ### Running & Installing pre-commit hooks -arrow-datafusion-python takes advantage of (pre-commit)[https://pre-commit.com/] to assist developers in with code linting to help reduce the number of commits that ultimately fail in CI due to linter errors. Using the pre-commit hooks is optional for the developer but certainly helpful for keep PRs clean and concise. +arrow-datafusion-python takes advantage of [pre-commit](https://pre-commit.com/) to assist developers with code linting to help reduce the number of commits that ultimately fail in CI due to linter errors. Using the pre-commit hooks is optional for the developer but certainly helpful for keeping PRs clean and concise. -Our pre-commit hooks can be installed by running `pre-commit install` which will install the configurations in your ARROW_DATAFUSION_PYTHON_ROOT/.github directory and run each time you perform a commit failing to perform the commit if an offending lint is found giving you the opportunity to make changes locally before pushing. 
+Our pre-commit hooks can be installed by running `pre-commit install`, which will install the configurations in your ARROW_DATAFUSION_PYTHON_ROOT/.github directory and run each time you perform a commit, failing to complete the commit if an offending lint is found allowing you to make changes locally before pushing. -The pre-commit hooks can also be ran ad-hoc without installing them by simply running `pre-commit run --all-files` +The pre-commit hooks can also be run adhoc without installing them by simply running `pre-commit run --all-files` ## How to update dependencies diff --git a/docs/source/contributor-guide/introduction.rst b/docs/source/contributor-guide/introduction.rst index dd61ad8fc..6de2b87bc 100644 --- a/docs/source/contributor-guide/introduction.rst +++ b/docs/source/contributor-guide/introduction.rst @@ -62,6 +62,15 @@ Whenever rust code changes (your changes or via `git pull`): maturin develop python -m pytest +Running & Installing pre-commit hooks +------------------------------------- + +arrow-datafusion-python takes advantage of `pre-commit `_ to assist developers with code linting to help reduce the number of commits that ultimately fail in CI due to linter errors. Using the pre-commit hooks is optional for the developer but certainly helpful for keeping PRs clean and concise. + +Our pre-commit hooks can be installed by running :code:`pre-commit install`, which will install the configurations in your ARROW_DATAFUSION_PYTHON_ROOT/.github directory and run each time you perform a commit, failing to complete the commit if an offending lint is found allowing you to make changes locally before pushing. 
+ +The pre-commit hooks can also be run adhoc without installing them by simply running :code:`pre-commit run --all-files` + Update Dependencies ------------------- From e1b37401a2d1af86ab16b899f1dda8237a0d3535 Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Fri, 11 Aug 2023 21:46:22 +0200 Subject: [PATCH 064/413] feat: add compression options (#456) --- datafusion/tests/test_context.py | 38 +++++++++++++++++++++++++++++++- datafusion/tests/test_sql.py | 23 +++++++++++++++++-- src/context.rs | 31 +++++++++++++++++++++----- 3 files changed, 83 insertions(+), 9 deletions(-) diff --git a/datafusion/tests/test_context.py b/datafusion/tests/test_context.py index 6b1223a16..55a324aec 100644 --- a/datafusion/tests/test_context.py +++ b/datafusion/tests/test_context.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. - +import gzip import os import pyarrow as pa @@ -336,11 +336,47 @@ def test_read_json(ctx): assert result[0].column(1) == pa.array([1, 2, 3]) +def test_read_json_compressed(ctx, tmp_path): + path = os.path.dirname(os.path.abspath(__file__)) + test_data_path = os.path.join(path, "data_test_context", "data.json") + + # File compression type + gzip_path = tmp_path / "data.json.gz" + + with open(test_data_path, "rb") as csv_file: + with gzip.open(gzip_path, "wb") as gzipped_file: + gzipped_file.writelines(csv_file) + + df = ctx.read_json( + gzip_path, file_extension=".gz", file_compression_type="gz" + ) + result = df.collect() + + assert result[0].column(0) == pa.array(["a", "b", "c"]) + assert result[0].column(1) == pa.array([1, 2, 3]) + + def test_read_csv(ctx): csv_df = ctx.read_csv(path="testing/data/csv/aggregate_test_100.csv") csv_df.select(column("c1")).show() +def test_read_csv_compressed(ctx, tmp_path): + test_data_path = "testing/data/csv/aggregate_test_100.csv" + + # File compression type + gzip_path = tmp_path / "aggregate_test_100.csv.gz" + + 
with open(test_data_path, "rb") as csv_file: + with gzip.open(gzip_path, "wb") as gzipped_file: + gzipped_file.writelines(csv_file) + + csv_df = ctx.read_csv( + gzip_path, file_extension=".gz", file_compression_type="gz" + ) + csv_df.select(column("c1")).show() + + def test_read_parquet(ctx): csv_df = ctx.read_parquet(path="parquet/data/alltypes_plain.parquet") csv_df.show() diff --git a/datafusion/tests/test_sql.py b/datafusion/tests/test_sql.py index 638a222dc..608bb1960 100644 --- a/datafusion/tests/test_sql.py +++ b/datafusion/tests/test_sql.py @@ -19,6 +19,7 @@ import pyarrow as pa import pyarrow.dataset as ds import pytest +import gzip from datafusion import udf @@ -32,6 +33,7 @@ def test_no_table(ctx): def test_register_csv(ctx, tmp_path): path = tmp_path / "test.csv" + gzip_path = tmp_path / "test.csv.gz" table = pa.Table.from_arrays( [ @@ -43,6 +45,10 @@ def test_register_csv(ctx, tmp_path): ) pa.csv.write_csv(table, path) + with open(path, "rb") as csv_file: + with gzip.open(gzip_path, "wb") as gzipped_file: + gzipped_file.writelines(csv_file) + ctx.register_csv("csv", path) ctx.register_csv("csv1", str(path)) ctx.register_csv( @@ -52,6 +58,13 @@ def test_register_csv(ctx, tmp_path): delimiter=",", schema_infer_max_records=10, ) + ctx.register_csv( + "csv_gzip", + gzip_path, + file_extension="gz", + file_compression_type="gzip", + ) + alternative_schema = pa.schema( [ ("some_int", pa.int16()), @@ -61,9 +74,9 @@ def test_register_csv(ctx, tmp_path): ) ctx.register_csv("csv3", path, schema=alternative_schema) - assert ctx.tables() == {"csv", "csv1", "csv2", "csv3"} + assert ctx.tables() == {"csv", "csv1", "csv2", "csv3", "csv_gzip"} - for table in ["csv", "csv1", "csv2"]: + for table in ["csv", "csv1", "csv2", "csv_gzip"]: result = ctx.sql(f"SELECT COUNT(int) AS cnt FROM {table}").collect() result = pa.Table.from_batches(result) assert result.to_pydict() == {"cnt": [4]} @@ -77,6 +90,12 @@ def test_register_csv(ctx, tmp_path): ): ctx.register_csv("csv4", 
path, delimiter="wrong") + with pytest.raises( + ValueError, + match="file_compression_type must one of: gzip, bz2, xz, zstd", + ): + ctx.register_csv("csv4", path, file_compression_type="rar") + def test_register_parquet(ctx, tmp_path): path = helpers.write_parquet(tmp_path / "a.parquet", helpers.data()) diff --git a/src/context.rs b/src/context.rs index cf133d79a..1dca8a791 100644 --- a/src/context.rs +++ b/src/context.rs @@ -17,6 +17,7 @@ use std::collections::{HashMap, HashSet}; use std::path::PathBuf; +use std::str::FromStr; use std::sync::Arc; use object_store::ObjectStore; @@ -40,6 +41,7 @@ use crate::utils::{get_tokio_runtime, wait_for_future}; use datafusion::arrow::datatypes::{DataType, Schema}; use datafusion::arrow::pyarrow::PyArrowType; use datafusion::arrow::record_batch::RecordBatch; +use datafusion::datasource::file_format::file_type::FileCompressionType; use datafusion::datasource::MemTable; use datafusion::datasource::TableProvider; use datafusion::execution::context::{SessionConfig, SessionContext, TaskContext}; @@ -469,7 +471,8 @@ impl PySessionContext { has_header=true, delimiter=",", schema_infer_max_records=1000, - file_extension=".csv"))] + file_extension=".csv", + file_compression_type=None))] fn register_csv( &mut self, name: &str, @@ -479,6 +482,7 @@ impl PySessionContext { delimiter: &str, schema_infer_max_records: usize, file_extension: &str, + file_compression_type: Option, py: Python, ) -> PyResult<()> { let path = path @@ -495,7 +499,8 @@ impl PySessionContext { .has_header(has_header) .delimiter(delimiter[0]) .schema_infer_max_records(schema_infer_max_records) - .file_extension(file_extension); + .file_extension(file_extension) + .file_compression_type(parse_file_compression_type(file_compression_type)?); options.schema = schema.as_ref().map(|x| &x.0); let result = self.ctx.register_csv(name, path, options); @@ -559,7 +564,7 @@ impl PySessionContext { } #[allow(clippy::too_many_arguments)] - #[pyo3(signature = (path, schema=None, 
schema_infer_max_records=1000, file_extension=".json", table_partition_cols=vec![]))] + #[pyo3(signature = (path, schema=None, schema_infer_max_records=1000, file_extension=".json", table_partition_cols=vec![], file_compression_type=None))] fn read_json( &mut self, path: PathBuf, @@ -567,13 +572,15 @@ impl PySessionContext { schema_infer_max_records: usize, file_extension: &str, table_partition_cols: Vec<(String, String)>, + file_compression_type: Option, py: Python, ) -> PyResult { let path = path .to_str() .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?; let mut options = NdJsonReadOptions::default() - .table_partition_cols(convert_table_partition_cols(table_partition_cols)?); + .table_partition_cols(convert_table_partition_cols(table_partition_cols)?) + .file_compression_type(parse_file_compression_type(file_compression_type)?); options.schema_infer_max_records = schema_infer_max_records; options.file_extension = file_extension; let df = if let Some(schema) = schema { @@ -595,7 +602,8 @@ impl PySessionContext { delimiter=",", schema_infer_max_records=1000, file_extension=".csv", - table_partition_cols=vec![]))] + table_partition_cols=vec![], + file_compression_type=None))] fn read_csv( &self, path: PathBuf, @@ -605,6 +613,7 @@ impl PySessionContext { schema_infer_max_records: usize, file_extension: &str, table_partition_cols: Vec<(String, String)>, + file_compression_type: Option, py: Python, ) -> PyResult { let path = path @@ -623,7 +632,8 @@ impl PySessionContext { .delimiter(delimiter[0]) .schema_infer_max_records(schema_infer_max_records) .file_extension(file_extension) - .table_partition_cols(convert_table_partition_cols(table_partition_cols)?); + .table_partition_cols(convert_table_partition_cols(table_partition_cols)?) 
+ .file_compression_type(parse_file_compression_type(file_compression_type)?); if let Some(py_schema) = schema { options.schema = Some(&py_schema.0); @@ -743,6 +753,15 @@ fn convert_table_partition_cols( .collect::, _>>() } +fn parse_file_compression_type( + file_compression_type: Option, +) -> Result { + FileCompressionType::from_str(&*file_compression_type.unwrap_or("".to_string()).as_str()) + .map_err(|_| { + PyValueError::new_err("file_compression_type must one of: gzip, bz2, xz, zstd") + }) +} + impl From for SessionContext { fn from(ctx: PySessionContext) -> SessionContext { ctx.ctx From 0b22c977dbdfbfe1e0536e0b1bf79045e7bf0e7e Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 11 Aug 2023 13:46:40 -0600 Subject: [PATCH 065/413] Upgrade to DF 28.0.0-rc1 (#457) * Upgrade to DF 28.0.0-rc1 * cargo update --- Cargo.lock | 290 +++++++++++++++++++------------------- Cargo.toml | 14 +- src/common/data_type.rs | 3 + src/expr.rs | 9 +- src/expr/indexed_field.rs | 9 +- src/udaf.rs | 2 +- 6 files changed, 171 insertions(+), 156 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 85634fd11..585e64cb5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -38,9 +38,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" +checksum = "86b8f9420f797f2d9e935edf629310eb938a0d839f984e25327f3c7eed22300c" dependencies = [ "memchr", ] @@ -89,9 +89,9 @@ checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854" [[package]] name = "apache-avro" -version = "0.14.0" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cf4144857f9e4d7dd6cc4ba4c78efd2a46bad682b029bd0d91e76a021af1b2a" +checksum = "9c0fdddc3fdac97394ffcc5c89c634faa9c1c166ced54189af34e407c97b6ee7" dependencies = [ "byteorder", "crc32fast", @@ -106,8 +106,8 @@ dependencies = [ "serde", 
"serde_json", "snap", - "strum 0.24.1", - "strum_macros 0.24.3", + "strum 0.25.0", + "strum_macros 0.25.2", "thiserror", "typed-builder", "uuid", @@ -128,9 +128,9 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2feeebd77b34b0bc88f224e06d01c27da4733997cc4789a4e056196656cdc59a" +checksum = "b7104b9e9761613ae92fe770c741d6bbf1dbc791a0fe204400aebdd429875741" dependencies = [ "ahash", "arrow-arith", @@ -151,9 +151,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7173f5dc49c0ecb5135f52565af33afd3fdc9a12d13bd6f9973e8b96305e4b2e" +checksum = "38e597a8e8efb8ff52c50eaf8f4d85124ce3c1bf20fab82f476d73739d9ab1c2" dependencies = [ "arrow-array", "arrow-buffer", @@ -166,9 +166,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63d7ea725f7d1f8bb2cffc53ef538557e95fc802e217d5be25122d402e22f3d0" +checksum = "2a86d9c1473db72896bd2345ebb6b8ad75b8553ba390875c76708e8dc5c5492d" dependencies = [ "ahash", "arrow-buffer", @@ -183,9 +183,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdbe439e077f484e5000b9e1d47b5e4c0d15f2b311a8f5bcc682553d5d67a722" +checksum = "234b3b1c8ed00c874bf95972030ac4def6f58e02ea5a7884314388307fb3669b" dependencies = [ "half", "num", @@ -193,9 +193,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93913cc14875770aa1eef5e310765e855effa352c094cb1c7c00607d0f37b4e1" +checksum = 
"22f61168b853c7faea8cea23a2169fdff9c82fb10ae5e2c07ad1cab8f6884931" dependencies = [ "arrow-array", "arrow-buffer", @@ -211,9 +211,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef55b67c55ed877e6fe7b923121c19dae5e31ca70249ea2779a17b58fb0fbd9a" +checksum = "10b545c114d9bf8569c84d2fbe2020ac4eea8db462c0a37d0b65f41a90d066fe" dependencies = [ "arrow-array", "arrow-buffer", @@ -230,9 +230,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4f4f4a3c54614126a71ab91f6631c9743eb4643d6e9318b74191da9dc6e028b" +checksum = "c6b6852635e7c43e5b242841c7470606ff0ee70eef323004cacc3ecedd33dd8f" dependencies = [ "arrow-buffer", "arrow-schema", @@ -242,9 +242,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d41a3659f984a524ef1c2981d43747b24d8eec78e2425267fcd0ef34ce71cd18" +checksum = "a66da9e16aecd9250af0ae9717ae8dd7ea0d8ca5a3e788fe3de9f4ee508da751" dependencies = [ "arrow-array", "arrow-buffer", @@ -256,9 +256,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10b95faa95a378f56ef32d84cc0104ea998c39ef7cd1faaa6b4cebf8ea92846d" +checksum = "60ee0f9d8997f4be44a60ee5807443e396e025c23cf14d2b74ce56135cb04474" dependencies = [ "arrow-array", "arrow-buffer", @@ -276,9 +276,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c68549a4284d9f8b39586afb8d5ff8158b8f0286353a4844deb1d11cf1ba1f26" +checksum = "7fcab05410e6b241442abdab6e1035177dc082bdb6f17049a4db49faed986d63" dependencies = [ "arrow-array", 
"arrow-buffer", @@ -291,9 +291,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a75a4a757afc301ce010adadff54d79d66140c4282ed3de565f6ccb716a5cf3" +checksum = "91a847dd9eb0bacd7836ac63b3475c68b2210c2c96d0ec1b808237b973bd5d73" dependencies = [ "ahash", "arrow-array", @@ -306,18 +306,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bebcb57eef570b15afbcf2d07d813eb476fde9f6dd69c81004d6476c197e87e" +checksum = "54df8c47918eb634c20e29286e69494fdc20cafa5173eb6dad49c7f6acece733" dependencies = [ - "bitflags 2.3.3", + "bitflags 2.4.0", ] [[package]] name = "arrow-select" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6e2943fa433a48921e914417173816af64eef61c0a3d448280e6c40a62df221" +checksum = "941dbe481da043c4bd40c805a19ec2fc008846080c4953171b62bcad5ee5f7fb" dependencies = [ "arrow-array", "arrow-buffer", @@ -328,9 +328,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbc92ed638851774f6d7af1ad900b92bc1486746497511868b4298fcbcfa35af" +checksum = "359b2cd9e071d5a3bcf44679f9d85830afebc5b9c98a08019a570a65ae933e0f" dependencies = [ "arrow-array", "arrow-buffer", @@ -368,7 +368,7 @@ checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" dependencies = [ "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.28", ] [[package]] @@ -379,7 +379,7 @@ checksum = "cc6dde6e4ed435a4c1ee4e73592f5ba9da2151af10076cc04858746af9352d09" dependencies = [ "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.28", ] [[package]] @@ -426,9 +426,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = 
"bitflags" -version = "2.3.3" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" +checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" [[package]] name = "blake2" @@ -524,11 +524,12 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.79" +version = "1.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +checksum = "305fe645edc1442a0fa8b6726ba61d422798d37a52e12eaecf4b022ebbb88f01" dependencies = [ "jobserver", + "libc", ] [[package]] @@ -687,9 +688,8 @@ dependencies = [ [[package]] name = "datafusion" -version = "28.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ddbcb2dda5b5033537457992ebde78938014390b2b19f9f4282e3be0e18b0c3" +version = "29.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=29.0.0-rc1#8265e99d05382fca57cc7399f8ee241966f4a1f5" dependencies = [ "ahash", "apache-avro", @@ -738,9 +738,8 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "28.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85fbb7b4da925031311743ab96662d55f0f7342d3692744f184f99b2257ef435" +version = "29.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=29.0.0-rc1#8265e99d05382fca57cc7399f8ee241966f4a1f5" dependencies = [ "apache-avro", "arrow", @@ -755,13 +754,14 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "28.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bb3617466d894eb0ad11d06bab1e6e89c571c0a27d660685d327d0c6e1e1ccd" +version = "29.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=29.0.0-rc1#8265e99d05382fca57cc7399f8ee241966f4a1f5" dependencies = [ + "arrow", "dashmap", "datafusion-common", "datafusion-expr", + "futures", 
"hashbrown 0.14.0", "log", "object_store", @@ -773,9 +773,8 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "28.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bd8220a0dfcdfddcc785cd7e71770ef1ce54fbe1e08984e5adf537027ecb6de" +version = "29.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=29.0.0-rc1#8265e99d05382fca57cc7399f8ee241966f4a1f5" dependencies = [ "ahash", "arrow", @@ -783,14 +782,13 @@ dependencies = [ "lazy_static", "sqlparser", "strum 0.25.0", - "strum_macros 0.25.1", + "strum_macros 0.25.2", ] [[package]] name = "datafusion-optimizer" -version = "28.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d685a100c66952aaadd0cbe766df46d1887d58fc8bcf3589e6387787f18492b" +version = "29.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=29.0.0-rc1#8265e99d05382fca57cc7399f8ee241966f4a1f5" dependencies = [ "arrow", "async-trait", @@ -806,9 +804,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "28.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f2c635da9b05b4b4c6c8d935f46fd99f9b6225f834091cf4e3c8a045b68beab" +version = "29.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=29.0.0-rc1#8265e99d05382fca57cc7399f8ee241966f4a1f5" dependencies = [ "ahash", "arrow", @@ -841,7 +838,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "28.0.0" +version = "29.0.0" dependencies = [ "async-trait", "datafusion", @@ -860,7 +857,7 @@ dependencies = [ "pyo3-build-config", "rand", "regex-syntax", - "syn 2.0.27", + "syn 2.0.28", "tokio", "url", "uuid", @@ -868,9 +865,8 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "28.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3ef8abf4dd84d3f20c910822b52779c035ab7f4f2d5e7125ede3bae618e9de8" +version = "29.0.0" +source = 
"git+https://github.com/apache/arrow-datafusion.git?rev=29.0.0-rc1#8265e99d05382fca57cc7399f8ee241966f4a1f5" dependencies = [ "arrow", "arrow-schema", @@ -882,9 +878,8 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "28.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c97d351bbd6bd6497e7c9606ddd3c00cd63e9d185d7ab96fc8a66cf3c449177" +version = "29.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=29.0.0-rc1#8265e99d05382fca57cc7399f8ee241966f4a1f5" dependencies = [ "async-recursion", "chrono", @@ -943,9 +938,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" +checksum = "6b30f669a7961ef1631673d2766cc92f52d64f7ef354d4fe0ddfd30ed52f0f4f" dependencies = [ "errno-dragonfly", "libc", @@ -1065,7 +1060,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.28", ] [[package]] @@ -1274,7 +1269,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2", + "socket2 0.4.9", "tokio", "tower-service", "tracing", @@ -1534,9 +1529,9 @@ dependencies = [ [[package]] name = "libz-sys" -version = "1.1.10" +version = "1.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24e6ab01971eb092ffe6a7d42f49f9ff42662f17604681e2843ad65077ba47dc" +checksum = "d97137b25e321a73eef1418d1d5d2eda4d77e12813f8e6dead84bc52c5870a7b" dependencies = [ "cc", "libc", @@ -1546,9 +1541,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.3" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0" +checksum = 
"57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" [[package]] name = "lock_api" @@ -1828,9 +1823,9 @@ dependencies = [ [[package]] name = "parquet" -version = "43.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec7267a9607c3f955d4d0ac41b88a67cecc0d8d009173ad3da390699a6cb3750" +checksum = "49f9739b984380582bdb7749ae5b5d28839bce899212cf16465c1ac1f8b65d79" dependencies = [ "ahash", "arrow-array", @@ -1931,9 +1926,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.10" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c40d25201921e5ff0c862a505c6557ea88568a4e3ace775ab55e93f2f4f9d57" +checksum = "12cc1b0bf1727a77a54b6654e7b5f1af8604923edc8b81885f8ec92f9e3f0a05" [[package]] name = "pin-utils" @@ -1960,7 +1955,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c64d9ba0963cdcea2e1b2230fbae2bab30eb25a174be395c41e764bfb65dd62" dependencies = [ "proc-macro2", - "syn 2.0.27", + "syn 2.0.28", ] [[package]] @@ -2041,9 +2036,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.19.1" +version = "0.19.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffb88ae05f306b4bfcde40ac4a51dc0b05936a9207a4b75b798c7729c4258a59" +checksum = "e681a6cfdc4adcc93b4d3cf993749a4552018ee0a9b65fc0ccfad74352c72a38" dependencies = [ "cfg-if", "indoc", @@ -2058,9 +2053,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.19.1" +version = "0.19.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "554db24f0b3c180a9c0b1268f91287ab3f17c162e15b54caaae5a6b3773396b0" +checksum = "076c73d0bc438f7a4ef6fdd0c3bb4732149136abd952b110ac93e4edb13a6ba5" dependencies = [ "once_cell", "target-lexicon", @@ -2068,9 +2063,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.19.1" +version = "0.19.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "922ede8759e8600ad4da3195ae41259654b9c55da4f7eec84a0ccc7d067a70a4" +checksum = "e53cee42e77ebe256066ba8aa77eff722b3bb91f3419177cf4cd0f304d3284d9" dependencies = [ "libc", "pyo3-build-config", @@ -2078,9 +2073,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.19.1" +version = "0.19.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a5caec6a1dd355964a841fcbeeb1b89fe4146c87295573f94228911af3cc5a2" +checksum = "dfeb4c99597e136528c6dd7d5e3de5434d1ceaf487436a3f03b2d56b6fc9efd1" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -2090,9 +2085,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.19.1" +version = "0.19.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0b78ccbb160db1556cdb6fd96c50334c5d4ec44dc5e0a968d0a1208fa0efa8b" +checksum = "947dc12175c254889edc0c02e399476c2f652b4b9ebd123aa655c224de259536" dependencies = [ "proc-macro2", "quote", @@ -2165,9 +2160,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.9.1" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" +checksum = "81bc1d4caf89fac26a70747fe603c130093b53c773888797a6329091246d651a" dependencies = [ "aho-corasick", "memchr", @@ -2177,9 +2172,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.3.3" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310" +checksum = "fed1ceff11a1dddaee50c9dc8e4938bd106e9d89ae372f192311e7da498e3b69" dependencies = [ "aho-corasick", "memchr", @@ -2281,11 +2276,11 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.4" +version = "0.38.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0a962918ea88d644592894bc6dc55acc6c0956488adcebbfb6e273506b7fd6e5" +checksum = "19ed4fa021d81c8392ce04db050a3da9a60299050b7ae1cf482d862b54a7218f" dependencies = [ - "bitflags 2.3.3", + "bitflags 2.4.0", "errno", "libc", "linux-raw-sys", @@ -2294,9 +2289,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.21.5" +version = "0.21.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79ea77c539259495ce8ca47f53e66ae0330a8819f67e23ac96ca02f50e7b7d36" +checksum = "1d1feddffcfcc0b33f5c6ce9a29e341e4cd59c3f78e7ee45f4a40c038b1d6cbb" dependencies = [ "log", "ring", @@ -2315,9 +2310,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.101.1" +version = "0.101.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15f36a6828982f422756984e47912a7a51dcbc2a197aa791158f8ca61cd8204e" +checksum = "261e9e0888cba427c3316e6322805653c9425240b6fd96cee7cb671ab70ab8d0" dependencies = [ "ring", "untrusted", @@ -2398,22 +2393,22 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.175" +version = "1.0.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d25439cd7397d044e2748a6fe2432b5e85db703d6d097bd014b3c0ad1ebff0b" +checksum = "32ac8da02677876d532745a130fc9d8e6edfa81a269b107c5b00829b91d8eb3c" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.175" +version = "1.0.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b23f7ade6f110613c0d63858ddb8b94c1041f550eab58a16b371bdf2c9c80ab4" +checksum = "aafe972d60b0b9bee71a91b92fee2d4fb3c9d7e8f6b179aa99f27203d99a4816" dependencies = [ "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.28", ] [[package]] @@ -2429,9 +2424,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.103" +version = "1.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b" +checksum = "076066c5f1078eac5b722a31827a8832fe108bed65dfa75e233c89f8206e976c" dependencies = [ "itoa", "ryu", @@ -2447,7 +2442,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.27", + "syn 2.0.28", ] [[package]] @@ -2545,6 +2540,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "socket2" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2538b18701741680e0322a2302176d3253a35388e2e62f172f64f4f16605f877" +dependencies = [ + "libc", + "windows-sys", +] + [[package]] name = "spin" version = "0.5.2" @@ -2553,9 +2558,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] name = "sqlparser" -version = "0.35.0" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca597d77c98894be1f965f2e4e2d2a61575d4998088e655476c73715c54b2b43" +checksum = "2eaa1e88e78d2c2460d78b7dc3f0c08dbb606ab4222f9aff36f420d36e307d87" dependencies = [ "log", "sqlparser_derive", @@ -2590,7 +2595,7 @@ version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" dependencies = [ - "strum_macros 0.25.1", + "strum_macros 0.25.2", ] [[package]] @@ -2608,22 +2613,22 @@ dependencies = [ [[package]] name = "strum_macros" -version = "0.25.1" +version = "0.25.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6069ca09d878a33f883cc06aaa9718ede171841d3832450354410b718b097232" +checksum = "ad8d03b598d3d0fff69bf533ee3ef19b8eeb342729596df84bcc7e1f96ec4059" dependencies = [ "heck", "proc-macro2", "quote", "rustversion", - "syn 2.0.27", + "syn 2.0.28", ] [[package]] name = "substrait" -version = "0.12.3" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ac1ce8315086b127ca0abf162c62279550942bb26ebf7946fe17fe114446472" 
+checksum = "658f6cbbd29a250869b87e1bb5a4b42db534cacfc1c03284f2536cd36b6c1617" dependencies = [ "git2", "heck", @@ -2637,7 +2642,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.27", + "syn 2.0.28", "typify", "walkdir", ] @@ -2661,9 +2666,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.27" +version = "2.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b60f673f44a8255b9c8c657daf66a596d435f2da81a555b06dc644d080ba45e0" +checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567" dependencies = [ "proc-macro2", "quote", @@ -2672,15 +2677,15 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.12.10" +version = "0.12.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2faeef5759ab89935255b1a4cd98e0baf99d1085e37d36599c625dac49ae8e" +checksum = "9d0e916b1148c8e263850e1ebcbd046f333e0683c724876bb0da63ea4373dc8a" [[package]] name = "tempfile" -version = "3.7.0" +version = "3.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5486094ee78b2e5038a6382ed7645bc084dc2ec433426ca4c3cb61e2007b8998" +checksum = "dc02fddf48964c42031a0b3fe0428320ecf3a73c401040fc0096f97794310651" dependencies = [ "cfg-if", "fastrand", @@ -2706,7 +2711,7 @@ checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96" dependencies = [ "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.28", ] [[package]] @@ -2746,11 +2751,10 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.29.1" +version = "1.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "532826ff75199d5833b9d2c5fe410f29235e25704ee5f0ef599fb51c21f4a4da" +checksum = "2d3ce25f50619af8b0aec2eb23deebe84249e19e2ddd393a6e16e3300a6dadfd" dependencies = [ - "autocfg", "backtrace", "bytes", "libc", @@ -2758,7 +2762,7 @@ dependencies = [ "num_cpus", "parking_lot", 
"pin-project-lite", - "socket2", + "socket2 0.5.3", "tokio-macros", "windows-sys", ] @@ -2771,7 +2775,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.28", ] [[package]] @@ -2824,7 +2828,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.28", ] [[package]] @@ -2854,9 +2858,9 @@ dependencies = [ [[package]] name = "typed-builder" -version = "0.10.0" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89851716b67b937e393b3daa8423e67ddfc4bbbf1654bcf05488e95e0828db0c" +checksum = "64cba322cb9b7bc6ca048de49e83918223f35e7a86311267013afff257004870" dependencies = [ "proc-macro2", "quote", @@ -2892,7 +2896,7 @@ dependencies = [ "regress", "schemars", "serde_json", - "syn 2.0.27", + "syn 2.0.28", "thiserror", "unicode-ident", ] @@ -2909,7 +2913,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.27", + "syn 2.0.28", "typify-impl", ] @@ -3043,7 +3047,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.28", "wasm-bindgen-shared", ] @@ -3077,7 +3081,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.28", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3267,9 +3271,9 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.6.1" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "332f188cc1bcf1fe1064b8c58d150f497e697f49774aa846f2dc949d9a25f236" +checksum = "f3b9c234616391070b0b173963ebc65a9195068e7ed3731c6edac2ec45ebe106" dependencies = [ "byteorder", "zerocopy-derive", @@ -3277,13 +3281,13 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.3.2" +version = "0.6.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "6505e6815af7de1746a08f69c69606bb45695a17149517680f3b2149713b19a3" +checksum = "8f7f3a471f98d0a61c34322fbbfd10c384b07687f680d4119813713f72308d91" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.28", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 8b09bc50c..169d3f5d4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "28.0.0" +version = "29.0.0" homepage = "https://github.com/apache/arrow-datafusion-python" repository = "https://github.com/apache/arrow-datafusion-python" authors = ["Apache Arrow "] @@ -36,12 +36,12 @@ protoc = [ "datafusion-substrait/protoc" ] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.19", features = ["extension-module", "abi3", "abi3-py38"] } -datafusion = { version = "28.0.0", features = ["pyarrow", "avro"] } -datafusion-common = { version = "28.0.0", features = ["pyarrow"] } -datafusion-expr = { version = "28.0.0" } -datafusion-optimizer = { version = "28.0.0" } -datafusion-sql = { version = "28.0.0" } -datafusion-substrait = { version = "28.0.0" } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "29.0.0-rc1", features = ["pyarrow", "avro"] } +datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "29.0.0-rc1", features = ["pyarrow"] } +datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "29.0.0-rc1" } +datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "29.0.0-rc1" } +datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "29.0.0-rc1" } +datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "29.0.0-rc1" } prost = "0.11" prost-types = "0.11" uuid = { version = "1.3", features = ["v4"] } diff --git a/src/common/data_type.rs 
b/src/common/data_type.rs index 199fb616e..078b8c841 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -242,6 +242,9 @@ impl DataTypeMap { ScalarValue::Decimal128(_, precision, scale) => { Ok(DataType::Decimal128(*precision, *scale)) } + ScalarValue::Decimal256(_, precision, scale) => { + Ok(DataType::Decimal256(*precision, *scale)) + } ScalarValue::Dictionary(data_type, scalar_type) => { // Call this function again to map the dictionary scalar_value to an Arrow type Ok(DataType::Dictionary( diff --git a/src/expr.rs b/src/expr.rs index d1022e905..ecf8fae32 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -30,7 +30,8 @@ use datafusion_expr::{ }, lit, utils::exprlist_to_fields, - Between, BinaryExpr, Case, Cast, Expr, GetIndexedField, Like, LogicalPlan, Operator, TryCast, + Between, BinaryExpr, Case, Cast, Expr, GetFieldAccess, GetIndexedField, Like, LogicalPlan, + Operator, TryCast, }; use crate::common::data_type::{DataTypeMap, RexType}; @@ -213,7 +214,9 @@ impl PyExpr { fn __getitem__(&self, key: &str) -> PyResult { Ok(Expr::GetIndexedField(GetIndexedField::new( Box::new(self.expr.clone()), - ScalarValue::Utf8(Some(key.to_string())), + GetFieldAccess::NamedStructField { + name: ScalarValue::Utf8(Some(key.to_string())), + }, )) .into()) } @@ -309,6 +312,7 @@ impl PyExpr { ScalarValue::Float32(v) => v.into_py(py), ScalarValue::Float64(v) => v.into_py(py), ScalarValue::Decimal128(v, _, _) => v.into_py(py), + ScalarValue::Decimal256(_, _, _) => todo!(), ScalarValue::Int8(v) => v.into_py(py), ScalarValue::Int16(v) => v.into_py(py), ScalarValue::Int32(v) => v.into_py(py), @@ -577,6 +581,7 @@ impl PyExpr { | Operator::BitwiseXor | Operator::BitwiseAnd | Operator::BitwiseOr => DataTypeMap::map_from_arrow_type(&DataType::Binary), + Operator::AtArrow | Operator::ArrowAt => todo!(), }, Expr::Cast(Cast { expr: _, data_type }) => DataTypeMap::map_from_arrow_type(data_type), Expr::Literal(scalar_value) => DataTypeMap::map_from_scalar_value(scalar_value), diff 
--git a/src/expr/indexed_field.rs b/src/expr/indexed_field.rs index c98607712..e0dad6a4a 100644 --- a/src/expr/indexed_field.rs +++ b/src/expr/indexed_field.rs @@ -16,7 +16,7 @@ // under the License. use crate::expr::PyExpr; -use datafusion_expr::expr::GetIndexedField; +use datafusion_expr::expr::{GetFieldAccess, GetIndexedField}; use pyo3::prelude::*; use std::fmt::{Display, Formatter}; @@ -47,7 +47,7 @@ impl Display for PyGetIndexedField { "GetIndexedField Expr: {:?} Key: {:?}", - &self.indexed_field.expr, &self.indexed_field.key + &self.indexed_field.expr, &self.indexed_field.field ) } } @@ -59,7 +59,10 @@ impl PyGetIndexedField { } fn key(&self) -> PyResult { - Ok(self.indexed_field.key.clone().into()) + match &self.indexed_field.field { + GetFieldAccess::NamedStructField { name, .. } => Ok(name.clone().into()), + _ => todo!(), + } } /// Get a String representation of this column diff --git a/src/udaf.rs b/src/udaf.rs index ae2e81a08..3b70aeb06 100644 --- a/src/udaf.rs +++ b/src/udaf.rs @@ -126,7 +126,7 @@ impl PyAggregateUDF { ) -> PyResult { let function = create_udaf( name, - input_type.0, + vec![input_type.0], Arc::new(return_type.0), parse_volatility(volatility)?, to_rust_accumulator(accumulator), From 217ede86961d3ffc356556ad75cbe2b514373048 Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Thu, 17 Aug 2023 19:19:22 +0200 Subject: [PATCH 066/413] feat: add register_json (#458) --- datafusion/tests/test_sql.py | 53 +++++++++++++++++++++++++++++++++++- src/context.rs | 36 ++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/datafusion/tests/test_sql.py b/datafusion/tests/test_sql.py index 608bb1960..9d42a1f9d 100644 --- a/datafusion/tests/test_sql.py +++ b/datafusion/tests/test_sql.py @@ -14,12 +14,13 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
+import gzip +import os import numpy as np import pyarrow as pa import pyarrow.dataset as ds import pytest -import gzip from datafusion import udf @@ -154,6 +155,56 @@ def test_register_dataset(ctx, tmp_path): assert result.to_pydict() == {"cnt": [100]} +def test_register_json(ctx, tmp_path): + path = os.path.dirname(os.path.abspath(__file__)) + test_data_path = os.path.join(path, "data_test_context", "data.json") + gzip_path = tmp_path / "data.json.gz" + + with open(test_data_path, "rb") as json_file: + with gzip.open(gzip_path, "wb") as gzipped_file: + gzipped_file.writelines(json_file) + + ctx.register_json("json", test_data_path) + ctx.register_json("json1", str(test_data_path)) + ctx.register_json( + "json2", + test_data_path, + schema_infer_max_records=10, + ) + ctx.register_json( + "json_gzip", + gzip_path, + file_extension="gz", + file_compression_type="gzip", + ) + + alternative_schema = pa.schema( + [ + ("some_int", pa.int16()), + ("some_bytes", pa.string()), + ("some_floats", pa.float32()), + ] + ) + ctx.register_json("json3", path, schema=alternative_schema) + + assert ctx.tables() == {"json", "json1", "json2", "json3", "json_gzip"} + + for table in ["json", "json1", "json2", "json_gzip"]: + result = ctx.sql(f'SELECT COUNT("B") AS cnt FROM {table}').collect() + result = pa.Table.from_batches(result) + assert result.to_pydict() == {"cnt": [3]} + + result = ctx.sql("SELECT * FROM json3").collect() + result = pa.Table.from_batches(result) + assert result.schema == alternative_schema + + with pytest.raises( + ValueError, + match="file_compression_type must one of: gzip, bz2, xz, zstd", + ): + ctx.register_json("json4", gzip_path, file_compression_type="rar") + + def test_execute(ctx, tmp_path): data = [1, 1, 2, 2, 3, 11, 12] diff --git a/src/context.rs b/src/context.rs index 1dca8a791..317ab785e 100644 --- a/src/context.rs +++ b/src/context.rs @@ -509,6 +509,42 @@ impl PySessionContext { Ok(()) } + #[allow(clippy::too_many_arguments)] + #[pyo3(signature = 
(name, + path, + schema=None, + schema_infer_max_records=1000, + file_extension=".json", + table_partition_cols=vec![], + file_compression_type=None))] + fn register_json( + &mut self, + name: &str, + path: PathBuf, + schema: Option>, + schema_infer_max_records: usize, + file_extension: &str, + table_partition_cols: Vec<(String, String)>, + file_compression_type: Option, + py: Python, + ) -> PyResult<()> { + let path = path + .to_str() + .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?; + + let mut options = NdJsonReadOptions::default() + .file_compression_type(parse_file_compression_type(file_compression_type)?) + .table_partition_cols(convert_table_partition_cols(table_partition_cols)?); + options.schema_infer_max_records = schema_infer_max_records; + options.file_extension = file_extension; + options.schema = schema.as_ref().map(|x| &x.0); + + let result = self.ctx.register_json(name, path, options); + wait_for_future(py, result).map_err(DataFusionError::from)?; + + Ok(()) + } + // Registers a PyArrow.Dataset fn register_dataset(&self, name: &str, dataset: &PyAny, py: Python) -> PyResult<()> { let table: Arc = Arc::new(Dataset::new(dataset, py)?); From 499f0458dcceea93d1512258808a805986b677e7 Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Tue, 22 Aug 2023 15:44:42 +0200 Subject: [PATCH 067/413] feat: add basic compression configuration to write_parquet (#459) --- datafusion/tests/test_dataframe.py | 67 ++++++++++++++++++++++++++++++ src/dataframe.rs | 58 ++++++++++++++++++++++++-- 2 files changed, 122 insertions(+), 3 deletions(-) diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py index 78cb50f12..ce7d89e7b 100644 --- a/datafusion/tests/test_dataframe.py +++ b/datafusion/tests/test_dataframe.py @@ -14,8 +14,10 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
+import os import pyarrow as pa +import pyarrow.parquet as pq import pytest from datafusion import functions as f @@ -645,3 +647,68 @@ def test_describe(df): "b": [3.0, 3.0, 5.0, 1.0, 4.0, 6.0, 5.0], "c": [3.0, 3.0, 7.0, 1.7320508075688772, 5.0, 8.0, 8.0], } + + +def test_write_parquet(df, tmp_path): + path = tmp_path + + df.write_parquet(str(path)) + result = pq.read_table(str(path)).to_pydict() + expected = df.to_pydict() + + assert result == expected + + +@pytest.mark.parametrize( + "compression, compression_level", + [("gzip", 6), ("brotli", 7), ("zstd", 15)], +) +def test_write_compressed_parquet( + df, tmp_path, compression, compression_level +): + path = tmp_path + + df.write_parquet( + str(path), compression=compression, compression_level=compression_level + ) + + # test that the actual compression scheme is the one written + for root, dirs, files in os.walk(path): + for file in files: + if file.endswith(".parquet"): + metadata = pq.ParquetFile(tmp_path / file).metadata.to_dict() + for row_group in metadata["row_groups"]: + for columns in row_group["columns"]: + assert columns["compression"].lower() == compression + + result = pq.read_table(str(path)).to_pydict() + expected = df.to_pydict() + + assert result == expected + + +@pytest.mark.parametrize( + "compression, compression_level", + [("gzip", 12), ("brotli", 15), ("zstd", 23), ("wrong", 12)], +) +def test_write_compressed_parquet_wrong_compression_level( + df, tmp_path, compression, compression_level +): + path = tmp_path + + with pytest.raises(ValueError): + df.write_parquet( + str(path), + compression=compression, + compression_level=compression_level, + ) + + +@pytest.mark.parametrize("compression", ["brotli", "zstd", "wrong"]) +def test_write_compressed_parquet_missing_compression_level( + df, tmp_path, compression +): + path = tmp_path + + with pytest.raises(ValueError): + df.write_parquet(str(path), compression=compression) diff --git a/src/dataframe.rs b/src/dataframe.rs index 
b8d8ddc3c..61a44484c 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -23,8 +23,10 @@ use datafusion::arrow::datatypes::Schema; use datafusion::arrow::pyarrow::{PyArrowType, ToPyArrow}; use datafusion::arrow::util::pretty; use datafusion::dataframe::DataFrame; +use datafusion::parquet::basic::{BrotliLevel, Compression, GzipLevel, ZstdLevel}; +use datafusion::parquet::file::properties::WriterProperties; use datafusion::prelude::*; -use pyo3::exceptions::PyTypeError; +use pyo3::exceptions::{PyTypeError, PyValueError}; use pyo3::prelude::*; use pyo3::types::PyTuple; use std::sync::Arc; @@ -308,8 +310,58 @@ impl PyDataFrame { } /// Write a `DataFrame` to a Parquet file. - fn write_parquet(&self, path: &str, py: Python) -> PyResult<()> { - wait_for_future(py, self.df.as_ref().clone().write_parquet(path, None))?; + #[pyo3(signature = ( + path, + compression="uncompressed", + compression_level=None + ))] + fn write_parquet( + &self, + path: &str, + compression: &str, + compression_level: Option, + py: Python, + ) -> PyResult<()> { + fn verify_compression_level(cl: Option) -> Result { + cl.ok_or(PyValueError::new_err("compression_level is not defined")) + } + + let compression_type = match compression.to_lowercase().as_str() { + "snappy" => Compression::SNAPPY, + "gzip" => Compression::GZIP( + GzipLevel::try_new(compression_level.unwrap_or(6)) + .map_err(|e| PyValueError::new_err(format!("{e}")))?, + ), + "brotli" => Compression::BROTLI( + BrotliLevel::try_new(verify_compression_level(compression_level)?) + .map_err(|e| PyValueError::new_err(format!("{e}")))?, + ), + "zstd" => Compression::ZSTD( + ZstdLevel::try_new(verify_compression_level(compression_level)? 
as i32) + .map_err(|e| PyValueError::new_err(format!("{e}")))?, + ), + "lz0" => Compression::LZO, + "lz4" => Compression::LZ4, + "lz4_raw" => Compression::LZ4_RAW, + "uncompressed" => Compression::UNCOMPRESSED, + _ => { + return Err(PyValueError::new_err(format!( + "Unrecognized compression type {compression}" + ))); + } + }; + + let writer_properties = WriterProperties::builder() + .set_compression(compression_type) + .build(); + + wait_for_future( + py, + self.df + .as_ref() + .clone() + .write_parquet(path, Option::from(writer_properties)), + )?; Ok(()) } From 9c643bf2ae63894952be69b5fa2ae145f160971a Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Tue, 22 Aug 2023 15:44:55 +0200 Subject: [PATCH 068/413] feat: add example of reading parquet from s3 (#460) --- examples/sql-parquet-s3.py | 39 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 examples/sql-parquet-s3.py diff --git a/examples/sql-parquet-s3.py b/examples/sql-parquet-s3.py new file mode 100644 index 000000000..bd7da5e20 --- /dev/null +++ b/examples/sql-parquet-s3.py @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import os +import datafusion +from datafusion.object_store import AmazonS3 + +region = "us-east-1" +bucket_name = "yellow-trips" + +s3 = AmazonS3( + bucket_name=bucket_name, + region=region, + access_key_id=os.getenv("AWS_ACCESS_KEY_ID"), + secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), +) + +ctx = datafusion.SessionContext() +path = f"s3://{bucket_name}/" +ctx.register_object_store(path, s3) + +ctx.register_parquet("trips", path) + +df = ctx.sql("select count(passenger_count) from trips") +df.show() From e24dc75f2fe60efb5bc888fd70d2aede80027c25 Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Tue, 22 Aug 2023 15:45:06 +0200 Subject: [PATCH 069/413] feat: add register_avro and read_table (#461) --- datafusion/tests/test_context.py | 10 ++++++++ datafusion/tests/test_sql.py | 26 ++++++++++++++++++++ src/context.rs | 41 ++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+) diff --git a/datafusion/tests/test_context.py b/datafusion/tests/test_context.py index 55a324aec..97bff9bb9 100644 --- a/datafusion/tests/test_context.py +++ b/datafusion/tests/test_context.py @@ -214,6 +214,16 @@ def test_register_table(ctx, database): assert public.names() == {"csv", "csv1", "csv2", "csv3"} +def test_read_table(ctx, database): + default = ctx.catalog() + public = default.database("public") + assert public.names() == {"csv", "csv1", "csv2"} + + table = public.table("csv") + table_df = ctx.read_table(table) + table_df.show() + + def test_deregister_table(ctx, database): default = ctx.catalog() public = default.database("public") diff --git a/datafusion/tests/test_sql.py b/datafusion/tests/test_sql.py index 9d42a1f9d..19a2ad2cf 100644 --- a/datafusion/tests/test_sql.py +++ b/datafusion/tests/test_sql.py @@ -205,6 +205,32 @@ def test_register_json(ctx, tmp_path): ctx.register_json("json4", gzip_path, file_compression_type="rar") +def test_register_avro(ctx): + path = "testing/data/avro/alltypes_plain.avro" + ctx.register_avro("alltypes_plain", path) + result 
= ctx.sql( + "SELECT SUM(tinyint_col) as tinyint_sum FROM alltypes_plain" + ).collect() + result = pa.Table.from_batches(result).to_pydict() + assert result["tinyint_sum"][0] > 0 + + alternative_schema = pa.schema( + [ + pa.field("id", pa.int64()), + ] + ) + + ctx.register_avro( + "alltypes_plain_schema", + path, + schema=alternative_schema, + infinite=False, + ) + result = ctx.sql("SELECT * FROM alltypes_plain_schema").collect() + result = pa.Table.from_batches(result) + assert result.schema == alternative_schema + + def test_execute(ctx, tmp_path): data = [1, 1, 2, 2, 3, 11, 12] diff --git a/src/context.rs b/src/context.rs index 317ab785e..c7f89f2e6 100644 --- a/src/context.rs +++ b/src/context.rs @@ -545,6 +545,39 @@ impl PySessionContext { Ok(()) } + #[allow(clippy::too_many_arguments)] + #[pyo3(signature = (name, + path, + schema=None, + file_extension=".avro", + table_partition_cols=vec![], + infinite=false))] + fn register_avro( + &mut self, + name: &str, + path: PathBuf, + schema: Option>, + file_extension: &str, + table_partition_cols: Vec<(String, String)>, + infinite: bool, + py: Python, + ) -> PyResult<()> { + let path = path + .to_str() + .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?; + + let mut options = AvroReadOptions::default() + .table_partition_cols(convert_table_partition_cols(table_partition_cols)?) 
+ .mark_infinite(infinite); + options.file_extension = file_extension; + options.schema = schema.as_ref().map(|x| &x.0); + + let result = self.ctx.register_avro(name, path, options); + wait_for_future(py, result).map_err(DataFusionError::from)?; + + Ok(()) + } + // Registers a PyArrow.Dataset fn register_dataset(&self, name: &str, dataset: &PyAny, py: Python) -> PyResult<()> { let table: Arc = Arc::new(Dataset::new(dataset, py)?); @@ -734,6 +767,14 @@ impl PySessionContext { Ok(PyDataFrame::new(df)) } + fn read_table(&self, table: &PyTable) -> PyResult { + let df = self + .ctx + .read_table(table.table()) + .map_err(DataFusionError::from)?; + Ok(PyDataFrame::new(df)) + } + fn __repr__(&self) -> PyResult { let config = self.ctx.copied_config(); let mut config_entries = config From bc62aafa34659a36cf188316e45740935ef333d5 Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Wed, 6 Sep 2023 22:18:26 +0200 Subject: [PATCH 070/413] feat: add missing scalar math functions (#465) --- datafusion/tests/test_functions.py | 36 +++++++++++++++++++++++++++++- src/functions.rs | 32 ++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/datafusion/tests/test_functions.py b/datafusion/tests/test_functions.py index c5ddea352..1727e7e5b 100644 --- a/datafusion/tests/test_functions.py +++ b/datafusion/tests/test_functions.py @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
+import math import numpy as np import pyarrow as pa @@ -85,12 +86,15 @@ def test_math_functions(): ctx = SessionContext() # create a RecordBatch and a new DataFrame from it batch = pa.RecordBatch.from_arrays( - [pa.array([0.1, -0.7, 0.55])], names=["value"] + [pa.array([0.1, -0.7, 0.55]), pa.array([float("nan"), 0, 2.0])], + names=["value", "na_value"], ) df = ctx.create_dataframe([[batch]]) values = np.array([0.1, -0.7, 0.55]) + na_values = np.array([np.nan, 0, 2.0]) col_v = column("value") + col_nav = column("na_value") df = df.select( f.abs(col_v), f.sin(col_v), @@ -113,6 +117,20 @@ def test_math_functions(): f.sqrt(col_v), f.signum(col_v), f.trunc(col_v), + f.asinh(col_v), + f.acosh(col_v), + f.atanh(col_v), + f.cbrt(col_v), + f.cosh(col_v), + f.degrees(col_v), + f.gcd(literal(9), literal(3)), + f.lcm(literal(6), literal(4)), + f.nanvl(col_nav, literal(5)), + f.pi(), + f.radians(col_v), + f.sinh(col_v), + f.tanh(col_v), + f.factorial(literal(6)), ) batches = df.collect() assert len(batches) == 1 @@ -151,6 +169,22 @@ def test_math_functions(): np.testing.assert_array_almost_equal(result.column(18), np.sqrt(values)) np.testing.assert_array_almost_equal(result.column(19), np.sign(values)) np.testing.assert_array_almost_equal(result.column(20), np.trunc(values)) + np.testing.assert_array_almost_equal(result.column(21), np.arcsinh(values)) + np.testing.assert_array_almost_equal(result.column(22), np.arccosh(values)) + np.testing.assert_array_almost_equal(result.column(23), np.arctanh(values)) + np.testing.assert_array_almost_equal(result.column(24), np.cbrt(values)) + np.testing.assert_array_almost_equal(result.column(25), np.cosh(values)) + np.testing.assert_array_almost_equal(result.column(26), np.degrees(values)) + np.testing.assert_array_almost_equal(result.column(27), np.gcd(9, 3)) + np.testing.assert_array_almost_equal(result.column(28), np.lcm(6, 4)) + np.testing.assert_array_almost_equal( + result.column(29), np.where(np.isnan(na_values), 5, na_values) + ) 
+ np.testing.assert_array_almost_equal(result.column(30), np.pi) + np.testing.assert_array_almost_equal(result.column(31), np.radians(values)) + np.testing.assert_array_almost_equal(result.column(32), np.sinh(values)) + np.testing.assert_array_almost_equal(result.column(33), np.tanh(values)) + np.testing.assert_array_almost_equal(result.column(34), math.factorial(6)) def test_string_functions(df): diff --git a/src/functions.rs b/src/functions.rs index 06d35ef3a..6037ce196 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -198,9 +198,12 @@ macro_rules! aggregate_function { scalar_function!(abs, Abs); scalar_function!(acos, Acos); +scalar_function!(acosh, Acosh); scalar_function!(ascii, Ascii, "Returns the numeric code of the first character of the argument. In UTF8 encoding, returns the Unicode code point of the character. In other multibyte encodings, the argument must be an ASCII character."); scalar_function!(asin, Asin); +scalar_function!(asinh, Asinh); scalar_function!(atan, Atan); +scalar_function!(atanh, Atanh); scalar_function!(atan2, Atan2); scalar_function!( bit_length, @@ -208,6 +211,7 @@ scalar_function!( "Returns number of bits in the string (8 times the octet_length)." ); scalar_function!(btrim, Btrim, "Removes the longest string containing only characters in characters (a space by default) from the start and end of string."); +scalar_function!(cbrt, Cbrt); scalar_function!(ceil, Ceil); scalar_function!( character_length, @@ -219,9 +223,14 @@ scalar_function!(char_length, CharacterLength); scalar_function!(chr, Chr, "Returns the character with the given code."); scalar_function!(coalesce, Coalesce); scalar_function!(cos, Cos); +scalar_function!(cosh, Cosh); +scalar_function!(degrees, Degrees); scalar_function!(exp, Exp); +scalar_function!(factorial, Factorial); scalar_function!(floor, Floor); +scalar_function!(gcd, Gcd); scalar_function!(initcap, InitCap, "Converts the first letter of each word to upper case and the rest to lower case. 
Words are sequences of alphanumeric characters separated by non-alphanumeric characters."); +scalar_function!(lcm, Lcm); scalar_function!(left, Left, "Returns first n characters in the string, or when n is negative, returns all but last |n| characters."); scalar_function!(ln, Ln); scalar_function!(log, Log); @@ -235,9 +244,16 @@ scalar_function!( MD5, "Computes the MD5 hash of the argument, with the result written in hexadecimal." ); +scalar_function!( + nanvl, + Nanvl, + "Computes the MD5 hash of the argument, with the result written in hexadecimal." +); scalar_function!(octet_length, OctetLength, "Returns number of bytes in the string. Since this version of the function accepts type character directly, it will not strip trailing spaces."); +scalar_function!(pi, Pi); scalar_function!(power, Power); scalar_function!(pow, Power); +scalar_function!(radians, Radians); scalar_function!(regexp_match, RegexpMatch); scalar_function!( regexp_replace, @@ -269,6 +285,7 @@ scalar_function!(sha384, SHA384); scalar_function!(sha512, SHA512); scalar_function!(signum, Signum); scalar_function!(sin, Sin); +scalar_function!(sinh, Sinh); scalar_function!( split_part, SplitPart, @@ -283,6 +300,7 @@ scalar_function!( scalar_function!(strpos, Strpos, "Returns starting index of specified substring within string, or zero if it's not present. 
(Same as position(substring in string), but note the reversed argument order.)"); scalar_function!(substr, Substr); scalar_function!(tan, Tan); +scalar_function!(tanh, Tanh); scalar_function!( to_hex, ToHex, @@ -343,6 +361,7 @@ aggregate_function!(var_samp, Variance); pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(abs))?; m.add_wrapped(wrap_pyfunction!(acos))?; + m.add_wrapped(wrap_pyfunction!(acosh))?; m.add_wrapped(wrap_pyfunction!(approx_distinct))?; m.add_wrapped(wrap_pyfunction!(alias))?; m.add_wrapped(wrap_pyfunction!(approx_median))?; @@ -353,11 +372,14 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(arrow_typeof))?; m.add_wrapped(wrap_pyfunction!(ascii))?; m.add_wrapped(wrap_pyfunction!(asin))?; + m.add_wrapped(wrap_pyfunction!(asinh))?; m.add_wrapped(wrap_pyfunction!(atan))?; + m.add_wrapped(wrap_pyfunction!(atanh))?; m.add_wrapped(wrap_pyfunction!(atan2))?; m.add_wrapped(wrap_pyfunction!(avg))?; m.add_wrapped(wrap_pyfunction!(bit_length))?; m.add_wrapped(wrap_pyfunction!(btrim))?; + m.add_wrapped(wrap_pyfunction!(cbrt))?; m.add_wrapped(wrap_pyfunction!(ceil))?; m.add_wrapped(wrap_pyfunction!(character_length))?; m.add_wrapped(wrap_pyfunction!(chr))?; @@ -369,6 +391,7 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(concat))?; m.add_wrapped(wrap_pyfunction!(corr))?; m.add_wrapped(wrap_pyfunction!(cos))?; + m.add_wrapped(wrap_pyfunction!(cosh))?; m.add_wrapped(wrap_pyfunction!(count))?; m.add_wrapped(wrap_pyfunction!(count_star))?; m.add_wrapped(wrap_pyfunction!(covar))?; @@ -376,6 +399,7 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(covar_samp))?; m.add_wrapped(wrap_pyfunction!(current_date))?; m.add_wrapped(wrap_pyfunction!(current_time))?; + m.add_wrapped(wrap_pyfunction!(degrees))?; m.add_wrapped(wrap_pyfunction!(date_bin))?; m.add_wrapped(wrap_pyfunction!(datepart))?; 
m.add_wrapped(wrap_pyfunction!(date_part))?; @@ -383,11 +407,14 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(date_trunc))?; m.add_wrapped(wrap_pyfunction!(digest))?; m.add_wrapped(wrap_pyfunction!(exp))?; + m.add_wrapped(wrap_pyfunction!(factorial))?; m.add_wrapped(wrap_pyfunction!(floor))?; m.add_wrapped(wrap_pyfunction!(from_unixtime))?; + m.add_wrapped(wrap_pyfunction!(gcd))?; m.add_wrapped(wrap_pyfunction!(grouping))?; m.add_wrapped(wrap_pyfunction!(in_list))?; m.add_wrapped(wrap_pyfunction!(initcap))?; + m.add_wrapped(wrap_pyfunction!(lcm))?; m.add_wrapped(wrap_pyfunction!(left))?; m.add_wrapped(wrap_pyfunction!(length))?; m.add_wrapped(wrap_pyfunction!(ln))?; @@ -403,12 +430,15 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(mean))?; m.add_wrapped(wrap_pyfunction!(median))?; m.add_wrapped(wrap_pyfunction!(min))?; + m.add_wrapped(wrap_pyfunction!(nanvl))?; m.add_wrapped(wrap_pyfunction!(now))?; m.add_wrapped(wrap_pyfunction!(nullif))?; m.add_wrapped(wrap_pyfunction!(octet_length))?; m.add_wrapped(wrap_pyfunction!(order_by))?; + m.add_wrapped(wrap_pyfunction!(pi))?; m.add_wrapped(wrap_pyfunction!(power))?; m.add_wrapped(wrap_pyfunction!(pow))?; + m.add_wrapped(wrap_pyfunction!(radians))?; m.add_wrapped(wrap_pyfunction!(random))?; m.add_wrapped(wrap_pyfunction!(regexp_match))?; m.add_wrapped(wrap_pyfunction!(regexp_replace))?; @@ -425,6 +455,7 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(sha512))?; m.add_wrapped(wrap_pyfunction!(signum))?; m.add_wrapped(wrap_pyfunction!(sin))?; + m.add_wrapped(wrap_pyfunction!(sinh))?; m.add_wrapped(wrap_pyfunction!(split_part))?; m.add_wrapped(wrap_pyfunction!(sqrt))?; m.add_wrapped(wrap_pyfunction!(starts_with))?; @@ -436,6 +467,7 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(substr))?; m.add_wrapped(wrap_pyfunction!(sum))?; 
m.add_wrapped(wrap_pyfunction!(tan))?; + m.add_wrapped(wrap_pyfunction!(tanh))?; m.add_wrapped(wrap_pyfunction!(to_hex))?; m.add_wrapped(wrap_pyfunction!(to_timestamp))?; m.add_wrapped(wrap_pyfunction!(to_timestamp_millis))?; From 944b1c98343aeae3dd6d2d82525a27068d5fb12d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 6 Sep 2023 14:18:50 -0600 Subject: [PATCH 071/413] build(deps): bump arduino/setup-protoc from 1 to 2 (#452) Bumps [arduino/setup-protoc](https://github.com/arduino/setup-protoc) from 1 to 2. - [Release notes](https://github.com/arduino/setup-protoc/releases) - [Commits](https://github.com/arduino/setup-protoc/compare/v1...v2) --- updated-dependencies: - dependency-name: arduino/setup-protoc dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/build.yml | 4 ++-- .github/workflows/docs.yaml | 2 +- .github/workflows/test.yaml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e72c4805a..199487d2c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -74,7 +74,7 @@ jobs: path: . - name: Install Protoc - uses: arduino/setup-protoc@v1 + uses: arduino/setup-protoc@v2 with: version: "3.x" repo-token: ${{ secrets.GITHUB_TOKEN }} @@ -135,7 +135,7 @@ jobs: path: . 
- name: Install Protoc - uses: arduino/setup-protoc@v1 + uses: arduino/setup-protoc@v2 with: version: "3.x" repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index d9e7ad4ad..d538381db 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -38,7 +38,7 @@ jobs: python-version: "3.10" - name: Install Protoc - uses: arduino/setup-protoc@v1 + uses: arduino/setup-protoc@v2 with: version: '3.x' repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f672c8129..079fb4488 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -53,7 +53,7 @@ jobs: override: true - name: Install Protoc - uses: arduino/setup-protoc@v1 + uses: arduino/setup-protoc@v2 with: version: '3.x' repo-token: ${{ secrets.GITHUB_TOKEN }} From b4d383b5f85128f7f61e533c2f8d09f053189eb5 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 8 Sep 2023 06:37:20 -0700 Subject: [PATCH 072/413] Revert "build(deps): bump arduino/setup-protoc from 1 to 2 (#452)" (#474) This reverts commit 944b1c98343aeae3dd6d2d82525a27068d5fb12d. --- .github/workflows/build.yml | 4 ++-- .github/workflows/docs.yaml | 2 +- .github/workflows/test.yaml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 199487d2c..e72c4805a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -74,7 +74,7 @@ jobs: path: . - name: Install Protoc - uses: arduino/setup-protoc@v2 + uses: arduino/setup-protoc@v1 with: version: "3.x" repo-token: ${{ secrets.GITHUB_TOKEN }} @@ -135,7 +135,7 @@ jobs: path: . 
- name: Install Protoc - uses: arduino/setup-protoc@v2 + uses: arduino/setup-protoc@v1 with: version: "3.x" repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index d538381db..d9e7ad4ad 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -38,7 +38,7 @@ jobs: python-version: "3.10" - name: Install Protoc - uses: arduino/setup-protoc@v2 + uses: arduino/setup-protoc@v1 with: version: '3.x' repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 079fb4488..f672c8129 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -53,7 +53,7 @@ jobs: override: true - name: Install Protoc - uses: arduino/setup-protoc@v2 + uses: arduino/setup-protoc@v1 with: version: '3.x' repo-token: ${{ secrets.GITHUB_TOKEN }} From 0d7c19e44b61a1f269f1b835f529ad35aa76c6b6 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 11 Sep 2023 12:50:48 -0700 Subject: [PATCH 073/413] Minor: fix wrongly copied function description (#497) --- src/functions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/functions.rs b/src/functions.rs index 6037ce196..d9a7d6043 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -247,7 +247,7 @@ scalar_function!( scalar_function!( nanvl, Nanvl, - "Computes the MD5 hash of the argument, with the result written in hexadecimal." + "Returns x if x is not NaN otherwise returns y." ); scalar_function!(octet_length, OctetLength, "Returns number of bytes in the string. 
Since this version of the function accepts type character directly, it will not strip trailing spaces."); scalar_function!(pi, Pi); From af4f75829a9aeaa9023ce742c58bbc6c50d5489c Mon Sep 17 00:00:00 2001 From: Judah Rand <17158624+judahrand@users.noreply.github.com> Date: Mon, 11 Sep 2023 23:27:49 +0100 Subject: [PATCH 074/413] Upgrade to Datafusion 31.0.0 (#491) * Upgrade to Datafusion 30.0.0 * Upgrade to Datafusion 31.0.0-rc1 * Upgrade `object_store` to match Datafusion * Fix incompatibilities with Datafusion 31.0.0 * Add `schema` and `file_sort_order` to `read/register_parquet` * Upgrade to Datafusion 31.0.0 --- Cargo.lock | 474 ++++++++++++++++++++++++----------------------- Cargo.toml | 16 +- src/context.rs | 33 +++- src/dataframe.rs | 27 ++- 4 files changed, 300 insertions(+), 250 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 585e64cb5..d15f24bf8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "addr2line" -version = "0.20.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" dependencies = [ "gimli", ] @@ -38,9 +38,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.0.3" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b8f9420f797f2d9e935edf629310eb938a0d839f984e25327f3c7eed22300c" +checksum = "0c378d78423fdad8089616f827526ee33c19f2fddbd5de1629152c9593ba4783" dependencies = [ "memchr", ] @@ -83,9 +83,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.72" +version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854" +checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" [[package]] name = "apache-avro" @@ -128,9 +128,9 
@@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7104b9e9761613ae92fe770c741d6bbf1dbc791a0fe204400aebdd429875741" +checksum = "04a8801ebb147ad240b2d978d3ab9f73c9ccd4557ba6a03e7800496770ed10e0" dependencies = [ "ahash", "arrow-arith", @@ -151,9 +151,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38e597a8e8efb8ff52c50eaf8f4d85124ce3c1bf20fab82f476d73739d9ab1c2" +checksum = "895263144bd4a69751cbe6a34a53f26626e19770b313a9fa792c415cd0e78f11" dependencies = [ "arrow-array", "arrow-buffer", @@ -166,9 +166,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a86d9c1473db72896bd2345ebb6b8ad75b8553ba390875c76708e8dc5c5492d" +checksum = "226fdc6c3a4ae154a74c24091d36a90b514f0ed7112f5b8322c1d8f354d8e20d" dependencies = [ "ahash", "arrow-buffer", @@ -183,19 +183,20 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234b3b1c8ed00c874bf95972030ac4def6f58e02ea5a7884314388307fb3669b" +checksum = "fc4843af4dd679c2f35b69c572874da8fde33be53eb549a5fb128e7a4b763510" dependencies = [ + "bytes", "half", "num", ] [[package]] name = "arrow-cast" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22f61168b853c7faea8cea23a2169fdff9c82fb10ae5e2c07ad1cab8f6884931" +checksum = "35e8b9990733a9b635f656efda3c9b8308c7a19695c9ec2c7046dd154f9b144b" dependencies = [ "arrow-array", "arrow-buffer", @@ -211,9 +212,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "45.0.0" +version = "46.0.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10b545c114d9bf8569c84d2fbe2020ac4eea8db462c0a37d0b65f41a90d066fe" +checksum = "646fbb4e11dd0afb8083e883f53117713b8caadb4413b3c9e63e3f535da3683c" dependencies = [ "arrow-array", "arrow-buffer", @@ -230,9 +231,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6b6852635e7c43e5b242841c7470606ff0ee70eef323004cacc3ecedd33dd8f" +checksum = "da900f31ff01a0a84da0572209be72b2b6f980f3ea58803635de47913191c188" dependencies = [ "arrow-buffer", "arrow-schema", @@ -242,9 +243,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a66da9e16aecd9250af0ae9717ae8dd7ea0d8ca5a3e788fe3de9f4ee508da751" +checksum = "2707a8d7ee2d345d045283ece3ae43416175873483e5d96319c929da542a0b1f" dependencies = [ "arrow-array", "arrow-buffer", @@ -256,9 +257,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60ee0f9d8997f4be44a60ee5807443e396e025c23cf14d2b74ce56135cb04474" +checksum = "5d1b91a63c356d14eedc778b76d66a88f35ac8498426bb0799a769a49a74a8b4" dependencies = [ "arrow-array", "arrow-buffer", @@ -276,9 +277,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcab05410e6b241442abdab6e1035177dc082bdb6f17049a4db49faed986d63" +checksum = "584325c91293abbca7aaaabf8da9fe303245d641f5f4a18a6058dc68009c7ebf" dependencies = [ "arrow-array", "arrow-buffer", @@ -291,9 +292,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"91a847dd9eb0bacd7836ac63b3475c68b2210c2c96d0ec1b808237b973bd5d73" +checksum = "0e32afc1329f7b372463b21c6ca502b07cf237e1ed420d87706c1770bb0ebd38" dependencies = [ "ahash", "arrow-array", @@ -306,18 +307,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54df8c47918eb634c20e29286e69494fdc20cafa5173eb6dad49c7f6acece733" +checksum = "b104f5daa730f00fde22adc03a12aa5a2ae9ccbbf99cbd53d284119ddc90e03d" dependencies = [ "bitflags 2.4.0", ] [[package]] name = "arrow-select" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "941dbe481da043c4bd40c805a19ec2fc008846080c4953171b62bcad5ee5f7fb" +checksum = "73b3ca55356d1eae07cf48808d8c462cea674393ae6ad1e0b120f40b422eb2b4" dependencies = [ "arrow-array", "arrow-buffer", @@ -328,9 +329,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "359b2cd9e071d5a3bcf44679f9d85830afebc5b9c98a08019a570a65ae933e0f" +checksum = "af1433ce02590cae68da0a18ed3a3ed868ffac2c6f24c533ddd2067f7ee04b4a" dependencies = [ "arrow-array", "arrow-buffer", @@ -344,9 +345,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b74f44609f0f91493e3082d3734d98497e094777144380ea4db9f9905dd5b6" +checksum = "d495b6dc0184693324491a5ac05f559acc97bf937ab31d7a1c33dd0016be6d2b" dependencies = [ "bzip2", "flate2", @@ -362,24 +363,24 @@ dependencies = [ [[package]] name = "async-recursion" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" +checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" 
dependencies = [ "proc-macro2", "quote", - "syn 2.0.28", + "syn 2.0.32", ] [[package]] name = "async-trait" -version = "0.1.72" +version = "0.1.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc6dde6e4ed435a4c1ee4e73592f5ba9da2151af10076cc04858746af9352d09" +checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.28", + "syn 2.0.32", ] [[package]] @@ -399,9 +400,9 @@ dependencies = [ [[package]] name = "backtrace" -version = "0.3.68" +version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12" +checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" dependencies = [ "addr2line", "cc", @@ -414,9 +415,9 @@ dependencies = [ [[package]] name = "base64" -version = "0.21.2" +version = "0.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "604178f6c5c21f02dc555784810edfb88d34ac2c73b2eae109655649ee73ce3d" +checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2" [[package]] name = "bitflags" @@ -497,9 +498,9 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "bytes" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" +checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" [[package]] name = "bzip2" @@ -524,9 +525,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.82" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "305fe645edc1442a0fa8b6726ba61d422798d37a52e12eaecf4b022ebbb88f01" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" dependencies = [ "jobserver", "libc", @@ -540,15 +541,15 @@ checksum = 
"baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.26" +version = "0.4.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec837a71355b28f6556dbd569b37b3f363091c0bd4b2e735674521b4c5fd9bc5" +checksum = "defd4e7873dbddba6c7c91e199c7fcb946abc4a6a4ac3195400bcfb01b5de877" dependencies = [ "android-tzdata", "iana-time-zone", "num-traits", "serde", - "winapi", + "windows-targets", ] [[package]] @@ -675,9 +676,9 @@ dependencies = [ [[package]] name = "dashmap" -version = "5.5.0" +version = "5.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6943ae99c34386c84a470c499d3414f66502a41340aa895406e0d2e4a207b91d" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" dependencies = [ "cfg-if", "hashbrown 0.14.0", @@ -688,8 +689,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "29.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=29.0.0-rc1#8265e99d05382fca57cc7399f8ee241966f4a1f5" +version = "31.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a4e4fc25698a14c90b34dda647ba10a5a966dc04b036d22e77fb1048663375d" dependencies = [ "ahash", "apache-avro", @@ -715,7 +717,6 @@ dependencies = [ "hashbrown 0.14.0", "indexmap 2.0.0", "itertools 0.11.0", - "lazy_static", "log", "num-traits", "num_cpus", @@ -725,7 +726,6 @@ dependencies = [ "percent-encoding", "pin-project-lite", "rand", - "smallvec", "sqlparser", "tempfile", "tokio", @@ -738,24 +738,35 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "29.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=29.0.0-rc1#8265e99d05382fca57cc7399f8ee241966f4a1f5" +version = "31.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c23ad0229ea4a85bf76b236d8e75edf539881fdb02ce4e2394f9a76de6055206" dependencies = [ "apache-avro", "arrow", "arrow-array", + 
"async-compression", + "bytes", + "bzip2", "chrono", + "flate2", + "futures", "num_cpus", "object_store", "parquet", "pyo3", "sqlparser", + "tokio", + "tokio-util", + "xz2", + "zstd", ] [[package]] name = "datafusion-execution" -version = "29.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=29.0.0-rc1#8265e99d05382fca57cc7399f8ee241966f4a1f5" +version = "31.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b37d2fc1a213baf34e0a57c85b8e6648f1a95152798fd6738163ee96c19203f" dependencies = [ "arrow", "dashmap", @@ -773,13 +784,13 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "29.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=29.0.0-rc1#8265e99d05382fca57cc7399f8ee241966f4a1f5" +version = "31.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6ea9844395f537730a145e5d87f61fecd37c2bc9d54e1dc89b35590d867345d" dependencies = [ "ahash", "arrow", "datafusion-common", - "lazy_static", "sqlparser", "strum 0.25.0", "strum_macros 0.25.2", @@ -787,8 +798,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "29.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=29.0.0-rc1#8265e99d05382fca57cc7399f8ee241966f4a1f5" +version = "31.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8a30e0f79c5d59ba14d3d70f2500e87e0ff70236ad5e47f9444428f054fd2be" dependencies = [ "arrow", "async-trait", @@ -804,8 +816,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "29.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=29.0.0-rc1#8265e99d05382fca57cc7399f8ee241966f4a1f5" +version = "31.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "766c567082c9bbdcb784feec8fe40c7049cedaeb3a18d54f563f75fe0dc1932c" dependencies = [ "ahash", "arrow", @@ -823,7 +836,6 @@ dependencies = [ "hex", "indexmap 2.0.0", "itertools 
0.11.0", - "lazy_static", "libc", "log", "md-5", @@ -838,7 +850,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "29.0.0" +version = "31.0.0" dependencies = [ "async-trait", "datafusion", @@ -857,7 +869,7 @@ dependencies = [ "pyo3-build-config", "rand", "regex-syntax", - "syn 2.0.28", + "syn 2.0.32", "tokio", "url", "uuid", @@ -865,8 +877,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "29.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=29.0.0-rc1#8265e99d05382fca57cc7399f8ee241966f4a1f5" +version = "31.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "811fd084cf2d78aa0c76b74320977c7084ad0383690612528b580795764b4dd0" dependencies = [ "arrow", "arrow-schema", @@ -878,8 +891,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "29.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=29.0.0-rc1#8265e99d05382fca57cc7399f8ee241966f4a1f5" +version = "31.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736201d7981b5ea91bea4245d0d33bd17dccfdb4a739350bbb990e09a57122ec" dependencies = [ "async-recursion", "chrono", @@ -911,9 +925,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "dyn-clone" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "304e6508efa593091e97a9abbc10f90aa7ca635b6d2784feff3c89d41dd12272" +checksum = "bbfc4744c1b8f2a09adc0e55242f60b1af195d88596bd8700be74418c056c555" [[package]] name = "either" @@ -923,9 +937,9 @@ checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" [[package]] name = "encoding_rs" -version = "0.8.32" +version = "0.8.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394" +checksum = 
"7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" dependencies = [ "cfg-if", ] @@ -938,9 +952,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b30f669a7961ef1631673d2766cc92f52d64f7ef354d4fe0ddfd30ed52f0f4f" +checksum = "136526188508e25c6fef639d7927dfb3e0e3084488bf202267829cf7fc23dbdd" dependencies = [ "errno-dragonfly", "libc", @@ -981,9 +995,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.26" +version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743" +checksum = "c6c98ee8095e9d1dcbf2fcc6d95acccb90d1c81db1e44725c6a984b1dbdfb010" dependencies = [ "crc32fast", "miniz_oxide", @@ -1060,7 +1074,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.28", + "syn 2.0.32", ] [[package]] @@ -1116,17 +1130,17 @@ dependencies = [ [[package]] name = "gimli" -version = "0.27.3" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e" +checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" [[package]] name = "git2" -version = "0.17.2" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b989d6a7ca95a362cf2cfc5ad688b3a467be1f87e480b8dad07fee8c79b0044" +checksum = "12ef350ba88a33b4d524b1d1c79096c9ade5ef8c59395df0e60d1e1889414c0e" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.4.0", "libc", "libgit2-sys", "log", @@ -1141,9 +1155,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.20" +version = "0.3.21" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "97ec8491ebaf99c8eaa73058b045fe58073cd6be7f596ac993ced0b0a0c01049" +checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833" dependencies = [ "bytes", "fnv", @@ -1212,6 +1226,15 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "home" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" +dependencies = [ + "windows-sys", +] + [[package]] name = "http" version = "0.2.9" @@ -1242,9 +1265,9 @@ checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" [[package]] name = "httpdate" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "humantime" @@ -1501,9 +1524,9 @@ dependencies = [ [[package]] name = "libgit2-sys" -version = "0.15.2+1.6.4" +version = "0.16.1+1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a80df2e11fb4a61f4ba2ab42dbe7f74468da143f1a75c74e11dee7c813f694fa" +checksum = "f2a2bb3680b094add03bb3732ec520ece34da31a8cd2d633d1389d0f0fb60d0c" dependencies = [ "cc", "libc", @@ -1519,9 +1542,9 @@ checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" [[package]] name = "libmimalloc-sys" -version = "0.1.33" +version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4ac0e912c8ef1b735e92369695618dc5b1819f5a7bf3f167301a3ba1cea515e" +checksum = "25d058a81af0d1c22d7a1c948576bee6d673f7af3c0f35564abd6c81122f513d" dependencies = [ "cc", "libc", @@ -1541,9 +1564,9 @@ dependencies = [ [[package]] name = 
"linux-raw-sys" -version = "0.4.5" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" +checksum = "1a9bad9f94746442c783ca431b22403b519cd7fbeed0533fdd6328b2f2212128" [[package]] name = "lock_api" @@ -1557,9 +1580,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.19" +version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" [[package]] name = "lz4" @@ -1603,9 +1626,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.5.0" +version = "2.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" [[package]] name = "memoffset" @@ -1618,9 +1641,9 @@ dependencies = [ [[package]] name = "mimalloc" -version = "0.1.37" +version = "0.1.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e2894987a3459f3ffb755608bd82188f8ed00d0ae077f1edea29c068d639d98" +checksum = "972e5f23f6716f62665760b0f4cbf592576a80c7b879ba9beaafc0e558894127" dependencies = [ "libmimalloc-sys", ] @@ -1673,9 +1696,9 @@ dependencies = [ [[package]] name = "num-bigint" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" +checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" dependencies = [ "autocfg", "num-integer", @@ -1684,9 +1707,9 @@ dependencies = [ [[package]] name = "num-complex" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d" +checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" dependencies = [ "num-traits", ] @@ -1746,18 +1769,18 @@ dependencies = [ [[package]] name = "object" -version = "0.31.1" +version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1" +checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" dependencies = [ "memchr", ] [[package]] name = "object_store" -version = "0.6.1" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27c776db4f332b571958444982ff641d2531417a326ca368995073b639205d58" +checksum = "d359e231e5451f4f9fa889d56e3ce34f8724f1a61db2107739359717cf2bbf08" dependencies = [ "async-trait", "base64", @@ -1823,9 +1846,9 @@ dependencies = [ [[package]] name = "parquet" -version = "45.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f9739b984380582bdb7749ae5b5d28839bce899212cf16465c1ac1f8b65d79" +checksum = "1ad2cba786ae07da4d73371a88b9e0f9d3ffac1a9badc83922e0e15814f5c5fa" dependencies = [ "ahash", "arrow-array", @@ -1878,12 +1901,12 @@ checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" [[package]] name = "petgraph" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4" +checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" dependencies = [ "fixedbitset", - "indexmap 1.9.3", + "indexmap 2.0.0", ] [[package]] @@ -1926,9 +1949,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.12" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12cc1b0bf1727a77a54b6654e7b5f1af8604923edc8b81885f8ec92f9e3f0a05" +checksum 
= "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" [[package]] name = "pin-utils" @@ -1950,12 +1973,12 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "prettyplease" -version = "0.2.12" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c64d9ba0963cdcea2e1b2230fbae2bab30eb25a174be395c41e764bfb65dd62" +checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" dependencies = [ "proc-macro2", - "syn 2.0.28", + "syn 2.0.32", ] [[package]] @@ -2112,9 +2135,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.32" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" dependencies = [ "proc-macro2", ] @@ -2160,9 +2183,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.9.3" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81bc1d4caf89fac26a70747fe603c130093b53c773888797a6329091246d651a" +checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" dependencies = [ "aho-corasick", "memchr", @@ -2172,9 +2195,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.3.6" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fed1ceff11a1dddaee50c9dc8e4938bd106e9d89ae372f192311e7da498e3b69" +checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" dependencies = [ "aho-corasick", "memchr", @@ -2183,9 +2206,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" +checksum = 
"dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" [[package]] name = "regress" @@ -2199,9 +2222,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.18" +version = "0.11.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cde824a14b7c14f85caff81225f411faacc04a2013f41670f41443742b1c1c55" +checksum = "3e9ad3fe7488d7e34558a2033d45a0c90b72d97b4f80705666fea71472e2e6a1" dependencies = [ "base64", "bytes", @@ -2276,9 +2299,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.8" +version = "0.38.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19ed4fa021d81c8392ce04db050a3da9a60299050b7ae1cf482d862b54a7218f" +checksum = "d7db8590df6dfcd144d22afd1b83b36c21a18d7cbc1dc4bb5295a8712e9eb662" dependencies = [ "bitflags 2.4.0", "errno", @@ -2289,9 +2312,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.21.6" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1feddffcfcc0b33f5c6ce9a29e341e4cd59c3f78e7ee45f4a40c038b1d6cbb" +checksum = "cd8d6c9f025a446bc4d18ad9632e69aec8f287aa84499ee335599fabd20c3fd8" dependencies = [ "log", "ring", @@ -2310,9 +2333,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.101.3" +version = "0.101.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "261e9e0888cba427c3316e6322805653c9425240b6fd96cee7cb671ab70ab8d0" +checksum = "7d93931baf2d282fff8d3a532bbfd7653f734643161b87e3e01e59a04439bf0d" dependencies = [ "ring", "untrusted", @@ -2341,9 +2364,9 @@ dependencies = [ [[package]] name = "schemars" -version = "0.8.12" +version = "0.8.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02c613288622e5f0c3fdc5dbd4db1c5fbe752746b1d1a56a0630b78fd00de44f" +checksum = "763f8cd0d4c71ed8389c90cb8100cba87e763bd01a8e614d4f0af97bcd50a161" dependencies = [ "dyn-clone", "schemars_derive", @@ -2353,9 +2376,9 @@ dependencies = [ 
[[package]] name = "schemars_derive" -version = "0.8.12" +version = "0.8.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "109da1e6b197438deb6db99952990c7f959572794b80ff93707d55a232545e7c" +checksum = "ec0f696e21e10fa546b7ffb1c9672c6de8fbc7a81acf59524386d8639bf12737" dependencies = [ "proc-macro2", "quote", @@ -2393,22 +2416,22 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.183" +version = "1.0.188" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32ac8da02677876d532745a130fc9d8e6edfa81a269b107c5b00829b91d8eb3c" +checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.183" +version = "1.0.188" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aafe972d60b0b9bee71a91b92fee2d4fb3c9d7e8f6b179aa99f27203d99a4816" +checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.28", + "syn 2.0.32", ] [[package]] @@ -2424,9 +2447,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.104" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "076066c5f1078eac5b722a31827a8832fe108bed65dfa75e233c89f8206e976c" +checksum = "2cc66a619ed80bf7a0f6b17dd063a84b88f6dea1813737cf469aef1d081142c2" dependencies = [ "itoa", "ryu", @@ -2442,7 +2465,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.28", + "syn 2.0.32", ] [[package]] @@ -2483,15 +2506,15 @@ dependencies = [ [[package]] name = "siphasher" -version = "0.3.10" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" [[package]] name = 
"slab" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" dependencies = [ "autocfg", ] @@ -2542,9 +2565,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.3" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2538b18701741680e0322a2302176d3253a35388e2e62f172f64f4f16605f877" +checksum = "4031e820eb552adee9295814c0ced9e5cf38ddf1e8b7d566d6de8e2538ea989e" dependencies = [ "libc", "windows-sys", @@ -2558,9 +2581,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] name = "sqlparser" -version = "0.36.1" +version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2eaa1e88e78d2c2460d78b7dc3f0c08dbb606ab4222f9aff36f420d36e307d87" +checksum = "37ae05a8250b968a3f7db93155a84d68b2e6cea1583949af5ca5b5170c76c075" dependencies = [ "log", "sqlparser_derive", @@ -2621,14 +2644,14 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.28", + "syn 2.0.32", ] [[package]] name = "substrait" -version = "0.12.4" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "658f6cbbd29a250869b87e1bb5a4b42db534cacfc1c03284f2536cd36b6c1617" +checksum = "b3b705c631514962d48c0647e3ad2b4d93b7472f5da7bee2d1dc853bf4c61f19" dependencies = [ "git2", "heck", @@ -2642,7 +2665,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.28", + "syn 2.0.32", "typify", "walkdir", ] @@ -2666,9 +2689,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.28" +version = "2.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567" +checksum = 
"239814284fd6f1a4ffe4ca893952cdd93c224b6a1571c9a9eadd670295c0c9e2" dependencies = [ "proc-macro2", "quote", @@ -2683,9 +2706,9 @@ checksum = "9d0e916b1148c8e263850e1ebcbd046f333e0683c724876bb0da63ea4373dc8a" [[package]] name = "tempfile" -version = "3.7.1" +version = "3.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc02fddf48964c42031a0b3fe0428320ecf3a73c401040fc0096f97794310651" +checksum = "cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef" dependencies = [ "cfg-if", "fastrand", @@ -2696,22 +2719,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.44" +version = "1.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90" +checksum = "9d6d7a740b8a666a7e828dd00da9c0dc290dff53154ea77ac109281de90589b7" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.44" +version = "1.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96" +checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" dependencies = [ "proc-macro2", "quote", - "syn 2.0.28", + "syn 2.0.32", ] [[package]] @@ -2751,9 +2774,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.30.0" +version = "1.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d3ce25f50619af8b0aec2eb23deebe84249e19e2ddd393a6e16e3300a6dadfd" +checksum = "17ed6077ed6cd6c74735e21f37eb16dc3935f96878b1fe961074089cc80893f9" dependencies = [ "backtrace", "bytes", @@ -2762,7 +2785,7 @@ dependencies = [ "num_cpus", "parking_lot", "pin-project-lite", - "socket2 0.5.3", + "socket2 0.5.4", "tokio-macros", "windows-sys", ] @@ -2775,7 +2798,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.28", + "syn 2.0.32", ] [[package]] @@ -2828,7 +2851,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2", "quote", - "syn 2.0.28", + "syn 2.0.32", ] [[package]] @@ -2896,7 +2919,7 @@ dependencies = [ "regress", "schemars", "serde_json", - "syn 2.0.28", + "syn 2.0.32", "thiserror", "unicode-ident", ] @@ -2913,7 +2936,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.28", + "syn 2.0.32", "typify-impl", ] @@ -2970,9 +2993,9 @@ checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" [[package]] name = "url" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb" +checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" dependencies = [ "form_urlencoded", "idna", @@ -3003,9 +3026,9 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "walkdir" -version = "2.3.3" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" +checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" dependencies = [ "same-file", "winapi-util", @@ -3047,7 +3070,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.28", + "syn 2.0.32", "wasm-bindgen-shared", ] @@ -3081,7 +3104,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.28", + "syn 2.0.32", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3094,9 +3117,9 @@ checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" [[package]] name = "wasm-streams" -version = "0.2.3" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6bbae3363c08332cadccd13b67db371814cd214c2524020932f0804b8cf7c078" +checksum = "b4609d447824375f43e1ffbc051b50ad8f4b3ae8219680c94452ea05eb240ac7" dependencies = [ "futures-util", "js-sys", @@ -3115,34 +3138,22 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "webpki" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f095d78192e208183081cc07bc5515ef55216397af48b873e5edcd72637fa1bd" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "webpki-roots" -version = "0.22.6" +version = "0.25.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c71e40d7d2c34a5106301fb632274ca37242cd0c9d3e64dbece371a40a2d87" -dependencies = [ - "webpki", -] +checksum = "14247bb57be4f377dfb94c72830b8ce8fc6beac03cf4bf7b9732eadd414123fc" [[package]] name = "which" -version = "4.4.0" +version = "4.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" dependencies = [ "either", - "libc", + "home", "once_cell", + "rustix", ] [[package]] @@ -3196,9 +3207,9 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.48.1" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", @@ -3211,53 +3222,54 @@ dependencies = [ [[package]] name = "windows_aarch64_gnullvm" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_msvc" 
-version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_i686_gnu" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_msvc" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_x86_64_gnu" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnullvm" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_msvc" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "winreg" -version = "0.10.1" +version = "0.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" 
+checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" dependencies = [ - "winapi", + "cfg-if", + "windows-sys", ] [[package]] @@ -3271,9 +3283,9 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3b9c234616391070b0b173963ebc65a9195068e7ed3731c6edac2ec45ebe106" +checksum = "20707b61725734c595e840fb3704378a0cd2b9c74cc9e6e20724838fc6a1e2f9" dependencies = [ "byteorder", "zerocopy-derive", @@ -3281,13 +3293,13 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f7f3a471f98d0a61c34322fbbfd10c384b07687f680d4119813713f72308d91" +checksum = "56097d5b91d711293a42be9289403896b68654625021732067eac7a4ca388a1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.28", + "syn 2.0.32", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 169d3f5d4..eaa2602ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "29.0.0" +version = "31.0.0" homepage = "https://github.com/apache/arrow-datafusion-python" repository = "https://github.com/apache/arrow-datafusion-python" authors = ["Apache Arrow "] @@ -36,19 +36,19 @@ protoc = [ "datafusion-substrait/protoc" ] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.19", features = ["extension-module", "abi3", "abi3-py38"] } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "29.0.0-rc1", features = ["pyarrow", "avro"] } -datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "29.0.0-rc1", features = ["pyarrow"] } -datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "29.0.0-rc1" } -datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = 
"29.0.0-rc1" } -datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "29.0.0-rc1" } -datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "29.0.0-rc1" } +datafusion = { version = "31.0.0", features = ["pyarrow", "avro"] } +datafusion-common = { version = "31.0.0", features = ["pyarrow"] } +datafusion-expr = { version = "31.0.0" } +datafusion-optimizer = { version = "31.0.0" } +datafusion-sql = { version = "31.0.0" } +datafusion-substrait = { version = "31.0.0" } prost = "0.11" prost-types = "0.11" uuid = { version = "1.3", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false } async-trait = "0.1" futures = "0.3" -object_store = { version = "0.6.1", features = ["aws", "gcp", "azure"] } +object_store = { version = "0.7.0", features = ["aws", "gcp", "azure"] } parking_lot = "0.12" regex-syntax = "0.7.1" syn = "2.0.11" diff --git a/src/context.rs b/src/context.rs index c7f89f2e6..632b179a1 100644 --- a/src/context.rs +++ b/src/context.rs @@ -31,6 +31,7 @@ use crate::catalog::{PyCatalog, PyTable}; use crate::dataframe::PyDataFrame; use crate::dataset::Dataset; use crate::errors::{py_datafusion_err, DataFusionError}; +use crate::expr::PyExpr; use crate::physical_plan::PyExecutionPlan; use crate::record_batch::PyRecordBatchStream; use crate::sql::logical::PyLogicalPlan; @@ -41,7 +42,7 @@ use crate::utils::{get_tokio_runtime, wait_for_future}; use datafusion::arrow::datatypes::{DataType, Schema}; use datafusion::arrow::pyarrow::PyArrowType; use datafusion::arrow::record_batch::RecordBatch; -use datafusion::datasource::file_format::file_type::FileCompressionType; +use datafusion::common::FileCompressionType; use datafusion::datasource::MemTable; use datafusion::datasource::TableProvider; use datafusion::execution::context::{SessionConfig, SessionContext, TaskContext}; @@ -445,7 +446,10 @@ impl PySessionContext { #[allow(clippy::too_many_arguments)] #[pyo3(signature = (name, 
path, table_partition_cols=vec![], parquet_pruning=true, - file_extension=".parquet"))] + file_extension=".parquet", + skip_metadata=true, + schema=None, + file_sort_order=None))] fn register_parquet( &mut self, name: &str, @@ -453,12 +457,23 @@ impl PySessionContext { table_partition_cols: Vec<(String, String)>, parquet_pruning: bool, file_extension: &str, + skip_metadata: bool, + schema: Option>, + file_sort_order: Option>>, py: Python, ) -> PyResult<()> { let mut options = ParquetReadOptions::default() .table_partition_cols(convert_table_partition_cols(table_partition_cols)?) - .parquet_pruning(parquet_pruning); + .parquet_pruning(parquet_pruning) + .skip_metadata(skip_metadata); options.file_extension = file_extension; + options.schema = schema.as_ref().map(|x| &x.0); + options.file_sort_order = file_sort_order + .unwrap_or(vec![]) + .into_iter() + .map(|e| e.into_iter().map(|f| f.into()).collect()) + .collect(); + let result = self.ctx.register_parquet(name, path, options); wait_for_future(py, result).map_err(DataFusionError::from)?; Ok(()) @@ -722,7 +737,9 @@ impl PySessionContext { table_partition_cols=vec![], parquet_pruning=true, file_extension=".parquet", - skip_metadata=true))] + skip_metadata=true, + schema=None, + file_sort_order=None))] fn read_parquet( &self, path: &str, @@ -730,6 +747,8 @@ impl PySessionContext { parquet_pruning: bool, file_extension: &str, skip_metadata: bool, + schema: Option>, + file_sort_order: Option>>, py: Python, ) -> PyResult { let mut options = ParquetReadOptions::default() @@ -737,6 +756,12 @@ impl PySessionContext { .parquet_pruning(parquet_pruning) .skip_metadata(skip_metadata); options.file_extension = file_extension; + options.schema = schema.as_ref().map(|x| &x.0); + options.file_sort_order = file_sort_order + .unwrap_or(vec![]) + .into_iter() + .map(|e| e.into_iter().map(|f| f.into()).collect()) + .collect(); let result = self.ctx.read_parquet(path, options); let df = PyDataFrame::new(wait_for_future(py, 
result).map_err(DataFusionError::from)?); diff --git a/src/dataframe.rs b/src/dataframe.rs index 61a44484c..ebf06f3d2 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -22,7 +22,7 @@ use crate::{errors::DataFusionError, expr::PyExpr}; use datafusion::arrow::datatypes::Schema; use datafusion::arrow::pyarrow::{PyArrowType, ToPyArrow}; use datafusion::arrow::util::pretty; -use datafusion::dataframe::DataFrame; +use datafusion::dataframe::{DataFrame, DataFrameWriteOptions}; use datafusion::parquet::basic::{BrotliLevel, Compression, GzipLevel, ZstdLevel}; use datafusion::parquet::file::properties::WriterProperties; use datafusion::prelude::*; @@ -305,7 +305,13 @@ impl PyDataFrame { /// Write a `DataFrame` to a CSV file. fn write_csv(&self, path: &str, py: Python) -> PyResult<()> { - wait_for_future(py, self.df.as_ref().clone().write_csv(path))?; + wait_for_future( + py, + self.df + .as_ref() + .clone() + .write_csv(path, DataFrameWriteOptions::new(), None), + )?; Ok(()) } @@ -357,17 +363,24 @@ impl PyDataFrame { wait_for_future( py, - self.df - .as_ref() - .clone() - .write_parquet(path, Option::from(writer_properties)), + self.df.as_ref().clone().write_parquet( + path, + DataFrameWriteOptions::new(), + Option::from(writer_properties), + ), )?; Ok(()) } /// Executes a query and writes the results to a partitioned JSON file. fn write_json(&self, path: &str, py: Python) -> PyResult<()> { - wait_for_future(py, self.df.as_ref().clone().write_json(path))?; + wait_for_future( + py, + self.df + .as_ref() + .clone() + .write_json(path, DataFrameWriteOptions::new()), + )?; Ok(()) } From beabf2601038747e5b1a813114d6b22be83bf291 Mon Sep 17 00:00:00 2001 From: Judah Rand <17158624+judahrand@users.noreply.github.com> Date: Tue, 12 Sep 2023 15:20:11 +0100 Subject: [PATCH 075/413] Add `isnan` and `iszero` (#495) * Add `isnan` and `iszero` * Add test for `log` * Add missing newline! 
Co-authored-by: Liang-Chi Hsieh * Fix linting --------- Co-authored-by: Liang-Chi Hsieh --- datafusion/tests/test_functions.py | 10 ++++++++++ src/functions.rs | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/datafusion/tests/test_functions.py b/datafusion/tests/test_functions.py index 1727e7e5b..80795bf12 100644 --- a/datafusion/tests/test_functions.py +++ b/datafusion/tests/test_functions.py @@ -131,6 +131,9 @@ def test_math_functions(): f.sinh(col_v), f.tanh(col_v), f.factorial(literal(6)), + f.isnan(col_nav), + f.iszero(col_nav), + f.log(literal(3), col_v + literal(pa.scalar(1))), ) batches = df.collect() assert len(batches) == 1 @@ -185,6 +188,13 @@ def test_math_functions(): np.testing.assert_array_almost_equal(result.column(32), np.sinh(values)) np.testing.assert_array_almost_equal(result.column(33), np.tanh(values)) np.testing.assert_array_almost_equal(result.column(34), math.factorial(6)) + np.testing.assert_array_almost_equal( + result.column(35), np.isnan(na_values) + ) + np.testing.assert_array_almost_equal(result.column(36), na_values == 0) + np.testing.assert_array_almost_equal( + result.column(37), np.emath.logn(3, values + 1.0) + ) def test_string_functions(df): diff --git a/src/functions.rs b/src/functions.rs index d9a7d6043..79cc19b3c 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -230,6 +230,8 @@ scalar_function!(factorial, Factorial); scalar_function!(floor, Floor); scalar_function!(gcd, Gcd); scalar_function!(initcap, InitCap, "Converts the first letter of each word to upper case and the rest to lower case. 
Words are sequences of alphanumeric characters separated by non-alphanumeric characters."); +scalar_function!(isnan, Isnan); +scalar_function!(iszero, Iszero); scalar_function!(lcm, Lcm); scalar_function!(left, Left, "Returns first n characters in the string, or when n is negative, returns all but last |n| characters."); scalar_function!(ln, Ln); @@ -414,6 +416,8 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(grouping))?; m.add_wrapped(wrap_pyfunction!(in_list))?; m.add_wrapped(wrap_pyfunction!(initcap))?; + m.add_wrapped(wrap_pyfunction!(isnan))?; + m.add_wrapped(wrap_pyfunction!(iszero))?; m.add_wrapped(wrap_pyfunction!(lcm))?; m.add_wrapped(wrap_pyfunction!(left))?; m.add_wrapped(wrap_pyfunction!(length))?; From a47712e17886d9202fed282963dc449470688fe0 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 13 Sep 2023 08:22:01 -0600 Subject: [PATCH 076/413] Update CHANGELOG and run cargo update (#500) --- CHANGELOG.md | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.lock | 8 +++---- 2 files changed, 70 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d2f2de219..dbc5a82e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,72 @@ # DataFusion Python Changelog +## [31.0.0](https://github.com/apache/arrow-datafusion-python/tree/31.0.0) (2023-09-12) + +[Full Changelog](https://github.com/apache/arrow-datafusion-python/compare/28.0.0...31.0.0) + +**Implemented enhancements:** + +- feat: add case function (#447) [#448](https://github.com/apache/arrow-datafusion-python/pull/448) (mesejo) +- feat: add compression options [#456](https://github.com/apache/arrow-datafusion-python/pull/456) (mesejo) +- feat: add register_json [#458](https://github.com/apache/arrow-datafusion-python/pull/458) (mesejo) +- feat: add basic compression configuration to write_parquet [#459](https://github.com/apache/arrow-datafusion-python/pull/459) (mesejo) +- feat: add example of reading parquet from s3 
[#460](https://github.com/apache/arrow-datafusion-python/pull/460) (mesejo) +- feat: add register_avro and read_table [#461](https://github.com/apache/arrow-datafusion-python/pull/461) (mesejo) +- feat: add missing scalar math functions [#465](https://github.com/apache/arrow-datafusion-python/pull/465) (mesejo) + +**Documentation updates:** + +- docs: include pre-commit hooks section in contributor guide [#455](https://github.com/apache/arrow-datafusion-python/pull/455) (mesejo) + +**Merged pull requests:** + +- Build Linux aarch64 wheel [#443](https://github.com/apache/arrow-datafusion-python/pull/443) (gokselk) +- feat: add case function (#447) [#448](https://github.com/apache/arrow-datafusion-python/pull/448) (mesejo) +- enhancement(docs): Add user guide (#432) [#445](https://github.com/apache/arrow-datafusion-python/pull/445) (mesejo) +- docs: include pre-commit hooks section in contributor guide [#455](https://github.com/apache/arrow-datafusion-python/pull/455) (mesejo) +- feat: add compression options [#456](https://github.com/apache/arrow-datafusion-python/pull/456) (mesejo) +- Upgrade to DF 28.0.0-rc1 [#457](https://github.com/apache/arrow-datafusion-python/pull/457) (andygrove) +- feat: add register_json [#458](https://github.com/apache/arrow-datafusion-python/pull/458) (mesejo) +- feat: add basic compression configuration to write_parquet [#459](https://github.com/apache/arrow-datafusion-python/pull/459) (mesejo) +- feat: add example of reading parquet from s3 [#460](https://github.com/apache/arrow-datafusion-python/pull/460) (mesejo) +- feat: add register_avro and read_table [#461](https://github.com/apache/arrow-datafusion-python/pull/461) (mesejo) +- feat: add missing scalar math functions [#465](https://github.com/apache/arrow-datafusion-python/pull/465) (mesejo) +- build(deps): bump arduino/setup-protoc from 1 to 2 [#452](https://github.com/apache/arrow-datafusion-python/pull/452) (dependabot[bot]) +- Revert "build(deps): bump arduino/setup-protoc 
from 1 to 2 (#452)" [#474](https://github.com/apache/arrow-datafusion-python/pull/474) (viirya) +- Minor: fix wrongly copied function description [#497](https://github.com/apache/arrow-datafusion-python/pull/497) (viirya) +- Upgrade to Datafusion 31.0.0 [#491](https://github.com/apache/arrow-datafusion-python/pull/491) (judahrand) +- Add `isnan` and `iszero` [#495](https://github.com/apache/arrow-datafusion-python/pull/495) (judahrand) + +## 30.0.0 + +- Skipped due to a breaking change in DataFusion + +## 29.0.0 + +- Skipped + +## [28.0.0](https://github.com/apache/arrow-datafusion-python/tree/28.0.0) (2023-07-25) + +**Implemented enhancements:** + +- feat: expose offset in python API [#437](https://github.com/apache/arrow-datafusion-python/pull/437) (cpcloud) + +**Merged pull requests:** + +- File based input utils [#433](https://github.com/apache/arrow-datafusion-python/pull/433) (jdye64) +- Upgrade to 28.0.0-rc1 [#434](https://github.com/apache/arrow-datafusion-python/pull/434) (andygrove) +- Introduces utility for obtaining SqlTable information from a file like location [#398](https://github.com/apache/arrow-datafusion-python/pull/398) (jdye64) +- feat: expose offset in python API [#437](https://github.com/apache/arrow-datafusion-python/pull/437) (cpcloud) +- Use DataFusion 28 [#439](https://github.com/apache/arrow-datafusion-python/pull/439) (andygrove) + +## [27.0.0](https://github.com/apache/arrow-datafusion-python/tree/27.0.0) (2023-07-03) + +**Merged pull requests:** + +- LogicalPlan.to_variant() make public [#412](https://github.com/apache/arrow-datafusion-python/pull/412) (jdye64) +- Prepare 27.0.0 release [#423](https://github.com/apache/arrow-datafusion-python/pull/423) (andygrove) + ## [26.0.0](https://github.com/apache/arrow-datafusion-python/tree/26.0.0) (2023-06-11) [Full Changelog](https://github.com/apache/arrow-datafusion-python/compare/25.0.0...26.0.0) diff --git a/Cargo.lock b/Cargo.lock index d15f24bf8..c7113ffce 100644 --- a/Cargo.lock +++ 
b/Cargo.lock @@ -345,9 +345,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.2" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d495b6dc0184693324491a5ac05f559acc97bf937ab31d7a1c33dd0016be6d2b" +checksum = "bb42b2197bf15ccb092b62c74515dbd8b86d0effd934795f6687c93b6e679a2c" dependencies = [ "bzip2", "flate2", @@ -2333,9 +2333,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.101.4" +version = "0.101.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d93931baf2d282fff8d3a532bbfd7653f734643161b87e3e01e59a04439bf0d" +checksum = "45a27e3b59326c16e23d30aeb7a36a24cc0d29e71d68ff611cdfb4a01d013bed" dependencies = [ "ring", "untrusted", From 41d65d15283b2ca1b10a714a50167af8a62d3052 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 20 Sep 2023 05:14:16 -0600 Subject: [PATCH 077/413] Improve release process documentation (#505) --- dev/release/README.md | 47 ++++++++++++++++++++++++++++++- dev/release/rat_exclude_files.txt | 3 +- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/dev/release/README.md b/dev/release/README.md index e85b288f4..6bd2c1eb2 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -179,7 +179,7 @@ When prompted for username, enter `__token__`. When prompted for a password, ent Download the source tarball created in the previous step, untar it, and run: ```bash -python3 -m build +maturin sdist ``` This will create a file named `dist/datafusion-0.7.0.tar.gz`. Upload this to testpypi: @@ -263,3 +263,48 @@ git checkout 0.8.0-rc1 git tag 0.8.0 git push apache 0.8.0 ``` + +### Add the release to Apache Reporter + +Add the release to https://reporter.apache.org/addrelease.html?arrow with a version name prefixed with `RS-DATAFUSION-PYTHON`, +for example `RS-DATAFUSION-PYTHON-31.0.0`. 
+ +The release information is used to generate a template for a board report (see example +[here](https://github.com/apache/arrow/pull/14357)). + +### Delete old RCs and Releases + +See the ASF documentation on [when to archive](https://www.apache.org/legal/release-policy.html#when-to-archive) +for more information. + +#### Deleting old release candidates from `dev` svn + +Release candidates should be deleted once the release is published. + +Get a list of DataFusion release candidates: + +```bash +svn ls https://dist.apache.org/repos/dist/dev/arrow | grep datafusion-python +``` + +Delete a release candidate: + +```bash +svn delete -m "delete old DataFusion RC" https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-datafusion-python-7.1.0-rc1/ +``` + +#### Deleting old releases from `release` svn + +Only the latest release should be available. Delete old releases after publishing the new release. + +Get a list of DataFusion releases: + +```bash +svn ls https://dist.apache.org/repos/dist/release/arrow | grep datafusion-python +``` + +Delete a release: + +```bash +svn delete -m "delete old DataFusion release" https://dist.apache.org/repos/dist/release/arrow/arrow-datafusion-python-7.0.0 +``` diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 6d0fee185..f65ddd06e 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -44,4 +44,5 @@ Cargo.lock */.git .github/* benchmarks/tpch/queries/q*.sql -benchmarks/tpch/create_tables.sql \ No newline at end of file +benchmarks/tpch/create_tables.sql +.cargo/config.toml \ No newline at end of file From 106786ab57cfe9b16f2b867fe6d53170bd14c841 Mon Sep 17 00:00:00 2001 From: zhenxing jiang Date: Wed, 20 Sep 2023 19:15:45 +0800 Subject: [PATCH 078/413] add Binary String Functions (#494) --- datafusion/tests/test_functions.py | 14 ++++++++++++++ src/functions.rs | 7 +++++++ 2 files changed, 21 insertions(+) diff --git a/datafusion/tests/test_functions.py 
b/datafusion/tests/test_functions.py index 80795bf12..f1f64c30a 100644 --- a/datafusion/tests/test_functions.py +++ b/datafusion/tests/test_functions.py @@ -477,3 +477,17 @@ def test_case(df): assert result.column(0) == pa.array([10, 8, 8]) assert result.column(1) == pa.array(["Hola", "Mundo", "!!"]) assert result.column(2) == pa.array(["Hola", "Mundo", None]) + + +def test_binary_string_functions(df): + df = df.select( + f.encode(column("a"), literal("base64")), + f.decode(f.encode(column("a"), literal("base64")), literal("base64")), + ) + result = df.collect() + assert len(result) == 1 + result = result[0] + assert result.column(0) == pa.array(["SGVsbG8", "V29ybGQ", "IQ"]) + assert pa.array(result.column(1)).cast(pa.string()) == pa.array( + ["Hello", "World", "!"] + ) diff --git a/src/functions.rs b/src/functions.rs index 79cc19b3c..ef26240fe 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -332,6 +332,9 @@ scalar_function!(r#struct, Struct); // Use raw identifier since struct is a keyw scalar_function!(from_unixtime, FromUnixtime); scalar_function!(arrow_typeof, ArrowTypeof); scalar_function!(random, Random); +//Binary String Functions +scalar_function!(encode, Encode); +scalar_function!(decode, Decode); aggregate_function!(approx_distinct, ApproxDistinct); aggregate_function!(approx_median, ApproxMedian); @@ -486,5 +489,9 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(var_pop))?; m.add_wrapped(wrap_pyfunction!(var_samp))?; m.add_wrapped(wrap_pyfunction!(window))?; + + //Binary String Functions + m.add_wrapped(wrap_pyfunction!(encode))?; + m.add_wrapped(wrap_pyfunction!(decode))?; Ok(()) } From c574d684f5ac244bf2a8b813f67ce8642babf10f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Sep 2023 09:43:49 -0700 Subject: [PATCH 079/413] build(deps): bump mimalloc from 0.1.38 to 0.1.39 (#502) Bumps 
[mimalloc](https://github.com/purpleprotocol/mimalloc_rust) from 0.1.38 to 0.1.39. - [Release notes](https://github.com/purpleprotocol/mimalloc_rust/releases) - [Commits](https://github.com/purpleprotocol/mimalloc_rust/compare/v0.1.38...v0.1.39) --- updated-dependencies: - dependency-name: mimalloc dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c7113ffce..8fb244715 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1542,9 +1542,9 @@ checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" [[package]] name = "libmimalloc-sys" -version = "0.1.34" +version = "0.1.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25d058a81af0d1c22d7a1c948576bee6d673f7af3c0f35564abd6c81122f513d" +checksum = "3979b5c37ece694f1f5e51e7ecc871fdb0f517ed04ee45f88d15d6d553cb9664" dependencies = [ "cc", "libc", @@ -1641,9 +1641,9 @@ dependencies = [ [[package]] name = "mimalloc" -version = "0.1.38" +version = "0.1.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "972e5f23f6716f62665760b0f4cbf592576a80c7b879ba9beaafc0e558894127" +checksum = "fa01922b5ea280a911e323e4d2fd24b7fe5cc4042e0d2cda3c40775cdc4bdc9c" dependencies = [ "libmimalloc-sys", ] From 31241f8e3711edbf97ea0613f3dc03cee4ee8fb7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Sep 2023 15:20:37 -0700 Subject: [PATCH 080/413] build(deps): bump syn from 2.0.32 to 2.0.35 (#503) Bumps [syn](https://github.com/dtolnay/syn) from 2.0.32 to 2.0.35. 
- [Release notes](https://github.com/dtolnay/syn/releases) - [Commits](https://github.com/dtolnay/syn/compare/2.0.32...2.0.35) --- updated-dependencies: - dependency-name: syn dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 42 +++++++++++++++++++++--------------------- Cargo.toml | 2 +- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8fb244715..a4b3fb769 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -369,7 +369,7 @@ checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.32", + "syn 2.0.35", ] [[package]] @@ -380,7 +380,7 @@ checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.32", + "syn 2.0.35", ] [[package]] @@ -869,7 +869,7 @@ dependencies = [ "pyo3-build-config", "rand", "regex-syntax", - "syn 2.0.32", + "syn 2.0.35", "tokio", "url", "uuid", @@ -1074,7 +1074,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.32", + "syn 2.0.35", ] [[package]] @@ -1978,7 +1978,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" dependencies = [ "proc-macro2", - "syn 2.0.32", + "syn 2.0.35", ] [[package]] @@ -1989,9 +1989,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.66" +version = "1.0.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328" dependencies = [ "unicode-ident", ] @@ -2431,7 +2431,7 @@ 
checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.32", + "syn 2.0.35", ] [[package]] @@ -2465,7 +2465,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.32", + "syn 2.0.35", ] [[package]] @@ -2644,7 +2644,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.32", + "syn 2.0.35", ] [[package]] @@ -2665,7 +2665,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.32", + "syn 2.0.35", "typify", "walkdir", ] @@ -2689,9 +2689,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.32" +version = "2.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "239814284fd6f1a4ffe4ca893952cdd93c224b6a1571c9a9eadd670295c0c9e2" +checksum = "59bf04c28bee9043ed9ea1e41afc0552288d3aba9c6efdd78903b802926f4879" dependencies = [ "proc-macro2", "quote", @@ -2734,7 +2734,7 @@ checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" dependencies = [ "proc-macro2", "quote", - "syn 2.0.32", + "syn 2.0.35", ] [[package]] @@ -2798,7 +2798,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.32", + "syn 2.0.35", ] [[package]] @@ -2851,7 +2851,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2", "quote", - "syn 2.0.32", + "syn 2.0.35", ] [[package]] @@ -2919,7 +2919,7 @@ dependencies = [ "regress", "schemars", "serde_json", - "syn 2.0.32", + "syn 2.0.35", "thiserror", "unicode-ident", ] @@ -2936,7 +2936,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.32", + "syn 2.0.35", "typify-impl", ] @@ -3070,7 +3070,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.32", + "syn 2.0.35", "wasm-bindgen-shared", ] @@ -3104,7 +3104,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.32", + "syn 2.0.35", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3299,7 +3299,7 @@ checksum = "56097d5b91d711293a42be9289403896b68654625021732067eac7a4ca388a1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.32", + "syn 2.0.35", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index eaa2602ae..0b685ec74 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,7 +51,7 @@ futures = "0.3" object_store = { version = "0.7.0", features = ["aws", "gcp", "azure"] } parking_lot = "0.12" regex-syntax = "0.7.1" -syn = "2.0.11" +syn = "2.0.35" url = "2.2" [build-dependencies] From 9ef0a57bd4733fb8314c9aa8ae5408a7411cb004 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 23 Sep 2023 14:18:21 -0700 Subject: [PATCH 081/413] build(deps): bump syn from 2.0.35 to 2.0.37 (#506) Bumps [syn](https://github.com/dtolnay/syn) from 2.0.35 to 2.0.37. - [Release notes](https://github.com/dtolnay/syn/releases) - [Commits](https://github.com/dtolnay/syn/compare/2.0.35...2.0.37) --- updated-dependencies: - dependency-name: syn dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 38 +++++++++++++++++++------------------- Cargo.toml | 2 +- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a4b3fb769..91910b5de 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -369,7 +369,7 @@ checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.35", + "syn 2.0.37", ] [[package]] @@ -380,7 +380,7 @@ checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.35", + "syn 2.0.37", ] [[package]] @@ -869,7 +869,7 @@ dependencies = [ "pyo3-build-config", "rand", "regex-syntax", - "syn 2.0.35", + "syn 2.0.37", "tokio", "url", "uuid", @@ -1074,7 +1074,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.35", + "syn 2.0.37", ] [[package]] @@ -1978,7 +1978,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" dependencies = [ "proc-macro2", - "syn 2.0.35", + "syn 2.0.37", ] [[package]] @@ -2431,7 +2431,7 @@ checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.35", + "syn 2.0.37", ] [[package]] @@ -2465,7 +2465,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.35", + "syn 2.0.37", ] [[package]] @@ -2644,7 +2644,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.35", + "syn 2.0.37", ] [[package]] @@ -2665,7 +2665,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.35", + "syn 2.0.37", "typify", "walkdir", ] @@ -2689,9 +2689,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.35" +version = "2.0.37" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "59bf04c28bee9043ed9ea1e41afc0552288d3aba9c6efdd78903b802926f4879" +checksum = "7303ef2c05cd654186cb250d29049a24840ca25d2747c25c0381c8d9e2f582e8" dependencies = [ "proc-macro2", "quote", @@ -2734,7 +2734,7 @@ checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" dependencies = [ "proc-macro2", "quote", - "syn 2.0.35", + "syn 2.0.37", ] [[package]] @@ -2798,7 +2798,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.35", + "syn 2.0.37", ] [[package]] @@ -2851,7 +2851,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2", "quote", - "syn 2.0.35", + "syn 2.0.37", ] [[package]] @@ -2919,7 +2919,7 @@ dependencies = [ "regress", "schemars", "serde_json", - "syn 2.0.35", + "syn 2.0.37", "thiserror", "unicode-ident", ] @@ -2936,7 +2936,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.35", + "syn 2.0.37", "typify-impl", ] @@ -3070,7 +3070,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.35", + "syn 2.0.37", "wasm-bindgen-shared", ] @@ -3104,7 +3104,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.35", + "syn 2.0.37", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3299,7 +3299,7 @@ checksum = "56097d5b91d711293a42be9289403896b68654625021732067eac7a4ca388a1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.35", + "syn 2.0.37", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 0b685ec74..6545e1c9a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,7 +51,7 @@ futures = "0.3" object_store = { version = "0.7.0", features = ["aws", "gcp", "azure"] } parking_lot = "0.12" regex-syntax = "0.7.1" -syn = "2.0.35" +syn = "2.0.37" url = "2.2" [build-dependencies] From 8e430ab0102984c55ec715fd2151ff544732cc87 
Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 7 Oct 2023 16:17:59 -0600 Subject: [PATCH 082/413] Use latest DataFusion (#511) --- Cargo.lock | 487 +++++++++++++++++++++++++------------------- Cargo.toml | 12 +- src/context.rs | 11 +- src/expr/literal.rs | 2 +- 4 files changed, 290 insertions(+), 222 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 91910b5de..ee4eb3f64 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -38,9 +38,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.0.5" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c378d78423fdad8089616f827526ee33c19f2fddbd5de1629152c9593ba4783" +checksum = "ea5d730647d4fadd988536d06fecce94b7b4f2a7efdae548f1cf4b63205518ab" dependencies = [ "memchr", ] @@ -89,11 +89,11 @@ checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" [[package]] name = "apache-avro" -version = "0.15.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c0fdddc3fdac97394ffcc5c89c634faa9c1c166ced54189af34e407c97b6ee7" +checksum = "ceb7c683b2f8f40970b70e39ff8be514c95b96fcb9c4af87e1ed2cb2e10801a0" dependencies = [ - "byteorder", + "bzip2", "crc32fast", "digest", "lazy_static", @@ -102,7 +102,7 @@ dependencies = [ "num-bigint", "quad-rand", "rand", - "regex", + "regex-lite", "serde", "serde_json", "snap", @@ -111,7 +111,8 @@ dependencies = [ "thiserror", "typed-builder", "uuid", - "zerocopy", + "xz2", + "zstd", ] [[package]] @@ -128,9 +129,9 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "46.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04a8801ebb147ad240b2d978d3ab9f73c9ccd4557ba6a03e7800496770ed10e0" +checksum = "7fab9e93ba8ce88a37d5a30dce4b9913b75413dc1ac56cb5d72e5a840543f829" dependencies = [ "ahash", "arrow-arith", @@ -151,9 +152,9 @@ dependencies = [ [[package]] name = 
"arrow-arith" -version = "46.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "895263144bd4a69751cbe6a34a53f26626e19770b313a9fa792c415cd0e78f11" +checksum = "bc1d4e368e87ad9ee64f28b9577a3834ce10fe2703a26b28417d485bbbdff956" dependencies = [ "arrow-array", "arrow-buffer", @@ -166,9 +167,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "46.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "226fdc6c3a4ae154a74c24091d36a90b514f0ed7112f5b8322c1d8f354d8e20d" +checksum = "d02efa7253ede102d45a4e802a129e83bcc3f49884cab795b1ac223918e4318d" dependencies = [ "ahash", "arrow-buffer", @@ -177,15 +178,15 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.14.0", + "hashbrown 0.14.1", "num", ] [[package]] name = "arrow-buffer" -version = "46.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc4843af4dd679c2f35b69c572874da8fde33be53eb549a5fb128e7a4b763510" +checksum = "fda119225204141138cb0541c692fbfef0e875ba01bfdeaed09e9d354f9d6195" dependencies = [ "bytes", "half", @@ -194,9 +195,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "46.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35e8b9990733a9b635f656efda3c9b8308c7a19695c9ec2c7046dd154f9b144b" +checksum = "1d825d51b9968868d50bc5af92388754056796dbc62a4e25307d588a1fc84dee" dependencies = [ "arrow-array", "arrow-buffer", @@ -212,9 +213,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "46.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "646fbb4e11dd0afb8083e883f53117713b8caadb4413b3c9e63e3f535da3683c" +checksum = "43ef855dc6b126dc197f43e061d4de46b9d4c033aa51c2587657f7508242cef1" dependencies = [ "arrow-array", "arrow-buffer", @@ -231,9 +232,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = 
"46.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da900f31ff01a0a84da0572209be72b2b6f980f3ea58803635de47913191c188" +checksum = "475a4c3699c8b4095ca61cecf15da6f67841847a5f5aac983ccb9a377d02f73a" dependencies = [ "arrow-buffer", "arrow-schema", @@ -243,9 +244,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "46.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2707a8d7ee2d345d045283ece3ae43416175873483e5d96319c929da542a0b1f" +checksum = "1248005c8ac549f869b7a840859d942bf62471479c1a2d82659d453eebcd166a" dependencies = [ "arrow-array", "arrow-buffer", @@ -257,9 +258,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "46.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d1b91a63c356d14eedc778b76d66a88f35ac8498426bb0799a769a49a74a8b4" +checksum = "f03d7e3b04dd688ccec354fe449aed56b831679f03e44ee2c1cfc4045067b69c" dependencies = [ "arrow-array", "arrow-buffer", @@ -268,7 +269,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.0.0", + "indexmap 2.0.2", "lexical-core", "num", "serde", @@ -277,9 +278,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "46.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "584325c91293abbca7aaaabf8da9fe303245d641f5f4a18a6058dc68009c7ebf" +checksum = "03b87aa408ea6a6300e49eb2eba0c032c88ed9dc19e0a9948489c55efdca71f4" dependencies = [ "arrow-array", "arrow-buffer", @@ -292,9 +293,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "46.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e32afc1329f7b372463b21c6ca502b07cf237e1ed420d87706c1770bb0ebd38" +checksum = "114a348ab581e7c9b6908fcab23cb39ff9f060eb19e72b13f8fb8eaa37f65d22" dependencies = [ "ahash", "arrow-array", @@ -302,24 +303,25 @@ dependencies = [ 
"arrow-data", "arrow-schema", "half", - "hashbrown 0.14.0", + "hashbrown 0.14.1", ] [[package]] name = "arrow-schema" -version = "46.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b104f5daa730f00fde22adc03a12aa5a2ae9ccbbf99cbd53d284119ddc90e03d" +checksum = "5d1d179c117b158853e0101bfbed5615e86fe97ee356b4af901f1c5001e1ce4b" dependencies = [ "bitflags 2.4.0", ] [[package]] name = "arrow-select" -version = "46.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73b3ca55356d1eae07cf48808d8c462cea674393ae6ad1e0b120f40b422eb2b4" +checksum = "d5c71e003202e67e9db139e5278c79f5520bb79922261dfe140e4637ee8b6108" dependencies = [ + "ahash", "arrow-array", "arrow-buffer", "arrow-data", @@ -329,9 +331,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "46.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af1433ce02590cae68da0a18ed3a3ed868ffac2c6f24c533ddd2067f7ee04b4a" +checksum = "c4cebbb282d6b9244895f4a9a912e55e57bce112554c7fa91fcec5459cb421ab" dependencies = [ "arrow-array", "arrow-buffer", @@ -369,7 +371,7 @@ checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -380,7 +382,7 @@ checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -442,16 +444,15 @@ dependencies = [ [[package]] name = "blake3" -version = "1.4.1" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5" +checksum = "0231f06152bf547e9c2b5194f247cd97aacf6dcd8b15d8e5ec0663f64580da87" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if", "constant_time_eq", - "digest", ] [[package]] @@ -465,9 +466,9 @@ 
dependencies = [ [[package]] name = "brotli" -version = "3.3.4" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68" +checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -476,9 +477,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "2.3.4" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b6561fd3f895a11e8f72af2cb7d22e08366bebc2b6b57f7744c4bda27034744" +checksum = "da74e2b81409b1b743f8f0c62cc6254afefb8b8e50bbfe3735550f7aeefa3448" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -486,15 +487,15 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.13.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" +checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" [[package]] name = "byteorder" -version = "1.4.3" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" @@ -541,9 +542,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.30" +version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defd4e7873dbddba6c7c91e199c7fcb946abc4a6a4ac3195400bcfb01b5de877" +checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" dependencies = [ "android-tzdata", "iana-time-zone", @@ -613,12 +614,31 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" +[[package]] +name = "core-foundation" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +[[package]] +name = "core2" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" +dependencies = [ + "memchr", +] + [[package]] name = "cpufeatures" version = "0.2.9" @@ -655,9 +675,9 @@ dependencies = [ [[package]] name = "csv" -version = "1.2.2" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" dependencies = [ "csv-core", "itoa", @@ -667,13 +687,19 @@ dependencies = [ [[package]] name = "csv-core" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" dependencies = [ "memchr", ] +[[package]] +name = "dary_heap" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7762d17f1241643615821a8455a0b2c3e803784b058693d990b11f2dce25a0ca" + [[package]] name = "dashmap" version = "5.5.3" @@ -681,7 +707,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" dependencies = [ "cfg-if", - "hashbrown 0.14.0", + 
"hashbrown 0.14.1", "lock_api", "once_cell", "parking_lot_core", @@ -690,8 +716,7 @@ dependencies = [ [[package]] name = "datafusion" version = "31.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a4e4fc25698a14c90b34dda647ba10a5a966dc04b036d22e77fb1048663375d" +source = "git+https://github.com/apache/arrow-datafusion?rev=e23d34bae60bb2f9c496241e218bab795af3af83#e23d34bae60bb2f9c496241e218bab795af3af83" dependencies = [ "ahash", "apache-avro", @@ -709,13 +734,14 @@ dependencies = [ "datafusion-expr", "datafusion-optimizer", "datafusion-physical-expr", + "datafusion-physical-plan", "datafusion-sql", "flate2", "futures", "glob", "half", - "hashbrown 0.14.0", - "indexmap 2.0.0", + "hashbrown 0.14.1", + "indexmap 2.0.2", "itertools 0.11.0", "log", "num-traits", @@ -739,41 +765,35 @@ dependencies = [ [[package]] name = "datafusion-common" version = "31.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c23ad0229ea4a85bf76b236d8e75edf539881fdb02ce4e2394f9a76de6055206" +source = "git+https://github.com/apache/arrow-datafusion?rev=e23d34bae60bb2f9c496241e218bab795af3af83#e23d34bae60bb2f9c496241e218bab795af3af83" dependencies = [ + "ahash", "apache-avro", "arrow", "arrow-array", - "async-compression", - "bytes", - "bzip2", + "arrow-buffer", + "arrow-schema", "chrono", - "flate2", - "futures", + "half", "num_cpus", "object_store", "parquet", "pyo3", "sqlparser", - "tokio", - "tokio-util", - "xz2", - "zstd", ] [[package]] name = "datafusion-execution" version = "31.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b37d2fc1a213baf34e0a57c85b8e6648f1a95152798fd6738163ee96c19203f" +source = "git+https://github.com/apache/arrow-datafusion?rev=e23d34bae60bb2f9c496241e218bab795af3af83#e23d34bae60bb2f9c496241e218bab795af3af83" dependencies = [ "arrow", + "chrono", "dashmap", "datafusion-common", "datafusion-expr", "futures", - "hashbrown 0.14.0", + "hashbrown 0.14.1", "log", 
"object_store", "parking_lot", @@ -785,11 +805,11 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "31.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6ea9844395f537730a145e5d87f61fecd37c2bc9d54e1dc89b35590d867345d" +source = "git+https://github.com/apache/arrow-datafusion?rev=e23d34bae60bb2f9c496241e218bab795af3af83#e23d34bae60bb2f9c496241e218bab795af3af83" dependencies = [ "ahash", "arrow", + "arrow-array", "datafusion-common", "sqlparser", "strum 0.25.0", @@ -799,8 +819,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "31.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8a30e0f79c5d59ba14d3d70f2500e87e0ff70236ad5e47f9444428f054fd2be" +source = "git+https://github.com/apache/arrow-datafusion?rev=e23d34bae60bb2f9c496241e218bab795af3af83#e23d34bae60bb2f9c496241e218bab795af3af83" dependencies = [ "arrow", "async-trait", @@ -808,7 +827,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "hashbrown 0.14.0", + "hashbrown 0.14.1", "itertools 0.11.0", "log", "regex-syntax", @@ -817,8 +836,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "31.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "766c567082c9bbdcb784feec8fe40c7049cedaeb3a18d54f563f75fe0dc1932c" +source = "git+https://github.com/apache/arrow-datafusion?rev=e23d34bae60bb2f9c496241e218bab795af3af83#e23d34bae60bb2f9c496241e218bab795af3af83" dependencies = [ "ahash", "arrow", @@ -832,9 +850,9 @@ dependencies = [ "datafusion-common", "datafusion-expr", "half", - "hashbrown 0.14.0", + "hashbrown 0.14.1", "hex", - "indexmap 2.0.0", + "indexmap 2.0.2", "itertools 0.11.0", "libc", "log", @@ -848,6 +866,36 @@ dependencies = [ "uuid", ] +[[package]] +name = "datafusion-physical-plan" +version = "31.0.0" +source = 
"git+https://github.com/apache/arrow-datafusion?rev=e23d34bae60bb2f9c496241e218bab795af3af83#e23d34bae60bb2f9c496241e218bab795af3af83" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "futures", + "half", + "hashbrown 0.14.1", + "indexmap 2.0.2", + "itertools 0.11.0", + "log", + "once_cell", + "parking_lot", + "pin-project-lite", + "rand", + "tokio", + "uuid", +] + [[package]] name = "datafusion-python" version = "31.0.0" @@ -869,7 +917,7 @@ dependencies = [ "pyo3-build-config", "rand", "regex-syntax", - "syn 2.0.37", + "syn 2.0.38", "tokio", "url", "uuid", @@ -878,8 +926,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "31.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "811fd084cf2d78aa0c76b74320977c7084ad0383690612528b580795764b4dd0" +source = "git+https://github.com/apache/arrow-datafusion?rev=e23d34bae60bb2f9c496241e218bab795af3af83#e23d34bae60bb2f9c496241e218bab795af3af83" dependencies = [ "arrow", "arrow-schema", @@ -892,8 +939,7 @@ dependencies = [ [[package]] name = "datafusion-substrait" version = "31.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "736201d7981b5ea91bea4245d0d33bd17dccfdb4a739350bbb990e09a57122ec" +source = "git+https://github.com/apache/arrow-datafusion?rev=e23d34bae60bb2f9c496241e218bab795af3af83#e23d34bae60bb2f9c496241e218bab795af3af83" dependencies = [ "async-recursion", "chrono", @@ -925,9 +971,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "dyn-clone" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbfc4744c1b8f2a09adc0e55242f60b1af195d88596bd8700be74418c056c555" +checksum = "23d2f3407d9a573d666de4b5bdf10569d73ca9478087346697dcbae6244bfbcd" 
[[package]] name = "either" @@ -952,9 +998,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "136526188508e25c6fef639d7927dfb3e0e3084488bf202267829cf7fc23dbdd" +checksum = "add4f07d43996f76ef320709726a556a9d4f965d9410d8d0271132d2f8293480" dependencies = [ "errno-dragonfly", "libc", @@ -973,9 +1019,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764" +checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" [[package]] name = "fixedbitset" @@ -1074,7 +1120,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -1136,9 +1182,9 @@ checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" [[package]] name = "git2" -version = "0.18.0" +version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12ef350ba88a33b4d524b1d1c79096c9ade5ef8c59395df0e60d1e1889414c0e" +checksum = "fbf97ba92db08df386e10c8ede66a2a0369bd277090afd8710e19e38de9ec0cd" dependencies = [ "bitflags 2.4.0", "libc", @@ -1200,9 +1246,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.14.0" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" +checksum = "7dfda62a12f55daeae5015f81b0baea145391cb4520f86c248fc615d72640d12" dependencies = [ "ahash", "allocator-api2", @@ -1216,9 +1262,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" -version = "0.3.2" +version = "0.3.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" +checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" [[package]] name = "hex" @@ -1358,12 +1404,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.0.0" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" +checksum = "8adf3ddd720272c6ea8bf59463c04e0f93d0bbf7c5439b691bca2987e0270897" dependencies = [ "equivalent", - "hashbrown 0.14.0", + "hashbrown 0.14.1", ] [[package]] @@ -1498,27 +1544,31 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.147" +version = "0.2.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" +checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" [[package]] name = "libflate" -version = "1.4.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ff4ae71b685bbad2f2f391fe74f6b7659a34871c08b210fdc039e43bee07d18" +checksum = "9f7d5654ae1795afc7ff76f4365c2c8791b0feb18e8996a96adad8ffd7c3b2bf" dependencies = [ "adler32", + "core2", "crc32fast", + "dary_heap", "libflate_lz77", ] [[package]] name = "libflate_lz77" -version = "1.2.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a52d3a8bfc85f250440e4424db7d857e241a3aebbbe301f3eb606ab15c39acbf" +checksum = "be5f52fb8c451576ec6b79d3f4deb327398bc05bbdbd99021a6e77a4c855d524" dependencies = [ + "core2", + "hashbrown 0.13.2", "rle-decode-fast", ] @@ -1536,9 +1586,9 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.7" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" +checksum = 
"4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "libmimalloc-sys" @@ -1564,9 +1614,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.7" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a9bad9f94746442c783ca431b22403b519cd7fbeed0533fdd6328b2f2212128" +checksum = "3852614a3bd9ca9804678ba6be5e3b8ce76dfc902cae004e3e0c44051b6e88db" [[package]] name = "lock_api" @@ -1617,18 +1667,19 @@ dependencies = [ [[package]] name = "md-5" -version = "0.10.5" +version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6365506850d44bff6e2fbcb5176cf63650e48bd45ef2fe2665ae1570e0f4b9ca" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" dependencies = [ + "cfg-if", "digest", ] [[package]] name = "memchr" -version = "2.6.3" +version = "2.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" [[package]] name = "memoffset" @@ -1778,9 +1829,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.7.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d359e231e5451f4f9fa889d56e3ce34f8724f1a61db2107739359717cf2bbf08" +checksum = "f930c88a43b1c3f6e776dfe495b4afab89882dbc81530c632db2ed65451ebcb4" dependencies = [ "async-trait", "base64", @@ -1789,7 +1840,7 @@ dependencies = [ "futures", "humantime", "hyper", - "itertools 0.10.5", + "itertools 0.11.0", "parking_lot", "percent-encoding", "quick-xml", @@ -1846,9 +1897,9 @@ dependencies = [ [[package]] name = "parquet" -version = "46.0.0" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ad2cba786ae07da4d73371a88b9e0f9d3ffac1a9badc83922e0e15814f5c5fa" +checksum = 
"0463cc3b256d5f50408c49a4be3a16674f4c8ceef60941709620a062b1f6bf4d" dependencies = [ "ahash", "arrow-array", @@ -1864,7 +1915,7 @@ dependencies = [ "chrono", "flate2", "futures", - "hashbrown 0.14.0", + "hashbrown 0.14.1", "lz4", "num", "num-bigint", @@ -1906,7 +1957,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" dependencies = [ "fixedbitset", - "indexmap 2.0.0", + "indexmap 2.0.2", ] [[package]] @@ -1978,7 +2029,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" dependencies = [ "proc-macro2", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -1989,9 +2040,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.67" +version = "1.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328" +checksum = "5b1106fec09662ec6dd98ccac0f81cef56984d0b49f75c92d8cbad76e20c005c" dependencies = [ "unicode-ident", ] @@ -2125,9 +2176,9 @@ checksum = "658fa1faf7a4cc5f057c9ee5ef560f717ad9d8dc66d975267f709624d6e1ab88" [[package]] name = "quick-xml" -version = "0.28.2" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce5e73202a820a31f8a0ee32ada5e21029c81fd9e3ebf668a40832e4219d9d1" +checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956" dependencies = [ "memchr", "serde", @@ -2183,9 +2234,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.9.5" +version = "1.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" +checksum = "ebee201405406dbf528b8b672104ae6d6d63e6d118cb10e4d51abbc7b58044ff" dependencies = [ "aho-corasick", "memchr", @@ -2195,15 +2246,21 @@ 
dependencies = [ [[package]] name = "regex-automata" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" +checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9" dependencies = [ "aho-corasick", "memchr", "regex-syntax", ] +[[package]] +name = "regex-lite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f96ede7f386ba6e910092e7ccdc04176cface62abebea07ed6b46d870ed95ca2" + [[package]] name = "regex-syntax" version = "0.7.5" @@ -2222,9 +2279,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.20" +version = "0.11.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e9ad3fe7488d7e34558a2033d45a0c90b72d97b4f80705666fea71472e2e6a1" +checksum = "046cd98826c46c2ac8ddecae268eb5c2e58628688a5fc7a2643704a73faba95b" dependencies = [ "base64", "bytes", @@ -2248,6 +2305,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", + "system-configuration", "tokio", "tokio-rustls", "tokio-util", @@ -2299,9 +2357,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.13" +version = "0.38.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7db8590df6dfcd144d22afd1b83b36c21a18d7cbc1dc4bb5295a8712e9eb662" +checksum = "f25469e9ae0f3d0047ca8b93fc56843f38e6774f0914a107ff8b41be8be8e0b7" dependencies = [ "bitflags 2.4.0", "errno", @@ -2333,9 +2391,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.101.5" +version = "0.101.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45a27e3b59326c16e23d30aeb7a36a24cc0d29e71d68ff611cdfb4a01d013bed" +checksum = "3c7d5dece342910d9ba34d259310cae3e0154b873b35408b787b59bce53d34fe" dependencies = [ "ring", "untrusted", @@ -2364,9 +2422,9 @@ dependencies = [ [[package]] name = "schemars" -version = "0.8.13" +version = 
"0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "763f8cd0d4c71ed8389c90cb8100cba87e763bd01a8e614d4f0af97bcd50a161" +checksum = "1f7b0ce13155372a76ee2e1c5ffba1fe61ede73fbea5630d61eee6fac4929c0c" dependencies = [ "dyn-clone", "schemars_derive", @@ -2376,9 +2434,9 @@ dependencies = [ [[package]] name = "schemars_derive" -version = "0.8.13" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0f696e21e10fa546b7ffb1c9672c6de8fbc7a81acf59524386d8639bf12737" +checksum = "e85e2a16b12bdb763244c69ab79363d71db2b4b918a2def53f80b02e0574b13c" dependencies = [ "proc-macro2", "quote", @@ -2404,9 +2462,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.18" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" +checksum = "ad977052201c6de01a8ef2aa3378c4bd23217a056337d1d6da40468d267a4fb0" [[package]] name = "seq-macro" @@ -2431,7 +2489,7 @@ checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -2447,9 +2505,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.106" +version = "1.0.107" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cc66a619ed80bf7a0f6b17dd063a84b88f6dea1813737cf469aef1d081142c2" +checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65" dependencies = [ "itoa", "ryu", @@ -2465,7 +2523,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -2486,7 +2544,7 @@ version = "0.9.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a49e178e4452f45cb61d0cd8cebc1b0fafd3e41929e996cef79aa3aca91f574" dependencies = [ - "indexmap 2.0.0", + "indexmap 2.0.2", "itoa", "ryu", "serde", @@ -2495,9 +2553,9 @@ dependencies = [ 
[[package]] name = "sha2" -version = "0.10.7" +version = "0.10.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" dependencies = [ "cfg-if", "cpufeatures", @@ -2521,9 +2579,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" +checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" [[package]] name = "snafu" @@ -2581,9 +2639,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] name = "sqlparser" -version = "0.37.0" +version = "0.38.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37ae05a8250b968a3f7db93155a84d68b2e6cea1583949af5ca5b5170c76c075" +checksum = "0272b7bb0a225320170c99901b4b5fb3a4384e255a7f2cc228f61e2ba3893e75" dependencies = [ "log", "sqlparser_derive", @@ -2644,14 +2702,14 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] name = "substrait" -version = "0.13.2" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3b705c631514962d48c0647e3ad2b4d93b7472f5da7bee2d1dc853bf4c61f19" +checksum = "27c3d276b85647003f434bfb7ce3ebaf4c47a711018af9d4350d25763535c239" dependencies = [ "git2", "heck", @@ -2665,7 +2723,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.37", + "syn 2.0.38", "typify", "walkdir", ] @@ -2689,15 +2747,36 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.37" +version = "2.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7303ef2c05cd654186cb250d29049a24840ca25d2747c25c0381c8d9e2f582e8" +checksum = 
"e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] +[[package]] +name = "system-configuration" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "target-lexicon" version = "0.12.11" @@ -2719,22 +2798,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.48" +version = "1.0.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d6d7a740b8a666a7e828dd00da9c0dc290dff53154ea77ac109281de90589b7" +checksum = "1177e8c6d7ede7afde3585fd2513e611227efd6481bd78d2e82ba1ce16557ed4" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.48" +version = "1.0.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" +checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -2798,7 +2877,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -2813,9 +2892,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.8" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d" +checksum = 
"1d68074620f57a0b21594d9735eb2e98ab38b17f80d3fcb189fca266771ca60d" dependencies = [ "bytes", "futures-core", @@ -2851,7 +2930,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -2881,20 +2960,29 @@ dependencies = [ [[package]] name = "typed-builder" -version = "0.14.0" +version = "0.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64cba322cb9b7bc6ca048de49e83918223f35e7a86311267013afff257004870" +checksum = "34085c17941e36627a879208083e25d357243812c30e7d7387c3b954f30ade16" +dependencies = [ + "typed-builder-macro", +] + +[[package]] +name = "typed-builder-macro" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.38", ] [[package]] name = "typenum" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "typify" @@ -2919,7 +3007,7 @@ dependencies = [ "regress", "schemars", "serde_json", - "syn 2.0.37", + "syn 2.0.38", "thiserror", "unicode-ident", ] @@ -2936,7 +3024,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.37", + "syn 2.0.38", "typify-impl", ] @@ -2948,9 +3036,9 @@ checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" [[package]] name = "unicode-ident" -version = "1.0.11" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-normalization" 
@@ -2969,9 +3057,9 @@ checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" [[package]] name = "unicode-width" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" [[package]] name = "unindent" @@ -3070,7 +3158,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", "wasm-bindgen-shared", ] @@ -3104,7 +3192,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3174,9 +3262,9 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" dependencies = [ "winapi", ] @@ -3281,27 +3369,6 @@ dependencies = [ "lzma-sys", ] -[[package]] -name = "zerocopy" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20707b61725734c595e840fb3704378a0cd2b9c74cc9e6e20724838fc6a1e2f9" -dependencies = [ - "byteorder", - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56097d5b91d711293a42be9289403896b68654625021732067eac7a4ca388a1f" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.37", -] - [[package]] name = "zstd" version = "0.12.4" diff --git a/Cargo.toml b/Cargo.toml index 6545e1c9a..b1e432007 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,12 +36,12 @@ protoc = [ "datafusion-substrait/protoc" ] 
tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.19", features = ["extension-module", "abi3", "abi3-py38"] } -datafusion = { version = "31.0.0", features = ["pyarrow", "avro"] } -datafusion-common = { version = "31.0.0", features = ["pyarrow"] } -datafusion-expr = { version = "31.0.0" } -datafusion-optimizer = { version = "31.0.0" } -datafusion-sql = { version = "31.0.0" } -datafusion-substrait = { version = "31.0.0" } +datafusion = { git = "https://github.com/apache/arrow-datafusion", rev = "e23d34bae60bb2f9c496241e218bab795af3af83", features = ["pyarrow", "avro"] } +datafusion-common = { git = "https://github.com/apache/arrow-datafusion", rev = "e23d34bae60bb2f9c496241e218bab795af3af83", features = ["pyarrow"] } +datafusion-expr = { git = "https://github.com/apache/arrow-datafusion", rev = "e23d34bae60bb2f9c496241e218bab795af3af83" } +datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion", rev = "e23d34bae60bb2f9c496241e218bab795af3af83" } +datafusion-sql = { git = "https://github.com/apache/arrow-datafusion", rev = "e23d34bae60bb2f9c496241e218bab795af3af83" } +datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion", rev = "e23d34bae60bb2f9c496241e218bab795af3af83" } prost = "0.11" prost-types = "0.11" uuid = { version = "1.3", features = ["v4"] } diff --git a/src/context.rs b/src/context.rs index 632b179a1..a0361acfc 100644 --- a/src/context.rs +++ b/src/context.rs @@ -42,10 +42,10 @@ use crate::utils::{get_tokio_runtime, wait_for_future}; use datafusion::arrow::datatypes::{DataType, Schema}; use datafusion::arrow::pyarrow::PyArrowType; use datafusion::arrow::record_batch::RecordBatch; -use datafusion::common::FileCompressionType; +use datafusion::datasource::file_format::file_compression_type::FileCompressionType; use datafusion::datasource::MemTable; use datafusion::datasource::TableProvider; -use datafusion::execution::context::{SessionConfig, 
SessionContext, TaskContext}; +use datafusion::execution::context::{SessionConfig, SessionContext, SessionState, TaskContext}; use datafusion::execution::disk_manager::DiskManagerConfig; use datafusion::execution::memory_pool::{FairSpillPool, GreedyMemoryPool, UnboundedMemoryPool}; use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv}; @@ -235,8 +235,9 @@ impl PySessionContext { RuntimeConfig::default() }; let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let session_state = SessionState::new_with_config_rt(config, runtime); Ok(PySessionContext { - ctx: SessionContext::with_config_rt(config, runtime), + ctx: SessionContext::new_with_state(session_state), }) } @@ -469,7 +470,7 @@ impl PySessionContext { options.file_extension = file_extension; options.schema = schema.as_ref().map(|x| &x.0); options.file_sort_order = file_sort_order - .unwrap_or(vec![]) + .unwrap_or_default() .into_iter() .map(|e| e.into_iter().map(|f| f.into()).collect()) .collect(); @@ -758,7 +759,7 @@ impl PySessionContext { options.file_extension = file_extension; options.schema = schema.as_ref().map(|x| &x.0); options.file_sort_order = file_sort_order - .unwrap_or(vec![]) + .unwrap_or_default() .into_iter() .map(|e| e.into_iter().map(|f| f.into()).collect()) .collect(); diff --git a/src/expr/literal.rs b/src/expr/literal.rs index 076f89a66..bce987f76 100644 --- a/src/expr/literal.rs +++ b/src/expr/literal.rs @@ -50,7 +50,7 @@ macro_rules! 
extract_scalar_value { impl PyLiteral { /// Get the data type of this literal value fn data_type(&self) -> String { - format!("{}", self.value.get_datatype()) + format!("{}", self.value.data_type()) } pub fn value_f32(&self) -> PyResult> { From 4c7b14c6e4da99a7e52fddaf6fed9ae51ca2c66e Mon Sep 17 00:00:00 2001 From: zhenxing jiang Date: Thu, 12 Oct 2023 10:22:27 -0500 Subject: [PATCH 083/413] add bit_and,bit_or,bit_xor,bool_add,bool_or (#496) * add bit_and,bit_or,bit_xor,bool_add,bool_or * Update datafusion/tests/test_aggregation.py Co-authored-by: Liang-Chi Hsieh --------- Co-authored-by: Liang-Chi Hsieh --- datafusion/tests/test_aggregation.py | 38 ++++++++++++++++++++++++++-- src/functions.rs | 10 ++++++++ 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/datafusion/tests/test_aggregation.py b/datafusion/tests/test_aggregation.py index 2c8c064b1..0a6c90c32 100644 --- a/datafusion/tests/test_aggregation.py +++ b/datafusion/tests/test_aggregation.py @@ -33,8 +33,9 @@ def df(): pa.array([1, 2, 3]), pa.array([4, 4, 6]), pa.array([9, 8, 5]), + pa.array([True, True, False]), ], - names=["a", "b", "c"], + names=["a", "b", "c", "d"], ) return ctx.create_dataframe([[batch]]) @@ -73,7 +74,7 @@ def test_built_in_aggregation(df): ], ) result = agg_df.collect()[0] - values_a, values_b, values_c = df.collect()[0] + values_a, values_b, values_c, values_d = df.collect()[0] assert result.column(0) == pa.array([2], type=pa.uint64()) assert result.column(1) == pa.array([4]) @@ -125,3 +126,36 @@ def test_built_in_aggregation(df): np.testing.assert_array_almost_equal( result.column(21), np.var(values_c, ddof=1) ) + + +def test_bit_add_or_xor(df): + + df = df.aggregate( + [], + [ + f.bit_and(column("a")), + f.bit_or(column("b")), + f.bit_xor(column("c")), + ], + ) + + result = df.collect() + result = result[0] + assert result.column(0) == pa.array([0]) + assert result.column(1) == pa.array([6]) + assert result.column(2) == pa.array([4]) + + +def test_bool_and_or(df): + + 
df = df.aggregate( + [], + [ + f.bool_and(column("d")), + f.bool_or(column("d")), + ], + ) + result = df.collect() + result = result[0] + assert result.column(0) == pa.array([False]) + assert result.column(1) == pa.array([True]) diff --git a/src/functions.rs b/src/functions.rs index ef26240fe..eed28154e 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -362,6 +362,11 @@ aggregate_function!(stddev_samp, Stddev); aggregate_function!(var, Variance); aggregate_function!(var_pop, VariancePop); aggregate_function!(var_samp, Variance); +aggregate_function!(bit_and, BitAnd); +aggregate_function!(bit_or, BitOr); +aggregate_function!(bit_xor, BitXor); +aggregate_function!(bool_and, BoolAnd); +aggregate_function!(bool_or, BoolOr); pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(abs))?; @@ -489,6 +494,11 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(var_pop))?; m.add_wrapped(wrap_pyfunction!(var_samp))?; m.add_wrapped(wrap_pyfunction!(window))?; + m.add_wrapped(wrap_pyfunction!(bit_and))?; + m.add_wrapped(wrap_pyfunction!(bit_or))?; + m.add_wrapped(wrap_pyfunction!(bit_xor))?; + m.add_wrapped(wrap_pyfunction!(bool_and))?; + m.add_wrapped(wrap_pyfunction!(bool_or))?; //Binary String Functions m.add_wrapped(wrap_pyfunction!(encode))?; From 804d0eb3160b0debe5beed77209ea497b0f026c7 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 12 Oct 2023 10:00:25 -0600 Subject: [PATCH 084/413] Use DataFusion 32 (#515) * use DataFusion 32 * update lock file --- Cargo.lock | 131 +++++++++++++++++++++++++++-------------------------- Cargo.toml | 14 +++--- 2 files changed, 74 insertions(+), 71 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ee4eb3f64..f739947f0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -38,9 +38,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.1.1" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ea5d730647d4fadd988536d06fecce94b7b4f2a7efdae548f1cf4b63205518ab" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" dependencies = [ "memchr", ] @@ -342,7 +342,7 @@ dependencies = [ "arrow-select", "num", "regex", - "regex-syntax", + "regex-syntax 0.7.5", ] [[package]] @@ -715,8 +715,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "31.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=e23d34bae60bb2f9c496241e218bab795af3af83#e23d34bae60bb2f9c496241e218bab795af3af83" +version = "32.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7014432223f4d721cb9786cd88bb89e7464e0ba984d4a7f49db7787f5f268674" dependencies = [ "ahash", "apache-avro", @@ -764,8 +765,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "31.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=e23d34bae60bb2f9c496241e218bab795af3af83#e23d34bae60bb2f9c496241e218bab795af3af83" +version = "32.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb3903ed8f102892f17b48efa437f3542159241d41c564f0d1e78efdc5e663aa" dependencies = [ "ahash", "apache-avro", @@ -784,8 +786,9 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "31.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=e23d34bae60bb2f9c496241e218bab795af3af83#e23d34bae60bb2f9c496241e218bab795af3af83" +version = "32.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "780b73b2407050e53f51a9781868593f694102c59e622de9a8aafc0343c4f237" dependencies = [ "arrow", "chrono", @@ -804,8 +807,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "31.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=e23d34bae60bb2f9c496241e218bab795af3af83#e23d34bae60bb2f9c496241e218bab795af3af83" +version = "32.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"24c382676338d8caba6c027ba0da47260f65ffedab38fda78f6d8043f607557c" dependencies = [ "ahash", "arrow", @@ -818,8 +822,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "31.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=e23d34bae60bb2f9c496241e218bab795af3af83#e23d34bae60bb2f9c496241e218bab795af3af83" +version = "32.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f2904a432f795484fd45e29ded4537152adb60f636c05691db34fcd94c92c96" dependencies = [ "arrow", "async-trait", @@ -830,13 +835,14 @@ dependencies = [ "hashbrown 0.14.1", "itertools 0.11.0", "log", - "regex-syntax", + "regex-syntax 0.7.5", ] [[package]] name = "datafusion-physical-expr" -version = "31.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=e23d34bae60bb2f9c496241e218bab795af3af83#e23d34bae60bb2f9c496241e218bab795af3af83" +version = "32.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57b4968e9a998dc0476c4db7a82f280e2026b25f464e4aa0c3bb9807ee63ddfd" dependencies = [ "ahash", "arrow", @@ -868,8 +874,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "31.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=e23d34bae60bb2f9c496241e218bab795af3af83#e23d34bae60bb2f9c496241e218bab795af3af83" +version = "32.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efd0d1fe54e37a47a2d58a1232c22786f2c28ad35805fdcd08f0253a8b0aaa90" dependencies = [ "ahash", "arrow", @@ -898,7 +905,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "31.0.0" +version = "32.0.0" dependencies = [ "async-trait", "datafusion", @@ -916,7 +923,7 @@ dependencies = [ "pyo3", "pyo3-build-config", "rand", - "regex-syntax", + "regex-syntax 0.7.5", "syn 2.0.38", "tokio", "url", @@ -925,8 +932,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "31.0.0" -source = 
"git+https://github.com/apache/arrow-datafusion?rev=e23d34bae60bb2f9c496241e218bab795af3af83#e23d34bae60bb2f9c496241e218bab795af3af83" +version = "32.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b568d44c87ead99604d704f942e257c8a236ee1bbf890ee3e034ad659dcb2c21" dependencies = [ "arrow", "arrow-schema", @@ -938,8 +946,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "31.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=e23d34bae60bb2f9c496241e218bab795af3af83#e23d34bae60bb2f9c496241e218bab795af3af83" +version = "32.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2884dff8207774c1ea5f7b008d44b20e9723bd0b2e4b7dd6627390d8b526b50" dependencies = [ "async-recursion", "chrono", @@ -998,25 +1007,14 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.4" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "add4f07d43996f76ef320709726a556a9d4f965d9410d8d0271132d2f8293480" +checksum = "ac3e13f66a2f95e32a39eaa81f6b95d42878ca0e1db0c7543723dfe12557e860" dependencies = [ - "errno-dragonfly", "libc", "windows-sys", ] -[[package]] -name = "errno-dragonfly" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" -dependencies = [ - "cc", - "libc", -] - [[package]] name = "fastrand" version = "2.0.1" @@ -1456,9 +1454,9 @@ checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" [[package]] name = "jobserver" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2" +checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" dependencies = [ "libc", ] @@ -1614,9 +1612,9 @@ dependencies = 
[ [[package]] name = "linux-raw-sys" -version = "0.4.8" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3852614a3bd9ca9804678ba6be5e3b8ce76dfc902cae004e3e0c44051b6e88db" +checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f" [[package]] name = "lock_api" @@ -1800,9 +1798,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" dependencies = [ "autocfg", "libm", @@ -1865,9 +1863,9 @@ checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "ordered-float" -version = "2.10.0" +version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7940cf2ca942593318d07fcf2596cdca60a85c9e7fab408a5e21a4f9dcd40d87" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" dependencies = [ "num-traits", ] @@ -2040,9 +2038,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.68" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b1106fec09662ec6dd98ccac0f81cef56984d0b49f75c92d8cbad76e20c005c" +checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da" dependencies = [ "unicode-ident", ] @@ -2234,32 +2232,32 @@ dependencies = [ [[package]] name = "regex" -version = "1.9.6" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebee201405406dbf528b8b672104ae6d6d63e6d118cb10e4d51abbc7b58044ff" +checksum = "d119d7c7ca818f8a53c300863d4f87566aac09943aef5b355bb83969dae75d87" dependencies = [ "aho-corasick", "memchr", "regex-automata", - "regex-syntax", + "regex-syntax 0.8.1", ] 
[[package]] name = "regex-automata" -version = "0.3.9" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9" +checksum = "465c6fc0621e4abc4187a2bda0937bfd4f722c2730b29562e19689ea796c9a4b" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.8.1", ] [[package]] name = "regex-lite" -version = "0.1.0" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f96ede7f386ba6e910092e7ccdc04176cface62abebea07ed6b46d870ed95ca2" +checksum = "9a6ebcd15653947e6140f59a9811a06ed061d18a5c35dfca2e2e4c5525696878" [[package]] name = "regex-syntax" @@ -2267,6 +2265,12 @@ version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" +[[package]] +name = "regex-syntax" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56d84fdd47036b038fc80dd333d10b6aab10d5d31f4a366e20014def75328d33" + [[package]] name = "regress" version = "0.6.0" @@ -2357,9 +2361,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.17" +version = "0.38.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f25469e9ae0f3d0047ca8b93fc56843f38e6774f0914a107ff8b41be8be8e0b7" +checksum = "5a74ee2d7c2581cd139b42447d7d9389b889bdaad3a73f1ebb16f2a3237bb19c" dependencies = [ "bitflags 2.4.0", "errno", @@ -2462,9 +2466,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad977052201c6de01a8ef2aa3378c4bd23217a056337d1d6da40468d267a4fb0" +checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" [[package]] name = "seq-macro" @@ -2853,9 +2857,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = 
"tokio" -version = "1.32.0" +version = "1.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17ed6077ed6cd6c74735e21f37eb16dc3935f96878b1fe961074089cc80893f9" +checksum = "4f38200e3ef7995e5ef13baec2f432a6da0aa9ac495b2c0e8f3b7eec2c92d653" dependencies = [ "backtrace", "bytes", @@ -3390,11 +3394,10 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.8+zstd.1.5.5" +version = "2.0.9+zstd.1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c" +checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" dependencies = [ "cc", - "libc", "pkg-config", ] diff --git a/Cargo.toml b/Cargo.toml index b1e432007..8c86b5658 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "31.0.0" +version = "32.0.0" homepage = "https://github.com/apache/arrow-datafusion-python" repository = "https://github.com/apache/arrow-datafusion-python" authors = ["Apache Arrow "] @@ -36,12 +36,12 @@ protoc = [ "datafusion-substrait/protoc" ] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.19", features = ["extension-module", "abi3", "abi3-py38"] } -datafusion = { git = "https://github.com/apache/arrow-datafusion", rev = "e23d34bae60bb2f9c496241e218bab795af3af83", features = ["pyarrow", "avro"] } -datafusion-common = { git = "https://github.com/apache/arrow-datafusion", rev = "e23d34bae60bb2f9c496241e218bab795af3af83", features = ["pyarrow"] } -datafusion-expr = { git = "https://github.com/apache/arrow-datafusion", rev = "e23d34bae60bb2f9c496241e218bab795af3af83" } -datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion", rev = "e23d34bae60bb2f9c496241e218bab795af3af83" } -datafusion-sql = { git = "https://github.com/apache/arrow-datafusion", rev = "e23d34bae60bb2f9c496241e218bab795af3af83" } 
-datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion", rev = "e23d34bae60bb2f9c496241e218bab795af3af83" } +datafusion = { version = "32.0.0", features = ["pyarrow", "avro"] } +datafusion-common = { version = "32.0.0", features = ["pyarrow"] } +datafusion-expr = { version = "32.0.0" } +datafusion-optimizer = { version = "32.0.0" } +datafusion-sql = { version = "32.0.0" } +datafusion-substrait = { version = "32.0.0" } prost = "0.11" prost-types = "0.11" uuid = { version = "1.3", features = ["v4"] } From a91188c51019a1f9c190a6596970914763415965 Mon Sep 17 00:00:00 2001 From: zhenxing jiang Date: Fri, 13 Oct 2023 17:18:55 -0500 Subject: [PATCH 085/413] add first_value last_value (#498) Co-authored-by: Andy Grove --- datafusion/tests/test_functions.py | 23 +++++++++++++++++++++++ src/functions.rs | 4 ++++ 2 files changed, 27 insertions(+) diff --git a/datafusion/tests/test_functions.py b/datafusion/tests/test_functions.py index f1f64c30a..e504cc498 100644 --- a/datafusion/tests/test_functions.py +++ b/datafusion/tests/test_functions.py @@ -479,6 +479,29 @@ def test_case(df): assert result.column(2) == pa.array(["Hola", "Mundo", None]) +def test_first_last_value(df): + df = df.aggregate( + [], + [ + f.first_value(column("a")), + f.first_value(column("b")), + f.first_value(column("d")), + f.last_value(column("a")), + f.last_value(column("b")), + f.last_value(column("d")), + ], + ) + + result = df.collect() + result = result[0] + assert result.column(0) == pa.array(["Hello"]) + assert result.column(1) == pa.array([4]) + assert result.column(2) == pa.array([datetime(2022, 12, 31)]) + assert result.column(3) == pa.array(["!"]) + assert result.column(4) == pa.array([6]) + assert result.column(5) == pa.array([datetime(2020, 7, 2)]) + + def test_binary_string_functions(df): df = df.select( f.encode(column("a"), literal("base64")), diff --git a/src/functions.rs b/src/functions.rs index eed28154e..2f2f34ee0 100644 --- a/src/functions.rs +++ 
b/src/functions.rs @@ -362,6 +362,8 @@ aggregate_function!(stddev_samp, Stddev); aggregate_function!(var, Variance); aggregate_function!(var_pop, VariancePop); aggregate_function!(var_samp, Variance); +aggregate_function!(first_value, FirstValue); +aggregate_function!(last_value, LastValue); aggregate_function!(bit_and, BitAnd); aggregate_function!(bit_or, BitOr); aggregate_function!(bit_xor, BitXor); @@ -494,6 +496,8 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(var_pop))?; m.add_wrapped(wrap_pyfunction!(var_samp))?; m.add_wrapped(wrap_pyfunction!(window))?; + m.add_wrapped(wrap_pyfunction!(first_value))?; + m.add_wrapped(wrap_pyfunction!(last_value))?; m.add_wrapped(wrap_pyfunction!(bit_and))?; m.add_wrapped(wrap_pyfunction!(bit_or))?; m.add_wrapped(wrap_pyfunction!(bit_xor))?; From 484ed1108736d4cfd3073ace3743c511a4764bb1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 14 Oct 2023 14:37:43 -0700 Subject: [PATCH 086/413] build(deps): bump regex-syntax from 0.7.5 to 0.8.1 (#517) Bumps [regex-syntax](https://github.com/rust-lang/regex) from 0.7.5 to 0.8.1. - [Release notes](https://github.com/rust-lang/regex/releases) - [Changelog](https://github.com/rust-lang/regex/blob/master/CHANGELOG.md) - [Commits](https://github.com/rust-lang/regex/compare/regex-syntax-0.7.5...regex-syntax-0.8.1) --- updated-dependencies: - dependency-name: regex-syntax dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f739947f0..cb49f1aa3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -923,7 +923,7 @@ dependencies = [ "pyo3", "pyo3-build-config", "rand", - "regex-syntax 0.7.5", + "regex-syntax 0.8.1", "syn 2.0.38", "tokio", "url", diff --git a/Cargo.toml b/Cargo.toml index 8c86b5658..556450cd1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,7 +50,7 @@ async-trait = "0.1" futures = "0.3" object_store = { version = "0.7.0", features = ["aws", "gcp", "azure"] } parking_lot = "0.12" -regex-syntax = "0.7.1" +regex-syntax = "0.8.1" syn = "2.0.37" url = "2.2" From c4675b76926dbc605735d26fc54dd6e9bd67b83e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 14 Oct 2023 16:18:32 -0700 Subject: [PATCH 087/413] build(deps): bump pyo3-build-config from 0.19.2 to 0.20.0 (#516) Bumps [pyo3-build-config](https://github.com/pyo3/pyo3) from 0.19.2 to 0.20.0. - [Release notes](https://github.com/pyo3/pyo3/releases) - [Changelog](https://github.com/PyO3/pyo3/blob/main/CHANGELOG.md) - [Commits](https://github.com/pyo3/pyo3/compare/v0.19.2...v0.20.0) --- updated-dependencies: - dependency-name: pyo3-build-config dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 16 +++++++++++++--- Cargo.toml | 2 +- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cb49f1aa3..76ea5784c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -921,7 +921,7 @@ dependencies = [ "prost", "prost-types", "pyo3", - "pyo3-build-config", + "pyo3-build-config 0.20.0", "rand", "regex-syntax 0.8.1", "syn 2.0.38", @@ -2117,7 +2117,7 @@ dependencies = [ "libc", "memoffset", "parking_lot", - "pyo3-build-config", + "pyo3-build-config 0.19.2", "pyo3-ffi", "pyo3-macros", "unindent", @@ -2133,6 +2133,16 @@ dependencies = [ "target-lexicon", ] +[[package]] +name = "pyo3-build-config" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a96fe70b176a89cff78f2fa7b3c930081e163d5379b4dcdf993e3ae29ca662e5" +dependencies = [ + "once_cell", + "target-lexicon", +] + [[package]] name = "pyo3-ffi" version = "0.19.2" @@ -2140,7 +2150,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e53cee42e77ebe256066ba8aa77eff722b3bb91f3419177cf4cd0f304d3284d9" dependencies = [ "libc", - "pyo3-build-config", + "pyo3-build-config 0.19.2", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 556450cd1..7c1a57bd3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,7 +55,7 @@ syn = "2.0.37" url = "2.2" [build-dependencies] -pyo3-build-config = "0.19.0" +pyo3-build-config = "0.20.0" [lib] name = "datafusion_python" From 5ec45ddd5f3b44a3b39d591e3aa3c6eb8880c5ee Mon Sep 17 00:00:00 2001 From: zhenxing jiang Date: Sun, 15 Oct 2023 11:05:57 -0500 Subject: [PATCH 088/413] add regr_* functions (#499) Co-authored-by: Andy Grove --- datafusion/tests/test_functions.py | 22 ++++++++++++++++++++++ src/functions.rs | 18 ++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/datafusion/tests/test_functions.py b/datafusion/tests/test_functions.py index 
e504cc498..be2a2f1f5 100644 --- a/datafusion/tests/test_functions.py +++ b/datafusion/tests/test_functions.py @@ -479,6 +479,28 @@ def test_case(df): assert result.column(2) == pa.array(["Hola", "Mundo", None]) +def test_regr_funcs(df): + # test case base on + # https://github.com/apache/arrow-datafusion/blob/d1361d56b9a9e0c165d3d71a8df6795d2a5f51dd/datafusion/core/tests/sqllogictests/test_files/aggregate.slt#L2330 + ctx = SessionContext() + result = ctx.sql( + "select regr_slope(1,1), regr_intercept(1,1), " + "regr_count(1,1), regr_r2(1,1), regr_avgx(1,1), " + "regr_avgy(1,1), regr_sxx(1,1), regr_syy(1,1), " + "regr_sxy(1,1);" + ).collect() + + assert result[0].column(0) == pa.array([None], type=pa.float64()) + assert result[0].column(1) == pa.array([None], type=pa.float64()) + assert result[0].column(2) == pa.array([1], type=pa.float64()) + assert result[0].column(3) == pa.array([None], type=pa.float64()) + assert result[0].column(4) == pa.array([1], type=pa.float64()) + assert result[0].column(5) == pa.array([1], type=pa.float64()) + assert result[0].column(6) == pa.array([0], type=pa.float64()) + assert result[0].column(7) == pa.array([0], type=pa.float64()) + assert result[0].column(8) == pa.array([0], type=pa.float64()) + + def test_first_last_value(df): df = df.aggregate( [], diff --git a/src/functions.rs b/src/functions.rs index 2f2f34ee0..e509aff71 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -362,6 +362,15 @@ aggregate_function!(stddev_samp, Stddev); aggregate_function!(var, Variance); aggregate_function!(var_pop, VariancePop); aggregate_function!(var_samp, Variance); +aggregate_function!(regr_avgx, RegrAvgx); +aggregate_function!(regr_avgy, RegrAvgy); +aggregate_function!(regr_count, RegrCount); +aggregate_function!(regr_intercept, RegrIntercept); +aggregate_function!(regr_r2, RegrR2); +aggregate_function!(regr_slope, RegrSlope); +aggregate_function!(regr_sxx, RegrSXX); +aggregate_function!(regr_sxy, RegrSXY); +aggregate_function!(regr_syy, 
RegrSYY); aggregate_function!(first_value, FirstValue); aggregate_function!(last_value, LastValue); aggregate_function!(bit_and, BitAnd); @@ -496,6 +505,15 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(var_pop))?; m.add_wrapped(wrap_pyfunction!(var_samp))?; m.add_wrapped(wrap_pyfunction!(window))?; + m.add_wrapped(wrap_pyfunction!(regr_avgx))?; + m.add_wrapped(wrap_pyfunction!(regr_avgy))?; + m.add_wrapped(wrap_pyfunction!(regr_count))?; + m.add_wrapped(wrap_pyfunction!(regr_intercept))?; + m.add_wrapped(wrap_pyfunction!(regr_r2))?; + m.add_wrapped(wrap_pyfunction!(regr_slope))?; + m.add_wrapped(wrap_pyfunction!(regr_sxx))?; + m.add_wrapped(wrap_pyfunction!(regr_sxy))?; + m.add_wrapped(wrap_pyfunction!(regr_syy))?; m.add_wrapped(wrap_pyfunction!(first_value))?; m.add_wrapped(wrap_pyfunction!(last_value))?; m.add_wrapped(wrap_pyfunction!(bit_and))?; From 399fa758ccb1dc785929123350144902a9b6c502 Mon Sep 17 00:00:00 2001 From: Dan Lovell Date: Tue, 17 Oct 2023 16:59:41 -0400 Subject: [PATCH 089/413] feat: expose PyWindowFrame (#509) * feat: expose PyWindowFrame * fix: PyWindowFrame: return Err instead of panicking * test: test PyWindowFrame creation --- datafusion/__init__.py | 2 + datafusion/tests/test_dataframe.py | 41 ++++++++++- src/functions.rs | 20 +++++- src/lib.rs | 2 + src/udaf.rs | 30 +++++++- src/window_frame.rs | 110 +++++++++++++++++++++++++++++ 6 files changed, 200 insertions(+), 5 deletions(-) create mode 100644 src/window_frame.rs diff --git a/datafusion/__init__.py b/datafusion/__init__.py index bb1beacd9..4a495b468 100644 --- a/datafusion/__init__.py +++ b/datafusion/__init__.py @@ -33,6 +33,7 @@ SessionConfig, RuntimeConfig, ScalarUDF, + WindowFrame, ) from .common import ( @@ -98,6 +99,7 @@ "Expr", "AggregateUDF", "ScalarUDF", + "WindowFrame", "column", "literal", "TableScan", diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py index ce7d89e7b..c9b0f076f 100644 --- 
a/datafusion/tests/test_dataframe.py +++ b/datafusion/tests/test_dataframe.py @@ -21,7 +21,14 @@ import pytest from datafusion import functions as f -from datafusion import DataFrame, SessionContext, column, literal, udf +from datafusion import ( + DataFrame, + SessionContext, + WindowFrame, + column, + literal, + udf, +) @pytest.fixture @@ -304,6 +311,38 @@ def test_window_functions(df): assert table.sort_by("a").to_pydict() == expected +@pytest.mark.parametrize( + ("units", "start_bound", "end_bound"), + [ + (units, start_bound, end_bound) + for units in ("rows", "range") + for start_bound in (None, 0, 1) + for end_bound in (None, 0, 1) + ] + + [ + ("groups", 0, 0), + ], +) +def test_valid_window_frame(units, start_bound, end_bound): + WindowFrame(units, start_bound, end_bound) + + +@pytest.mark.parametrize( + ("units", "start_bound", "end_bound"), + [ + ("invalid-units", 0, None), + ("invalid-units", None, 0), + ("invalid-units", None, None), + ("groups", None, 0), + ("groups", 0, None), + ("groups", None, None), + ], +) +def test_invalid_window_frame(units, start_bound, end_bound): + with pytest.raises(RuntimeError): + WindowFrame(units, start_bound, end_bound) + + def test_get_dataframe(tmp_path): ctx = SessionContext() diff --git a/src/functions.rs b/src/functions.rs index e509aff71..42203d7b4 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -17,9 +17,12 @@ use pyo3::{prelude::*, wrap_pyfunction}; +use crate::context::PySessionContext; use crate::errors::DataFusionError; use crate::expr::conditional_expr::PyCaseBuilder; use crate::expr::PyExpr; +use crate::window_frame::PyWindowFrame; +use datafusion::execution::FunctionRegistry; use datafusion_common::Column; use datafusion_expr::expr::Alias; use datafusion_expr::{ @@ -27,7 +30,7 @@ use datafusion_expr::{ expr::{AggregateFunction, ScalarFunction, Sort, WindowFunction}, lit, window_function::find_df_window_func, - BuiltinScalarFunction, Expr, WindowFrame, + BuiltinScalarFunction, Expr, }; #[pyfunction] 
@@ -130,13 +133,24 @@ fn window( args: Vec, partition_by: Option>, order_by: Option>, + window_frame: Option, + ctx: Option, ) -> PyResult { - let fun = find_df_window_func(name); + let fun = find_df_window_func(name).or_else(|| { + ctx.and_then(|ctx| { + ctx.ctx + .udaf(name) + .map(|fun| datafusion_expr::WindowFunction::AggregateUDF(fun)) + .ok() + }) + }); if fun.is_none() { return Err(DataFusionError::Common("window function not found".to_string()).into()); } let fun = fun.unwrap(); - let window_frame = WindowFrame::new(order_by.is_some()); + let window_frame = window_frame + .unwrap_or_else(|| PyWindowFrame::new("rows", None, Some(0)).unwrap()) + .into(); Ok(PyExpr { expr: datafusion_expr::Expr::WindowFunction(WindowFunction { fun, diff --git a/src/lib.rs b/src/lib.rs index 2512aefa4..b9bd5766e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -54,6 +54,7 @@ mod udaf; #[allow(clippy::borrow_deref_ref)] mod udf; pub mod utils; +mod window_frame; #[cfg(feature = "mimalloc")] #[global_allocator] @@ -83,6 +84,7 @@ fn _internal(py: Python, m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/src/udaf.rs b/src/udaf.rs index 3b70aeb06..6450f03fe 100644 --- a/src/udaf.rs +++ b/src/udaf.rs @@ -17,7 +17,7 @@ use std::sync::Arc; -use pyo3::{prelude::*, types::PyTuple}; +use pyo3::{prelude::*, types::PyBool, types::PyTuple}; use datafusion::arrow::array::{Array, ArrayRef}; use datafusion::arrow::datatypes::DataType; @@ -93,6 +93,34 @@ impl Accumulator for RustAccumulator { fn size(&self) -> usize { std::mem::size_of_val(self) } + + fn retract_batch(&mut self, values: &[ArrayRef]) -> Result<()> { + Python::with_gil(|py| { + // 1. cast args to Pyarrow array + let py_args = values + .iter() + .map(|arg| arg.into_data().to_pyarrow(py).unwrap()) + .collect::>(); + let py_args = PyTuple::new(py, py_args); + + // 2. 
call function + self.accum + .as_ref(py) + .call_method1("retract_batch", py_args) + .map_err(|e| DataFusionError::Execution(format!("{e}")))?; + + Ok(()) + }) + } + + fn supports_retract_batch(&self) -> bool { + Python::with_gil(|py| { + let x: Result<&PyAny, PyErr> = + self.accum.as_ref(py).call_method0("supports_retract_batch"); + let x: &PyAny = x.unwrap_or(PyBool::new(py, false)); + x.extract().unwrap_or(false) + }) + } } pub fn to_rust_accumulator(accum: PyObject) -> AccumulatorFactoryFunction { diff --git a/src/window_frame.rs b/src/window_frame.rs new file mode 100644 index 000000000..b8f414e6a --- /dev/null +++ b/src/window_frame.rs @@ -0,0 +1,110 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use datafusion_common::{DataFusionError, ScalarValue}; +use datafusion_expr::window_frame::{WindowFrame, WindowFrameBound, WindowFrameUnits}; +use pyo3::prelude::*; +use std::fmt::{Display, Formatter}; + +use crate::errors::py_datafusion_err; + +#[pyclass(name = "WindowFrame", module = "datafusion", subclass)] +#[derive(Clone)] +pub struct PyWindowFrame { + frame: WindowFrame, +} + +impl From for WindowFrame { + fn from(frame: PyWindowFrame) -> Self { + frame.frame + } +} + +impl From for PyWindowFrame { + fn from(frame: WindowFrame) -> PyWindowFrame { + PyWindowFrame { frame } + } +} + +impl Display for PyWindowFrame { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + write!( + f, + "OVER ({} BETWEEN {} AND {})", + self.frame.units, self.frame.start_bound, self.frame.end_bound + ) + } +} + +#[pymethods] +impl PyWindowFrame { + #[new(unit, start_bound, end_bound)] + pub fn new(units: &str, start_bound: Option, end_bound: Option) -> PyResult { + let units = units.to_ascii_lowercase(); + let units = match units.as_str() { + "rows" => WindowFrameUnits::Rows, + "range" => WindowFrameUnits::Range, + "groups" => WindowFrameUnits::Groups, + _ => { + return Err(py_datafusion_err(DataFusionError::NotImplemented(format!( + "{:?}", + units, + )))); + } + }; + let start_bound = match start_bound { + Some(start_bound) => { + WindowFrameBound::Preceding(ScalarValue::UInt64(Some(start_bound))) + } + None => match units { + WindowFrameUnits::Range => WindowFrameBound::Preceding(ScalarValue::UInt64(None)), + WindowFrameUnits::Rows => WindowFrameBound::Preceding(ScalarValue::UInt64(None)), + WindowFrameUnits::Groups => { + return Err(py_datafusion_err(DataFusionError::NotImplemented(format!( + "{:?}", + units, + )))); + } + }, + }; + let end_bound = match end_bound { + Some(end_bound) => WindowFrameBound::Following(ScalarValue::UInt64(Some(end_bound))), + None => match units { + WindowFrameUnits::Rows => WindowFrameBound::Following(ScalarValue::UInt64(None)), + 
WindowFrameUnits::Range => WindowFrameBound::Following(ScalarValue::UInt64(None)), + WindowFrameUnits::Groups => { + return Err(py_datafusion_err(DataFusionError::NotImplemented(format!( + "{:?}", + units, + )))); + } + }, + }; + Ok(PyWindowFrame { + frame: WindowFrame { + units, + start_bound, + end_bound, + }, + }) + } + + /// Get a String representation of this window frame + fn __repr__(&self) -> String { + format!("{}", self) + } +} From c2768d8f9d558f3a5e042f1e934c02f945ede797 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Wed, 18 Oct 2023 11:40:26 -0400 Subject: [PATCH 090/413] Add random missing bindings (#522) --- src/expr/aggregate.rs | 49 +++++++++++++++++++++++++++++++++++++++++++ src/expr/join.rs | 14 +++++++++++++ src/expr/sort.rs | 4 ++++ 3 files changed, 67 insertions(+) diff --git a/src/expr/aggregate.rs b/src/expr/aggregate.rs index c3de9673a..5ebf8c6cf 100644 --- a/src/expr/aggregate.rs +++ b/src/expr/aggregate.rs @@ -16,12 +16,15 @@ // under the License. use datafusion_common::DataFusionError; +use datafusion_expr::expr::{AggregateFunction, AggregateUDF, Alias}; use datafusion_expr::logical_plan::Aggregate; +use datafusion_expr::Expr; use pyo3::prelude::*; use std::fmt::{self, Display, Formatter}; use super::logical_node::LogicalNode; use crate::common::df_schema::PyDFSchema; +use crate::errors::py_type_err; use crate::expr::PyExpr; use crate::sql::logical::PyLogicalPlan; @@ -84,6 +87,24 @@ impl PyAggregate { .collect()) } + /// Returns the inner Aggregate Expr(s) + pub fn agg_expressions(&self) -> PyResult> { + Ok(self + .aggregate + .aggr_expr + .iter() + .map(|e| PyExpr::from(e.clone())) + .collect()) + } + + pub fn agg_func_name(&self, expr: PyExpr) -> PyResult { + Self::_agg_func_name(&expr.expr) + } + + pub fn aggregation_arguments(&self, expr: PyExpr) -> PyResult> { + self._aggregation_arguments(&expr.expr) + } + // Retrieves the input `LogicalPlan` to this `Aggregate` node fn input(&self) -> PyResult> { Ok(Self::inputs(self)) @@ -99,6 
+120,34 @@ impl PyAggregate { } } +impl PyAggregate { + #[allow(clippy::only_used_in_recursion)] + fn _aggregation_arguments(&self, expr: &Expr) -> PyResult> { + match expr { + // TODO: This Alias logic seems to be returning some strange results that we should investigate + Expr::Alias(Alias { expr, .. }) => self._aggregation_arguments(expr.as_ref()), + Expr::AggregateFunction(AggregateFunction { fun: _, args, .. }) + | Expr::AggregateUDF(AggregateUDF { fun: _, args, .. }) => { + Ok(args.iter().map(|e| PyExpr::from(e.clone())).collect()) + } + _ => Err(py_type_err( + "Encountered a non Aggregate type in aggregation_arguments", + )), + } + } + + fn _agg_func_name(expr: &Expr) -> PyResult { + match expr { + Expr::Alias(Alias { expr, .. }) => Self::_agg_func_name(expr.as_ref()), + Expr::AggregateFunction(AggregateFunction { fun, .. }) => Ok(fun.to_string()), + Expr::AggregateUDF(AggregateUDF { fun, .. }) => Ok(fun.name.clone()), + _ => Err(py_type_err( + "Encountered a non Aggregate type in agg_func_name", + )), + } + } +} + impl LogicalNode for PyAggregate { fn inputs(&self) -> Vec { vec![PyLogicalPlan::from((*self.aggregate.input).clone())] diff --git a/src/expr/join.rs b/src/expr/join.rs index 801662962..a53ddd3ba 100644 --- a/src/expr/join.rs +++ b/src/expr/join.rs @@ -46,6 +46,10 @@ impl PyJoinType { pub fn is_outer(&self) -> bool { self.join_type.is_outer() } + + fn __repr__(&self) -> PyResult { + Ok(format!("{}", self.join_type)) + } } impl Display for PyJoinType { @@ -72,6 +76,16 @@ impl From for JoinConstraint { } } +#[pymethods] +impl PyJoinConstraint { + fn __repr__(&self) -> PyResult { + match self.join_constraint { + JoinConstraint::On => Ok("On".to_string()), + JoinConstraint::Using => Ok("Using".to_string()), + } + } +} + #[pyclass(name = "Join", module = "datafusion.expr", subclass)] #[derive(Clone)] pub struct PyJoin { diff --git a/src/expr/sort.rs b/src/expr/sort.rs index 8843c638d..f9f9e5899 100644 --- a/src/expr/sort.rs +++ b/src/expr/sort.rs @@ 
-72,6 +72,10 @@ impl PySort { .collect()) } + fn get_fetch_val(&self) -> PyResult> { + Ok(self.sort.fetch) + } + /// Retrieves the input `LogicalPlan` to this `Sort` node fn input(&self) -> PyResult> { Ok(Self::inputs(self)) From 59140f29e524684ee2250260308b40af932a97c1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 18 Oct 2023 12:29:57 -0700 Subject: [PATCH 091/413] build(deps): bump rustix from 0.38.18 to 0.38.19 (#523) Bumps [rustix](https://github.com/bytecodealliance/rustix) from 0.38.18 to 0.38.19. - [Release notes](https://github.com/bytecodealliance/rustix/releases) - [Commits](https://github.com/bytecodealliance/rustix/compare/v0.38.18...v0.38.19) --- updated-dependencies: - dependency-name: rustix dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 76ea5784c..0495b7e1d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2371,9 +2371,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.18" +version = "0.38.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a74ee2d7c2581cd139b42447d7d9389b889bdaad3a73f1ebb16f2a3237bb19c" +checksum = "745ecfa778e66b2b63c88a61cb36e0eea109e803b0b86bf9879fbc77c70e86ed" dependencies = [ "bitflags 2.4.0", "errno", From 501acfff1b82cda7e4892a57deb3ab2cb83ea2b4 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Sat, 21 Oct 2023 08:47:04 -0400 Subject: [PATCH 092/413] Allow for multiple input files per table instead of a single file (#519) --- datafusion/input/location.py | 10 +++++----- datafusion/tests/test_input.py | 2 +- src/common/schema.rs | 16 ++++++++-------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/datafusion/input/location.py b/datafusion/input/location.py index efbc82f23..939c7f415 100644 --- 
a/datafusion/input/location.py +++ b/datafusion/input/location.py @@ -16,6 +16,7 @@ # under the License. import os +import glob from typing import Any from datafusion.common import DataTypeMap, SqlTable @@ -41,14 +42,12 @@ def build_table( format = extension.lstrip(".").lower() num_rows = 0 # Total number of rows in the file. Used for statistics columns = [] - if format == "parquet": import pyarrow.parquet as pq # Read the Parquet metadata metadata = pq.read_metadata(input_file) num_rows = metadata.num_rows - # Iterate through the schema and build the SqlTable for col in metadata.schema: columns.append( @@ -57,7 +56,6 @@ def build_table( DataTypeMap.from_parquet_type_str(col.physical_type), ) ) - elif format == "csv": import csv @@ -73,7 +71,6 @@ def build_table( print(header_row) for _ in reader: num_rows += 1 - # TODO: Need to actually consume this row into resonable columns raise RuntimeError( "TODO: Currently unable to support CSV input files." @@ -84,4 +81,7 @@ def build_table( Only Parquet and CSV." ) - return SqlTable(table_name, columns, num_rows, input_file) + # Input could possibly be multiple files. 
Create a list if so + input_files = glob.glob(input_file) + + return SqlTable(table_name, columns, num_rows, input_files) diff --git a/datafusion/tests/test_input.py b/datafusion/tests/test_input.py index 1e2ef4166..5b1decf26 100644 --- a/datafusion/tests/test_input.py +++ b/datafusion/tests/test_input.py @@ -30,4 +30,4 @@ def test_location_input(): tbl = location_input.build_table(input_file, table_name) assert "blog" == tbl.name assert 3 == len(tbl.columns) - assert "blogs.parquet" in tbl.filepath + assert "blogs.parquet" in tbl.filepaths[0] diff --git a/src/common/schema.rs b/src/common/schema.rs index a003d0ca1..77b0ce2ba 100644 --- a/src/common/schema.rs +++ b/src/common/schema.rs @@ -56,7 +56,7 @@ pub struct SqlTable { #[pyo3(get, set)] pub statistics: SqlStatistics, #[pyo3(get, set)] - pub filepath: Option, + pub filepaths: Option>, } #[pymethods] @@ -66,7 +66,7 @@ impl SqlTable { table_name: String, columns: Vec<(String, DataTypeMap)>, row_count: f64, - filepath: Option, + filepaths: Option>, ) -> Self { Self { name: table_name, @@ -76,7 +76,7 @@ impl SqlTable { indexes: Vec::new(), constraints: Vec::new(), statistics: SqlStatistics::new(row_count), - filepath, + filepaths, } } } @@ -124,7 +124,7 @@ impl SqlSchema { pub struct SqlTableSource { schema: SchemaRef, statistics: Option, - filepath: Option, + filepaths: Option>, } impl SqlTableSource { @@ -132,12 +132,12 @@ impl SqlTableSource { pub fn new( schema: SchemaRef, statistics: Option, - filepath: Option, + filepaths: Option>, ) -> Self { Self { schema, statistics, - filepath, + filepaths, } } @@ -148,8 +148,8 @@ impl SqlTableSource { /// Access optional filepath associated with this table source #[allow(dead_code)] - pub fn filepath(&self) -> Option<&String> { - self.filepath.as_ref() + pub fn filepaths(&self) -> Option<&Vec> { + self.filepaths.as_ref() } } From c6a7af590191c7c71e805df826e5ce8535a6f1b9 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Sat, 21 Oct 2023 13:21:07 -0400 Subject: [PATCH 
093/413] Add support for window function bindings (#521) * Add support for window function bindings * Refactor PyWindow to be moved to Expr submodule * Adjust Expr module imports, moved WindowFrame to expr * Update src/expr/window.rs --------- Co-authored-by: Andy Grove --- Cargo.lock | 117 +++++++++------- datafusion/__init__.py | 4 +- src/common/data_type.rs | 18 +++ src/expr.rs | 13 ++ src/expr/window.rs | 294 ++++++++++++++++++++++++++++++++++++++++ src/functions.rs | 2 +- src/lib.rs | 2 - src/sql/logical.rs | 8 +- src/window_frame.rs | 110 --------------- 9 files changed, 403 insertions(+), 165 deletions(-) create mode 100644 src/expr/window.rs delete mode 100644 src/window_frame.rs diff --git a/Cargo.lock b/Cargo.lock index 0495b7e1d..4ae3ec1f9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -107,12 +107,12 @@ dependencies = [ "serde_json", "snap", "strum 0.25.0", - "strum_macros 0.25.2", + "strum_macros 0.25.3", "thiserror", "typed-builder", "uuid", "xz2", - "zstd", + "zstd 0.12.4", ] [[package]] @@ -312,7 +312,7 @@ version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d1d179c117b158853e0101bfbed5615e86fe97ee356b4af901f1c5001e1ce4b" dependencies = [ - "bitflags 2.4.0", + "bitflags 2.4.1", ] [[package]] @@ -347,9 +347,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb42b2197bf15ccb092b62c74515dbd8b86d0effd934795f6687c93b6e679a2c" +checksum = "f658e2baef915ba0f26f1f7c42bfb8e12f532a01f449a090ded75ae7a07e9ba2" dependencies = [ "bzip2", "flate2", @@ -359,8 +359,8 @@ dependencies = [ "pin-project-lite", "tokio", "xz2", - "zstd", - "zstd-safe", + "zstd 0.13.0", + "zstd-safe 7.0.0", ] [[package]] @@ -376,9 +376,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.73" +version = "0.1.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" +checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9" dependencies = [ "proc-macro2", "quote", @@ -429,9 +429,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" [[package]] name = "blake2" @@ -760,7 +760,7 @@ dependencies = [ "url", "uuid", "xz2", - "zstd", + "zstd 0.12.4", ] [[package]] @@ -817,7 +817,7 @@ dependencies = [ "datafusion-common", "sqlparser", "strum 0.25.0", - "strum_macros 0.25.2", + "strum_macros 0.25.3", ] [[package]] @@ -923,7 +923,7 @@ dependencies = [ "pyo3", "pyo3-build-config 0.20.0", "rand", - "regex-syntax 0.8.1", + "regex-syntax 0.8.2", "syn 2.0.38", "tokio", "url", @@ -1039,9 +1039,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.27" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6c98ee8095e9d1dcbf2fcc6d95acccb90d1c81db1e44725c6a984b1dbdfb010" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" dependencies = [ "crc32fast", "miniz_oxide", @@ -1184,7 +1184,7 @@ version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbf97ba92db08df386e10c8ede66a2a0369bd277090afd8710e19e38de9ec0cd" dependencies = [ - "bitflags 2.4.0", + "bitflags 2.4.1", "libc", "libgit2-sys", "log", @@ -1359,16 +1359,16 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.57" +version = "0.1.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" +checksum = "8326b86b6cff230b97d0d312a6c40a60726df3332e721f72a1b035f451663b20" 
dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "windows", + "windows-core", ] [[package]] @@ -1924,7 +1924,7 @@ dependencies = [ "thrift", "tokio", "twox-hash", - "zstd", + "zstd 0.12.4", ] [[package]] @@ -2242,32 +2242,32 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.0" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d119d7c7ca818f8a53c300863d4f87566aac09943aef5b355bb83969dae75d87" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" dependencies = [ "aho-corasick", "memchr", "regex-automata", - "regex-syntax 0.8.1", + "regex-syntax 0.8.2", ] [[package]] name = "regex-automata" -version = "0.4.1" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "465c6fc0621e4abc4187a2bda0937bfd4f722c2730b29562e19689ea796c9a4b" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.1", + "regex-syntax 0.8.2", ] [[package]] name = "regex-lite" -version = "0.1.3" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a6ebcd15653947e6140f59a9811a06ed061d18a5c35dfca2e2e4c5525696878" +checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e" [[package]] name = "regex-syntax" @@ -2277,9 +2277,9 @@ checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" [[package]] name = "regex-syntax" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56d84fdd47036b038fc80dd333d10b6aab10d5d31f4a366e20014def75328d33" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "regress" @@ -2375,7 +2375,7 @@ version = "0.38.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"745ecfa778e66b2b63c88a61cb36e0eea109e803b0b86bf9879fbc77c70e86ed" dependencies = [ - "bitflags 2.4.0", + "bitflags 2.4.1", "errno", "libc", "linux-raw-sys", @@ -2488,18 +2488,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.188" +version = "1.0.189" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +checksum = "8e422a44e74ad4001bdc8eede9a4570ab52f71190e9c076d14369f38b9200537" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.188" +version = "1.0.189" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +checksum = "1e48d1f918009ce3145511378cf68d613e3b3d9137d67272562080d68a2b32d5" dependencies = [ "proc-macro2", "quote", @@ -2690,7 +2690,7 @@ version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" dependencies = [ - "strum_macros 0.25.2", + "strum_macros 0.25.3", ] [[package]] @@ -2708,9 +2708,9 @@ dependencies = [ [[package]] name = "strum_macros" -version = "0.25.2" +version = "0.25.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad8d03b598d3d0fff69bf533ee3ef19b8eeb342729596df84bcc7e1f96ec4059" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" dependencies = [ "heck", "proc-macro2", @@ -2926,11 +2926,10 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" -version = "0.1.37" +version = "0.1.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +checksum = "ee2ef2af84856a50c1d430afce2fdded0a4ec7eda868db86409b4543df0797f9" dependencies = [ - "cfg-if", 
"pin-project-lite", "tracing-attributes", "tracing-core", @@ -2938,9 +2937,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", @@ -2949,9 +2948,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", ] @@ -3290,10 +3289,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] -name = "windows" -version = "0.48.0" +name = "windows-core" +version = "0.51.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64" dependencies = [ "windows-targets", ] @@ -3389,7 +3388,16 @@ version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" dependencies = [ - "zstd-safe", + "zstd-safe 6.0.6", +] + +[[package]] +name = "zstd" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110" +dependencies = [ + "zstd-safe 7.0.0", ] [[package]] @@ -3402,6 +3410,15 @@ dependencies = [ "zstd-sys", ] +[[package]] +name = "zstd-safe" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e" +dependencies = [ + "zstd-sys", +] + [[package]] name = "zstd-sys" version = "2.0.9+zstd.1.5.5" diff --git a/datafusion/__init__.py b/datafusion/__init__.py index 4a495b468..c854f3f9d 100644 --- a/datafusion/__init__.py +++ b/datafusion/__init__.py @@ -33,7 +33,6 @@ SessionConfig, RuntimeConfig, ScalarUDF, - WindowFrame, ) from .common import ( @@ -86,6 +85,8 @@ DropTable, Repartition, Partitioning, + Window, + WindowFrame, ) __version__ = importlib_metadata.version(__name__) @@ -99,6 +100,7 @@ "Expr", "AggregateUDF", "ScalarUDF", + "Window", "WindowFrame", "column", "literal", diff --git a/src/common/data_type.rs b/src/common/data_type.rs index 078b8c841..405a5632d 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -329,6 +329,7 @@ impl DataTypeMap { } "float" => Ok(DataType::Float32), "double" => Ok(DataType::Float64), + "byte_array" => Ok(DataType::Utf8), _ => Err(PyValueError::new_err(format!( "Unable to determine Arrow Data Type from Parquet String type: {:?}", parquet_str_type @@ -604,13 +605,30 @@ impl PyDataType { /// is presented as a String rather than an actual DataType. This function is used to /// convert that String to a DataType for the Python side to use. pub fn py_map_from_arrow_type_str(arrow_str_type: String) -> PyResult { + // Certain string types contain "metadata" that should be trimmed here. Ex: "datetime64[ns, Europe/Berlin]" + let arrow_str_type = match arrow_str_type.find('[') { + Some(index) => arrow_str_type[0..index].to_string(), + None => arrow_str_type, // Return early if ',' is not found. 
+ }; + let arrow_dtype = match arrow_str_type.to_lowercase().as_str() { + "bool" => Ok(DataType::Boolean), "boolean" => Ok(DataType::Boolean), + "uint8" => Ok(DataType::UInt8), + "uint16" => Ok(DataType::UInt16), + "uint32" => Ok(DataType::UInt32), + "uint64" => Ok(DataType::UInt64), + "int8" => Ok(DataType::Int8), + "int16" => Ok(DataType::Int16), "int32" => Ok(DataType::Int32), "int64" => Ok(DataType::Int64), "float" => Ok(DataType::Float32), "double" => Ok(DataType::Float64), + "float16" => Ok(DataType::Float16), + "float32" => Ok(DataType::Float32), "float64" => Ok(DataType::Float64), + "datetime64" => Ok(DataType::Date64), + "object" => Ok(DataType::Utf8), _ => Err(PyValueError::new_err(format!( "Unable to determine Arrow Data Type from Arrow String type: {:?}", arrow_str_type diff --git a/src/expr.rs b/src/expr.rs index ecf8fae32..e502edced 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -92,6 +92,7 @@ pub mod subquery; pub mod subquery_alias; pub mod table_scan; pub mod union; +pub mod window; /// A PyExpr that can be used on a DataFrame #[pyclass(name = "Expr", module = "datafusion.expr", subclass)] @@ -112,6 +113,11 @@ impl From for PyExpr { } } +/// Convert a list of DataFusion Expr to PyExpr +pub fn py_expr_list(expr: &[Expr]) -> PyResult> { + Ok(expr.iter().map(|e| PyExpr::from(e.clone())).collect()) +} + #[pymethods] impl PyExpr { /// Return the specific expression @@ -542,6 +548,10 @@ impl PyExpr { // appear in projections) so we just delegate to the contained expression instead Self::expr_to_field(expr, input_plan) } + Expr::Wildcard => { + // Since * could be any of the valid column names just return the first one + Ok(input_plan.schema().field(0).clone()) + } _ => { let fields = exprlist_to_fields(&[expr.clone()], input_plan).map_err(PyErr::from)?; @@ -652,5 +662,8 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; Ok(()) } diff 
--git a/src/expr/window.rs b/src/expr/window.rs new file mode 100644 index 000000000..6583c97af --- /dev/null +++ b/src/expr/window.rs @@ -0,0 +1,294 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_common::{DataFusionError, ScalarValue}; +use datafusion_expr::expr::WindowFunction; +use datafusion_expr::{Expr, Window, WindowFrame, WindowFrameBound, WindowFrameUnits}; +use pyo3::prelude::*; +use std::fmt::{self, Display, Formatter}; + +use crate::common::df_schema::PyDFSchema; +use crate::errors::py_type_err; +use crate::expr::logical_node::LogicalNode; +use crate::expr::PyExpr; +use crate::sql::logical::PyLogicalPlan; + +use super::py_expr_list; + +use crate::errors::py_datafusion_err; + +#[pyclass(name = "Window", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyWindow { + window: Window, +} + +#[pyclass(name = "WindowFrame", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyWindowFrame { + window_frame: WindowFrame, +} + +impl From for WindowFrame { + fn from(window_frame: PyWindowFrame) -> Self { + window_frame.window_frame + } +} + +impl From for PyWindowFrame { + fn from(window_frame: WindowFrame) -> PyWindowFrame { + PyWindowFrame { window_frame } + 
} +} + +#[pyclass(name = "WindowFrameBound", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyWindowFrameBound { + frame_bound: WindowFrameBound, +} + +impl From for Window { + fn from(window: PyWindow) -> Window { + window.window + } +} + +impl From for PyWindow { + fn from(window: Window) -> PyWindow { + PyWindow { window } + } +} + +impl From for PyWindowFrameBound { + fn from(frame_bound: WindowFrameBound) -> Self { + PyWindowFrameBound { frame_bound } + } +} + +impl Display for PyWindow { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!( + f, + "Over\n + Window Expr: {:?} + Schema: {:?}", + &self.window.window_expr, &self.window.schema + ) + } +} + +impl Display for PyWindowFrame { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + write!( + f, + "OVER ({} BETWEEN {} AND {})", + self.window_frame.units, self.window_frame.start_bound, self.window_frame.end_bound + ) + } +} + +#[pymethods] +impl PyWindow { + /// Returns the schema of the Window + pub fn schema(&self) -> PyResult { + Ok(self.window.schema.as_ref().clone().into()) + } + + /// Returns window expressions + pub fn get_window_expr(&self) -> PyResult> { + py_expr_list(&self.window.window_expr) + } + + /// Returns order by columns in a window function expression + pub fn get_sort_exprs(&self, expr: PyExpr) -> PyResult> { + match expr.expr.unalias() { + Expr::WindowFunction(WindowFunction { order_by, .. }) => py_expr_list(&order_by), + other => Err(not_window_function_err(other)), + } + } + + /// Return partition by columns in a window function expression + pub fn get_partition_exprs(&self, expr: PyExpr) -> PyResult> { + match expr.expr.unalias() { + Expr::WindowFunction(WindowFunction { partition_by, .. 
}) => { + py_expr_list(&partition_by) + } + other => Err(not_window_function_err(other)), + } + } + + /// Return input args for window function + pub fn get_args(&self, expr: PyExpr) -> PyResult> { + match expr.expr.unalias() { + Expr::WindowFunction(WindowFunction { args, .. }) => py_expr_list(&args), + other => Err(not_window_function_err(other)), + } + } + + /// Return window function name + pub fn window_func_name(&self, expr: PyExpr) -> PyResult { + match expr.expr.unalias() { + Expr::WindowFunction(WindowFunction { fun, .. }) => Ok(fun.to_string()), + other => Err(not_window_function_err(other)), + } + } + + /// Returns a Pywindow frame for a given window function expression + pub fn get_frame(&self, expr: PyExpr) -> Option { + match expr.expr.unalias() { + Expr::WindowFunction(WindowFunction { window_frame, .. }) => Some(window_frame.into()), + _ => None, + } + } +} + +fn not_window_function_err(expr: Expr) -> PyErr { + py_type_err(format!( + "Provided {} Expr {:?} is not a WindowFunction type", + expr.variant_name(), + expr + )) +} + +#[pymethods] +impl PyWindowFrame { + #[new(unit, start_bound, end_bound)] + pub fn new(units: &str, start_bound: Option, end_bound: Option) -> PyResult { + let units = units.to_ascii_lowercase(); + let units = match units.as_str() { + "rows" => WindowFrameUnits::Rows, + "range" => WindowFrameUnits::Range, + "groups" => WindowFrameUnits::Groups, + _ => { + return Err(py_datafusion_err(DataFusionError::NotImplemented(format!( + "{:?}", + units, + )))); + } + }; + let start_bound = match start_bound { + Some(start_bound) => { + WindowFrameBound::Preceding(ScalarValue::UInt64(Some(start_bound))) + } + None => match units { + WindowFrameUnits::Range => WindowFrameBound::Preceding(ScalarValue::UInt64(None)), + WindowFrameUnits::Rows => WindowFrameBound::Preceding(ScalarValue::UInt64(None)), + WindowFrameUnits::Groups => { + return Err(py_datafusion_err(DataFusionError::NotImplemented(format!( + "{:?}", + units, + )))); + } + }, + }; 
+ let end_bound = match end_bound { + Some(end_bound) => WindowFrameBound::Following(ScalarValue::UInt64(Some(end_bound))), + None => match units { + WindowFrameUnits::Rows => WindowFrameBound::Following(ScalarValue::UInt64(None)), + WindowFrameUnits::Range => WindowFrameBound::Following(ScalarValue::UInt64(None)), + WindowFrameUnits::Groups => { + return Err(py_datafusion_err(DataFusionError::NotImplemented(format!( + "{:?}", + units, + )))); + } + }, + }; + Ok(PyWindowFrame { + window_frame: WindowFrame { + units, + start_bound, + end_bound, + }, + }) + } + + /// Returns the window frame units for the bounds + pub fn get_frame_units(&self) -> PyResult { + Ok(self.window_frame.units.to_string()) + } + /// Returns starting bound + pub fn get_lower_bound(&self) -> PyResult { + Ok(self.window_frame.start_bound.clone().into()) + } + /// Returns end bound + pub fn get_upper_bound(&self) -> PyResult { + Ok(self.window_frame.end_bound.clone().into()) + } + + /// Get a String representation of this window frame + fn __repr__(&self) -> String { + format!("{}", self) + } +} + +#[pymethods] +impl PyWindowFrameBound { + /// Returns if the frame bound is current row + pub fn is_current_row(&self) -> bool { + matches!(self.frame_bound, WindowFrameBound::CurrentRow) + } + + /// Returns if the frame bound is preceding + pub fn is_preceding(&self) -> bool { + matches!(self.frame_bound, WindowFrameBound::Preceding(_)) + } + + /// Returns if the frame bound is following + pub fn is_following(&self) -> bool { + matches!(self.frame_bound, WindowFrameBound::Following(_)) + } + /// Returns the offset of the window frame + pub fn get_offset(&self) -> PyResult> { + match &self.frame_bound { + WindowFrameBound::Preceding(val) | WindowFrameBound::Following(val) => match val { + x if x.is_null() => Ok(None), + ScalarValue::UInt64(v) => Ok(*v), + // The cast below is only safe because window bounds cannot be negative + ScalarValue::Int64(v) => Ok(v.map(|n| n as u64)), + 
ScalarValue::Utf8(Some(s)) => match s.parse::() { + Ok(s) => Ok(Some(s)), + Err(_e) => Err(DataFusionError::Plan(format!( + "Unable to parse u64 from Utf8 value '{s}'" + )) + .into()), + }, + ref x => { + Err(DataFusionError::Plan(format!("Unexpected window frame bound: {x}")).into()) + } + }, + WindowFrameBound::CurrentRow => Ok(None), + } + } + /// Returns if the frame bound is unbounded + pub fn is_unbounded(&self) -> PyResult { + match &self.frame_bound { + WindowFrameBound::Preceding(v) | WindowFrameBound::Following(v) => Ok(v.is_null()), + WindowFrameBound::CurrentRow => Ok(false), + } + } +} + +impl LogicalNode for PyWindow { + fn inputs(&self) -> Vec { + vec![self.window.input.as_ref().clone().into()] + } + + fn to_variant(&self, py: Python) -> PyResult { + Ok(self.clone().into_py(py)) + } +} diff --git a/src/functions.rs b/src/functions.rs index 42203d7b4..be903609c 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -20,8 +20,8 @@ use pyo3::{prelude::*, wrap_pyfunction}; use crate::context::PySessionContext; use crate::errors::DataFusionError; use crate::expr::conditional_expr::PyCaseBuilder; +use crate::expr::window::PyWindowFrame; use crate::expr::PyExpr; -use crate::window_frame::PyWindowFrame; use datafusion::execution::FunctionRegistry; use datafusion_common::Column; use datafusion_expr::expr::Alias; diff --git a/src/lib.rs b/src/lib.rs index b9bd5766e..2512aefa4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -54,7 +54,6 @@ mod udaf; #[allow(clippy::borrow_deref_ref)] mod udf; pub mod utils; -mod window_frame; #[cfg(feature = "mimalloc")] #[global_allocator] @@ -84,7 +83,6 @@ fn _internal(py: Python, m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; - m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/src/sql/logical.rs b/src/sql/logical.rs index 2183155bf..3aa8a699f 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -20,17 +20,20 @@ use std::sync::Arc; use 
crate::errors::py_unsupported_variant_err; use crate::expr::aggregate::PyAggregate; use crate::expr::analyze::PyAnalyze; +use crate::expr::cross_join::PyCrossJoin; use crate::expr::distinct::PyDistinct; use crate::expr::empty_relation::PyEmptyRelation; use crate::expr::explain::PyExplain; use crate::expr::extension::PyExtension; use crate::expr::filter::PyFilter; +use crate::expr::join::PyJoin; use crate::expr::limit::PyLimit; use crate::expr::projection::PyProjection; use crate::expr::sort::PySort; use crate::expr::subquery::PySubquery; use crate::expr::subquery_alias::PySubqueryAlias; use crate::expr::table_scan::PyTableScan; +use crate::expr::window::PyWindow; use datafusion_expr::LogicalPlan; use pyo3::prelude::*; @@ -62,17 +65,20 @@ impl PyLogicalPlan { Python::with_gil(|_| match self.plan.as_ref() { LogicalPlan::Aggregate(plan) => PyAggregate::from(plan.clone()).to_variant(py), LogicalPlan::Analyze(plan) => PyAnalyze::from(plan.clone()).to_variant(py), + LogicalPlan::CrossJoin(plan) => PyCrossJoin::from(plan.clone()).to_variant(py), + LogicalPlan::Distinct(plan) => PyDistinct::from(plan.clone()).to_variant(py), LogicalPlan::EmptyRelation(plan) => PyEmptyRelation::from(plan.clone()).to_variant(py), LogicalPlan::Explain(plan) => PyExplain::from(plan.clone()).to_variant(py), LogicalPlan::Extension(plan) => PyExtension::from(plan.clone()).to_variant(py), - LogicalPlan::Distinct(plan) => PyDistinct::from(plan.clone()).to_variant(py), LogicalPlan::Filter(plan) => PyFilter::from(plan.clone()).to_variant(py), + LogicalPlan::Join(plan) => PyJoin::from(plan.clone()).to_variant(py), LogicalPlan::Limit(plan) => PyLimit::from(plan.clone()).to_variant(py), LogicalPlan::Projection(plan) => PyProjection::from(plan.clone()).to_variant(py), LogicalPlan::Sort(plan) => PySort::from(plan.clone()).to_variant(py), LogicalPlan::TableScan(plan) => PyTableScan::from(plan.clone()).to_variant(py), LogicalPlan::Subquery(plan) => PySubquery::from(plan.clone()).to_variant(py), 
LogicalPlan::SubqueryAlias(plan) => PySubqueryAlias::from(plan.clone()).to_variant(py), + LogicalPlan::Window(plan) => PyWindow::from(plan.clone()).to_variant(py), other => Err(py_unsupported_variant_err(format!( "Cannot convert this plan to a LogicalNode: {:?}", other diff --git a/src/window_frame.rs b/src/window_frame.rs deleted file mode 100644 index b8f414e6a..000000000 --- a/src/window_frame.rs +++ /dev/null @@ -1,110 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use datafusion_common::{DataFusionError, ScalarValue}; -use datafusion_expr::window_frame::{WindowFrame, WindowFrameBound, WindowFrameUnits}; -use pyo3::prelude::*; -use std::fmt::{Display, Formatter}; - -use crate::errors::py_datafusion_err; - -#[pyclass(name = "WindowFrame", module = "datafusion", subclass)] -#[derive(Clone)] -pub struct PyWindowFrame { - frame: WindowFrame, -} - -impl From for WindowFrame { - fn from(frame: PyWindowFrame) -> Self { - frame.frame - } -} - -impl From for PyWindowFrame { - fn from(frame: WindowFrame) -> PyWindowFrame { - PyWindowFrame { frame } - } -} - -impl Display for PyWindowFrame { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - write!( - f, - "OVER ({} BETWEEN {} AND {})", - self.frame.units, self.frame.start_bound, self.frame.end_bound - ) - } -} - -#[pymethods] -impl PyWindowFrame { - #[new(unit, start_bound, end_bound)] - pub fn new(units: &str, start_bound: Option, end_bound: Option) -> PyResult { - let units = units.to_ascii_lowercase(); - let units = match units.as_str() { - "rows" => WindowFrameUnits::Rows, - "range" => WindowFrameUnits::Range, - "groups" => WindowFrameUnits::Groups, - _ => { - return Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - units, - )))); - } - }; - let start_bound = match start_bound { - Some(start_bound) => { - WindowFrameBound::Preceding(ScalarValue::UInt64(Some(start_bound))) - } - None => match units { - WindowFrameUnits::Range => WindowFrameBound::Preceding(ScalarValue::UInt64(None)), - WindowFrameUnits::Rows => WindowFrameBound::Preceding(ScalarValue::UInt64(None)), - WindowFrameUnits::Groups => { - return Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - units, - )))); - } - }, - }; - let end_bound = match end_bound { - Some(end_bound) => WindowFrameBound::Following(ScalarValue::UInt64(Some(end_bound))), - None => match units { - WindowFrameUnits::Rows => WindowFrameBound::Following(ScalarValue::UInt64(None)), - 
WindowFrameUnits::Range => WindowFrameBound::Following(ScalarValue::UInt64(None)), - WindowFrameUnits::Groups => { - return Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - units, - )))); - } - }, - }; - Ok(PyWindowFrame { - frame: WindowFrame { - units, - start_bound, - end_bound, - }, - }) - } - - /// Get a String representation of this window frame - fn __repr__(&self) -> String { - format!("{}", self) - } -} From d7fcea29833a43d2af56d1486e46448ab4261258 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 21 Oct 2023 11:21:32 -0600 Subject: [PATCH 094/413] small clippy fix (#524) --- src/functions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/functions.rs b/src/functions.rs index be903609c..b8c825552 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -140,7 +140,7 @@ fn window( ctx.and_then(|ctx| { ctx.ctx .udaf(name) - .map(|fun| datafusion_expr::WindowFunction::AggregateUDF(fun)) + .map(datafusion_expr::WindowFunction::AggregateUDF) .ok() }) }); From fc3c24b52e8bfa1e170fb9f3708fe014e41b3e9e Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 21 Oct 2023 13:10:30 -0600 Subject: [PATCH 095/413] Prepare 32.0.0 Release (#525) * small clippy fix * update changelog * cargo update --- CHANGELOG.md | 38 ++++++++++++++-- Cargo.lock | 122 +++++++++++++++++++++++---------------------------- 2 files changed, 90 insertions(+), 70 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dbc5a82e6..2890de4f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,40 @@ # DataFusion Python Changelog +## [32.0.0](https://github.com/apache/arrow-datafusion-python/tree/32.0.0) (2023-10-21) + +**Implemented enhancements:** + +- feat: expose PyWindowFrame [#509](https://github.com/apache/arrow-datafusion-python/pull/509) (dlovell) +- add Binary String Functions;encode,decode [#494](https://github.com/apache/arrow-datafusion-python/pull/494) (jiangzhx) +- add bit_and,bit_or,bit_xor,bool_add,bool_or 
[#496](https://github.com/apache/arrow-datafusion-python/pull/496) (jiangzhx) +- add first_value last_value [#498](https://github.com/apache/arrow-datafusion-python/pull/498) (jiangzhx) +- add regr\_\* functions [#499](https://github.com/apache/arrow-datafusion-python/pull/499) (jiangzhx) +- Add random missing bindings [#522](https://github.com/apache/arrow-datafusion-python/pull/522) (jdye64) +- Allow for multiple input files per table instead of a single file [#519](https://github.com/apache/arrow-datafusion-python/pull/519) (jdye64) +- Add support for window function bindings [#521](https://github.com/apache/arrow-datafusion-python/pull/521) (jdye64) + +**Merged pull requests:** + +- Prepare 31.0.0 release [#500](https://github.com/apache/arrow-datafusion-python/pull/500) (andygrove) +- Improve release process documentation [#505](https://github.com/apache/arrow-datafusion-python/pull/505) (andygrove) +- add Binary String Functions;encode,decode [#494](https://github.com/apache/arrow-datafusion-python/pull/494) (jiangzhx) +- build(deps): bump mimalloc from 0.1.38 to 0.1.39 [#502](https://github.com/apache/arrow-datafusion-python/pull/502) (dependabot[bot]) +- build(deps): bump syn from 2.0.32 to 2.0.35 [#503](https://github.com/apache/arrow-datafusion-python/pull/503) (dependabot[bot]) +- build(deps): bump syn from 2.0.35 to 2.0.37 [#506](https://github.com/apache/arrow-datafusion-python/pull/506) (dependabot[bot]) +- Use latest DataFusion [#511](https://github.com/apache/arrow-datafusion-python/pull/511) (andygrove) +- add bit_and,bit_or,bit_xor,bool_add,bool_or [#496](https://github.com/apache/arrow-datafusion-python/pull/496) (jiangzhx) +- use DataFusion 32 [#515](https://github.com/apache/arrow-datafusion-python/pull/515) (andygrove) +- add first_value last_value [#498](https://github.com/apache/arrow-datafusion-python/pull/498) (jiangzhx) +- build(deps): bump regex-syntax from 0.7.5 to 0.8.1 [#517](https://github.com/apache/arrow-datafusion-python/pull/517) 
(dependabot[bot]) +- build(deps): bump pyo3-build-config from 0.19.2 to 0.20.0 [#516](https://github.com/apache/arrow-datafusion-python/pull/516) (dependabot[bot]) +- add regr\_\* functions [#499](https://github.com/apache/arrow-datafusion-python/pull/499) (jiangzhx) +- Add random missing bindings [#522](https://github.com/apache/arrow-datafusion-python/pull/522) (jdye64) +- build(deps): bump rustix from 0.38.18 to 0.38.19 [#523](https://github.com/apache/arrow-datafusion-python/pull/523) (dependabot[bot]) +- Allow for multiple input files per table instead of a single file [#519](https://github.com/apache/arrow-datafusion-python/pull/519) (jdye64) +- Add support for window function bindings [#521](https://github.com/apache/arrow-datafusion-python/pull/521) (jdye64) +- Small clippy fix [#524](https://github.com/apache/arrow-datafusion-python/pull/524) (andygrove) + ## [31.0.0](https://github.com/apache/arrow-datafusion-python/tree/31.0.0) (2023-09-12) [Full Changelog](https://github.com/apache/arrow-datafusion-python/compare/28.0.0...31.0.0) @@ -445,7 +479,3 @@ - Why is pandas a requirement? 
[\#24](https://github.com/datafusion-contrib/datafusion-python/issues/24) - Unable to build [\#18](https://github.com/datafusion-contrib/datafusion-python/issues/18) - Setup CI against multiple Python version [\#6](https://github.com/datafusion-contrib/datafusion-python/issues/6) - -\* _This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)_ - -\* _This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)_ diff --git a/Cargo.lock b/Cargo.lock index 4ae3ec1f9..3887ac10f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -106,8 +106,8 @@ dependencies = [ "serde", "serde_json", "snap", - "strum 0.25.0", - "strum_macros 0.25.3", + "strum", + "strum_macros", "thiserror", "typed-builder", "uuid", @@ -178,7 +178,7 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "num", ] @@ -303,7 +303,7 @@ dependencies = [ "arrow-data", "arrow-schema", "half", - "hashbrown 0.14.1", + "hashbrown 0.14.2", ] [[package]] @@ -577,12 +577,12 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.0.1" +version = "7.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ab77dbd8adecaf3f0db40581631b995f312a8a5ae3aa9993188bb8f23d83a5b" +checksum = "7c64043d6c7b7a4c58e39e7efccfdea7b93d885a795d0c054a69dbbf4dd52686" dependencies = [ - "strum 0.24.1", - "strum_macros 0.24.3", + "strum", + "strum_macros", "unicode-width", ] @@ -641,9 +641,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.2.9" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" +checksum = "3fbc60abd742b35f2492f808e1abbb83d45f72db402e14c55057edc9c7b1e9e4" dependencies = [ "libc", ] @@ -707,7 +707,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" dependencies = [ "cfg-if", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "lock_api", "once_cell", "parking_lot_core", @@ -741,7 +741,7 @@ dependencies = [ "futures", "glob", "half", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "indexmap 2.0.2", "itertools 0.11.0", "log", @@ -796,7 +796,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "futures", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "log", "object_store", "parking_lot", @@ -816,8 +816,8 @@ dependencies = [ "arrow-array", "datafusion-common", "sqlparser", - "strum 0.25.0", - "strum_macros 0.25.3", + "strum", + "strum_macros", ] [[package]] @@ -832,7 +832,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "itertools 0.11.0", "log", "regex-syntax 0.7.5", @@ -856,7 +856,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "half", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "hex", "indexmap 2.0.2", "itertools 0.11.0", @@ -891,7 +891,7 @@ dependencies = [ "datafusion-physical-expr", "futures", "half", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "indexmap 2.0.2", "itertools 0.11.0", "log", @@ -1244,9 +1244,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.14.1" +version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dfda62a12f55daeae5015f81b0baea145391cb4520f86c248fc615d72640d12" +checksum = "f93e7192158dbcda357bdec5fb5788eebf8bbac027f3f33e719d29135ae84156" dependencies = [ "ahash", "allocator-api2", @@ -1336,7 +1336,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.4.9", + "socket2 0.4.10", "tokio", "tower-service", "tracing", @@ -1407,7 +1407,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8adf3ddd720272c6ea8bf59463c04e0f93d0bbf7c5439b691bca2987e0270897" dependencies 
= [ "equivalent", - "hashbrown 0.14.1", + "hashbrown 0.14.2", ] [[package]] @@ -1618,9 +1618,9 @@ checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f" [[package]] name = "lock_api" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" +checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" dependencies = [ "autocfg", "scopeguard", @@ -1882,13 +1882,13 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.8" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" +checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.4.1", "smallvec", "windows-targets", ] @@ -1913,7 +1913,7 @@ dependencies = [ "chrono", "flate2", "futures", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "lz4", "num", "num-bigint", @@ -2240,6 +2240,15 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "regex" version = "1.10.2" @@ -2371,9 +2380,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.19" +version = "0.38.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "745ecfa778e66b2b63c88a61cb36e0eea109e803b0b86bf9879fbc77c70e86ed" +checksum = "67ce50cb2e16c2903e30d1cbccfd8387a74b9d4c938b6a4c5ec6cc7556f7a8a0" dependencies = [ "bitflags 2.4.1", "errno", @@ -2627,9 +2636,9 @@ checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831" [[package]] name = "socket2" -version = "0.4.9" +version = "0.4.10" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" +checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d" dependencies = [ "libc", "winapi", @@ -2637,9 +2646,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.4" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4031e820eb552adee9295814c0ced9e5cf38ddf1e8b7d566d6de8e2538ea989e" +checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" dependencies = [ "libc", "windows-sys", @@ -2678,32 +2687,13 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" -[[package]] -name = "strum" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" - [[package]] name = "strum" version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" dependencies = [ - "strum_macros 0.25.3", -] - -[[package]] -name = "strum_macros" -version = "0.24.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 1.0.109", + "strum_macros", ] [[package]] @@ -2793,9 +2783,9 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.12.11" +version = "0.12.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d0e916b1148c8e263850e1ebcbd046f333e0683c724876bb0da63ea4373dc8a" +checksum = "14c39fd04924ca3a864207c66fc2cd7d22d7c016007f9ce846cbb9326331930a" [[package]] name = "tempfile" @@ -2805,25 +2795,25 @@ checksum = 
"cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef" dependencies = [ "cfg-if", "fastrand", - "redox_syscall", + "redox_syscall 0.3.5", "rustix", "windows-sys", ] [[package]] name = "thiserror" -version = "1.0.49" +version = "1.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1177e8c6d7ede7afde3585fd2513e611227efd6481bd78d2e82ba1ce16557ed4" +checksum = "f9a7210f5c9a7156bb50aa36aed4c95afb51df0df00713949448cf9e97d382d2" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.49" +version = "1.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc" +checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" dependencies = [ "proc-macro2", "quote", @@ -2878,7 +2868,7 @@ dependencies = [ "num_cpus", "parking_lot", "pin-project-lite", - "socket2 0.5.4", + "socket2 0.5.5", "tokio-macros", "windows-sys", ] @@ -2926,9 +2916,9 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" -version = "0.1.39" +version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee2ef2af84856a50c1d430afce2fdded0a4ec7eda868db86409b4543df0797f9" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ "pin-project-lite", "tracing-attributes", @@ -3105,9 +3095,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.4.1" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d" +checksum = "88ad59a7560b41a70d191093a945f0b87bc1deeda46fb237479708a1d6b6cdfc" dependencies = [ "getrandom", "serde", From aaaeeb1a97c0b65a01c752c8359969743eb8745f Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 23 Oct 2023 
15:50:11 -0400 Subject: [PATCH 096/413] First pass at getting architectured builds working (#350) * First pass at getting architectured builds working * Fix RAT failures * Build add builds on windows/macOS * Refactor artifacts upload * Remove windows/macOS builds, include all py versions * Rename conda package to avoid collision * Handle licensing * Update recipes to reflect recent changes * Build for linux-aarch64 * Add licenses to files * Add zlib to host deps * Fixes to build/overlinking errors * Always include c compiler in build deps --- .github/workflows/conda.yml | 94 +++++++++++++++++++++++++++---------- conda/recipes/bld.bat | 26 ++++++++++ conda/recipes/build.sh | 84 +++++++++++++++++++++++++++++++++ conda/recipes/meta.yaml | 31 ++++++++---- 4 files changed, 202 insertions(+), 33 deletions(-) create mode 100644 conda/recipes/bld.bat create mode 100644 conda/recipes/build.sh diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 9853230de..16dfd7a4e 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -1,7 +1,20 @@ name: Build conda nightly -on: [push, pull_request] +on: + push: + branches: + - main + pull_request: + paths: + - Cargo.toml + - Cargo.lock + - pyproject.toml + - conda/recipes/** + - .github/workflows/conda.yml + schedule: + - cron: '0 0 * * 0' -# Cancel any already running instances of this build +# When this workflow is queued, automatically cancel any previous running +# or pending jobs from the same branch concurrency: group: conda-${{ github.head_ref }} cancel-in-progress: true @@ -13,9 +26,44 @@ defaults: jobs: conda: - name: Build (and optionally upload) the conda nightly + name: "Build conda nightlies (python: ${{ matrix.python }}, arch: ${{ matrix.arch }})" runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python: ["3.8", "3.9", "3.10"] + arch: ["linux-64", "linux-aarch64"] steps: + - name: Manage disk space + if: matrix.arch == 'linux-aarch64' + run: | + sudo mkdir -p 
/opt/empty_dir || true + for d in \ + /opt/ghc \ + /opt/hostedtoolcache \ + /usr/lib/jvm \ + /usr/local/.ghcup \ + /usr/local/lib/android \ + /usr/local/share/powershell \ + /usr/share/dotnet \ + /usr/share/swift \ + ; do + sudo rsync --stats -a --delete /opt/empty_dir/ $d || true + done + sudo apt-get purge -y -f firefox \ + google-chrome-stable \ + microsoft-edge-stable + sudo apt-get autoremove -y >& /dev/null + sudo apt-get autoclean -y >& /dev/null + sudo docker image prune --all --force + df -h + - name: Create swapfile + if: matrix.arch == 'linux-aarch64' + run: | + sudo fallocate -l 10GiB /swapfile || true + sudo chmod 600 /swapfile || true + sudo mkswap /swapfile || true + sudo swapon /swapfile || true - uses: actions/checkout@v3 with: fetch-depth: 0 @@ -24,38 +72,34 @@ jobs: with: miniforge-variant: Mambaforge use-mamba: true - python-version: "3.10" + python-version: "3.8" channel-priority: strict - name: Install dependencies run: | - mamba install boa conda-verify + mamba install -c conda-forge boa conda-verify which python pip list mamba list - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - components: clippy, rustfmt - - name: Build conda package + - name: Build conda packages run: | # suffix for nightly package versions export VERSION_SUFFIX=a`date +%y%m%d` conda mambabuild conda/recipes \ + --python ${{ matrix.python }} \ + --variants "{target_platform: [${{ matrix.arch }}]}" \ + --error-overlinking \ + --no-test \ --no-anaconda-upload \ - --output-folder . 
- # - name: Upload conda package - # if: | - # github.event_name == 'push' - # && github.repository == 'apache/arrow-datafusion-python' - # env: - # ANACONDA_API_TOKEN: ${{ secrets.DASK_CONDA_TOKEN }} - # LABEL: ${{ github.ref == 'refs/heads/datafusion-sql-planner' && 'dev_datafusion' || 'dev' }} - # run: | - # # install anaconda for upload - # mamba install anaconda-client - - # anaconda upload --label $LABEL linux-64/*.tar.bz2 + --output-folder packages + - name: Test conda packages + if: matrix.arch == 'linux-64' # can only test native platform packages + run: | + conda mambabuild --test packages/${{ matrix.arch }}/*.tar.bz2 + - name: Upload conda packages as artifacts + uses: actions/upload-artifact@v3 + with: + name: "conda nightlies (python - ${{ matrix.python }}, arch - ${{ matrix.arch }})" + # need to install all conda channel metadata to properly install locally + path: packages/ diff --git a/conda/recipes/bld.bat b/conda/recipes/bld.bat new file mode 100644 index 000000000..fc3ac88b3 --- /dev/null +++ b/conda/recipes/bld.bat @@ -0,0 +1,26 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +maturin build -vv -j %CPU_COUNT% --release --strip --manylinux off --interpreter=%PYTHON% + +FOR /F "delims=" %%i IN ('dir /s /b target\wheels\*.whl') DO set datafusion_wheel=%%i + +%PYTHON% -m pip install --no-deps %datafusion_wheel% -vv + +cargo-bundle-licenses --format yaml --output THIRDPARTY.yml diff --git a/conda/recipes/build.sh b/conda/recipes/build.sh new file mode 100644 index 000000000..af640e6ca --- /dev/null +++ b/conda/recipes/build.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +set -ex + +# See https://github.com/conda-forge/rust-feedstock/blob/master/recipe/build.sh for cc env explanation +if [ "$c_compiler" = gcc ] ; then + case "$target_platform" in + linux-64) rust_env_arch=X86_64_UNKNOWN_LINUX_GNU ;; + linux-aarch64) rust_env_arch=AARCH64_UNKNOWN_LINUX_GNU ;; + linux-ppc64le) rust_env_arch=POWERPC64LE_UNKNOWN_LINUX_GNU ;; + *) echo "unknown target_platform $target_platform" ; exit 1 ;; + esac + + export CARGO_TARGET_${rust_env_arch}_LINKER=$CC +fi + +declare -a _xtra_maturin_args + +mkdir -p $SRC_DIR/.cargo + +if [ "$target_platform" = "osx-64" ] ; then + cat <> $SRC_DIR/.cargo/config +[target.x86_64-apple-darwin] +linker = "$CC" +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] + +EOF + + _xtra_maturin_args+=(--target=x86_64-apple-darwin) + +elif [ "$target_platform" = "osx-arm64" ] ; then + cat <> $SRC_DIR/.cargo/config +# Required for intermediate codegen stuff +[target.x86_64-apple-darwin] +linker = "$CC_FOR_BUILD" + +# Required for final binary artifacts for target +[target.aarch64-apple-darwin] +linker = "$CC" +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] + +EOF + _xtra_maturin_args+=(--target=aarch64-apple-darwin) + + # This variable must be set to the directory containing the target's libpython DSO + export PYO3_CROSS_LIB_DIR=$PREFIX/lib + + # xref: https://github.com/PyO3/pyo3/commit/7beb2720 + export PYO3_PYTHON_VERSION=${PY_VER} + + # xref: https://github.com/conda-forge/python-feedstock/issues/621 + sed -i.bak 's,aarch64,arm64,g' $BUILD_PREFIX/venv/lib/os-patch.py + sed -i.bak 's,aarch64,arm64,g' $BUILD_PREFIX/venv/lib/platform-patch.py +fi + +maturin build -vv -j "${CPU_COUNT}" --release --strip --manylinux off --interpreter="${PYTHON}" "${_xtra_maturin_args[@]}" + +"${PYTHON}" -m pip install $SRC_DIR/target/wheels/datafusion*.whl --no-deps -vv + +cargo-bundle-licenses --format yaml --output THIRDPARTY.yml diff --git a/conda/recipes/meta.yaml 
b/conda/recipes/meta.yaml index e2bb8bee3..113e7a441 100644 --- a/conda/recipes/meta.yaml +++ b/conda/recipes/meta.yaml @@ -29,18 +29,27 @@ source: git_url: ../.. build: - noarch: python - script: {{ PYTHON }} -m pip install . -vv - number: 0 + number: {{ GIT_DESCRIBE_NUMBER }} + string: py{{ python | replace(".", "") }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} requirements: - host: - - python >=3.7 + build: + - python # [build_platform != target_platform] + - cross-python_{{ target_platform }} # [build_platform != target_platform] + - zlib # [build_platform != target_platform] + - {{ compiler('c') }} + - {{ compiler('rust') }} + - cargo-bundle-licenses - maturin >=0.15,<0.16 - libprotobuf =3 + host: + - python + - maturin >=0.15,<0.16 - pip + - zlib + - xz # [linux64] run: - - python >=3.7 + - python - pyarrow >=11.0.0 test: @@ -55,5 +64,11 @@ about: home: https://arrow.apache.org/datafusion license: Apache-2.0 license_family: APACHE - license_file: LICENSE.txt - summary: Apache Arrow DataFusion Python Bindings + license_file: + - LICENSE.txt + - THIRDPARTY.yml + description: | + DataFusion is an extensible query execution framework, written in Rust, + that uses Apache Arrow as its in-memory format. 
+ doc_url: https://arrow.apache.org/datafusion + dev_url: https://github.com/apache/arrow-datafusion From da6c183ebb673b27808ff35b1b9fd8d577a203c0 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 26 Oct 2023 14:24:43 -0400 Subject: [PATCH 097/413] Remove libprotobuf dep (#527) * Remove libprotobuf as a build time dependency of Conda * Add substrait feature which enables building with substrait features * Add datafusion._internal/substrait to the pyproject.toml file so substrait will be built for pytests when using pip * Rename feature to just substrait * Add protoc since libprotobuf was removed from conda requirements file * Refactor substrait module being initialized as a function * Reintroduce libprotobuf as build dependency for conda build --- Cargo.toml | 3 ++- pyproject.toml | 1 + src/lib.rs | 13 ++++++++++++- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7c1a57bd3..06834b92f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,7 @@ include = ["/src", "/datafusion", "/LICENSE.txt", "pyproject.toml", "Cargo.toml" [features] default = ["mimalloc"] protoc = [ "datafusion-substrait/protoc" ] +substrait = ["dep:datafusion-substrait"] [dependencies] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } @@ -41,7 +42,7 @@ datafusion-common = { version = "32.0.0", features = ["pyarrow"] } datafusion-expr = { version = "32.0.0" } datafusion-optimizer = { version = "32.0.0" } datafusion-sql = { version = "32.0.0" } -datafusion-substrait = { version = "32.0.0" } +datafusion-substrait = { version = "32.0.0", optional = true } prost = "0.11" prost-types = "0.11" uuid = { version = "1.3", features = ["v4"] } diff --git a/pyproject.toml b/pyproject.toml index 4fdc4586f..d35360519 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,3 +62,4 @@ include = [ exclude = [".github/**", "ci/**", ".asf.yaml"] # Require Cargo.lock is up to date locked = true +features = ["substrait"] diff --git 
a/src/lib.rs b/src/lib.rs index 2512aefa4..413b2a429 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,6 +25,8 @@ pub use datafusion_common; pub use datafusion_expr; pub use datafusion_optimizer; pub use datafusion_sql; + +#[cfg(feature = "substrait")] pub use datafusion_substrait; #[allow(clippy::borrow_deref_ref)] @@ -48,6 +50,8 @@ mod pyarrow_filter_expression; mod record_batch; pub mod sql; pub mod store; + +#[cfg(feature = "substrait")] pub mod substrait; #[allow(clippy::borrow_deref_ref)] mod udaf; @@ -108,9 +112,16 @@ fn _internal(py: Python, m: &PyModule) -> PyResult<()> { m.add_submodule(store)?; // Register substrait as a submodule + #[cfg(feature = "substrait")] + setup_substrait_module(py, m)?; + + Ok(()) +} + +#[cfg(feature = "substrait")] +fn setup_substrait_module(py: Python, m: &PyModule) -> PyResult<()> { let substrait = PyModule::new(py, "substrait")?; substrait::init_module(substrait)?; m.add_submodule(substrait)?; - Ok(()) } From e4942b897472ec2f0c59682db4954d0aa2d5c185 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 6 Nov 2023 11:18:18 -0700 Subject: [PATCH 098/413] Upgrade to DataFusion 33.0.0-rc1 (#530) * fix issues * fix TODO * clippy * use df 33.0.0-rc1 --- Cargo.lock | 562 ++++++++++++++++++++-------------------- Cargo.toml | 20 +- src/common/data_type.rs | 2 +- src/dataset_exec.rs | 7 +- src/expr.rs | 2 +- src/expr/window.rs | 7 +- src/substrait.rs | 2 +- src/udaf.rs | 3 +- src/udf.rs | 3 +- 9 files changed, 299 insertions(+), 309 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3887ac10f..9cf592d29 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -25,15 +25,16 @@ checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" [[package]] name = "ahash" -version = "0.8.3" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a" 
dependencies = [ "cfg-if", "const-random", "getrandom", "once_cell", "version_check", + "zerocopy", ] [[package]] @@ -129,9 +130,9 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fab9e93ba8ce88a37d5a30dce4b9913b75413dc1ac56cb5d72e5a840543f829" +checksum = "edb738d83750ec705808f6d44046d165e6bb8623f64e29a4d53fcb136ab22dfb" dependencies = [ "ahash", "arrow-arith", @@ -152,9 +153,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc1d4e368e87ad9ee64f28b9577a3834ce10fe2703a26b28417d485bbbdff956" +checksum = "c5c3d17fc5b006e7beeaebfb1d2edfc92398b981f82d9744130437909b72a468" dependencies = [ "arrow-array", "arrow-buffer", @@ -167,9 +168,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d02efa7253ede102d45a4e802a129e83bcc3f49884cab795b1ac223918e4318d" +checksum = "55705ada5cdde4cb0f202ffa6aa756637e33fea30e13d8d0d0fd6a24ffcee1e3" dependencies = [ "ahash", "arrow-buffer", @@ -184,9 +185,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fda119225204141138cb0541c692fbfef0e875ba01bfdeaed09e9d354f9d6195" +checksum = "a722f90a09b94f295ab7102542e97199d3500128843446ef63e410ad546c5333" dependencies = [ "bytes", "half", @@ -195,9 +196,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d825d51b9968868d50bc5af92388754056796dbc62a4e25307d588a1fc84dee" +checksum = "af01fc1a06f6f2baf31a04776156d47f9f31ca5939fe6d00cd7a059f95a46ff1" 
dependencies = [ "arrow-array", "arrow-buffer", @@ -213,9 +214,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43ef855dc6b126dc197f43e061d4de46b9d4c033aa51c2587657f7508242cef1" +checksum = "83cbbfde86f9ecd3f875c42a73d8aeab3d95149cd80129b18d09e039ecf5391b" dependencies = [ "arrow-array", "arrow-buffer", @@ -232,9 +233,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "475a4c3699c8b4095ca61cecf15da6f67841847a5f5aac983ccb9a377d02f73a" +checksum = "d0a547195e607e625e7fafa1a7269b8df1a4a612c919efd9b26bd86e74538f3a" dependencies = [ "arrow-buffer", "arrow-schema", @@ -244,9 +245,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1248005c8ac549f869b7a840859d942bf62471479c1a2d82659d453eebcd166a" +checksum = "e36bf091502ab7e37775ff448413ef1ffff28ff93789acb669fffdd51b394d51" dependencies = [ "arrow-array", "arrow-buffer", @@ -258,9 +259,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f03d7e3b04dd688ccec354fe449aed56b831679f03e44ee2c1cfc4045067b69c" +checksum = "7ac346bc84846ab425ab3c8c7b6721db90643bc218939677ed7e071ccbfb919d" dependencies = [ "arrow-array", "arrow-buffer", @@ -269,7 +270,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.0.2", + "indexmap 2.1.0", "lexical-core", "num", "serde", @@ -278,9 +279,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03b87aa408ea6a6300e49eb2eba0c032c88ed9dc19e0a9948489c55efdca71f4" +checksum = 
"4502123d2397319f3a13688432bc678c61cb1582f2daa01253186da650bf5841" dependencies = [ "arrow-array", "arrow-buffer", @@ -293,9 +294,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "114a348ab581e7c9b6908fcab23cb39ff9f060eb19e72b13f8fb8eaa37f65d22" +checksum = "249fc5a07906ab3f3536a6e9f118ec2883fbcde398a97a5ba70053f0276abda4" dependencies = [ "ahash", "arrow-array", @@ -308,18 +309,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d1d179c117b158853e0101bfbed5615e86fe97ee356b4af901f1c5001e1ce4b" +checksum = "9d7a8c3f97f5ef6abd862155a6f39aaba36b029322462d72bbcfa69782a50614" dependencies = [ "bitflags 2.4.1", ] [[package]] name = "arrow-select" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5c71e003202e67e9db139e5278c79f5520bb79922261dfe140e4637ee8b6108" +checksum = "f868f4a5001429e20f7c1994b5cd1aa68b82e3db8cf96c559cdb56dc8be21410" dependencies = [ "ahash", "arrow-array", @@ -331,9 +332,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4cebbb282d6b9244895f4a9a912e55e57bce112554c7fa91fcec5459cb421ab" +checksum = "a27fdf8fc70040a2dee78af2e217479cb5b263bd7ab8711c7999e74056eb688a" dependencies = [ "arrow-array", "arrow-buffer", @@ -342,7 +343,7 @@ dependencies = [ "arrow-select", "num", "regex", - "regex-syntax 0.7.5", + "regex-syntax", ] [[package]] @@ -371,7 +372,7 @@ checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -382,7 +383,7 @@ checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9" 
dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -417,9 +418,9 @@ dependencies = [ [[package]] name = "base64" -version = "0.21.4" +version = "0.21.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2" +checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" [[package]] name = "bitflags" @@ -477,9 +478,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "2.5.0" +version = "2.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da74e2b81409b1b743f8f0c62cc6254afefb8b8e50bbfe3735550f7aeefa3448" +checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -555,9 +556,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1369bc6b9e9a7dfdae2055f6ec151fe9c554a9d23d357c0237cee2e25eaabb7" +checksum = "e23185c0e21df6ed832a12e2bda87c7d1def6842881fb634a8511ced741b0d76" dependencies = [ "chrono", "chrono-tz-build", @@ -566,9 +567,9 @@ dependencies = [ [[package]] name = "chrono-tz-build" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2f5ebdc942f57ed96d560a6d1a459bae5851102a25d5bf89dc04ae453e31ecf" +checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" dependencies = [ "parse-zoneinfo", "phf", @@ -588,23 +589,21 @@ dependencies = [ [[package]] name = "const-random" -version = "0.1.15" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368a7a772ead6ce7e1de82bfb04c485f3db8ec744f72925af5735e29a22cc18e" +checksum = "5aaf16c9c2c612020bcfd042e170f6e32de9b9d75adb5277cdbbd2e2c8c8299a" dependencies = [ "const-random-macro", - "proc-macro-hack", ] [[package]] 
name = "const-random-macro" -version = "0.1.15" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d7d6ab3c3a2282db210df5f02c4dab6e0a7057af0fb7ebd4070f30fe05c0ddb" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ "getrandom", "once_cell", - "proc-macro-hack", "tiny-keccak", ] @@ -641,9 +640,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.2.10" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fbc60abd742b35f2492f808e1abbb83d45f72db402e14c55057edc9c7b1e9e4" +checksum = "ce420fe07aecd3e67c5f910618fe65e94158f6dcc0adf44e00d69ce2bdfe0fd0" dependencies = [ "libc", ] @@ -715,9 +714,8 @@ dependencies = [ [[package]] name = "datafusion" -version = "32.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7014432223f4d721cb9786cd88bb89e7464e0ba984d4a7f49db7787f5f268674" +version = "33.0.0" +source = "git+https://github.com/apache/arrow-datafusion?rev=33.0.0-rc1#262f08778b8ec231d96792c01fc3e051640eb5d4" dependencies = [ "ahash", "apache-avro", @@ -742,15 +740,14 @@ dependencies = [ "glob", "half", "hashbrown 0.14.2", - "indexmap 2.0.2", - "itertools 0.11.0", + "indexmap 2.1.0", + "itertools", "log", "num-traits", "num_cpus", "object_store", "parking_lot", "parquet", - "percent-encoding", "pin-project-lite", "rand", "sqlparser", @@ -760,14 +757,13 @@ dependencies = [ "url", "uuid", "xz2", - "zstd 0.12.4", + "zstd 0.13.0", ] [[package]] name = "datafusion-common" -version = "32.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb3903ed8f102892f17b48efa437f3542159241d41c564f0d1e78efdc5e663aa" +version = "33.0.0" +source = "git+https://github.com/apache/arrow-datafusion?rev=33.0.0-rc1#262f08778b8ec231d96792c01fc3e051640eb5d4" dependencies = [ "ahash", "apache-avro", @@ -786,9 +782,8 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = 
"32.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "780b73b2407050e53f51a9781868593f694102c59e622de9a8aafc0343c4f237" +version = "33.0.0" +source = "git+https://github.com/apache/arrow-datafusion?rev=33.0.0-rc1#262f08778b8ec231d96792c01fc3e051640eb5d4" dependencies = [ "arrow", "chrono", @@ -807,9 +802,8 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "32.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24c382676338d8caba6c027ba0da47260f65ffedab38fda78f6d8043f607557c" +version = "33.0.0" +source = "git+https://github.com/apache/arrow-datafusion?rev=33.0.0-rc1#262f08778b8ec231d96792c01fc3e051640eb5d4" dependencies = [ "ahash", "arrow", @@ -822,9 +816,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "32.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f2904a432f795484fd45e29ded4537152adb60f636c05691db34fcd94c92c96" +version = "33.0.0" +source = "git+https://github.com/apache/arrow-datafusion?rev=33.0.0-rc1#262f08778b8ec231d96792c01fc3e051640eb5d4" dependencies = [ "arrow", "async-trait", @@ -833,21 +826,21 @@ dependencies = [ "datafusion-expr", "datafusion-physical-expr", "hashbrown 0.14.2", - "itertools 0.11.0", + "itertools", "log", - "regex-syntax 0.7.5", + "regex-syntax", ] [[package]] name = "datafusion-physical-expr" -version = "32.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57b4968e9a998dc0476c4db7a82f280e2026b25f464e4aa0c3bb9807ee63ddfd" +version = "33.0.0" +source = "git+https://github.com/apache/arrow-datafusion?rev=33.0.0-rc1#262f08778b8ec231d96792c01fc3e051640eb5d4" dependencies = [ "ahash", "arrow", "arrow-array", "arrow-buffer", + "arrow-ord", "arrow-schema", "base64", "blake2", @@ -858,8 +851,8 @@ dependencies = [ "half", "hashbrown 0.14.2", "hex", - "indexmap 2.0.2", - "itertools 0.11.0", + "indexmap 2.1.0", + "itertools", "libc", "log", "md-5", @@ -874,9 +867,8 
@@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "32.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd0d1fe54e37a47a2d58a1232c22786f2c28ad35805fdcd08f0253a8b0aaa90" +version = "33.0.0" +source = "git+https://github.com/apache/arrow-datafusion?rev=33.0.0-rc1#262f08778b8ec231d96792c01fc3e051640eb5d4" dependencies = [ "ahash", "arrow", @@ -892,8 +884,8 @@ dependencies = [ "futures", "half", "hashbrown 0.14.2", - "indexmap 2.0.2", - "itertools 0.11.0", + "indexmap 2.1.0", + "itertools", "log", "once_cell", "parking_lot", @@ -905,7 +897,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "32.0.0" +version = "33.0.0" dependencies = [ "async-trait", "datafusion", @@ -921,10 +913,10 @@ dependencies = [ "prost", "prost-types", "pyo3", - "pyo3-build-config 0.20.0", + "pyo3-build-config", "rand", - "regex-syntax 0.8.2", - "syn 2.0.38", + "regex-syntax", + "syn 2.0.39", "tokio", "url", "uuid", @@ -932,9 +924,8 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "32.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b568d44c87ead99604d704f942e257c8a236ee1bbf890ee3e034ad659dcb2c21" +version = "33.0.0" +source = "git+https://github.com/apache/arrow-datafusion?rev=33.0.0-rc1#262f08778b8ec231d96792c01fc3e051640eb5d4" dependencies = [ "arrow", "arrow-schema", @@ -946,14 +937,13 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "32.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2884dff8207774c1ea5f7b008d44b20e9723bd0b2e4b7dd6627390d8b526b50" +version = "33.0.0" +source = "git+https://github.com/apache/arrow-datafusion?rev=33.0.0-rc1#262f08778b8ec231d96792c01fc3e051640eb5d4" dependencies = [ "async-recursion", "chrono", "datafusion", - "itertools 0.11.0", + "itertools", "object_store", "prost", "prost-types", @@ -980,9 +970,9 @@ checksum = 
"fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "dyn-clone" -version = "1.0.14" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23d2f3407d9a573d666de4b5bdf10569d73ca9478087346697dcbae6244bfbcd" +checksum = "545b22097d44f8a9581187cdf93de7a71e4722bf51200cfaba810865b49a495d" [[package]] name = "either" @@ -1064,9 +1054,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" +checksum = "da0290714b38af9b4a7b094b8a37086d1b4e61f2df9122c3cad2577669145335" dependencies = [ "futures-channel", "futures-core", @@ -1079,9 +1069,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" +checksum = "ff4dd66668b557604244583e3e1e1eada8c5c2e96a6d0d6653ede395b78bbacb" dependencies = [ "futures-core", "futures-sink", @@ -1089,15 +1079,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" +checksum = "eb1d22c66e66d9d72e1758f0bd7d4fd0bee04cad842ee34587d68c07e45d088c" [[package]] name = "futures-executor" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" +checksum = "0f4fb8693db0cf099eadcca0efe2a5a22e4550f98ed16aba6c48700da29597bc" dependencies = [ "futures-core", "futures-task", @@ -1106,38 +1096,38 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.28" +version = "0.3.29" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" +checksum = "8bf34a163b5c4c52d0478a4d757da8fb65cabef42ba90515efee0f6f9fa45aaa" [[package]] name = "futures-macro" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" +checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] name = "futures-sink" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" +checksum = "e36d3378ee38c2a36ad710c5d30c2911d752cb941c00c72dbabfb786a7970817" [[package]] name = "futures-task" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" +checksum = "efd193069b0ddadc69c46389b740bbccdd97203899b48d09c5f7969591d6bae2" [[package]] name = "futures-util" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" +checksum = "a19526d624e703a3179b3d322efec918b6246ea0fa51d41124525f00f1cc8104" dependencies = [ "futures-channel", "futures-core", @@ -1345,9 +1335,9 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.24.1" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d78e1e73ec14cf7375674f74d7dde185c8206fd9dea6fb6295e8a98098aaa97" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", "http", @@ -1402,9 +1392,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.0.2" 
+version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8adf3ddd720272c6ea8bf59463c04e0f93d0bbf7c5439b691bca2987e0270897" +checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" dependencies = [ "equivalent", "hashbrown 0.14.2", @@ -1412,9 +1402,9 @@ dependencies = [ [[package]] name = "indoc" -version = "1.0.9" +version = "2.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa799dd5ed20a7e349f3b4639aa80d74549c81716d9ec4f994c9b5815598306" +checksum = "1e186cfbae8084e513daff4240b4797e342f988cecda4fb6c939150f96315fd8" [[package]] name = "integer-encoding" @@ -1424,18 +1414,9 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "ipnet" -version = "2.8.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6" - -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", -] +checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" [[package]] name = "itertools" @@ -1463,9 +1444,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.64" +version = "0.3.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +checksum = "54c0c35952f67de54bb584e9fd912b3023117cbafc0a77d8f3dee1fb5f572fe8" dependencies = [ "wasm-bindgen", ] @@ -1542,9 +1523,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.149" +version = "0.2.150" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" +checksum = 
"89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" [[package]] name = "libflate" @@ -1633,23 +1614,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" [[package]] -name = "lz4" -version = "1.24.0" +name = "lz4_flex" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e9e2dd86df36ce760a60f6ff6ad526f7ba1f14ba0356f8254fb6905e6494df1" +checksum = "3ea9b256699eda7b0387ffbc776dd625e28bde3918446381781245b7a50349d8" dependencies = [ - "libc", - "lz4-sys", -] - -[[package]] -name = "lz4-sys" -version = "1.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" -dependencies = [ - "cc", - "libc", + "twox-hash", ] [[package]] @@ -1714,9 +1684,9 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2" +checksum = "3dce281c5e46beae905d4de1870d8b1509a9142b62eedf18b443b011ca8343d0" dependencies = [ "libc", "wasi", @@ -1838,13 +1808,13 @@ dependencies = [ "futures", "humantime", "hyper", - "itertools 0.11.0", + "itertools", "parking_lot", "percent-encoding", "quick-xml", "rand", "reqwest", - "ring", + "ring 0.16.20", "rustls-pemfile", "serde", "serde_json", @@ -1888,16 +1858,16 @@ checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.4.1", + "redox_syscall", "smallvec", "windows-targets", ] [[package]] name = "parquet" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0463cc3b256d5f50408c49a4be3a16674f4c8ceef60941709620a062b1f6bf4d" +checksum = "239229e6a668ab50c61de3dce61cf0fa1069345f7aa0f4c934491f92205a4945" 
dependencies = [ "ahash", "arrow-array", @@ -1914,7 +1884,7 @@ dependencies = [ "flate2", "futures", "hashbrown 0.14.2", - "lz4", + "lz4_flex", "num", "num-bigint", "object_store", @@ -1924,7 +1894,7 @@ dependencies = [ "thrift", "tokio", "twox-hash", - "zstd 0.12.4", + "zstd 0.13.0", ] [[package]] @@ -1955,7 +1925,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" dependencies = [ "fixedbitset", - "indexmap 2.0.2", + "indexmap 2.1.0", ] [[package]] @@ -2027,15 +1997,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" dependencies = [ "proc-macro2", - "syn 2.0.38", + "syn 2.0.39", ] -[[package]] -name = "proc-macro-hack" -version = "0.5.20+deprecated" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" - [[package]] name = "proc-macro2" version = "1.0.69" @@ -2047,9 +2011,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.11.9" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd" +checksum = "f4fdd22f3b9c31b53c060df4a0613a1c7f062d4115a2b984dd15b1858f7e340d" dependencies = [ "bytes", "prost-derive", @@ -2057,16 +2021,16 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.11.9" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270" +checksum = "8bdf592881d821b83d471f8af290226c8d51402259e9bb5be7f9f8bdebbb11ac" dependencies = [ "bytes", "heck", - "itertools 0.10.5", - "lazy_static", + "itertools", "log", "multimap", + "once_cell", "petgraph", "prost", "prost-types", @@ -2077,22 +2041,22 @@ dependencies = [ [[package]] name = "prost-derive" 
-version = "0.11.9" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4" +checksum = "265baba7fabd416cf5078179f7d2cbeca4ce7a9041111900675ea7c4cb8a4c32" dependencies = [ "anyhow", - "itertools 0.10.5", + "itertools", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.39", ] [[package]] name = "prost-types" -version = "0.11.9" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "213622a1460818959ac1181aaeb2dc9c7f63df720db7d788b3e24eacd1983e13" +checksum = "e081b29f63d83a4bc75cfc9f3fe424f9156cf92d8a4f0c9407cce9a1b67327cf" dependencies = [ "prost", ] @@ -2108,31 +2072,21 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.19.2" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e681a6cfdc4adcc93b4d3cf993749a4552018ee0a9b65fc0ccfad74352c72a38" +checksum = "04e8453b658fe480c3e70c8ed4e3d3ec33eb74988bd186561b0cc66b85c3bc4b" dependencies = [ "cfg-if", "indoc", "libc", "memoffset", "parking_lot", - "pyo3-build-config 0.19.2", + "pyo3-build-config", "pyo3-ffi", "pyo3-macros", "unindent", ] -[[package]] -name = "pyo3-build-config" -version = "0.19.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "076c73d0bc438f7a4ef6fdd0c3bb4732149136abd952b110ac93e4edb13a6ba5" -dependencies = [ - "once_cell", - "target-lexicon", -] - [[package]] name = "pyo3-build-config" version = "0.20.0" @@ -2145,35 +2099,36 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.19.2" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e53cee42e77ebe256066ba8aa77eff722b3bb91f3419177cf4cd0f304d3284d9" +checksum = "214929900fd25e6604661ed9cf349727c8920d47deff196c4e28165a6ef2a96b" dependencies = [ "libc", - "pyo3-build-config 0.19.2", + "pyo3-build-config", ] [[package]] name = "pyo3-macros" -version = "0.19.2" 
+version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfeb4c99597e136528c6dd7d5e3de5434d1ceaf487436a3f03b2d56b6fc9efd1" +checksum = "dac53072f717aa1bfa4db832b39de8c875b7c7af4f4a6fe93cdbf9264cf8383b" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 1.0.109", + "syn 2.0.39", ] [[package]] name = "pyo3-macros-backend" -version = "0.19.2" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "947dc12175c254889edc0c02e399476c2f652b4b9ebd123aa655c224de259536" +checksum = "7774b5a8282bd4f25f803b1f0d945120be959a36c72e08e7cd031c792fdfd424" dependencies = [ + "heck", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.39", ] [[package]] @@ -2231,15 +2186,6 @@ dependencies = [ "getrandom", ] -[[package]] -name = "redox_syscall" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" -dependencies = [ - "bitflags 1.3.2", -] - [[package]] name = "redox_syscall" version = "0.4.1" @@ -2258,7 +2204,7 @@ dependencies = [ "aho-corasick", "memchr", "regex-automata", - "regex-syntax 0.8.2", + "regex-syntax", ] [[package]] @@ -2269,7 +2215,7 @@ checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.2", + "regex-syntax", ] [[package]] @@ -2278,12 +2224,6 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e" -[[package]] -name = "regex-syntax" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" - [[package]] name = "regex-syntax" version = "0.8.2" @@ -2292,9 +2232,9 @@ checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = 
"regress" -version = "0.6.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82a9ecfa0cb04d0b04dddb99b8ccf4f66bc8dfd23df694b398570bd8ae3a50fb" +checksum = "4ed9969cad8051328011596bf549629f1b800cf1731e7964b1eef8dfc480d2c2" dependencies = [ "hashbrown 0.13.2", "memchr", @@ -2351,12 +2291,26 @@ dependencies = [ "cc", "libc", "once_cell", - "spin", - "untrusted", + "spin 0.5.2", + "untrusted 0.7.1", "web-sys", "winapi", ] +[[package]] +name = "ring" +version = "0.17.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb0205304757e5d899b9c2e448b867ffd03ae7f988002e47cd24954391394d0b" +dependencies = [ + "cc", + "getrandom", + "libc", + "spin 0.9.8", + "untrusted 0.9.0", + "windows-sys", +] + [[package]] name = "rle-decode-fast" version = "1.0.3" @@ -2380,9 +2334,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.20" +version = "0.38.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67ce50cb2e16c2903e30d1cbccfd8387a74b9d4c938b6a4c5ec6cc7556f7a8a0" +checksum = "2b426b0506e5d50a7d8dafcf2e81471400deb602392c7dd110815afb4eaf02a3" dependencies = [ "bitflags 2.4.1", "errno", @@ -2393,12 +2347,12 @@ dependencies = [ [[package]] name = "rustls" -version = "0.21.7" +version = "0.21.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd8d6c9f025a446bc4d18ad9632e69aec8f287aa84499ee335599fabd20c3fd8" +checksum = "446e14c5cda4f3f30fe71863c34ec70f5ac79d6087097ad0bb433e1be5edf04c" dependencies = [ "log", - "ring", + "ring 0.17.5", "rustls-webpki", "sct", ] @@ -2414,12 +2368,12 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.101.6" +version = "0.101.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c7d5dece342910d9ba34d259310cae3e0154b873b35408b787b59bce53d34fe" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies = [ - "ring", - "untrusted", + 
"ring 0.17.5", + "untrusted 0.9.0", ] [[package]] @@ -2475,12 +2429,12 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "sct" -version = "0.7.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" dependencies = [ - "ring", - "untrusted", + "ring 0.17.5", + "untrusted 0.9.0", ] [[package]] @@ -2497,22 +2451,22 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.189" +version = "1.0.190" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e422a44e74ad4001bdc8eede9a4570ab52f71190e9c076d14369f38b9200537" +checksum = "91d3c334ca1ee894a2c6f6ad698fe8c435b76d504b13d436f0685d648d6d96f7" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.189" +version = "1.0.190" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e48d1f918009ce3145511378cf68d613e3b3d9137d67272562080d68a2b32d5" +checksum = "67c5609f394e5c2bd7fc51efda478004ea80ef42fee983d5c67a65e34f32c0e3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -2528,9 +2482,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.107" +version = "1.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65" +checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" dependencies = [ "itoa", "ryu", @@ -2546,7 +2500,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -2563,11 +2517,11 @@ dependencies = [ [[package]] name = "serde_yaml" -version = "0.9.25" +version = "0.9.27" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a49e178e4452f45cb61d0cd8cebc1b0fafd3e41929e996cef79aa3aca91f574" +checksum = "3cc7a1570e38322cfe4154732e5110f887ea57e22b76f4bfd32b5bdd3368666c" dependencies = [ - "indexmap 2.0.2", + "indexmap 2.1.0", "itoa", "ryu", "serde", @@ -2660,11 +2614,17 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + [[package]] name = "sqlparser" -version = "0.38.0" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0272b7bb0a225320170c99901b4b5fb3a4384e255a7f2cc228f61e2ba3893e75" +checksum = "743b4dc2cbde11890ccb254a8fc9d537fa41b36da00de2a1c5e9848c9bc42bd7" dependencies = [ "log", "sqlparser_derive", @@ -2706,14 +2666,14 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] name = "substrait" -version = "0.15.0" +version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27c3d276b85647003f434bfb7ce3ebaf4c47a711018af9d4350d25763535c239" +checksum = "49fd0bc602d396b95761b348a68ef6190e4cde28a5981e82b8449855db1d321d" dependencies = [ "git2", "heck", @@ -2727,7 +2687,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.38", + "syn 2.0.39", "typify", "walkdir", ] @@ -2751,9 +2711,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.38" +version = "2.0.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b" +checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" dependencies = [ "proc-macro2", "quote", @@ -2789,13 +2749,13 @@ checksum = 
"14c39fd04924ca3a864207c66fc2cd7d22d7c016007f9ce846cbb9326331930a" [[package]] name = "tempfile" -version = "3.8.0" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef" +checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" dependencies = [ "cfg-if", "fastrand", - "redox_syscall 0.3.5", + "redox_syscall", "rustix", "windows-sys", ] @@ -2817,7 +2777,7 @@ checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -2881,7 +2841,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -2896,9 +2856,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.9" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d68074620f57a0b21594d9735eb2e98ab38b17f80d3fcb189fca266771ca60d" +checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" dependencies = [ "bytes", "futures-core", @@ -2933,7 +2893,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -2978,7 +2938,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -2989,9 +2949,9 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "typify" -version = "0.0.13" +version = "0.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be9bb640c0eece20cac2028ebbc2ca1a3d17e3b1ddd98540309c309ed178d158" +checksum = "c2e3b707a653e2915a2fc2c4ee96a3d30b9554b9435eb4cc8b5c6c74bbdd3044" dependencies = [ 
"typify-impl", "typify-macro", @@ -2999,9 +2959,9 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.0.13" +version = "0.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c8d9ecedde2fd77e975c38eeb9ca40b34ad0247b2259c6e6bbd2a8d6cc2444f" +checksum = "9d9c752192779f666e4c868672dee56a652b82c08032c7e9d23f6a845b282298" dependencies = [ "heck", "log", @@ -3010,16 +2970,16 @@ dependencies = [ "regress", "schemars", "serde_json", - "syn 2.0.38", + "syn 2.0.39", "thiserror", "unicode-ident", ] [[package]] name = "typify-macro" -version = "0.0.13" +version = "0.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08942cd65d458d2da15777a649cb6400cb545f17964f1ca965583f22e9cc3a9" +checksum = "a14defd554507e72a2bb93cd081c8b374cfed43b3d986b141ad3839d9fd6986b" dependencies = [ "proc-macro2", "quote", @@ -3027,7 +2987,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.38", + "syn 2.0.39", "typify-impl", ] @@ -3066,9 +3026,9 @@ checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" [[package]] name = "unindent" -version = "0.1.11" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1766d682d402817b5ac4490b3c3002d91dfa0d22812f341609f97b08757359c" +checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" [[package]] name = "unsafe-libyaml" @@ -3082,6 +3042,12 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + [[package]] name = "url" version = "2.4.1" @@ -3142,9 +3108,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" 
-version = "0.2.87" +version = "0.2.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +checksum = "7daec296f25a1bae309c0cd5c29c4b260e510e6d813c286b19eaadf409d40fce" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -3152,24 +3118,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.87" +version = "0.2.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +checksum = "e397f4664c0e4e428e8313a469aaa58310d302159845980fd23b0f22a847f217" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.37" +version = "0.4.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03" +checksum = "9afec9963e3d0994cac82455b2b3502b81a7f40f9a0d32181f7528d9f4b43e02" dependencies = [ "cfg-if", "js-sys", @@ -3179,9 +3145,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.87" +version = "0.2.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +checksum = "5961017b3b08ad5f3fe39f1e79877f8ee7c23c5e5fd5eb80de95abc41f1f16b2" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3189,22 +3155,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.87" +version = "0.2.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +checksum = "c5353b8dab669f5e10f5bd76df26a9360c748f054f862ff5f3f8aae0c7fb3907" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", "wasm-bindgen-backend", "wasm-bindgen-shared", 
] [[package]] name = "wasm-bindgen-shared" -version = "0.2.87" +version = "0.2.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" +checksum = "0d046c5d029ba91a1ed14da14dca44b68bf2f124cfbaf741c54151fdb3e0750b" [[package]] name = "wasm-streams" @@ -3221,9 +3187,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.64" +version = "0.3.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" +checksum = "5db499c5f66323272151db0e666cd34f78617522fb0c1604d31a27c50c206a85" dependencies = [ "js-sys", "wasm-bindgen", @@ -3372,6 +3338,26 @@ dependencies = [ "lzma-sys", ] +[[package]] +name = "zerocopy" +version = "0.7.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cd369a67c0edfef15010f980c3cbe45d7f651deac2cd67ce097cd801de16557" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2f140bda219a26ccc0cdb03dba58af72590c53b22642577d88a927bc5c87d6b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.39", +] + [[package]] name = "zstd" version = "0.12.4" diff --git a/Cargo.toml b/Cargo.toml index 06834b92f..610fa9d58 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "32.0.0" +version = "33.0.0" homepage = "https://github.com/apache/arrow-datafusion-python" repository = "https://github.com/apache/arrow-datafusion-python" authors = ["Apache Arrow "] @@ -36,15 +36,15 @@ substrait = ["dep:datafusion-substrait"] [dependencies] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" -pyo3 = { version = "0.19", features = ["extension-module", "abi3", "abi3-py38"] } -datafusion = { version = "32.0.0", features = ["pyarrow", 
"avro"] } -datafusion-common = { version = "32.0.0", features = ["pyarrow"] } -datafusion-expr = { version = "32.0.0" } -datafusion-optimizer = { version = "32.0.0" } -datafusion-sql = { version = "32.0.0" } -datafusion-substrait = { version = "32.0.0", optional = true } -prost = "0.11" -prost-types = "0.11" +pyo3 = { version = "0.20", features = ["extension-module", "abi3", "abi3-py38"] } +datafusion = { git = "https://github.com/apache/arrow-datafusion", rev = "33.0.0-rc1", features = ["pyarrow", "avro"] } +datafusion-common = { git = "https://github.com/apache/arrow-datafusion", rev = "33.0.0-rc1", features = ["pyarrow"] } +datafusion-expr = { git = "https://github.com/apache/arrow-datafusion", rev = "33.0.0-rc1" } +datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion", rev = "33.0.0-rc1" } +datafusion-sql = { git = "https://github.com/apache/arrow-datafusion", rev = "33.0.0-rc1" } +datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion", rev = "33.0.0-rc1", optional = true } +prost = "0.12" +prost-types = "0.12" uuid = { version = "1.3", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false } async-trait = "0.1" diff --git a/src/common/data_type.rs b/src/common/data_type.rs index 405a5632d..9719226a2 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -288,7 +288,7 @@ impl DataTypeMap { ScalarValue::IntervalMonthDayNano(..) 
=> { Ok(DataType::Interval(IntervalUnit::MonthDayNano)) } - ScalarValue::List(_val, field_ref) => Ok(DataType::List(field_ref.to_owned())), + ScalarValue::List(arr) => Ok(arr.data_type().to_owned()), ScalarValue::Struct(_, fields) => Ok(DataType::Struct(fields.to_owned())), ScalarValue::FixedSizeBinary(size, _) => Ok(DataType::FixedSizeBinary(*size)), ScalarValue::Fixedsizelist(_, field_ref, size) => { diff --git a/src/dataset_exec.rs b/src/dataset_exec.rs index 686bbf74c..0266ad9e3 100644 --- a/src/dataset_exec.rs +++ b/src/dataset_exec.rs @@ -133,13 +133,14 @@ impl DatasetExec { .downcast() .map_err(PyErr::from)?; + let projected_statistics = Statistics::new_unknown(&schema); Ok(DatasetExec { dataset: dataset.into(), schema, fragments: fragments.into(), columns, filter_expr, - projected_statistics: Default::default(), + projected_statistics, }) } } @@ -236,8 +237,8 @@ impl ExecutionPlan for DatasetExec { }) } - fn statistics(&self) -> Statistics { - self.projected_statistics.clone() + fn statistics(&self) -> DFResult { + Ok(self.projected_statistics.clone()) } } diff --git a/src/expr.rs b/src/expr.rs index e502edced..97c168e69 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -332,7 +332,7 @@ impl PyExpr { ScalarValue::Binary(v) => v.clone().into_py(py), ScalarValue::FixedSizeBinary(_, _) => todo!(), ScalarValue::LargeBinary(v) => v.clone().into_py(py), - ScalarValue::List(_, _) => todo!(), + ScalarValue::List(_) => todo!(), ScalarValue::Date32(v) => v.into_py(py), ScalarValue::Date64(v) => v.into_py(py), ScalarValue::Time32Second(v) => v.into_py(py), diff --git a/src/expr/window.rs b/src/expr/window.rs index 6583c97af..3a5760c15 100644 --- a/src/expr/window.rs +++ b/src/expr/window.rs @@ -166,9 +166,10 @@ fn not_window_function_err(expr: Expr) -> PyErr { #[pymethods] impl PyWindowFrame { - #[new(unit, start_bound, end_bound)] - pub fn new(units: &str, start_bound: Option, end_bound: Option) -> PyResult { - let units = units.to_ascii_lowercase(); + #[new] + 
#[pyo3(signature=(unit, start_bound, end_bound))] + pub fn new(unit: &str, start_bound: Option, end_bound: Option) -> PyResult { + let units = unit.to_ascii_lowercase(); let units = match units.as_str() { "rows" => WindowFrameUnits::Rows, "range" => WindowFrameUnits::Range, diff --git a/src/substrait.rs b/src/substrait.rs index d027ad21f..73606fdfa 100644 --- a/src/substrait.rs +++ b/src/substrait.rs @@ -134,7 +134,7 @@ impl PySubstraitConsumer { plan: PyPlan, py: Python, ) -> PyResult { - let result = consumer::from_substrait_plan(&mut ctx.ctx, &plan.plan); + let result = consumer::from_substrait_plan(&ctx.ctx, &plan.plan); let logical_plan = wait_for_future(py, result).map_err(DataFusionError::from)?; Ok(PyLogicalPlan::new(logical_plan)) } diff --git a/src/udaf.rs b/src/udaf.rs index 6450f03fe..596ed6904 100644 --- a/src/udaf.rs +++ b/src/udaf.rs @@ -143,7 +143,8 @@ pub struct PyAggregateUDF { #[pymethods] impl PyAggregateUDF { - #[new(name, accumulator, input_type, return_type, state_type, volatility)] + #[new] + #[pyo3(signature=(name, accumulator, input_type, return_type, state_type, volatility))] fn new( name: &str, accumulator: PyObject, diff --git a/src/udf.rs b/src/udf.rs index 417e5f327..bba8ae551 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -68,7 +68,8 @@ pub struct PyScalarUDF { #[pymethods] impl PyScalarUDF { - #[new(name, func, input_types, return_type, volatility)] + #[new] + #[pyo3(signature=(name, func, input_types, return_type, volatility))] fn new( name: &str, func: PyObject, From d1a7505a72400d8f69b63dbad6123eccaef58366 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 16 Nov 2023 11:11:15 -0700 Subject: [PATCH 099/413] Prepare 33.0.0 Release (#533) * Use latest DF 33 branch * use DF 33.0.0-rc2 * use 33.0.0 release * changelog --- CHANGELOG.md | 7 ++ Cargo.lock | 209 +++++++++++++++++++++++++-------------------------- Cargo.toml | 12 +-- src/expr.rs | 11 +-- 4 files changed, 118 insertions(+), 121 deletions(-) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 2890de4f9..d875cf13d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,13 @@ # DataFusion Python Changelog +## [33.0.0](https://github.com/apache/arrow-datafusion-python/tree/33.0.0) (2023-11-16) + +**Merged pull requests:** + +- First pass at getting architectured builds working [#350](https://github.com/apache/arrow-datafusion-python/pull/350) (charlesbluca) +- Remove libprotobuf dep [#527](https://github.com/apache/arrow-datafusion-python/pull/527) (jdye64) + ## [32.0.0](https://github.com/apache/arrow-datafusion-python/tree/32.0.0) (2023-10-21) **Implemented enhancements:** diff --git a/Cargo.lock b/Cargo.lock index 9cf592d29..c2d34115d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -130,9 +130,9 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "48.0.0" +version = "48.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edb738d83750ec705808f6d44046d165e6bb8623f64e29a4d53fcb136ab22dfb" +checksum = "a8919668503a4f2d8b6da96fa7c16e93046bfb3412ffcfa1e5dc7d2e3adcb378" dependencies = [ "ahash", "arrow-arith", @@ -153,9 +153,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "48.0.0" +version = "48.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5c3d17fc5b006e7beeaebfb1d2edfc92398b981f82d9744130437909b72a468" +checksum = "ef983914f477d4278b068f13b3224b7d19eb2b807ac9048544d3bfebdf2554c4" dependencies = [ "arrow-array", "arrow-buffer", @@ -168,9 +168,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "48.0.0" +version = "48.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55705ada5cdde4cb0f202ffa6aa756637e33fea30e13d8d0d0fd6a24ffcee1e3" +checksum = "d6eaf89041fa5937940ae390294ece29e1db584f46d995608d6e5fe65a2e0e9b" dependencies = [ "ahash", "arrow-buffer", @@ -185,9 +185,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "48.0.0" 
+version = "48.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a722f90a09b94f295ab7102542e97199d3500128843446ef63e410ad546c5333" +checksum = "55512d988c6fbd76e514fd3ff537ac50b0a675da5a245e4fdad77ecfd654205f" dependencies = [ "bytes", "half", @@ -196,9 +196,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "48.0.0" +version = "48.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af01fc1a06f6f2baf31a04776156d47f9f31ca5939fe6d00cd7a059f95a46ff1" +checksum = "655ee51a2156ba5375931ce21c1b2494b1d9260e6dcdc6d4db9060c37dc3325b" dependencies = [ "arrow-array", "arrow-buffer", @@ -214,9 +214,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "48.0.0" +version = "48.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83cbbfde86f9ecd3f875c42a73d8aeab3d95149cd80129b18d09e039ecf5391b" +checksum = "258bb689997ad5b6660b3ce3638bd6b383d668ec555ed41ad7c6559cbb2e4f91" dependencies = [ "arrow-array", "arrow-buffer", @@ -233,9 +233,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "48.0.0" +version = "48.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a547195e607e625e7fafa1a7269b8df1a4a612c919efd9b26bd86e74538f3a" +checksum = "6dc2b9fec74763427e2e5575b8cc31ce96ba4c9b4eb05ce40e0616d9fad12461" dependencies = [ "arrow-buffer", "arrow-schema", @@ -245,9 +245,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "48.0.0" +version = "48.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e36bf091502ab7e37775ff448413ef1ffff28ff93789acb669fffdd51b394d51" +checksum = "6eaa6ab203cc6d89b7eaa1ac781c1dfeef325454c5d5a0419017f95e6bafc03c" dependencies = [ "arrow-array", "arrow-buffer", @@ -259,9 +259,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "48.0.0" +version = "48.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7ac346bc84846ab425ab3c8c7b6721db90643bc218939677ed7e071ccbfb919d" +checksum = "fb64e30d9b73f66fdc5c52d5f4cf69bbf03d62f64ffeafa0715590a5320baed7" dependencies = [ "arrow-array", "arrow-buffer", @@ -270,7 +270,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.1.0", + "indexmap", "lexical-core", "num", "serde", @@ -279,9 +279,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "48.0.0" +version = "48.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4502123d2397319f3a13688432bc678c61cb1582f2daa01253186da650bf5841" +checksum = "f9a818951c0d11c428dda03e908175969c262629dd20bd0850bd6c7a8c3bfe48" dependencies = [ "arrow-array", "arrow-buffer", @@ -294,9 +294,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "48.0.0" +version = "48.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "249fc5a07906ab3f3536a6e9f118ec2883fbcde398a97a5ba70053f0276abda4" +checksum = "a5d664318bc05f930559fc088888f0f7174d3c5bc888c0f4f9ae8f23aa398ba3" dependencies = [ "ahash", "arrow-array", @@ -309,18 +309,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "48.0.0" +version = "48.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d7a8c3f97f5ef6abd862155a6f39aaba36b029322462d72bbcfa69782a50614" +checksum = "aaf4d737bba93da59f16129bec21e087aed0be84ff840e74146d4703879436cb" dependencies = [ "bitflags 2.4.1", ] [[package]] name = "arrow-select" -version = "48.0.0" +version = "48.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f868f4a5001429e20f7c1994b5cd1aa68b82e3db8cf96c559cdb56dc8be21410" +checksum = "374c4c3b812ecc2118727b892252a4a4308f87a8aca1dbf09f3ce4bc578e668a" dependencies = [ "ahash", "arrow-array", @@ -332,9 +332,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "48.0.0" +version = "48.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a27fdf8fc70040a2dee78af2e217479cb5b263bd7ab8711c7999e74056eb688a" +checksum = "b15aed5624bb23da09142f58502b59c23f5bea607393298bb81dab1ce60fc769" dependencies = [ "arrow-array", "arrow-buffer", @@ -348,9 +348,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f658e2baef915ba0f26f1f7c42bfb8e12f532a01f449a090ded75ae7a07e9ba2" +checksum = "bc2d0cfb2a7388d34f590e76686704c494ed7aaceed62ee1ba35cbf363abc2a5" dependencies = [ "bzip2", "flate2", @@ -715,7 +715,8 @@ dependencies = [ [[package]] name = "datafusion" version = "33.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=33.0.0-rc1#262f08778b8ec231d96792c01fc3e051640eb5d4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "676796427e638d85e9eadf13765705212be60b34f8fc5d3934d95184c63ca1b4" dependencies = [ "ahash", "apache-avro", @@ -740,7 +741,7 @@ dependencies = [ "glob", "half", "hashbrown 0.14.2", - "indexmap 2.1.0", + "indexmap", "itertools", "log", "num-traits", @@ -763,7 +764,8 @@ dependencies = [ [[package]] name = "datafusion-common" version = "33.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=33.0.0-rc1#262f08778b8ec231d96792c01fc3e051640eb5d4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31e23b3d21a6531259d291bd20ce59282ea794bda1018b0a1e278c13cd52e50c" dependencies = [ "ahash", "apache-avro", @@ -783,7 +785,8 @@ dependencies = [ [[package]] name = "datafusion-execution" version = "33.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=33.0.0-rc1#262f08778b8ec231d96792c01fc3e051640eb5d4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4de1fd0d8db0f2b8e4f4121bfa1c7c09d3a5c08a0a65c2229cd849eb65cff855" dependencies = [ "arrow", "chrono", @@ -803,7 +806,8 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "33.0.0" -source = 
"git+https://github.com/apache/arrow-datafusion?rev=33.0.0-rc1#262f08778b8ec231d96792c01fc3e051640eb5d4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18e227fe88bf6730cab378d0cd8fc4c6b2ea42bc7e414a8ea9feba7225932735" dependencies = [ "ahash", "arrow", @@ -817,7 +821,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "33.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=33.0.0-rc1#262f08778b8ec231d96792c01fc3e051640eb5d4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6648e62ea7605b9bfcd87fdc9d67e579c3b9ac563a87734ae5fe6d79ee4547" dependencies = [ "arrow", "async-trait", @@ -834,7 +839,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "33.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=33.0.0-rc1#262f08778b8ec231d96792c01fc3e051640eb5d4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f32b8574add16a32411a9b3fb3844ac1fc09ab4e7be289f86fd56d620e4f2508" dependencies = [ "ahash", "arrow", @@ -851,7 +857,7 @@ dependencies = [ "half", "hashbrown 0.14.2", "hex", - "indexmap 2.1.0", + "indexmap", "itertools", "libc", "log", @@ -868,7 +874,8 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "33.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=33.0.0-rc1#262f08778b8ec231d96792c01fc3e051640eb5d4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "796abd77d5bfecd9e5275a99daf0ec45f5b3a793ec431349ce8211a67826fd22" dependencies = [ "ahash", "arrow", @@ -884,7 +891,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.2", - "indexmap 2.1.0", + "indexmap", "itertools", "log", "once_cell", @@ -925,7 +932,8 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "33.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=33.0.0-rc1#262f08778b8ec231d96792c01fc3e051640eb5d4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced70b8a5648ba7b95c61fc512183c33287ffe2c9f22ffe22700619d7d48c84f" dependencies = [ "arrow", "arrow-schema", @@ -938,7 +946,8 @@ dependencies = [ [[package]] name = "datafusion-substrait" version = "33.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=33.0.0-rc1#262f08778b8ec231d96792c01fc3e051640eb5d4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f8fe74a4541eccbd096aa9fa3e734f1387a831613c9a3cb4f06bf7283ec77b7" dependencies = [ "async-recursion", "chrono", @@ -997,9 +1006,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.5" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3e13f66a2f95e32a39eaa81f6b95d42878ca0e1db0c7543723dfe12557e860" +checksum = "f258a7194e7f7c2a7837a8913aeab7fd8c383457034fa20ce4dd3dcb813e8eb8" dependencies = [ "libc", "windows-sys", @@ -1153,9 +1162,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.10" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" dependencies = [ "cfg-if", "libc", @@ -1189,9 +1198,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.21" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833" +checksum = "4d6250322ef6e60f93f9a2162799302cd6f68f79f6e5d85c8c16f14d1d958178" dependencies = [ "bytes", "fnv", @@ -1199,7 +1208,7 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap 1.9.3", + "indexmap", "slab", "tokio", "tokio-util", @@ -1217,12 +1226,6 @@ dependencies = [ "num-traits", ] 
-[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - [[package]] name = "hashbrown" version = "0.13.2" @@ -1271,9 +1274,9 @@ dependencies = [ [[package]] name = "http" -version = "0.2.9" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" +checksum = "8947b1a6fad4393052c7ba1f4cd97bed3e953a95c79c92ad9b051a04611d9fbb" dependencies = [ "bytes", "fnv", @@ -1380,16 +1383,6 @@ dependencies = [ "unicode-normalization", ] -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 0.12.3", -] - [[package]] name = "indexmap" version = "2.1.0" @@ -1593,9 +1586,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f" +checksum = "969488b55f8ac402214f3f5fd243ebb7206cf82de60d3172994707a4bcc2b829" [[package]] name = "lock_api" @@ -1865,9 +1858,9 @@ dependencies = [ [[package]] name = "parquet" -version = "48.0.0" +version = "48.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "239229e6a668ab50c61de3dce61cf0fa1069345f7aa0f4c934491f92205a4945" +checksum = "6bfe55df96e3f02f11bf197ae37d91bb79801631f82f6195dd196ef521df3597" dependencies = [ "ahash", "arrow-array", @@ -1925,7 +1918,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" dependencies = [ "fixedbitset", - "indexmap 2.1.0", + "indexmap", ] [[package]] @@ -2011,9 +2004,9 @@ dependencies = 
[ [[package]] name = "prost" -version = "0.12.1" +version = "0.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4fdd22f3b9c31b53c060df4a0613a1c7f062d4115a2b984dd15b1858f7e340d" +checksum = "5a5a410fc7882af66deb8d01d01737353cf3ad6204c408177ba494291a626312" dependencies = [ "bytes", "prost-derive", @@ -2021,9 +2014,9 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.12.1" +version = "0.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bdf592881d821b83d471f8af290226c8d51402259e9bb5be7f9f8bdebbb11ac" +checksum = "1fa3d084c8704911bfefb2771be2f9b6c5c0da7343a71e0021ee3c665cada738" dependencies = [ "bytes", "heck", @@ -2041,9 +2034,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.12.1" +version = "0.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "265baba7fabd416cf5078179f7d2cbeca4ce7a9041111900675ea7c4cb8a4c32" +checksum = "065717a5dfaca4a83d2fe57db3487b311365200000551d7a364e715dbf4346bc" dependencies = [ "anyhow", "itertools", @@ -2054,9 +2047,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.12.1" +version = "0.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e081b29f63d83a4bc75cfc9f3fe424f9156cf92d8a4f0c9407cce9a1b67327cf" +checksum = "8339f32236f590281e2f6368276441394fcd1b2133b549cc895d0ae80f2f9a52" dependencies = [ "prost", ] @@ -2334,9 +2327,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.21" +version = "0.38.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b426b0506e5d50a7d8dafcf2e81471400deb602392c7dd110815afb4eaf02a3" +checksum = "9ad981d6c340a49cdc40a1028d9c6084ec7e9fa33fcb839cab656a267071e234" dependencies = [ "bitflags 2.4.1", "errno", @@ -2347,9 +2340,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.21.8" +version = "0.21.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "446e14c5cda4f3f30fe71863c34ec70f5ac79d6087097ad0bb433e1be5edf04c" +checksum = "629648aced5775d558af50b2b4c7b02983a04b312126d45eeead26e7caa498b9" dependencies = [ "log", "ring 0.17.5", @@ -2359,9 +2352,9 @@ dependencies = [ [[package]] name = "rustls-pemfile" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d3987094b1d07b653b7dfdc3f70ce9a1da9c51ac18c1b06b662e4f9a0e9f4b2" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" dependencies = [ "base64", ] @@ -2399,9 +2392,9 @@ dependencies = [ [[package]] name = "schemars" -version = "0.8.15" +version = "0.8.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f7b0ce13155372a76ee2e1c5ffba1fe61ede73fbea5630d61eee6fac4929c0c" +checksum = "45a28f4c49489add4ce10783f7911893516f15afe45d015608d41faca6bc4d29" dependencies = [ "dyn-clone", "schemars_derive", @@ -2411,9 +2404,9 @@ dependencies = [ [[package]] name = "schemars_derive" -version = "0.8.15" +version = "0.8.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e85e2a16b12bdb763244c69ab79363d71db2b4b918a2def53f80b02e0574b13c" +checksum = "c767fd6fa65d9ccf9cf026122c1b555f2ef9a4f0cea69da4d7dbc3e258d30967" dependencies = [ "proc-macro2", "quote", @@ -2451,18 +2444,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.190" +version = "1.0.192" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91d3c334ca1ee894a2c6f6ad698fe8c435b76d504b13d436f0685d648d6d96f7" +checksum = "bca2a08484b285dcb282d0f67b26cadc0df8b19f8c12502c13d966bf9482f001" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.190" +version = "1.0.192" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67c5609f394e5c2bd7fc51efda478004ea80ef42fee983d5c67a65e34f32c0e3" +checksum = 
"d6c7207fbec9faa48073f3e3074cbe553af6ea512d7c21ba46e434e70ea9fbc1" dependencies = [ "proc-macro2", "quote", @@ -2521,7 +2514,7 @@ version = "0.9.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3cc7a1570e38322cfe4154732e5110f887ea57e22b76f4bfd32b5bdd3368666c" dependencies = [ - "indexmap 2.1.0", + "indexmap", "itoa", "ryu", "serde", @@ -2556,9 +2549,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.11.1" +version = "1.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" +checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970" [[package]] name = "snafu" @@ -2671,9 +2664,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.18.1" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49fd0bc602d396b95761b348a68ef6190e4cde28a5981e82b8449855db1d321d" +checksum = "7299fc531294d189834eeaf7928482f311c0ada2cf0007948989cf75d0228183" dependencies = [ "git2", "heck", @@ -2817,9 +2810,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.33.0" +version = "1.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f38200e3ef7995e5ef13baec2f432a6da0aa9ac495b2c0e8f3b7eec2c92d653" +checksum = "d0c014766411e834f7af5b8f4cf46257aab4036ca95e9d2c144a10f59ad6f5b9" dependencies = [ "backtrace", "bytes", @@ -2835,9 +2828,9 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" +checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", @@ -3340,18 +3333,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.7.25" +version = 
"0.7.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cd369a67c0edfef15010f980c3cbe45d7f651deac2cd67ce097cd801de16557" +checksum = "e97e415490559a91254a2979b4829267a57d2fcd741a98eee8b722fb57289aa0" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.25" +version = "0.7.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2f140bda219a26ccc0cdb03dba58af72590c53b22642577d88a927bc5c87d6b" +checksum = "dd7e48ccf166952882ca8bd778a43502c64f33bf94c12ebe2a7f08e5a0f6689f" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 610fa9d58..f16f74ae8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,12 +37,12 @@ substrait = ["dep:datafusion-substrait"] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.20", features = ["extension-module", "abi3", "abi3-py38"] } -datafusion = { git = "https://github.com/apache/arrow-datafusion", rev = "33.0.0-rc1", features = ["pyarrow", "avro"] } -datafusion-common = { git = "https://github.com/apache/arrow-datafusion", rev = "33.0.0-rc1", features = ["pyarrow"] } -datafusion-expr = { git = "https://github.com/apache/arrow-datafusion", rev = "33.0.0-rc1" } -datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion", rev = "33.0.0-rc1" } -datafusion-sql = { git = "https://github.com/apache/arrow-datafusion", rev = "33.0.0-rc1" } -datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion", rev = "33.0.0-rc1", optional = true } +datafusion = { version = "33.0.0", features = ["pyarrow", "avro"] } +datafusion-common = { version = "33.0.0", features = ["pyarrow"] } +datafusion-expr = { version = "33.0.0" } +datafusion-optimizer = { version = "33.0.0" } +datafusion-sql = { version = "33.0.0" } +datafusion-substrait = { version = "33.0.0", optional = true } prost = "0.12" prost-types = "0.12" uuid = { version = "1.3", 
features = ["v4"] } diff --git a/src/expr.rs b/src/expr.rs index 97c168e69..bbab8bf41 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -265,9 +265,7 @@ impl PyExpr { pub fn rex_type(&self) -> PyResult { Ok(match self.expr { Expr::Alias(..) => RexType::Alias, - Expr::Column(..) | Expr::QualifiedWildcard { .. } | Expr::GetIndexedField { .. } => { - RexType::Reference - } + Expr::Column(..) | Expr::GetIndexedField { .. } => RexType::Reference, Expr::ScalarVariable(..) | Expr::Literal(..) => RexType::Literal, Expr::BinaryExpr { .. } | Expr::Not(..) @@ -286,7 +284,7 @@ impl PyExpr { | Expr::WindowFunction { .. } | Expr::AggregateUDF { .. } | Expr::InList { .. } - | Expr::Wildcard + | Expr::Wildcard { .. } | Expr::ScalarUDF { .. } | Expr::Exists { .. } | Expr::InSubquery { .. } @@ -463,8 +461,7 @@ impl PyExpr { // Currently un-support/implemented Expr types for Rex Call operations Expr::GroupingSet(..) | Expr::OuterReferenceColumn(_, _) - | Expr::Wildcard - | Expr::QualifiedWildcard { .. } + | Expr::Wildcard { .. } | Expr::ScalarSubquery(..) | Expr::Placeholder { .. } | Expr::Exists { .. } => Err(py_runtime_err(format!( @@ -548,7 +545,7 @@ impl PyExpr { // appear in projections) so we just delegate to the contained expression instead Self::expr_to_field(expr, input_plan) } - Expr::Wildcard => { + Expr::Wildcard { .. 
} => { // Since * could be any of the valid column names just return the first one Ok(input_plan.schema().field(0).clone()) } From eaa71311977e1673d962235cee15691ef148fc91 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Sun, 3 Dec 2023 07:43:23 -0500 Subject: [PATCH 100/413] Adjust visibility of crate private members & Functions (#537) --- src/catalog.rs | 10 +++--- src/common/df_field.rs | 2 +- src/context.rs | 77 ++++++++++++++++++++++-------------------- src/expr.rs | 1 + src/lib.rs | 2 +- src/substrait.rs | 12 +++---- src/utils.rs | 10 +++++- 7 files changed, 63 insertions(+), 51 deletions(-) diff --git a/src/catalog.rs b/src/catalog.rs index 94faea067..ba7e22255 100644 --- a/src/catalog.rs +++ b/src/catalog.rs @@ -30,18 +30,18 @@ use datafusion::{ }; #[pyclass(name = "Catalog", module = "datafusion", subclass)] -pub(crate) struct PyCatalog { - catalog: Arc, +pub struct PyCatalog { + pub catalog: Arc, } #[pyclass(name = "Database", module = "datafusion", subclass)] -pub(crate) struct PyDatabase { - database: Arc, +pub struct PyDatabase { + pub database: Arc, } #[pyclass(name = "Table", module = "datafusion", subclass)] pub struct PyTable { - table: Arc, + pub table: Arc, } impl PyCatalog { diff --git a/src/common/df_field.rs b/src/common/df_field.rs index 703af0aa2..68c05361f 100644 --- a/src/common/df_field.rs +++ b/src/common/df_field.rs @@ -27,7 +27,7 @@ use super::data_type::PyDataType; #[pyclass(name = "DFField", module = "datafusion.common", subclass)] #[derive(Debug, Clone)] pub struct PyDFField { - field: DFField, + pub field: DFField, } impl From for DFField { diff --git a/src/context.rs b/src/context.rs index a0361acfc..fb7ca8385 100644 --- a/src/context.rs +++ b/src/context.rs @@ -60,8 +60,8 @@ use tokio::task::JoinHandle; /// Configuration options for a SessionContext #[pyclass(name = "SessionConfig", module = "datafusion", subclass)] #[derive(Clone, Default)] -pub(crate) struct PySessionConfig { - pub(crate) config: SessionConfig, +pub struct 
PySessionConfig { + pub config: SessionConfig, } impl From for PySessionConfig { @@ -153,8 +153,8 @@ impl PySessionConfig { /// Runtime options for a SessionContext #[pyclass(name = "RuntimeConfig", module = "datafusion", subclass)] #[derive(Clone)] -pub(crate) struct PyRuntimeConfig { - pub(crate) config: RuntimeConfig, +pub struct PyRuntimeConfig { + pub config: RuntimeConfig, } #[pymethods] @@ -215,15 +215,18 @@ impl PyRuntimeConfig { /// multi-threaded execution engine to perform the execution. #[pyclass(name = "SessionContext", module = "datafusion", subclass)] #[derive(Clone)] -pub(crate) struct PySessionContext { - pub(crate) ctx: SessionContext, +pub struct PySessionContext { + pub ctx: SessionContext, } #[pymethods] impl PySessionContext { #[pyo3(signature = (config=None, runtime=None))] #[new] - fn new(config: Option, runtime: Option) -> PyResult { + pub fn new( + config: Option, + runtime: Option, + ) -> PyResult { let config = if let Some(c) = config { c.config } else { @@ -242,7 +245,7 @@ impl PySessionContext { } /// Register a an object store with the given name - fn register_object_store( + pub fn register_object_store( &mut self, scheme: &str, store: &PyAny, @@ -276,13 +279,13 @@ impl PySessionContext { } /// Returns a PyDataFrame whose plan corresponds to the SQL statement. 
- fn sql(&mut self, query: &str, py: Python) -> PyResult { + pub fn sql(&mut self, query: &str, py: Python) -> PyResult { let result = self.ctx.sql(query); let df = wait_for_future(py, result).map_err(DataFusionError::from)?; Ok(PyDataFrame::new(df)) } - fn create_dataframe( + pub fn create_dataframe( &mut self, partitions: PyArrowType>>, name: Option<&str>, @@ -314,13 +317,13 @@ impl PySessionContext { } /// Create a DataFrame from an existing logical plan - fn create_dataframe_from_logical_plan(&mut self, plan: PyLogicalPlan) -> PyDataFrame { + pub fn create_dataframe_from_logical_plan(&mut self, plan: PyLogicalPlan) -> PyDataFrame { PyDataFrame::new(DataFrame::new(self.ctx.state(), plan.plan.as_ref().clone())) } /// Construct datafusion dataframe from Python list #[allow(clippy::wrong_self_convention)] - fn from_pylist( + pub fn from_pylist( &mut self, data: PyObject, name: Option<&str>, @@ -340,7 +343,7 @@ impl PySessionContext { /// Construct datafusion dataframe from Python dictionary #[allow(clippy::wrong_self_convention)] - fn from_pydict( + pub fn from_pydict( &mut self, data: PyObject, name: Option<&str>, @@ -360,7 +363,7 @@ impl PySessionContext { /// Construct datafusion dataframe from Arrow Table #[allow(clippy::wrong_self_convention)] - fn from_arrow_table( + pub fn from_arrow_table( &mut self, data: PyObject, name: Option<&str>, @@ -381,7 +384,7 @@ impl PySessionContext { /// Construct datafusion dataframe from pandas #[allow(clippy::wrong_self_convention)] - fn from_pandas( + pub fn from_pandas( &mut self, data: PyObject, name: Option<&str>, @@ -401,7 +404,7 @@ impl PySessionContext { /// Construct datafusion dataframe from polars #[allow(clippy::wrong_self_convention)] - fn from_polars( + pub fn from_polars( &mut self, data: PyObject, name: Option<&str>, @@ -417,21 +420,21 @@ impl PySessionContext { }) } - fn register_table(&mut self, name: &str, table: &PyTable) -> PyResult<()> { + pub fn register_table(&mut self, name: &str, table: &PyTable) -> 
PyResult<()> { self.ctx .register_table(name, table.table()) .map_err(DataFusionError::from)?; Ok(()) } - fn deregister_table(&mut self, name: &str) -> PyResult<()> { + pub fn deregister_table(&mut self, name: &str) -> PyResult<()> { self.ctx .deregister_table(name) .map_err(DataFusionError::from)?; Ok(()) } - fn register_record_batches( + pub fn register_record_batches( &mut self, name: &str, partitions: PyArrowType>>, @@ -451,7 +454,7 @@ impl PySessionContext { skip_metadata=true, schema=None, file_sort_order=None))] - fn register_parquet( + pub fn register_parquet( &mut self, name: &str, path: &str, @@ -489,7 +492,7 @@ impl PySessionContext { schema_infer_max_records=1000, file_extension=".csv", file_compression_type=None))] - fn register_csv( + pub fn register_csv( &mut self, name: &str, path: PathBuf, @@ -533,7 +536,7 @@ impl PySessionContext { file_extension=".json", table_partition_cols=vec![], file_compression_type=None))] - fn register_json( + pub fn register_json( &mut self, name: &str, path: PathBuf, @@ -568,7 +571,7 @@ impl PySessionContext { file_extension=".avro", table_partition_cols=vec![], infinite=false))] - fn register_avro( + pub fn register_avro( &mut self, name: &str, path: PathBuf, @@ -595,7 +598,7 @@ impl PySessionContext { } // Registers a PyArrow.Dataset - fn register_dataset(&self, name: &str, dataset: &PyAny, py: Python) -> PyResult<()> { + pub fn register_dataset(&self, name: &str, dataset: &PyAny, py: Python) -> PyResult<()> { let table: Arc = Arc::new(Dataset::new(dataset, py)?); self.ctx @@ -605,18 +608,18 @@ impl PySessionContext { Ok(()) } - fn register_udf(&mut self, udf: PyScalarUDF) -> PyResult<()> { + pub fn register_udf(&mut self, udf: PyScalarUDF) -> PyResult<()> { self.ctx.register_udf(udf.function); Ok(()) } - fn register_udaf(&mut self, udaf: PyAggregateUDF) -> PyResult<()> { + pub fn register_udaf(&mut self, udaf: PyAggregateUDF) -> PyResult<()> { self.ctx.register_udaf(udaf.function); Ok(()) } #[pyo3(signature = 
(name="datafusion"))] - fn catalog(&self, name: &str) -> PyResult { + pub fn catalog(&self, name: &str) -> PyResult { match self.ctx.catalog(name) { Some(catalog) => Ok(PyCatalog::new(catalog)), None => Err(PyKeyError::new_err(format!( @@ -626,31 +629,31 @@ impl PySessionContext { } } - fn tables(&self) -> HashSet { + pub fn tables(&self) -> HashSet { #[allow(deprecated)] self.ctx.tables().unwrap() } - fn table(&self, name: &str, py: Python) -> PyResult { + pub fn table(&self, name: &str, py: Python) -> PyResult { let x = wait_for_future(py, self.ctx.table(name)).map_err(DataFusionError::from)?; Ok(PyDataFrame::new(x)) } - fn table_exist(&self, name: &str) -> PyResult { + pub fn table_exist(&self, name: &str) -> PyResult { Ok(self.ctx.table_exist(name)?) } - fn empty_table(&self) -> PyResult { + pub fn empty_table(&self) -> PyResult { Ok(PyDataFrame::new(self.ctx.read_empty()?)) } - fn session_id(&self) -> String { + pub fn session_id(&self) -> String { self.ctx.session_id() } #[allow(clippy::too_many_arguments)] #[pyo3(signature = (path, schema=None, schema_infer_max_records=1000, file_extension=".json", table_partition_cols=vec![], file_compression_type=None))] - fn read_json( + pub fn read_json( &mut self, path: PathBuf, schema: Option>, @@ -689,7 +692,7 @@ impl PySessionContext { file_extension=".csv", table_partition_cols=vec![], file_compression_type=None))] - fn read_csv( + pub fn read_csv( &self, path: PathBuf, schema: Option>, @@ -741,7 +744,7 @@ impl PySessionContext { skip_metadata=true, schema=None, file_sort_order=None))] - fn read_parquet( + pub fn read_parquet( &self, path: &str, table_partition_cols: Vec<(String, String)>, @@ -771,7 +774,7 @@ impl PySessionContext { #[allow(clippy::too_many_arguments)] #[pyo3(signature = (path, schema=None, table_partition_cols=vec![], file_extension=".avro"))] - fn read_avro( + pub fn read_avro( &self, path: &str, schema: Option>, @@ -793,7 +796,7 @@ impl PySessionContext { Ok(PyDataFrame::new(df)) } - fn 
read_table(&self, table: &PyTable) -> PyResult { + pub fn read_table(&self, table: &PyTable) -> PyResult { let df = self .ctx .read_table(table.table()) diff --git a/src/expr.rs b/src/expr.rs index bbab8bf41..3875fb381 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -128,6 +128,7 @@ impl PyExpr { Expr::ScalarVariable(data_type, variables) => { Ok(PyScalarVariable::new(data_type, variables).into_py(py)) } + Expr::Like(value) => Ok(PyLike::from(value.clone()).into_py(py)), Expr::Literal(value) => Ok(PyLiteral::from(value.clone()).into_py(py)), Expr::BinaryExpr(expr) => Ok(PyBinaryExpr::from(expr.clone()).into_py(py)), Expr::Not(expr) => Ok(PyNot::new(*expr.clone()).into_py(py)), diff --git a/src/lib.rs b/src/lib.rs index 413b2a429..5e57db9cf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,7 +35,7 @@ pub mod common; #[allow(clippy::borrow_deref_ref)] mod config; #[allow(clippy::borrow_deref_ref)] -mod context; +pub mod context; #[allow(clippy::borrow_deref_ref)] mod dataframe; mod dataset; diff --git a/src/substrait.rs b/src/substrait.rs index 73606fdfa..ff83f6f79 100644 --- a/src/substrait.rs +++ b/src/substrait.rs @@ -29,8 +29,8 @@ use prost::Message; #[pyclass(name = "plan", module = "datafusion.substrait", subclass)] #[derive(Debug, Clone)] -pub(crate) struct PyPlan { - pub(crate) plan: Plan, +pub struct PyPlan { + pub plan: Plan, } #[pymethods] @@ -61,7 +61,7 @@ impl From for PyPlan { /// to a valid `LogicalPlan` instance. 
#[pyclass(name = "serde", module = "datafusion.substrait", subclass)] #[derive(Debug, Clone)] -pub(crate) struct PySubstraitSerializer; +pub struct PySubstraitSerializer; #[pymethods] impl PySubstraitSerializer { @@ -107,7 +107,7 @@ impl PySubstraitSerializer { #[pyclass(name = "producer", module = "datafusion.substrait", subclass)] #[derive(Debug, Clone)] -pub(crate) struct PySubstraitProducer; +pub struct PySubstraitProducer; #[pymethods] impl PySubstraitProducer { @@ -123,7 +123,7 @@ impl PySubstraitProducer { #[pyclass(name = "consumer", module = "datafusion.substrait", subclass)] #[derive(Debug, Clone)] -pub(crate) struct PySubstraitConsumer; +pub struct PySubstraitConsumer; #[pymethods] impl PySubstraitConsumer { @@ -140,7 +140,7 @@ impl PySubstraitConsumer { } } -pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { +pub fn init_module(m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/src/utils.rs b/src/utils.rs index 427a8a064..c5965bd2f 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -25,7 +25,15 @@ use tokio::runtime::Runtime; /// Utility to get the Tokio Runtime from Python pub(crate) fn get_tokio_runtime(py: Python) -> PyRef { let datafusion = py.import("datafusion._internal").unwrap(); - datafusion.getattr("runtime").unwrap().extract().unwrap() + let tmp = datafusion.getattr("runtime").unwrap(); + match tmp.extract::>() { + Ok(runtime) => runtime, + Err(_e) => { + let rt = TokioRuntime(tokio::runtime::Runtime::new().unwrap()); + let obj: &PyAny = Py::new(py, rt).unwrap().into_ref(py); + obj.extract().unwrap() + } + } } /// Utility to collect rust futures with GIL released From 85366d6a54802a633c7e4b58699eb72821d2b29b Mon Sep 17 00:00:00 2001 From: ray-andrew <76701804+ray-andrew@users.noreply.github.com> Date: Sun, 3 Dec 2023 20:43:56 +0800 Subject: [PATCH 101/413] Update json.rst (#538) --- docs/source/user-guide/io/json.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/docs/source/user-guide/io/json.rst b/docs/source/user-guide/io/json.rst index 5949a0380..1ee065c44 100644 --- a/docs/source/user-guide/io/json.rst +++ b/docs/source/user-guide/io/json.rst @@ -26,4 +26,4 @@ When it comes to reading a JSON file, using :meth:`.SessionContext.read_json` is from datafusion import SessionContext ctx = SessionContext() - df = ctx.read_avro("file.json") \ No newline at end of file + df = ctx.read_json("file.json") From 0b2962a7a5369669adbd335a889373379dd8bb6a Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Sun, 3 Dec 2023 07:44:53 -0500 Subject: [PATCH 102/413] Enable mimalloc local_dynamic_tls feature (#540) --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index f16f74ae8..ba4216839 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,7 +46,7 @@ datafusion-substrait = { version = "33.0.0", optional = true } prost = "0.12" prost-types = "0.12" uuid = { version = "1.3", features = ["v4"] } -mimalloc = { version = "0.1", optional = true, default-features = false } +mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } async-trait = "0.1" futures = "0.3" object_store = { version = "0.7.0", features = ["aws", "gcp", "azure"] } From cce468021e369df23c524cb314404303d4677bd5 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Sun, 3 Dec 2023 07:45:30 -0500 Subject: [PATCH 103/413] Enable substrait feature to be built by default in CI, for nightlies and releases (#544) --- .github/workflows/build.yml | 4 ++-- conda/recipes/bld.bat | 2 +- conda/recipes/build.sh | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e72c4805a..968bdc1f0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -80,7 +80,7 @@ jobs: repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Build Python package - run: maturin build --release --strip + run: maturin build --release 
--strip --features substrait - name: List Windows wheels if: matrix.os == 'windows-latest' @@ -141,7 +141,7 @@ jobs: repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Build Python package - run: maturin build --release --strip --target aarch64-apple-darwin + run: maturin build --release --strip --target aarch64-apple-darwin --features substrait - name: List Mac wheels run: find target/wheels/ diff --git a/conda/recipes/bld.bat b/conda/recipes/bld.bat index fc3ac88b3..90626a637 100644 --- a/conda/recipes/bld.bat +++ b/conda/recipes/bld.bat @@ -17,7 +17,7 @@ # under the License. # -maturin build -vv -j %CPU_COUNT% --release --strip --manylinux off --interpreter=%PYTHON% +maturin build -vv -j %CPU_COUNT% --release --strip --features substrait --manylinux off --interpreter=%PYTHON% FOR /F "delims=" %%i IN ('dir /s /b target\wheels\*.whl') DO set datafusion_wheel=%%i diff --git a/conda/recipes/build.sh b/conda/recipes/build.sh index af640e6ca..259894313 100644 --- a/conda/recipes/build.sh +++ b/conda/recipes/build.sh @@ -77,7 +77,7 @@ EOF sed -i.bak 's,aarch64,arm64,g' $BUILD_PREFIX/venv/lib/platform-patch.py fi -maturin build -vv -j "${CPU_COUNT}" --release --strip --manylinux off --interpreter="${PYTHON}" "${_xtra_maturin_args[@]}" +maturin build -vv -j "${CPU_COUNT}" --release --strip --features substrait --manylinux off --interpreter="${PYTHON}" "${_xtra_maturin_args[@]}" "${PYTHON}" -m pip install $SRC_DIR/target/wheels/datafusion*.whl --no-deps -vv From 2889de097dbc6002030c54726dd8fd1915df4a76 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 28 Dec 2023 13:09:38 -0700 Subject: [PATCH 104/413] Prepare 34.0.0 Release (#550) --- CHANGELOG.md | 9 + Cargo.lock | 558 ++++++++++++++++++++----------------- Cargo.toml | 16 +- src/common/data_type.rs | 8 +- src/common/schema.rs | 2 +- src/context.rs | 2 +- src/dataframe.rs | 2 +- src/dataset_exec.rs | 2 +- src/expr.rs | 17 +- src/expr/aggregate.rs | 14 +- src/expr/aggregate_expr.rs | 4 +- src/expr/distinct.rs | 27 +- 
src/functions.rs | 19 +- src/udaf.rs | 2 +- src/udf.rs | 2 +- 15 files changed, 382 insertions(+), 302 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d875cf13d..4af70d568 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,15 @@ # DataFusion Python Changelog +## [34.0.0](https://github.com/apache/arrow-datafusion-python/tree/34.0.0) (2023-12-28) + +**Merged pull requests:** + +- Adjust visibility of crate private members & Functions [#537](https://github.com/apache/arrow-datafusion-python/pull/537) (jdye64) +- Update json.rst [#538](https://github.com/apache/arrow-datafusion-python/pull/538) (ray-andrew) +- Enable mimalloc local_dynamic_tls feature [#540](https://github.com/apache/arrow-datafusion-python/pull/540) (jdye64) +- Enable substrait feature to be built by default in CI, for nightlies … [#544](https://github.com/apache/arrow-datafusion-python/pull/544) (jdye64) + ## [33.0.0](https://github.com/apache/arrow-datafusion-python/tree/33.0.0) (2023-11-16) **Merged pull requests:** diff --git a/Cargo.lock b/Cargo.lock index c2d34115d..db776d5bd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -130,9 +130,9 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "48.0.1" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8919668503a4f2d8b6da96fa7c16e93046bfb3412ffcfa1e5dc7d2e3adcb378" +checksum = "5bc25126d18a012146a888a0298f2c22e1150327bd2765fc76d710a556b2d614" dependencies = [ "ahash", "arrow-arith", @@ -153,9 +153,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "48.0.1" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef983914f477d4278b068f13b3224b7d19eb2b807ac9048544d3bfebdf2554c4" +checksum = "34ccd45e217ffa6e53bbb0080990e77113bdd4e91ddb84e97b77649810bcf1a7" dependencies = [ "arrow-array", "arrow-buffer", @@ -168,9 +168,9 @@ dependencies = [ [[package]] name = 
"arrow-array" -version = "48.0.1" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6eaf89041fa5937940ae390294ece29e1db584f46d995608d6e5fe65a2e0e9b" +checksum = "6bda9acea48b25123c08340f3a8ac361aa0f74469bb36f5ee9acf923fce23e9d" dependencies = [ "ahash", "arrow-buffer", @@ -179,15 +179,15 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.14.2", + "hashbrown 0.14.3", "num", ] [[package]] name = "arrow-buffer" -version = "48.0.1" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55512d988c6fbd76e514fd3ff537ac50b0a675da5a245e4fdad77ecfd654205f" +checksum = "01a0fc21915b00fc6c2667b069c1b64bdd920982f426079bc4a7cab86822886c" dependencies = [ "bytes", "half", @@ -196,15 +196,16 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "48.0.1" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "655ee51a2156ba5375931ce21c1b2494b1d9260e6dcdc6d4db9060c37dc3325b" +checksum = "5dc0368ed618d509636c1e3cc20db1281148190a78f43519487b2daf07b63b4a" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", + "base64", "chrono", "comfy-table", "half", @@ -214,9 +215,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "48.0.1" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "258bb689997ad5b6660b3ce3638bd6b383d668ec555ed41ad7c6559cbb2e4f91" +checksum = "2e09aa6246a1d6459b3f14baeaa49606cfdbca34435c46320e14054d244987ca" dependencies = [ "arrow-array", "arrow-buffer", @@ -233,9 +234,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "48.0.1" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dc2b9fec74763427e2e5575b8cc31ce96ba4c9b4eb05ce40e0616d9fad12461" +checksum = "907fafe280a3874474678c1858b9ca4cb7fd83fb8034ff5b6d6376205a08c634" dependencies = [ "arrow-buffer", 
"arrow-schema", @@ -245,9 +246,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "48.0.1" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eaa6ab203cc6d89b7eaa1ac781c1dfeef325454c5d5a0419017f95e6bafc03c" +checksum = "79a43d6808411886b8c7d4f6f7dd477029c1e77ffffffb7923555cc6579639cd" dependencies = [ "arrow-array", "arrow-buffer", @@ -259,9 +260,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "48.0.1" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb64e30d9b73f66fdc5c52d5f4cf69bbf03d62f64ffeafa0715590a5320baed7" +checksum = "d82565c91fd627922ebfe2810ee4e8346841b6f9361b87505a9acea38b614fee" dependencies = [ "arrow-array", "arrow-buffer", @@ -279,9 +280,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "48.0.1" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a818951c0d11c428dda03e908175969c262629dd20bd0850bd6c7a8c3bfe48" +checksum = "9b23b0e53c0db57c6749997fd343d4c0354c994be7eca67152dd2bdb9a3e1bb4" dependencies = [ "arrow-array", "arrow-buffer", @@ -294,9 +295,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "48.0.1" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5d664318bc05f930559fc088888f0f7174d3c5bc888c0f4f9ae8f23aa398ba3" +checksum = "361249898d2d6d4a6eeb7484be6ac74977e48da12a4dd81a708d620cc558117a" dependencies = [ "ahash", "arrow-array", @@ -304,23 +305,23 @@ dependencies = [ "arrow-data", "arrow-schema", "half", - "hashbrown 0.14.2", + "hashbrown 0.14.3", ] [[package]] name = "arrow-schema" -version = "48.0.1" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aaf4d737bba93da59f16129bec21e087aed0be84ff840e74146d4703879436cb" +checksum = "09e28a5e781bf1b0f981333684ad13f5901f4cd2f20589eab7cf1797da8fc167" dependencies = [ "bitflags 2.4.1", ] [[package]] name 
= "arrow-select" -version = "48.0.1" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "374c4c3b812ecc2118727b892252a4a4308f87a8aca1dbf09f3ce4bc578e668a" +checksum = "4f6208466590960efc1d2a7172bc4ff18a67d6e25c529381d7f96ddaf0dc4036" dependencies = [ "ahash", "arrow-array", @@ -332,9 +333,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "48.0.1" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15aed5624bb23da09142f58502b59c23f5bea607393298bb81dab1ce60fc769" +checksum = "a4a48149c63c11c9ff571e50ab8f017d2a7cb71037a882b42f6354ed2da9acc7" dependencies = [ "arrow-array", "arrow-buffer", @@ -372,7 +373,7 @@ checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -383,7 +384,7 @@ checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -551,7 +552,7 @@ dependencies = [ "iana-time-zone", "num-traits", "serde", - "windows-targets", + "windows-targets 0.48.5", ] [[package]] @@ -615,9 +616,9 @@ checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" [[package]] name = "core-foundation" -version = "0.9.3" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" dependencies = [ "core-foundation-sys", "libc", @@ -625,9 +626,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.4" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" 
[[package]] name = "core2" @@ -706,7 +707,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" dependencies = [ "cfg-if", - "hashbrown 0.14.2", + "hashbrown 0.14.3", "lock_api", "once_cell", "parking_lot_core", @@ -714,9 +715,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "676796427e638d85e9eadf13765705212be60b34f8fc5d3934d95184c63ca1b4" +checksum = "193fd1e7628278d0641c5122860f9a7fd6a1d77d055838d12f55d15bbe28d4d0" dependencies = [ "ahash", "apache-avro", @@ -740,9 +741,9 @@ dependencies = [ "futures", "glob", "half", - "hashbrown 0.14.2", + "hashbrown 0.14.3", "indexmap", - "itertools", + "itertools 0.12.0", "log", "num-traits", "num_cpus", @@ -763,9 +764,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e23b3d21a6531259d291bd20ce59282ea794bda1018b0a1e278c13cd52e50c" +checksum = "548bc49c4a489e3de474813831ea556dc9d368f9ed8d867b1493da42e8e9f613" dependencies = [ "ahash", "apache-avro", @@ -775,6 +776,7 @@ dependencies = [ "arrow-schema", "chrono", "half", + "libc", "num_cpus", "object_store", "parquet", @@ -784,9 +786,9 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4de1fd0d8db0f2b8e4f4121bfa1c7c09d3a5c08a0a65c2229cd849eb65cff855" +checksum = "ecc865657ffcf4da5ff08bdc6436a9a833bc0aa96c3254c8d18ab8a0ad4e437d" dependencies = [ "arrow", "chrono", @@ -794,7 +796,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "futures", - "hashbrown 0.14.2", + "hashbrown 0.14.3", "log", "object_store", "parking_lot", @@ -805,14 +807,15 @@ dependencies = [ [[package]] name = "datafusion-expr" 
-version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18e227fe88bf6730cab378d0cd8fc4c6b2ea42bc7e414a8ea9feba7225932735" +checksum = "33c473f72d8d81a532e63f6e562ed66dd9209dfd8e433d9712abd42444ee161e" dependencies = [ "ahash", "arrow", "arrow-array", "datafusion-common", + "paste", "sqlparser", "strum", "strum_macros", @@ -820,9 +823,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6648e62ea7605b9bfcd87fdc9d67e579c3b9ac563a87734ae5fe6d79ee4547" +checksum = "cb6218318001d2f6783b7fffa17592318f65f26609d7aab605a3dd0c7c2e2618" dependencies = [ "arrow", "async-trait", @@ -830,17 +833,17 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "hashbrown 0.14.2", - "itertools", + "hashbrown 0.14.3", + "itertools 0.12.0", "log", "regex-syntax", ] [[package]] name = "datafusion-physical-expr" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f32b8574add16a32411a9b3fb3844ac1fc09ab4e7be289f86fd56d620e4f2508" +checksum = "9e1ca7e35ca22f9dc506c2375b92054b03ccf91afe25c0a90b395a1473a09735" dependencies = [ "ahash", "arrow", @@ -855,11 +858,10 @@ dependencies = [ "datafusion-common", "datafusion-expr", "half", - "hashbrown 0.14.2", + "hashbrown 0.14.3", "hex", "indexmap", - "itertools", - "libc", + "itertools 0.12.0", "log", "md-5", "paste", @@ -873,9 +875,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "796abd77d5bfecd9e5275a99daf0ec45f5b3a793ec431349ce8211a67826fd22" +checksum = "ddde97adefcca3a55257c646ffee2a95b6cac66f74d1146a6e3a6dbb37830631" dependencies = [ "ahash", "arrow", @@ -890,9 +892,9 @@ dependencies = [ "datafusion-physical-expr", 
"futures", "half", - "hashbrown 0.14.2", + "hashbrown 0.14.3", "indexmap", - "itertools", + "itertools 0.12.0", "log", "once_cell", "parking_lot", @@ -904,7 +906,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "33.0.0" +version = "34.0.0" dependencies = [ "async-trait", "datafusion", @@ -923,7 +925,7 @@ dependencies = [ "pyo3-build-config", "rand", "regex-syntax", - "syn 2.0.39", + "syn 2.0.41", "tokio", "url", "uuid", @@ -931,9 +933,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced70b8a5648ba7b95c61fc512183c33287ffe2c9f22ffe22700619d7d48c84f" +checksum = "a60d9d6460a64fddb8663db41da97e6b8b0bf79da42f997ebe81722731eaf0e5" dependencies = [ "arrow", "arrow-schema", @@ -945,14 +947,14 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f8fe74a4541eccbd096aa9fa3e734f1387a831613c9a3cb4f06bf7283ec77b7" +checksum = "2243428e8df8b2c5ce2c9e3af588312ae89c1b073c52de5693ec44ba29674547" dependencies = [ "async-recursion", "chrono", "datafusion", - "itertools", + "itertools 0.12.0", "object_store", "prost", "prost-types", @@ -1006,12 +1008,12 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f258a7194e7f7c2a7837a8913aeab7fd8c383457034fa20ce4dd3dcb813e8eb8" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -1054,9 +1056,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "form_urlencoded" -version = "1.2.0" +version = "1.2.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" dependencies = [ "percent-encoding", ] @@ -1117,7 +1119,7 @@ checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -1173,9 +1175,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.28.0" +version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" [[package]] name = "git2" @@ -1237,9 +1239,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.14.2" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93e7192158dbcda357bdec5fb5788eebf8bbac027f3f33e719d29135ae84156" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" dependencies = [ "ahash", "allocator-api2", @@ -1265,11 +1267,11 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "home" -version = "0.5.5" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" dependencies = [ - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -1285,9 +1287,9 @@ dependencies = [ [[package]] name = "http-body" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" dependencies = [ "bytes", "http", 
@@ -1375,9 +1377,9 @@ dependencies = [ [[package]] name = "idna" -version = "0.4.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" dependencies = [ "unicode-bidi", "unicode-normalization", @@ -1390,7 +1392,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" dependencies = [ "equivalent", - "hashbrown 0.14.2", + "hashbrown 0.14.3", ] [[package]] @@ -1420,11 +1422,20 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0" +dependencies = [ + "either", +] + [[package]] name = "itoa" -version = "1.0.9" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "jobserver" @@ -1437,9 +1448,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.65" +version = "0.3.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54c0c35952f67de54bb584e9fd912b3023117cbafc0a77d8f3dee1fb5f572fe8" +checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca" dependencies = [ "wasm-bindgen", ] @@ -1516,9 +1527,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.150" +version = "0.2.151" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" +checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" [[package]] name = "libflate" @@ -1586,9 +1597,9 @@ 
dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "969488b55f8ac402214f3f5fd243ebb7206cf82de60d3172994707a4bcc2b829" +checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" [[package]] name = "lock_api" @@ -1677,13 +1688,13 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.9" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dce281c5e46beae905d4de1870d8b1509a9142b62eedf18b443b011ca8343d0" +checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09" dependencies = [ "libc", "wasi", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -1790,9 +1801,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.7.1" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f930c88a43b1c3f6e776dfe495b4afab89882dbc81530c632db2ed65451ebcb4" +checksum = "2524735495ea1268be33d200e1ee97455096a0846295a21548cd2f3541de7050" dependencies = [ "async-trait", "base64", @@ -1801,13 +1812,13 @@ dependencies = [ "futures", "humantime", "hyper", - "itertools", + "itertools 0.11.0", "parking_lot", "percent-encoding", "quick-xml", "rand", "reqwest", - "ring 0.16.20", + "ring", "rustls-pemfile", "serde", "serde_json", @@ -1820,9 +1831,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "ordered-float" @@ -1853,14 +1864,14 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-targets", + "windows-targets 0.48.5", ] [[package]] name = "parquet" -version = "48.0.1" +version = "49.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bfe55df96e3f02f11bf197ae37d91bb79801631f82f6195dd196ef521df3597" +checksum = "af88740a842787da39b3d69ce5fbf6fce97d20211d3b299fee0a0da6430c74d4" dependencies = [ "ahash", "arrow-array", @@ -1876,7 +1887,7 @@ dependencies = [ "chrono", "flate2", "futures", - "hashbrown 0.14.2", + "hashbrown 0.14.3", "lz4_flex", "num", "num-bigint", @@ -1907,9 +1918,9 @@ checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" [[package]] name = "percent-encoding" -version = "2.3.0" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "petgraph" @@ -1990,23 +2001,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" dependencies = [ "proc-macro2", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] name = "proc-macro2" -version = "1.0.69" +version = "1.0.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da" +checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" dependencies = [ "unicode-ident", ] [[package]] name = "prost" -version = "0.12.2" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5a410fc7882af66deb8d01d01737353cf3ad6204c408177ba494291a626312" +checksum = "146c289cda302b98a28d40c8b3b90498d6e526dd24ac2ecea73e4e491685b94a" dependencies = [ "bytes", "prost-derive", @@ -2014,13 +2025,13 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.12.2" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fa3d084c8704911bfefb2771be2f9b6c5c0da7343a71e0021ee3c665cada738" +checksum = 
"c55e02e35260070b6f716a2423c2ff1c3bb1642ddca6f99e1f26d06268a0e2d2" dependencies = [ "bytes", "heck", - "itertools", + "itertools 0.11.0", "log", "multimap", "once_cell", @@ -2034,22 +2045,22 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.12.2" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "065717a5dfaca4a83d2fe57db3487b311365200000551d7a364e715dbf4346bc" +checksum = "efb6c9a1dd1def8e2124d17e83a20af56f1570d6c2d2bd9e266ccb768df3840e" dependencies = [ "anyhow", - "itertools", + "itertools 0.11.0", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] name = "prost-types" -version = "0.12.2" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8339f32236f590281e2f6368276441394fcd1b2133b549cc895d0ae80f2f9a52" +checksum = "193898f59edcf43c26227dcd4c8427f00d99d61e95dcde58dabd49fa291d470e" dependencies = [ "prost", ] @@ -2109,7 +2120,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -2121,7 +2132,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -2132,9 +2143,9 @@ checksum = "658fa1faf7a4cc5f057c9ee5ef560f717ad9d8dc66d975267f709624d6e1ab88" [[package]] name = "quick-xml" -version = "0.30.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956" +checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" dependencies = [ "memchr", "serde", @@ -2277,31 +2288,16 @@ dependencies = [ [[package]] name = "ring" -version = "0.16.20" +version = "0.17.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" -dependencies = [ - "cc", - "libc", - "once_cell", - "spin 0.5.2", - "untrusted 0.7.1", - "web-sys", - "winapi", 
-] - -[[package]] -name = "ring" -version = "0.17.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb0205304757e5d899b9c2e448b867ffd03ae7f988002e47cd24954391394d0b" +checksum = "688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74" dependencies = [ "cc", "getrandom", "libc", - "spin 0.9.8", - "untrusted 0.9.0", - "windows-sys", + "spin", + "untrusted", + "windows-sys 0.48.0", ] [[package]] @@ -2327,25 +2323,25 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.24" +version = "0.38.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ad981d6c340a49cdc40a1028d9c6084ec7e9fa33fcb839cab656a267071e234" +checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316" dependencies = [ "bitflags 2.4.1", "errno", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] name = "rustls" -version = "0.21.9" +version = "0.21.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "629648aced5775d558af50b2b4c7b02983a04b312126d45eeead26e7caa498b9" +checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" dependencies = [ "log", - "ring 0.17.5", + "ring", "rustls-webpki", "sct", ] @@ -2365,8 +2361,8 @@ version = "0.101.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies = [ - "ring 0.17.5", - "untrusted 0.9.0", + "ring", + "untrusted", ] [[package]] @@ -2377,9 +2373,9 @@ checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" [[package]] name = "ryu" -version = "1.0.15" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" [[package]] name = "same-file" @@ -2426,8 +2422,8 @@ version = "0.7.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" dependencies = [ - "ring 0.17.5", - "untrusted 0.9.0", + "ring", + "untrusted", ] [[package]] @@ -2444,22 +2440,22 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.192" +version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bca2a08484b285dcb282d0f67b26cadc0df8b19f8c12502c13d966bf9482f001" +checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.192" +version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6c7207fbec9faa48073f3e3074cbe553af6ea512d7c21ba46e434e70ea9fbc1" +checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -2493,7 +2489,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -2577,9 +2573,9 @@ dependencies = [ [[package]] name = "snap" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" @@ -2598,15 +2594,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.48.0", ] -[[package]] -name = "spin" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" - [[package]] name = "spin" version = "0.9.8" @@ -2615,9 
+2605,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.39.0" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "743b4dc2cbde11890ccb254a8fc9d537fa41b36da00de2a1c5e9848c9bc42bd7" +checksum = "7c80afe31cdb649e56c0d9bb5503be9166600d68a852c38dd445636d126858e5" dependencies = [ "log", "sqlparser_derive", @@ -2625,9 +2615,9 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.1.1" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55fe75cb4a364c7f7ae06c7dbbc8d84bddd85d6cdf9975963c3935bc1991761e" +checksum = "3e9c2e1dde0efa87003e7923d94a90f46e3274ad1649f51de96812be561f041f" dependencies = [ "proc-macro2", "quote", @@ -2659,14 +2649,14 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] name = "substrait" -version = "0.19.0" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7299fc531294d189834eeaf7928482f311c0ada2cf0007948989cf75d0228183" +checksum = "bdbe404d89610b54be889fb6540bcc6a9e234d0991cde0a8a9e341559768aa75" dependencies = [ "git2", "heck", @@ -2680,7 +2670,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.39", + "syn 2.0.41", "typify", "walkdir", ] @@ -2704,9 +2694,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.39" +version = "2.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" +checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269" dependencies = [ "proc-macro2", "quote", @@ -2750,27 +2740,27 @@ dependencies = [ "fastrand", "redox_syscall", "rustix", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] name = "thiserror" -version = "1.0.50" +version = "1.0.51" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a7210f5c9a7156bb50aa36aed4c95afb51df0df00713949448cf9e97d382d2" +checksum = "f11c217e1416d6f036b870f14e0413d480dbf28edbee1f877abaf0206af43bb7" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.50" +version = "1.0.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" +checksum = "01742297787513b79cf8e29d1056ede1313e2420b7b3b15d0a768b4921f549df" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -2810,9 +2800,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.34.0" +version = "1.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0c014766411e834f7af5b8f4cf46257aab4036ca95e9d2c144a10f59ad6f5b9" +checksum = "841d45b238a16291a4e1584e61820b8ae57d696cc5015c459c229ccc6990cc1c" dependencies = [ "backtrace", "bytes", @@ -2823,7 +2813,7 @@ dependencies = [ "pin-project-lite", "socket2 0.5.5", "tokio-macros", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -2834,7 +2824,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -2886,7 +2876,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -2900,9 +2890,9 @@ dependencies = [ [[package]] name = "try-lock" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "twox-hash" @@ -2931,7 +2921,7 @@ checksum = 
"f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -2942,9 +2932,9 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "typify" -version = "0.0.14" +version = "0.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2e3b707a653e2915a2fc2c4ee96a3d30b9554b9435eb4cc8b5c6c74bbdd3044" +checksum = "63ed4d717aa95e598e2f9183376b060e95669ef8f444701ea6afb990fde1cf69" dependencies = [ "typify-impl", "typify-macro", @@ -2952,9 +2942,9 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.0.14" +version = "0.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d9c752192779f666e4c868672dee56a652b82c08032c7e9d23f6a845b282298" +checksum = "89057244dfade7c58af9e62beccbcbeb7a7e7701697a33b06dbe0b7331fb79cf" dependencies = [ "heck", "log", @@ -2963,16 +2953,16 @@ dependencies = [ "regress", "schemars", "serde_json", - "syn 2.0.39", + "syn 2.0.41", "thiserror", "unicode-ident", ] [[package]] name = "typify-macro" -version = "0.0.14" +version = "0.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a14defd554507e72a2bb93cd081c8b374cfed43b3d986b141ad3839d9fd6986b" +checksum = "2ddade397f5957d2cd7fb27f905a9a569db20e8e1e3ea589edce40be07b92825" dependencies = [ "proc-macro2", "quote", @@ -2980,15 +2970,15 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.39", + "syn 2.0.41", "typify-impl", ] [[package]] name = "unicode-bidi" -version = "0.3.13" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" +checksum = "6f2528f27a9eb2b21e69c95319b30bd0efd85d09c379741b0f78ea1d86be2416" [[package]] name = "unicode-ident" @@ -3025,15 +3015,9 @@ checksum = 
"c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" [[package]] name = "unsafe-libyaml" -version = "0.2.9" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f28467d3e1d3c6586d8f25fa243f544f5800fec42d97032474e17222c2b75cfa" - -[[package]] -name = "untrusted" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" +checksum = "ab4c90930b95a82d00dc9e9ac071b4991924390d46cbd0dfe566148667605e4b" [[package]] name = "untrusted" @@ -3043,9 +3027,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.4.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" dependencies = [ "form_urlencoded", "idna", @@ -3054,9 +3038,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.5.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ad59a7560b41a70d191093a945f0b87bc1deeda46fb237479708a1d6b6cdfc" +checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" dependencies = [ "getrandom", "serde", @@ -3101,9 +3085,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.88" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7daec296f25a1bae309c0cd5c29c4b260e510e6d813c286b19eaadf409d40fce" +checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -3111,24 +3095,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.88" +version = "0.2.89" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e397f4664c0e4e428e8313a469aaa58310d302159845980fd23b0f22a847f217" +checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.38" +version = "0.4.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9afec9963e3d0994cac82455b2b3502b81a7f40f9a0d32181f7528d9f4b43e02" +checksum = "ac36a15a220124ac510204aec1c3e5db8a22ab06fd6706d881dc6149f8ed9a12" dependencies = [ "cfg-if", "js-sys", @@ -3138,9 +3122,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.88" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5961017b3b08ad5f3fe39f1e79877f8ee7c23c5e5fd5eb80de95abc41f1f16b2" +checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3148,22 +3132,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.88" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5353b8dab669f5e10f5bd76df26a9360c748f054f862ff5f3f8aae0c7fb3907" +checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.88" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d046c5d029ba91a1ed14da14dca44b68bf2f124cfbaf741c54151fdb3e0750b" +checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f" [[package]] name = "wasm-streams" @@ -3180,9 +3164,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.65" +version = 
"0.3.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5db499c5f66323272151db0e666cd34f78617522fb0c1604d31a27c50c206a85" +checksum = "50c24a44ec86bb68fbecd1b3efed7e85ea5621b39b35ef2766b66cd984f8010f" dependencies = [ "js-sys", "wasm-bindgen", @@ -3190,9 +3174,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.25.2" +version = "0.25.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14247bb57be4f377dfb94c72830b8ce8fc6beac03cf4bf7b9732eadd414123fc" +checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10" [[package]] name = "which" @@ -3243,7 +3227,7 @@ version = "0.51.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64" dependencies = [ - "windows-targets", + "windows-targets 0.48.5", ] [[package]] @@ -3252,7 +3236,16 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - "windows-targets", + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", ] [[package]] @@ -3261,13 +3254,28 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + 
"windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", ] [[package]] @@ -3276,42 +3284,84 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + [[package]] name = "windows_i686_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + [[package]] name = "windows_i686_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" + [[package]] name = "winreg" version = "0.50.0" @@ -3319,7 +3369,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" dependencies = [ "cfg-if", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -3333,22 +3383,22 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.7.26" +version = "0.7.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e97e415490559a91254a2979b4829267a57d2fcd741a98eee8b722fb57289aa0" +checksum = 
"1c4061bedbb353041c12f413700357bec76df2c7e2ca8e4df8bac24c6bf68e3d" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.26" +version = "0.7.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd7e48ccf166952882ca8bd778a43502c64f33bf94c12ebe2a7f08e5a0f6689f" +checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index ba4216839..08f566d2d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "33.0.0" +version = "34.0.0" homepage = "https://github.com/apache/arrow-datafusion-python" repository = "https://github.com/apache/arrow-datafusion-python" authors = ["Apache Arrow "] @@ -37,19 +37,19 @@ substrait = ["dep:datafusion-substrait"] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.20", features = ["extension-module", "abi3", "abi3-py38"] } -datafusion = { version = "33.0.0", features = ["pyarrow", "avro"] } -datafusion-common = { version = "33.0.0", features = ["pyarrow"] } -datafusion-expr = { version = "33.0.0" } -datafusion-optimizer = { version = "33.0.0" } -datafusion-sql = { version = "33.0.0" } -datafusion-substrait = { version = "33.0.0", optional = true } +datafusion = { version = "34.0.0", features = ["pyarrow", "avro"] } +datafusion-common = { version = "34.0.0", features = ["pyarrow"] } +datafusion-expr = { version = "34.0.0" } +datafusion-optimizer = { version = "34.0.0" } +datafusion-sql = { version = "34.0.0" } +datafusion-substrait = { version = "34.0.0", optional = true } prost = "0.12" prost-types = "0.12" uuid = { version = "1.3", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } async-trait = "0.1" futures = "0.3" -object_store = { 
version = "0.7.0", features = ["aws", "gcp", "azure"] } +object_store = { version = "0.8.0", features = ["aws", "gcp", "azure"] } parking_lot = "0.12" regex-syntax = "0.8.1" syn = "2.0.37" diff --git a/src/common/data_type.rs b/src/common/data_type.rs index 9719226a2..03248cc5b 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -291,9 +291,13 @@ impl DataTypeMap { ScalarValue::List(arr) => Ok(arr.data_type().to_owned()), ScalarValue::Struct(_, fields) => Ok(DataType::Struct(fields.to_owned())), ScalarValue::FixedSizeBinary(size, _) => Ok(DataType::FixedSizeBinary(*size)), - ScalarValue::Fixedsizelist(_, field_ref, size) => { - Ok(DataType::FixedSizeList(field_ref.to_owned(), *size)) + ScalarValue::FixedSizeList(_array_ref) => { + // The FieldRef was removed from ScalarValue::FixedSizeList in + // https://github.com/apache/arrow-datafusion/pull/8221, so we can no + // longer convert back to a DataType here + todo!() } + ScalarValue::LargeList(_) => todo!(), ScalarValue::DurationSecond(_) => Ok(DataType::Duration(TimeUnit::Second)), ScalarValue::DurationMillisecond(_) => Ok(DataType::Duration(TimeUnit::Millisecond)), ScalarValue::DurationMicrosecond(_) => Ok(DataType::Duration(TimeUnit::Microsecond)), diff --git a/src/common/schema.rs b/src/common/schema.rs index 77b0ce2ba..00113a510 100644 --- a/src/common/schema.rs +++ b/src/common/schema.rs @@ -21,7 +21,7 @@ use datafusion::arrow::datatypes::SchemaRef; use datafusion_expr::{Expr, TableProviderFilterPushDown, TableSource}; use pyo3::prelude::*; -use datafusion_optimizer::utils::split_conjunction; +use datafusion_expr::utils::split_conjunction; use super::{data_type::DataTypeMap, function::SqlFunction}; diff --git a/src/context.rs b/src/context.rs index fb7ca8385..63c48f007 100644 --- a/src/context.rs +++ b/src/context.rs @@ -377,7 +377,7 @@ impl PySessionContext { // Because create_dataframe() expects a vector of vectors of record batches // here we need to wrap the vector of record batches in an 
additional vector let batches = table.extract::>>(py)?; - let list_of_batches = PyArrowType::try_from(vec![batches.0])?; + let list_of_batches = PyArrowType::from(vec![batches.0]); self.create_dataframe(list_of_batches, name, py) }) } diff --git a/src/dataframe.rs b/src/dataframe.rs index ebf06f3d2..1e8790990 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -36,7 +36,7 @@ use std::sync::Arc; /// The actual execution of a plan runs natively on Rust and Arrow on a multi-threaded environment. #[pyclass(name = "DataFrame", module = "datafusion", subclass)] #[derive(Clone)] -pub(crate) struct PyDataFrame { +pub struct PyDataFrame { df: Arc, } diff --git a/src/dataset_exec.rs b/src/dataset_exec.rs index 0266ad9e3..89f73a93d 100644 --- a/src/dataset_exec.rs +++ b/src/dataset_exec.rs @@ -38,8 +38,8 @@ use datafusion::physical_plan::{ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, Statistics, }; +use datafusion_expr::utils::conjunction; use datafusion_expr::Expr; -use datafusion_optimizer::utils::conjunction; use crate::errors::DataFusionError; use crate::pyarrow_filter_expression::PyArrowFilterExpression; diff --git a/src/expr.rs b/src/expr.rs index 3875fb381..dbb56ee99 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -24,10 +24,7 @@ use datafusion::scalar::ScalarValue; use datafusion_common::DFField; use datafusion_expr::{ col, - expr::{ - AggregateFunction, AggregateUDF, InList, InSubquery, ScalarFunction, ScalarUDF, Sort, - WindowFunction, - }, + expr::{AggregateFunction, InList, InSubquery, ScalarFunction, Sort, WindowFunction}, lit, utils::exprlist_to_fields, Between, BinaryExpr, Case, Cast, Expr, GetFieldAccess, GetIndexedField, Like, LogicalPlan, @@ -283,10 +280,8 @@ impl PyExpr { | Expr::ScalarFunction { .. } | Expr::AggregateFunction { .. } | Expr::WindowFunction { .. } - | Expr::AggregateUDF { .. } | Expr::InList { .. } | Expr::Wildcard { .. } - | Expr::ScalarUDF { .. } | Expr::Exists { .. } | Expr::InSubquery { .. 
} | Expr::GroupingSet(..) @@ -351,7 +346,8 @@ impl PyExpr { ScalarValue::DurationMillisecond(v) => v.into_py(py), ScalarValue::Struct(_, _) => todo!(), ScalarValue::Dictionary(_, _) => todo!(), - ScalarValue::Fixedsizelist(_, _, _) => todo!(), + ScalarValue::FixedSizeList(_) => todo!(), + ScalarValue::LargeList(_) => todo!(), }), _ => Err(py_type_err(format!( "Non Expr::Literal encountered in types: {:?}", @@ -391,9 +387,7 @@ impl PyExpr { // Expr variants containing a collection of Expr(s) for operands Expr::AggregateFunction(AggregateFunction { args, .. }) - | Expr::AggregateUDF(AggregateUDF { args, .. }) | Expr::ScalarFunction(ScalarFunction { args, .. }) - | Expr::ScalarUDF(ScalarUDF { args, .. }) | Expr::WindowFunction(WindowFunction { args, .. }) => { Ok(args.iter().map(|arg| PyExpr::from(arg.clone())).collect()) } @@ -480,8 +474,9 @@ impl PyExpr { op, right: _, }) => format!("{op}"), - Expr::ScalarFunction(ScalarFunction { fun, args: _ }) => format!("{fun}"), - Expr::ScalarUDF(ScalarUDF { fun, .. }) => fun.name.clone(), + Expr::ScalarFunction(ScalarFunction { func_def, args: _ }) => { + func_def.name().to_string() + } Expr::Cast { .. } => "cast".to_string(), Expr::Between { .. } => "between".to_string(), Expr::Case { .. } => "case".to_string(), diff --git a/src/expr/aggregate.rs b/src/expr/aggregate.rs index 5ebf8c6cf..626d92c79 100644 --- a/src/expr/aggregate.rs +++ b/src/expr/aggregate.rs @@ -16,7 +16,7 @@ // under the License. use datafusion_common::DataFusionError; -use datafusion_expr::expr::{AggregateFunction, AggregateUDF, Alias}; +use datafusion_expr::expr::{AggregateFunction, Alias}; use datafusion_expr::logical_plan::Aggregate; use datafusion_expr::Expr; use pyo3::prelude::*; @@ -126,10 +126,9 @@ impl PyAggregate { match expr { // TODO: This Alias logic seems to be returning some strange results that we should investigate Expr::Alias(Alias { expr, .. 
}) => self._aggregation_arguments(expr.as_ref()), - Expr::AggregateFunction(AggregateFunction { fun: _, args, .. }) - | Expr::AggregateUDF(AggregateUDF { fun: _, args, .. }) => { - Ok(args.iter().map(|e| PyExpr::from(e.clone())).collect()) - } + Expr::AggregateFunction(AggregateFunction { + func_def: _, args, .. + }) => Ok(args.iter().map(|e| PyExpr::from(e.clone())).collect()), _ => Err(py_type_err( "Encountered a non Aggregate type in aggregation_arguments", )), @@ -139,8 +138,9 @@ impl PyAggregate { fn _agg_func_name(expr: &Expr) -> PyResult { match expr { Expr::Alias(Alias { expr, .. }) => Self::_agg_func_name(expr.as_ref()), - Expr::AggregateFunction(AggregateFunction { fun, .. }) => Ok(fun.to_string()), - Expr::AggregateUDF(AggregateUDF { fun, .. }) => Ok(fun.name.clone()), + Expr::AggregateFunction(AggregateFunction { func_def, .. }) => { + Ok(func_def.name().to_owned()) + } _ => Err(py_type_err( "Encountered a non Aggregate type in agg_func_name", )), diff --git a/src/expr/aggregate_expr.rs b/src/expr/aggregate_expr.rs index 180105180..04ec29a15 100644 --- a/src/expr/aggregate_expr.rs +++ b/src/expr/aggregate_expr.rs @@ -41,7 +41,7 @@ impl From for PyAggregateFunction { impl Display for PyAggregateFunction { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { let args: Vec = self.aggr.args.iter().map(|expr| expr.to_string()).collect(); - write!(f, "{}({})", self.aggr.fun, args.join(", ")) + write!(f, "{}({})", self.aggr.func_def.name(), args.join(", ")) } } @@ -49,7 +49,7 @@ impl Display for PyAggregateFunction { impl PyAggregateFunction { /// Get the aggregate type, such as "MIN", or "MAX" fn aggregate_type(&self) -> String { - format!("{}", self.aggr.fun) + self.aggr.func_def.name().to_string() } /// is this a distinct aggregate such as `COUNT(DISTINCT expr)` diff --git a/src/expr/distinct.rs b/src/expr/distinct.rs index 681ae953b..5d3a0b459 100644 --- a/src/expr/distinct.rs +++ b/src/expr/distinct.rs @@ -44,12 +44,22 @@ impl From for PyDistinct { impl 
Display for PyDistinct { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "Distinct + match &self.distinct { + Distinct::All(input) => write!( + f, + "Distinct ALL \nInput: {:?}", - self.distinct.input, - ) + input, + ), + Distinct::On(distinct_on) => { + write!( + f, + "Distinct ON + \nInput: {:?}", + distinct_on.input, + ) + } + } } } @@ -71,7 +81,12 @@ impl PyDistinct { impl LogicalNode for PyDistinct { fn inputs(&self) -> Vec { - vec![PyLogicalPlan::from((*self.distinct.input).clone())] + match &self.distinct { + Distinct::All(input) => vec![PyLogicalPlan::from(input.as_ref().clone())], + Distinct::On(distinct_on) => { + vec![PyLogicalPlan::from(distinct_on.input.as_ref().clone())] + } + } } fn to_variant(&self, py: Python) -> PyResult { diff --git a/src/functions.rs b/src/functions.rs index b8c825552..d1f3e807b 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -23,11 +23,11 @@ use crate::expr::conditional_expr::PyCaseBuilder; use crate::expr::window::PyWindowFrame; use crate::expr::PyExpr; use datafusion::execution::FunctionRegistry; -use datafusion_common::Column; +use datafusion_common::{Column, TableReference}; use datafusion_expr::expr::Alias; use datafusion_expr::{ aggregate_function, - expr::{AggregateFunction, ScalarFunction, Sort, WindowFunction}, + expr::{AggregateFunction, AggregateFunctionDefinition, ScalarFunction, Sort, WindowFunction}, lit, window_function::find_df_window_func, BuiltinScalarFunction, Expr, @@ -88,8 +88,9 @@ fn order_by(expr: PyExpr, asc: Option, nulls_first: Option) -> PyRes /// Creates a new Alias Expr #[pyfunction] fn alias(expr: PyExpr, name: &str) -> PyResult { + let relation: Option = None; Ok(PyExpr { - expr: datafusion_expr::Expr::Alias(Alias::new(expr.expr, name)), + expr: datafusion_expr::Expr::Alias(Alias::new(expr.expr, relation, name)), }) } @@ -109,7 +110,9 @@ fn col(name: &str) -> PyResult { fn count_star() -> PyResult { Ok(PyExpr { expr: Expr::AggregateFunction(AggregateFunction { - fun: 
aggregate_function::AggregateFunction::Count, + func_def: datafusion_expr::expr::AggregateFunctionDefinition::BuiltIn( + aggregate_function::AggregateFunction::Count, + ), args: vec![lit(1)], distinct: false, filter: None, @@ -181,7 +184,9 @@ macro_rules! scalar_function { #[pyo3(signature = (*args))] fn $NAME(args: Vec) -> PyExpr { let expr = datafusion_expr::Expr::ScalarFunction(ScalarFunction { - fun: BuiltinScalarFunction::$FUNC, + func_def: datafusion_expr::ScalarFunctionDefinition::BuiltIn( + BuiltinScalarFunction::$FUNC, + ), args: args.into_iter().map(|e| e.into()).collect(), }); expr.into() @@ -199,7 +204,9 @@ macro_rules! aggregate_function { #[pyo3(signature = (*args, distinct=false))] fn $NAME(args: Vec, distinct: bool) -> PyExpr { let expr = datafusion_expr::Expr::AggregateFunction(AggregateFunction { - fun: datafusion_expr::aggregate_function::AggregateFunction::$FUNC, + func_def: AggregateFunctionDefinition::BuiltIn( + datafusion_expr::aggregate_function::AggregateFunction::$FUNC, + ), args: args.into_iter().map(|e| e.into()).collect(), distinct, filter: None, diff --git a/src/udaf.rs b/src/udaf.rs index 596ed6904..5c43b6710 100644 --- a/src/udaf.rs +++ b/src/udaf.rs @@ -172,6 +172,6 @@ impl PyAggregateUDF { } fn __repr__(&self) -> PyResult { - Ok(format!("AggregateUDF({})", self.function.name)) + Ok(format!("AggregateUDF({})", self.function.name())) } } diff --git a/src/udf.rs b/src/udf.rs index bba8ae551..af17b57e8 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -95,6 +95,6 @@ impl PyScalarUDF { } fn __repr__(&self) -> PyResult { - Ok(format!("ScalarUDF({})", self.function.name)) + Ok(format!("ScalarUDF({})", self.function.name())) } } From 76d7fcffdd9d8664a003b76754033ebad4a15847 Mon Sep 17 00:00:00 2001 From: Dan Lovell Date: Thu, 28 Dec 2023 15:10:46 -0500 Subject: [PATCH 105/413] feat: udaf: enable multiple column input (#546) --- datafusion/__init__.py | 2 ++ src/udaf.rs | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git 
a/datafusion/__init__.py b/datafusion/__init__.py index c854f3f9d..df53b396a 100644 --- a/datafusion/__init__.py +++ b/datafusion/__init__.py @@ -213,6 +213,8 @@ def udaf(accum, input_type, return_type, state_type, volatility, name=None): ) if name is None: name = accum.__qualname__.lower() + if isinstance(input_type, pa.lib.DataType): + input_type = [input_type] return AggregateUDF( name=name, accumulator=accum, diff --git a/src/udaf.rs b/src/udaf.rs index 5c43b6710..0e7a8deab 100644 --- a/src/udaf.rs +++ b/src/udaf.rs @@ -148,14 +148,14 @@ impl PyAggregateUDF { fn new( name: &str, accumulator: PyObject, - input_type: PyArrowType, + input_type: PyArrowType>, return_type: PyArrowType, state_type: PyArrowType>, volatility: &str, ) -> PyResult { let function = create_udaf( name, - vec![input_type.0], + input_type.0, Arc::new(return_type.0), parse_volatility(volatility)?, to_rust_accumulator(accumulator), From b22f82f3055941dc3599c9a18458a2de163ff4c0 Mon Sep 17 00:00:00 2001 From: Chih Wang Date: Fri, 29 Dec 2023 04:11:31 +0800 Subject: [PATCH 106/413] Add missing array functions (#551) * Add array_append, array_concat and array_cat * Add tests for array functions array_append, array_concat and array_cat * Add array_dims and list_dims * Add tests for array_dims and list_dims * Add array_element, array_extract, list_element and list_extract * Add tests for array_element, array_extract, list_element and list_extract * Add array_length and list_length --- datafusion/tests/test_functions.py | 64 ++++++++++++++++++++++++++++++ src/functions.rs | 27 +++++++++++++ 2 files changed, 91 insertions(+) diff --git a/datafusion/tests/test_functions.py b/datafusion/tests/test_functions.py index be2a2f1f5..d0514f892 100644 --- a/datafusion/tests/test_functions.py +++ b/datafusion/tests/test_functions.py @@ -25,6 +25,8 @@ from datafusion import functions as f from datafusion import literal +np.seterr(invalid="ignore") + @pytest.fixture def df(): @@ -197,6 +199,68 @@ def 
test_math_functions(): ) +def test_array_functions(): + data = [[1.0, 2.0, 3.0], [4.0, 5.0], [6.0]] + ctx = SessionContext() + batch = pa.RecordBatch.from_arrays( + [np.array(data, dtype=object)], names=["arr"] + ) + df = ctx.create_dataframe([[batch]]) + + col = column("arr") + test_items = [ + [ + f.array_append(col, literal(99.0)), + lambda: [np.append(arr, 99.0) for arr in data], + ], + [ + f.array_concat(col, col), + lambda: [np.concatenate([arr, arr]) for arr in data], + ], + [ + f.array_cat(col, col), + lambda: [np.concatenate([arr, arr]) for arr in data], + ], + [ + f.array_dims(col), + lambda: [[len(r)] for r in data], + ], + [ + f.list_dims(col), + lambda: [[len(r)] for r in data], + ], + [ + f.array_element(col, literal(1)), + lambda: [r[0] for r in data], + ], + [ + f.array_extract(col, literal(1)), + lambda: [r[0] for r in data], + ], + [ + f.list_element(col, literal(1)), + lambda: [r[0] for r in data], + ], + [ + f.list_extract(col, literal(1)), + lambda: [r[0] for r in data], + ], + [ + f.array_length(col), + lambda: [len(r) for r in data], + ], + [ + f.list_length(col), + lambda: [len(r) for r in data], + ], + ] + + for stmt, py_expr in test_items: + query_result = df.select(stmt).collect()[0].column(0).tolist() + for a, b in zip(query_result, py_expr()): + np.testing.assert_array_almost_equal(a, b) + + def test_string_functions(df): df = df.select( f.ascii(column("a")), diff --git a/src/functions.rs b/src/functions.rs index d1f3e807b..3dc5322aa 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -357,6 +357,19 @@ scalar_function!(random, Random); scalar_function!(encode, Encode); scalar_function!(decode, Decode); +// Array Functions +scalar_function!(array_append, ArrayAppend); +scalar_function!(array_concat, ArrayConcat); +scalar_function!(array_cat, ArrayConcat); +scalar_function!(array_dims, ArrayDims); +scalar_function!(list_dims, ArrayDims); +scalar_function!(array_element, ArrayElement); +scalar_function!(array_extract, ArrayElement); 
+scalar_function!(list_element, ArrayElement); +scalar_function!(list_extract, ArrayElement); +scalar_function!(array_length, ArrayLength); +scalar_function!(list_length, ArrayLength); + aggregate_function!(approx_distinct, ApproxDistinct); aggregate_function!(approx_median, ApproxMedian); aggregate_function!(approx_percentile_cont, ApproxPercentileCont); @@ -546,5 +559,19 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { //Binary String Functions m.add_wrapped(wrap_pyfunction!(encode))?; m.add_wrapped(wrap_pyfunction!(decode))?; + + // Array Functions + m.add_wrapped(wrap_pyfunction!(array_append))?; + m.add_wrapped(wrap_pyfunction!(array_concat))?; + m.add_wrapped(wrap_pyfunction!(array_cat))?; + m.add_wrapped(wrap_pyfunction!(array_dims))?; + m.add_wrapped(wrap_pyfunction!(list_dims))?; + m.add_wrapped(wrap_pyfunction!(array_element))?; + m.add_wrapped(wrap_pyfunction!(array_extract))?; + m.add_wrapped(wrap_pyfunction!(list_element))?; + m.add_wrapped(wrap_pyfunction!(list_extract))?; + m.add_wrapped(wrap_pyfunction!(array_length))?; + m.add_wrapped(wrap_pyfunction!(list_length))?; + Ok(()) } From 967bbffb93123df0109d82f3a4b36ca0528d4fdb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 6 Jan 2024 20:04:11 -0800 Subject: [PATCH 107/413] build(deps): bump syn from 2.0.41 to 2.0.43 (#559) Bumps [syn](https://github.com/dtolnay/syn) from 2.0.41 to 2.0.43. - [Release notes](https://github.com/dtolnay/syn/releases) - [Commits](https://github.com/dtolnay/syn/compare/2.0.41...2.0.43) --- updated-dependencies: - dependency-name: syn dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 46 +++++++++++++++++++++++----------------------- Cargo.toml | 2 +- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index db776d5bd..c8b82d61e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -373,7 +373,7 @@ checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.41", + "syn 2.0.43", ] [[package]] @@ -384,7 +384,7 @@ checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.41", + "syn 2.0.43", ] [[package]] @@ -925,7 +925,7 @@ dependencies = [ "pyo3-build-config", "rand", "regex-syntax", - "syn 2.0.41", + "syn 2.0.43", "tokio", "url", "uuid", @@ -1119,7 +1119,7 @@ checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.41", + "syn 2.0.43", ] [[package]] @@ -2001,7 +2001,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" dependencies = [ "proc-macro2", - "syn 2.0.41", + "syn 2.0.43", ] [[package]] @@ -2053,7 +2053,7 @@ dependencies = [ "itertools 0.11.0", "proc-macro2", "quote", - "syn 2.0.41", + "syn 2.0.43", ] [[package]] @@ -2120,7 +2120,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.41", + "syn 2.0.43", ] [[package]] @@ -2132,7 +2132,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.41", + "syn 2.0.43", ] [[package]] @@ -2455,7 +2455,7 @@ checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.41", + "syn 2.0.43", ] [[package]] @@ -2489,7 +2489,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.41", + "syn 2.0.43", ] [[package]] @@ -2649,7 +2649,7 @@ 
dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.41", + "syn 2.0.43", ] [[package]] @@ -2670,7 +2670,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.41", + "syn 2.0.43", "typify", "walkdir", ] @@ -2694,9 +2694,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.41" +version = "2.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269" +checksum = "ee659fb5f3d355364e1f3e5bc10fb82068efbf824a1e9d1c9504244a6469ad53" dependencies = [ "proc-macro2", "quote", @@ -2760,7 +2760,7 @@ checksum = "01742297787513b79cf8e29d1056ede1313e2420b7b3b15d0a768b4921f549df" dependencies = [ "proc-macro2", "quote", - "syn 2.0.41", + "syn 2.0.43", ] [[package]] @@ -2824,7 +2824,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.41", + "syn 2.0.43", ] [[package]] @@ -2876,7 +2876,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.41", + "syn 2.0.43", ] [[package]] @@ -2921,7 +2921,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.41", + "syn 2.0.43", ] [[package]] @@ -2953,7 +2953,7 @@ dependencies = [ "regress", "schemars", "serde_json", - "syn 2.0.41", + "syn 2.0.43", "thiserror", "unicode-ident", ] @@ -2970,7 +2970,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.41", + "syn 2.0.43", "typify-impl", ] @@ -3104,7 +3104,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.41", + "syn 2.0.43", "wasm-bindgen-shared", ] @@ -3138,7 +3138,7 @@ checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" dependencies = [ "proc-macro2", "quote", - "syn 2.0.41", + "syn 2.0.43", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3398,7 +3398,7 @@ 
checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.41", + "syn 2.0.43", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 08f566d2d..0c4e3853d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,7 +52,7 @@ futures = "0.3" object_store = { version = "0.8.0", features = ["aws", "gcp", "azure"] } parking_lot = "0.12" regex-syntax = "0.8.1" -syn = "2.0.37" +syn = "2.0.43" url = "2.2" [build-dependencies] From 3f8e862508194e1db8e6f893bf5c67cab457fef0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 6 Jan 2024 20:04:26 -0800 Subject: [PATCH 108/413] build(deps): bump tokio from 1.35.0 to 1.35.1 (#558) Bumps [tokio](https://github.com/tokio-rs/tokio) from 1.35.0 to 1.35.1. - [Release notes](https://github.com/tokio-rs/tokio/releases) - [Commits](https://github.com/tokio-rs/tokio/compare/tokio-1.35.0...tokio-1.35.1) --- updated-dependencies: - dependency-name: tokio dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c8b82d61e..640d1c56d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2800,9 +2800,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.35.0" +version = "1.35.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "841d45b238a16291a4e1584e61820b8ae57d696cc5015c459c229ccc6990cc1c" +checksum = "c89b4efa943be685f629b149f53829423f8f5531ea21249408e8e2f8671ec104" dependencies = [ "backtrace", "bytes", diff --git a/Cargo.toml b/Cargo.toml index 0c4e3853d..d0a1b8b1b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,7 +34,7 @@ protoc = [ "datafusion-substrait/protoc" ] substrait = ["dep:datafusion-substrait"] [dependencies] -tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } +tokio = { version = "1.35", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.20", features = ["extension-module", "abi3", "abi3-py38"] } datafusion = { version = "34.0.0", features = ["pyarrow", "avro"] } From 767d43e05d9e398df713bfc0545635f3b048ca17 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 6 Jan 2024 21:15:41 -0800 Subject: [PATCH 109/413] build(deps): bump async-trait from 0.1.74 to 0.1.77 (#556) Bumps [async-trait](https://github.com/dtolnay/async-trait) from 0.1.74 to 0.1.77. - [Release notes](https://github.com/dtolnay/async-trait/releases) - [Commits](https://github.com/dtolnay/async-trait/commits) --- updated-dependencies: - dependency-name: async-trait dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 58 +++++++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 640d1c56d..e3bad1a38 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -373,18 +373,18 @@ checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.48", ] [[package]] name = "async-trait" -version = "0.1.74" +version = "0.1.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9" +checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.48", ] [[package]] @@ -925,7 +925,7 @@ dependencies = [ "pyo3-build-config", "rand", "regex-syntax", - "syn 2.0.43", + "syn 2.0.48", "tokio", "url", "uuid", @@ -1119,7 +1119,7 @@ checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.48", ] [[package]] @@ -2001,14 +2001,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" dependencies = [ "proc-macro2", - "syn 2.0.43", + "syn 2.0.48", ] [[package]] name = "proc-macro2" -version = "1.0.70" +version = "1.0.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" +checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" dependencies = [ "unicode-ident", ] @@ -2053,7 +2053,7 @@ dependencies = [ "itertools 0.11.0", "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.48", ] [[package]] @@ -2120,7 +2120,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", 
"quote", - "syn 2.0.43", + "syn 2.0.48", ] [[package]] @@ -2132,7 +2132,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.48", ] [[package]] @@ -2153,9 +2153,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.33" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] @@ -2455,7 +2455,7 @@ checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.48", ] [[package]] @@ -2489,7 +2489,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.43", + "syn 2.0.48", ] [[package]] @@ -2649,7 +2649,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.43", + "syn 2.0.48", ] [[package]] @@ -2670,7 +2670,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.43", + "syn 2.0.48", "typify", "walkdir", ] @@ -2694,9 +2694,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.43" +version = "2.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee659fb5f3d355364e1f3e5bc10fb82068efbf824a1e9d1c9504244a6469ad53" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" dependencies = [ "proc-macro2", "quote", @@ -2760,7 +2760,7 @@ checksum = "01742297787513b79cf8e29d1056ede1313e2420b7b3b15d0a768b4921f549df" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.48", ] [[package]] @@ -2824,7 +2824,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.48", ] [[package]] @@ -2876,7 +2876,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + 
"syn 2.0.48", ] [[package]] @@ -2921,7 +2921,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.48", ] [[package]] @@ -2953,7 +2953,7 @@ dependencies = [ "regress", "schemars", "serde_json", - "syn 2.0.43", + "syn 2.0.48", "thiserror", "unicode-ident", ] @@ -2970,7 +2970,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.43", + "syn 2.0.48", "typify-impl", ] @@ -3104,7 +3104,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.48", "wasm-bindgen-shared", ] @@ -3138,7 +3138,7 @@ checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.48", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3398,7 +3398,7 @@ checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.48", ] [[package]] From 7de4316b70dd1b2aeed68bfee293d95a2cc384b5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 6 Jan 2024 23:09:01 -0800 Subject: [PATCH 110/413] build(deps): bump pyo3 from 0.20.0 to 0.20.2 (#557) Bumps [pyo3](https://github.com/pyo3/pyo3) from 0.20.0 to 0.20.2. - [Release notes](https://github.com/pyo3/pyo3/releases) - [Changelog](https://github.com/PyO3/pyo3/blob/main/CHANGELOG.md) - [Commits](https://github.com/pyo3/pyo3/compare/v0.20.0...v0.20.2) --- updated-dependencies: - dependency-name: pyo3 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e3bad1a38..e2ed3d3f4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2076,9 +2076,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.20.0" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04e8453b658fe480c3e70c8ed4e3d3ec33eb74988bd186561b0cc66b85c3bc4b" +checksum = "9a89dc7a5850d0e983be1ec2a463a171d20990487c3cfcd68b5363f1ee3d6fe0" dependencies = [ "cfg-if", "indoc", @@ -2093,9 +2093,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.20.0" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a96fe70b176a89cff78f2fa7b3c930081e163d5379b4dcdf993e3ae29ca662e5" +checksum = "07426f0d8fe5a601f26293f300afd1a7b1ed5e78b2a705870c5f30893c5163be" dependencies = [ "once_cell", "target-lexicon", @@ -2103,9 +2103,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.20.0" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "214929900fd25e6604661ed9cf349727c8920d47deff196c4e28165a6ef2a96b" +checksum = "dbb7dec17e17766b46bca4f1a4215a85006b4c2ecde122076c562dd058da6cf1" dependencies = [ "libc", "pyo3-build-config", @@ -2113,9 +2113,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.20.0" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dac53072f717aa1bfa4db832b39de8c875b7c7af4f4a6fe93cdbf9264cf8383b" +checksum = "05f738b4e40d50b5711957f142878cfa0f28e054aa0ebdfc3fd137a843f74ed3" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -2125,9 +2125,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.20.0" +version = "0.20.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7774b5a8282bd4f25f803b1f0d945120be959a36c72e08e7cd031c792fdfd424" +checksum = "0fc910d4851847827daf9d6cdd4a823fbdaab5b8818325c5e97a86da79e8881f" dependencies = [ "heck", "proc-macro2", From bef6cb66599588c096dae59ddfd707053e5741cd Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 1 Feb 2024 05:59:02 -0700 Subject: [PATCH 111/413] Prepare 35.0.0-rc1 (#564) --- CHANGELOG.md | 9 + Cargo.lock | 442 +++++++++++++++++++---------------- Cargo.toml | 16 +- datafusion/tests/test_sql.py | 3 +- src/common/data_type.rs | 1 + src/context.rs | 7 +- src/expr/window.rs | 6 +- src/functions.rs | 11 +- 8 files changed, 274 insertions(+), 221 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4af70d568..d5e3abbe2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,15 @@ # DataFusion Python Changelog +## [35.0.0](https://github.com/apache/arrow-datafusion-python/tree/35.0.0) (2024-01-20) + +**Merged pull requests:** + +- build(deps): bump syn from 2.0.41 to 2.0.43 [#559](https://github.com/apache/arrow-datafusion-python/pull/559) (dependabot[bot]) +- build(deps): bump tokio from 1.35.0 to 1.35.1 [#558](https://github.com/apache/arrow-datafusion-python/pull/558) (dependabot[bot]) +- build(deps): bump async-trait from 0.1.74 to 0.1.77 [#556](https://github.com/apache/arrow-datafusion-python/pull/556) (dependabot[bot]) +- build(deps): bump pyo3 from 0.20.0 to 0.20.2 [#557](https://github.com/apache/arrow-datafusion-python/pull/557) (dependabot[bot]) + ## [34.0.0](https://github.com/apache/arrow-datafusion-python/tree/34.0.0) (2023-12-28) **Merged pull requests:** diff --git a/Cargo.lock b/Cargo.lock index e2ed3d3f4..e6aa6050a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -25,9 +25,9 @@ checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" [[package]] name = "ahash" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a" +checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01" dependencies = [ "cfg-if", "const-random", @@ -84,9 +84,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.75" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" +checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca" [[package]] name = "apache-avro" @@ -130,11 +130,10 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bc25126d18a012146a888a0298f2c22e1150327bd2765fc76d710a556b2d614" +checksum = "aa285343fba4d829d49985bdc541e3789cf6000ed0e84be7c039438df4a4e78c" dependencies = [ - "ahash", "arrow-arith", "arrow-array", "arrow-buffer", @@ -153,9 +152,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34ccd45e217ffa6e53bbb0080990e77113bdd4e91ddb84e97b77649810bcf1a7" +checksum = "753abd0a5290c1bcade7c6623a556f7d1659c5f4148b140b5b63ce7bd1a45705" dependencies = [ "arrow-array", "arrow-buffer", @@ -168,9 +167,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bda9acea48b25123c08340f3a8ac361aa0f74469bb36f5ee9acf923fce23e9d" +checksum = "d390feeb7f21b78ec997a4081a025baef1e2e0d6069e181939b61864c9779609" dependencies = [ "ahash", "arrow-buffer", @@ -185,9 +184,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"01a0fc21915b00fc6c2667b069c1b64bdd920982f426079bc4a7cab86822886c" +checksum = "69615b061701bcdffbc62756bc7e85c827d5290b472b580c972ebbbf690f5aa4" dependencies = [ "bytes", "half", @@ -196,9 +195,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dc0368ed618d509636c1e3cc20db1281148190a78f43519487b2daf07b63b4a" +checksum = "e448e5dd2f4113bf5b74a1f26531708f5edcacc77335b7066f9398f4bcf4cdef" dependencies = [ "arrow-array", "arrow-buffer", @@ -215,9 +214,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e09aa6246a1d6459b3f14baeaa49606cfdbca34435c46320e14054d244987ca" +checksum = "46af72211f0712612f5b18325530b9ad1bfbdc87290d5fbfd32a7da128983781" dependencies = [ "arrow-array", "arrow-buffer", @@ -234,9 +233,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "907fafe280a3874474678c1858b9ca4cb7fd83fb8034ff5b6d6376205a08c634" +checksum = "67d644b91a162f3ad3135ce1184d0a31c28b816a581e08f29e8e9277a574c64e" dependencies = [ "arrow-buffer", "arrow-schema", @@ -246,9 +245,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79a43d6808411886b8c7d4f6f7dd477029c1e77ffffffb7923555cc6579639cd" +checksum = "03dea5e79b48de6c2e04f03f62b0afea7105be7b77d134f6c5414868feefb80d" dependencies = [ "arrow-array", "arrow-buffer", @@ -256,13 +255,14 @@ dependencies = [ "arrow-data", "arrow-schema", "flatbuffers", + "lz4_flex", ] [[package]] name = "arrow-json" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d82565c91fd627922ebfe2810ee4e8346841b6f9361b87505a9acea38b614fee" +checksum = "8950719280397a47d37ac01492e3506a8a724b3fb81001900b866637a829ee0f" dependencies = [ "arrow-array", "arrow-buffer", @@ -280,9 +280,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b23b0e53c0db57c6749997fd343d4c0354c994be7eca67152dd2bdb9a3e1bb4" +checksum = "1ed9630979034077982d8e74a942b7ac228f33dd93a93b615b4d02ad60c260be" dependencies = [ "arrow-array", "arrow-buffer", @@ -295,9 +295,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "361249898d2d6d4a6eeb7484be6ac74977e48da12a4dd81a708d620cc558117a" +checksum = "007035e17ae09c4e8993e4cb8b5b96edf0afb927cd38e2dff27189b274d83dcf" dependencies = [ "ahash", "arrow-array", @@ -310,18 +310,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09e28a5e781bf1b0f981333684ad13f5901f4cd2f20589eab7cf1797da8fc167" +checksum = "0ff3e9c01f7cd169379d269f926892d0e622a704960350d09d331be3ec9e0029" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", ] [[package]] name = "arrow-select" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f6208466590960efc1d2a7172bc4ff18a67d6e25c529381d7f96ddaf0dc4036" +checksum = "1ce20973c1912de6514348e064829e50947e35977bb9d7fb637dc99ea9ffd78c" dependencies = [ "ahash", "arrow-array", @@ -333,9 +333,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a48149c63c11c9ff571e50ab8f017d2a7cb71037a882b42f6354ed2da9acc7" +checksum = 
"00f3b37f2aeece31a2636d1b037dabb69ef590e03bdc7eb68519b51ec86932a7" dependencies = [ "arrow-array", "arrow-buffer", @@ -349,9 +349,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc2d0cfb2a7388d34f590e76686704c494ed7aaceed62ee1ba35cbf363abc2a5" +checksum = "a116f46a969224200a0a97f29cfd4c50e7534e4b4826bd23ea2c3c533039c82c" dependencies = [ "bzip2", "flate2", @@ -419,9 +419,9 @@ dependencies = [ [[package]] name = "base64" -version = "0.21.5" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] name = "bitflags" @@ -431,9 +431,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.1" +version = "2.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" [[package]] name = "blake2" @@ -557,9 +557,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e23185c0e21df6ed832a12e2bda87c7d1def6842881fb634a8511ced741b0d76" +checksum = "91d7b79e99bfaa0d47da0687c43aa3b7381938a62ad3a6498599039321f660b7" dependencies = [ "chrono", "chrono-tz-build", @@ -641,9 +641,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce420fe07aecd3e67c5f910618fe65e94158f6dcc0adf44e00d69ce2bdfe0fd0" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" dependencies = [ "libc", ] 
@@ -715,14 +715,15 @@ dependencies = [ [[package]] name = "datafusion" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "193fd1e7628278d0641c5122860f9a7fd6a1d77d055838d12f55d15bbe28d4d0" +checksum = "4328f5467f76d890fe3f924362dbc3a838c6a733f762b32d87f9e0b7bef5fb49" dependencies = [ "ahash", "apache-avro", "arrow", "arrow-array", + "arrow-ipc", "arrow-schema", "async-compression", "async-trait", @@ -764,9 +765,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "548bc49c4a489e3de474813831ea556dc9d368f9ed8d867b1493da42e8e9f613" +checksum = "d29a7752143b446db4a2cccd9a6517293c6b97e8c39e520ca43ccd07135a4f7e" dependencies = [ "ahash", "apache-avro", @@ -786,9 +787,9 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecc865657ffcf4da5ff08bdc6436a9a833bc0aa96c3254c8d18ab8a0ad4e437d" +checksum = "2d447650af16e138c31237f53ddaef6dd4f92f0e2d3f2f35d190e16c214ca496" dependencies = [ "arrow", "chrono", @@ -807,9 +808,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33c473f72d8d81a532e63f6e562ed66dd9209dfd8e433d9712abd42444ee161e" +checksum = "d8d19598e48a498850fb79f97a9719b1f95e7deb64a7a06f93f313e8fa1d524b" dependencies = [ "ahash", "arrow", @@ -823,9 +824,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb6218318001d2f6783b7fffa17592318f65f26609d7aab605a3dd0c7c2e2618" +checksum = "8b7feb0391f1fc75575acb95b74bfd276903dc37a5409fcebe160bc7ddff2010" dependencies = [ "arrow", "async-trait", @@ -841,9 +842,9 @@ 
dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e1ca7e35ca22f9dc506c2375b92054b03ccf91afe25c0a90b395a1473a09735" +checksum = "e911bca609c89a54e8f014777449d8290327414d3e10c57a3e3c2122e38878d0" dependencies = [ "ahash", "arrow", @@ -875,9 +876,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddde97adefcca3a55257c646ffee2a95b6cac66f74d1146a6e3a6dbb37830631" +checksum = "e96b546b8a02e9c2ab35ac6420d511f12a4701950c1eb2e568c122b4fefb0be3" dependencies = [ "ahash", "arrow", @@ -906,7 +907,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "34.0.0" +version = "35.0.0" dependencies = [ "async-trait", "datafusion", @@ -933,9 +934,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a60d9d6460a64fddb8663db41da97e6b8b0bf79da42f997ebe81722731eaf0e5" +checksum = "2d18d36f260bbbd63aafdb55339213a23d540d3419810575850ef0a798a6b768" dependencies = [ "arrow", "arrow-schema", @@ -947,9 +948,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2243428e8df8b2c5ce2c9e3af588312ae89c1b073c52de5693ec44ba29674547" +checksum = "dad6bef77af3d8a697ae63ffbcb5aa66b74cd08ea93a31e2e757da75b2f1452f" dependencies = [ "async-recursion", "chrono", @@ -959,7 +960,6 @@ dependencies = [ "prost", "prost-types", "substrait", - "tokio", ] [[package]] @@ -1065,9 +1065,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"da0290714b38af9b4a7b094b8a37086d1b4e61f2df9122c3cad2577669145335" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" dependencies = [ "futures-channel", "futures-core", @@ -1080,9 +1080,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff4dd66668b557604244583e3e1e1eada8c5c2e96a6d0d6653ede395b78bbacb" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" dependencies = [ "futures-core", "futures-sink", @@ -1090,15 +1090,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb1d22c66e66d9d72e1758f0bd7d4fd0bee04cad842ee34587d68c07e45d088c" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" [[package]] name = "futures-executor" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f4fb8693db0cf099eadcca0efe2a5a22e4550f98ed16aba6c48700da29597bc" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" dependencies = [ "futures-core", "futures-task", @@ -1107,15 +1107,15 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf34a163b5c4c52d0478a4d757da8fb65cabef42ba90515efee0f6f9fa45aaa" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" [[package]] name = "futures-macro" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", @@ -1124,21 +1124,21 @@ dependencies = [ 
[[package]] name = "futures-sink" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e36d3378ee38c2a36ad710c5d30c2911d752cb941c00c72dbabfb786a7970817" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" [[package]] name = "futures-task" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd193069b0ddadc69c46389b740bbccdd97203899b48d09c5f7969591d6bae2" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" [[package]] name = "futures-util" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a19526d624e703a3179b3d322efec918b6246ea0fa51d41124525f00f1cc8104" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ "futures-channel", "futures-core", @@ -1164,9 +1164,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" dependencies = [ "cfg-if", "libc", @@ -1185,7 +1185,7 @@ version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbf97ba92db08df386e10c8ede66a2a0369bd277090afd8710e19e38de9ec0cd" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "libc", "libgit2-sys", "log", @@ -1200,9 +1200,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.22" +version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d6250322ef6e60f93f9a2162799302cd6f68f79f6e5d85c8c16f14d1d958178" +checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" dependencies = [ "bytes", 
"fnv", @@ -1255,9 +1255,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" +checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f" [[package]] name = "hex" @@ -1316,9 +1316,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.27" +version = "0.14.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffb1cfd654a8219eaef89881fdb3bb3b1cdc5fa75ded05d6933b2b382e395468" +checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" dependencies = [ "bytes", "futures-channel", @@ -1331,7 +1331,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.4.10", + "socket2", "tokio", "tower-service", "tracing", @@ -1354,9 +1354,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.58" +version = "0.1.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8326b86b6cff230b97d0d312a6c40a60726df3332e721f72a1b035f451663b20" +checksum = "b6a67363e2aa4443928ce15e57ebae94fd8949958fd1223c4cfc0cd473ad7539" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -1448,9 +1448,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.66" +version = "0.3.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca" +checksum = "9a1d36f1235bc969acba30b7f5990b864423a6068a10f7c90ae8f0112e3a59d1" dependencies = [ "wasm-bindgen", ] @@ -1527,9 +1527,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.151" +version = "0.2.152" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" +checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" [[package]] name = "libflate" @@ -1585,9 +1585,9 @@ dependencies = [ [[package]] name = "libz-sys" -version = "1.1.12" +version = "1.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d97137b25e321a73eef1418d1d5d2eda4d77e12813f8e6dead84bc52c5870a7b" +checksum = "295c17e837573c8c821dbaeb3cceb3d745ad082f7572191409e69cbc1b3fd050" dependencies = [ "cc", "libc", @@ -1597,9 +1597,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" [[package]] name = "lock_api" @@ -1619,9 +1619,9 @@ checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" [[package]] name = "lz4_flex" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ea9b256699eda7b0387ffbc776dd625e28bde3918446381781245b7a50349d8" +checksum = "912b45c753ff5f7f5208307e8ace7d2a2e30d024e26d3509f3dce546c044ce15" dependencies = [ "twox-hash", ] @@ -1649,9 +1649,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.6.4" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" [[package]] name = "memoffset" @@ -1792,18 +1792,18 @@ dependencies = [ [[package]] name = "object" -version = "0.32.1" +version = "0.32.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" +checksum = 
"a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" dependencies = [ "memchr", ] [[package]] name = "object_store" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2524735495ea1268be33d200e1ee97455096a0846295a21548cd2f3541de7050" +checksum = "d139f545f64630e2e3688fd9f81c470888ab01edeb72d13b4e86c566f1130000" dependencies = [ "async-trait", "base64", @@ -1812,14 +1812,14 @@ dependencies = [ "futures", "humantime", "hyper", - "itertools 0.11.0", + "itertools 0.12.0", "parking_lot", "percent-encoding", "quick-xml", "rand", "reqwest", "ring", - "rustls-pemfile", + "rustls-pemfile 2.0.0", "serde", "serde_json", "snafu", @@ -1835,6 +1835,12 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + [[package]] name = "ordered-float" version = "2.10.1" @@ -1869,9 +1875,9 @@ dependencies = [ [[package]] name = "parquet" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af88740a842787da39b3d69ce5fbf6fce97d20211d3b299fee0a0da6430c74d4" +checksum = "547b92ebf0c1177e3892f44c8f79757ee62e678d564a9834189725f2c5b7a750" dependencies = [ "ahash", "arrow-array", @@ -1887,6 +1893,7 @@ dependencies = [ "chrono", "flate2", "futures", + "half", "hashbrown 0.14.3", "lz4_flex", "num", @@ -1984,9 +1991,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.27" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +checksum = 
"2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb" [[package]] name = "ppv-lite86" @@ -1996,9 +2003,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "prettyplease" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" +checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" dependencies = [ "proc-macro2", "syn 2.0.48", @@ -2246,9 +2253,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.22" +version = "0.11.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "046cd98826c46c2ac8ddecae268eb5c2e58628688a5fc7a2643704a73faba95b" +checksum = "37b1ae8d9ac08420c66222fb9096fc5de435c3c48542bc5336c51892cffafb41" dependencies = [ "base64", "bytes", @@ -2268,7 +2275,8 @@ dependencies = [ "percent-encoding", "pin-project-lite", "rustls", - "rustls-pemfile", + "rustls-native-certs", + "rustls-pemfile 1.0.4", "serde", "serde_json", "serde_urlencoded", @@ -2282,7 +2290,6 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots", "winreg", ] @@ -2323,11 +2330,11 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.28" +version = "0.38.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316" +checksum = "322394588aaf33c24007e8bb3238ee3e4c5c09c084ab32bc73890b99ff326bca" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "errno", "libc", "linux-raw-sys", @@ -2346,6 +2353,18 @@ dependencies = [ "sct", ] +[[package]] +name = "rustls-native-certs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +dependencies = [ + "openssl-probe", + "rustls-pemfile 1.0.4", + "schannel", + 
"security-framework", +] + [[package]] name = "rustls-pemfile" version = "1.0.4" @@ -2355,6 +2374,22 @@ dependencies = [ "base64", ] +[[package]] +name = "rustls-pemfile" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35e4980fa29e4c4b212ffb3db068a564cbf560e51d3944b7c88bd8bf5bec64f4" +dependencies = [ + "base64", + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e9d979b3ce68192e42760c7810125eb6cf2ea10efae545a156063e61f314e2a" + [[package]] name = "rustls-webpki" version = "0.101.7" @@ -2386,6 +2421,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "schannel" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" +dependencies = [ + "windows-sys 0.52.0", +] + [[package]] name = "schemars" version = "0.8.16" @@ -2426,11 +2470,34 @@ dependencies = [ "untrusted", ] +[[package]] +name = "security-framework" +version = "2.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05b64fb303737d99b81884b2c63433e9ae28abebe5eb5045dcdd175dc2ecf4de" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e932934257d3b408ed8f30db49d85ea163bfe74961f017f405b025af298f0c7a" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "semver" -version = "1.0.20" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" +checksum = "b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0" [[package]] name = "seq-macro" @@ -2440,18 
+2507,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.193" +version = "1.0.195" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" +checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.193" +version = "1.0.195" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" +checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c" dependencies = [ "proc-macro2", "quote", @@ -2471,9 +2538,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.108" +version = "1.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" +checksum = "176e46fa42316f18edd598015a5166857fc835ec732f5215eac6b7bdbf0a84f4" dependencies = [ "itoa", "ryu", @@ -2506,9 +2573,9 @@ dependencies = [ [[package]] name = "serde_yaml" -version = "0.9.27" +version = "0.9.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cc7a1570e38322cfe4154732e5110f887ea57e22b76f4bfd32b5bdd3368666c" +checksum = "b1bf28c79a99f70ee1f1d83d10c875d2e70618417fda01ad1785e027579d9d38" dependencies = [ "indexmap", "itoa", @@ -2545,9 +2612,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.11.2" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970" +checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" [[package]] name = "snafu" @@ -2577,16 +2644,6 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" -[[package]] -name = "socket2" -version = "0.4.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "socket2" version = "0.5.5" @@ -2605,9 +2662,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.40.0" +version = "0.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c80afe31cdb649e56c0d9bb5503be9166600d68a852c38dd445636d126858e5" +checksum = "5cc2c25a6c66789625ef164b4c7d2e548d627902280c13710d33da8222169964" dependencies = [ "log", "sqlparser_derive", @@ -2615,13 +2672,13 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e9c2e1dde0efa87003e7923d94a90f46e3274ad1649f51de96812be561f041f" +checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.48", ] [[package]] @@ -2654,9 +2711,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.20.3" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdbe404d89610b54be889fb6540bcc6a9e234d0991cde0a8a9e341559768aa75" +checksum = "5478fbd0313a9b0915a1c0e7ebf15b5fed7d7c6dd7229b4f5e32ce75b10f256a" dependencies = [ "git2", "heck", @@ -2726,37 +2783,37 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.12.12" +version = "0.12.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c39fd04924ca3a864207c66fc2cd7d22d7c016007f9ce846cbb9326331930a" +checksum = "69758bda2e78f098e4ccb393021a0963bb3442eac05f135c30f61b7370bbafae" [[package]] name = "tempfile" -version = "3.8.1" +version = "3.9.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" +checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa" dependencies = [ "cfg-if", "fastrand", "redox_syscall", "rustix", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "thiserror" -version = "1.0.51" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f11c217e1416d6f036b870f14e0413d480dbf28edbee1f877abaf0206af43bb7" +checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.51" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01742297787513b79cf8e29d1056ede1313e2420b7b3b15d0a768b4921f549df" +checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" dependencies = [ "proc-macro2", "quote", @@ -2809,9 +2866,8 @@ dependencies = [ "libc", "mio", "num_cpus", - "parking_lot", "pin-project-lite", - "socket2 0.5.5", + "socket2", "tokio-macros", "windows-sys 0.48.0", ] @@ -2976,9 +3032,9 @@ dependencies = [ [[package]] name = "unicode-bidi" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f2528f27a9eb2b21e69c95319b30bd0efd85d09c379741b0f78ea1d86be2416" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] name = "unicode-ident" @@ -3038,9 +3094,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.6.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" +checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" dependencies = [ "getrandom", "serde", @@ -3085,9 +3141,9 @@ checksum = 
"9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.89" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e" +checksum = "b1223296a201415c7fad14792dbefaace9bd52b62d33453ade1c5b5f07555406" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -3095,9 +3151,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.89" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826" +checksum = "fcdc935b63408d58a32f8cc9738a0bffd8f05cc7c002086c6ef20b7312ad9dcd" dependencies = [ "bumpalo", "log", @@ -3110,9 +3166,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.39" +version = "0.4.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac36a15a220124ac510204aec1c3e5db8a22ab06fd6706d881dc6149f8ed9a12" +checksum = "bde2032aeb86bdfaecc8b261eef3cba735cc426c1f3a3416d1e0791be95fc461" dependencies = [ "cfg-if", "js-sys", @@ -3122,9 +3178,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.89" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2" +checksum = "3e4c238561b2d428924c49815533a8b9121c664599558a5d9ec51f8a1740a999" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3132,9 +3188,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.89" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" +checksum = "bae1abb6806dc1ad9e560ed242107c0f6c84335f1749dd4e8ddb012ebd5e25a7" dependencies = [ "proc-macro2", "quote", @@ -3145,9 +3201,9 @@ dependencies = 
[ [[package]] name = "wasm-bindgen-shared" -version = "0.2.89" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f" +checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b" [[package]] name = "wasm-streams" @@ -3164,20 +3220,14 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.66" +version = "0.3.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50c24a44ec86bb68fbecd1b3efed7e85ea5621b39b35ef2766b66cd984f8010f" +checksum = "58cd2333b6e0be7a39605f0e255892fd7418a682d8da8fe042fe25128794d2ed" dependencies = [ "js-sys", "wasm-bindgen", ] -[[package]] -name = "webpki-roots" -version = "0.25.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10" - [[package]] name = "which" version = "4.4.2" @@ -3223,11 +3273,11 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-core" -version = "0.51.1" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.48.5", + "windows-targets 0.52.0", ] [[package]] @@ -3383,18 +3433,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.7.31" +version = "0.7.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c4061bedbb353041c12f413700357bec76df2c7e2ca8e4df8bac24c6bf68e3d" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.31" +version = "0.7.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index d0a1b8b1b..6c47484b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "34.0.0" +version = "35.0.0" homepage = "https://github.com/apache/arrow-datafusion-python" repository = "https://github.com/apache/arrow-datafusion-python" authors = ["Apache Arrow "] @@ -37,19 +37,19 @@ substrait = ["dep:datafusion-substrait"] tokio = { version = "1.35", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.20", features = ["extension-module", "abi3", "abi3-py38"] } -datafusion = { version = "34.0.0", features = ["pyarrow", "avro"] } -datafusion-common = { version = "34.0.0", features = ["pyarrow"] } -datafusion-expr = { version = "34.0.0" } -datafusion-optimizer = { version = "34.0.0" } -datafusion-sql = { version = "34.0.0" } -datafusion-substrait = { version = "34.0.0", optional = true } +datafusion = { version = "35.0.0", features = ["pyarrow", "avro"] } +datafusion-common = { version = "35.0.0", features = ["pyarrow"] } +datafusion-expr = "35.0.0" +datafusion-optimizer = "35.0.0" +datafusion-sql = "35.0.0" +datafusion-substrait = { version = "35.0.0", optional = true } prost = "0.12" prost-types = "0.12" uuid = { version = "1.3", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } async-trait = "0.1" futures = "0.3" -object_store = { version = "0.8.0", features = ["aws", "gcp", "azure"] } +object_store = { version = "0.9.0", features = ["aws", "gcp", "azure"] } parking_lot = "0.12" regex-syntax = "0.8.1" syn = "2.0.43" diff --git a/datafusion/tests/test_sql.py b/datafusion/tests/test_sql.py index 19a2ad2cf..0c6b26727 100644 --- a/datafusion/tests/test_sql.py +++ 
b/datafusion/tests/test_sql.py @@ -195,7 +195,7 @@ def test_register_json(ctx, tmp_path): assert result.to_pydict() == {"cnt": [3]} result = ctx.sql("SELECT * FROM json3").collect() - result = pa.Table.from_batches(result) + result = pa.Table.from_batches(result, alternative_schema) assert result.schema == alternative_schema with pytest.raises( @@ -224,7 +224,6 @@ def test_register_avro(ctx): "alltypes_plain_schema", path, schema=alternative_schema, - infinite=False, ) result = ctx.sql("SELECT * FROM alltypes_plain_schema").collect() result = pa.Table.from_batches(result) diff --git a/src/common/data_type.rs b/src/common/data_type.rs index 03248cc5b..6059768e3 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use datafusion::arrow::array::Array; use datafusion::arrow::datatypes::{DataType, IntervalUnit, TimeUnit}; use datafusion_common::{DataFusionError, ScalarValue}; use pyo3::{exceptions::PyValueError, prelude::*}; diff --git a/src/context.rs b/src/context.rs index 63c48f007..9053e4f2e 100644 --- a/src/context.rs +++ b/src/context.rs @@ -569,8 +569,7 @@ impl PySessionContext { path, schema=None, file_extension=".avro", - table_partition_cols=vec![], - infinite=false))] + table_partition_cols=vec![]))] pub fn register_avro( &mut self, name: &str, @@ -578,7 +577,6 @@ impl PySessionContext { schema: Option>, file_extension: &str, table_partition_cols: Vec<(String, String)>, - infinite: bool, py: Python, ) -> PyResult<()> { let path = path @@ -586,8 +584,7 @@ impl PySessionContext { .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?; let mut options = AvroReadOptions::default() - .table_partition_cols(convert_table_partition_cols(table_partition_cols)?) 
- .mark_infinite(infinite); + .table_partition_cols(convert_table_partition_cols(table_partition_cols)?); options.file_extension = file_extension; options.schema = schema.as_ref().map(|x| &x.0); diff --git a/src/expr/window.rs b/src/expr/window.rs index 3a5760c15..786651194 100644 --- a/src/expr/window.rs +++ b/src/expr/window.rs @@ -210,11 +210,7 @@ impl PyWindowFrame { }, }; Ok(PyWindowFrame { - window_frame: WindowFrame { - units, - start_bound, - end_bound, - }, + window_frame: WindowFrame::new_bounds(units, start_bound, end_bound), }) } diff --git a/src/functions.rs b/src/functions.rs index 3dc5322aa..045e7e01e 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -27,10 +27,11 @@ use datafusion_common::{Column, TableReference}; use datafusion_expr::expr::Alias; use datafusion_expr::{ aggregate_function, - expr::{AggregateFunction, AggregateFunctionDefinition, ScalarFunction, Sort, WindowFunction}, - lit, - window_function::find_df_window_func, - BuiltinScalarFunction, Expr, + expr::{ + find_df_window_func, AggregateFunction, AggregateFunctionDefinition, ScalarFunction, Sort, + WindowFunction, + }, + lit, BuiltinScalarFunction, Expr, WindowFunctionDefinition, }; #[pyfunction] @@ -143,7 +144,7 @@ fn window( ctx.and_then(|ctx| { ctx.ctx .udaf(name) - .map(datafusion_expr::WindowFunction::AggregateUDF) + .map(WindowFunctionDefinition::AggregateUDF) .ok() }) }); From 5296c0cfcf8e6fcb654d5935252469bf04f929e9 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 10 Feb 2024 10:23:30 -0700 Subject: [PATCH 112/413] Allow PyDataFrame to be used from other projects (#582) * Allow PyDataFrame to be used from other projects * revert --- src/context.rs | 4 ++-- src/lib.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/context.rs b/src/context.rs index 9053e4f2e..f34fbce81 100644 --- a/src/context.rs +++ b/src/context.rs @@ -842,7 +842,7 @@ impl PySessionContext { } } -fn convert_table_partition_cols( +pub fn convert_table_partition_cols( 
table_partition_cols: Vec<(String, String)>, ) -> Result, DataFusionError> { table_partition_cols @@ -856,7 +856,7 @@ fn convert_table_partition_cols( .collect::, _>>() } -fn parse_file_compression_type( +pub fn parse_file_compression_type( file_compression_type: Option, ) -> Result { FileCompressionType::from_str(&*file_compression_type.unwrap_or("".to_string()).as_str()) diff --git a/src/lib.rs b/src/lib.rs index 5e57db9cf..49c325a53 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -37,7 +37,7 @@ mod config; #[allow(clippy::borrow_deref_ref)] pub mod context; #[allow(clippy::borrow_deref_ref)] -mod dataframe; +pub mod dataframe; mod dataset; mod dataset_exec; pub mod errors; From 476ca228cfcf75012a73932c4fb1fb53271c1e8a Mon Sep 17 00:00:00 2001 From: Dejan Simic <10134699+simicd@users.noreply.github.com> Date: Sun, 11 Feb 2024 20:34:04 +0100 Subject: [PATCH 113/413] docs: Add ASF attribution (#580) * Add ASF attribution and update docs README * Fix formatting * Fix unit test * Undo changes in functions & tests and requirements.txt --- docs/README.md | 45 ++++++++++++++++-------- docs/requirements.txt | 2 +- docs/source/_templates/docs-sidebar.html | 2 +- docs/source/_templates/layout.html | 21 +++++++++++ docs/source/conf.py | 6 ++-- 5 files changed, 57 insertions(+), 19 deletions(-) diff --git a/docs/README.md b/docs/README.md index 85278588e..8cb101d92 100644 --- a/docs/README.md +++ b/docs/README.md @@ -28,15 +28,31 @@ when changes are merged to the main branch. It's recommended to install build dependencies and build the documentation inside a Python `venv`. -```bash -python -m pip install -r requirements-310.txt -``` +To prepare building the documentation run the following on the root level of the project: + +1. Set up virtual environment if it was not already created + ```bash + python3 -m venv venv + ``` +1. Activate virtual environment + ```bash + source venv/bin/activate + ``` +1. 
Install Datafusion's Python dependencies + ```bash + pip install -r requirements-310.txt + ``` +1. Install documentation dependencies + ```bash + pip install -r docs/requirements.txt + ``` ## Build & Preview Run the provided script to build the HTML pages. ```bash +cd docs ./build.sh ``` @@ -50,14 +66,15 @@ firefox build/html/index.html ## Release Process -The documentation is served through the -[arrow-site](https://github.com/apache/arrow-site/) repo. To release a new -version of the docs, follow these steps: - -1. Run `./build.sh` inside `docs` folder to generate the docs website inside the `build/html` folder. -2. Clone the arrow-site repo -3. Checkout to the `asf-site` branch (NOT `master`) -4. Copy build artifacts into `arrow-site` repo's `datafusion` folder with a command such as - - `cp -rT ./build/html/ ../../arrow-site/datafusion/` (doesn't work on mac) - - `rsync -avzr ./build/html/ ../../arrow-site/datafusion/` -5. Commit changes in `arrow-site` and send a PR. +This documentation is hosted at https://arrow.apache.org/datafusion-python/ + +When the PR is merged to the `main` branch of the DataFusion +repository, a [github workflow](https://github.com/apache/arrow-datafusion-python/blob/main/.github/workflows/docs.yaml) which: + +1. Builds the html content +2. Pushes the html content to the [`asf-site`](https://github.com/apache/arrow-datafusion-python/tree/asf-site) branch in this repository. + +The Apache Software Foundation provides https://arrow.apache.org/, +which serves content based on the configuration in +[.asf.yaml](https://github.com/apache/arrow-datafusion-python/blob/main/.asf.yaml), +which specifies the target as https://arrow.apache.org/datafusion-python/. \ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt index eb7f5b7bb..67f1ec6ac 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-sphinx==5.3.0 +sphinx pydata-sphinx-theme==0.8.0 myst-parser maturin diff --git a/docs/source/_templates/docs-sidebar.html b/docs/source/_templates/docs-sidebar.html index bc2bf0092..6541b7713 100644 --- a/docs/source/_templates/docs-sidebar.html +++ b/docs/source/_templates/docs-sidebar.html @@ -1,6 +1,6 @@ - +