From 860fe47cd5338af28eaf0818bc7bc605eab4d8f2 Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Wed, 13 Mar 2024 20:59:08 +0000 Subject: [PATCH 1/3] chore: add deferred exec code samples --- bigframes/_config/compute_options.py | 9 ++++++ .../pandas/core/config_init.py | 31 +++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/bigframes/_config/compute_options.py b/bigframes/_config/compute_options.py index 20c31d3906..3360dc891f 100644 --- a/bigframes/_config/compute_options.py +++ b/bigframes/_config/compute_options.py @@ -23,6 +23,15 @@ class ComputeOptions: """ Encapsulates configuration for compute options. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") + + >>> bpd.options.compute.maximum_bytes_billed = 500 + >>> df.to_pandas() #this should fail + google.api_core.exceptions.InternalServerError: 500 Query exceeded limit for bytes billed: 500. 10485760 or higher required. + Attributes: maximum_bytes_billed (int, Options): Limits the bytes billed for query jobs. Queries that will have diff --git a/third_party/bigframes_vendored/pandas/core/config_init.py b/third_party/bigframes_vendored/pandas/core/config_init.py index dfb91dfeb8..f9a18b1017 100644 --- a/third_party/bigframes_vendored/pandas/core/config_init.py +++ b/third_party/bigframes_vendored/pandas/core/config_init.py @@ -15,6 +15,37 @@ display_options_doc = """ Encapsulates configuration for displaying objects. +**Examples:** + +Define Repr mode to "deferred" will prevent job execution in repr. + >>> import bigframes.pandas as bpd + >>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") + + >>> bpd.options.display.repr_mode = "deferred" + >>> df.head(20) # will no longer run the job + Computation deferred. Computation will process 28.9 kB + +Users can also get a dry run of the job by accessing the query_job property before they've run the job. This will return a dry run instance of the job they can inspect. + >>> df.query_job.total_bytes_processed + 28947 + +User can execute the job by calling .to_pandas() + >>> df.to_pandas() + species island culmen_length_mm culmen_depth_mm flipper_length_mm body_mass_g sex + 0 Gentoo penguin (Pygoscelis papua) Biscoe 50.5 15.9 225.0 5400.0 MALE + 1 Gentoo penguin (Pygoscelis papua) Biscoe 45.1 14.5 215.0 5000.0 FEMALE + 2 Adelie Penguin (Pygoscelis adeliae) Torgersen 41.4 18.5 202.0 3875.0 MALE + 3 Adelie Penguin (Pygoscelis adeliae) Torgersen 38.6 17.0 188.0 2900.0 FEMALE + 4 Gentoo penguin (Pygoscelis papua) Biscoe 46.5 14.8 217.0 5200.0 FEMALE + .. ... ... ... ... ... ... ... + 339 Adelie Penguin (Pygoscelis adeliae) Dream 38.1 17.6 187.0 3425.0 FEMALE + 340 Adelie Penguin (Pygoscelis adeliae) Biscoe 36.4 17.1 184.0 2850.0 FEMALE + 341 Chinstrap penguin (Pygoscelis antarctica) Dream 40.9 16.6 187.0 3200.0 FEMALE + 342 Adelie Penguin (Pygoscelis adeliae) Biscoe 41.3 21.1 195.0 4400.0 MALE + 343 Chinstrap penguin (Pygoscelis antarctica) Dream 45.2 16.6 191.0 3250.0 FEMALE + + [344 rows x 7 columns] + Attributes: max_columns (int, default 20): If `max_columns` is exceeded, switch to truncate view. From 031d50d14defcf1b9c96c06f0b316e37a41c7d0a Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Thu, 14 Mar 2024 01:51:17 +0000 Subject: [PATCH 2/3] fix tests --- bigframes/_config/compute_options.py | 4 +++- third_party/bigframes_vendored/pandas/core/config_init.py | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/bigframes/_config/compute_options.py b/bigframes/_config/compute_options.py index 3360dc891f..fb708b844c 100644 --- a/bigframes/_config/compute_options.py +++ b/bigframes/_config/compute_options.py @@ -29,9 +29,11 @@ class ComputeOptions: >>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") >>> bpd.options.compute.maximum_bytes_billed = 500 - >>> df.to_pandas() #this should fail + >>> # df.to_pandas() # this should fail google.api_core.exceptions.InternalServerError: 500 Query exceeded limit for bytes billed: 500. 10485760 or higher required. + >>> bpd.options.compute.maximum_bytes_billed = None # reset option + Attributes: maximum_bytes_billed (int, Options): Limits the bytes billed for query jobs. Queries that will have diff --git a/third_party/bigframes_vendored/pandas/core/config_init.py b/third_party/bigframes_vendored/pandas/core/config_init.py index f9a18b1017..4c09bc42ab 100644 --- a/third_party/bigframes_vendored/pandas/core/config_init.py +++ b/third_party/bigframes_vendored/pandas/core/config_init.py @@ -46,6 +46,9 @@ [344 rows x 7 columns] +Reset option + >>> bpd.options.display.repr_mode = "head" + Attributes: max_columns (int, default 20): If `max_columns` is exceeded, switch to truncate view. From bcdc995a272907f2e56ca3cf4095568d62b2bf6d Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Thu, 14 Mar 2024 18:13:47 +0000 Subject: [PATCH 3/3] fix tests --- .../pandas/core/config_init.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/config_init.py b/third_party/bigframes_vendored/pandas/core/config_init.py index 4c09bc42ab..33c6b3e093 100644 --- a/third_party/bigframes_vendored/pandas/core/config_init.py +++ b/third_party/bigframes_vendored/pandas/core/config_init.py @@ -30,21 +30,7 @@ 28947 User can execute the job by calling .to_pandas() - >>> df.to_pandas() - species island culmen_length_mm culmen_depth_mm flipper_length_mm body_mass_g sex - 0 Gentoo penguin (Pygoscelis papua) Biscoe 50.5 15.9 225.0 5400.0 MALE - 1 Gentoo penguin (Pygoscelis papua) Biscoe 45.1 14.5 215.0 5000.0 FEMALE - 2 Adelie Penguin (Pygoscelis adeliae) Torgersen 41.4 18.5 202.0 3875.0 MALE - 3 Adelie Penguin (Pygoscelis adeliae) Torgersen 38.6 17.0 188.0 2900.0 FEMALE - 4 Gentoo penguin (Pygoscelis papua) Biscoe 46.5 14.8 217.0 5200.0 FEMALE - .. ... ... ... ... ... ... ... - 339 Adelie Penguin (Pygoscelis adeliae) Dream 38.1 17.6 187.0 3425.0 FEMALE - 340 Adelie Penguin (Pygoscelis adeliae) Biscoe 36.4 17.1 184.0 2850.0 FEMALE - 341 Chinstrap penguin (Pygoscelis antarctica) Dream 40.9 16.6 187.0 3200.0 FEMALE - 342 Adelie Penguin (Pygoscelis adeliae) Biscoe 41.3 21.1 195.0 4400.0 MALE - 343 Chinstrap penguin (Pygoscelis antarctica) Dream 45.2 16.6 191.0 3250.0 FEMALE - - [344 rows x 7 columns] + >>> # df.to_pandas() Reset option >>> bpd.options.display.repr_mode = "head"