From a79e61ac0fe3c2a1fb4f5b0943a703bdf9be9871 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 18 Mar 2024 15:58:19 -0500 Subject: [PATCH 1/4] feat: update plot sample to 1000 rows In making a line plot sample with Salem, I noticed that 100 rows loses some important shape information. Most screens are > 1000 pixels wide, so this seems a reasonable default. --- bigframes/operations/_matplotlib/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/operations/_matplotlib/core.py b/bigframes/operations/_matplotlib/core.py index 5c9d771f61..db212fcd21 100644 --- a/bigframes/operations/_matplotlib/core.py +++ b/bigframes/operations/_matplotlib/core.py @@ -45,7 +45,7 @@ def generate(self) -> None: def _compute_plot_data(self, data): # TODO: Cache the sampling data in the PlotAccessor. - sampling_n = self.kwargs.pop("sampling_n", 100) + sampling_n = self.kwargs.pop("sampling_n", 1000) sampling_random_state = self.kwargs.pop("sampling_random_state", 0) return ( data.sample(n=sampling_n, random_state=sampling_random_state) From f42ee25d220ddd66798d1edc0705c58c9918bef3 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 19 Mar 2024 18:57:26 +0000 Subject: [PATCH 2/4] update test to use larger data for sampling --- tests/system/small/operations/test_plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system/small/operations/test_plotting.py b/tests/system/small/operations/test_plotting.py index 876c8f7d04..f872b56348 100644 --- a/tests/system/small/operations/test_plotting.py +++ b/tests/system/small/operations/test_plotting.py @@ -220,7 +220,7 @@ def test_sampling_plot_args_n(): def test_sampling_plot_args_random_state(): - df = bpd.DataFrame(np.arange(1000), columns=["one"]) + df = bpd.DataFrame(np.arange(10000), columns=["one"]) ax_0 = df.plot.line() ax_1 = df.plot.line() ax_2 = df.plot.line(sampling_random_state=100) From e75774e2ab0b3b8d14bf58b19024d0be04fa824d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 20 Mar 2024 16:39:33 +0000 Subject: [PATCH 3/4] add constants for default values --- bigframes/operations/_matplotlib/core.py | 9 +++++++-- tests/system/small/operations/test_plotting.py | 8 ++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/bigframes/operations/_matplotlib/core.py b/bigframes/operations/_matplotlib/core.py index db212fcd21..f7c546b61e 100644 --- a/bigframes/operations/_matplotlib/core.py +++ b/bigframes/operations/_matplotlib/core.py @@ -17,6 +17,9 @@ import matplotlib.pyplot as plt +DEFAULT_SAMPLING_N = 1000 +DEFAULT_SAMPLING_STATE = 0 + class MPLPlot(abc.ABC): @abc.abstractmethod @@ -45,8 +48,10 @@ def generate(self) -> None: def _compute_plot_data(self, data): # TODO: Cache the sampling data in the PlotAccessor. - sampling_n = self.kwargs.pop("sampling_n", 1000) - sampling_random_state = self.kwargs.pop("sampling_random_state", 0) + sampling_n = self.kwargs.pop("sampling_n", DEFAULT_SAMPLING_N) + sampling_random_state = self.kwargs.pop( + "sampling_random_state", DEFAULT_SAMPLING_STATE + ) return ( data.sample(n=sampling_n, random_state=sampling_random_state) .to_pandas() diff --git a/tests/system/small/operations/test_plotting.py b/tests/system/small/operations/test_plotting.py index f872b56348..14a497304c 100644 --- a/tests/system/small/operations/test_plotting.py +++ b/tests/system/small/operations/test_plotting.py @@ -16,6 +16,7 @@ import pandas._testing as tm import pytest +import bigframes.operations._matplotlib.core as bf_mpl import bigframes.pandas as bpd @@ -208,11 +209,10 @@ def test_scatter(scalars_dfs): def test_sampling_plot_args_n(): - df = bpd.DataFrame(np.arange(1000), columns=["one"]) + df = bpd.DataFrame(np.arange(bf_mpl.DEFAULT_SAMPLE_N * 10), columns=["one"]) ax = df.plot.line() assert len(ax.lines) == 1 - # Default sampling_n is 100 - assert len(ax.lines[0].get_data()[1]) == 100 + assert len(ax.lines[0].get_data()[1]) == bf_mpl.DEFAULT_SAMPLING_N ax = df.plot.line(sampling_n=2) assert len(ax.lines) == 1 @@ -220,7 +220,7 @@ def test_sampling_plot_args_n(): def test_sampling_plot_args_random_state(): - df = bpd.DataFrame(np.arange(10000), columns=["one"]) + df = bpd.DataFrame(np.arange(bf_mpl.DEFAULT_SAMPLING_N * 10), columns=["one"]) ax_0 = df.plot.line() ax_1 = df.plot.line() ax_2 = df.plot.line(sampling_random_state=100) From b0f7dd56c4eb915e6ef15a148df3ec83a1d5c0b3 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 20 Mar 2024 16:42:47 +0000 Subject: [PATCH 4/4] typo --- tests/system/small/operations/test_plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system/small/operations/test_plotting.py b/tests/system/small/operations/test_plotting.py index 14a497304c..9f4686ddf2 100644 --- a/tests/system/small/operations/test_plotting.py +++ b/tests/system/small/operations/test_plotting.py @@ -209,7 +209,7 @@ def test_scatter(scalars_dfs): def test_sampling_plot_args_n(): - df = bpd.DataFrame(np.arange(bf_mpl.DEFAULT_SAMPLE_N * 10), columns=["one"]) + df = bpd.DataFrame(np.arange(bf_mpl.DEFAULT_SAMPLING_N * 10), columns=["one"]) ax = df.plot.line() assert len(ax.lines) == 1 assert len(ax.lines[0].get_data()[1]) == bf_mpl.DEFAULT_SAMPLING_N