From e156f81278d4a1037966ffa20eaca937337cc0dd Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Thu, 13 Jun 2024 19:54:38 +0000 Subject: [PATCH 1/4] fix: df.loc with the 2nd input as bigframes boolean Series --- bigframes/core/indexers.py | 7 ++++++- tests/system/small/test_dataframe.py | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index 582141d539..f21228146a 100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -159,7 +159,12 @@ def __getitem__(self, key): ) columns = key[1] - if isinstance(columns, pd.Series) and columns.dtype == "bool": + if isinstance(columns, bigframes.series.Series): + columns = columns.to_pandas() + if isinstance(columns, pd.Series) and columns.dtype in ( + "bool", + "boolean", + ): # TODO(b/340892590): fix type error columns = df.columns[columns] # type: ignore diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 93db0c4196..3c003b1a52 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -3020,6 +3020,21 @@ def test_loc_select_with_column_condition(scalars_df_index, scalars_pandas_df_in ) +def test_loc_select_with_column_condition_bf_series( + scalars_df_index, scalars_pandas_df_index +): + bf_result = scalars_df_index.loc[ + :, series.Series(scalars_df_index.dtypes == "Int64") + ].to_pandas() + pd_result = scalars_pandas_df_index.loc[ + :, scalars_pandas_df_index.dtypes == "Int64" + ] + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + def test_loc_single_index_with_duplicate(scalars_df_index, scalars_pandas_df_index): scalars_df_index = scalars_df_index.set_index("string_col", drop=False) scalars_pandas_df_index = scalars_pandas_df_index.set_index( From cbcaa866d00701048d1258522e431907c4d72d8c Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Thu, 13 Jun 2024 20:17:24 +0000 Subject: [PATCH 2/4] merge main --- 
bigframes/core/indexers.py | 13 +++---- bigframes/core/indexes/base.py | 5 +++ bigframes/dataframe.py | 19 +++++++-- bigframes/series.py | 5 +++ tests/system/load/test_large_tables.py | 5 +-- .../system/small/operations/test_plotting.py | 6 ++- tests/unit/core/test_indexes.py | 39 +++++++++++++++++++ tests/unit/test_dataframe.py | 14 +++++++ tests/unit/test_series.py | 27 +++++++++++++ 9 files changed, 118 insertions(+), 15 deletions(-) create mode 100644 tests/unit/core/test_indexes.py create mode 100644 tests/unit/test_series.py diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index f21228146a..dae5eada70 100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -162,11 +162,10 @@ def __getitem__(self, key): if isinstance(columns, bigframes.series.Series): columns = columns.to_pandas() if isinstance(columns, pd.Series) and columns.dtype in ( - "bool", - "boolean", + bool, + pd.BooleanDtype(), ): - # TODO(b/340892590): fix type error - columns = df.columns[columns] # type: ignore + columns = df.columns[typing.cast(pd.Series, columns)] return df[columns] @@ -257,7 +256,7 @@ def __getitem__(self, key: tuple) -> bigframes.core.scalar.Scalar: raise ValueError(error_message) if len(key) != 2: raise TypeError(error_message) - block: bigframes.core.blocks.Block = self._dataframe._block # type: ignore + block: bigframes.core.blocks.Block = self._dataframe._block column_block = block.select_columns([block.value_columns[key[1]]]) column = bigframes.series.Series(column_block) return column.iloc[key[0]] @@ -381,14 +380,14 @@ def _perform_loc_list_join( ) result = result.rename(original_name) else: - result = series_or_dataframe._perform_join_by_index(keys_index, how="right") # type: ignore + result = series_or_dataframe._perform_join_by_index(keys_index, how="right") if drop_levels and series_or_dataframe.index.nlevels > keys_index.nlevels: # drop common levels levels_to_drop = [ name for name in series_or_dataframe.index.names if name 
in keys_index.names ] - result = result.droplevel(levels_to_drop) # type: ignore + result = result.droplevel(levels_to_drop) return result diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index 0e5082447a..8df6155591 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -243,6 +243,11 @@ def query_job(self) -> Optional[bigquery.QueryJob]: return self._query_job def __repr__(self) -> str: + # Protect against errors with uninitialized Series. See: + # https://github.com/googleapis/python-bigquery-dataframes/issues/728 + if not hasattr(self, "_block"): + return object.__repr__(self) + # TODO(swast): Add a timeout here? If the query is taking a long time, # maybe we just print the job metadata that we have so far? # TODO(swast): Avoid downloading the whole series by using job diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index f78dee1642..9763b68fef 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -574,9 +574,18 @@ def _getitem_bool_series(self, key: bigframes.series.Series) -> DataFrame: return DataFrame(block) def __getattr__(self, key: str): + # Protect against recursion errors with uninitialized DataFrame + # objects. See: + # https://github.com/googleapis/python-bigquery-dataframes/issues/728 + # and + # https://nedbatchelder.com/blog/201010/surprising_getattr_recursion.html + if key == "_block": + raise AttributeError("_block") + if key in self._block.column_labels: return self.__getitem__(key) - elif hasattr(pandas.DataFrame, key): + + if hasattr(pandas.DataFrame, key): raise AttributeError( textwrap.dedent( f""" @@ -585,8 +594,7 @@ def __getattr__(self, key: str): """ ) ) - else: - raise AttributeError(key) + raise AttributeError(key) def __setattr__(self, key: str, value): if key in ["_block", "_query_job"]: @@ -616,6 +624,11 @@ def __repr__(self) -> str: Only represents the first `bigframes.options.display.max_rows`. 
""" + # Protect against errors with uninitialized DataFrame. See: + # https://github.com/googleapis/python-bigquery-dataframes/issues/728 + if not hasattr(self, "_block"): + return object.__repr__(self) + opts = bigframes.options.display max_results = opts.max_rows if opts.repr_mode == "deferred": diff --git a/bigframes/series.py b/bigframes/series.py index d858060aec..cb56319471 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -281,6 +281,11 @@ def reset_index( return bigframes.dataframe.DataFrame(block) def __repr__(self) -> str: + # Protect against errors with uninitialized Series. See: + # https://github.com/googleapis/python-bigquery-dataframes/issues/728 + if not hasattr(self, "_block"): + return object.__repr__(self) + # TODO(swast): Add a timeout here? If the query is taking a long time, # maybe we just print the job metadata that we have so far? # TODO(swast): Avoid downloading the whole series by using job diff --git a/tests/system/load/test_large_tables.py b/tests/system/load/test_large_tables.py index f92207b191..472be3d2ad 100644 --- a/tests/system/load/test_large_tables.py +++ b/tests/system/load/test_large_tables.py @@ -94,8 +94,7 @@ def test_to_pandas_large_table(): # df will be downloaded locally expected_row_count, expected_column_count = df.shape - # TODO(b/340893653): fix type error - df = df.to_pandas() # type: ignore - row_count, column_count = df.shape + df_converted = df.to_pandas() + row_count, column_count = df_converted.shape assert column_count == expected_column_count assert row_count == expected_row_count diff --git a/tests/system/small/operations/test_plotting.py b/tests/system/small/operations/test_plotting.py index e0ef84641c..7be44e0a0f 100644 --- a/tests/system/small/operations/test_plotting.py +++ b/tests/system/small/operations/test_plotting.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from matplotlib.collections import PathCollection import numpy as np import pandas as pd import pandas._testing as tm @@ -258,9 +259,10 @@ def test_scatter_args_s(s): ax = df.plot.scatter(x="a", y="b", s="s") pd_ax = pd_df.plot.scatter(x="a", y="b", s="s") - # TODO(b/340891723): fix type error + + assert isinstance(pd_ax.collections[0], PathCollection) tm.assert_numpy_array_equal( - ax.collections[0].get_sizes(), pd_ax.collections[0].get_sizes() # type: ignore + ax.collections[0].get_sizes(), pd_ax.collections[0].get_sizes() ) diff --git a/tests/unit/core/test_indexes.py b/tests/unit/core/test_indexes.py new file mode 100644 index 0000000000..6e739c9dc9 --- /dev/null +++ b/tests/unit/core/test_indexes.py @@ -0,0 +1,39 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import bigframes.core.indexes + + +def test_index_repr_with_uninitialized_object(): + """Ensures Index.__init__ can be paused in a visual debugger without crashing. + + Regression test for https://github.com/googleapis/python-bigquery-dataframes/issues/728 + """ + # Avoid calling __init__ to simulate pausing __init__ in a debugger. + # https://stackoverflow.com/a/6384982/101923 + index = object.__new__(bigframes.core.indexes.Index) + got = repr(index) + assert "Index" in got + + +def test_multiindex_repr_with_uninitialized_object(): + """Ensures MultiIndex.__init__ can be paused in a visual debugger without crashing. 
+ + Regression test for https://github.com/googleapis/python-bigquery-dataframes/issues/728 + """ + # Avoid calling __init__ to simulate pausing __init__ in a debugger. + # https://stackoverflow.com/a/6384982/101923 + index = object.__new__(bigframes.core.indexes.MultiIndex) + got = repr(index) + assert "MultiIndex" in got diff --git a/tests/unit/test_dataframe.py b/tests/unit/test_dataframe.py index 17a8290889..6370d1b987 100644 --- a/tests/unit/test_dataframe.py +++ b/tests/unit/test_dataframe.py @@ -15,9 +15,23 @@ import google.cloud.bigquery import pytest +import bigframes.dataframe + from . import resources +def test_dataframe_repr_with_uninitialized_object(): + """Ensures DataFrame.__init__ can be paused in a visual debugger without crashing. + + Regression test for https://github.com/googleapis/python-bigquery-dataframes/issues/728 + """ + # Avoid calling __init__ to simulate pausing __init__ in a debugger. + # https://stackoverflow.com/a/6384982/101923 + dataframe = bigframes.dataframe.DataFrame.__new__(bigframes.dataframe.DataFrame) + got = repr(dataframe) + assert "DataFrame" in got + + def test_dataframe_to_gbq_invalid_destination(monkeypatch: pytest.MonkeyPatch): dataframe = resources.create_dataframe(monkeypatch) diff --git a/tests/unit/test_series.py b/tests/unit/test_series.py new file mode 100644 index 0000000000..1409209c6c --- /dev/null +++ b/tests/unit/test_series.py @@ -0,0 +1,27 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import bigframes.series + + +def test_series_repr_with_uninitialized_object(): + """Ensures Series.__init__ can be paused in a visual debugger without crashing. + + Regression test for https://github.com/googleapis/python-bigquery-dataframes/issues/728 + """ + # Avoid calling __init__ to simulate pausing __init__ in a debugger. + # https://stackoverflow.com/a/6384982/101923 + series = bigframes.series.Series.__new__(bigframes.series.Series) + got = repr(series) + assert "Series" in got From ff377e28f7f4800874104a9c2dd50d4f6f28ef44 Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Thu, 13 Jun 2024 21:20:14 +0000 Subject: [PATCH 3/4] modify test case --- tests/system/small/test_dataframe.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 3c003b1a52..39817492ec 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -3023,11 +3023,19 @@ def test_loc_select_with_column_condition(scalars_df_index, scalars_pandas_df_in def test_loc_select_with_column_condition_bf_series( scalars_df_index, scalars_pandas_df_index ): + # (b/347072677) GEOGRAPH type doesn't support UNIQUE op + columns = [ + item for item in scalars_pandas_df_index.columns if item != "geography_col" + ] + scalars_df_index = scalars_df_index[columns] + scalars_pandas_df_index = scalars_pandas_df_index[columns] + + size_half = len(scalars_pandas_df_index) / 2 bf_result = scalars_df_index.loc[ - :, series.Series(scalars_df_index.dtypes == "Int64") + :, scalars_df_index.nunique() > size_half ].to_pandas() pd_result = scalars_pandas_df_index.loc[ - :, scalars_pandas_df_index.dtypes == "Int64" + :, scalars_pandas_df_index.nunique() > size_half ] pd.testing.assert_frame_equal( bf_result, From 569c3c1f07cebbc704b1f9c752e8d6816db21add Mon Sep 17 00:00:00 2001 
From: Garrett Wu Date: Thu, 13 Jun 2024 21:22:32 +0000 Subject: [PATCH 4/4] fix comment --- tests/system/small/test_dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 39817492ec..782ef2d5ea 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -3023,7 +3023,7 @@ def test_loc_select_with_column_condition_bf_series( scalars_df_index, scalars_pandas_df_index ): - # (b/347072677) GEOGRAPH type doesn't support UNIQUE op + # (b/347072677) GEOGRAPHY type doesn't support DISTINCT op columns = [ item for item in scalars_pandas_df_index.columns if item != "geography_col" ]