From c81ebfbe9cd57f302432cae62a1d434fa146fcd6 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Tue, 2 Apr 2024 23:53:49 +0000 Subject: [PATCH 1/6] fix: error for object dtype on read_pandas --- bigframes/session/__init__.py | 8 ++++++++ tests/system/small/test_session.py | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 354352f1c9..73ebc10b68 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -71,6 +71,7 @@ ReadPickleBuffer, StorageOptions, ) +import pandas.core.dtypes.common as pd_dtypes import pyarrow as pa import bigframes._config.bigquery_options as bigquery_options @@ -1047,6 +1048,13 @@ def _read_pandas( "bigframes.pandas.DataFrame." ) + for column_name, dtype in pandas_dataframe.dtypes.items(): + if pd_dtypes.is_object_dtype(dtype): + raise ValueError( + f"Column `{column_name}` has an unsupported dtype: `{dtype}`. " + + f"{constants.FEEDBACK_LINK}" + ) + inline_df = self._read_pandas_inline(pandas_dataframe) if inline_df is not None: return inline_df diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index d84244e5cf..a77382d808 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -436,6 +436,11 @@ def test_read_pandas_index(session): pd.testing.assert_index_equal(bf_idx.to_pandas(), pd_idx) +def test_read_pandas_w_unsupported_object_dtype(session): + with pytest.raises(ValueError, match="unsupported dtype: `object`"): + session.read_pandas(pd.DataFrame({"a": [1, "hello"]})) + + def test_read_pandas_inline_respects_location(): options = bigframes.BigQueryOptions(location="europe-west1") session = bigframes.Session(options) From 18d02a4e1a076365911738ed065dec6ab37b834e Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Fri, 5 Apr 2024 22:16:30 +0000 Subject: [PATCH 2/6] catch pa.ArrowInvalid exception --- bigframes/session/__init__.py | 16 +++++++--------- tests/system/small/test_session.py | 5 +++-- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 73ebc10b68..a9e0184e85 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -1048,17 +1048,13 @@ def _read_pandas( "bigframes.pandas.DataFrame." ) - for column_name, dtype in pandas_dataframe.dtypes.items(): - if pd_dtypes.is_object_dtype(dtype): - raise ValueError( - f"Column `{column_name}` has an unsupported dtype: `{dtype}`. " - + f"{constants.FEEDBACK_LINK}" - ) - inline_df = self._read_pandas_inline(pandas_dataframe) if inline_df is not None: return inline_df - return self._read_pandas_load_job(pandas_dataframe, api_name) + try: + return self._read_pandas_load_job(pandas_dataframe, api_name) + except pa.ArrowInvalid as e: + raise pa.ArrowInvalid(f"Unsupported dtype: `{e}`. ") def _read_pandas_inline( self, pandas_dataframe: pandas.DataFrame @@ -1072,7 +1068,9 @@ def _read_pandas_inline( inline_df = dataframe.DataFrame( blocks.Block.from_local(pandas_dataframe, self) ) - except ValueError: # Thrown by ibis for some unhandled types + except pa.ArrowInvalid as e: + raise pa.ArrowInvalid(f"Unsupported dtype: `{e}`. ") + except ValueError as e: # Thrown by ibis for some unhandled types return None except pa.ArrowTypeError: # Thrown by arrow for types without mapping (geo). return None diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index a77382d808..56ca597e4d 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -24,6 +24,7 @@ import google.cloud.bigquery as bigquery import numpy as np import pandas as pd +import pyarrow as pa import pytest import bigframes @@ -436,8 +437,8 @@ def test_read_pandas_index(session): pd.testing.assert_index_equal(bf_idx.to_pandas(), pd_idx) -def test_read_pandas_w_unsupported_object_dtype(session): - with pytest.raises(ValueError, match="unsupported dtype: `object`"): +def test_read_pandas_w_unsupported_mixed_dtype(session): + with pytest.raises(pa.ArrowInvalid, match="Unsupported dtype"): session.read_pandas(pd.DataFrame({"a": [1, "hello"]})) From 0bb0609cfce96442020cecf358dd5eb8ca63f83c Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Fri, 5 Apr 2024 23:10:27 +0000 Subject: [PATCH 3/6] fix lint --- bigframes/session/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index a9e0184e85..3385c61151 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -71,7 +71,6 @@ ReadPickleBuffer, StorageOptions, ) -import pandas.core.dtypes.common as pd_dtypes import pyarrow as pa import bigframes._config.bigquery_options as bigquery_options @@ -1070,7 +1069,7 @@ def _read_pandas_inline( ) except pa.ArrowInvalid as e: raise pa.ArrowInvalid(f"Unsupported dtype: `{e}`. ") - except ValueError as e: # Thrown by ibis for some unhandled types + except ValueError: # Thrown by ibis for some unhandled types return None except pa.ArrowTypeError: # Thrown by arrow for types without mapping (geo). return None From 4b695e6ef944de0477471fb24e13e73c11f36e71 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Wed, 10 Apr 2024 16:53:42 +0000 Subject: [PATCH 4/6] update exception message --- bigframes/session/__init__.py | 4 ++-- tests/system/small/test_session.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 3385c61151..f7ad5138b9 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -1053,7 +1053,7 @@ def _read_pandas( try: return self._read_pandas_load_job(pandas_dataframe, api_name) except pa.ArrowInvalid as e: - raise pa.ArrowInvalid(f"Unsupported dtype: `{e}`. ") + raise pa.ArrowInvalid(f"Could not convert with a BigQuery type: `{e}`. ") from e def _read_pandas_inline( self, pandas_dataframe: pandas.DataFrame @@ -1068,7 +1068,7 @@ def _read_pandas_inline( blocks.Block.from_local(pandas_dataframe, self) ) except pa.ArrowInvalid as e: - raise pa.ArrowInvalid(f"Unsupported dtype: `{e}`. ") + raise pa.ArrowInvalid(f"Could not convert with a BigQuery type: `{e}`. ") from e except ValueError: # Thrown by ibis for some unhandled types return None except pa.ArrowTypeError: # Thrown by arrow for types without mapping (geo). diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index 56ca597e4d..ce415f9324 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -438,7 +438,7 @@ def test_read_pandas_index(session): def test_read_pandas_w_unsupported_mixed_dtype(session): - with pytest.raises(pa.ArrowInvalid, match="Unsupported dtype"): + with pytest.raises(pa.ArrowInvalid, match="Could not convert"): session.read_pandas(pd.DataFrame({"a": [1, "hello"]})) From f04f0252de3b497a6e343c2af8c4642b7cab0e4a Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Wed, 10 Apr 2024 16:55:38 +0000 Subject: [PATCH 5/6] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?= =?UTF-8?q?st-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- bigframes/session/__init__.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index f7ad5138b9..b6d56006be 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -1053,7 +1053,9 @@ def _read_pandas( try: return self._read_pandas_load_job(pandas_dataframe, api_name) except pa.ArrowInvalid as e: - raise pa.ArrowInvalid(f"Could not convert with a BigQuery type: `{e}`. ") from e + raise pa.ArrowInvalid( + f"Could not convert with a BigQuery type: `{e}`. " + ) from e def _read_pandas_inline( self, pandas_dataframe: pandas.DataFrame @@ -1068,7 +1070,9 @@ def _read_pandas_inline( blocks.Block.from_local(pandas_dataframe, self) ) except pa.ArrowInvalid as e: - raise pa.ArrowInvalid(f"Could not convert with a BigQuery type: `{e}`. ") from e + raise pa.ArrowInvalid( + f"Could not convert with a BigQuery type: `{e}`. " + ) from e except ValueError: # Thrown by ibis for some unhandled types return None except pa.ArrowTypeError: # Thrown by arrow for types without mapping (geo). From 7dd96dbb0a8ed59a5e8a862c073c52527c9b57c1 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Wed, 10 Apr 2024 16:55:44 +0000 Subject: [PATCH 6/6] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?= =?UTF-8?q?st-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- bigframes/session/__init__.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index f7ad5138b9..b6d56006be 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -1053,7 +1053,9 @@ def _read_pandas( try: return self._read_pandas_load_job(pandas_dataframe, api_name) except pa.ArrowInvalid as e: - raise pa.ArrowInvalid(f"Could not convert with a BigQuery type: `{e}`. ") from e + raise pa.ArrowInvalid( + f"Could not convert with a BigQuery type: `{e}`. " + ) from e def _read_pandas_inline( self, pandas_dataframe: pandas.DataFrame @@ -1068,7 +1070,9 @@ def _read_pandas_inline( blocks.Block.from_local(pandas_dataframe, self) ) except pa.ArrowInvalid as e: - raise pa.ArrowInvalid(f"Could not convert with a BigQuery type: `{e}`. ") from e + raise pa.ArrowInvalid( + f"Could not convert with a BigQuery type: `{e}`. " + ) from e except ValueError: # Thrown by ibis for some unhandled types return None except pa.ArrowTypeError: # Thrown by arrow for types without mapping (geo).