fix more tests
TrevorBergeron committed Jul 1, 2025
commit a2c96791eaea6b045888b135db81ca8fffce973e
bigframes/dtypes.py: 3 additions & 0 deletions

@@ -247,6 +247,7 @@ class SimpleDtypeInfo:
     "decimal128(38, 9)[pyarrow]",
     "decimal256(76, 38)[pyarrow]",
     "binary[pyarrow]",
+    "duration[us][pyarrow]",
 ]
 
 DTYPE_STRINGS = typing.get_args(DtypeString)
@@ -421,6 +422,8 @@ def is_bool_coercable(type_: ExpressionType) -> bool:
 # special case - both "Int64" and "int64[pyarrow]" are accepted
 BIGFRAMES_STRING_TO_BIGFRAMES["int64[pyarrow]"] = INT_DTYPE
 
+BIGFRAMES_STRING_TO_BIGFRAMES["duration[us][pyarrow]"] = TIMEDELTA_DTYPE
+
 # For the purposes of dataframe.memory_usage
 DTYPE_BYTE_SIZES = {
     type_info.dtype: type_info.logical_bytes for type_info in SIMPLE_TYPES
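For orientation, a minimal sketch (assuming pandas 2.x with pyarrow installed; not part of the change itself) of what the new alias denotes: "duration[us][pyarrow]" is the pandas string spelling of pd.ArrowDtype(pa.duration("us")), which the mapping above ties to the library's TIMEDELTA_DTYPE.

    import pandas as pd
    import pyarrow as pa

    # "duration[us][pyarrow]" resolves to an Arrow-backed microsecond duration
    # dtype; integer inputs are interpreted as microsecond counts.
    s = pd.Series([1_000_000, 90_000_000], dtype="duration[us][pyarrow]")
    assert s.dtype == pd.ArrowDtype(pa.duration("us"))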
tests/system/small/pandas/core/methods/test_describe.py: 7 additions & 1 deletion

@@ -21,7 +21,13 @@ def test_df_describe_non_temporal(scalars_dfs):
     pytest.importorskip("pandas", minversion="2.0.0")
     scalars_df, scalars_pandas_df = scalars_dfs
     # excluding temporal columns here because BigFrames cannot perform percentiles operations on them
-    unsupported_columns = ["datetime_col", "timestamp_col", "time_col", "date_col"]
+    unsupported_columns = [
+        "datetime_col",
+        "timestamp_col",
+        "time_col",
+        "date_col",
+        "duration_col",
+    ]
     bf_result = scalars_df.drop(columns=unsupported_columns).describe().to_pandas()
 
     modified_pd_df = scalars_pandas_df.drop(columns=unsupported_columns)
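For context, the exclusion exists because pandas describe() emits percentile rows that BigFrames cannot compute for temporal-like columns, and durations now join that list. A minimal illustrative sketch with hypothetical column names (not the test's fixtures):

    import pandas as pd

    # Hypothetical stand-in for the scalars fixture: one numeric column, one duration.
    pdf = pd.DataFrame(
        {
            "int64_col": pd.Series([1, 2, 3], dtype="Int64"),
            "duration_col": pd.Series([10, 20, 30], dtype="duration[us][pyarrow]"),
        }
    )

    # Drop the unsupported column first; describe() then yields count/mean/std/
    # min/25%/50%/75%/max for the remaining numeric column only.
    unsupported_columns = ["duration_col"]
    print(pdf.drop(columns=unsupported_columns).describe())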
tests/system/small/test_dataframe_io.py: 8 additions & 4 deletions

@@ -999,7 +999,7 @@ def test_to_sql_query_unnamed_index_included(
     scalars_df_default_index: bpd.DataFrame,
     scalars_pandas_df_default_index: pd.DataFrame,
 ):
-    bf_df = scalars_df_default_index.reset_index(drop=True)
+    bf_df = scalars_df_default_index.reset_index(drop=True).drop(columns="duration_col")
     sql, idx_ids, idx_labels = bf_df._to_sql_query(include_index=True)
     assert len(idx_labels) == 1
     assert len(idx_ids) == 1
@@ -1017,7 +1017,9 @@ def test_to_sql_query_named_index_included(
     scalars_df_default_index: bpd.DataFrame,
     scalars_pandas_df_default_index: pd.DataFrame,
 ):
-    bf_df = scalars_df_default_index.set_index("rowindex_2", drop=True)
+    bf_df = scalars_df_default_index.set_index("rowindex_2", drop=True).drop(
+        columns="duration_col"
+    )
     sql, idx_ids, idx_labels = bf_df._to_sql_query(include_index=True)
     assert len(idx_labels) == 1
     assert len(idx_ids) == 1
@@ -1034,7 +1036,7 @@ def test_to_sql_query_unnamed_index_excluded(
     scalars_df_default_index: bpd.DataFrame,
     scalars_pandas_df_default_index: pd.DataFrame,
 ):
-    bf_df = scalars_df_default_index.reset_index(drop=True)
+    bf_df = scalars_df_default_index.reset_index(drop=True).drop(columns="duration_col")
     sql, idx_ids, idx_labels = bf_df._to_sql_query(include_index=False)
     assert len(idx_labels) == 0
     assert len(idx_ids) == 0
@@ -1051,7 +1053,9 @@ def test_to_sql_query_named_index_excluded(
     scalars_df_default_index: bpd.DataFrame,
     scalars_pandas_df_default_index: pd.DataFrame,
 ):
-    bf_df = scalars_df_default_index.set_index("rowindex_2", drop=True)
+    bf_df = scalars_df_default_index.set_index("rowindex_2", drop=True).drop(
+        columns="duration_col"
+    )
     sql, idx_ids, idx_labels = bf_df._to_sql_query(include_index=False)
     assert len(idx_labels) == 0
     assert len(idx_ids) == 0
tests/system/small/test_session.py: 25 additions & 5 deletions

@@ -1820,6 +1820,7 @@ def test_read_parquet_gcs(
     df_out = df_out.assign(
         datetime_col=df_out["datetime_col"].astype("timestamp[us][pyarrow]"),
         timestamp_col=df_out["timestamp_col"].astype("timestamp[us, tz=UTC][pyarrow]"),
+        duration_col=df_out["duration_col"].astype("duration[us][pyarrow]"),
     )
 
     # Make sure we actually have at least some values before comparing.
@@ -1868,7 +1869,8 @@ def test_read_parquet_gcs_compressed(
     # DATETIME gets loaded as TIMESTAMP in parquet. See:
     # https://cloud.google.com/bigquery/docs/exporting-data#parquet_export_details
     df_out = df_out.assign(
-        datetime_col=df_out["datetime_col"].astype("timestamp[us][pyarrow]")
+        datetime_col=df_out["datetime_col"].astype("timestamp[us][pyarrow]"),
+        duration_col=df_out["duration_col"].astype("duration[us][pyarrow]"),
     )
 
     # Make sure we actually have at least some values before comparing.
@@ -1926,9 +1928,23 @@ def test_read_json_gcs_bq_engine(session, scalars_dfs, gcs_folder):
 
     # The auto detects of BigQuery load job have restrictions to detect the bytes,
     # datetime, numeric and geometry types, so they're skipped here.
-    df = df.drop(columns=["bytes_col", "datetime_col", "numeric_col", "geography_col"])
+    df = df.drop(
+        columns=[
+            "bytes_col",
+            "datetime_col",
+            "numeric_col",
+            "geography_col",
+            "duration_col",
+        ]
+    )
     scalars_df = scalars_df.drop(
-        columns=["bytes_col", "datetime_col", "numeric_col", "geography_col"]
+        columns=[
+            "bytes_col",
+            "datetime_col",
+            "numeric_col",
+            "geography_col",
+            "duration_col",
+        ]
     )
     assert df.shape[0] == scalars_df.shape[0]
     pd.testing.assert_series_equal(
@@ -1962,11 +1978,15 @@ def test_read_json_gcs_default_engine(session, scalars_dfs, gcs_folder):
     # The auto detects of BigQuery load job have restrictions to detect the bytes,
     # numeric and geometry types, so they're skipped here.
     df = df.drop(columns=["bytes_col", "numeric_col", "geography_col"])
-    scalars_df = scalars_df.drop(columns=["bytes_col", "numeric_col", "geography_col"])
+    scalars_df = scalars_df.drop(
+        columns=["bytes_col", "numeric_col", "geography_col", "duration_col"]
+    )
 
     # pandas read_json does not respect the dtype overrides for these columns
     df = df.drop(columns=["date_col", "datetime_col", "time_col"])
-    scalars_df = scalars_df.drop(columns=["date_col", "datetime_col", "time_col"])
+    scalars_df = scalars_df.drop(
+        columns=["date_col", "datetime_col", "time_col", "duration_col"]
+    )
 
     assert df.shape[0] == scalars_df.shape[0]
     pd.testing.assert_series_equal(df.dtypes, scalars_df.dtypes)
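The parquet fixes above mirror the existing datetime workaround: on the way back from GCS the duration column does not round-trip as a pyarrow duration (the diff only shows the cast, but it presumably arrives as plain integer microseconds), so the test casts it back before comparing. A minimal sketch of that cast, with a hypothetical df_out standing in for the loaded frame:

    import pandas as pd

    # Hypothetical loaded frame: duration values arrive as int64 microseconds.
    df_out = pd.DataFrame(
        {"duration_col": pd.Series([1_000_000, 2_000_000], dtype="int64[pyarrow]")}
    )

    # Cast back to the Arrow duration dtype, as the updated tests do.
    df_out = df_out.assign(
        duration_col=df_out["duration_col"].astype("duration[us][pyarrow]")
    )
    assert str(df_out["duration_col"].dtype) == "duration[us][pyarrow]"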