Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion bigframes/core/groupby/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import bigframes_vendored.constants as constants
import bigframes_vendored.pandas.core.groupby as vendored_pandas_groupby
import jellyfish
import pandas as pd

from bigframes.core import log_adapter
Expand Down Expand Up @@ -91,8 +92,21 @@ def __getitem__(

bad_keys = [key for key in keys if key not in self._block.column_labels]

# Raise a KeyError message with the possible correct key(s)
if len(bad_keys) > 0:
raise KeyError(f"Columns not found: {str(bad_keys)[1:-1]}")
possible_key = []
for bad_key in bad_keys:
possible_key.append(
min(
self._block.column_labels,
key=lambda item: jellyfish.damerau_levenshtein_distance(
bad_key, item
),
)
)
raise KeyError(
f"Columns not found: {str(bad_keys)[1:-1]}. Did you mean {str(possible_key)[1:-1]}?"
)

columns = [
col_id for col_id, label in self._col_id_labels.items() if label in keys
Expand Down
37 changes: 21 additions & 16 deletions tests/system/small/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,24 +426,12 @@ def test_dataframe_groupby_getitem_error(
scalars_pandas_df_index,
):
col_names = ["float64_col", "int64_col", "bool_col", "string_col"]
with pytest.raises(KeyError, match="\"Columns not found: 'not_in_group'\""):
(
scalars_df_index[col_names]
.groupby("string_col")["not_in_group"]
.min()
.to_pandas()
)


def test_dataframe_groupby_getitem_multiple_columns_error(
scalars_df_index,
scalars_pandas_df_index,
):
col_names = ["float64_col", "int64_col", "bool_col", "string_col"]
with pytest.raises(KeyError, match="\"Columns not found: 'col1', 'col2'\""):
with pytest.raises(
KeyError, match=r"Columns not found: 'not_in_group'. Did you mean 'string_col'?"
):
(
scalars_df_index[col_names]
.groupby("string_col")["col1", "col2"]
.groupby("bool_col")["not_in_group"]
.min()
.to_pandas()
)
Expand All @@ -464,6 +452,23 @@ def test_dataframe_groupby_getitem_list(
pd.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False)


def test_dataframe_groupby_getitem_list_error(
    scalars_df_index,
    scalars_pandas_df_index,
):
    """Check the KeyError raised when several missing columns are selected.

    Groups the four-column frame by ``string_col`` and selects the
    non-existent columns ``"col1"`` and ``"float"``. The error message is
    expected to list both missing names followed by one suggested column
    name per missing key.

    ``scalars_pandas_df_index`` is requested as a fixture but not used by
    the body.
    """
    col_names = ["float64_col", "int64_col", "bool_col", "string_col"]
    # ``match`` is treated as a regular expression, so the literal "." and
    # "?" are escaped. Unescaped, the trailing "'?" would make the closing
    # quote optional and never check the question mark, and "." would match
    # any character.
    with pytest.raises(
        KeyError,
        match=r"Columns not found: 'col1', 'float'\. Did you mean 'bool_col', 'float64_col'\?",
    ):
        (
            scalars_df_index[col_names]
            .groupby("string_col")["col1", "float"]
            .min()
            .to_pandas()
        )


def test_dataframe_groupby_nonnumeric_with_mean():
df = pd.DataFrame(
{
Expand Down