From 59dfd38881467b0a1ad4655abd014afdb168ee6c Mon Sep 17 00:00:00 2001
From: Arwa <arwas@google.com>
Date: Mon, 21 Oct 2024 16:01:57 -0500
Subject: [PATCH 1/2] feat: show possible correct key(s) in .__getitem__
 KeyError message

---
 bigframes/core/groupby/__init__.py | 28 ++++++++++++++++++++--
 tests/system/small/test_groupby.py | 37 +++++++++++++++++-------------
 2 files changed, 47 insertions(+), 18 deletions(-)

diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py
index 2d351cf82d..1b74427f1d 100644
--- a/bigframes/core/groupby/__init__.py
+++ b/bigframes/core/groupby/__init__.py
@@ -19,6 +19,7 @@
 
 import bigframes_vendored.constants as constants
 import bigframes_vendored.pandas.core.groupby as vendored_pandas_groupby
+import jellyfish
 import pandas as pd
 
 from bigframes.core import log_adapter
@@ -91,8 +92,31 @@ def __getitem__(
 
         bad_keys = [key for key in keys if key not in self._block.column_labels]
 
-        if len(bad_keys) > 0:
-            raise KeyError(f"Columns not found: {str(bad_keys)[1:-1]}")
+        # Raise a KeyError message with the possible correct key(s)
+        if len(bad_keys) == 1:
+            possible_key = min(
+                self._block.column_labels,
+                key=lambda item: jellyfish.damerau_levenshtein_distance(
+                    bad_keys[0], item
+                ),
+            )
+            raise KeyError(
+                f"Columns not found: {str(bad_keys)[1:-1]}. Did you mean '{str(possible_key)}'?"
+            )
+        if len(bad_keys) > 1:
+            possible_key = []
+            for bad_key in bad_keys:
+                possible_key.append(
+                    min(
+                        self._block.column_labels,
+                        key=lambda item: jellyfish.damerau_levenshtein_distance(
+                            bad_key, item
+                        ),
+                    )
+                )
+            raise KeyError(
+                f"Columns not found: {str(bad_keys)[1:-1]}. Did you mean {str(possible_key)[1:-1]}?"
+            )
 
         columns = [
             col_id for col_id, label in self._col_id_labels.items() if label in keys
diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py
index 2d5ae21bb4..cbf6e1269d 100644
--- a/tests/system/small/test_groupby.py
+++ b/tests/system/small/test_groupby.py
@@ -426,24 +426,12 @@ def test_dataframe_groupby_getitem_error(
     scalars_pandas_df_index,
 ):
     col_names = ["float64_col", "int64_col", "bool_col", "string_col"]
-    with pytest.raises(KeyError, match="\"Columns not found: 'not_in_group'\""):
-        (
-            scalars_df_index[col_names]
-            .groupby("string_col")["not_in_group"]
-            .min()
-            .to_pandas()
-        )
-
-
-def test_dataframe_groupby_getitem_multiple_columns_error(
-    scalars_df_index,
-    scalars_pandas_df_index,
-):
-    col_names = ["float64_col", "int64_col", "bool_col", "string_col"]
-    with pytest.raises(KeyError, match="\"Columns not found: 'col1', 'col2'\""):
+    with pytest.raises(
+        KeyError, match=r"Columns not found: 'not_in_group'. Did you mean 'string_col'?"
+    ):
         (
             scalars_df_index[col_names]
-            .groupby("string_col")["col1", "col2"]
+            .groupby("bool_col")["not_in_group"]
             .min()
             .to_pandas()
         )
@@ -464,6 +452,23 @@ def test_dataframe_groupby_getitem_list(
     pd.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False)
 
 
+def test_dataframe_groupby_getitem_list_error(
+    scalars_df_index,
+    scalars_pandas_df_index,
+):
+    col_names = ["float64_col", "int64_col", "bool_col", "string_col"]
+    with pytest.raises(
+        KeyError,
+        match=r"Columns not found: 'col1', 'float'. Did you mean 'bool_col', 'float64_col'?",
+    ):
+        (
+            scalars_df_index[col_names]
+            .groupby("string_col")["col1", "float"]
+            .min()
+            .to_pandas()
+        )
+
+
 def test_dataframe_groupby_nonnumeric_with_mean():
     df = pd.DataFrame(
         {

From 24af2d89b4e5860289c5cafa397f8b15ec937409 Mon Sep 17 00:00:00 2001
From: Arwa <arwas@google.com>
Date: Wed, 23 Oct 2024 10:52:36 -0500
Subject: [PATCH 2/2] Keep one if statement

---
 bigframes/core/groupby/__init__.py | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py
index 1b74427f1d..dfbe2ddea2 100644
--- a/bigframes/core/groupby/__init__.py
+++ b/bigframes/core/groupby/__init__.py
@@ -93,17 +93,7 @@ def __getitem__(
         bad_keys = [key for key in keys if key not in self._block.column_labels]
 
         # Raise a KeyError message with the possible correct key(s)
-        if len(bad_keys) == 1:
-            possible_key = min(
-                self._block.column_labels,
-                key=lambda item: jellyfish.damerau_levenshtein_distance(
-                    bad_keys[0], item
-                ),
-            )
-            raise KeyError(
-                f"Columns not found: {str(bad_keys)[1:-1]}. Did you mean '{str(possible_key)}'?"
-            )
-        if len(bad_keys) > 1:
+        if len(bad_keys) > 0:
             possible_key = []
             for bad_key in bad_keys:
                 possible_key.append(