Skip to content

Commit c30464f

Browse files
committed
fix order_by syntax
1 parent 82407b3 commit c30464f

File tree

5 files changed

+70
-39
lines changed

5 files changed

+70
-39
lines changed

β€Žbigframes/core/compile/aggregate_compiler.pyβ€Ž

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,11 @@
3434
def compile_aggregate(
3535
aggregate: ex.Aggregation,
3636
bindings: typing.Dict[str, ibis_types.Value],
37-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
37+
order_by: typing.Sequence[ibis_types.Value] = [],
3838
) -> ibis_types.Value:
3939
if isinstance(aggregate, ex.UnaryAggregation):
4040
input = scalar_compiler.compile_expression(aggregate.arg, bindings=bindings)
41-
return compile_unary_agg(aggregate.op, input, agg_order_by=agg_order_by)
41+
return compile_unary_agg(aggregate.op, input, order_by=order_by)
4242
elif isinstance(aggregate, ex.BinaryAggregation):
4343
left = scalar_compiler.compile_expression(aggregate.left, bindings=bindings)
4444
right = scalar_compiler.compile_expression(aggregate.right, bindings=bindings)
@@ -76,7 +76,7 @@ def compile_unary_agg(
7676
op: agg_ops.WindowOp,
7777
input: ibis_types.Column,
7878
window: Optional[window_spec.WindowSpec] = None,
79-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
79+
order_by: typing.Sequence[ibis_types.Value] = [],
8080
) -> ibis_types.Value:
8181
raise ValueError(f"Can't compile unrecognized operation: {op}")
8282

@@ -87,7 +87,7 @@ def constrained_op(
8787
op,
8888
column: ibis_types.Column,
8989
window=None,
90-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
90+
order_by: typing.Sequence[ibis_types.Value] = [],
9191
):
9292
if column.type().is_boolean():
9393
column = typing.cast(
@@ -112,7 +112,7 @@ def _(
112112
op: agg_ops.SumOp,
113113
column: ibis_types.NumericColumn,
114114
window=None,
115-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
115+
order_by: typing.Sequence[ibis_types.Value] = [],
116116
) -> ibis_types.NumericValue:
117117
# Will be null if all inputs are null. Pandas defaults to zero sum though.
118118
bq_sum = _apply_window_if_present(column.sum(), window)
@@ -127,7 +127,7 @@ def _(
127127
op: agg_ops.MedianOp,
128128
column: ibis_types.NumericColumn,
129129
window=None,
130-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
130+
order_by: typing.Sequence[ibis_types.Value] = [],
131131
) -> ibis_types.NumericValue:
132132
# PERCENTILE_CONT has very few allowed windows. For example, "window
133133
# framing clause is not allowed for analytic function percentile_cont".
@@ -148,7 +148,7 @@ def _(
148148
op: agg_ops.ApproxQuartilesOp,
149149
column: ibis_types.NumericColumn,
150150
window=None,
151-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
151+
order_by: typing.Sequence[ibis_types.Value] = [],
152152
) -> ibis_types.NumericValue:
153153
# PERCENTILE_CONT has very few allowed windows. For example, "window
154154
# framing clause is not allowed for analytic function percentile_cont".
@@ -168,7 +168,7 @@ def _(
168168
op: agg_ops.QuantileOp,
169169
column: ibis_types.NumericColumn,
170170
window=None,
171-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
171+
order_by: typing.Sequence[ibis_types.Value] = [],
172172
) -> ibis_types.NumericValue:
173173
return _apply_window_if_present(column.quantile(op.q), window)
174174

@@ -179,7 +179,7 @@ def _(
179179
op: agg_ops.MeanOp,
180180
column: ibis_types.NumericColumn,
181181
window=None,
182-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
182+
order_by: typing.Sequence[ibis_types.Value] = [],
183183
) -> ibis_types.NumericValue:
184184
return _apply_window_if_present(column.mean(), window)
185185

@@ -190,7 +190,7 @@ def _(
190190
op: agg_ops.ProductOp,
191191
column: ibis_types.NumericColumn,
192192
window=None,
193-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
193+
order_by: typing.Sequence[ibis_types.Value] = [],
194194
) -> ibis_types.NumericValue:
195195
# Need to short-circuit as log with zeroes is illegal sql
196196
is_zero = cast(ibis_types.BooleanColumn, (column == 0))
@@ -229,7 +229,7 @@ def _(
229229
op: agg_ops.MaxOp,
230230
column: ibis_types.Column,
231231
window=None,
232-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
232+
order_by: typing.Sequence[ibis_types.Value] = [],
233233
) -> ibis_types.Value:
234234
return _apply_window_if_present(column.max(), window)
235235

@@ -239,7 +239,7 @@ def _(
239239
op: agg_ops.MinOp,
240240
column: ibis_types.Column,
241241
window=None,
242-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
242+
order_by: typing.Sequence[ibis_types.Value] = [],
243243
) -> ibis_types.Value:
244244
return _apply_window_if_present(column.min(), window)
245245

@@ -250,7 +250,7 @@ def _(
250250
op: agg_ops.StdOp,
251251
x: ibis_types.Column,
252252
window=None,
253-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
253+
order_by: typing.Sequence[ibis_types.Value] = [],
254254
) -> ibis_types.Value:
255255
return _apply_window_if_present(cast(ibis_types.NumericColumn, x).std(), window)
256256

@@ -261,7 +261,7 @@ def _(
261261
op: agg_ops.VarOp,
262262
x: ibis_types.Column,
263263
window=None,
264-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
264+
order_by: typing.Sequence[ibis_types.Value] = [],
265265
) -> ibis_types.Value:
266266
return _apply_window_if_present(cast(ibis_types.NumericColumn, x).var(), window)
267267

@@ -272,7 +272,7 @@ def _(
272272
op: agg_ops.PopVarOp,
273273
x: ibis_types.Column,
274274
window=None,
275-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
275+
order_by: typing.Sequence[ibis_types.Value] = [],
276276
) -> ibis_types.Value:
277277
return _apply_window_if_present(
278278
cast(ibis_types.NumericColumn, x).var(how="pop"), window
@@ -284,7 +284,7 @@ def _(
284284
op: agg_ops.CountOp,
285285
column: ibis_types.Column,
286286
window=None,
287-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
287+
order_by: typing.Sequence[ibis_types.Value] = [],
288288
) -> ibis_types.IntegerValue:
289289
return _apply_window_if_present(column.count(), window)
290290

@@ -294,7 +294,7 @@ def _(
294294
op: agg_ops.ArrayAggOp,
295295
column: ibis_types.Column,
296296
window=None,
297-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
297+
order_by: typing.Sequence[ibis_types.Value] = [],
298298
) -> ibis_types.ArrayValue:
299299
# BigQuery doesn't currently support using ARRAY_AGG with both window and aggregate
300300
# functions simultaneously. Some aggregate functions (or its equivalent syntax)
@@ -310,7 +310,7 @@ def _(
310310

311311
return vendored_ibis_ops.ArrayAggregate(
312312
column,
313-
order_by_columns=agg_order_by,
313+
order_by=order_by,
314314
).to_expr()
315315

316316

@@ -319,7 +319,7 @@ def _(
319319
op: agg_ops.CutOp,
320320
x: ibis_types.Column,
321321
window=None,
322-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
322+
order_by: typing.Sequence[ibis_types.Value] = [],
323323
):
324324
out = ibis.case()
325325
if isinstance(op.bins, int):
@@ -376,7 +376,7 @@ def _(
376376
self: agg_ops.QcutOp,
377377
column: ibis_types.Column,
378378
window=None,
379-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
379+
order_by: typing.Sequence[ibis_types.Value] = [],
380380
) -> ibis_types.IntegerValue:
381381
if isinstance(self.quantiles, int):
382382
quantiles_ibis = dtypes.literal_to_ibis_scalar(self.quantiles)
@@ -409,7 +409,7 @@ def _(
409409
op: agg_ops.NuniqueOp,
410410
column: ibis_types.Column,
411411
window=None,
412-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
412+
order_by: typing.Sequence[ibis_types.Value] = [],
413413
) -> ibis_types.IntegerValue:
414414
return _apply_window_if_present(column.nunique(), window)
415415

@@ -419,7 +419,7 @@ def _(
419419
op: agg_ops.AnyValueOp,
420420
column: ibis_types.Column,
421421
window=None,
422-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
422+
order_by: typing.Sequence[ibis_types.Value] = [],
423423
) -> ibis_types.IntegerValue:
424424
return _apply_window_if_present(column.arbitrary(), window)
425425

@@ -429,7 +429,7 @@ def _(
429429
op: agg_ops.RankOp,
430430
column: ibis_types.Column,
431431
window=None,
432-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
432+
order_by: typing.Sequence[ibis_types.Value] = [],
433433
) -> ibis_types.IntegerValue:
434434
# Ibis produces 0-based ranks, while pandas creates 1-based ranks
435435
return _apply_window_if_present(ibis.rank(), window) + 1
@@ -440,7 +440,7 @@ def _(
440440
op: agg_ops.DenseRankOp,
441441
column: ibis_types.Column,
442442
window=None,
443-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
443+
order_by: typing.Sequence[ibis_types.Value] = [],
444444
) -> ibis_types.IntegerValue:
445445
# Ibis produces 0-based ranks, while pandas creates 1-based ranks
446446
return _apply_window_if_present(column.dense_rank(), window) + 1
@@ -456,7 +456,7 @@ def _(
456456
op: agg_ops.FirstNonNullOp,
457457
column: ibis_types.Column,
458458
window=None,
459-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
459+
order_by: typing.Sequence[ibis_types.Value] = [],
460460
) -> ibis_types.Value:
461461
return _apply_window_if_present(
462462
vendored_ibis_ops.FirstNonNullValue(column).to_expr(), window # type: ignore
@@ -468,7 +468,7 @@ def _(
468468
op: agg_ops.LastOp,
469469
column: ibis_types.Column,
470470
window=None,
471-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
471+
order_by: typing.Sequence[ibis_types.Value] = [],
472472
) -> ibis_types.Value:
473473
return _apply_window_if_present(column.last(), window)
474474

@@ -478,7 +478,7 @@ def _(
478478
op: agg_ops.LastNonNullOp,
479479
column: ibis_types.Column,
480480
window=None,
481-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
481+
order_by: typing.Sequence[ibis_types.Value] = [],
482482
) -> ibis_types.Value:
483483
return _apply_window_if_present(
484484
vendored_ibis_ops.LastNonNullValue(column).to_expr(), window # type: ignore
@@ -490,7 +490,7 @@ def _(
490490
op: agg_ops.ShiftOp,
491491
column: ibis_types.Column,
492492
window=None,
493-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
493+
order_by: typing.Sequence[ibis_types.Value] = [],
494494
) -> ibis_types.Value:
495495
if op.periods == 0: # No-op
496496
return column
@@ -504,7 +504,7 @@ def _(
504504
op: agg_ops.DiffOp,
505505
column: ibis_types.Column,
506506
window=None,
507-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
507+
order_by: typing.Sequence[ibis_types.Value] = [],
508508
) -> ibis_types.Value:
509509
shifted = compile_unary_agg(agg_ops.ShiftOp(op.periods), column, window)
510510
if column.type().is_boolean():
@@ -524,7 +524,7 @@ def _(
524524
op: agg_ops.AllOp,
525525
column: ibis_types.Column,
526526
window=None,
527-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
527+
order_by: typing.Sequence[ibis_types.Value] = [],
528528
) -> ibis_types.BooleanValue:
529529
# BQ will return null for empty column, result would be true in pandas.
530530
result = _is_true(column).all()
@@ -539,7 +539,7 @@ def _(
539539
op: agg_ops.AnyOp,
540540
column: ibis_types.Column,
541541
window=None,
542-
agg_order_by: typing.Sequence[ibis_types.Value] = [],
542+
order_by: typing.Sequence[ibis_types.Value] = [],
543543
) -> ibis_types.BooleanValue:
544544
# BQ will return null for empty column, result would be false in pandas.
545545
result = _is_true(column).any()

β€Žbigframes/core/compile/compiled.pyβ€Ž

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -608,10 +608,9 @@ def aggregate(
608608
"""
609609
table = self._to_ibis_expr(ordering_mode="unordered", expose_hidden_cols=True)
610610
bindings = {col: table[col] for col in self.column_ids}
611-
agg_order_by = [table[col] for col in self._hidden_ordering_column_names]
612611
stats = {
613612
col_out: agg_compiler.compile_aggregate(
614-
aggregate, bindings, agg_order_by=agg_order_by
613+
aggregate, bindings, order_by=self._ibis_order
615614
)
616615
for aggregate, col_out in aggregations
617616
}

β€Žtests/system/small/bigquery/test_array.pyβ€Ž

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,40 @@ def test_array_agg_w_dataframe():
8383
expected.to_pandas(),
8484
)
8585

86+
87+
@pytest.mark.parametrize(
88+
("ascending", "expected_b", "expected_c"),
89+
[
90+
pytest.param(
91+
True, [["a", "b"], ["e", "d", "c"]], [[4, 5], [1, 2, 3]], id="asc"
92+
),
93+
pytest.param(
94+
False, [["b", "a"], ["c", "d", "e"]], [[5, 4], [3, 2, 1]], id="des"
95+
),
96+
],
97+
)
98+
def test_array_agg_reserve_order(ascending, expected_b, expected_c):
99+
data = {
100+
"a": [1, 1, 2, 2, 2],
101+
"b": ["a", "b", "c", "d", "e"],
102+
"c": [4, 5, 3, 2, 1],
103+
}
104+
df = bpd.DataFrame(data)
105+
106+
result = bbq.array_agg(df.sort_values("c", ascending=ascending).groupby(by=["a"]))
107+
expected_data = {
108+
"a": [1, 2],
109+
"b": expected_b,
110+
"c": expected_c,
111+
}
112+
expected = bpd.DataFrame(expected_data).set_index("a")
113+
114+
pd.testing.assert_frame_equal(
115+
result.to_pandas(),
116+
expected.to_pandas(),
117+
)
118+
119+
86120
def assert_array_agg_matches_after_explode():
87121
data = {
88122
"index": np.arange(10),

β€Žthird_party/bigframes_vendored/ibis/backends/bigquery/registry.pyβ€Ž

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,9 @@ def _array_aggregate(translator, op: vendored_ibis_ops.ArrayAggregate):
4242
arg = translator.translate(op.arg)
4343

4444
order_by_sql = ""
45-
if len(op.order_by_columns) > 0:
46-
order_by_columns = ", ".join(
47-
[translator.translate(column) for column in op.order_by_columns]
48-
)
49-
order_by_sql = f"ORDER BY {order_by_columns}"
45+
if len(op.order_by) > 0:
46+
order_by = ", ".join([translator.translate(column) for column in op.order_by])
47+
order_by_sql = f"ORDER BY {order_by}"
5048

5149
return f"ARRAY_AGG({arg} IGNORE NULLS {order_by_sql})"
5250

β€Žthird_party/bigframes_vendored/ibis/expr/operations/reductions.pyβ€Ž

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ class ArrayAggregate(Filterable, Reduction):
2727
"""
2828

2929
arg: ibis_ops_core.Column
30-
order_by_columns: VarTuple[ibis_ops_core.Value] = ()
30+
order_by: VarTuple[ibis_ops_core.Value] = ()
3131

3232
@ibis_annotations.attribute
3333
def dtype(self):

0 commit comments

Comments
 (0)