14
14
15
15
import numpy as np
16
16
import pandas as pd
17
+ import pytest
17
18
18
19
import bigframes .bigquery as bbq
19
20
import bigframes .pandas as bpd
def test_array_length():
    """Compare ``bbq.array_length`` with hand-written lengths.

    The input mixes non-empty arrays, a NaN entry and an empty array.
    """
    arrays = bpd.Series([["A", "AA", "AAA"], ["BB", "B"], np.nan, [], ["C"]])
    # TODO(b/336880368): Allow for NULL values to be input for ARRAY columns.
    # Once we actually store NULL values, this will be NULL where the input is NULL.
    expected_lengths = bpd.Series([3, 2, 0, 0, 1])

    # Both sides are materialized through to_pandas() before the comparison.
    actual = bbq.array_length(arrays).to_pandas()
    wanted = expected_lengths.to_pandas()
    pd.testing.assert_series_equal(actual, wanted)
32
+
33
+
34
@pytest.mark.parametrize(
    ("input_data", "output_data"),
    [
        pytest.param(
            [1, 2, 3, 4, 5],
            [[1, 2], [3, 4], [5]],
            id="ints",
        ),
        pytest.param(
            ["e", "d", "c", "b", "a"],
            [["e", "d"], ["c", "b"], ["a"]],
            id="reverse_strings",
        ),
        pytest.param(
            [1.0, 2.0, np.nan, np.nan, np.nan],
            [[1.0, 2.0], [], []],
            id="nans",
        ),
        pytest.param(
            [{"A": {"x": 1.0}}, {"A": {"z": 4.0}}, {}, {"B": "b"}, np.nan],
            [[{"A": {"x": 1.0}}, {"A": {"z": 4.0}}], [{}, {"B": "b"}], []],
            id="structs",
        ),
    ],
)
def test_array_agg_w_series(input_data, output_data):
    """Group a five-element series by its index labels and aggregate to arrays.

    Each case supplies five values labelled ``a, a, b, b, c`` and the three
    arrays expected for the groups ``a``, ``b`` and ``c``.
    """
    grouped = bpd.Series(input_data, index=["a", "a", "b", "b", "c"]).groupby(
        level=0
    )
    aggregated = bbq.array_agg(grouped)

    wanted = bpd.Series(output_data, index=["a", "b", "c"])
    pd.testing.assert_series_equal(aggregated.to_pandas(), wanted.to_pandas())
63
+
64
+
65
def test_array_agg_w_dataframe():
    """Aggregate the columns of a DataFrame grouped by column ``b`` into arrays.

    The expected frame has no entry for the input row whose ``b`` is ``None``;
    only the groups ``1.0`` and ``2.0`` appear.
    """
    source = bpd.DataFrame(
        {
            "a": [1, 1, 2, 1],
            "b": [2, None, 1, 2],
            "c": [3, 4, 3, 2],
        }
    )
    aggregated = bbq.array_agg(source.groupby(by=["b"]))

    # The grouping column becomes the index of the expected frame.
    wanted = bpd.DataFrame(
        {
            "b": [1.0, 2.0],
            "a": [[2], [1, 1]],
            "c": [[3], [3, 2]],
        }
    ).set_index("b")

    pd.testing.assert_frame_equal(aggregated.to_pandas(), wanted.to_pandas())
85
+
86
def assert_array_agg_matches_after_explode():
    """Assert that ``array_agg`` undoes ``explode`` on array columns.

    Builds a 10-row frame whose columns ``a`` and ``b`` each hold a
    10-element integer array, explodes both columns, groups the exploded rows
    by the original index and aggregates them back into arrays. The result
    must equal the starting frame.

    Raises:
        AssertionError: if the round-tripped frame differs from the input.
    """
    # A fixed seed keeps the generated arrays identical between runs, so a
    # failure can be reproduced with exactly the same data.
    rng = np.random.default_rng(0)
    data = {
        "index": np.arange(10),
        "a": [rng.integers(0, 10, 10) for _ in range(10)],
        "b": [rng.integers(0, 10, 10) for _ in range(10)],
    }
    df = bpd.DataFrame(data).set_index("index")
    result = bbq.array_agg(df.explode(["a", "b"]).groupby(level=0))
    # NOTE(review): presumably the grouped result does not carry the index
    # name of the input frame, so it is set here before comparing - confirm.
    result.index.name = "index"

    pd.testing.assert_frame_equal(
        result.to_pandas(),
        df.to_pandas(),
    )


def test_array_agg_matches_after_explode():
    """Pytest entry point for the explode/array_agg round-trip check.

    The helper's name does not start with ``test``, so pytest's default
    discovery would not run it on its own.
    """
    assert_array_agg_matches_after_explode()
0 commit comments