@@ -39,7 +39,6 @@
     Optional,
     Sequence,
     Tuple,
-    TYPE_CHECKING,
     Union,
 )
 import warnings
@@ -68,13 +67,8 @@
 import bigframes.core.window_spec as windows
 import bigframes.dtypes
 import bigframes.exceptions as bfe
-import bigframes.features
 import bigframes.operations as ops
 import bigframes.operations.aggregations as agg_ops
-import bigframes.session._io.pandas as io_pandas
-
-if TYPE_CHECKING:
-    import bigframes.session.executor
 
 # Type constraint for wherever column labels are used
 Label = typing.Hashable
@@ -221,7 +215,7 @@ def shape(self) -> typing.Tuple[int, int]:
         except Exception:
             pass
 
-        row_count = self.session._executor.get_row_count(self.expr)
+        row_count = self.session._executor.execute(self.expr.row_count()).to_py_scalar()
         return (row_count, len(self.value_columns))
 
     @property
@@ -485,7 +479,7 @@ def to_arrow(
         *,
         ordered: bool = True,
         allow_large_results: Optional[bool] = None,
-    ) -> Tuple[pa.Table, bigquery.QueryJob]:
+    ) -> Tuple[pa.Table, Optional[bigquery.QueryJob]]:
         """Run query and download results as a pyarrow Table."""
         execute_result = self.session._executor.execute(
             self.expr, ordered=ordered, use_explicit_destination=allow_large_results
@@ -580,7 +574,7 @@ def try_peek(
             result = self.session._executor.peek(
                 self.expr, n, use_explicit_destination=allow_large_results
             )
-            df = io_pandas.arrow_to_pandas(result.to_arrow_table(), self.expr.schema)
+            df = result.to_pandas()
             self._copy_index_to_pandas(df)
             return df
         else:
@@ -604,8 +598,7 @@ def to_pandas_batches(
             page_size=page_size,
             max_results=max_results,
         )
-        for record_batch in execute_result.arrow_batches():
-            df = io_pandas.arrow_to_pandas(record_batch, self.expr.schema)
+        for df in execute_result.to_pandas_batches():
             self._copy_index_to_pandas(df)
             if squeeze:
                 yield df.squeeze(axis=1)
@@ -659,7 +652,7 @@ def _materialize_local(
 
         # TODO: Maybe materialize before downsampling
         # Some downsampling methods
-        if fraction < 1:
+        if fraction < 1 and (execute_result.total_rows is not None):
             if not sample_config.enable_downsampling:
                 raise RuntimeError(
                     f"The data size ({table_mb:.2f} MB) exceeds the maximum download limit of "
@@ -690,9 +683,7 @@ def _materialize_local(
                 MaterializationOptions(ordered=materialize_options.ordered)
             )
         else:
-            total_rows = execute_result.total_rows
-            arrow = execute_result.to_arrow_table()
-            df = io_pandas.arrow_to_pandas(arrow, schema=self.expr.schema)
+            df = execute_result.to_pandas()
             self._copy_index_to_pandas(df)
 
         return df, execute_result.query_job
@@ -1570,12 +1561,11 @@ def retrieve_repr_request_results(
 
         # head caches full underlying expression, so row_count will be free after
         head_result = self.session._executor.head(self.expr, max_results)
-        count = self.session._executor.get_row_count(self.expr)
+        row_count = self.session._executor.execute(self.expr.row_count()).to_py_scalar()
 
-        arrow = head_result.to_arrow_table()
-        df = io_pandas.arrow_to_pandas(arrow, schema=self.expr.schema)
+        df = head_result.to_pandas()
         self._copy_index_to_pandas(df)
-        return df, count, head_result.query_job
+        return df, row_count, head_result.query_job
 
     def promote_offsets(self, label: Label = None) -> typing.Tuple[Block, str]:
         expr, result_id = self._expr.promote_offsets()