@@ -566,10 +566,15 @@ def test_pipeline_columntransformer_fit_predict(session, penguins_df_default_ind
566
566
"species" ,
567
567
),
568
568
(
569
- "scale " ,
569
+ "standard_scale " ,
570
570
preprocessing .StandardScaler (),
571
571
["culmen_length_mm" , "flipper_length_mm" ],
572
572
),
573
+ (
574
+ "max_abs_scale" ,
575
+ preprocessing .MaxAbsScaler (),
576
+ ["culmen_length_mm" , "flipper_length_mm" ],
577
+ ),
573
578
(
574
579
"label" ,
575
580
preprocessing .LabelEncoder (),
@@ -637,6 +642,11 @@ def test_pipeline_columntransformer_to_gbq(penguins_df_default_index, dataset_id
637
642
preprocessing .StandardScaler (),
638
643
["culmen_length_mm" , "flipper_length_mm" ],
639
644
),
645
+ (
646
+ "max_abs_scale" ,
647
+ preprocessing .MaxAbsScaler (),
648
+ ["culmen_length_mm" , "flipper_length_mm" ],
649
+ ),
640
650
(
641
651
"label" ,
642
652
preprocessing .LabelEncoder (),
@@ -660,30 +670,26 @@ def test_pipeline_columntransformer_to_gbq(penguins_df_default_index, dataset_id
660
670
661
671
assert isinstance (pl_loaded ._transform , compose .ColumnTransformer )
662
672
transformers = pl_loaded ._transform .transformers_
663
- assert len (transformers ) == 4
664
-
665
- assert transformers [0 ][0 ] == "ont_hot_encoder"
666
- assert isinstance (transformers [0 ][1 ], preprocessing .OneHotEncoder )
667
- one_hot_encoder = transformers [0 ][1 ]
668
- assert one_hot_encoder .drop == "most_frequent"
669
- assert one_hot_encoder .min_frequency == 5
670
- assert one_hot_encoder .max_categories == 100
671
- assert transformers [0 ][2 ] == "species"
672
-
673
- assert transformers [1 ][0 ] == "label_encoder"
674
- assert isinstance (transformers [1 ][1 ], preprocessing .LabelEncoder )
675
- one_hot_encoder = transformers [1 ][1 ]
676
- assert one_hot_encoder .min_frequency == 0
677
- assert one_hot_encoder .max_categories == 1000001
678
- assert transformers [1 ][2 ] == "species"
679
-
680
- assert transformers [2 ][0 ] == "standard_scaler"
681
- assert isinstance (transformers [2 ][1 ], preprocessing .StandardScaler )
682
- assert transformers [2 ][2 ] == "culmen_length_mm"
673
+ expected = [
674
+ (
675
+ "ont_hot_encoder" ,
676
+ preprocessing .OneHotEncoder (
677
+ drop = "most_frequent" , max_categories = 100 , min_frequency = 5
678
+ ),
679
+ "species" ,
680
+ ),
681
+ (
682
+ "label_encoder" ,
683
+ preprocessing .LabelEncoder (max_categories = 1000001 , min_frequency = 0 ),
684
+ "species" ,
685
+ ),
686
+ ("standard_scaler" , preprocessing .StandardScaler (), "culmen_length_mm" ),
687
+ ("max_abs_encoder" , preprocessing .MaxAbsScaler (), "culmen_length_mm" ),
688
+ ("standard_scaler" , preprocessing .StandardScaler (), "flipper_length_mm" ),
689
+ ("max_abs_encoder" , preprocessing .MaxAbsScaler (), "flipper_length_mm" ),
690
+ ]
683
691
684
- assert transformers [3 ][0 ] == "standard_scaler"
685
- assert isinstance (transformers [2 ][1 ], preprocessing .StandardScaler )
686
- assert transformers [3 ][2 ] == "flipper_length_mm"
692
+ assert transformers == expected
687
693
688
694
assert isinstance (pl_loaded ._estimator , linear_model .LinearRegression )
689
695
assert pl_loaded ._estimator .fit_intercept is False
@@ -717,6 +723,34 @@ def test_pipeline_standard_scaler_to_gbq(penguins_df_default_index, dataset_id):
717
723
assert pl_loaded ._estimator .fit_intercept is False
718
724
719
725
726
def test_pipeline_max_abs_scaler_to_gbq(penguins_df_default_index, dataset_id):
    """Round-trip a MaxAbsScaler + LinearRegression pipeline through to_gbq.

    Fits the pipeline on the penguins dataset, persists it to a BigQuery
    model table, and verifies the loaded pipeline preserves both the
    transform type and the estimator's fit_intercept setting.
    """
    pl = pipeline.Pipeline(
        [
            ("transform", preprocessing.MaxAbsScaler()),
            ("estimator", linear_model.LinearRegression(fit_intercept=False)),
        ]
    )

    df = penguins_df_default_index.dropna()
    X_train = df[
        [
            "culmen_length_mm",
            "culmen_depth_mm",
            "flipper_length_mm",
        ]
    ]
    y_train = df[["body_mass_g"]]
    pl.fit(X_train, y_train)

    # Use a table name unique to this test: it previously said
    # "..._standard_scaler" (copy-paste), which collides with
    # test_pipeline_standard_scaler_to_gbq since both write with replace=True.
    pl_loaded = pl.to_gbq(
        f"{dataset_id}.test_penguins_pipeline_max_abs_scaler", replace=True
    )
    assert isinstance(pl_loaded._transform, preprocessing.MaxAbsScaler)

    assert isinstance(pl_loaded._estimator, linear_model.LinearRegression)
    assert pl_loaded._estimator.fit_intercept is False
752
+
753
+
720
754
def test_pipeline_one_hot_encoder_to_gbq (penguins_df_default_index , dataset_id ):
721
755
pl = pipeline .Pipeline (
722
756
[
0 commit comments