NVIDIA · mzient · Apr 9, 2026 · Apr 8, 2026
diff --git a/dali/operators/decoder/inflate/inflate.h b/dali/operators/decoder/inflate/inflate.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -38,6 +38,7 @@ class InflateOpImplBase : public OpImplBase<Backend> {
         IsFloatingPoint(dtype_) || IsIntegral(dtype_),
         make_string("The inflate output type must have floating point or integral type, got `",
                     dtype_, "` instead."));
+    element_type_ = TypeTable::GetTypeInfo(dtype_);
   }
 
   bool SetupImpl(std::vector<OutputDesc> &output_desc, const Workspace &ws) override {
@@ -62,6 +63,7 @@ class InflateOpImplBase : public OpImplBase<Backend> {
 
  protected:
   DALIDataType dtype_;
+  TypeInfo element_type_;
   inflate::ShapeParams<Backend> params_;
 };
 

diff --git a/dali/operators/decoder/inflate/inflate_gpu.cc b/dali/operators/decoder/inflate/inflate_gpu.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -58,9 +58,12 @@ class InflateOpGpuLZ4Impl : public InflateOpImplBase<GPUBackend> {
         stream, params_.GetInChunkSizes(), input_ptrs_, inflated_sizes_, inflated_ptrs_);
 
     size_t tempSize;
-    CUDA_CALL(nvcompBatchedLZ4DecompressGetTempSizeAsync(total_chunks_num,
-                                                         params_.GetMaxOutChunkVol(),
-                                                         {}, &tempSize, params_.GetMaxOutVol()));
+    CUDA_CALL(nvcompBatchedLZ4DecompressGetTempSizeAsync(
+        total_chunks_num,
+        params_.GetMaxOutChunkVol() * element_type_.size(),
+        {},
+        &tempSize,
+        params_.GetMaxOutVol() * element_type_.size()));
 
     void *temp = scratchpad.AllocateGPU<uint8_t>(tempSize);
     nvcompStatus_t *device_statuses = scratchpad.AllocateGPU<nvcompStatus_t>(total_chunks_num);

diff --git a/dali/operators/decoder/inflate/inflate_params.h b/dali/operators/decoder/inflate/inflate_params.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022, 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -354,6 +354,7 @@ class ShapeParams {
     int sample_dim = provided_shape[0].num_elements();
     TensorListShape<> shape(num_samples, sample_dim);
     max_output_vol_ = 0;
+    max_output_sample_vol_ = 0;
     for (int sample_idx = 0; sample_idx < provided_shape.num_samples(); sample_idx++) {
       const int *data = provided_shape.tensor_data(sample_idx);
       for (int d = 0; d < sample_dim; d++) {