Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion dali/operators/decoder/inflate/inflate.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -38,6 +38,7 @@ class InflateOpImplBase : public OpImplBase<Backend> {
IsFloatingPoint(dtype_) || IsIntegral(dtype_),
make_string("The inflate output type must have floating point or integral type, got `",
dtype_, "` instead."));
+element_type_ = TypeTable::GetTypeInfo(dtype_);
}

bool SetupImpl(std::vector<OutputDesc> &output_desc, const Workspace &ws) override {
Expand All @@ -62,6 +63,7 @@ class InflateOpImplBase : public OpImplBase<Backend> {

protected:
DALIDataType dtype_;
+TypeInfo element_type_;
inflate::ShapeParams<Backend> params_;
};

Expand Down
11 changes: 7 additions & 4 deletions dali/operators/decoder/inflate/inflate_gpu.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-// Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -58,9 +58,12 @@ class InflateOpGpuLZ4Impl : public InflateOpImplBase<GPUBackend> {
stream, params_.GetInChunkSizes(), input_ptrs_, inflated_sizes_, inflated_ptrs_);

size_t tempSize;
-CUDA_CALL(nvcompBatchedLZ4DecompressGetTempSizeAsync(total_chunks_num,
-params_.GetMaxOutChunkVol(),
-{}, &tempSize, params_.GetMaxOutVol()));
+CUDA_CALL(nvcompBatchedLZ4DecompressGetTempSizeAsync(
+total_chunks_num,
+params_.GetMaxOutChunkVol() * element_type_.size(),
+{},
+&tempSize,
+params_.GetMaxOutVol() * element_type_.size()));

void *temp = scratchpad.AllocateGPU<uint8_t>(tempSize);
nvcompStatus_t *device_statuses = scratchpad.AllocateGPU<nvcompStatus_t>(total_chunks_num);
Expand Down
3 changes: 2 additions & 1 deletion dali/operators/decoder/inflate/inflate_params.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022, 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -354,6 +354,7 @@ class ShapeParams {
int sample_dim = provided_shape[0].num_elements();
TensorListShape<> shape(num_samples, sample_dim);
max_output_vol_ = 0;
+max_output_sample_vol_ = 0;
for (int sample_idx = 0; sample_idx < provided_shape.num_samples(); sample_idx++) {
const int *data = provided_shape.tensor_data(sample_idx);
for (int d = 0; d < sample_dim; d++) {
Expand Down
Loading