Skip to content

Commit 88ac48c

Browse files
vertex-sdk-bot authored and
copybara-github committed
feat: Support RagFileTransformationConfig in upload_file function in V1 sdk
PiperOrigin-RevId: 700756555
1 parent c23c62d commit 88ac48c

File tree

3 files changed

+79
-6
lines changed

3 files changed

+79
-6
lines changed

tests/unit/vertex_rag/test_rag_constants.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,15 +150,31 @@
150150
TEST_RAG_CORPUS_RESOURCE_NAME + f"/ragFiles/{TEST_RAG_FILE_ID}"
151151
)
152152
TEST_UPLOAD_RAG_FILE_RESPONSE_CONTENT = ""
153+
TEST_CHUNK_SIZE = 512
154+
TEST_CHUNK_OVERLAP = 100
153155
TEST_RAG_FILE_JSON = {
154156
"ragFile": {
155157
"name": TEST_RAG_FILE_RESOURCE_NAME,
156158
"displayName": TEST_FILE_DISPLAY_NAME,
157159
}
158160
}
161+
TEST_RAG_FILE_JSON_WITH_UPLOAD_CONFIG = {
162+
"ragFile": {
163+
"name": TEST_RAG_FILE_RESOURCE_NAME,
164+
"displayName": TEST_FILE_DISPLAY_NAME,
165+
},
166+
"rag_file_transformation_config": {
167+
"rag_file_transformation_config": {
168+
"rag_file_chunking_config": {
169+
"fixed_length_chunking": {
170+
"chunk_size": TEST_CHUNK_SIZE,
171+
"chunk_overlap": TEST_CHUNK_OVERLAP,
172+
}
173+
}
174+
}
175+
},
176+
}
159177
TEST_RAG_FILE_JSON_ERROR = {"error": {"code": 13}}
160-
TEST_CHUNK_SIZE = 512
161-
TEST_CHUNK_OVERLAP = 100
162178
TEST_RAG_FILE_TRANSFORMATION_CONFIG = RagFileTransformationConfig(
163179
rag_file_chunking_config=RagFileChunkingConfig(
164180
fixed_length_chunking=RagFileChunkingConfig.FixedLengthChunking(

tests/unit/vertex_rag/test_rag_data.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,15 @@ def upload_file_mock(authorized_session_mock):
224224
yield mock_post
225225

226226

227+
@pytest.fixture
228+
def upload_file_with_upload_config_mock(authorized_session_mock):
229+
with patch.object(authorized_session_mock, "post") as mock_post:
230+
mock_post.return_value = MockResponse(
231+
test_rag_constants.TEST_RAG_FILE_JSON_WITH_UPLOAD_CONFIG, 200
232+
)
233+
yield mock_post
234+
235+
227236
@pytest.fixture
228237
def upload_file_not_found_mock(authorized_session_mock):
229238
with patch.object(authorized_session_mock, "post") as mock_post:
@@ -571,6 +580,29 @@ def test_upload_file_success(
571580

572581
rag_file_eq(rag_file, test_rag_constants.TEST_RAG_FILE)
573582

583+
@pytest.mark.usefixtures("open_file_mock")
584+
def test_upload_file_success_with_transformation_config(
585+
self,
586+
upload_file_with_upload_config_mock,
587+
):
588+
aiplatform.init(
589+
project=test_rag_constants.TEST_PROJECT,
590+
location=test_rag_constants.TEST_REGION,
591+
)
592+
rag_file = rag.upload_file(
593+
corpus_name=test_rag_constants.TEST_RAG_CORPUS_RESOURCE_NAME,
594+
path=test_rag_constants.TEST_PATH,
595+
display_name=test_rag_constants.TEST_FILE_DISPLAY_NAME,
596+
transformation_config=create_transformation_config(),
597+
)
598+
599+
upload_file_with_upload_config_mock.assert_called_once()
600+
_, mock_kwargs = upload_file_with_upload_config_mock.call_args
601+
assert mock_kwargs["url"] == test_rag_constants.TEST_UPLOAD_REQUEST_URI
602+
assert mock_kwargs["headers"] == test_rag_constants.TEST_HEADERS
603+
604+
rag_file_eq(rag_file, test_rag_constants.TEST_RAG_FILE)
605+
574606
@pytest.mark.usefixtures("rag_data_client_mock_exception", "open_file_mock")
575607
def test_upload_file_failure(self):
576608
with pytest.raises(RuntimeError) as e:

vertexai/rag/rag_data.py

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,7 @@ def upload_file(
292292
path: Union[str, Sequence[str]],
293293
display_name: Optional[str] = None,
294294
description: Optional[str] = None,
295+
transformation_config: Optional[TransformationConfig] = None,
295296
) -> RagFile:
296297
"""
297298
Synchronous file upload to an existing RagCorpus.
@@ -304,10 +305,19 @@ def upload_file(
304305
305306
vertexai.init(project="my-project")
306307
308+
# Optional.
309+
transformation_config = TransformationConfig(
310+
chunking_config=ChunkingConfig(
311+
chunk_size=1024,
312+
chunk_overlap=200,
313+
),
314+
)
315+
307316
rag_file = rag.upload_file(
308317
corpus_name="projects/my-project/locations/us-central1/ragCorpora/my-corpus-1",
309318
display_name="my_file.txt",
310319
path="usr/home/my_file.txt",
320+
transformation_config=transformation_config,
311321
)
312322
```
313323
@@ -319,6 +329,8 @@ def upload_file(
319329
"usr/home/my_file.txt".
320330
display_name: The display name of the data file.
321331
description: The description of the RagFile.
332+
transformation_config: The config for transforming the RagFile, like chunking.
333+
322334
Returns:
323335
RagFile.
324336
Raises:
@@ -337,12 +349,25 @@ def upload_file(
337349
aiplatform.constants.base.API_BASE_PATH,
338350
corpus_name,
339351
)
352+
js_rag_file = {"rag_file": {"display_name": display_name}}
353+
340354
if description:
341-
js_rag_file = {
342-
"rag_file": {"display_name": display_name, "description": description}
355+
js_rag_file["rag_file"]["description"] = description
356+
357+
if transformation_config and transformation_config.chunking_config:
358+
chunk_size = transformation_config.chunking_config.chunk_size
359+
chunk_overlap = transformation_config.chunking_config.chunk_overlap
360+
js_rag_file["upload_rag_file_config"] = {
361+
"rag_file_transformation_config": {
362+
"rag_file_chunking_config": {
363+
"fixed_length_chunking": {
364+
"chunk_size": chunk_size,
365+
"chunk_overlap": chunk_overlap,
366+
}
367+
}
368+
}
343369
}
344-
else:
345-
js_rag_file = {"rag_file": {"display_name": display_name}}
370+
346371
files = {
347372
"metadata": (None, str(js_rag_file)),
348373
"file": open(path, "rb"),

0 commit comments

Comments
 (0)