Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Set use_rcts in build_index helper used by local index build.
Recursive Text Splitter is enabled by default for cloud index builds and affects how Markdown files are chunked. It prefers keeping chunk length close to the given `chunk_size` over splitting by heading sections.
  • Loading branch information
tot0 committed Feb 5, 2024
commit ce69936f00a0de470a43025472d4b7fcc4e5ebb5
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@

from pathlib import Path
from typing import Dict, Optional, Union
from packaging import version

import yaml # type: ignore[import]
from packaging import version

from azure.ai.resources._utils._open_ai_utils import build_open_ai_protocol
from azure.ai.resources.entities.mlindex import Index
from azure.ai.resources.operations._index_data_source import ACSSource, LocalSource
from azure.ai.resources.operations._acs_output_config import ACSOutputConfig
from azure.ai.resources._utils._open_ai_utils import build_open_ai_protocol
from azure.ai.resources.operations._index_data_source import ACSSource, LocalSource


def build_index(
Expand Down Expand Up @@ -60,6 +60,7 @@ def build_index(
splitter_args= {
'chunk_size': chunk_size,
'chunk_overlap': chunk_overlap,
'use_rcts': True
}
if max_sample_files is not None:
splitter_args["max_sample_files"] = max_sample_files
Expand Down