diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 63c4f38cc92..8deca237a9d 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -39,6 +39,7 @@ /tools/perf-automation/ @mikeharder @benbp /tools/pipeline-generator/ @weshaggard @benbp /tools/pipeline-witness/ @praveenkuttappan @weshaggard +/tools/sdk-ai-bots/ @raych1 /tools/sdk-generation-pipeline/ @weshaggard @praveenkuttappan @maririos /tools/sdk-testgen/ @raych1 @tadelesh /tools/test-proxy/ @scbedd @mikeharder diff --git a/tools/sdk-ai-bots/.pipelines/build-document-embeddings.yml b/tools/sdk-ai-bots/.pipelines/build-document-embeddings.yml new file mode 100644 index 00000000000..52937f725e3 --- /dev/null +++ b/tools/sdk-ai-bots/.pipelines/build-document-embeddings.yml @@ -0,0 +1,105 @@ +trigger: none +pr: none + +schedules: +- cron: "0 0 * * *" + displayName: Daily midnight build + branches: + include: + - main + always: true + +pool: + vmImage: 'windows-latest' + +variables: + st-account-name: $(storage-account-name) + st-container-name: $(storage-account-container) + aoai-endpoint: $(azure-openai-endpoint) + asch-endpoint: $(azure-search-endpoint) + asch-index-name: $(azure-search-index-name) + aoai-embedding-model: $(azure-openai-embedding-model) + +parameters: +- name: incrementalEmbedding + displayName: 'Incremental Embedding Build?' 
+ type: boolean + default: true + +stages: +- stage: BuildEngHubDocEmbeddings + displayName: 'Build EngHub Document Embeddings' + jobs: + - job: BuildEngHubDocumentEmbeddings + steps: + - template: setup-pipeline.yml + - checkout: git://internal/_git/azure-sdk-docs-eng.ms + displayName: 'Checkout azure-sdk-docs-eng.ms repository' + - task: Powershell@2 + inputs: + filePath: $(Build.SourcesDirectory)/azure-sdk-tools/tools/sdk-ai-bots/Scripts/Build-EngHubDocEmbeddings.ps1 + arguments: > + -IncrementalEmbedding "${{ parameters.incrementalEmbedding }}" + pwsh: true + workingDirectory: $(Build.SourcesDirectory) + displayName: 'Run embeddings build script' + env: + AZURE_OPENAI_ENDPOINT: $(aoai-endpoint) + AZURE_SEARCH_ENDPOINT: $(asch-endpoint) + AZURE_SEARCH_INDEX_NAME: $(asch-index-name) + AZURE_OPENAI_EMBEDDING_MODEL: $(aoai-embedding-model) + AZURE_OPENAI_API_KEY: $(azure-openapi-key) + AZURE_SEARCH_KEY: $(azure-search-key) + AZURE_STORAGE_ACCOUNT_KEY: $(storage-account-key) + AZURE_STORAGE_ACCOUNT_NAME: $(st-account-name) + AZURE_STORAGE_ACCOUNT_CONTAINER: $(st-container-name) + +- stage: BuildTypeSpecDocEmbeddings + displayName: 'Build TypeSpec Document Embeddings' + jobs: + - job: BuildTypeSpecDocumentEmbeddings + steps: + - template: setup-pipeline.yml + - task: Powershell@2 + inputs: + filePath: $(Build.SourcesDirectory)/tools/sdk-ai-bots/Scripts/Build-TypeSpecAzureDocEmbeddings.ps1 + arguments: > + -IncrementalEmbedding "${{ parameters.incrementalEmbedding }}" + pwsh: true + workingDirectory: $(Build.SourcesDirectory) + displayName: 'Run embeddings build script' + env: + AZURE_OPENAI_ENDPOINT: $(aoai-endpoint) + AZURE_SEARCH_ENDPOINT: $(asch-endpoint) + AZURE_SEARCH_INDEX_NAME: $(asch-index-name) + AZURE_OPENAI_EMBEDDING_MODEL: $(aoai-embedding-model) + AZURE_OPENAI_API_KEY: $(azure-openapi-key) + AZURE_SEARCH_KEY: $(azure-search-key) + AZURE_STORAGE_ACCOUNT_KEY: $(storage-account-key) + AZURE_STORAGE_ACCOUNT_NAME: $(st-account-name) + 
AZURE_STORAGE_ACCOUNT_CONTAINER: $(st-container-name) + +- stage: BuildCustomizedDocEmbeddings + displayName: 'Build Customized Document Embeddings' + jobs: + - job: BuildCustomizedDocumentEmbeddings + steps: + - template: setup-pipeline.yml + - task: Powershell@2 + inputs: + filePath: $(Build.SourcesDirectory)/tools/sdk-ai-bots/Scripts/Build-CustomizedDocEmbeddings.ps1 + arguments: > + -IncrementalEmbedding "${{ parameters.incrementalEmbedding }}" + pwsh: true + workingDirectory: $(Build.SourcesDirectory) + displayName: 'Run embeddings build script' + env: + AZURE_OPENAI_ENDPOINT: $(aoai-endpoint) + AZURE_SEARCH_ENDPOINT: $(asch-endpoint) + AZURE_SEARCH_INDEX_NAME: $(asch-index-name) + AZURE_OPENAI_EMBEDDING_MODEL: $(aoai-embedding-model) + AZURE_OPENAI_API_KEY: $(azure-openapi-key) + AZURE_SEARCH_KEY: $(azure-search-key) + AZURE_STORAGE_ACCOUNT_KEY: $(storage-account-key) + AZURE_STORAGE_ACCOUNT_NAME: $(st-account-name) + AZURE_STORAGE_ACCOUNT_CONTAINER: $(st-container-name) \ No newline at end of file diff --git a/tools/sdk-ai-bots/.pipelines/setup-pipeline.yml b/tools/sdk-ai-bots/.pipelines/setup-pipeline.yml new file mode 100644 index 00000000000..ebcd2c16de2 --- /dev/null +++ b/tools/sdk-ai-bots/.pipelines/setup-pipeline.yml @@ -0,0 +1,11 @@ +parameters: +- name: pythonVersion + type: string + default: '3.x' + +steps: +- task: UsePythonVersion@0 + inputs: + versionSpec: ${{ parameters.pythonVersion }} + addToPath: true +- checkout: self \ No newline at end of file diff --git a/tools/sdk-ai-bots/AzureSdkQaBot/README.md b/tools/sdk-ai-bots/AzureSdkQaBot/README.md index 96b6d1cd4c4..551b7ba18a5 100644 --- a/tools/sdk-ai-bots/AzureSdkQaBot/README.md +++ b/tools/sdk-ai-bots/AzureSdkQaBot/README.md @@ -1,5 +1,5 @@ -# Azure SDK Assistant +# Azure SDK Teams Assistant ## Summary -This is a conversational bot for Microsoft Teams that answers the question related to Azure rest api spec repo document and pull request review. 
+This is a conversational bot for Microsoft Teams that answers questions related to the Azure domain area. diff --git a/tools/sdk-ai-bots/Embeddings/README.md b/tools/sdk-ai-bots/Embeddings/README.md index 31eb844174c..e4940360c31 100644 --- a/tools/sdk-ai-bots/Embeddings/README.md +++ b/tools/sdk-ai-bots/Embeddings/README.md @@ -1,4 +1,23 @@ -## Get Started +## How to Run This Tool 1. Run `pip install -r requirements.txt` to install requirements. 1. Set the environment variables that are defined in `settings/settings.py` in `.env` file. -1. Run `python main.py`. \ No newline at end of file +1. Run `python main.py`. + +## Environment Variables +INCREMENTAL_EMBEDDING: the option to build embeddings incrementally. +METADATA_PATH: the file path of the metadata file which contains the document URL and title. +DOCUMENT_PATH: the folder path of the documents for which embeddings need to be built. +RAG_CHUNK_PATH: the file path of the RAG chunk file which is the last version or just the file name if it doesn't exist. 
+ +AZURE_OPENAI_API_KEY: Azure OpenAI API key +AZURE_OPENAI_ENDPOINT: Azure OpenAI endpoint +AZURE_SEARCH_KEY: Azure search service key +AZURE_SEARCH_ENDPOINT: Azure search service endpoint +AZURE_SEARCH_INDEX_NAME: Azure search service index name +AZURE_OPENAI_EMBEDDING_MODEL: the deployed model name in Azure OpenAI service + +##### DO NOT CHANGE BELOW VARIABLES' VALUE +AZURESEARCH_FIELDS_CONTENT=Text +AZURESEARCH_FIELDS_CONTENT_VECTOR=Embedding +AZURESEARCH_FIELDS_TAG=AdditionalMetadata +AZURESEARCH_FIELDS_ID=Id diff --git a/tools/sdk-ai-bots/Embeddings/embedding/embedding.py b/tools/sdk-ai-bots/Embeddings/embedding/embedding.py index 1622af447b1..5dce2fbfe85 100644 --- a/tools/sdk-ai-bots/Embeddings/embedding/embedding.py +++ b/tools/sdk-ai-bots/Embeddings/embedding/embedding.py @@ -18,10 +18,6 @@ class Embedding: def __init__(self): - os.environ["AZURESEARCH_FIELDS_CONTENT"] = "Text" - os.environ["AZURESEARCH_FIELDS_CONTENT_VECTOR"] = "Embedding" - os.environ["AZURESEARCH_FIELDS_TAG"] = "AdditionalMetadata" - os.environ["AZURESEARCH_FIELDS_ID"] = "Id" embeddings: OpenAIEmbeddings = OpenAIEmbeddings( openai_api_type="azure", diff --git a/tools/sdk-ai-bots/README.md b/tools/sdk-ai-bots/README.md index a840efbb4c6..ed77b3a2b7f 100644 --- a/tools/sdk-ai-bots/README.md +++ b/tools/sdk-ai-bots/README.md @@ -1,2 +1,37 @@ -# sdk-ai-bots +## Overview This folder contains a collection of tools that utilize AI techniques. + +#### AzureSdkQaBot +AzureSdkQaBot is a Teams bot which can answer the questions related to the Azure SDK domain. It is written in C#. + +#### Embeddings +It is a tool written in Python that uses the `langchain` library to create embeddings in Azure Search Service. + +#### Scripts +This folder contains some scripts to build embeddings by calling the `Embeddings` tool. 
+ + +## How to Refresh the Document Embeddings Used by Teams Bot +We have an [Azure DevOps pipeline](https://dev.azure.com/azure-sdk/internal/_build?definitionId=6811&_a=summary) which can help create or refresh the embeddings. + +1. This pipeline contains three stages: + - Build EngHub Document Embeddings + This stage builds embeddings for all the documents under the [engineering hub site](https://dev.azure.com/azure-sdk/internal/_git/azure-sdk-docs-eng.ms?path=/docs) + - Build TypeSpec Document Embeddings + This stage builds embeddings for all the documents under the [typespec-azure site](https://github.com/Azure/typespec-azure) + - Build Customized Document Embeddings + This stage builds embeddings for some markdown documents which are publicly accessible. + +2. The user can select specific stages when running the pipeline. By default, all three stages are included. + +3. The pipeline has an option to refresh the embeddings incrementally. By default, `Incremental Embedding Build` is selected when the pipeline is triggered. If the user wants to create embeddings from scratch, they should unselect this option when triggering the pipeline. + +### How to Add a New Document to the Customized Document List +If you have a publicly accessible markdown file that you want the Teams bot to understand, you can add the information to [this file](https://github.com/Azure/azure-sdk-tools/blob/main/tools/sdk-ai-bots/Embeddings/settings/metadata_customized_docs.json) in the following format. +```JSON +"ci-fix.md": { + "title": "CI Fix Guide", + "url": "https://github.com/Azure/azure-rest-api-specs/blob/main/documentation/ci-fix.md" + } +``` + This file is a `JSON` file, and you must ensure that the `key` you add is not a duplicate of an existing `key` when adding a new document. 
\ No newline at end of file diff --git a/tools/sdk-ai-bots/Scripts/Build-CustomizedDocEmbeddings.ps1 b/tools/sdk-ai-bots/Scripts/Build-CustomizedDocEmbeddings.ps1 index d0e217c87e3..2c5f91dd94e 100644 --- a/tools/sdk-ai-bots/Scripts/Build-CustomizedDocEmbeddings.ps1 +++ b/tools/sdk-ai-bots/Scripts/Build-CustomizedDocEmbeddings.ps1 @@ -52,28 +52,30 @@ function Download-GitHubFile { } try { - Invoke-WebRequest -Uri $url -OutFile $DestinationFilePath + Invoke-WebRequest -Uri $FileUrl -OutFile $DestinationFilePath Write-Host "File downloaded successfully to: $DestinationFilePath" } catch { - Write-Error "Failed to download file from GitHub: $url" + Write-Error "Failed to download file from GitHub: $FileUrl" exit 1 } } -$workingDirectory = Get-Location -if($env:AGENT_ID) { - $workingDirectory = $(System.DefaultWorkingDirectory) -} -$workingDirectory = Join-Path $workingDirectory "tools\sdk-ai-bots" +# Set the working directory, current location is supposed to be the root of the repository +$buildSourceDirectory = Get-Location +$workingDirectory = Join-Path $buildSourceDirectory "tools\sdk-ai-bots" $scriptsRoot = Join-Path $workingDirectory "Scripts" $embeddingToolFolder = Join-Path $workingDirectory "Embeddings" -. (Join-Path $scriptsRoot common.ps1) - Write-Host "scriptsRoot: $scriptsRoot" Write-Host "embeddingToolFolder: $embeddingToolFolder" +. 
(Join-Path $scriptsRoot Common.ps1) + +# Install Az.Storage module +if (-not (Get-Module -ListAvailable -Name Az.Storage)) { + Install-Module -Name Az.Storage -Force -AllowClobber -Scope CurrentUser +} # Create embeddingSource folder on current location $embeddingSourceFolder = Join-Path -Path $workingDirectory -ChildPath "embeddingSource" @@ -106,11 +108,15 @@ else { } # Download previous saved embeddings(last_rag_chunks_customized_docs.json) from Azure Blob Storage -$storageAccountName = "saazuresdkbot" -$containerName = "rag-contents" $blobName = "last_rag_chunks_customized_docs.json" $destinationPath = $embeddingSourceFolder $ragChunkPath = Join-Path -Path $embeddingSourceFolder -ChildPath $blobName +$storageAccountName = $env:AZURE_STORAGE_ACCOUNT_NAME +$containerName = $env:AZURE_STORAGE_ACCOUNT_CONTAINER +if(-not $containerName) { + Write-Error "Please set the environment variable 'AZURE_STORAGE_ACCOUNT_CONTAINER'." + exit 1 +} if($IncrementalEmbedding -eq $true) { Write-Host "Downloading previous saved embeddings $blobName from Azure Blob Storage" if(-not (Download-AzureBlob -StorageAccountName $storageAccountName -ContainerName $containerName -BlobName $blobName -DestinationPath $destinationPath)) { @@ -124,6 +130,11 @@ $env:RAG_CHUNK_PATH = $ragChunkPath $env:METADATA_PATH = $customizedDocsMetadataFile $env:DOCUMENT_PATH = $customizedDocsDestFolder $env:INCREMENTAL_EMBEDDING = $IncrementalEmbedding +$env:AZURESEARCH_FIELDS_CONTENT = "Text" +$env:AZURESEARCH_FIELDS_CONTENT_VECTOR = "Embedding" +$env:AZURESEARCH_FIELDS_TAG = "AdditionalMetadata" +$env:AZURESEARCH_FIELDS_ID = "Id" + if(-not (Build-Embeddings -EmbeddingToolFolder $embeddingToolFolder)) { exit 1 } diff --git a/tools/sdk-ai-bots/Scripts/Build-EngHubDocEmbeddings.ps1 b/tools/sdk-ai-bots/Scripts/Build-EngHubDocEmbeddings.ps1 index 7169ac70c36..895ddfbd865 100644 --- a/tools/sdk-ai-bots/Scripts/Build-EngHubDocEmbeddings.ps1 +++ b/tools/sdk-ai-bots/Scripts/Build-EngHubDocEmbeddings.ps1 @@ -17,30 
+17,23 @@ param ( [ValidateNotNullOrEmpty()] [string] $IncrementalEmbedding = $true ) - -$workingDirectory = Get-Location +# Set the working directory, current location is supposed to be the root of the repository +$buildSourceDirectory = Get-Location +$workingDirectory = Join-Path $buildSourceDirectory "tools\sdk-ai-bots" if($env:AGENT_ID) { - $workingDirectory = $(System.DefaultWorkingDirectory) + # Running in Azure DevOps, the pipeline checks out two repositories, azure-sdk-tools and the engineering hub repository, so the working directory should be under azure-sdk-tools + $workingDirectory = Join-Path $buildSourceDirectory "azure-sdk-tools\tools\sdk-ai-bots" } -$workingDirectory = Join-Path $workingDirectory "tools\sdk-ai-bots" $scriptsRoot = Join-Path $workingDirectory "Scripts" $embeddingToolFolder = Join-Path $workingDirectory "Embeddings" -. (Join-Path $scriptsRoot common.ps1) - Write-Host "scriptsRoot: $scriptsRoot" Write-Host "embeddingToolFolder: $embeddingToolFolder" +. (Join-Path $scriptsRoot Common.ps1) -# Create 'repos' folder on current location -$reposFolder = Join-Path -Path $workingDirectory -ChildPath "repos" -if (-not (Test-Path -Path $reposFolder)) { - New-Item -ItemType Directory -Path $reposFolder -} - -# Clone azure-sdk-docs-eng.ms repository -Write-Host "Cloning azure-sdk-docs-eng.ms repository at $reposFolder" -if(-not (Clone-Repository -RepoUrl "https://azure-sdk@dev.azure.com/azure-sdk/internal/_git/azure-sdk-docs-eng.ms" -RootFolder $reposFolder)) { - exit 1 +# Install Az.Storage module +if (-not (Get-Module -ListAvailable -Name Az.Storage)) { + Install-Module -Name Az.Storage -Force -AllowClobber -Scope CurrentUser } # Create embeddingSource folder on current location @@ -55,7 +48,19 @@ if (-not (Test-Path -Path $enghubDocsDestFolder)) { New-Item -ItemType Directory -Path $enghubDocsDestFolder } -$enghubDocsSrcFolder = Join-Path -Path $reposFolder -ChildPath "azure-sdk-docs-eng.ms/docs" +$reposFolder = Join-Path -Path $buildSourceDirectory 
-ChildPath "azure-sdk-docs-eng.ms" +if(-not (Test-Path $reposFolder)) { + # Clone eng hub repository + Write-Host "Cloning azure-sdk-docs-eng.ms repository at $buildSourceDirectory" + if(-not (Clone-Repository -RepoUrl "https://azure-sdk@dev.azure.com/azure-sdk/internal/_git/azure-sdk-docs-eng.ms" -RootFolder $buildSourceDirectory)) { + exit 1 + } +} +$enghubDocsSrcFolder = Join-Path -Path $buildSourceDirectory -ChildPath "azure-sdk-docs-eng.ms/docs" +if(-not (Test-Path $enghubDocsSrcFolder)) { + Write-Error "Failed to find the enghub documents folder at $enghubDocsSrcFolder" + exit 1 +} # Call the script to build the metadata.json file Write-Host "Building metadata.json file for enghub documents" @@ -71,11 +76,15 @@ else { } # Download previous saved embeddings(last_rag_chunks_enghub_docs.json) from Azure Blob Storage -$storageAccountName = "saazuresdkbot" -$containerName = "rag-contents" $blobName = "last_rag_chunks_enghub_docs.json" $destinationPath = $embeddingSourceFolder $ragChunkPath = Join-Path -Path $embeddingSourceFolder -ChildPath $blobName +$storageAccountName = $env:AZURE_STORAGE_ACCOUNT_NAME +$containerName = $env:AZURE_STORAGE_ACCOUNT_CONTAINER +if(-not $containerName) { + Write-Error "Please set the environment variable 'AZURE_STORAGE_ACCOUNT_CONTAINER'." 
+ exit 1 +} if($IncrementalEmbedding -eq $true) { Write-Host "Downloading previous saved embeddings $blobName from Azure Blob Storage" if(-not (Download-AzureBlob -StorageAccountName $storageAccountName -ContainerName $containerName -BlobName $blobName -DestinationPath $destinationPath)) { @@ -89,6 +98,12 @@ $env:RAG_CHUNK_PATH = $ragChunkPath $env:METADATA_PATH = "$embeddingSourceFolder/metadata_enghub_docs.json" $env:DOCUMENT_PATH = $enghubDocsDestFolder $env:INCREMENTAL_EMBEDDING = $IncrementalEmbedding + +$env:AZURESEARCH_FIELDS_CONTENT = "Text" +$env:AZURESEARCH_FIELDS_CONTENT_VECTOR = "Embedding" +$env:AZURESEARCH_FIELDS_TAG = "AdditionalMetadata" +$env:AZURESEARCH_FIELDS_ID = "Id" + if(-not (Build-Embeddings -EmbeddingToolFolder $embeddingToolFolder)) { exit 1 } diff --git a/tools/sdk-ai-bots/Scripts/Build-TypeSpecAzureDocEmbeddings.ps1 b/tools/sdk-ai-bots/Scripts/Build-TypeSpecAzureDocEmbeddings.ps1 index 8a9addd3ca8..6ad37c7ef88 100644 --- a/tools/sdk-ai-bots/Scripts/Build-TypeSpecAzureDocEmbeddings.ps1 +++ b/tools/sdk-ai-bots/Scripts/Build-TypeSpecAzureDocEmbeddings.ps1 @@ -18,30 +18,19 @@ param ( [ValidateNotNullOrEmpty()] [string] $IncrementalEmbedding = $true ) - -$workingDirectory = Get-Location -if($env:AGENT_ID) { - $workingDirectory = $(System.DefaultWorkingDirectory) -} -$workingDirectory = Join-Path $workingDirectory "tools\sdk-ai-bots" +# Set the working directory, current location is supposed to be the root of the repository +$buildSourceDirectory = Get-Location +$workingDirectory = Join-Path $buildSourceDirectory "tools\sdk-ai-bots" $scriptsRoot = Join-Path $workingDirectory "Scripts" $embeddingToolFolder = Join-Path $workingDirectory "Embeddings" -. (Join-Path $scriptsRoot common.ps1) - Write-Host "scriptsRoot: $scriptsRoot" Write-Host "embeddingToolFolder: $embeddingToolFolder" +. 
(Join-Path $scriptsRoot Common.ps1) -# Create 'repos' folder on current location -$reposFolder = Join-Path -Path $workingDirectory -ChildPath "repos" -if (-not (Test-Path -Path $reposFolder)) { - New-Item -ItemType Directory -Path $reposFolder -} - -# Clone Azure/typespec-azure repository -Write-Host "Cloning Azure/typespec-azure repository at $reposFolder" -if(-not (Clone-Repository -RepoUrl "https://github.com/Azure/typespec-azure.git" -RootFolder $reposFolder)) { - exit 1 +# Install Az.Storage module +if (-not (Get-Module -ListAvailable -Name Az.Storage)) { + Install-Module -Name Az.Storage -Force -AllowClobber -Scope CurrentUser } # Create embeddingSource folder on current location @@ -56,7 +45,19 @@ if (-not (Test-Path -Path $typespecDocsDestFolder)) { New-Item -ItemType Directory -Path $typespecDocsDestFolder } -$typespecDocsSrcFolder = Join-Path -Path $reposFolder -ChildPath "typespec-azure/docs" +$reposFolder = Join-Path -Path $buildSourceDirectory -ChildPath "typespec-azure" +if(-not (Test-Path $reposFolder)) { + # Clone Azure/typespec-azure repository + Write-Host "Cloning Azure/typespec-azure repository at $buildSourceDirectory" + if(-not (Clone-Repository -RepoUrl "https://github.com/Azure/typespec-azure.git" -RootFolder $buildSourceDirectory)) { + exit 1 + } +} +$typespecDocsSrcFolder = Join-Path -Path $buildSourceDirectory -ChildPath "typespec-azure/docs" +if(-not (Test-Path $typespecDocsSrcFolder)) { + Write-Error "Failed to find the typespec documents folder at $typespecDocsSrcFolder" + exit 1 +} # Call the script to build the metadata.json file Write-Host "Building metadata.json file for typespec documents" @@ -72,11 +73,15 @@ else { } # Download previous saved embeddings(last_rag_chunks_typespec_docs.json) from Azure Blob Storage -$storageAccountName = "saazuresdkbot" -$containerName = "rag-contents" $blobName = "last_rag_chunks_typespec_docs.json" $destinationPath = $embeddingSourceFolder $ragChunkPath = Join-Path -Path $embeddingSourceFolder 
-ChildPath $blobName +$storageAccountName = $env:AZURE_STORAGE_ACCOUNT_NAME +$containerName = $env:AZURE_STORAGE_ACCOUNT_CONTAINER +if(-not $containerName) { + Write-Error "Please set the environment variable 'AZURE_STORAGE_ACCOUNT_CONTAINER'." + exit 1 +} if($IncrementalEmbedding -eq $true) { Write-Host "Downloading previous saved embeddings $blobName from Azure Blob Storage" if(-not (Download-AzureBlob -StorageAccountName $storageAccountName -ContainerName $containerName -BlobName $blobName -DestinationPath $destinationPath)) { @@ -90,6 +95,11 @@ $env:RAG_CHUNK_PATH = $ragChunkPath $env:METADATA_PATH = "$embeddingSourceFolder/metadata_typespec_docs.json" $env:DOCUMENT_PATH = $typespecDocsDestFolder $env:INCREMENTAL_EMBEDDING = $IncrementalEmbedding +$env:AZURESEARCH_FIELDS_CONTENT = "Text" +$env:AZURESEARCH_FIELDS_CONTENT_VECTOR = "Embedding" +$env:AZURESEARCH_FIELDS_TAG = "AdditionalMetadata" +$env:AZURESEARCH_FIELDS_ID = "Id" + if(-not (Build-Embeddings -EmbeddingToolFolder $embeddingToolFolder)) { exit 1 } diff --git a/tools/sdk-ai-bots/Scripts/Common.ps1 b/tools/sdk-ai-bots/Scripts/Common.ps1 index 1b086c27e9a..b604cbb39b7 100644 --- a/tools/sdk-ai-bots/Scripts/Common.ps1 +++ b/tools/sdk-ai-bots/Scripts/Common.ps1 @@ -13,6 +13,10 @@ function Clone-Repository { [string] $RootFolder ) try { + if(-not (Test-Path $RootFolder)) { + New-Item -ItemType Directory -Path $RootFolder + } + Push-Location $RootFolder # Clone repository git clone $RepoUrl @@ -82,13 +86,28 @@ function Build-Embeddings { Write-Host "Building embeddings..." 
try { Push-Location $embeddingToolFolder + + # Print Python version + $pythonVersion = python -c "import sys; print(sys.version)" + Write-Host "Python version: $pythonVersion" + # Print Python executable path + $pythonEnvExePath = python -c "import sys; print(sys.executable)" + Write-Host "Python executable path: $pythonEnvExePath" + # setup python environment and install required packages Write-Host "Setting up python environment" python -m pip install --upgrade pip - + Write-Host "Installing required packages" - pip install -r requirements.txt - + python -m pip install -r requirements.txt + + Write-Host "List package versions..." + python -m pip list > pip_list.txt + + Write-Host "Print the content of pip_list.txt" + $installedPkg = Get-Content -Path "pip_list.txt" + Write-Host $installedPkg + Write-Host "Starts building" python main.py } @@ -100,7 +119,6 @@ function Build-Embeddings { Pop-Location } } - Write-Host "Finishes building with time: $($stopwatch.TotalSeconds) seconds" return $true } diff --git a/tools/sdk-ai-bots/Scripts/Markdown-BuildIndexMetadata.ps1 b/tools/sdk-ai-bots/Scripts/Markdown-BuildIndexMetadata.ps1 index ff616774b46..e806fb1326e 100644 --- a/tools/sdk-ai-bots/Scripts/Markdown-BuildIndexMetadata.ps1 +++ b/tools/sdk-ai-bots/Scripts/Markdown-BuildIndexMetadata.ps1 @@ -62,15 +62,14 @@ function Generate-Metadata([string]$rootFolder, [string]$outputFolder) $pagePath = $_.DirectoryName.Substring($_.DirectoryName.IndexOf("\docs\")+"\docs\".Length).Replace('\','/') $url = $DocBaseUrl + '/' + $pagePath + '/' + $fileName $url = $url.Replace(' ', '%20') - #Write-Host "filename:" $fileName - Write-Host "url:" $url + Write-Host "The URL of the Markdown file is: $url" $title = Get-TitleFromMarkdown $_.FullName Write-Host "The title of the Markdown file is: $title" - #$key = $_.Name.Replace(' ','-') # adding path path to key to avoid name conflict $key = $pagePath + '/' + $_.Name $key = $key.Replace(' ', '-') $key = $key.Replace('/', '-') + Write-Host 
"The key of the Markdown file is: $key" $fileData = @{ "title" = $title "url" = $url