> SearchAsync(
[Description("The phrase to search for.")] string searchPhrase,
[Description("If possible, specify the filename to search that file only. If not provided or empty, the search includes all files.")] string? filenameFilter = null)
@@ -114,7 +126,7 @@
await InvokeAsync(StateHasChanged);
var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5);
return results.Select(result =>
- $"{result.Text}");
+ $"{result.Text}");
}
public void Dispose()
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatCitation.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatCitation.razor
index ccb5853cec4..667189beabd 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatCitation.razor
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatCitation.razor
@@ -17,10 +17,7 @@
public required string File { get; set; }
[Parameter]
- public int? PageNumber { get; set; }
-
- [Parameter]
- public required string Quote { get; set; }
+ public string? Quote { get; set; }
private string? viewerUrl;
@@ -28,11 +25,15 @@
{
viewerUrl = null;
- // If you ingest other types of content besides PDF files, construct a URL to an appropriate viewer here
- if (File.EndsWith(".pdf"))
+ // If you ingest other types of content besides Markdown or PDF files, construct a URL to an appropriate viewer here
+ if (File.EndsWith(".md"))
+ {
+ viewerUrl = $"lib/markdown_viewer/viewer.html?file=/Data/{HttpUtility.UrlEncode(File)}#:~:text={Uri.EscapeDataString(Quote ?? "")}";
+ }
+ else if (File.EndsWith(".pdf"))
{
var search = Quote?.Trim('.', ',', ' ', '\n', '\r', '\t', '"', '\'');
- viewerUrl = $"lib/pdf_viewer/viewer.html?file=/Data/{HttpUtility.UrlEncode(File)}#page={PageNumber}&search={HttpUtility.UrlEncode(search)}&phrase=true";
+ viewerUrl = $"lib/pdf_viewer/viewer.html?file=/Data/{HttpUtility.UrlEncode(File)}#search={HttpUtility.UrlEncode(search)}&phrase=true";
}
}
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatMessageItem.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatMessageItem.razor
index 92c20c70667..e45d92ab5f9 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatMessageItem.razor
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatMessageItem.razor
@@ -28,11 +28,24 @@ else if (Message.Role == ChatRole.Assistant)
@foreach (var citation in citations ?? [])
{
-
+
}
}
+ else if (content is FunctionCallContent { Name: "LoadDocuments" })
+ {
+
+
+
+ Loading relevant documents (this will take a minute)...
+
+
+ }
else if (content is FunctionCallContent { Name: "Search" } fcc && fcc.Arguments?.TryGetValue("searchPhrase", out var searchPhrase) is true)
{
@@ -56,9 +69,9 @@ else if (Message.Role == ChatRole.Assistant)
@code {
private static readonly ConditionalWeakTable
SubscribersLookup = new();
- private static readonly Regex CitationRegex = new(@"(?.*?)
", RegexOptions.NonBacktracking);
+ private static readonly Regex CitationRegex = new(@"(?.*?)
", RegexOptions.NonBacktracking);
- private List<(string File, int? Page, string Quote)>? citations;
+ private List<(string File, string Quote)>? citations;
[Parameter, EditorRequired]
public required ChatMessage Message { get; set; }
@@ -88,7 +101,7 @@ else if (Message.Role == ChatRole.Assistant)
{
var matches = CitationRegex.Matches(text);
citations = matches.Any()
- ? matches.Select(m => (m.Groups["file"].Value, int.TryParse(m.Groups["page"].Value, out var page) ? page : (int?)null, m.Groups["quote"].Value)).ToList()
+ ? matches.Select(m => (m.Groups["file"].Value, m.Groups["quote"].Value)).ToList()
: null;
}
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Program.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Program.cs
index 450914c4461..76ac926bbd1 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Program.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Program.cs
@@ -15,10 +15,11 @@
openai.AddEmbeddingGenerator("text-embedding-3-small");
builder.AddAzureSearchClient("search");
-builder.Services.AddAzureAISearchCollection("data-aichatweb-chunks");
-builder.Services.AddAzureAISearchCollection("data-aichatweb-documents");
-builder.Services.AddScoped();
+builder.Services.AddAzureAISearchVectorStore();
+builder.Services.AddAzureAISearchCollection(IngestedChunk.CollectionName);
+builder.Services.AddSingleton();
builder.Services.AddSingleton();
+builder.Services.AddKeyedSingleton("ingestion_directory", new DirectoryInfo(Path.Combine(builder.Environment.WebRootPath, "Data")));
var app = builder.Build();
@@ -39,12 +40,4 @@
app.MapRazorComponents()
.AddInteractiveServerRenderMode();
-// By default, we ingest PDF files from the /wwwroot/Data directory. You can ingest from
-// other sources by implementing IIngestionSource.
-// Important: ensure that any content you ingest is trusted, as it may be reflected back
-// to users or could be a source of prompt injection risk.
-await DataIngestor.IngestDataAsync(
- app.Services,
- new PDFDirectorySource(Path.Combine(builder.Environment.WebRootPath, "Data")));
-
app.Run();
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs
index 0fd76874dfd..2d93db8fd94 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs
@@ -1,24 +1,31 @@
-using Microsoft.Extensions.VectorData;
+using System.Text.Json.Serialization;
+using Microsoft.Extensions.VectorData;
namespace aichatweb.Web.Services;
public class IngestedChunk
{
- private const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model
- private const string VectorDistanceFunction = DistanceFunction.CosineSimilarity;
+ public const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model
+ public const string VectorDistanceFunction = DistanceFunction.CosineSimilarity;
+ public const string CollectionName = "data-aichatweb-chunks";
- [VectorStoreKey]
- public required string Key { get; set; }
+ [VectorStoreKey(StorageName = "key")]
+ [JsonPropertyName("key")]
+ public required Guid Key { get; set; }
- [VectorStoreData(IsIndexed = true)]
+ [VectorStoreData(StorageName = "documentid")]
+ [JsonPropertyName("documentid")]
public required string DocumentId { get; set; }
- [VectorStoreData]
- public int PageNumber { get; set; }
-
- [VectorStoreData]
+ [VectorStoreData(StorageName = "content")]
+ [JsonPropertyName("content")]
public required string Text { get; set; }
- [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction)]
+ [VectorStoreData(StorageName = "context")]
+ [JsonPropertyName("context")]
+ public string? Context { get; set; }
+
+ [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")]
+ [JsonPropertyName("embedding")]
public string? Vector => Text;
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/IngestedDocument.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/IngestedDocument.cs
deleted file mode 100644
index 370aef16fd9..00000000000
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/IngestedDocument.cs
+++ /dev/null
@@ -1,25 +0,0 @@
-using Microsoft.Extensions.VectorData;
-
-namespace aichatweb.Web.Services;
-
-public class IngestedDocument
-{
- private const int VectorDimensions = 2;
- private const string VectorDistanceFunction = DistanceFunction.CosineSimilarity;
-
- [VectorStoreKey]
- public required string Key { get; set; }
-
- [VectorStoreData(IsIndexed = true)]
- public required string SourceId { get; set; }
-
- [VectorStoreData]
- public required string DocumentId { get; set; }
-
- [VectorStoreData]
- public required string DocumentVersion { get; set; }
-
- // The vector is not used but required for some vector databases
- [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction)]
- public ReadOnlyMemory Vector { get; set; } = new ReadOnlyMemory([0, 0]);
-}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
index 2fe43370071..9dd366a03a5 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
@@ -1,58 +1,35 @@
using Microsoft.Extensions.AI;
+using Microsoft.Extensions.DataIngestion;
+using Microsoft.Extensions.DataIngestion.Chunkers;
using Microsoft.Extensions.VectorData;
+using Microsoft.ML.Tokenizers;
namespace aichatweb.Web.Services.Ingestion;
public class DataIngestor(
ILogger logger,
- VectorStoreCollection chunksCollection,
- VectorStoreCollection documentsCollection)
+ ILoggerFactory loggerFactory,
+ VectorStore vectorStore,
+ IEmbeddingGenerator> embeddingGenerator)
{
- public static async Task IngestDataAsync(IServiceProvider services, IIngestionSource source)
+ public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern)
{
- using var scope = services.CreateScope();
- var ingestor = scope.ServiceProvider.GetRequiredService();
- await ingestor.IngestDataAsync(source);
- }
-
- public async Task IngestDataAsync(IIngestionSource source)
- {
- await chunksCollection.EnsureCollectionExistsAsync();
- await documentsCollection.EnsureCollectionExistsAsync();
-
- var sourceId = source.SourceId;
- var documentsForSource = await documentsCollection.GetAsync(doc => doc.SourceId == sourceId, top: int.MaxValue).ToListAsync();
-
- var deletedDocuments = await source.GetDeletedDocumentsAsync(documentsForSource);
- foreach (var deletedDocument in deletedDocuments)
+ using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, new()
{
- logger.LogInformation("Removing ingested data for {DocumentId}", deletedDocument.DocumentId);
- await DeleteChunksForDocumentAsync(deletedDocument);
- await documentsCollection.DeleteAsync(deletedDocument.Key);
- }
-
- var modifiedDocuments = await source.GetNewOrModifiedDocumentsAsync(documentsForSource);
- foreach (var modifiedDocument in modifiedDocuments)
- {
- logger.LogInformation("Processing {DocumentId}", modifiedDocument.DocumentId);
- await DeleteChunksForDocumentAsync(modifiedDocument);
-
- await documentsCollection.UpsertAsync(modifiedDocument);
-
- var newRecords = await source.CreateChunksForDocumentAsync(modifiedDocument);
- await chunksCollection.UpsertAsync(newRecords);
- }
-
- logger.LogInformation("Ingestion is up-to-date");
-
- async Task DeleteChunksForDocumentAsync(IngestedDocument document)
+ CollectionName = IngestedChunk.CollectionName,
+ DistanceFunction = IngestedChunk.VectorDistanceFunction,
+ IncrementalIngestion = false,
+ });
+
+ using var pipeline = new IngestionPipeline(
+ reader: new DocumentReader(directory),
+ chunker: new SemanticSimilarityChunker(embeddingGenerator, new(TiktokenTokenizer.CreateForModel("gpt-4o"))),
+ writer: writer,
+ loggerFactory: loggerFactory);
+
+ await foreach (var result in pipeline.ProcessAsync(directory, searchPattern))
{
- var documentId = document.DocumentId;
- var chunksToDelete = await chunksCollection.GetAsync(record => record.DocumentId == documentId, int.MaxValue).ToListAsync();
- if (chunksToDelete.Count != 0)
- {
- await chunksCollection.DeleteAsync(chunksToDelete.Select(r => r.Key));
- }
+ logger.LogInformation("Completed processing '{id}'. Succeeded: '{succeeded}'.", result.DocumentId, result.Succeeded);
}
}
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/DocumentReader.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/DocumentReader.cs
new file mode 100644
index 00000000000..60fcdbdc128
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/DocumentReader.cs
@@ -0,0 +1,42 @@
+using Microsoft.Extensions.DataIngestion;
+
+namespace aichatweb.Web.Services.Ingestion;
+
+internal sealed class DocumentReader(DirectoryInfo rootDirectory) : IngestionDocumentReader
+{
+ private readonly MarkdownReader _markdownReader = new();
+ private readonly MarkItDownMcpReader _pdfReader = new(mcpServerUri: GetMarkItDownMcpServerUrl());
+
+ public override Task ReadAsync(FileInfo source, string identifier, string? mediaType = null, CancellationToken cancellationToken = default)
+ {
+ if (Path.IsPathFullyQualified(identifier))
+ {
+ // Normalize the identifier to its relative path
+ identifier = Path.GetRelativePath(rootDirectory.FullName, identifier);
+ }
+
+ mediaType = GetCustomMediaType(source) ?? mediaType;
+ return base.ReadAsync(source, identifier, mediaType, cancellationToken);
+ }
+
+ public override Task ReadAsync(Stream source, string identifier, string mediaType, CancellationToken cancellationToken = default)
+ => mediaType switch
+ {
+ "application/pdf" => _pdfReader.ReadAsync(source, identifier, mediaType, cancellationToken),
+ "text/markdown" => _markdownReader.ReadAsync(source, identifier, mediaType, cancellationToken),
+ _ => throw new InvalidOperationException($"Unsupported media type '{mediaType}'"),
+ };
+
+ private static string? GetCustomMediaType(FileInfo source)
+ => source.Extension switch
+ {
+ ".md" => "text/markdown",
+ _ => null
+ };
+
+ private static Uri GetMarkItDownMcpServerUrl()
+ {
+ var markItDownMcpUrl = $"{Environment.GetEnvironmentVariable("MARKITDOWN_MCP_URL")}/mcp";
+ return new Uri(markItDownMcpUrl);
+ }
+}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/IIngestionSource.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/IIngestionSource.cs
deleted file mode 100644
index a1c6b2191d1..00000000000
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/IIngestionSource.cs
+++ /dev/null
@@ -1,12 +0,0 @@
-namespace aichatweb.Web.Services.Ingestion;
-
-public interface IIngestionSource
-{
- string SourceId { get; }
-
- Task> GetNewOrModifiedDocumentsAsync(IReadOnlyList existingDocuments);
-
- Task> GetDeletedDocumentsAsync(IReadOnlyList existingDocuments);
-
- Task> CreateChunksForDocumentAsync(IngestedDocument document);
-}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/PDFDirectorySource.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/PDFDirectorySource.cs
deleted file mode 100644
index 32e9f225c08..00000000000
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/PDFDirectorySource.cs
+++ /dev/null
@@ -1,71 +0,0 @@
-using Microsoft.SemanticKernel.Text;
-using UglyToad.PdfPig;
-using UglyToad.PdfPig.Content;
-using UglyToad.PdfPig.DocumentLayoutAnalysis.PageSegmenter;
-using UglyToad.PdfPig.DocumentLayoutAnalysis.WordExtractor;
-
-namespace aichatweb.Web.Services.Ingestion;
-
-public class PDFDirectorySource(string sourceDirectory) : IIngestionSource
-{
- public static string SourceFileId(string path) => Path.GetFileName(path);
- public static string SourceFileVersion(string path) => File.GetLastWriteTimeUtc(path).ToString("o");
-
- public string SourceId => $"{nameof(PDFDirectorySource)}:{sourceDirectory}";
-
- public Task> GetNewOrModifiedDocumentsAsync(IReadOnlyList existingDocuments)
- {
- var results = new List();
- var sourceFiles = Directory.GetFiles(sourceDirectory, "*.pdf");
- var existingDocumentsById = existingDocuments.ToDictionary(d => d.DocumentId);
-
- foreach (var sourceFile in sourceFiles)
- {
- var sourceFileId = SourceFileId(sourceFile);
- var sourceFileVersion = SourceFileVersion(sourceFile);
- var existingDocumentVersion = existingDocumentsById.TryGetValue(sourceFileId, out var existingDocument) ? existingDocument.DocumentVersion : null;
- if (existingDocumentVersion != sourceFileVersion)
- {
- results.Add(new() { Key = Guid.CreateVersion7().ToString(), SourceId = SourceId, DocumentId = sourceFileId, DocumentVersion = sourceFileVersion });
- }
- }
-
- return Task.FromResult((IEnumerable)results);
- }
-
- public Task> GetDeletedDocumentsAsync(IReadOnlyList existingDocuments)
- {
- var currentFiles = Directory.GetFiles(sourceDirectory, "*.pdf");
- var currentFileIds = currentFiles.ToLookup(SourceFileId);
- var deletedDocuments = existingDocuments.Where(d => !currentFileIds.Contains(d.DocumentId));
- return Task.FromResult(deletedDocuments);
- }
-
- public Task> CreateChunksForDocumentAsync(IngestedDocument document)
- {
- using var pdf = PdfDocument.Open(Path.Combine(sourceDirectory, document.DocumentId));
- var paragraphs = pdf.GetPages().SelectMany(GetPageParagraphs).ToList();
-
- return Task.FromResult(paragraphs.Select(p => new IngestedChunk
- {
- Key = Guid.CreateVersion7().ToString(),
- DocumentId = document.DocumentId,
- PageNumber = p.PageNumber,
- Text = p.Text,
- }));
- }
-
- private static IEnumerable<(int PageNumber, int IndexOnPage, string Text)> GetPageParagraphs(Page pdfPage)
- {
- var letters = pdfPage.Letters;
- var words = NearestNeighbourWordExtractor.Instance.GetWords(letters);
- var textBlocks = DocstrumBoundingBoxes.Instance.GetBlocks(words);
- var pageText = string.Join(Environment.NewLine + Environment.NewLine,
- textBlocks.Select(t => t.Text.ReplaceLineEndings(" ")));
-
-#pragma warning disable SKEXP0050 // Type is for evaluation purposes only
- return TextChunker.SplitPlainTextParagraphs([pageText], 200)
- .Select((text, index) => (pdfPage.Number, index, text));
-#pragma warning restore SKEXP0050 // Type is for evaluation purposes only
- }
-}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs
index 84fb719f6ae..d043c8efb84 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs
@@ -1,12 +1,22 @@
-using Microsoft.Extensions.VectorData;
+using aichatweb.Web.Services.Ingestion;
+using Microsoft.Extensions.VectorData;
namespace aichatweb.Web.Services;
public class SemanticSearch(
- VectorStoreCollection vectorCollection)
+ VectorStoreCollection vectorCollection,
+ [FromKeyedServices("ingestion_directory")] DirectoryInfo ingestionDirectory,
+ DataIngestor dataIngestor)
{
+ private Task? _ingestionTask;
+
+ public async Task LoadDocumentsAsync() => await ( _ingestionTask ??= dataIngestor.IngestDataAsync(ingestionDirectory, searchPattern: "*.*"));
+
public async Task> SearchAsync(string text, string? documentIdFilter, int maxResults)
{
+ // Ensure documents have been loaded before searching
+ await LoadDocumentsAsync();
+
var nearest = vectorCollection.SearchAsync(text, maxResults, new VectorSearchOptions
{
Filter = documentIdFilter is { Length: > 0 } ? record => record.DocumentId == documentIdFilter : null,
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/aichatweb.Web.csproj b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/aichatweb.Web.csproj
index 861a3a974c6..f97d0b28a77 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/aichatweb.Web.csproj
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/aichatweb.Web.csproj
@@ -1,7 +1,7 @@
- net9.0
+ net10.0
enable
enable
secret
@@ -11,11 +11,13 @@
-
-
-
+
+
+
+
+
-
+
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/wwwroot/Data/Example_GPS_Watch.md b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/wwwroot/Data/Example_GPS_Watch.md
new file mode 100644
index 00000000000..f7d042edf83
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/wwwroot/Data/Example_GPS_Watch.md
@@ -0,0 +1,193 @@
+# TrailMaster GPS Watch
+
+## 1. Introduction
+
+ExpeditionTech is proud to introduce the TrailMaster GPS Watch, a state-of-the-art outdoor electronics device designed for the most demanding outdoor adventurers. This watch is meticulously engineered to provide accurate location information, real-time sharing, and reliable mapping capabilities. The TrailMaster GPS Watch is built to withstand the harshest environmental conditions while providing users with the critical data they need to navigate through challenging terrain and optimize their outdoor experiences.
+
+### 1.1 Product Overview
+
+The TrailMaster GPS Watch is a high-performance outdoor electronics device that combines advanced GPS technology with rugged design. It features a durable, shock-resistant casing, a high-contrast display for optimal visibility in bright sunlight, and a built-in rechargeable battery for extended use in remote areas. The TrailMaster GPS Watch is equipped with a comprehensive suite of navigation tools, including topographic maps, trail tracking, and waypoint management. Additionally, it offers real-time location sharing for enhanced safety and group coordination during outdoor activities.
+
+### 1.2 Intended Use
+
+The TrailMaster GPS Watch is intended for use by experienced outdoor enthusiasts, including hikers, backpackers, trail runners, and mountaineers. This device is designed to provide essential navigation information and location tracking in remote, off-grid environments. Users can rely on the TrailMaster GPS Watch to plan and execute their outdoor adventures with confidence, knowing they have access to accurate maps, location data, and real-time sharing capabilities. Although the TrailMaster GPS Watch is engineered for extreme durability, users should always exercise caution and take appropriate safety measures when engaging in outdoor activities.
+
+#### 1.2.1 Safety Precautions
+
+Before using the TrailMaster GPS Watch, users must ensure that the device is fully charged and properly calibrated according to the manufacturer's specifications. It is essential to familiarize oneself with the watch's features, controls, and navigation functions before venturing into remote outdoor areas. Users should also be aware of potential environmental hazards, such as extreme weather conditions, rugged terrain, and limited access to emergency services. Proper planning, preparation, and adherence to best practices for outdoor safety are critical when using the TrailMaster GPS Watch.
+
+## 2. Technical Specifications
+
+### 2.1 Hardware
+
+#### 2.1.1 Processor and Memory
+
+The TrailMaster GPS Watch is equipped with a high-performance ARM Cortex processor, ensuring fast and efficient operation for all your outdoor activities. The device also comes with 4GB of internal memory, providing ample storage for maps, routes, and waypoints. The processor operates at 1.2GHz, allowing for quick map rendering and location updates.
+
+#### 2.1.2 GPS Module
+
+The GPS module in the TrailMaster GPS Watch utilizes a state-of-the-art multi-constellation receiver, supporting GPS, GLONASS, and Galileo satellite systems. This enables precise location tracking and navigation, even in challenging outdoor environments. The module also features advanced signal processing algorithms, ensuring reliable performance in remote areas with limited satellite visibility.
+
+#### 2.1.3 Sensors
+
+Equipped with a comprehensive set of sensors, the TrailMaster GPS Watch offers an array of valuable data for outdoor enthusiasts. The built-in altimeter provides accurate altitude readings, while the barometer monitors changes in air pressure to forecast weather conditions. Additionally, the watch includes a digital compass for reliable orientation, ensuring a seamless navigation experience in the great outdoors.
+
+### 2.2 Software
+
+#### 2.2.1 Operating System
+
+The TrailMaster GPS Watch runs on a proprietary operating system optimized for outdoor navigation and tracking. This custom OS combines robustness and efficiency, allowing for seamless integration with the device's hardware and sensors. The interface is designed for intuitive interaction, enabling users to access maps, location sharing, and other features with ease, even in challenging outdoor conditions.
+
+#### 2.2.2 Map Data
+
+The TrailMaster GPS Watch comes preloaded with detailed topographic maps, providing comprehensive coverage of trails, terrain, and points of interest. These maps are stored locally on the device, ensuring rapid access and smooth navigation without relying on cellular or data network connectivity. Users can also import additional map data via the dedicated USB port, expanding the watch's mapping capabilities for customized adventures.
+
+## 3. Setup and Installation
+
+### 3.1 Charging
+
+Before using your TrailMaster GPS Watch for the first time, it is essential to ensure that the device is fully charged. To charge your GPS watch, follow these steps:
+
+1. Locate the charging port cover on the back of the watch.
+2. Gently lift the cover to expose the charging port.
+3. Connect the provided USB charging cable to the port, ensuring a secure connection.
+4. Plug the other end of the cable into a power source, such as a computer or USB wall adapter.
+5. Allow the watch to charge for at least 2 hours, or until the battery indicator on the display shows it is fully charged.
+
+For optimal performance, it is recommended to fully charge the device before each use and to avoid overcharging. Please refer to the TrailMaster GPS Watch technical specifications for detailed battery charging information.
+
+### 3.2 Initial Configuration
+
+Once your TrailMaster GPS Watch is fully charged, you can proceed with the initial configuration by following these steps:
+
+1. Press and hold the power button located on the side of the watch to turn it on.
+2. Follow the on-screen instructions to select your language, set the date and time, and calibrate the GPS.
+3. Connect the watch to your smartphone using the TrailMaster companion app to enable real-time location sharing and receive notifications.
+4. Customize the watch settings, such as display preferences and map views, to suit your outdoor activities.
+
+Before using the GPS functionality, it is crucial to ensure that the watch has a clear view of the sky to acquire GPS signals. Please refer to the TrailMaster GPS Watch user guide for detailed instructions on GPS calibration and satellite acquisition.
+
+## 4. Operation
+
+### 4.1 Basic Functions
+
+The TrailMaster GPS Watch is designed for outdoor use and offers a variety of basic functions to assist users in navigating and staying connected during their adventures.
+
+#### 4.1.1 Powering On and Off
+
+To power on the TrailMaster GPS Watch, press and hold the power button (located on the right side of the watch) for 3 seconds. The watch will display the ExpeditionTech logo and then proceed to the main navigation screen. To power off the watch, press and hold the power button and select "Power Off" from the menu.
+
+#### 4.1.2 Accessing Built-in Maps
+
+The TrailMaster GPS Watch comes pre-loaded with detailed maps of various outdoor locations. To access the maps, press the map button (located on the left side of the watch) to bring up the map interface. From here, you can view your current location, set waypoints, and plan routes.
+
+#### 4.1.3 Real-time Location Sharing
+
+Utilize the real-time location sharing feature to transmit your current location to designated contacts. Press the share button (located on the top of the watch) and select the specific contacts you wish to share your location with. This feature requires a stable GPS signal and a connected smartphone with the TrailMaster app installed.
+
+### 4.2 Advanced Features
+
+The TrailMaster GPS Watch offers advanced features to enhance the user experience and provide additional functionality for outdoor enthusiasts.
+
+#### 4.2.1 Rugged Design
+
+The TrailMaster GPS Watch is built to withstand rugged outdoor conditions, including extreme temperatures, water exposure, and impact. The durable casing and reinforced strap ensure that the watch remains operational in challenging environments.
+
+#### 4.2.2 Navigation Tools
+
+In addition to basic map access, the TrailMaster GPS Watch includes advanced navigation tools such as compass, altimeter, and barometer. These tools provide vital information for navigation and weather monitoring while in the field.
+
+#### 4.2.3 Customizing Data Screens
+
+Users can customize the data screens on the TrailMaster GPS Watch to display the specific information they require during their outdoor activities. From the main menu, navigate to the settings and select "Data Screens" to adjust the layout and content of the screens.
+
+#### 4.2.4 Tracking Performance Metrics
+
+The TrailMaster GPS Watch is equipped with sensors to track performance metrics such as distance traveled, speed, elevation gain, and heart rate. Use the tracking mode to monitor these metrics in real-time or review them after completing an activity.
+
+## 5. Troubleshooting
+
+### 5.1 Diagnostic Tools and Equipment
+
+When troubleshooting the TrailMaster GPS Watch, it is essential to use specialized industrial diagnostic tools and equipment to perform accurate diagnostic tests. These tools and equipment include:
+
+- **GPS Signal Analyzer**: Use a GPS signal analyzer to check the strength and quality of the satellite signals received by the watch.
+- **RF Spectrum Analyzer**: An RF spectrum analyzer is required to analyze the radio frequency spectrum and identify any interference affecting the GPS reception.
+- **Ruggedness Test Equipment**: Perform ruggedness tests using shock and vibration equipment to ensure the watch can withstand outdoor adventures without performance issues.
+
+It is essential to use these specialized tools and equipment to accurately diagnose any issues with the TrailMaster GPS Watch and ensure optimal performance.
+
+### 5.2 Common Issues
+
+#### 5.2.1 GPS Signal Loss
+
+**Symptoms:**
+
+The watch displays "No GPS Signal" or intermittently loses GPS signal during use.
+
+**Potential Causes:**
+
+- Obstruction of satellite signals due to dense foliage, tall buildings, or natural terrain features.
+- Radio frequency interference affecting GPS reception.
+- Wear and tear on the GPS antenna or receiver.
+
+**Troubleshooting Steps:**
+
+1. Check the surroundings for any obstructions blocking satellite signals.
+2. Use an RF spectrum analyzer to identify any potential sources of interference.
+3. Perform a diagnostics test using a GPS signal analyzer to assess the strength and quality of the GPS signal received by the watch.
+4. If the issue persists, contact ExpeditionTech customer support for further assistance.
+
+#### 5.2.2 Connectivity Issues
+
+**Symptoms:**
+
+- Inability to share real-time location or connect to other devices.
+- Unreliable Bluetooth connectivity.
+
+**Potential Causes:**
+
+- Bluetooth interference from other electronic devices.
+- Signal obstruction due to environmental factors.
+- Software or firmware issues.
+
+**Troubleshooting Steps:**
+
+1. Ensure the watch is within the recommended Bluetooth range of the connected device.
+2. Identify and eliminate potential sources of Bluetooth interference in the vicinity.
+3. Update the watch's firmware to the latest version to address any software-related connectivity issues.
+4. If connectivity problems persist, perform a comprehensive diagnostics test using a specialized Bluetooth signal analyzer.
+
+By following these troubleshooting steps and using specialized diagnostic tools and equipment, users can effectively identify and address common issues with the TrailMaster GPS Watch.
+
+## 6. Maintenance and Care
+
+### 6.1 Cleaning
+
+It is important to regularly clean your TrailMaster GPS Watch to ensure optimal performance. Use a soft, damp cloth to gently wipe the watch face and straps. Do not use harsh chemicals or solvents, as these may damage the watch's casing and display.
+
+### 6.2 Battery Maintenance
+
+The TrailMaster GPS Watch is equipped with a rechargeable lithium-ion battery. To optimize battery life, it is recommended to fully charge the watch before each outdoor adventure. Additionally, it is important to avoid exposing the watch to extreme temperatures, as this may affect battery performance.
+
+### 6.3 Storage
+
+When not in use, store your TrailMaster GPS Watch in a cool, dry place. Avoid prolonged exposure to direct sunlight or extreme temperatures, as this may cause damage to the device.
+
+### 6.4 Software Updates
+
+To ensure that your TrailMaster GPS Watch operates efficiently, it is important to regularly check for and install software updates. These updates may include improvements to the built-in maps, real-time location sharing, and overall performance of the watch.
+
+### 6.5 Technical Diagnostics
+
+For advanced maintenance and care, it is recommended to perform technical diagnostics using specialized industrial and scientific equipment. Refer to formal industry specification codes and standards bodies for detailed instructions on performing these diagnostics.
+
+## 7. Regulatory Compliance
+
+### 7.1 FCC Compliance
+
+The ExpeditionTech TrailMaster GPS Watch complies with Part 15 of the FCC rules. Operation is subject to the following two conditions: (1) this device may not cause harmful interference, and (2) this device must accept any interference received, including interference that may cause undesired operation. The user is cautioned that any changes or modifications made to this device that are not expressly approved by the manufacturer, as the party responsible for compliance, could void the user's authority to operate the equipment.
+
+### 7.2 CE Marking
+
+The CE marking on the ExpeditionTech TrailMaster GPS Watch indicates that it complies with the essential requirements of the relevant European health, safety, and environmental protection legislation. The device is in conformity with the essential requirements and other relevant provisions of Directive 1999/5/EC. The CE marking is affixed to the device to demonstrate that it meets the essential requirements for safety, health, and environmental protection. The user should only use the device in accordance with the instructions provided in the user manual.
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/wwwroot/Data/Example_GPS_Watch.pdf b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/wwwroot/Data/Example_GPS_Watch.pdf
deleted file mode 100644
index c87df644c58..00000000000
Binary files a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/wwwroot/Data/Example_GPS_Watch.pdf and /dev/null differ
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/wwwroot/lib/markdown_viewer/viewer.html b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/wwwroot/lib/markdown_viewer/viewer.html
new file mode 100644
index 00000000000..94447ffc47e
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/wwwroot/lib/markdown_viewer/viewer.html
@@ -0,0 +1,32 @@
+
+
+
+
+ Markdown viewer
+
+
+
+
+
+
+
+
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/wwwroot/lib/markdown_viewer/viewer.mjs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/wwwroot/lib/markdown_viewer/viewer.mjs
new file mode 100644
index 00000000000..78eb819c9a8
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.AzureOpenAI_Qdrant_Aspire.verified/aichatweb/aichatweb.Web/wwwroot/lib/markdown_viewer/viewer.mjs
@@ -0,0 +1,21 @@
+// Minimal Markdown viewer: fetches the file named by the `file` query-string
+// parameter, converts it to HTML with the bundled `marked` parser, sanitizes the
+// result with the bundled `dompurify` module, and injects it into the #content element.
+import { parse } from '../marked/dist/marked.esm.js';
+import purify from '../dompurify/dist/purify.es.mjs';
+
+const url = new URL(window.location);
+const fileUrl = url.searchParams.get('file');
+if (!fileUrl) {
+    throw new Error('File not specified in the URL query string');
+}
+
+const response = await fetch(fileUrl);
+if (!response.ok) {
+    // fetch() only rejects on network failure; without this check an HTTP error body (e.g. a 404 page) would be rendered as the document.
+    throw new Error(`Failed to load '${fileUrl}': ${response.status} ${response.statusText}`);
+}
+const text = await response.text();
+
+// The fetched Markdown is not trusted: sanitize the generated HTML before assigning it to innerHTML.
+document.getElementById('content').innerHTML = purify.sanitize(parse(text));
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Components/Pages/Chat/Chat.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Components/Pages/Chat/Chat.razor
index 8aa0ec9fd28..6fc5881c18f 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Components/Pages/Chat/Chat.razor
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Components/Pages/Chat/Chat.razor
@@ -13,7 +13,7 @@
To get started, try asking about these example documents. You can replace these with your own data and replace this message.
-
+
@@ -29,10 +29,12 @@
Do not answer questions about anything else.
Use only simple markdown to format your responses.
- Use the search tool to find relevant information. When you do this, end your
+ Use the LoadDocuments tool to prepare for searches before answering any questions.
+
+ Use the Search tool to find relevant information. When you do this, end your
reply with citations in the special XML format:
- exact quote here
+ exact quote here
Always include the citation in your response if there are results.
@@ -52,7 +54,10 @@
{
statefulMessageCount = 0;
messages.Add(new(ChatRole.System, SystemPrompt));
- chatOptions.Tools = [AIFunctionFactory.Create(SearchAsync)];
+ chatOptions.Tools = [
+ AIFunctionFactory.Create(LoadDocumentsAsync),
+ AIFunctionFactory.Create(SearchAsync)
+ ];
}
private async Task AddUserMessageAsync(ChatMessage userMessage)
@@ -106,7 +111,14 @@
await chatInput!.FocusAsync();
}
- [Description("Searches for information using a phrase or keyword")]
+ [Description("Loads the documents needed for performing searches. Must be completed before a search can be executed, but only needs to be completed once.")]
+ private async Task LoadDocumentsAsync()
+ {
+ await InvokeAsync(StateHasChanged);
+ await Search.LoadDocumentsAsync();
+ }
+
+ [Description("Searches for information using a phrase or keyword. Relies on documents already being loaded.")]
private async Task> SearchAsync(
[Description("The phrase to search for.")] string searchPhrase,
[Description("If possible, specify the filename to search that file only. If not provided or empty, the search includes all files.")] string? filenameFilter = null)
@@ -114,7 +126,7 @@
await InvokeAsync(StateHasChanged);
var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5);
return results.Select(result =>
- $"{result.Text}");
+ $"{result.Text}");
}
public void Dispose()
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Components/Pages/Chat/ChatCitation.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Components/Pages/Chat/ChatCitation.razor
index ccb5853cec4..667189beabd 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Components/Pages/Chat/ChatCitation.razor
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Components/Pages/Chat/ChatCitation.razor
@@ -17,10 +17,7 @@
public required string File { get; set; }
[Parameter]
- public int? PageNumber { get; set; }
-
- [Parameter]
- public required string Quote { get; set; }
+ public string? Quote { get; set; }
private string? viewerUrl;
@@ -28,11 +25,15 @@
{
viewerUrl = null;
- // If you ingest other types of content besides PDF files, construct a URL to an appropriate viewer here
- if (File.EndsWith(".pdf"))
+ // If you ingest other types of content besides Markdown or PDF files, construct a URL to an appropriate viewer here
+ if (File.EndsWith(".md"))
+ {
+ viewerUrl = $"lib/markdown_viewer/viewer.html?file=/Data/{HttpUtility.UrlEncode(File)}#:~:text={Uri.EscapeDataString(Quote ?? "")}";
+ }
+ else if (File.EndsWith(".pdf"))
{
var search = Quote?.Trim('.', ',', ' ', '\n', '\r', '\t', '"', '\'');
- viewerUrl = $"lib/pdf_viewer/viewer.html?file=/Data/{HttpUtility.UrlEncode(File)}#page={PageNumber}&search={HttpUtility.UrlEncode(search)}&phrase=true";
+ viewerUrl = $"lib/pdf_viewer/viewer.html?file=/Data/{HttpUtility.UrlEncode(File)}#search={HttpUtility.UrlEncode(search)}&phrase=true";
}
}
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Components/Pages/Chat/ChatMessageItem.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Components/Pages/Chat/ChatMessageItem.razor
index 92c20c70667..e45d92ab5f9 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Components/Pages/Chat/ChatMessageItem.razor
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Components/Pages/Chat/ChatMessageItem.razor
@@ -28,11 +28,24 @@ else if (Message.Role == ChatRole.Assistant)
@foreach (var citation in citations ?? [])
{
-
+
}
}
+ else if (content is FunctionCallContent { Name: "LoadDocuments" })
+ {
+
+
+
+ Loading relevant documents (this will take a minute)...
+
+
+ }
else if (content is FunctionCallContent { Name: "Search" } fcc && fcc.Arguments?.TryGetValue("searchPhrase", out var searchPhrase) is true)
{
@@ -56,9 +69,9 @@ else if (Message.Role == ChatRole.Assistant)
@code {
private static readonly ConditionalWeakTable
SubscribersLookup = new();
- private static readonly Regex CitationRegex = new(@"(?.*?)
", RegexOptions.NonBacktracking);
+ private static readonly Regex CitationRegex = new(@"(?.*?)
", RegexOptions.NonBacktracking);
- private List<(string File, int? Page, string Quote)>? citations;
+ private List<(string File, string Quote)>? citations;
[Parameter, EditorRequired]
public required ChatMessage Message { get; set; }
@@ -88,7 +101,7 @@ else if (Message.Role == ChatRole.Assistant)
{
var matches = CitationRegex.Matches(text);
citations = matches.Any()
- ? matches.Select(m => (m.Groups["file"].Value, int.TryParse(m.Groups["page"].Value, out var page) ? page : (int?)null, m.Groups["quote"].Value)).ToList()
+ ? matches.Select(m => (m.Groups["file"].Value, m.Groups["quote"].Value)).ToList()
: null;
}
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Program.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Program.cs
index 1ff3845eb08..27e50372647 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Program.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Program.cs
@@ -24,11 +24,12 @@
var vectorStorePath = Path.Combine(AppContext.BaseDirectory, "vector-store.db");
var vectorStoreConnectionString = $"Data Source={vectorStorePath}";
-builder.Services.AddSqliteCollection("data-aichatweb-chunks", vectorStoreConnectionString);
-builder.Services.AddSqliteCollection("data-aichatweb-documents", vectorStoreConnectionString);
+builder.Services.AddSqliteVectorStore(_ => vectorStoreConnectionString);
+builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, vectorStoreConnectionString);
-builder.Services.AddScoped();
+builder.Services.AddSingleton();
builder.Services.AddSingleton();
+builder.Services.AddKeyedSingleton("ingestion_directory", new DirectoryInfo(Path.Combine(builder.Environment.WebRootPath, "Data")));
builder.Services.AddChatClient(chatClient).UseFunctionInvocation().UseLogging();
builder.Services.AddEmbeddingGenerator(embeddingGenerator);
@@ -49,12 +50,4 @@
app.MapRazorComponents()
.AddInteractiveServerRenderMode();
-// By default, we ingest PDF files from the /wwwroot/Data directory. You can ingest from
-// other sources by implementing IIngestionSource.
-// Important: ensure that any content you ingest is trusted, as it may be reflected back
-// to users or could be a source of prompt injection risk.
-await DataIngestor.IngestDataAsync(
- app.Services,
- new PDFDirectorySource(Path.Combine(builder.Environment.WebRootPath, "Data")));
-
app.Run();
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/IngestedChunk.cs
index 2c5a38c7912..68af3ef20fb 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/IngestedChunk.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/IngestedChunk.cs
@@ -1,24 +1,31 @@
-using Microsoft.Extensions.VectorData;
+using System.Text.Json.Serialization;
+using Microsoft.Extensions.VectorData;
namespace aichatweb.Services;
public class IngestedChunk
{
- private const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model
- private const string VectorDistanceFunction = DistanceFunction.CosineDistance;
+ public const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model
+ public const string VectorDistanceFunction = DistanceFunction.CosineDistance;
+ public const string CollectionName = "data-aichatweb-chunks";
- [VectorStoreKey]
- public required string Key { get; set; }
+ [VectorStoreKey(StorageName = "key")]
+ [JsonPropertyName("key")]
+ public required Guid Key { get; set; }
- [VectorStoreData(IsIndexed = true)]
+ [VectorStoreData(StorageName = "documentid")]
+ [JsonPropertyName("documentid")]
public required string DocumentId { get; set; }
- [VectorStoreData]
- public int PageNumber { get; set; }
-
- [VectorStoreData]
+ [VectorStoreData(StorageName = "content")]
+ [JsonPropertyName("content")]
public required string Text { get; set; }
- [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction)]
+ [VectorStoreData(StorageName = "context")]
+ [JsonPropertyName("context")]
+ public string? Context { get; set; }
+
+ [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")]
+ [JsonPropertyName("embedding")]
public string? Vector => Text;
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/IngestedDocument.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/IngestedDocument.cs
deleted file mode 100644
index f101cfdc96a..00000000000
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/IngestedDocument.cs
+++ /dev/null
@@ -1,25 +0,0 @@
-using Microsoft.Extensions.VectorData;
-
-namespace aichatweb.Services;
-
-public class IngestedDocument
-{
- private const int VectorDimensions = 2;
- private const string VectorDistanceFunction = DistanceFunction.CosineDistance;
-
- [VectorStoreKey]
- public required string Key { get; set; }
-
- [VectorStoreData(IsIndexed = true)]
- public required string SourceId { get; set; }
-
- [VectorStoreData]
- public required string DocumentId { get; set; }
-
- [VectorStoreData]
- public required string DocumentVersion { get; set; }
-
- // The vector is not used but required for some vector databases
- [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction)]
- public ReadOnlyMemory Vector { get; set; } = new ReadOnlyMemory([0, 0]);
-}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/Ingestion/DataIngestor.cs
index 89fe287ebed..d97b986b694 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/Ingestion/DataIngestor.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/Ingestion/DataIngestor.cs
@@ -1,58 +1,35 @@
using Microsoft.Extensions.AI;
+using Microsoft.Extensions.DataIngestion;
+using Microsoft.Extensions.DataIngestion.Chunkers;
using Microsoft.Extensions.VectorData;
+using Microsoft.ML.Tokenizers;
namespace aichatweb.Services.Ingestion;
public class DataIngestor(
ILogger logger,
- VectorStoreCollection chunksCollection,
- VectorStoreCollection documentsCollection)
+ ILoggerFactory loggerFactory,
+ VectorStore vectorStore,
+ IEmbeddingGenerator> embeddingGenerator)
{
- public static async Task IngestDataAsync(IServiceProvider services, IIngestionSource source)
+ public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern)
{
- using var scope = services.CreateScope();
- var ingestor = scope.ServiceProvider.GetRequiredService();
- await ingestor.IngestDataAsync(source);
- }
-
- public async Task IngestDataAsync(IIngestionSource source)
- {
- await chunksCollection.EnsureCollectionExistsAsync();
- await documentsCollection.EnsureCollectionExistsAsync();
-
- var sourceId = source.SourceId;
- var documentsForSource = await documentsCollection.GetAsync(doc => doc.SourceId == sourceId, top: int.MaxValue).ToListAsync();
-
- var deletedDocuments = await source.GetDeletedDocumentsAsync(documentsForSource);
- foreach (var deletedDocument in deletedDocuments)
+ using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, new()
{
- logger.LogInformation("Removing ingested data for {DocumentId}", deletedDocument.DocumentId);
- await DeleteChunksForDocumentAsync(deletedDocument);
- await documentsCollection.DeleteAsync(deletedDocument.Key);
- }
-
- var modifiedDocuments = await source.GetNewOrModifiedDocumentsAsync(documentsForSource);
- foreach (var modifiedDocument in modifiedDocuments)
- {
- logger.LogInformation("Processing {DocumentId}", modifiedDocument.DocumentId);
- await DeleteChunksForDocumentAsync(modifiedDocument);
-
- await documentsCollection.UpsertAsync(modifiedDocument);
-
- var newRecords = await source.CreateChunksForDocumentAsync(modifiedDocument);
- await chunksCollection.UpsertAsync(newRecords);
- }
-
- logger.LogInformation("Ingestion is up-to-date");
-
- async Task DeleteChunksForDocumentAsync(IngestedDocument document)
+ CollectionName = IngestedChunk.CollectionName,
+ DistanceFunction = IngestedChunk.VectorDistanceFunction,
+ IncrementalIngestion = false,
+ });
+
+ using var pipeline = new IngestionPipeline(
+ reader: new DocumentReader(directory),
+ chunker: new SemanticSimilarityChunker(embeddingGenerator, new(TiktokenTokenizer.CreateForModel("gpt-4o"))),
+ writer: writer,
+ loggerFactory: loggerFactory);
+
+ await foreach (var result in pipeline.ProcessAsync(directory, searchPattern))
{
- var documentId = document.DocumentId;
- var chunksToDelete = await chunksCollection.GetAsync(record => record.DocumentId == documentId, int.MaxValue).ToListAsync();
- if (chunksToDelete.Count != 0)
- {
- await chunksCollection.DeleteAsync(chunksToDelete.Select(r => r.Key));
- }
+ logger.LogInformation("Completed processing '{id}'. Succeeded: '{succeeded}'.", result.DocumentId, result.Succeeded);
}
}
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/Ingestion/DocumentReader.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/Ingestion/DocumentReader.cs
new file mode 100644
index 00000000000..315a6ad3d53
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/Ingestion/DocumentReader.cs
@@ -0,0 +1,36 @@
+using Microsoft.Extensions.DataIngestion;
+
+namespace aichatweb.Services.Ingestion;
+
+internal sealed class DocumentReader(DirectoryInfo rootDirectory) : IngestionDocumentReader // Routes each document to the Markdown or PDF reader according to its media type.
+{
+ private readonly MarkdownReader _markdownReader = new(); // Used for "text/markdown" streams.
+ private readonly PdfPigReader _pdfReader = new(); // Used for "application/pdf" streams.
+
+ public override Task ReadAsync(FileInfo source, string identifier, string? mediaType = null, CancellationToken cancellationToken = default) // File overload: normalizes identifier and media type, then defers to the base implementation.
+ {
+ if (Path.IsPathFullyQualified(identifier))
+ {
+ // Normalize the identifier to its path relative to rootDirectory
+ identifier = Path.GetRelativePath(rootDirectory.FullName, identifier);
+ }
+
+ mediaType = GetCustomMediaType(source) ?? mediaType; // An extension-derived media type takes precedence over the caller-supplied one.
+ return base.ReadAsync(source, identifier, mediaType, cancellationToken); // NOTE(review): presumably the base opens the file and calls the Stream overload below - confirm.
+ }
+
+ public override Task ReadAsync(Stream source, string identifier, string mediaType, CancellationToken cancellationToken = default) // Stream overload: dispatches on mediaType.
+ => mediaType switch
+ {
+ "application/pdf" => _pdfReader.ReadAsync(source, identifier, mediaType, cancellationToken),
+ "text/markdown" => _markdownReader.ReadAsync(source, identifier, mediaType, cancellationToken),
+ _ => throw new InvalidOperationException($"Unsupported media type '{mediaType}'"), // Any other media type is rejected.
+ };
+
+ private static string? GetCustomMediaType(FileInfo source) // Returns a media type override for the file's extension, or null when there is none.
+ => source.Extension switch
+ {
+ ".md" => "text/markdown", // Constant pattern: the match is case-sensitive, so ".MD" yields null.
+ _ => null
+ };
+}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/Ingestion/IIngestionSource.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/Ingestion/IIngestionSource.cs
deleted file mode 100644
index 540cac117e7..00000000000
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/Ingestion/IIngestionSource.cs
+++ /dev/null
@@ -1,12 +0,0 @@
-namespace aichatweb.Services.Ingestion;
-
-public interface IIngestionSource
-{
- string SourceId { get; }
-
- Task> GetNewOrModifiedDocumentsAsync(IReadOnlyList existingDocuments);
-
- Task> GetDeletedDocumentsAsync(IReadOnlyList existingDocuments);
-
- Task> CreateChunksForDocumentAsync(IngestedDocument document);
-}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/Ingestion/PDFDirectorySource.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/Ingestion/PDFDirectorySource.cs
deleted file mode 100644
index 0be02a9d008..00000000000
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/Ingestion/PDFDirectorySource.cs
+++ /dev/null
@@ -1,71 +0,0 @@
-using Microsoft.SemanticKernel.Text;
-using UglyToad.PdfPig;
-using UglyToad.PdfPig.Content;
-using UglyToad.PdfPig.DocumentLayoutAnalysis.PageSegmenter;
-using UglyToad.PdfPig.DocumentLayoutAnalysis.WordExtractor;
-
-namespace aichatweb.Services.Ingestion;
-
-public class PDFDirectorySource(string sourceDirectory) : IIngestionSource
-{
- public static string SourceFileId(string path) => Path.GetFileName(path);
- public static string SourceFileVersion(string path) => File.GetLastWriteTimeUtc(path).ToString("o");
-
- public string SourceId => $"{nameof(PDFDirectorySource)}:{sourceDirectory}";
-
- public Task> GetNewOrModifiedDocumentsAsync(IReadOnlyList existingDocuments)
- {
- var results = new List();
- var sourceFiles = Directory.GetFiles(sourceDirectory, "*.pdf");
- var existingDocumentsById = existingDocuments.ToDictionary(d => d.DocumentId);
-
- foreach (var sourceFile in sourceFiles)
- {
- var sourceFileId = SourceFileId(sourceFile);
- var sourceFileVersion = SourceFileVersion(sourceFile);
- var existingDocumentVersion = existingDocumentsById.TryGetValue(sourceFileId, out var existingDocument) ? existingDocument.DocumentVersion : null;
- if (existingDocumentVersion != sourceFileVersion)
- {
- results.Add(new() { Key = Guid.CreateVersion7().ToString(), SourceId = SourceId, DocumentId = sourceFileId, DocumentVersion = sourceFileVersion });
- }
- }
-
- return Task.FromResult((IEnumerable)results);
- }
-
- public Task> GetDeletedDocumentsAsync(IReadOnlyList existingDocuments)
- {
- var currentFiles = Directory.GetFiles(sourceDirectory, "*.pdf");
- var currentFileIds = currentFiles.ToLookup(SourceFileId);
- var deletedDocuments = existingDocuments.Where(d => !currentFileIds.Contains(d.DocumentId));
- return Task.FromResult(deletedDocuments);
- }
-
- public Task> CreateChunksForDocumentAsync(IngestedDocument document)
- {
- using var pdf = PdfDocument.Open(Path.Combine(sourceDirectory, document.DocumentId));
- var paragraphs = pdf.GetPages().SelectMany(GetPageParagraphs).ToList();
-
- return Task.FromResult(paragraphs.Select(p => new IngestedChunk
- {
- Key = Guid.CreateVersion7().ToString(),
- DocumentId = document.DocumentId,
- PageNumber = p.PageNumber,
- Text = p.Text,
- }));
- }
-
- private static IEnumerable<(int PageNumber, int IndexOnPage, string Text)> GetPageParagraphs(Page pdfPage)
- {
- var letters = pdfPage.Letters;
- var words = NearestNeighbourWordExtractor.Instance.GetWords(letters);
- var textBlocks = DocstrumBoundingBoxes.Instance.GetBlocks(words);
- var pageText = string.Join(Environment.NewLine + Environment.NewLine,
- textBlocks.Select(t => t.Text.ReplaceLineEndings(" ")));
-
-#pragma warning disable SKEXP0050 // Type is for evaluation purposes only
- return TextChunker.SplitPlainTextParagraphs([pageText], 200)
- .Select((text, index) => (pdfPage.Number, index, text));
-#pragma warning restore SKEXP0050 // Type is for evaluation purposes only
- }
-}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/Ingestion/PdfPigReader.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/Ingestion/PdfPigReader.cs
new file mode 100644
index 00000000000..f6de539eb22
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/Ingestion/PdfPigReader.cs
@@ -0,0 +1,42 @@
+using Microsoft.Extensions.DataIngestion;
+using UglyToad.PdfPig;
+using UglyToad.PdfPig.Content;
+using UglyToad.PdfPig.DocumentLayoutAnalysis.PageSegmenter;
+using UglyToad.PdfPig.DocumentLayoutAnalysis.WordExtractor;
+
+namespace aichatweb.Services.Ingestion;
+
+internal sealed class PdfPigReader : IngestionDocumentReader // Builds an IngestionDocument from a PDF stream using PdfPig, one section per page.
+{
+ public override Task ReadAsync(Stream source, string identifier, string mediaType, CancellationToken cancellationToken = default) // mediaType and cancellationToken are not used in this body.
+ {
+ using var pdf = PdfDocument.Open(source); // Disposed when this method returns; every page is converted before then.
+ var document = new IngestionDocument(identifier);
+ foreach (var page in pdf.GetPages())
+ {
+ document.Sections.Add(GetPageSection(page));
+ }
+ return Task.FromResult(document); // The work above is synchronous, so the returned task is already completed.
+ }
+
+ private static IngestionDocumentSection GetPageSection(Page pdfPage) // Converts one PDF page into a section with one paragraph per detected text block.
+ {
+ var section = new IngestionDocumentSection
+ {
+ PageNumber = pdfPage.Number,
+ };
+
+ var letters = pdfPage.Letters;
+ var words = NearestNeighbourWordExtractor.Instance.GetWords(letters); // Group the page's letters into words.
+
+ foreach (var textBlock in DocstrumBoundingBoxes.Instance.GetBlocks(words)) // Group the words into text blocks.
+ {
+ section.Elements.Add(new IngestionDocumentParagraph(textBlock.Text) // The block text is passed to the constructor and also assigned to Text.
+ {
+ Text = textBlock.Text
+ });
+ }
+
+ return section;
+ }
+}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/SemanticSearch.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/SemanticSearch.cs
index 291c6c4b4a9..8072f8bcddb 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/SemanticSearch.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/Services/SemanticSearch.cs
@@ -1,12 +1,22 @@
-using Microsoft.Extensions.VectorData;
+using aichatweb.Services.Ingestion;
+using Microsoft.Extensions.VectorData;
namespace aichatweb.Services;
public class SemanticSearch(
- VectorStoreCollection vectorCollection)
+ VectorStoreCollection vectorCollection,
+ [FromKeyedServices("ingestion_directory")] DirectoryInfo ingestionDirectory,
+ DataIngestor dataIngestor)
{
+ private Task? _ingestionTask;
+
+ public async Task LoadDocumentsAsync() => await ( _ingestionTask ??= dataIngestor.IngestDataAsync(ingestionDirectory, searchPattern: "*.*")); // Starts ingestion on the first call and awaits the same task afterwards; because of ??=, a faulted task is also kept and re-awaited.
+
public async Task> SearchAsync(string text, string? documentIdFilter, int maxResults)
{
+ // Ensure documents have been loaded before searching
+ await LoadDocumentsAsync();
+
var nearest = vectorCollection.SearchAsync(text, maxResults, new VectorSearchOptions
{
Filter = documentIdFilter is { Length: > 0 } ? record => record.DocumentId == documentIdFilter : null,
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/aichatweb.csproj b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/aichatweb.csproj
index 1e694a1d6a6..dad2183dcfd 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/aichatweb.csproj
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/aichatweb.csproj
@@ -1,7 +1,7 @@
- net9.0
+ net10.0
enable
enable
secret
@@ -10,10 +10,12 @@
-
+
+
-
-
+
+
+
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/wwwroot/Data/Example_GPS_Watch.md b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/wwwroot/Data/Example_GPS_Watch.md
new file mode 100644
index 00000000000..f7d042edf83
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/wwwroot/Data/Example_GPS_Watch.md
@@ -0,0 +1,193 @@
+# TrailMaster GPS Watch
+
+## 1. Introduction
+
+ExpeditionTech is proud to introduce the TrailMaster GPS Watch, a state-of-the-art outdoor electronics device designed for the most demanding outdoor adventurers. This watch is meticulously engineered to provide accurate location information, real-time sharing, and reliable mapping capabilities. The TrailMaster GPS Watch is built to withstand the harshest environmental conditions while providing users with the critical data they need to navigate through challenging terrain and optimize their outdoor experiences.
+
+### 1.1 Product Overview
+
+The TrailMaster GPS Watch is a high-performance outdoor electronics device that combines advanced GPS technology with rugged design. It features a durable, shock-resistant casing, a high-contrast display for optimal visibility in bright sunlight, and a built-in rechargeable battery for extended use in remote areas. The TrailMaster GPS Watch is equipped with a comprehensive suite of navigation tools, including topographic maps, trail tracking, and waypoint management. Additionally, it offers real-time location sharing for enhanced safety and group coordination during outdoor activities.
+
+### 1.2 Intended Use
+
+The TrailMaster GPS Watch is intended for use by experienced outdoor enthusiasts, including hikers, backpackers, trail runners, and mountaineers. This device is designed to provide essential navigation information and location tracking in remote, off-grid environments. Users can rely on the TrailMaster GPS Watch to plan and execute their outdoor adventures with confidence, knowing they have access to accurate maps, location data, and real-time sharing capabilities. Although the TrailMaster GPS Watch is engineered for extreme durability, users should always exercise caution and take appropriate safety measures when engaging in outdoor activities.
+
+#### 1.2.1 Safety Precautions
+
+Before using the TrailMaster GPS Watch, users must ensure that the device is fully charged and properly calibrated according to the manufacturer's specifications. It is essential to familiarize oneself with the watch's features, controls, and navigation functions before venturing into remote outdoor areas. Users should also be aware of potential environmental hazards, such as extreme weather conditions, rugged terrain, and limited access to emergency services. Proper planning, preparation, and adherence to best practices for outdoor safety are critical when using the TrailMaster GPS Watch.
+
+## 2. Technical Specifications
+
+### 2.1 Hardware
+
+#### 2.1.1 Processor and Memory
+
+The TrailMaster GPS Watch is equipped with a high-performance ARM Cortex processor, ensuring fast and efficient operation for all your outdoor activities. The device also comes with 4GB of internal memory, providing ample storage for maps, routes, and waypoints. The processor operates at 1.2GHz, allowing for quick map rendering and location updates.
+
+#### 2.1.2 GPS Module
+
+The GPS module in the TrailMaster GPS Watch utilizes a state-of-the-art multi-constellation receiver, supporting GPS, GLONASS, and Galileo satellite systems. This enables precise location tracking and navigation, even in challenging outdoor environments. The module also features advanced signal processing algorithms, ensuring reliable performance in remote areas with limited satellite visibility.
+
+#### 2.1.3 Sensors
+
+Equipped with a comprehensive set of sensors, the TrailMaster GPS Watch offers an array of valuable data for outdoor enthusiasts. The built-in altimeter provides accurate altitude readings, while the barometer monitors changes in air pressure to forecast weather conditions. Additionally, the watch includes a digital compass for reliable orientation, ensuring a seamless navigation experience in the great outdoors.
+
+### 2.2 Software
+
+#### 2.2.1 Operating System
+
+The TrailMaster GPS Watch runs on a proprietary operating system optimized for outdoor navigation and tracking. This custom OS combines robustness and efficiency, allowing for seamless integration with the device's hardware and sensors. The interface is designed for intuitive interaction, enabling users to access maps, location sharing, and other features with ease, even in challenging outdoor conditions.
+
+#### 2.2.2 Map Data
+
+The TrailMaster GPS Watch comes preloaded with detailed topographic maps, providing comprehensive coverage of trails, terrain, and points of interest. These maps are stored locally on the device, ensuring rapid access and smooth navigation without relying on cellular or data network connectivity. Users can also import additional map data via the dedicated USB port, expanding the watch's mapping capabilities for customized adventures.
+
+## 3. Setup and Installation
+
+### 3.1 Charging
+
+Before using your TrailMaster GPS Watch for the first time, it is essential to ensure that the device is fully charged. To charge your GPS watch, follow these steps:
+
+1. Locate the charging port cover on the back of the watch.
+2. Gently lift the cover to expose the charging port.
+3. Connect the provided USB charging cable to the port, ensuring a secure connection.
+4. Plug the other end of the cable into a power source, such as a computer or USB wall adapter.
+5. Allow the watch to charge for at least 2 hours, or until the battery indicator on the display shows it is fully charged.
+
+For optimal performance, it is recommended to fully charge the device before each use and to avoid overcharging. Please refer to the TrailMaster GPS Watch technical specifications for detailed battery charging information.
+
+### 3.2 Initial Configuration
+
+Once your TrailMaster GPS Watch is fully charged, you can proceed with the initial configuration by following these steps:
+
+1. Press and hold the power button located on the side of the watch to turn it on.
+2. Follow the on-screen instructions to select your language, set the date and time, and calibrate the GPS.
+3. Connect the watch to your smartphone using the TrailMaster companion app to enable real-time location sharing and receive notifications.
+4. Customize the watch settings, such as display preferences and map views, to suit your outdoor activities.
+
+Before using the GPS functionality, it is crucial to ensure that the watch has a clear view of the sky to acquire GPS signals. Please refer to the TrailMaster GPS Watch user guide for detailed instructions on GPS calibration and satellite acquisition.
+
+## 4. Operation
+
+### 4.1 Basic Functions
+
+The TrailMaster GPS Watch is designed for outdoor use and offers a variety of basic functions to assist users in navigating and staying connected during their adventures.
+
+#### 4.1.1 Powering On and Off
+
+To power on the TrailMaster GPS Watch, press and hold the power button (located on the right side of the watch) for 3 seconds. The watch will display the ExpeditionTech logo and then proceed to the main navigation screen. To power off the watch, press and hold the power button and select "Power Off" from the menu.
+
+#### 4.1.2 Accessing Built-in Maps
+
+The TrailMaster GPS Watch comes pre-loaded with detailed maps of various outdoor locations. To access the maps, press the map button (located on the left side of the watch) to bring up the map interface. From here, you can view your current location, set waypoints, and plan routes.
+
+#### 4.1.3 Real-time Location Sharing
+
+Utilize the real-time location sharing feature to transmit your current location to designated contacts. Press the share button (located on the top of the watch) and select the specific contacts you wish to share your location with. This feature requires a stable GPS signal and a connected smartphone with the TrailMaster app installed.
+
+### 4.2 Advanced Features
+
+The TrailMaster GPS Watch offers advanced features to enhance the user experience and provide additional functionality for outdoor enthusiasts.
+
+#### 4.2.1 Rugged Design
+
+The TrailMaster GPS Watch is built to withstand rugged outdoor conditions, including extreme temperatures, water exposure, and impact. The durable casing and reinforced strap ensure that the watch remains operational in challenging environments.
+
+#### 4.2.2 Navigation Tools
+
+In addition to basic map access, the TrailMaster GPS Watch includes advanced navigation tools such as compass, altimeter, and barometer. These tools provide vital information for navigation and weather monitoring while in the field.
+
+#### 4.2.3 Customizing Data Screens
+
+Users can customize the data screens on the TrailMaster GPS Watch to display the specific information they require during their outdoor activities. From the main menu, navigate to the settings and select "Data Screens" to adjust the layout and content of the screens.
+
+#### 4.2.4 Tracking Performance Metrics
+
+The TrailMaster GPS Watch is equipped with sensors to track performance metrics such as distance traveled, speed, elevation gain, and heart rate. Use the tracking mode to monitor these metrics in real-time or review them after completing an activity.
+
+## 5. Troubleshooting
+
+### 5.1 Diagnostic Tools and Equipment
+
+When troubleshooting the TrailMaster GPS Watch, it is essential to use specialized industrial diagnostic tools and equipment to perform accurate diagnostic tests. These tools and equipment include:
+
+- **GPS Signal Analyzer**: Use a GPS signal analyzer to check the strength and quality of the satellite signals received by the watch.
+- **RF Spectrum Analyzer**: An RF spectrum analyzer is required to analyze the radio frequency spectrum and identify any interference affecting the GPS reception.
+- **Ruggedness Test Equipment**: Perform ruggedness tests using shock and vibration equipment to ensure the watch can withstand outdoor adventures without performance issues.
+
+It is essential to use these specialized tools and equipment to accurately diagnose any issues with the TrailMaster GPS Watch and ensure optimal performance.
+
+### 5.2 Common Issues
+
+#### 5.2.1 GPS Signal Loss
+
+**Symptoms:**
+
+The watch displays "No GPS Signal" or intermittently loses GPS signal during use.
+
+**Potential Causes:**
+
+- Obstruction of satellite signals due to dense foliage, tall buildings, or natural terrain features.
+- Radio frequency interference affecting GPS reception.
+- Wear and tear on the GPS antenna or receiver.
+
+**Troubleshooting Steps:**
+
+1. Check the surroundings for any obstructions blocking satellite signals.
+2. Use an RF spectrum analyzer to identify any potential sources of interference.
+3. Perform a diagnostic test using a GPS signal analyzer to assess the strength and quality of the GPS signal received by the watch.
+4. If the issue persists, contact ExpeditionTech customer support for further assistance.
+
+#### 5.2.2 Connectivity Issues
+
+**Symptoms:**
+
+- Inability to share real-time location or connect to other devices.
+- Unreliable Bluetooth connectivity.
+
+**Potential Causes:**
+
+- Bluetooth interference from other electronic devices.
+- Signal obstruction due to environmental factors.
+- Software or firmware issues.
+
+**Troubleshooting Steps:**
+
+1. Ensure the watch is within the recommended Bluetooth range of the connected device.
+2. Identify and eliminate potential sources of Bluetooth interference in the vicinity.
+3. Update the watch's firmware to the latest version to address any software-related connectivity issues.
+4. If connectivity problems persist, perform a comprehensive diagnostic test using a specialized Bluetooth signal analyzer.
+
+By following these troubleshooting steps and using specialized diagnostic tools and equipment, users can effectively identify and address common issues with the TrailMaster GPS Watch.
+
+## 6. Maintenance and Care
+
+### 6.1 Cleaning
+
+It is important to regularly clean your TrailMaster GPS Watch to ensure optimal performance. Use a soft, damp cloth to gently wipe the watch face and straps. Do not use harsh chemicals or solvents, as these may damage the watch's rugged design and built-in maps.
+
+### 6.2 Battery Maintenance
+
+The TrailMaster GPS Watch is equipped with a rechargeable lithium-ion battery. To optimize battery life, it is recommended to fully charge the watch before each outdoor adventure. Additionally, it is important to avoid exposing the watch to extreme temperatures, as this may affect battery performance.
+
+### 6.3 Storage
+
+When not in use, store your TrailMaster GPS Watch in a cool, dry place. Avoid prolonged exposure to direct sunlight or extreme temperatures, as this may cause damage to the device.
+
+### 6.4 Software Updates
+
+To ensure that your TrailMaster GPS Watch operates efficiently, it is important to regularly check for and install software updates. These updates may include improvements to the built-in maps, real-time location sharing, and overall performance of the watch.
+
+### 6.5 Technical Diagnostics
+
+For advanced maintenance and care, it is recommended to perform technical diagnostics using specialized industrial and scientific equipment. Refer to formal industry specification codes and standards bodies for detailed instructions on performing these diagnostics.
+
+## 7. Regulatory Compliance
+
+### 7.1 FCC Compliance
+
+The ExpeditionTech TrailMaster GPS Watch complies with Part 15 of the FCC rules. Operation is subject to the following two conditions: (1) this device may not cause harmful interference, and (2) this device must accept any interference received, including interference that may cause undesired operation. Any changes or modifications not expressly approved by the party responsible for compliance could void the user's authority to operate the equipment. The user is cautioned that any changes or modifications made to this device that are not expressly approved by the manufacturer could void the user's authority to operate the equipment.
+
+### 7.2 CE Marking
+
+The CE marking on the ExpeditionTech TrailMaster GPS Watch indicates that it complies with the essential requirements of the relevant European health, safety, and environmental protection legislation. The device is in conformity with the essential requirements and other relevant provisions of Directive 1999/5/EC. The CE marking is affixed to the device to demonstrate that it meets the essential requirements for safety, health, and environmental protection. The user should only use the device in accordance with the instructions provided in the user manual.
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/wwwroot/Data/Example_GPS_Watch.pdf b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/wwwroot/Data/Example_GPS_Watch.pdf
deleted file mode 100644
index c87df644c58..00000000000
Binary files a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/wwwroot/Data/Example_GPS_Watch.pdf and /dev/null differ
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/wwwroot/lib/markdown_viewer/viewer.html b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/wwwroot/lib/markdown_viewer/viewer.html
new file mode 100644
index 00000000000..94447ffc47e
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/wwwroot/lib/markdown_viewer/viewer.html
@@ -0,0 +1,32 @@
+
+
+
+
+ Markdown viewer
+
+
+
+
+
+
+
+
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/wwwroot/lib/markdown_viewer/viewer.mjs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/wwwroot/lib/markdown_viewer/viewer.mjs
new file mode 100644
index 00000000000..78eb819c9a8
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Basic.verified/aichatweb/wwwroot/lib/markdown_viewer/viewer.mjs
@@ -0,0 +1,13 @@
+import { parse } from '../marked/dist/marked.esm.js';
+import purify from '../dompurify/dist/purify.es.mjs';
+
+const url = new URL(window.location);
+const fileUrl = url.searchParams.get('file');
+if (!fileUrl) {
+ throw new Error('File not specified in the URL query string');
+}
+
+var response = await fetch(fileUrl);
+var text = await response.text();
+
+document.getElementById('content').innerHTML = purify.sanitize(parse(text));
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.AppHost/AppHost.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.AppHost/AppHost.cs
index d41eea07e40..bf116a0c47e 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.AppHost/AppHost.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.AppHost/AppHost.cs
@@ -6,7 +6,13 @@
// dotnet user-secrets set ConnectionStrings:openai "Endpoint=https://models.inference.ai.azure.com;Key=YOUR-API-KEY"
var openai = builder.AddConnectionString("openai");
+var markitdown = builder.AddContainer("markitdown", "mcp/markitdown")
+ .WithArgs("--http", "--host", "0.0.0.0", "--port", "3001")
+ .WithHttpEndpoint(targetPort: 3001, name: "http");
+
var webApp = builder.AddProject("aichatweb-app");
webApp.WithReference(openai);
+webApp
+ .WithEnvironment("MARKITDOWN_MCP_URL", markitdown.GetEndpoint("http"));
builder.Build().Run();
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.AppHost/aichatweb.AppHost.csproj b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.AppHost/aichatweb.AppHost.csproj
index fa6140a8751..436f2241582 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.AppHost/aichatweb.AppHost.csproj
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.AppHost/aichatweb.AppHost.csproj
@@ -4,7 +4,7 @@
Exe
- net9.0
+ net10.0
enable
enable
secret
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.ServiceDefaults/Extensions.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.ServiceDefaults/Extensions.cs
index b44d60b604b..8d0b0cd5d67 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.ServiceDefaults/Extensions.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.ServiceDefaults/Extensions.cs
@@ -76,7 +76,8 @@ public static TBuilder ConfigureOpenTelemetry(this TBuilder builder) w
// Uncomment the following line to enable gRPC instrumentation (requires the OpenTelemetry.Instrumentation.GrpcNetClient package)
//.AddGrpcClientInstrumentation()
.AddHttpClientInstrumentation()
- .AddSource("Experimental.Microsoft.Extensions.AI");
+ .AddSource("Experimental.Microsoft.Extensions.AI")
+ .AddSource("Experimental.Microsoft.Extensions.DataIngestion");
});
builder.AddOpenTelemetryExporters();
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.ServiceDefaults/aichatweb.ServiceDefaults.csproj b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.ServiceDefaults/aichatweb.ServiceDefaults.csproj
index 474dd445fae..a70a3ca8cd4 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.ServiceDefaults/aichatweb.ServiceDefaults.csproj
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.ServiceDefaults/aichatweb.ServiceDefaults.csproj
@@ -1,7 +1,7 @@
- net9.0
+ net10.0
enable
enable
true
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor
index 8aa0ec9fd28..6fc5881c18f 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor
@@ -13,7 +13,7 @@
To get started, try asking about these example documents. You can replace these with your own data and replace this message.
-
+
@@ -29,10 +29,12 @@
Do not answer questions about anything else.
Use only simple markdown to format your responses.
- Use the search tool to find relevant information. When you do this, end your
+ Use the LoadDocuments tool to prepare for searches before answering any questions.
+
+ Use the Search tool to find relevant information. When you do this, end your
reply with citations in the special XML format:
- exact quote here
+ exact quote here
Always include the citation in your response if there are results.
@@ -52,7 +54,10 @@
{
statefulMessageCount = 0;
messages.Add(new(ChatRole.System, SystemPrompt));
- chatOptions.Tools = [AIFunctionFactory.Create(SearchAsync)];
+ chatOptions.Tools = [
+ AIFunctionFactory.Create(LoadDocumentsAsync),
+ AIFunctionFactory.Create(SearchAsync)
+ ];
}
private async Task AddUserMessageAsync(ChatMessage userMessage)
@@ -106,7 +111,14 @@
await chatInput!.FocusAsync();
}
- [Description("Searches for information using a phrase or keyword")]
+ [Description("Loads the documents needed for performing searches. Must be completed before a search can be executed, but only needs to be completed once.")]
+ private async Task LoadDocumentsAsync()
+ {
+ await InvokeAsync(StateHasChanged);
+ await Search.LoadDocumentsAsync();
+ }
+
+ [Description("Searches for information using a phrase or keyword. Relies on documents already being loaded.")]
private async Task> SearchAsync(
[Description("The phrase to search for.")] string searchPhrase,
[Description("If possible, specify the filename to search that file only. If not provided or empty, the search includes all files.")] string? filenameFilter = null)
@@ -114,7 +126,7 @@
await InvokeAsync(StateHasChanged);
var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5);
return results.Select(result =>
- $"{result.Text}");
+ $"{result.Text}");
}
public void Dispose()
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatCitation.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatCitation.razor
index ccb5853cec4..667189beabd 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatCitation.razor
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatCitation.razor
@@ -17,10 +17,7 @@
public required string File { get; set; }
[Parameter]
- public int? PageNumber { get; set; }
-
- [Parameter]
- public required string Quote { get; set; }
+ public string? Quote { get; set; }
private string? viewerUrl;
@@ -28,11 +25,15 @@
{
viewerUrl = null;
- // If you ingest other types of content besides PDF files, construct a URL to an appropriate viewer here
- if (File.EndsWith(".pdf"))
+ // If you ingest other types of content besides Markdown or PDF files, construct a URL to an appropriate viewer here
+ if (File.EndsWith(".md"))
+ {
+ viewerUrl = $"lib/markdown_viewer/viewer.html?file=/Data/{HttpUtility.UrlEncode(File)}#:~:text={Uri.EscapeDataString(Quote ?? "")}";
+ }
+ else if (File.EndsWith(".pdf"))
{
var search = Quote?.Trim('.', ',', ' ', '\n', '\r', '\t', '"', '\'');
- viewerUrl = $"lib/pdf_viewer/viewer.html?file=/Data/{HttpUtility.UrlEncode(File)}#page={PageNumber}&search={HttpUtility.UrlEncode(search)}&phrase=true";
+ viewerUrl = $"lib/pdf_viewer/viewer.html?file=/Data/{HttpUtility.UrlEncode(File)}#search={HttpUtility.UrlEncode(search)}&phrase=true";
}
}
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatMessageItem.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatMessageItem.razor
index 92c20c70667..e45d92ab5f9 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatMessageItem.razor
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatMessageItem.razor
@@ -28,11 +28,24 @@ else if (Message.Role == ChatRole.Assistant)
@foreach (var citation in citations ?? [])
{
-
+
}
}
+ else if (content is FunctionCallContent { Name: "LoadDocuments" })
+ {
+
+
+
+ Loading relevant documents (this will take a minute)...
+
+
+ }
else if (content is FunctionCallContent { Name: "Search" } fcc && fcc.Arguments?.TryGetValue("searchPhrase", out var searchPhrase) is true)
{
@@ -56,9 +69,9 @@ else if (Message.Role == ChatRole.Assistant)
@code {
private static readonly ConditionalWeakTable
SubscribersLookup = new();
- private static readonly Regex CitationRegex = new(@"(?.*?)
", RegexOptions.NonBacktracking);
+ private static readonly Regex CitationRegex = new(@"(?.*?)
", RegexOptions.NonBacktracking);
- private List<(string File, int? Page, string Quote)>? citations;
+ private List<(string File, string Quote)>? citations;
[Parameter, EditorRequired]
public required ChatMessage Message { get; set; }
@@ -88,7 +101,7 @@ else if (Message.Role == ChatRole.Assistant)
{
var matches = CitationRegex.Matches(text);
citations = matches.Any()
- ? matches.Select(m => (m.Groups["file"].Value, int.TryParse(m.Groups["page"].Value, out var page) ? page : (int?)null, m.Groups["quote"].Value)).ToList()
+ ? matches.Select(m => (m.Groups["file"].Value, m.Groups["quote"].Value)).ToList()
: null;
}
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Program.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Program.cs
index 6d23308d93a..e47bad71cc8 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Program.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Program.cs
@@ -17,10 +17,11 @@
var vectorStorePath = Path.Combine(AppContext.BaseDirectory, "vector-store.db");
var vectorStoreConnectionString = $"Data Source={vectorStorePath}";
-builder.Services.AddSqliteCollection("data-aichatweb-chunks", vectorStoreConnectionString);
-builder.Services.AddSqliteCollection("data-aichatweb-documents", vectorStoreConnectionString);
-builder.Services.AddScoped();
+builder.Services.AddSqliteVectorStore(_ => vectorStoreConnectionString);
+builder.Services.AddSqliteCollection(IngestedChunk.CollectionName, vectorStoreConnectionString);
+builder.Services.AddSingleton();
builder.Services.AddSingleton();
+builder.Services.AddKeyedSingleton("ingestion_directory", new DirectoryInfo(Path.Combine(builder.Environment.WebRootPath, "Data")));
var app = builder.Build();
@@ -41,12 +42,4 @@
app.MapRazorComponents()
.AddInteractiveServerRenderMode();
-// By default, we ingest PDF files from the /wwwroot/Data directory. You can ingest from
-// other sources by implementing IIngestionSource.
-// Important: ensure that any content you ingest is trusted, as it may be reflected back
-// to users or could be a source of prompt injection risk.
-await DataIngestor.IngestDataAsync(
- app.Services,
- new PDFDirectorySource(Path.Combine(builder.Environment.WebRootPath, "Data")));
-
app.Run();
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs
index 92e50e61414..af609ea239e 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs
@@ -1,24 +1,31 @@
-using Microsoft.Extensions.VectorData;
+using System.Text.Json.Serialization;
+using Microsoft.Extensions.VectorData;
namespace aichatweb.Web.Services;
public class IngestedChunk
{
- private const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model
- private const string VectorDistanceFunction = DistanceFunction.CosineDistance;
+ public const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model
+ public const string VectorDistanceFunction = DistanceFunction.CosineDistance;
+ public const string CollectionName = "data-aichatweb-chunks";
- [VectorStoreKey]
- public required string Key { get; set; }
+ [VectorStoreKey(StorageName = "key")]
+ [JsonPropertyName("key")]
+ public required Guid Key { get; set; }
- [VectorStoreData(IsIndexed = true)]
+ [VectorStoreData(StorageName = "documentid")]
+ [JsonPropertyName("documentid")]
public required string DocumentId { get; set; }
- [VectorStoreData]
- public int PageNumber { get; set; }
-
- [VectorStoreData]
+ [VectorStoreData(StorageName = "content")]
+ [JsonPropertyName("content")]
public required string Text { get; set; }
- [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction)]
+ [VectorStoreData(StorageName = "context")]
+ [JsonPropertyName("context")]
+ public string? Context { get; set; }
+
+ [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")]
+ [JsonPropertyName("embedding")]
public string? Vector => Text;
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/IngestedDocument.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/IngestedDocument.cs
deleted file mode 100644
index 49a8143005e..00000000000
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/IngestedDocument.cs
+++ /dev/null
@@ -1,25 +0,0 @@
-using Microsoft.Extensions.VectorData;
-
-namespace aichatweb.Web.Services;
-
-public class IngestedDocument
-{
- private const int VectorDimensions = 2;
- private const string VectorDistanceFunction = DistanceFunction.CosineDistance;
-
- [VectorStoreKey]
- public required string Key { get; set; }
-
- [VectorStoreData(IsIndexed = true)]
- public required string SourceId { get; set; }
-
- [VectorStoreData]
- public required string DocumentId { get; set; }
-
- [VectorStoreData]
- public required string DocumentVersion { get; set; }
-
- // The vector is not used but required for some vector databases
- [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction)]
- public ReadOnlyMemory Vector { get; set; } = new ReadOnlyMemory([0, 0]);
-}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
index 2fe43370071..9dd366a03a5 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
@@ -1,58 +1,35 @@
using Microsoft.Extensions.AI;
+using Microsoft.Extensions.DataIngestion;
+using Microsoft.Extensions.DataIngestion.Chunkers;
using Microsoft.Extensions.VectorData;
+using Microsoft.ML.Tokenizers;
namespace aichatweb.Web.Services.Ingestion;
public class DataIngestor(
ILogger logger,
- VectorStoreCollection chunksCollection,
- VectorStoreCollection documentsCollection)
+ ILoggerFactory loggerFactory,
+ VectorStore vectorStore,
+ IEmbeddingGenerator> embeddingGenerator)
{
- public static async Task IngestDataAsync(IServiceProvider services, IIngestionSource source)
+ public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern)
{
- using var scope = services.CreateScope();
- var ingestor = scope.ServiceProvider.GetRequiredService();
- await ingestor.IngestDataAsync(source);
- }
-
- public async Task IngestDataAsync(IIngestionSource source)
- {
- await chunksCollection.EnsureCollectionExistsAsync();
- await documentsCollection.EnsureCollectionExistsAsync();
-
- var sourceId = source.SourceId;
- var documentsForSource = await documentsCollection.GetAsync(doc => doc.SourceId == sourceId, top: int.MaxValue).ToListAsync();
-
- var deletedDocuments = await source.GetDeletedDocumentsAsync(documentsForSource);
- foreach (var deletedDocument in deletedDocuments)
+ using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, new()
{
- logger.LogInformation("Removing ingested data for {DocumentId}", deletedDocument.DocumentId);
- await DeleteChunksForDocumentAsync(deletedDocument);
- await documentsCollection.DeleteAsync(deletedDocument.Key);
- }
-
- var modifiedDocuments = await source.GetNewOrModifiedDocumentsAsync(documentsForSource);
- foreach (var modifiedDocument in modifiedDocuments)
- {
- logger.LogInformation("Processing {DocumentId}", modifiedDocument.DocumentId);
- await DeleteChunksForDocumentAsync(modifiedDocument);
-
- await documentsCollection.UpsertAsync(modifiedDocument);
-
- var newRecords = await source.CreateChunksForDocumentAsync(modifiedDocument);
- await chunksCollection.UpsertAsync(newRecords);
- }
-
- logger.LogInformation("Ingestion is up-to-date");
-
- async Task DeleteChunksForDocumentAsync(IngestedDocument document)
+ CollectionName = IngestedChunk.CollectionName,
+ DistanceFunction = IngestedChunk.VectorDistanceFunction,
+ IncrementalIngestion = false,
+ });
+
+ using var pipeline = new IngestionPipeline(
+ reader: new DocumentReader(directory),
+ chunker: new SemanticSimilarityChunker(embeddingGenerator, new(TiktokenTokenizer.CreateForModel("gpt-4o"))),
+ writer: writer,
+ loggerFactory: loggerFactory);
+
+ await foreach (var result in pipeline.ProcessAsync(directory, searchPattern))
{
- var documentId = document.DocumentId;
- var chunksToDelete = await chunksCollection.GetAsync(record => record.DocumentId == documentId, int.MaxValue).ToListAsync();
- if (chunksToDelete.Count != 0)
- {
- await chunksCollection.DeleteAsync(chunksToDelete.Select(r => r.Key));
- }
+ logger.LogInformation("Completed processing '{id}'. Succeeded: '{succeeded}'.", result.DocumentId, result.Succeeded);
}
}
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/DocumentReader.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/DocumentReader.cs
new file mode 100644
index 00000000000..60fcdbdc128
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/DocumentReader.cs
@@ -0,0 +1,42 @@
+using Microsoft.Extensions.DataIngestion;
+
+namespace aichatweb.Web.Services.Ingestion;
+
+internal sealed class DocumentReader(DirectoryInfo rootDirectory) : IngestionDocumentReader
+{
+ private readonly MarkdownReader _markdownReader = new();
+ private readonly MarkItDownMcpReader _pdfReader = new(mcpServerUri: GetMarkItDownMcpServerUrl());
+
+ public override Task ReadAsync(FileInfo source, string identifier, string? mediaType = null, CancellationToken cancellationToken = default)
+ {
+ if (Path.IsPathFullyQualified(identifier))
+ {
+ // Normalize the identifier to its relative path
+ identifier = Path.GetRelativePath(rootDirectory.FullName, identifier);
+ }
+
+ mediaType = GetCustomMediaType(source) ?? mediaType;
+ return base.ReadAsync(source, identifier, mediaType, cancellationToken);
+ }
+
+ public override Task ReadAsync(Stream source, string identifier, string mediaType, CancellationToken cancellationToken = default)
+ => mediaType switch
+ {
+ "application/pdf" => _pdfReader.ReadAsync(source, identifier, mediaType, cancellationToken),
+ "text/markdown" => _markdownReader.ReadAsync(source, identifier, mediaType, cancellationToken),
+ _ => throw new InvalidOperationException($"Unsupported media type '{mediaType}'"),
+ };
+
+ private static string? GetCustomMediaType(FileInfo source)
+ => source.Extension switch
+ {
+ ".md" => "text/markdown",
+ _ => null
+ };
+
+ private static Uri GetMarkItDownMcpServerUrl()
+ {
+ var markItDownMcpUrl = $"{Environment.GetEnvironmentVariable("MARKITDOWN_MCP_URL")}/mcp";
+ return new Uri(markItDownMcpUrl);
+ }
+}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/IIngestionSource.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/IIngestionSource.cs
deleted file mode 100644
index a1c6b2191d1..00000000000
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/IIngestionSource.cs
+++ /dev/null
@@ -1,12 +0,0 @@
-namespace aichatweb.Web.Services.Ingestion;
-
-public interface IIngestionSource
-{
- string SourceId { get; }
-
- Task> GetNewOrModifiedDocumentsAsync(IReadOnlyList existingDocuments);
-
- Task> GetDeletedDocumentsAsync(IReadOnlyList existingDocuments);
-
- Task> CreateChunksForDocumentAsync(IngestedDocument document);
-}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/PDFDirectorySource.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/PDFDirectorySource.cs
deleted file mode 100644
index 32e9f225c08..00000000000
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/Ingestion/PDFDirectorySource.cs
+++ /dev/null
@@ -1,71 +0,0 @@
-using Microsoft.SemanticKernel.Text;
-using UglyToad.PdfPig;
-using UglyToad.PdfPig.Content;
-using UglyToad.PdfPig.DocumentLayoutAnalysis.PageSegmenter;
-using UglyToad.PdfPig.DocumentLayoutAnalysis.WordExtractor;
-
-namespace aichatweb.Web.Services.Ingestion;
-
-public class PDFDirectorySource(string sourceDirectory) : IIngestionSource
-{
- public static string SourceFileId(string path) => Path.GetFileName(path);
- public static string SourceFileVersion(string path) => File.GetLastWriteTimeUtc(path).ToString("o");
-
- public string SourceId => $"{nameof(PDFDirectorySource)}:{sourceDirectory}";
-
- public Task> GetNewOrModifiedDocumentsAsync(IReadOnlyList existingDocuments)
- {
- var results = new List();
- var sourceFiles = Directory.GetFiles(sourceDirectory, "*.pdf");
- var existingDocumentsById = existingDocuments.ToDictionary(d => d.DocumentId);
-
- foreach (var sourceFile in sourceFiles)
- {
- var sourceFileId = SourceFileId(sourceFile);
- var sourceFileVersion = SourceFileVersion(sourceFile);
- var existingDocumentVersion = existingDocumentsById.TryGetValue(sourceFileId, out var existingDocument) ? existingDocument.DocumentVersion : null;
- if (existingDocumentVersion != sourceFileVersion)
- {
- results.Add(new() { Key = Guid.CreateVersion7().ToString(), SourceId = SourceId, DocumentId = sourceFileId, DocumentVersion = sourceFileVersion });
- }
- }
-
- return Task.FromResult((IEnumerable)results);
- }
-
- public Task> GetDeletedDocumentsAsync(IReadOnlyList existingDocuments)
- {
- var currentFiles = Directory.GetFiles(sourceDirectory, "*.pdf");
- var currentFileIds = currentFiles.ToLookup(SourceFileId);
- var deletedDocuments = existingDocuments.Where(d => !currentFileIds.Contains(d.DocumentId));
- return Task.FromResult(deletedDocuments);
- }
-
- public Task> CreateChunksForDocumentAsync(IngestedDocument document)
- {
- using var pdf = PdfDocument.Open(Path.Combine(sourceDirectory, document.DocumentId));
- var paragraphs = pdf.GetPages().SelectMany(GetPageParagraphs).ToList();
-
- return Task.FromResult(paragraphs.Select(p => new IngestedChunk
- {
- Key = Guid.CreateVersion7().ToString(),
- DocumentId = document.DocumentId,
- PageNumber = p.PageNumber,
- Text = p.Text,
- }));
- }
-
- private static IEnumerable<(int PageNumber, int IndexOnPage, string Text)> GetPageParagraphs(Page pdfPage)
- {
- var letters = pdfPage.Letters;
- var words = NearestNeighbourWordExtractor.Instance.GetWords(letters);
- var textBlocks = DocstrumBoundingBoxes.Instance.GetBlocks(words);
- var pageText = string.Join(Environment.NewLine + Environment.NewLine,
- textBlocks.Select(t => t.Text.ReplaceLineEndings(" ")));
-
-#pragma warning disable SKEXP0050 // Type is for evaluation purposes only
- return TextChunker.SplitPlainTextParagraphs([pageText], 200)
- .Select((text, index) => (pdfPage.Number, index, text));
-#pragma warning restore SKEXP0050 // Type is for evaluation purposes only
- }
-}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs
index 84fb719f6ae..d043c8efb84 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs
@@ -1,12 +1,22 @@
-using Microsoft.Extensions.VectorData;
+using aichatweb.Web.Services.Ingestion;
+using Microsoft.Extensions.VectorData;
namespace aichatweb.Web.Services;
public class SemanticSearch(
- VectorStoreCollection vectorCollection)
+ VectorStoreCollection vectorCollection,
+ [FromKeyedServices("ingestion_directory")] DirectoryInfo ingestionDirectory,
+ DataIngestor dataIngestor)
{
+ private Task? _ingestionTask;
+
+ public async Task LoadDocumentsAsync() => await ( _ingestionTask ??= dataIngestor.IngestDataAsync(ingestionDirectory, searchPattern: "*.*"));
+
public async Task> SearchAsync(string text, string? documentIdFilter, int maxResults)
{
+ // Ensure documents have been loaded before searching
+ await LoadDocumentsAsync();
+
var nearest = vectorCollection.SearchAsync(text, maxResults, new VectorSearchOptions
{
Filter = documentIdFilter is { Length: > 0 } ? record => record.DocumentId == documentIdFilter : null,
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/aichatweb.Web.csproj b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/aichatweb.Web.csproj
index 22c00c41978..21a99cc28f2 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/aichatweb.Web.csproj
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/aichatweb.Web.csproj
@@ -1,7 +1,7 @@
- net9.0
+ net10.0
enable
enable
secret
@@ -11,10 +11,12 @@
-
-
-
-
+
+
+
+
+
+
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/wwwroot/Data/Example_GPS_Watch.md b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/wwwroot/Data/Example_GPS_Watch.md
new file mode 100644
index 00000000000..f7d042edf83
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/wwwroot/Data/Example_GPS_Watch.md
@@ -0,0 +1,193 @@
+# TrailMaster GPS Watch
+
+## 1. Introduction
+
+ExpeditionTech is proud to introduce the TrailMaster GPS Watch, a state-of-the-art outdoor electronics device designed for the most demanding outdoor adventurers. This watch is meticulously engineered to provide accurate location information, real-time sharing, and reliable mapping capabilities. The TrailMaster GPS Watch is built to withstand the harshest environmental conditions while providing users with the critical data they need to navigate through challenging terrain and optimize their outdoor experiences.
+
+### 1.1 Product Overview
+
+The TrailMaster GPS Watch is a high-performance outdoor electronics device that combines advanced GPS technology with rugged design. It features a durable, shock-resistant casing, a high-contrast display for optimal visibility in bright sunlight, and a built-in rechargeable battery for extended use in remote areas. The TrailMaster GPS Watch is equipped with a comprehensive suite of navigation tools, including topographic maps, trail tracking, and waypoint management. Additionally, it offers real-time location sharing for enhanced safety and group coordination during outdoor activities.
+
+### 1.2 Intended Use
+
+The TrailMaster GPS Watch is intended for use by experienced outdoor enthusiasts, including hikers, backpackers, trail runners, and mountaineers. This device is designed to provide essential navigation information and location tracking in remote, off-grid environments. Users can rely on the TrailMaster GPS Watch to plan and execute their outdoor adventures with confidence, knowing they have access to accurate maps, location data, and real-time sharing capabilities. Although the TrailMaster GPS Watch is engineered for extreme durability, users should always exercise caution and take appropriate safety measures when engaging in outdoor activities.
+
+#### 1.2.1 Safety Precautions
+
+Before using the TrailMaster GPS Watch, users must ensure that the device is fully charged and properly calibrated according to the manufacturer's specifications. It is essential to familiarize oneself with the watch's features, controls, and navigation functions before venturing into remote outdoor areas. Users should also be aware of potential environmental hazards, such as extreme weather conditions, rugged terrain, and limited access to emergency services. Proper planning, preparation, and adherence to best practices for outdoor safety are critical when using the TrailMaster GPS Watch.
+
+## 2. Technical Specifications
+
+### 2.1 Hardware
+
+#### 2.1.1 Processor and Memory
+
+The TrailMaster GPS Watch is equipped with a high-performance ARM Cortex processor, ensuring fast and efficient operation for all your outdoor activities. The device also comes with 4GB of internal memory, providing ample storage for maps, routes, and waypoints. The processor operates at 1.2GHz, allowing for quick map rendering and location updates.
+
+#### 2.1.2 GPS Module
+
+The GPS module in the TrailMaster GPS Watch utilizes a state-of-the-art multi-constellation receiver, supporting GPS, GLONASS, and Galileo satellite systems. This enables precise location tracking and navigation, even in challenging outdoor environments. The module also features advanced signal processing algorithms, ensuring reliable performance in remote areas with limited satellite visibility.
+
+#### 2.1.3 Sensors
+
+Equipped with a comprehensive set of sensors, the TrailMaster GPS Watch offers an array of valuable data for outdoor enthusiasts. The built-in altimeter provides accurate altitude readings, while the barometer monitors changes in air pressure to forecast weather conditions. Additionally, the watch includes a digital compass for reliable orientation, ensuring a seamless navigation experience in the great outdoors.
+
+### 2.2 Software
+
+#### 2.2.1 Operating System
+
+The TrailMaster GPS Watch runs on a proprietary operating system optimized for outdoor navigation and tracking. This custom OS combines robustness and efficiency, allowing for seamless integration with the device's hardware and sensors. The interface is designed for intuitive interaction, enabling users to access maps, location sharing, and other features with ease, even in challenging outdoor conditions.
+
+#### 2.2.2 Map Data
+
+The TrailMaster GPS Watch comes preloaded with detailed topographic maps, providing comprehensive coverage of trails, terrain, and points of interest. These maps are stored locally on the device, ensuring rapid access and smooth navigation without relying on cellular or data network connectivity. Users can also import additional map data via the dedicated USB port, expanding the watch's mapping capabilities for customized adventures.
+
+## 3. Setup and Installation
+
+### 3.1 Charging
+
+Before using your TrailMaster GPS Watch for the first time, it is essential to ensure that the device is fully charged. To charge your GPS watch, follow these steps:
+
+1. Locate the charging port cover on the back of the watch.
+2. Gently lift the cover to expose the charging port.
+3. Connect the provided USB charging cable to the port, ensuring a secure connection.
+4. Plug the other end of the cable into a power source, such as a computer or USB wall adapter.
+5. Allow the watch to charge for at least 2 hours, or until the battery indicator on the display shows it is fully charged.
+
+For optimal performance, it is recommended to fully charge the device before each use and to avoid overcharging. Please refer to the TrailMaster GPS Watch technical specifications for detailed battery charging information.
+
+### 3.2 Initial Configuration
+
+Once your TrailMaster GPS Watch is fully charged, you can proceed with the initial configuration by following these steps:
+
+1. Press and hold the power button located on the side of the watch to turn it on.
+2. Follow the on-screen instructions to select your language, set the date and time, and calibrate the GPS.
+3. Connect the watch to your smartphone using the TrailMaster companion app to enable real-time location sharing and receive notifications.
+4. Customize the watch settings, such as display preferences and map views, to suit your outdoor activities.
+
+Before using the GPS functionality, it is crucial to ensure that the watch has a clear view of the sky to acquire GPS signals. Please refer to the TrailMaster GPS Watch user guide for detailed instructions on GPS calibration and satellite acquisition.
+
+## 4. Operation
+
+### 4.1 Basic Functions
+
+The TrailMaster GPS Watch is designed for outdoor use and offers a variety of basic functions to assist users in navigating and staying connected during their adventures.
+
+#### 4.1.1 Powering On and Off
+
+To power on the TrailMaster GPS Watch, press and hold the power button (located on the right side of the watch) for 3 seconds. The watch will display the ExpeditionTech logo and then proceed to the main navigation screen. To power off the watch, press and hold the power button and select "Power Off" from the menu.
+
+#### 4.1.2 Accessing Built-in Maps
+
+The TrailMaster GPS Watch comes pre-loaded with detailed maps of various outdoor locations. To access the maps, press the map button (located on the left side of the watch) to bring up the map interface. From here, you can view your current location, set waypoints, and plan routes.
+
+#### 4.1.3 Real-time Location Sharing
+
+Utilize the real-time location sharing feature to transmit your current location to designated contacts. Press the share button (located on the top of the watch) and select the specific contacts you wish to share your location with. This feature requires a stable GPS signal and a connected smartphone with the TrailMaster app installed.
+
+### 4.2 Advanced Features
+
+The TrailMaster GPS Watch offers advanced features to enhance the user experience and provide additional functionality for outdoor enthusiasts.
+
+#### 4.2.1 Rugged Design
+
+The TrailMaster GPS Watch is built to withstand rugged outdoor conditions, including extreme temperatures, water exposure, and impact. The durable casing and reinforced strap ensure that the watch remains operational in challenging environments.
+
+#### 4.2.2 Navigation Tools
+
+In addition to basic map access, the TrailMaster GPS Watch includes advanced navigation tools such as compass, altimeter, and barometer. These tools provide vital information for navigation and weather monitoring while in the field.
+
+#### 4.2.3 Customizing Data Screens
+
+Users can customize the data screens on the TrailMaster GPS Watch to display the specific information they require during their outdoor activities. From the main menu, navigate to the settings and select "Data Screens" to adjust the layout and content of the screens.
+
+#### 4.2.4 Tracking Performance Metrics
+
+The TrailMaster GPS Watch is equipped with sensors to track performance metrics such as distance traveled, speed, elevation gain, and heart rate. Use the tracking mode to monitor these metrics in real-time or review them after completing an activity.
+
+## 5. Troubleshooting
+
+### 5.1 Diagnostic Tools and Equipment
+
+When troubleshooting the TrailMaster GPS Watch, it is essential to use specialized industrial diagnostic tools and equipment to perform accurate diagnostic tests. These tools and equipment include:
+
+- **GPS Signal Analyzer**: Use a GPS signal analyzer to check the strength and quality of the satellite signals received by the watch.
+- **RF Spectrum Analyzer**: An RF spectrum analyzer is required to analyze the radio frequency spectrum and identify any interference affecting the GPS reception.
+- **Ruggedness Test Equipment**: Perform ruggedness tests using shock and vibration equipment to ensure the watch can withstand outdoor adventures without performance issues.
+
+It is essential to use these specialized tools and equipment to accurately diagnose any issues with the TrailMaster GPS Watch and ensure optimal performance.
+
+### 5.2 Common Issues
+
+#### 5.2.1 GPS Signal Loss
+
+**Symptoms:**
+
+The watch displays "No GPS Signal" or intermittently loses GPS signal during use.
+
+**Potential Causes:**
+
+- Obstruction of satellite signals due to dense foliage, tall buildings, or natural terrain features.
+- Radio frequency interference affecting GPS reception.
+- Wear and tear on the GPS antenna or receiver.
+
+**Troubleshooting Steps:**
+
+1. Check the surroundings for any obstructions blocking satellite signals.
+2. Use an RF spectrum analyzer to identify any potential sources of interference.
+3. Perform a diagnostics test using a GPS signal analyzer to assess the strength and quality of the GPS signal received by the watch.
+4. If the issue persists, contact ExpeditionTech customer support for further assistance.
+
+#### 5.2.2 Connectivity Issues
+
+**Symptoms:**
+
+- Inability to share real-time location or connect to other devices.
+- Unreliable Bluetooth connectivity.
+
+**Potential Causes:**
+
+- Bluetooth interference from other electronic devices.
+- Signal obstruction due to environmental factors.
+- Software or firmware issues.
+
+**Troubleshooting Steps:**
+
+1. Ensure the watch is within the recommended Bluetooth range of the connected device.
+2. Identify and eliminate potential sources of Bluetooth interference in the vicinity.
+3. Update the watch's firmware to the latest version to address any software-related connectivity issues.
+4. If connectivity problems persist, perform a comprehensive diagnostics test using a specialized Bluetooth signal analyzer.
+
+By following these troubleshooting steps and using specialized diagnostic tools and equipment, users can effectively identify and address common issues with the TrailMaster GPS Watch.
+
+## 6. Maintenance and Care
+
+### 6.1 Cleaning
+
+It is important to regularly clean your TrailMaster GPS Watch to ensure optimal performance. Use a soft, damp cloth to gently wipe the watch face and straps. Do not use harsh chemicals or solvents, as these may damage the watch's rugged design and built-in maps.
+
+### 6.2 Battery Maintenance
+
+The TrailMaster GPS Watch is equipped with a rechargeable lithium-ion battery. To optimize battery life, it is recommended to fully charge the watch before each outdoor adventure. Additionally, it is important to avoid exposing the watch to extreme temperatures, as this may affect battery performance.
+
+### 6.3 Storage
+
+When not in use, store your TrailMaster GPS Watch in a cool, dry place. Avoid prolonged exposure to direct sunlight or extreme temperatures, as this may cause damage to the device.
+
+### 6.4 Software Updates
+
+To ensure that your TrailMaster GPS Watch operates efficiently, it is important to regularly check for and install software updates. These updates may include improvements to the built-in maps, real-time location sharing, and overall performance of the watch.
+
+### 6.5 Technical Diagnostics
+
+For advanced maintenance and care, it is recommended to perform technical diagnostics using specialized industrial and scientific equipment. Refer to formal industry specification codes and standards bodies for detailed instructions on performing these diagnostics.
+
+## 7. Regulatory Compliance
+
+### 7.1 FCC Compliance
+
+The ExpeditionTech TrailMaster GPS Watch complies with Part 15 of the FCC rules. Operation is subject to the following two conditions: (1) this device may not cause harmful interference, and (2) this device must accept any interference received, including interference that may cause undesired operation. Any changes or modifications not expressly approved by the party responsible for compliance could void the user's authority to operate the equipment. The user is cautioned that any changes or modifications made to this device that are not expressly approved by the manufacturer could void the user's authority to operate the equipment.
+
+### 7.2 CE Marking
+
+The CE marking on the ExpeditionTech TrailMaster GPS Watch indicates that it complies with the essential requirements of the relevant European health, safety, and environmental protection legislation. The device is in conformity with the essential requirements and other relevant provisions of Directive 1999/5/EC. The CE marking is affixed to the device to demonstrate that it meets the essential requirements for safety, health, and environmental protection. The user should only use the device in accordance with the instructions provided in the user manual.
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/wwwroot/Data/Example_GPS_Watch.pdf b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/wwwroot/Data/Example_GPS_Watch.pdf
deleted file mode 100644
index c87df644c58..00000000000
Binary files a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/wwwroot/Data/Example_GPS_Watch.pdf and /dev/null differ
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/wwwroot/lib/markdown_viewer/viewer.html b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/wwwroot/lib/markdown_viewer/viewer.html
new file mode 100644
index 00000000000..94447ffc47e
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/wwwroot/lib/markdown_viewer/viewer.html
@@ -0,0 +1,32 @@
+
+
+
+
+ Markdown viewer
+
+
+
+
+
+
+
+
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/wwwroot/lib/markdown_viewer/viewer.mjs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/wwwroot/lib/markdown_viewer/viewer.mjs
new file mode 100644
index 00000000000..78eb819c9a8
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.BasicAspire.verified/aichatweb/aichatweb.Web/wwwroot/lib/markdown_viewer/viewer.mjs
@@ -0,0 +1,13 @@
+import { parse } from '../marked/dist/marked.esm.js';
+import purify from '../dompurify/dist/purify.es.mjs';
+
+const url = new URL(window.location);
+const fileUrl = url.searchParams.get('file');
+if (!fileUrl) {
+ throw new Error('File not specified in the URL query string');
+}
+
+var response = await fetch(fileUrl);
+var text = await response.text();
+
+document.getElementById('content').innerHTML = purify.sanitize(parse(text));
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.AppHost/AppHost.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.AppHost/AppHost.cs
index 9521e1a0297..9f664334406 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.AppHost/AppHost.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.AppHost/AppHost.cs
@@ -9,6 +9,10 @@
.WithDataVolume()
.WithLifetime(ContainerLifetime.Persistent);
+var markitdown = builder.AddContainer("markitdown", "mcp/markitdown")
+ .WithArgs("--http", "--host", "0.0.0.0", "--port", "3001")
+ .WithHttpEndpoint(targetPort: 3001, name: "http");
+
var webApp = builder.AddProject("aichatweb-app");
webApp
.WithReference(chat)
@@ -18,5 +22,7 @@
webApp
.WithReference(vectorDB)
.WaitFor(vectorDB);
+webApp
+ .WithEnvironment("MARKITDOWN_MCP_URL", markitdown.GetEndpoint("http"));
builder.Build().Run();
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.AppHost/aichatweb.AppHost.csproj b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.AppHost/aichatweb.AppHost.csproj
index 70239b97fa8..d29128f7763 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.AppHost/aichatweb.AppHost.csproj
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.AppHost/aichatweb.AppHost.csproj
@@ -4,7 +4,7 @@
Exe
- net9.0
+ net10.0
enable
enable
secret
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.ServiceDefaults/Extensions.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.ServiceDefaults/Extensions.cs
index b44d60b604b..8d0b0cd5d67 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.ServiceDefaults/Extensions.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.ServiceDefaults/Extensions.cs
@@ -76,7 +76,8 @@ public static TBuilder ConfigureOpenTelemetry(this TBuilder builder) w
// Uncomment the following line to enable gRPC instrumentation (requires the OpenTelemetry.Instrumentation.GrpcNetClient package)
//.AddGrpcClientInstrumentation()
.AddHttpClientInstrumentation()
- .AddSource("Experimental.Microsoft.Extensions.AI");
+ .AddSource("Experimental.Microsoft.Extensions.AI")
+ .AddSource("Experimental.Microsoft.Extensions.DataIngestion");
});
builder.AddOpenTelemetryExporters();
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.ServiceDefaults/aichatweb.ServiceDefaults.csproj b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.ServiceDefaults/aichatweb.ServiceDefaults.csproj
index 474dd445fae..a70a3ca8cd4 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.ServiceDefaults/aichatweb.ServiceDefaults.csproj
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.ServiceDefaults/aichatweb.ServiceDefaults.csproj
@@ -1,7 +1,7 @@
- net9.0
+ net10.0
enable
enable
true
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor
index 8aa0ec9fd28..6fc5881c18f 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/Chat.razor
@@ -13,7 +13,7 @@
To get started, try asking about these example documents. You can replace these with your own data and replace this message.
-
+
@@ -29,10 +29,12 @@
Do not answer questions about anything else.
Use only simple markdown to format your responses.
- Use the search tool to find relevant information. When you do this, end your
+ Use the LoadDocuments tool to prepare for searches before answering any questions.
+
+ Use the Search tool to find relevant information. When you do this, end your
reply with citations in the special XML format:
- exact quote here
+ exact quote here
Always include the citation in your response if there are results.
@@ -52,7 +54,10 @@
{
statefulMessageCount = 0;
messages.Add(new(ChatRole.System, SystemPrompt));
- chatOptions.Tools = [AIFunctionFactory.Create(SearchAsync)];
+ chatOptions.Tools = [
+ AIFunctionFactory.Create(LoadDocumentsAsync),
+ AIFunctionFactory.Create(SearchAsync)
+ ];
}
private async Task AddUserMessageAsync(ChatMessage userMessage)
@@ -106,7 +111,14 @@
await chatInput!.FocusAsync();
}
- [Description("Searches for information using a phrase or keyword")]
+ [Description("Loads the documents needed for performing searches. Must be completed before a search can be executed, but only needs to be completed once.")]
+ private async Task LoadDocumentsAsync() // Chat tool wrapper: refreshes the component, then delegates to Search.LoadDocumentsAsync().
+ {
+ await InvokeAsync(StateHasChanged); // Request a re-render before the load starts (presumably so the in-progress tool call is displayed while loading - confirm).
+ await Search.LoadDocumentsAsync();
+ }
+
+ [Description("Searches for information using a phrase or keyword. Relies on documents already being loaded.")]
private async Task> SearchAsync(
[Description("The phrase to search for.")] string searchPhrase,
[Description("If possible, specify the filename to search that file only. If not provided or empty, the search includes all files.")] string? filenameFilter = null)
@@ -114,7 +126,7 @@
await InvokeAsync(StateHasChanged);
var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5);
return results.Select(result =>
- $"{result.Text}");
+ $"{result.Text}");
}
public void Dispose()
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatCitation.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatCitation.razor
index ccb5853cec4..667189beabd 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatCitation.razor
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatCitation.razor
@@ -17,10 +17,7 @@
public required string File { get; set; }
[Parameter]
- public int? PageNumber { get; set; }
-
- [Parameter]
- public required string Quote { get; set; }
+ public string? Quote { get; set; }
private string? viewerUrl;
@@ -28,11 +25,15 @@
{
viewerUrl = null;
- // If you ingest other types of content besides PDF files, construct a URL to an appropriate viewer here
- if (File.EndsWith(".pdf"))
+ // If you ingest other types of content besides Markdown or PDF files, construct a URL to an appropriate viewer here
+ if (File.EndsWith(".md"))
+ {
+ viewerUrl = $"lib/markdown_viewer/viewer.html?file=/Data/{HttpUtility.UrlEncode(File)}#:~:text={Uri.EscapeDataString(Quote ?? "")}";
+ }
+ else if (File.EndsWith(".pdf"))
{
var search = Quote?.Trim('.', ',', ' ', '\n', '\r', '\t', '"', '\'');
- viewerUrl = $"lib/pdf_viewer/viewer.html?file=/Data/{HttpUtility.UrlEncode(File)}#page={PageNumber}&search={HttpUtility.UrlEncode(search)}&phrase=true";
+ viewerUrl = $"lib/pdf_viewer/viewer.html?file=/Data/{HttpUtility.UrlEncode(File)}#search={HttpUtility.UrlEncode(search)}&phrase=true";
}
}
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatMessageItem.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatMessageItem.razor
index 92c20c70667..e45d92ab5f9 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatMessageItem.razor
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Components/Pages/Chat/ChatMessageItem.razor
@@ -28,11 +28,24 @@ else if (Message.Role == ChatRole.Assistant)
@foreach (var citation in citations ?? [])
{
-
+
}
}
+ else if (content is FunctionCallContent { Name: "LoadDocuments" })
+ {
+
+
+
+ Loading relevant documents (this will take a minute)...
+
+
+ }
else if (content is FunctionCallContent { Name: "Search" } fcc && fcc.Arguments?.TryGetValue("searchPhrase", out var searchPhrase) is true)
{
@@ -56,9 +69,9 @@ else if (Message.Role == ChatRole.Assistant)
@code {
private static readonly ConditionalWeakTable
SubscribersLookup = new();
- private static readonly Regex CitationRegex = new(@"(?.*?)
", RegexOptions.NonBacktracking);
+ private static readonly Regex CitationRegex = new(@"(?.*?)
", RegexOptions.NonBacktracking);
- private List<(string File, int? Page, string Quote)>? citations;
+ private List<(string File, string Quote)>? citations;
[Parameter, EditorRequired]
public required ChatMessage Message { get; set; }
@@ -88,7 +101,7 @@ else if (Message.Role == ChatRole.Assistant)
{
var matches = CitationRegex.Matches(text);
citations = matches.Any()
- ? matches.Select(m => (m.Groups["file"].Value, int.TryParse(m.Groups["page"].Value, out var page) ? page : (int?)null, m.Groups["quote"].Value)).ToList()
+ ? matches.Select(m => (m.Groups["file"].Value, m.Groups["quote"].Value)).ToList()
: null;
}
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Program.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Program.cs
index c67c70db5d6..0a20a05d52b 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Program.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Program.cs
@@ -16,10 +16,11 @@
.AddEmbeddingGenerator();
builder.AddQdrantClient("vectordb");
-builder.Services.AddQdrantCollection("data-aichatweb-chunks");
-builder.Services.AddQdrantCollection("data-aichatweb-documents");
-builder.Services.AddScoped();
+builder.Services.AddQdrantVectorStore();
+builder.Services.AddQdrantCollection(IngestedChunk.CollectionName);
+builder.Services.AddSingleton();
builder.Services.AddSingleton();
+builder.Services.AddKeyedSingleton("ingestion_directory", new DirectoryInfo(Path.Combine(builder.Environment.WebRootPath, "Data")));
// Applies robust HTTP resilience settings for all HttpClients in the Web project,
// not across the entire solution. It's aimed at supporting Ollama scenarios due
// to its self-hosted nature and potentially slow responses.
@@ -45,12 +46,4 @@
app.MapRazorComponents()
.AddInteractiveServerRenderMode();
-// By default, we ingest PDF files from the /wwwroot/Data directory. You can ingest from
-// other sources by implementing IIngestionSource.
-// Important: ensure that any content you ingest is trusted, as it may be reflected back
-// to users or could be a source of prompt injection risk.
-await DataIngestor.IngestDataAsync(
- app.Services,
- new PDFDirectorySource(Path.Combine(builder.Environment.WebRootPath, "Data")));
-
app.Run();
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs
index 0e161a6278b..b55a8b3c817 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/IngestedChunk.cs
@@ -1,24 +1,31 @@
-using Microsoft.Extensions.VectorData;
+using System.Text.Json.Serialization;
+using Microsoft.Extensions.VectorData;
namespace aichatweb.Web.Services;
public class IngestedChunk
{
- private const int VectorDimensions = 384; // 384 is the default vector size for the all-minilm embedding model
- private const string VectorDistanceFunction = DistanceFunction.CosineSimilarity;
+ public const int VectorDimensions = 384; // 384 is the default vector size for the all-minilm embedding model
+ public const string VectorDistanceFunction = DistanceFunction.CosineSimilarity;
+ public const string CollectionName = "data-aichatweb-chunks";
- [VectorStoreKey]
+ [VectorStoreKey(StorageName = "key")]
+ [JsonPropertyName("key")]
public required Guid Key { get; set; }
- [VectorStoreData(IsIndexed = true)]
+ [VectorStoreData(StorageName = "documentid")]
+ [JsonPropertyName("documentid")]
public required string DocumentId { get; set; }
- [VectorStoreData]
- public int PageNumber { get; set; }
-
- [VectorStoreData]
+ [VectorStoreData(StorageName = "content")]
+ [JsonPropertyName("content")]
public required string Text { get; set; }
- [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction)]
+ [VectorStoreData(StorageName = "context")]
+ [JsonPropertyName("context")]
+ public string? Context { get; set; }
+
+ [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")]
+ [JsonPropertyName("embedding")]
public string? Vector => Text;
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/IngestedDocument.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/IngestedDocument.cs
deleted file mode 100644
index 8a6ec320251..00000000000
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/IngestedDocument.cs
+++ /dev/null
@@ -1,25 +0,0 @@
-using Microsoft.Extensions.VectorData;
-
-namespace aichatweb.Web.Services;
-
-public class IngestedDocument
-{
- private const int VectorDimensions = 2;
- private const string VectorDistanceFunction = DistanceFunction.CosineSimilarity;
-
- [VectorStoreKey]
- public required Guid Key { get; set; }
-
- [VectorStoreData(IsIndexed = true)]
- public required string SourceId { get; set; }
-
- [VectorStoreData]
- public required string DocumentId { get; set; }
-
- [VectorStoreData]
- public required string DocumentVersion { get; set; }
-
- // The vector is not used but required for some vector databases
- [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction)]
- public ReadOnlyMemory Vector { get; set; } = new ReadOnlyMemory([0, 0]);
-}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
index 894b85c10de..9dd366a03a5 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/Ingestion/DataIngestor.cs
@@ -1,58 +1,35 @@
using Microsoft.Extensions.AI;
+using Microsoft.Extensions.DataIngestion;
+using Microsoft.Extensions.DataIngestion.Chunkers;
using Microsoft.Extensions.VectorData;
+using Microsoft.ML.Tokenizers;
namespace aichatweb.Web.Services.Ingestion;
public class DataIngestor(
ILogger logger,
- VectorStoreCollection chunksCollection,
- VectorStoreCollection documentsCollection)
+ ILoggerFactory loggerFactory,
+ VectorStore vectorStore,
+ IEmbeddingGenerator> embeddingGenerator)
{
- public static async Task IngestDataAsync(IServiceProvider services, IIngestionSource source)
+ public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern)
{
- using var scope = services.CreateScope();
- var ingestor = scope.ServiceProvider.GetRequiredService();
- await ingestor.IngestDataAsync(source);
- }
-
- public async Task IngestDataAsync(IIngestionSource source)
- {
- await chunksCollection.EnsureCollectionExistsAsync();
- await documentsCollection.EnsureCollectionExistsAsync();
-
- var sourceId = source.SourceId;
- var documentsForSource = await documentsCollection.GetAsync(doc => doc.SourceId == sourceId, top: int.MaxValue).ToListAsync();
-
- var deletedDocuments = await source.GetDeletedDocumentsAsync(documentsForSource);
- foreach (var deletedDocument in deletedDocuments)
+ using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, new()
{
- logger.LogInformation("Removing ingested data for {DocumentId}", deletedDocument.DocumentId);
- await DeleteChunksForDocumentAsync(deletedDocument);
- await documentsCollection.DeleteAsync(deletedDocument.Key);
- }
-
- var modifiedDocuments = await source.GetNewOrModifiedDocumentsAsync(documentsForSource);
- foreach (var modifiedDocument in modifiedDocuments)
- {
- logger.LogInformation("Processing {DocumentId}", modifiedDocument.DocumentId);
- await DeleteChunksForDocumentAsync(modifiedDocument);
-
- await documentsCollection.UpsertAsync(modifiedDocument);
-
- var newRecords = await source.CreateChunksForDocumentAsync(modifiedDocument);
- await chunksCollection.UpsertAsync(newRecords);
- }
-
- logger.LogInformation("Ingestion is up-to-date");
-
- async Task DeleteChunksForDocumentAsync(IngestedDocument document)
+ CollectionName = IngestedChunk.CollectionName,
+ DistanceFunction = IngestedChunk.VectorDistanceFunction,
+ IncrementalIngestion = false,
+ });
+
+ using var pipeline = new IngestionPipeline(
+ reader: new DocumentReader(directory),
+ chunker: new SemanticSimilarityChunker(embeddingGenerator, new(TiktokenTokenizer.CreateForModel("gpt-4o"))),
+ writer: writer,
+ loggerFactory: loggerFactory);
+
+ await foreach (var result in pipeline.ProcessAsync(directory, searchPattern))
{
- var documentId = document.DocumentId;
- var chunksToDelete = await chunksCollection.GetAsync(record => record.DocumentId == documentId, int.MaxValue).ToListAsync();
- if (chunksToDelete.Count != 0)
- {
- await chunksCollection.DeleteAsync(chunksToDelete.Select(r => r.Key));
- }
+ logger.LogInformation("Completed processing '{id}'. Succeeded: '{succeeded}'.", result.DocumentId, result.Succeeded);
}
}
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/Ingestion/DocumentReader.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/Ingestion/DocumentReader.cs
new file mode 100644
index 00000000000..60fcdbdc128
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/Ingestion/DocumentReader.cs
@@ -0,0 +1,42 @@
+using Microsoft.Extensions.DataIngestion;
+
+namespace aichatweb.Web.Services.Ingestion;
+
+internal sealed class DocumentReader(DirectoryInfo rootDirectory) : IngestionDocumentReader
+{
+ private readonly MarkdownReader _markdownReader = new();
+ private readonly MarkItDownMcpReader _pdfReader = new(mcpServerUri: GetMarkItDownMcpServerUrl());
+
+ public override Task ReadAsync(FileInfo source, string identifier, string? mediaType = null, CancellationToken cancellationToken = default)
+ {
+ if (Path.IsPathFullyQualified(identifier))
+ {
+ // Normalize the identifier to its relative path
+ identifier = Path.GetRelativePath(rootDirectory.FullName, identifier);
+ }
+
+ mediaType = GetCustomMediaType(source) ?? mediaType;
+ return base.ReadAsync(source, identifier, mediaType, cancellationToken);
+ }
+
+ public override Task ReadAsync(Stream source, string identifier, string mediaType, CancellationToken cancellationToken = default)
+ => mediaType switch
+ {
+ "application/pdf" => _pdfReader.ReadAsync(source, identifier, mediaType, cancellationToken),
+ "text/markdown" => _markdownReader.ReadAsync(source, identifier, mediaType, cancellationToken),
+ _ => throw new InvalidOperationException($"Unsupported media type '{mediaType}'"),
+ };
+
+ private static string? GetCustomMediaType(FileInfo source)
+ => source.Extension switch
+ {
+ ".md" => "text/markdown",
+ _ => null
+ };
+
+ private static Uri GetMarkItDownMcpServerUrl()
+ {
+ var markItDownMcpUrl = $"{Environment.GetEnvironmentVariable("MARKITDOWN_MCP_URL")}/mcp";
+ return new Uri(markItDownMcpUrl);
+ }
+}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/Ingestion/IIngestionSource.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/Ingestion/IIngestionSource.cs
deleted file mode 100644
index a1c6b2191d1..00000000000
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/Ingestion/IIngestionSource.cs
+++ /dev/null
@@ -1,12 +0,0 @@
-namespace aichatweb.Web.Services.Ingestion;
-
-public interface IIngestionSource
-{
- string SourceId { get; }
-
- Task> GetNewOrModifiedDocumentsAsync(IReadOnlyList existingDocuments);
-
- Task> GetDeletedDocumentsAsync(IReadOnlyList existingDocuments);
-
- Task> CreateChunksForDocumentAsync(IngestedDocument document);
-}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/Ingestion/PDFDirectorySource.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/Ingestion/PDFDirectorySource.cs
deleted file mode 100644
index da043feb526..00000000000
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/Ingestion/PDFDirectorySource.cs
+++ /dev/null
@@ -1,71 +0,0 @@
-using Microsoft.SemanticKernel.Text;
-using UglyToad.PdfPig;
-using UglyToad.PdfPig.Content;
-using UglyToad.PdfPig.DocumentLayoutAnalysis.PageSegmenter;
-using UglyToad.PdfPig.DocumentLayoutAnalysis.WordExtractor;
-
-namespace aichatweb.Web.Services.Ingestion;
-
-public class PDFDirectorySource(string sourceDirectory) : IIngestionSource
-{
- public static string SourceFileId(string path) => Path.GetFileName(path);
- public static string SourceFileVersion(string path) => File.GetLastWriteTimeUtc(path).ToString("o");
-
- public string SourceId => $"{nameof(PDFDirectorySource)}:{sourceDirectory}";
-
- public Task> GetNewOrModifiedDocumentsAsync(IReadOnlyList existingDocuments)
- {
- var results = new List();
- var sourceFiles = Directory.GetFiles(sourceDirectory, "*.pdf");
- var existingDocumentsById = existingDocuments.ToDictionary(d => d.DocumentId);
-
- foreach (var sourceFile in sourceFiles)
- {
- var sourceFileId = SourceFileId(sourceFile);
- var sourceFileVersion = SourceFileVersion(sourceFile);
- var existingDocumentVersion = existingDocumentsById.TryGetValue(sourceFileId, out var existingDocument) ? existingDocument.DocumentVersion : null;
- if (existingDocumentVersion != sourceFileVersion)
- {
- results.Add(new() { Key = Guid.CreateVersion7(), SourceId = SourceId, DocumentId = sourceFileId, DocumentVersion = sourceFileVersion });
- }
- }
-
- return Task.FromResult((IEnumerable)results);
- }
-
- public Task> GetDeletedDocumentsAsync(IReadOnlyList existingDocuments)
- {
- var currentFiles = Directory.GetFiles(sourceDirectory, "*.pdf");
- var currentFileIds = currentFiles.ToLookup(SourceFileId);
- var deletedDocuments = existingDocuments.Where(d => !currentFileIds.Contains(d.DocumentId));
- return Task.FromResult(deletedDocuments);
- }
-
- public Task> CreateChunksForDocumentAsync(IngestedDocument document)
- {
- using var pdf = PdfDocument.Open(Path.Combine(sourceDirectory, document.DocumentId));
- var paragraphs = pdf.GetPages().SelectMany(GetPageParagraphs).ToList();
-
- return Task.FromResult(paragraphs.Select(p => new IngestedChunk
- {
- Key = Guid.CreateVersion7(),
- DocumentId = document.DocumentId,
- PageNumber = p.PageNumber,
- Text = p.Text,
- }));
- }
-
- private static IEnumerable<(int PageNumber, int IndexOnPage, string Text)> GetPageParagraphs(Page pdfPage)
- {
- var letters = pdfPage.Letters;
- var words = NearestNeighbourWordExtractor.Instance.GetWords(letters);
- var textBlocks = DocstrumBoundingBoxes.Instance.GetBlocks(words);
- var pageText = string.Join(Environment.NewLine + Environment.NewLine,
- textBlocks.Select(t => t.Text.ReplaceLineEndings(" ")));
-
-#pragma warning disable SKEXP0050 // Type is for evaluation purposes only
- return TextChunker.SplitPlainTextParagraphs([pageText], 200)
- .Select((text, index) => (pdfPage.Number, index, text));
-#pragma warning restore SKEXP0050 // Type is for evaluation purposes only
- }
-}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs
index 044e8378595..7d8718028bd 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/Services/SemanticSearch.cs
@@ -1,12 +1,22 @@
-using Microsoft.Extensions.VectorData;
+using aichatweb.Web.Services.Ingestion;
+using Microsoft.Extensions.VectorData;
namespace aichatweb.Web.Services;
public class SemanticSearch(
- VectorStoreCollection vectorCollection)
+ VectorStoreCollection vectorCollection,
+ [FromKeyedServices("ingestion_directory")] DirectoryInfo ingestionDirectory,
+ DataIngestor dataIngestor)
{
+ private Task? _ingestionTask;
+
+ public async Task LoadDocumentsAsync() => await ( _ingestionTask ??= dataIngestor.IngestDataAsync(ingestionDirectory, searchPattern: "*.*"));
+
public async Task> SearchAsync(string text, string? documentIdFilter, int maxResults)
{
+ // Ensure documents have been loaded before searching
+ await LoadDocumentsAsync();
+
var nearest = vectorCollection.SearchAsync(text, maxResults, new VectorSearchOptions
{
Filter = documentIdFilter is { Length: > 0 } ? record => record.DocumentId == documentIdFilter : null,
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/aichatweb.Web.csproj b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/aichatweb.Web.csproj
index 2b573a14d47..a81428f6388 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/aichatweb.Web.csproj
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/aichatweb.Web.csproj
@@ -1,7 +1,7 @@
- net9.0
+ net10.0
enable
enable
secret
@@ -11,11 +11,13 @@
-
-
-
+
+
+
+
+
-
+
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/wwwroot/Data/Example_GPS_Watch.md b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/wwwroot/Data/Example_GPS_Watch.md
new file mode 100644
index 00000000000..f7d042edf83
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/wwwroot/Data/Example_GPS_Watch.md
@@ -0,0 +1,193 @@
+# TrailMaster GPS Watch
+
+## 1. Introduction
+
+ExpeditionTech is proud to introduce the TrailMaster GPS Watch, a state-of-the-art outdoor electronics device designed for the most demanding outdoor adventurers. This watch is meticulously engineered to provide accurate location information, real-time sharing, and reliable mapping capabilities. The TrailMaster GPS Watch is built to withstand the harshest environmental conditions while providing users with the critical data they need to navigate through challenging terrain and optimize their outdoor experiences.
+
+### 1.1 Product Overview
+
+The TrailMaster GPS Watch is a high-performance outdoor electronics device that combines advanced GPS technology with rugged design. It features a durable, shock-resistant casing, a high-contrast display for optimal visibility in bright sunlight, and a built-in rechargeable battery for extended use in remote areas. The TrailMaster GPS Watch is equipped with a comprehensive suite of navigation tools, including topographic maps, trail tracking, and waypoint management. Additionally, it offers real-time location sharing for enhanced safety and group coordination during outdoor activities.
+
+### 1.2 Intended Use
+
+The TrailMaster GPS Watch is intended for use by experienced outdoor enthusiasts, including hikers, backpackers, trail runners, and mountaineers. This device is designed to provide essential navigation information and location tracking in remote, off-grid environments. Users can rely on the TrailMaster GPS Watch to plan and execute their outdoor adventures with confidence, knowing they have access to accurate maps, location data, and real-time sharing capabilities. Although the TrailMaster GPS Watch is engineered for extreme durability, users should always exercise caution and take appropriate safety measures when engaging in outdoor activities.
+
+#### 1.2.1 Safety Precautions
+
+Before using the TrailMaster GPS Watch, users must ensure that the device is fully charged and properly calibrated according to the manufacturer's specifications. It is essential to familiarize oneself with the watch's features, controls, and navigation functions before venturing into remote outdoor areas. Users should also be aware of potential environmental hazards, such as extreme weather conditions, rugged terrain, and limited access to emergency services. Proper planning, preparation, and adherence to best practices for outdoor safety are critical when using the TrailMaster GPS Watch.
+
+## 2. Technical Specifications
+
+### 2.1 Hardware
+
+#### 2.1.1 Processor and Memory
+
+The TrailMaster GPS Watch is equipped with a high-performance ARM Cortex processor, ensuring fast and efficient operation for all your outdoor activities. The device also comes with 4GB of internal memory, providing ample storage for maps, routes, and waypoints. The processor operates at 1.2GHz, allowing for quick map rendering and location updates.
+
+#### 2.1.2 GPS Module
+
+The GPS module in the TrailMaster GPS Watch utilizes a state-of-the-art multi-constellation receiver, supporting GPS, GLONASS, and Galileo satellite systems. This enables precise location tracking and navigation, even in challenging outdoor environments. The module also features advanced signal processing algorithms, ensuring reliable performance in remote areas with limited satellite visibility.
+
+#### 2.1.3 Sensors
+
+Equipped with a comprehensive set of sensors, the TrailMaster GPS Watch offers an array of valuable data for outdoor enthusiasts. The built-in altimeter provides accurate altitude readings, while the barometer monitors changes in air pressure to forecast weather conditions. Additionally, the watch includes a digital compass for reliable orientation, ensuring a seamless navigation experience in the great outdoors.
+
+### 2.2 Software
+
+#### 2.2.1 Operating System
+
+The TrailMaster GPS Watch runs on a proprietary operating system optimized for outdoor navigation and tracking. This custom OS combines robustness and efficiency, allowing for seamless integration with the device's hardware and sensors. The interface is designed for intuitive interaction, enabling users to access maps, location sharing, and other features with ease, even in challenging outdoor conditions.
+
+#### 2.2.2 Map Data
+
+The TrailMaster GPS Watch comes preloaded with detailed topographic maps, providing comprehensive coverage of trails, terrain, and points of interest. These maps are stored locally on the device, ensuring rapid access and smooth navigation without relying on cellular or data network connectivity. Users can also import additional map data via the dedicated USB port, expanding the watch's mapping capabilities for customized adventures.
+
+## 3. Setup and Installation
+
+### 3.1 Charging
+
+Before using your TrailMaster GPS Watch for the first time, it is essential to ensure that the device is fully charged. To charge your GPS watch, follow these steps:
+
+1. Locate the charging port cover on the back of the watch.
+2. Gently lift the cover to expose the charging port.
+3. Connect the provided USB charging cable to the port, ensuring a secure connection.
+4. Plug the other end of the cable into a power source, such as a computer or USB wall adapter.
+5. Allow the watch to charge for at least 2 hours, or until the battery indicator on the display shows it is fully charged.
+
+For optimal performance, it is recommended to fully charge the device before each use and to avoid overcharging. Please refer to the TrailMaster GPS Watch technical specifications for detailed battery charging information.
+
+### 3.2 Initial Configuration
+
+Once your TrailMaster GPS Watch is fully charged, you can proceed with the initial configuration by following these steps:
+
+1. Press and hold the power button located on the side of the watch to turn it on.
+2. Follow the on-screen instructions to select your language, set the date and time, and calibrate the GPS.
+3. Connect the watch to your smartphone using the TrailMaster companion app to enable real-time location sharing and receive notifications.
+4. Customize the watch settings, such as display preferences and map views, to suit your outdoor activities.
+
+Before using the GPS functionality, it is crucial to ensure that the watch has a clear view of the sky to acquire GPS signals. Please refer to the TrailMaster GPS Watch user guide for detailed instructions on GPS calibration and satellite acquisition.
+
+## 4. Operation
+
+### 4.1 Basic Functions
+
+The TrailMaster GPS Watch is designed for outdoor use and offers a variety of basic functions to assist users in navigating and staying connected during their adventures.
+
+#### 4.1.1 Powering On and Off
+
+To power on the TrailMaster GPS Watch, press and hold the power button (located on the right side of the watch) for 3 seconds. The watch will display the ExpeditionTech logo and then proceed to the main navigation screen. To power off the watch, press and hold the power button and select "Power Off" from the menu.
+
+#### 4.1.2 Accessing Built-in Maps
+
+The TrailMaster GPS Watch comes pre-loaded with detailed maps of various outdoor locations. To access the maps, press the map button (located on the left side of the watch) to bring up the map interface. From here, you can view your current location, set waypoints, and plan routes.
+
+#### 4.1.3 Real-time Location Sharing
+
+Utilize the real-time location sharing feature to transmit your current location to designated contacts. Press the share button (located on the top of the watch) and select the specific contacts you wish to share your location with. This feature requires a stable GPS signal and a connected smartphone with the TrailMaster app installed.
+
+### 4.2 Advanced Features
+
+The TrailMaster GPS Watch offers advanced features to enhance the user experience and provide additional functionality for outdoor enthusiasts.
+
+#### 4.2.1 Rugged Design
+
+The TrailMaster GPS Watch is built to withstand rugged outdoor conditions, including extreme temperatures, water exposure, and impact. The durable casing and reinforced strap ensure that the watch remains operational in challenging environments.
+
+#### 4.2.2 Navigation Tools
+
+In addition to basic map access, the TrailMaster GPS Watch includes advanced navigation tools such as compass, altimeter, and barometer. These tools provide vital information for navigation and weather monitoring while in the field.
+
+#### 4.2.3 Customizing Data Screens
+
+Users can customize the data screens on the TrailMaster GPS Watch to display the specific information they require during their outdoor activities. From the main menu, navigate to the settings and select "Data Screens" to adjust the layout and content of the screens.
+
+#### 4.2.4 Tracking Performance Metrics
+
+The TrailMaster GPS Watch is equipped with sensors to track performance metrics such as distance traveled, speed, elevation gain, and heart rate. Use the tracking mode to monitor these metrics in real-time or review them after completing an activity.
+
+## 5. Troubleshooting
+
+### 5.1 Diagnostic Tools and Equipment
+
+When troubleshooting the TrailMaster GPS Watch, it is essential to use specialized industrial diagnostic tools and equipment to perform accurate diagnostic tests. These tools and equipment include:
+
+- **GPS Signal Analyzer**: Use a GPS signal analyzer to check the strength and quality of the satellite signals received by the watch.
+- **RF Spectrum Analyzer**: An RF spectrum analyzer is required to analyze the radio frequency spectrum and identify any interference affecting the GPS reception.
+- **Ruggedness Test Equipment**: Perform ruggedness tests using shock and vibration equipment to ensure the watch can withstand outdoor adventures without performance issues.
+
+It is essential to use these specialized tools and equipment to accurately diagnose any issues with the TrailMaster GPS Watch and ensure optimal performance.
+
+### 5.2 Common Issues
+
+#### 5.2.1 GPS Signal Loss
+
+**Symptoms:**
+
+The watch displays "No GPS Signal" or intermittently loses GPS signal during use.
+
+**Potential Causes:**
+
+- Obstruction of satellite signals due to dense foliage, tall buildings, or natural terrain features.
+- Radio frequency interference affecting GPS reception.
+- Wear and tear on the GPS antenna or receiver.
+
+**Troubleshooting Steps:**
+
+1. Check the surroundings for any obstructions blocking satellite signals.
+2. Use an RF spectrum analyzer to identify any potential sources of interference.
+3. Perform a diagnostics test using a GPS signal analyzer to assess the strength and quality of the GPS signal received by the watch.
+4. If the issue persists, contact ExpeditionTech customer support for further assistance.
+
+#### 5.2.2 Connectivity Issues
+
+**Symptoms:**
+
+- Inability to share real-time location or connect to other devices.
+- Unreliable Bluetooth connectivity.
+
+**Potential Causes:**
+
+- Bluetooth interference from other electronic devices.
+- Signal obstruction due to environmental factors.
+- Software or firmware issues.
+
+**Troubleshooting Steps:**
+
+1. Ensure the watch is within the recommended Bluetooth range of the connected device.
+2. Identify and eliminate potential sources of Bluetooth interference in the vicinity.
+3. Update the watch's firmware to the latest version to address any software-related connectivity issues.
+4. If connectivity problems persist, perform a comprehensive diagnostics test using a specialized Bluetooth signal analyzer.
+
+By following these troubleshooting steps and using specialized diagnostic tools and equipment, users can effectively identify and address common issues with the TrailMaster GPS Watch.
+
+## 6. Maintenance and Care
+
+### 6.1 Cleaning
+
+It is important to regularly clean your TrailMaster GPS Watch to ensure optimal performance. Use a soft, damp cloth to gently wipe the watch face and straps. Do not use harsh chemicals or solvents, as these may damage the watch's rugged design and built-in maps.
+
+### 6.2 Battery Maintenance
+
+The TrailMaster GPS Watch is equipped with a rechargeable lithium-ion battery. To optimize battery life, it is recommended to fully charge the watch before each outdoor adventure. Additionally, it is important to avoid exposing the watch to extreme temperatures, as this may affect battery performance.
+
+### 6.3 Storage
+
+When not in use, store your TrailMaster GPS Watch in a cool, dry place. Avoid prolonged exposure to direct sunlight or extreme temperatures, as this may cause damage to the device.
+
+### 6.4 Software Updates
+
+To ensure that your TrailMaster GPS Watch operates efficiently, it is important to regularly check for and install software updates. These updates may include improvements to the built-in maps, real-time location sharing, and overall performance of the watch.
+
+### 6.5 Technical Diagnostics
+
+For advanced maintenance and care, it is recommended to perform technical diagnostics using specialized industrial and scientific equipment. Refer to formal industry specification codes and standards bodies for detailed instructions on performing these diagnostics.
+
+## 7. Regulatory Compliance
+
+### 7.1 FCC Compliance
+
+The ExpeditionTech TrailMaster GPS Watch complies with Part 15 of the FCC rules. Operation is subject to the following two conditions: (1) this device may not cause harmful interference, and (2) this device must accept any interference received, including interference that may cause undesired operation. Any changes or modifications not expressly approved by the party responsible for compliance could void the user's authority to operate the equipment. The user is cautioned that any changes or modifications made to this device that are not expressly approved by the manufacturer could void the user's authority to operate the equipment.
+
+### 7.2 CE Marking
+
+The CE marking on the ExpeditionTech TrailMaster GPS Watch indicates that it complies with the essential requirements of the relevant European health, safety, and environmental protection legislation. The device is in conformity with the essential requirements and other relevant provisions of Directive 1999/5/EC. The CE marking is affixed to the device to demonstrate that it meets the essential requirements for safety, health, and environmental protection. The user should only use the device in accordance with the instructions provided in the user manual.
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/wwwroot/Data/Example_GPS_Watch.pdf b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/wwwroot/Data/Example_GPS_Watch.pdf
deleted file mode 100644
index c87df644c58..00000000000
Binary files a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/wwwroot/Data/Example_GPS_Watch.pdf and /dev/null differ
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/wwwroot/lib/markdown_viewer/viewer.html b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/wwwroot/lib/markdown_viewer/viewer.html
new file mode 100644
index 00000000000..94447ffc47e
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/wwwroot/lib/markdown_viewer/viewer.html
@@ -0,0 +1,32 @@
+
+
+
+
+ Markdown viewer
+
+
+
+
+
+
+
+
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/wwwroot/lib/markdown_viewer/viewer.mjs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/wwwroot/lib/markdown_viewer/viewer.mjs
new file mode 100644
index 00000000000..78eb819c9a8
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.Ollama_Qdrant.verified/aichatweb/aichatweb.Web/wwwroot/lib/markdown_viewer/viewer.mjs
@@ -0,0 +1,13 @@
+import { parse } from '../marked/dist/marked.esm.js';
+import purify from '../dompurify/dist/purify.es.mjs';
+
+const url = new URL(window.location);
+const fileUrl = url.searchParams.get('file');
+if (!fileUrl) {
+ throw new Error('File not specified in the URL query string');
+}
+
+var response = await fetch(fileUrl);
+var text = await response.text();
+
+document.getElementById('content').innerHTML = purify.sanitize(parse(text));
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Components/Pages/Chat/Chat.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Components/Pages/Chat/Chat.razor
index 8aa0ec9fd28..6fc5881c18f 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Components/Pages/Chat/Chat.razor
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Components/Pages/Chat/Chat.razor
@@ -13,7 +13,7 @@
To get started, try asking about these example documents. You can replace these with your own data and replace this message.
-
+
@@ -29,10 +29,12 @@
Do not answer questions about anything else.
Use only simple markdown to format your responses.
- Use the search tool to find relevant information. When you do this, end your
+ Use the LoadDocuments tool to prepare for searches before answering any questions.
+
+ Use the Search tool to find relevant information. When you do this, end your
reply with citations in the special XML format:
- exact quote here
+ exact quote here
Always include the citation in your response if there are results.
@@ -52,7 +54,10 @@
{
statefulMessageCount = 0;
messages.Add(new(ChatRole.System, SystemPrompt));
- chatOptions.Tools = [AIFunctionFactory.Create(SearchAsync)];
+ chatOptions.Tools = [
+ AIFunctionFactory.Create(LoadDocumentsAsync),
+ AIFunctionFactory.Create(SearchAsync)
+ ];
}
private async Task AddUserMessageAsync(ChatMessage userMessage)
@@ -106,7 +111,14 @@
await chatInput!.FocusAsync();
}
- [Description("Searches for information using a phrase or keyword")]
+ [Description("Loads the documents needed for performing searches. Must be completed before a search can be executed, but only needs to be completed once.")]
+ private async Task LoadDocumentsAsync()
+ {
+ await InvokeAsync(StateHasChanged);
+ await Search.LoadDocumentsAsync();
+ }
+
+ [Description("Searches for information using a phrase or keyword. Relies on documents already being loaded.")]
private async Task> SearchAsync(
[Description("The phrase to search for.")] string searchPhrase,
[Description("If possible, specify the filename to search that file only. If not provided or empty, the search includes all files.")] string? filenameFilter = null)
@@ -114,7 +126,7 @@
await InvokeAsync(StateHasChanged);
var results = await Search.SearchAsync(searchPhrase, filenameFilter, maxResults: 5);
return results.Select(result =>
- $"{result.Text}");
+ $"{result.Text}");
}
public void Dispose()
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Components/Pages/Chat/ChatCitation.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Components/Pages/Chat/ChatCitation.razor
index ccb5853cec4..667189beabd 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Components/Pages/Chat/ChatCitation.razor
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Components/Pages/Chat/ChatCitation.razor
@@ -17,10 +17,7 @@
public required string File { get; set; }
[Parameter]
- public int? PageNumber { get; set; }
-
- [Parameter]
- public required string Quote { get; set; }
+ public string? Quote { get; set; }
private string? viewerUrl;
@@ -28,11 +25,15 @@
{
viewerUrl = null;
- // If you ingest other types of content besides PDF files, construct a URL to an appropriate viewer here
- if (File.EndsWith(".pdf"))
+ // If you ingest other types of content besides Markdown or PDF files, construct a URL to an appropriate viewer here
+ if (File.EndsWith(".md"))
+ {
+ viewerUrl = $"lib/markdown_viewer/viewer.html?file=/Data/{HttpUtility.UrlEncode(File)}#:~:text={Uri.EscapeDataString(Quote ?? "")}";
+ }
+ else if (File.EndsWith(".pdf"))
{
var search = Quote?.Trim('.', ',', ' ', '\n', '\r', '\t', '"', '\'');
- viewerUrl = $"lib/pdf_viewer/viewer.html?file=/Data/{HttpUtility.UrlEncode(File)}#page={PageNumber}&search={HttpUtility.UrlEncode(search)}&phrase=true";
+ viewerUrl = $"lib/pdf_viewer/viewer.html?file=/Data/{HttpUtility.UrlEncode(File)}#search={HttpUtility.UrlEncode(search)}&phrase=true";
}
}
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Components/Pages/Chat/ChatMessageItem.razor b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Components/Pages/Chat/ChatMessageItem.razor
index 92c20c70667..e45d92ab5f9 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Components/Pages/Chat/ChatMessageItem.razor
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Components/Pages/Chat/ChatMessageItem.razor
@@ -28,11 +28,24 @@ else if (Message.Role == ChatRole.Assistant)
@foreach (var citation in citations ?? [])
{
-
+
}
}
+ else if (content is FunctionCallContent { Name: "LoadDocuments" })
+ {
+
+
+
+ Loading relevant documents (this will take a minute)...
+
+
+ }
else if (content is FunctionCallContent { Name: "Search" } fcc && fcc.Arguments?.TryGetValue("searchPhrase", out var searchPhrase) is true)
{
@@ -56,9 +69,9 @@ else if (Message.Role == ChatRole.Assistant)
@code {
private static readonly ConditionalWeakTable
SubscribersLookup = new();
- private static readonly Regex CitationRegex = new(@"(?.*?)
", RegexOptions.NonBacktracking);
+ private static readonly Regex CitationRegex = new(@"(?.*?)
", RegexOptions.NonBacktracking);
- private List<(string File, int? Page, string Quote)>? citations;
+ private List<(string File, string Quote)>? citations;
[Parameter, EditorRequired]
public required ChatMessage Message { get; set; }
@@ -88,7 +101,7 @@ else if (Message.Role == ChatRole.Assistant)
{
var matches = CitationRegex.Matches(text);
citations = matches.Any()
- ? matches.Select(m => (m.Groups["file"].Value, int.TryParse(m.Groups["page"].Value, out var page) ? page : (int?)null, m.Groups["quote"].Value)).ToList()
+ ? matches.Select(m => (m.Groups["file"].Value, m.Groups["quote"].Value)).ToList()
: null;
}
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Program.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Program.cs
index 434e5662d6d..d7e25135462 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Program.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Program.cs
@@ -30,11 +30,12 @@
var azureAISearchEndpoint = new Uri(builder.Configuration["AzureAISearch:Endpoint"]
?? throw new InvalidOperationException("Missing configuration: AzureAISearch:Endpoint. See the README for details."));
var azureAISearchCredential = new DefaultAzureCredential();
-builder.Services.AddAzureAISearchCollection("data-aichatweb-chunks", azureAISearchEndpoint, azureAISearchCredential);
-builder.Services.AddAzureAISearchCollection("data-aichatweb-documents", azureAISearchEndpoint, azureAISearchCredential);
+builder.Services.AddAzureAISearchVectorStore(azureAISearchEndpoint, azureAISearchCredential);
+builder.Services.AddAzureAISearchCollection(IngestedChunk.CollectionName, azureAISearchEndpoint, azureAISearchCredential);
-builder.Services.AddScoped();
+builder.Services.AddSingleton();
builder.Services.AddSingleton();
+builder.Services.AddKeyedSingleton("ingestion_directory", new DirectoryInfo(Path.Combine(builder.Environment.WebRootPath, "Data")));
builder.Services.AddChatClient(chatClient).UseFunctionInvocation().UseLogging();
builder.Services.AddEmbeddingGenerator(embeddingGenerator);
@@ -55,12 +56,4 @@
app.MapRazorComponents()
.AddInteractiveServerRenderMode();
-// By default, we ingest PDF files from the /wwwroot/Data directory. You can ingest from
-// other sources by implementing IIngestionSource.
-// Important: ensure that any content you ingest is trusted, as it may be reflected back
-// to users or could be a source of prompt injection risk.
-await DataIngestor.IngestDataAsync(
- app.Services,
- new PDFDirectorySource(Path.Combine(builder.Environment.WebRootPath, "Data")));
-
app.Run();
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/IngestedChunk.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/IngestedChunk.cs
index 46270588cde..348bb5d942f 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/IngestedChunk.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/IngestedChunk.cs
@@ -1,24 +1,31 @@
-using Microsoft.Extensions.VectorData;
+using System.Text.Json.Serialization;
+using Microsoft.Extensions.VectorData;
namespace aichatweb.Services;
public class IngestedChunk
{
- private const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model
- private const string VectorDistanceFunction = DistanceFunction.CosineSimilarity;
+ public const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model
+ public const string VectorDistanceFunction = DistanceFunction.CosineSimilarity;
+ public const string CollectionName = "data-aichatweb-chunks";
- [VectorStoreKey]
- public required string Key { get; set; }
+ [VectorStoreKey(StorageName = "key")]
+ [JsonPropertyName("key")]
+ public required Guid Key { get; set; }
- [VectorStoreData(IsIndexed = true)]
+ [VectorStoreData(StorageName = "documentid")]
+ [JsonPropertyName("documentid")]
public required string DocumentId { get; set; }
- [VectorStoreData]
- public int PageNumber { get; set; }
-
- [VectorStoreData]
+ [VectorStoreData(StorageName = "content")]
+ [JsonPropertyName("content")]
public required string Text { get; set; }
- [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction)]
+ [VectorStoreData(StorageName = "context")]
+ [JsonPropertyName("context")]
+ public string? Context { get; set; }
+
+ [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction, StorageName = "embedding")]
+ [JsonPropertyName("embedding")]
public string? Vector => Text;
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/IngestedDocument.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/IngestedDocument.cs
deleted file mode 100644
index 9b3da6058c9..00000000000
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/IngestedDocument.cs
+++ /dev/null
@@ -1,25 +0,0 @@
-using Microsoft.Extensions.VectorData;
-
-namespace aichatweb.Services;
-
-public class IngestedDocument
-{
- private const int VectorDimensions = 2;
- private const string VectorDistanceFunction = DistanceFunction.CosineSimilarity;
-
- [VectorStoreKey]
- public required string Key { get; set; }
-
- [VectorStoreData(IsIndexed = true)]
- public required string SourceId { get; set; }
-
- [VectorStoreData]
- public required string DocumentId { get; set; }
-
- [VectorStoreData]
- public required string DocumentVersion { get; set; }
-
- // The vector is not used but required for some vector databases
- [VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction)]
- public ReadOnlyMemory Vector { get; set; } = new ReadOnlyMemory([0, 0]);
-}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/Ingestion/DataIngestor.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/Ingestion/DataIngestor.cs
index 89fe287ebed..d97b986b694 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/Ingestion/DataIngestor.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/Ingestion/DataIngestor.cs
@@ -1,58 +1,35 @@
using Microsoft.Extensions.AI;
+using Microsoft.Extensions.DataIngestion;
+using Microsoft.Extensions.DataIngestion.Chunkers;
using Microsoft.Extensions.VectorData;
+using Microsoft.ML.Tokenizers;
namespace aichatweb.Services.Ingestion;
public class DataIngestor(
ILogger logger,
- VectorStoreCollection chunksCollection,
- VectorStoreCollection documentsCollection)
+ ILoggerFactory loggerFactory,
+ VectorStore vectorStore,
+ IEmbeddingGenerator> embeddingGenerator)
{
- public static async Task IngestDataAsync(IServiceProvider services, IIngestionSource source)
+ public async Task IngestDataAsync(DirectoryInfo directory, string searchPattern)
{
- using var scope = services.CreateScope();
- var ingestor = scope.ServiceProvider.GetRequiredService();
- await ingestor.IngestDataAsync(source);
- }
-
- public async Task IngestDataAsync(IIngestionSource source)
- {
- await chunksCollection.EnsureCollectionExistsAsync();
- await documentsCollection.EnsureCollectionExistsAsync();
-
- var sourceId = source.SourceId;
- var documentsForSource = await documentsCollection.GetAsync(doc => doc.SourceId == sourceId, top: int.MaxValue).ToListAsync();
-
- var deletedDocuments = await source.GetDeletedDocumentsAsync(documentsForSource);
- foreach (var deletedDocument in deletedDocuments)
+ using var writer = new VectorStoreWriter(vectorStore, dimensionCount: IngestedChunk.VectorDimensions, new()
{
- logger.LogInformation("Removing ingested data for {DocumentId}", deletedDocument.DocumentId);
- await DeleteChunksForDocumentAsync(deletedDocument);
- await documentsCollection.DeleteAsync(deletedDocument.Key);
- }
-
- var modifiedDocuments = await source.GetNewOrModifiedDocumentsAsync(documentsForSource);
- foreach (var modifiedDocument in modifiedDocuments)
- {
- logger.LogInformation("Processing {DocumentId}", modifiedDocument.DocumentId);
- await DeleteChunksForDocumentAsync(modifiedDocument);
-
- await documentsCollection.UpsertAsync(modifiedDocument);
-
- var newRecords = await source.CreateChunksForDocumentAsync(modifiedDocument);
- await chunksCollection.UpsertAsync(newRecords);
- }
-
- logger.LogInformation("Ingestion is up-to-date");
-
- async Task DeleteChunksForDocumentAsync(IngestedDocument document)
+ CollectionName = IngestedChunk.CollectionName,
+ DistanceFunction = IngestedChunk.VectorDistanceFunction,
+ IncrementalIngestion = false,
+ });
+
+ using var pipeline = new IngestionPipeline(
+ reader: new DocumentReader(directory),
+ chunker: new SemanticSimilarityChunker(embeddingGenerator, new(TiktokenTokenizer.CreateForModel("gpt-4o"))),
+ writer: writer,
+ loggerFactory: loggerFactory);
+
+ await foreach (var result in pipeline.ProcessAsync(directory, searchPattern))
{
- var documentId = document.DocumentId;
- var chunksToDelete = await chunksCollection.GetAsync(record => record.DocumentId == documentId, int.MaxValue).ToListAsync();
- if (chunksToDelete.Count != 0)
- {
- await chunksCollection.DeleteAsync(chunksToDelete.Select(r => r.Key));
- }
+ logger.LogInformation("Completed processing '{id}'. Succeeded: '{succeeded}'.", result.DocumentId, result.Succeeded);
}
}
}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/Ingestion/DocumentReader.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/Ingestion/DocumentReader.cs
new file mode 100644
index 00000000000..315a6ad3d53
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/Ingestion/DocumentReader.cs
@@ -0,0 +1,36 @@
+using Microsoft.Extensions.DataIngestion;
+
+namespace aichatweb.Services.Ingestion;
+
+internal sealed class DocumentReader(DirectoryInfo rootDirectory) : IngestionDocumentReader
+{
+ private readonly MarkdownReader _markdownReader = new();
+ private readonly PdfPigReader _pdfReader = new();
+
+ public override Task ReadAsync(FileInfo source, string identifier, string? mediaType = null, CancellationToken cancellationToken = default)
+ {
+ if (Path.IsPathFullyQualified(identifier))
+ {
+ // Normalize the identifier to its relative path
+ identifier = Path.GetRelativePath(rootDirectory.FullName, identifier);
+ }
+
+ mediaType = GetCustomMediaType(source) ?? mediaType;
+ return base.ReadAsync(source, identifier, mediaType, cancellationToken);
+ }
+
+ public override Task ReadAsync(Stream source, string identifier, string mediaType, CancellationToken cancellationToken = default)
+ => mediaType switch
+ {
+ "application/pdf" => _pdfReader.ReadAsync(source, identifier, mediaType, cancellationToken),
+ "text/markdown" => _markdownReader.ReadAsync(source, identifier, mediaType, cancellationToken),
+ _ => throw new InvalidOperationException($"Unsupported media type '{mediaType}'"),
+ };
+
+ private static string? GetCustomMediaType(FileInfo source)
+ => source.Extension switch
+ {
+ ".md" => "text/markdown",
+ _ => null
+ };
+}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/Ingestion/IIngestionSource.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/Ingestion/IIngestionSource.cs
deleted file mode 100644
index 540cac117e7..00000000000
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/Ingestion/IIngestionSource.cs
+++ /dev/null
@@ -1,12 +0,0 @@
-namespace aichatweb.Services.Ingestion;
-
-public interface IIngestionSource
-{
- string SourceId { get; }
-
- Task> GetNewOrModifiedDocumentsAsync(IReadOnlyList existingDocuments);
-
- Task> GetDeletedDocumentsAsync(IReadOnlyList existingDocuments);
-
- Task> CreateChunksForDocumentAsync(IngestedDocument document);
-}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/Ingestion/PDFDirectorySource.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/Ingestion/PDFDirectorySource.cs
deleted file mode 100644
index 0be02a9d008..00000000000
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/Ingestion/PDFDirectorySource.cs
+++ /dev/null
@@ -1,71 +0,0 @@
-using Microsoft.SemanticKernel.Text;
-using UglyToad.PdfPig;
-using UglyToad.PdfPig.Content;
-using UglyToad.PdfPig.DocumentLayoutAnalysis.PageSegmenter;
-using UglyToad.PdfPig.DocumentLayoutAnalysis.WordExtractor;
-
-namespace aichatweb.Services.Ingestion;
-
-public class PDFDirectorySource(string sourceDirectory) : IIngestionSource
-{
- public static string SourceFileId(string path) => Path.GetFileName(path);
- public static string SourceFileVersion(string path) => File.GetLastWriteTimeUtc(path).ToString("o");
-
- public string SourceId => $"{nameof(PDFDirectorySource)}:{sourceDirectory}";
-
- public Task> GetNewOrModifiedDocumentsAsync(IReadOnlyList existingDocuments)
- {
- var results = new List();
- var sourceFiles = Directory.GetFiles(sourceDirectory, "*.pdf");
- var existingDocumentsById = existingDocuments.ToDictionary(d => d.DocumentId);
-
- foreach (var sourceFile in sourceFiles)
- {
- var sourceFileId = SourceFileId(sourceFile);
- var sourceFileVersion = SourceFileVersion(sourceFile);
- var existingDocumentVersion = existingDocumentsById.TryGetValue(sourceFileId, out var existingDocument) ? existingDocument.DocumentVersion : null;
- if (existingDocumentVersion != sourceFileVersion)
- {
- results.Add(new() { Key = Guid.CreateVersion7().ToString(), SourceId = SourceId, DocumentId = sourceFileId, DocumentVersion = sourceFileVersion });
- }
- }
-
- return Task.FromResult((IEnumerable)results);
- }
-
- public Task> GetDeletedDocumentsAsync(IReadOnlyList existingDocuments)
- {
- var currentFiles = Directory.GetFiles(sourceDirectory, "*.pdf");
- var currentFileIds = currentFiles.ToLookup(SourceFileId);
- var deletedDocuments = existingDocuments.Where(d => !currentFileIds.Contains(d.DocumentId));
- return Task.FromResult(deletedDocuments);
- }
-
- public Task> CreateChunksForDocumentAsync(IngestedDocument document)
- {
- using var pdf = PdfDocument.Open(Path.Combine(sourceDirectory, document.DocumentId));
- var paragraphs = pdf.GetPages().SelectMany(GetPageParagraphs).ToList();
-
- return Task.FromResult(paragraphs.Select(p => new IngestedChunk
- {
- Key = Guid.CreateVersion7().ToString(),
- DocumentId = document.DocumentId,
- PageNumber = p.PageNumber,
- Text = p.Text,
- }));
- }
-
- private static IEnumerable<(int PageNumber, int IndexOnPage, string Text)> GetPageParagraphs(Page pdfPage)
- {
- var letters = pdfPage.Letters;
- var words = NearestNeighbourWordExtractor.Instance.GetWords(letters);
- var textBlocks = DocstrumBoundingBoxes.Instance.GetBlocks(words);
- var pageText = string.Join(Environment.NewLine + Environment.NewLine,
- textBlocks.Select(t => t.Text.ReplaceLineEndings(" ")));
-
-#pragma warning disable SKEXP0050 // Type is for evaluation purposes only
- return TextChunker.SplitPlainTextParagraphs([pageText], 200)
- .Select((text, index) => (pdfPage.Number, index, text));
-#pragma warning restore SKEXP0050 // Type is for evaluation purposes only
- }
-}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/Ingestion/PdfPigReader.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/Ingestion/PdfPigReader.cs
new file mode 100644
index 00000000000..f6de539eb22
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/Ingestion/PdfPigReader.cs
@@ -0,0 +1,42 @@
+using Microsoft.Extensions.DataIngestion;
+using UglyToad.PdfPig;
+using UglyToad.PdfPig.Content;
+using UglyToad.PdfPig.DocumentLayoutAnalysis.PageSegmenter;
+using UglyToad.PdfPig.DocumentLayoutAnalysis.WordExtractor;
+
+namespace aichatweb.Services.Ingestion;
+
+internal sealed class PdfPigReader : IngestionDocumentReader
+{
+ public override Task ReadAsync(Stream source, string identifier, string mediaType, CancellationToken cancellationToken = default)
+ {
+ using var pdf = PdfDocument.Open(source);
+ var document = new IngestionDocument(identifier);
+ foreach (var page in pdf.GetPages())
+ {
+ document.Sections.Add(GetPageSection(page));
+ }
+ return Task.FromResult(document);
+ }
+
+ private static IngestionDocumentSection GetPageSection(Page pdfPage)
+ {
+ var section = new IngestionDocumentSection
+ {
+ PageNumber = pdfPage.Number,
+ };
+
+ var letters = pdfPage.Letters;
+ var words = NearestNeighbourWordExtractor.Instance.GetWords(letters);
+
+ foreach (var textBlock in DocstrumBoundingBoxes.Instance.GetBlocks(words))
+ {
+ section.Elements.Add(new IngestionDocumentParagraph(textBlock.Text)
+ {
+ Text = textBlock.Text
+ });
+ }
+
+ return section;
+ }
+}
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/SemanticSearch.cs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/SemanticSearch.cs
index 291c6c4b4a9..8072f8bcddb 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/SemanticSearch.cs
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/Services/SemanticSearch.cs
@@ -1,12 +1,22 @@
-using Microsoft.Extensions.VectorData;
+using aichatweb.Services.Ingestion;
+using Microsoft.Extensions.VectorData;
namespace aichatweb.Services;
public class SemanticSearch(
- VectorStoreCollection vectorCollection)
+ VectorStoreCollection vectorCollection,
+ [FromKeyedServices("ingestion_directory")] DirectoryInfo ingestionDirectory,
+ DataIngestor dataIngestor)
{
+ private Task? _ingestionTask;
+
+ public async Task LoadDocumentsAsync() => await ( _ingestionTask ??= dataIngestor.IngestDataAsync(ingestionDirectory, searchPattern: "*.*"));
+
public async Task> SearchAsync(string text, string? documentIdFilter, int maxResults)
{
+ // Ensure documents have been loaded before searching
+ await LoadDocumentsAsync();
+
var nearest = vectorCollection.SearchAsync(text, maxResults, new VectorSearchOptions
{
Filter = documentIdFilter is { Length: > 0 } ? record => record.DocumentId == documentIdFilter : null,
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/aichatweb.csproj b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/aichatweb.csproj
index da39bf3a4b0..c10763be0ea 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/aichatweb.csproj
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/aichatweb.csproj
@@ -1,7 +1,7 @@
- net9.0
+ net10.0
enable
enable
secret
@@ -11,11 +11,13 @@
-
+
+
-
+
+
-
+
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/wwwroot/Data/Example_GPS_Watch.md b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/wwwroot/Data/Example_GPS_Watch.md
new file mode 100644
index 00000000000..f7d042edf83
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/wwwroot/Data/Example_GPS_Watch.md
@@ -0,0 +1,193 @@
+# TrailMaster GPS Watch
+
+## 1. Introduction
+
+ExpeditionTech is proud to introduce the TrailMaster GPS Watch, a state-of-the-art outdoor electronics device designed for the most demanding outdoor adventurers. This watch is meticulously engineered to provide accurate location information, real-time sharing, and reliable mapping capabilities. The TrailMaster GPS Watch is built to withstand the harshest environmental conditions while providing users with the critical data they need to navigate through challenging terrain and optimize their outdoor experiences.
+
+### 1.1 Product Overview
+
+The TrailMaster GPS Watch is a high-performance outdoor electronics device that combines advanced GPS technology with rugged design. It features a durable, shock-resistant casing, a high-contrast display for optimal visibility in bright sunlight, and a built-in rechargeable battery for extended use in remote areas. The TrailMaster GPS Watch is equipped with a comprehensive suite of navigation tools, including topographic maps, trail tracking, and waypoint management. Additionally, it offers real-time location sharing for enhanced safety and group coordination during outdoor activities.
+
+### 1.2 Intended Use
+
+The TrailMaster GPS Watch is intended for use by experienced outdoor enthusiasts, including hikers, backpackers, trail runners, and mountaineers. This device is designed to provide essential navigation information and location tracking in remote, off-grid environments. Users can rely on the TrailMaster GPS Watch to plan and execute their outdoor adventures with confidence, knowing they have access to accurate maps, location data, and real-time sharing capabilities. Although the TrailMaster GPS Watch is engineered for extreme durability, users should always exercise caution and take appropriate safety measures when engaging in outdoor activities.
+
+#### 1.2.1 Safety Precautions
+
+Before using the TrailMaster GPS Watch, users must ensure that the device is fully charged and properly calibrated according to the manufacturer's specifications. It is essential to familiarize oneself with the watch's features, controls, and navigation functions before venturing into remote outdoor areas. Users should also be aware of potential environmental hazards, such as extreme weather conditions, rugged terrain, and limited access to emergency services. Proper planning, preparation, and adherence to best practices for outdoor safety are critical when using the TrailMaster GPS Watch.
+
+## 2. Technical Specifications
+
+### 2.1 Hardware
+
+#### 2.1.1 Processor and Memory
+
+The TrailMaster GPS Watch is equipped with a high-performance ARM Cortex processor, ensuring fast and efficient operation for all your outdoor activities. The device also comes with 4GB of internal memory, providing ample storage for maps, routes, and waypoints. The processor operates at 1.2GHz, allowing for quick map rendering and location updates.
+
+#### 2.1.2 GPS Module
+
+The GPS module in the TrailMaster GPS Watch utilizes a state-of-the-art multi-constellation receiver, supporting GPS, GLONASS, and Galileo satellite systems. This enables precise location tracking and navigation, even in challenging outdoor environments. The module also features advanced signal processing algorithms, ensuring reliable performance in remote areas with limited satellite visibility.
+
+#### 2.1.3 Sensors
+
+Equipped with a comprehensive set of sensors, the TrailMaster GPS Watch offers an array of valuable data for outdoor enthusiasts. The built-in altimeter provides accurate altitude readings, while the barometer monitors changes in air pressure to forecast weather conditions. Additionally, the watch includes a digital compass for reliable orientation, ensuring a seamless navigation experience in the great outdoors.
+
+### 2.2 Software
+
+#### 2.2.1 Operating System
+
+The TrailMaster GPS Watch runs on a proprietary operating system optimized for outdoor navigation and tracking. This custom OS combines robustness and efficiency, allowing for seamless integration with the device's hardware and sensors. The interface is designed for intuitive interaction, enabling users to access maps, location sharing, and other features with ease, even in challenging outdoor conditions.
+
+#### 2.2.2 Map Data
+
+The TrailMaster GPS Watch comes preloaded with detailed topographic maps, providing comprehensive coverage of trails, terrain, and points of interest. These maps are stored locally on the device, ensuring rapid access and smooth navigation without relying on cellular or data network connectivity. Users can also import additional map data via the dedicated USB port, expanding the watch's mapping capabilities for customized adventures.
+
+## 3. Setup and Installation
+
+### 3.1 Charging
+
+Before using your TrailMaster GPS Watch for the first time, it is essential to ensure that the device is fully charged. To charge your GPS watch, follow these steps:
+
+1. Locate the charging port cover on the back of the watch.
+2. Gently lift the cover to expose the charging port.
+3. Connect the provided USB charging cable to the port, ensuring a secure connection.
+4. Plug the other end of the cable into a power source, such as a computer or USB wall adapter.
+5. Allow the watch to charge for at least 2 hours, or until the battery indicator on the display shows it is fully charged.
+
+For optimal performance, it is recommended to fully charge the device before each use and to avoid overcharging. Please refer to the TrailMaster GPS Watch technical specifications for detailed battery charging information.
+
+### 3.2 Initial Configuration
+
+Once your TrailMaster GPS Watch is fully charged, you can proceed with the initial configuration by following these steps:
+
+1. Press and hold the power button located on the side of the watch to turn it on.
+2. Follow the on-screen instructions to select your language, set the date and time, and calibrate the GPS.
+3. Connect the watch to your smartphone using the TrailMaster companion app to enable real-time location sharing and receive notifications.
+4. Customize the watch settings, such as display preferences and map views, to suit your outdoor activities.
+
+Before using the GPS functionality, it is crucial to ensure that the watch has a clear view of the sky to acquire GPS signals. Please refer to the TrailMaster GPS Watch user guide for detailed instructions on GPS calibration and satellite acquisition.
+
+## 4. Operation
+
+### 4.1 Basic Functions
+
+The TrailMaster GPS Watch is designed for outdoor use and offers a variety of basic functions to assist users in navigating and staying connected during their adventures.
+
+#### 4.1.1 Powering On and Off
+
+To power on the TrailMaster GPS Watch, press and hold the power button (located on the right side of the watch) for 3 seconds. The watch will display the ExpeditionTech logo and then proceed to the main navigation screen. To power off the watch, press and hold the power button and select "Power Off" from the menu.
+
+#### 4.1.2 Accessing Built-in Maps
+
+The TrailMaster GPS Watch comes pre-loaded with detailed maps of various outdoor locations. To access the maps, press the map button (located on the left side of the watch) to bring up the map interface. From here, you can view your current location, set waypoints, and plan routes.
+
+#### 4.1.3 Real-time Location Sharing
+
+Utilize the real-time location sharing feature to transmit your current location to designated contacts. Press the share button (located on the top of the watch) and select the specific contacts you wish to share your location with. This feature requires a stable GPS signal and a connected smartphone with the TrailMaster app installed.
+
+### 4.2 Advanced Features
+
+The TrailMaster GPS Watch offers advanced features to enhance the user experience and provide additional functionality for outdoor enthusiasts.
+
+#### 4.2.1 Rugged Design
+
+The TrailMaster GPS Watch is built to withstand rugged outdoor conditions, including extreme temperatures, water exposure, and impact. The durable casing and reinforced strap ensure that the watch remains operational in challenging environments.
+
+#### 4.2.2 Navigation Tools
+
+In addition to basic map access, the TrailMaster GPS Watch includes advanced navigation tools such as compass, altimeter, and barometer. These tools provide vital information for navigation and weather monitoring while in the field.
+
+#### 4.2.3 Customizing Data Screens
+
+Users can customize the data screens on the TrailMaster GPS Watch to display the specific information they require during their outdoor activities. From the main menu, navigate to the settings and select "Data Screens" to adjust the layout and content of the screens.
+
+#### 4.2.4 Tracking Performance Metrics
+
+The TrailMaster GPS Watch is equipped with sensors to track performance metrics such as distance traveled, speed, elevation gain, and heart rate. Use the tracking mode to monitor these metrics in real-time or review them after completing an activity.
+
+## 5. Troubleshooting
+
+### 5.1 Diagnostic Tools and Equipment
+
+When troubleshooting the TrailMaster GPS Watch, it is essential to use specialized industrial diagnostic tools and equipment to perform accurate diagnostic tests. These tools and equipment include:
+
+- **GPS Signal Analyzer**: Use a GPS signal analyzer to check the strength and quality of the satellite signals received by the watch.
+- **RF Spectrum Analyzer**: An RF spectrum analyzer is required to analyze the radio frequency spectrum and identify any interference affecting the GPS reception.
+- **Ruggedness Test Equipment**: Perform ruggedness tests using shock and vibration equipment to ensure the watch can withstand outdoor adventures without performance issues.
+
+It is essential to use these specialized tools and equipment to accurately diagnose any issues with the TrailMaster GPS Watch and ensure optimal performance.
+
+### 5.2 Common Issues
+
+#### 5.2.1 GPS Signal Loss
+
+**Symptoms:**
+
+The watch displays "No GPS Signal" or intermittently loses GPS signal during use.
+
+**Potential Causes:**
+
+- Obstruction of satellite signals due to dense foliage, tall buildings, or natural terrain features.
+- Radio frequency interference affecting GPS reception.
+- Wear and tear on the GPS antenna or receiver.
+
+**Troubleshooting Steps:**
+
+1. Check the surroundings for any obstructions blocking satellite signals.
+2. Use an RF spectrum analyzer to identify any potential sources of interference.
+3. Perform a diagnostics test using a GPS signal analyzer to assess the strength and quality of the GPS signal received by the watch.
+4. If the issue persists, contact ExpeditionTech customer support for further assistance.
+
+#### 5.2.2 Connectivity Issues
+
+**Symptoms:**
+
+- Inability to share real-time location or connect to other devices.
+- Unreliable Bluetooth connectivity.
+
+**Potential Causes:**
+
+- Bluetooth interference from other electronic devices.
+- Signal obstruction due to environmental factors.
+- Software or firmware issues.
+
+**Troubleshooting Steps:**
+
+1. Ensure the watch is within the recommended Bluetooth range of the connected device.
+2. Identify and eliminate potential sources of Bluetooth interference in the vicinity.
+3. Update the watch's firmware to the latest version to address any software-related connectivity issues.
+4. If connectivity problems persist, perform a comprehensive diagnostics test using a specialized Bluetooth signal analyzer.
+
+By following these troubleshooting steps and using specialized diagnostic tools and equipment, users can effectively identify and address common issues with the TrailMaster GPS Watch.
+
+## 6. Maintenance and Care
+
+### 6.1 Cleaning
+
+It is important to regularly clean your TrailMaster GPS Watch to ensure optimal performance. Use a soft, damp cloth to gently wipe the watch face and straps. Do not use harsh chemicals or solvents, as these may damage the watch's rugged design and built-in maps.
+
+### 6.2 Battery Maintenance
+
+The TrailMaster GPS Watch is equipped with a rechargeable lithium-ion battery. To optimize battery life, it is recommended to fully charge the watch before each outdoor adventure. Additionally, it is important to avoid exposing the watch to extreme temperatures, as this may affect battery performance.
+
+### 6.3 Storage
+
+When not in use, store your TrailMaster GPS Watch in a cool, dry place. Avoid prolonged exposure to direct sunlight or extreme temperatures, as this may cause damage to the device.
+
+### 6.4 Software Updates
+
+To ensure that your TrailMaster GPS Watch operates efficiently, it is important to regularly check for and install software updates. These updates may include improvements to the built-in maps, real-time location sharing, and overall performance of the watch.
+
+### 6.5 Technical Diagnostics
+
+For advanced maintenance and care, it is recommended to perform technical diagnostics using specialized industrial and scientific equipment. Refer to formal industry specification codes and standards bodies for detailed instructions on performing these diagnostics.
+
+## 7. Regulatory Compliance
+
+### 7.1 FCC Compliance
+
+The ExpeditionTech TrailMaster GPS Watch complies with Part 15 of the FCC rules. Operation is subject to the following two conditions: (1) this device may not cause harmful interference, and (2) this device must accept any interference received, including interference that may cause undesired operation. Any changes or modifications not expressly approved by the party responsible for compliance could void the user's authority to operate the equipment. The user is cautioned that any changes or modifications made to this device that are not expressly approved by the manufacturer could void the user's authority to operate the equipment.
+
+### 7.2 CE Marking
+
+The CE marking on the ExpeditionTech TrailMaster GPS Watch indicates that it complies with the essential requirements of the relevant European health, safety, and environmental protection legislation. The device is in conformity with the essential requirements and other relevant provisions of Directive 1999/5/EC. The CE marking is affixed to the device to demonstrate that it meets the essential requirements for safety, health, and environmental protection. The user should only use the device in accordance with the instructions provided in the user manual.
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/wwwroot/Data/Example_GPS_Watch.pdf b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/wwwroot/Data/Example_GPS_Watch.pdf
deleted file mode 100644
index c87df644c58..00000000000
Binary files a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/wwwroot/Data/Example_GPS_Watch.pdf and /dev/null differ
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/wwwroot/lib/markdown_viewer/viewer.html b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/wwwroot/lib/markdown_viewer/viewer.html
new file mode 100644
index 00000000000..94447ffc47e
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/wwwroot/lib/markdown_viewer/viewer.html
@@ -0,0 +1,32 @@
+
+
+
+
+ Markdown viewer
+
+
+
+
+
+
+
+
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/wwwroot/lib/markdown_viewer/viewer.mjs b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/wwwroot/lib/markdown_viewer/viewer.mjs
new file mode 100644
index 00000000000..78eb819c9a8
--- /dev/null
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/aichatweb.OpenAI_AzureAISearch.verified/aichatweb/wwwroot/lib/markdown_viewer/viewer.mjs
@@ -0,0 +1,13 @@
+import { parse } from '../marked/dist/marked.esm.js';
+import purify from '../dompurify/dist/purify.es.mjs';
+
+const url = new URL(window.location);
+const fileUrl = url.searchParams.get('file'); // address of the document to display, read from the "file" query-string parameter
+if (!fileUrl) {
+ throw new Error('File not specified in the URL query string');
+}
+
+var response = await fetch(fileUrl); // NOTE(review): response.ok is not checked, so the body of an HTTP error response would be parsed and displayed as well
+var text = await response.text();
+
+document.getElementById('content').innerHTML = purify.sanitize(parse(text)); // the parsed output goes through purify.sanitize() before it is assigned as the markup of the 'content' element
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/mcpserver.AotTrue.verified/mcpserver/mcpserver.csproj b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/mcpserver.AotTrue.verified/mcpserver/mcpserver.csproj
index 1b2dd939947..5325611c198 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/mcpserver.AotTrue.verified/mcpserver/mcpserver.csproj
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/mcpserver.AotTrue.verified/mcpserver/mcpserver.csproj
@@ -1,7 +1,7 @@
- net9.0
+ net10.0
win-x64;win-arm64;osx-arm64;linux-x64;linux-arm64;linux-musl-x64
Exe
enable
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/mcpserver.Basic.verified/mcpserver/mcpserver.csproj b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/mcpserver.Basic.verified/mcpserver/mcpserver.csproj
index f6da2d9485e..393d0558d5e 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/mcpserver.Basic.verified/mcpserver/mcpserver.csproj
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/mcpserver.Basic.verified/mcpserver/mcpserver.csproj
@@ -1,7 +1,7 @@
- net9.0
+ net10.0
win-x64;win-arm64;osx-arm64;linux-x64;linux-arm64;linux-musl-x64
Exe
enable
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/mcpserver.SelfContainedFalse.verified/mcpserver/mcpserver.csproj b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/mcpserver.SelfContainedFalse.verified/mcpserver/mcpserver.csproj
index a25caa73486..21b79a59f81 100644
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/mcpserver.SelfContainedFalse.verified/mcpserver/mcpserver.csproj
+++ b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/Snapshots/mcpserver.SelfContainedFalse.verified/mcpserver/mcpserver.csproj
@@ -1,7 +1,7 @@
- net9.0
+ net10.0
Major
Exe
enable
diff --git a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/TemplateSandbox/.gitignore b/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/TemplateSandbox/.gitignore
deleted file mode 100644
index ee80e74117d..00000000000
--- a/test/ProjectTemplates/Microsoft.Extensions.AI.Templates.IntegrationTests/TemplateSandbox/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-# Template test output
-output/
diff --git a/test/Shared/JsonSchemaExporter/JsonSchemaExporterConfigurationTests.cs b/test/Shared/JsonSchemaExporter/JsonSchemaExporterConfigurationTests.cs
deleted file mode 100644
index 1d2b6caa74e..00000000000
--- a/test/Shared/JsonSchemaExporter/JsonSchemaExporterConfigurationTests.cs
+++ /dev/null
@@ -1,35 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Text.Json.Schema;
-using Xunit;
-
-namespace Microsoft.Extensions.AI.JsonSchemaExporter;
-
-public static class JsonSchemaExporterConfigurationTests
-{
- [Theory]
- [InlineData(false)]
- [InlineData(true)]
- public static void JsonSchemaExporterOptions_DefaultValues(bool useSingleton)
- {
- JsonSchemaExporterOptions configuration = useSingleton ? JsonSchemaExporterOptions.Default : new();
- Assert.False(configuration.TreatNullObliviousAsNonNullable);
- Assert.Null(configuration.TransformSchemaNode);
- }
-
- [Fact]
- public static void JsonSchemaExporterOptions_Singleton_ReturnsSameInstance()
- {
- Assert.Same(JsonSchemaExporterOptions.Default, JsonSchemaExporterOptions.Default);
- }
-
- [Theory]
- [InlineData(false)]
- [InlineData(true)]
- public static void JsonSchemaExporterOptions_TreatNullObliviousAsNonNullable(bool treatNullObliviousAsNonNullable)
- {
- JsonSchemaExporterOptions configuration = new() { TreatNullObliviousAsNonNullable = treatNullObliviousAsNonNullable };
- Assert.Equal(treatNullObliviousAsNonNullable, configuration.TreatNullObliviousAsNonNullable);
- }
-}
diff --git a/test/Shared/JsonSchemaExporter/JsonSchemaExporterTests.cs b/test/Shared/JsonSchemaExporter/JsonSchemaExporterTests.cs
deleted file mode 100644
index c83c50c1f2e..00000000000
--- a/test/Shared/JsonSchemaExporter/JsonSchemaExporterTests.cs
+++ /dev/null
@@ -1,180 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System;
-using System.Collections.Generic;
-using System.Collections.Immutable;
-using System.Text.Json;
-using System.Text.Json.Nodes;
-using System.Text.Json.Schema;
-using System.Text.Json.Serialization;
-using System.Text.Json.Serialization.Metadata;
-#if !NET9_0_OR_GREATER
-using System.Xml.Linq;
-#endif
-using Xunit;
-using static Microsoft.Extensions.AI.JsonSchemaExporter.TestTypes;
-
-#pragma warning disable SA1402 // File may only contain a single type
-
-namespace Microsoft.Extensions.AI.JsonSchemaExporter;
-
-public abstract class JsonSchemaExporterTests
-{
- protected abstract JsonSerializerOptions Options { get; }
-
- [Theory]
- [MemberData(nameof(TestTypes.GetTestData), MemberType = typeof(TestTypes))]
- public void TestTypes_GeneratesExpectedJsonSchema(ITestData testData)
- {
- JsonSerializerOptions options = testData.Options is { } opts
- ? new(opts) { TypeInfoResolver = Options.TypeInfoResolver }
- : Options;
-
- JsonNode schema = options.GetJsonSchemaAsNode(testData.Type, (JsonSchemaExporterOptions?)testData.ExporterOptions);
- SchemaTestHelpers.AssertEqualJsonSchema(testData.ExpectedJsonSchema, schema);
- }
-
- [Theory]
- [MemberData(nameof(TestTypes.GetTestDataUsingAllValues), MemberType = typeof(TestTypes))]
- public void TestTypes_SerializedValueMatchesGeneratedSchema(ITestData testData)
- {
- JsonSerializerOptions options = testData.Options is { } opts
- ? new(opts) { TypeInfoResolver = Options.TypeInfoResolver }
- : Options;
-
- JsonNode schema = options.GetJsonSchemaAsNode(testData.Type, (JsonSchemaExporterOptions?)testData.ExporterOptions);
- JsonNode? instance = JsonSerializer.SerializeToNode(testData.Value, testData.Type, options);
- SchemaTestHelpers.AssertDocumentMatchesSchema(schema, instance);
- }
-
- [Theory]
- [InlineData(typeof(string), "string")]
- [InlineData(typeof(int[]), "array")]
- [InlineData(typeof(Dictionary), "object")]
- [InlineData(typeof(TestTypes.SimplePoco), "object")]
- public void TreatNullObliviousAsNonNullable_True_MarksAllReferenceTypesAsNonNullable(Type referenceType, string expectedType)
- {
- Assert.True(!referenceType.IsValueType);
- var config = new JsonSchemaExporterOptions { TreatNullObliviousAsNonNullable = true };
- JsonNode schema = Options.GetJsonSchemaAsNode(referenceType, config);
- JsonValue type = Assert.IsAssignableFrom(schema["type"]);
- Assert.Equal(expectedType, (string)type!);
- }
-
- [Theory]
- [InlineData(typeof(int), "integer")]
- [InlineData(typeof(double), "number")]
- [InlineData(typeof(bool), "boolean")]
- [InlineData(typeof(ImmutableArray), "array")]
- [InlineData(typeof(TestTypes.StructDictionary), "object")]
- [InlineData(typeof(TestTypes.SimpleRecordStruct), "object")]
- public void TreatNullObliviousAsNonNullable_True_DoesNotImpactNonReferenceTypes(Type referenceType, string expectedType)
- {
- Assert.True(referenceType.IsValueType);
- var config = new JsonSchemaExporterOptions { TreatNullObliviousAsNonNullable = true };
- JsonNode schema = Options.GetJsonSchemaAsNode(referenceType, config);
- JsonValue value = Assert.IsAssignableFrom(schema["type"]);
- Assert.Equal(expectedType, (string)value!);
- }
-
-#if !NET9_0_OR_GREATER // Disable until https://github.com/dotnet/runtime/pull/108764 gets backported
- [Fact]
- public void CanGenerateXElementSchema()
- {
- JsonNode schema = Options.GetJsonSchemaAsNode(typeof(XElement));
- Assert.True(schema.ToJsonString().Length < 100_000);
- }
-#endif
-
-#if !NET9_0_OR_GREATER // Disable until https://github.com/dotnet/runtime/pull/109954 gets backported
- [Fact]
- public void TransformSchemaNode_PropertiesWithCustomConverters()
- {
- // Regression test for https://github.com/dotnet/runtime/issues/109868
- List<(Type? parentType, string? propertyName, Type type)> visitedNodes = new();
- JsonSchemaExporterOptions exporterOptions = new()
- {
- TransformSchemaNode = (ctx, schema) =>
- {
-#if NET9_0_OR_GREATER
- visitedNodes.Add((ctx.PropertyInfo?.DeclaringType, ctx.PropertyInfo?.Name, ctx.TypeInfo.Type));
-#else
- visitedNodes.Add((ctx.DeclaringType, ctx.PropertyInfo?.Name, ctx.TypeInfo.Type));
-#endif
- return schema;
- }
- };
-
- List<(Type? parentType, string? propertyName, Type type)> expectedNodes =
- [
- (typeof(ClassWithPropertiesUsingCustomConverters), "Prop1", typeof(ClassWithPropertiesUsingCustomConverters.ClassWithCustomConverter1)),
- (typeof(ClassWithPropertiesUsingCustomConverters), "Prop2", typeof(ClassWithPropertiesUsingCustomConverters.ClassWithCustomConverter2)),
- (null, null, typeof(ClassWithPropertiesUsingCustomConverters)),
- ];
-
- Options.GetJsonSchemaAsNode(typeof(ClassWithPropertiesUsingCustomConverters), exporterOptions);
-
- Assert.Equal(expectedNodes, visitedNodes);
- }
-#endif
-
- [Fact]
- public void TreatNullObliviousAsNonNullable_True_DoesNotImpactObjectType()
- {
- var config = new JsonSchemaExporterOptions { TreatNullObliviousAsNonNullable = true };
- JsonNode schema = Options.GetJsonSchemaAsNode(typeof(object), config);
- Assert.False(schema is JsonObject jObj && jObj.ContainsKey("type"));
- }
-
- [Fact]
- public void TypeWithDisallowUnmappedMembers_AdditionalPropertiesFailValidation()
- {
- JsonNode schema = Options.GetJsonSchemaAsNode(typeof(TestTypes.PocoDisallowingUnmappedMembers));
- JsonNode? jsonWithUnmappedProperties = JsonNode.Parse("""{ "UnmappedProperty" : {} }""");
- SchemaTestHelpers.AssertDoesNotMatchSchema(schema, jsonWithUnmappedProperties);
- }
-
- [Fact]
- public void GetJsonSchema_NullInputs_ThrowsArgumentNullException()
- {
- Assert.Throws(() => ((JsonSerializerOptions)null!).GetJsonSchemaAsNode(typeof(int)));
- Assert.Throws(() => Options.GetJsonSchemaAsNode(type: null!));
- Assert.Throws(() => ((JsonTypeInfo)null!).GetJsonSchemaAsNode());
- }
-
- [Fact]
- public void GetJsonSchema_NoResolver_ThrowInvalidOperationException()
- {
- var options = new JsonSerializerOptions();
- Assert.Throws(() => options.GetJsonSchemaAsNode(typeof(int)));
- }
-
- [Fact]
- public void MaxDepth_SetToZero_NonTrivialSchema_ThrowsInvalidOperationException()
- {
- JsonSerializerOptions options = new(Options) { MaxDepth = 1 };
- var ex = Assert.Throws(() => options.GetJsonSchemaAsNode(typeof(TestTypes.SimplePoco)));
- Assert.Contains("The depth of the generated JSON schema exceeds the JsonSerializerOptions.MaxDepth setting.", ex.Message);
- }
-
- [Fact]
- public void ReferenceHandlePreserve_Enabled_ThrowsNotSupportedException()
- {
- var options = new JsonSerializerOptions(Options) { ReferenceHandler = ReferenceHandler.Preserve };
- options.MakeReadOnly();
-
- var ex = Assert.Throws(() => options.GetJsonSchemaAsNode(typeof(TestTypes.SimplePoco)));
- Assert.Contains("ReferenceHandler.Preserve", ex.Message);
- }
-}
-
-public sealed class ReflectionJsonSchemaExporterTests : JsonSchemaExporterTests
-{
- protected override JsonSerializerOptions Options => JsonSerializerOptions.Default;
-}
-
-public sealed class SourceGenJsonSchemaExporterTests : JsonSchemaExporterTests
-{
- protected override JsonSerializerOptions Options => TestTypes.TestTypesContext.Default.Options;
-}
diff --git a/test/Shared/Shared.Tests.csproj b/test/Shared/Shared.Tests.csproj
index b7e27306f2a..2764d5f5d5d 100644
--- a/test/Shared/Shared.Tests.csproj
+++ b/test/Shared/Shared.Tests.csproj
@@ -2,7 +2,6 @@
Microsoft.Shared.Test
Unit tests for Microsoft.Shared
- $(DefineConstants);TESTS_JSON_SCHEMA_EXPORTER_POLYFILL
diff --git a/test/TestUtilities/XUnit/ConditionalTheoryDiscoverer.cs b/test/TestUtilities/XUnit/ConditionalTheoryDiscoverer.cs
index b1e53b8ed77..e30b5206c8c 100644
--- a/test/TestUtilities/XUnit/ConditionalTheoryDiscoverer.cs
+++ b/test/TestUtilities/XUnit/ConditionalTheoryDiscoverer.cs
@@ -63,9 +63,21 @@ protected override IEnumerable CreateTestCasesForDataRow(ITestFr
}
}
- return skipReason != null ?
- base.CreateTestCasesForSkippedDataRow(discoveryOptions, testMethod, theoryAttribute, dataRow, skipReason)
- : base.CreateTestCasesForDataRow(discoveryOptions, testMethod, theoryAttribute, dataRow);
+ if (skipReason != null)
+ {
+ return base.CreateTestCasesForSkippedDataRow(discoveryOptions, testMethod, theoryAttribute, dataRow, skipReason);
+ }
+
+ // Create test cases that can handle runtime SkipTestException
+ return new[]
+ {
+ new SkippedTheoryTestCase(
+ DiagnosticMessageSink,
+ discoveryOptions.MethodDisplayOrDefault(),
+ discoveryOptions.MethodDisplayOptionsOrDefault(),
+ testMethod,
+ dataRow)
+ };
}
protected override IEnumerable CreateTestCasesForSkippedDataRow(
diff --git a/test/TestUtilities/XUnit/SkippedTheoryTestCase.cs b/test/TestUtilities/XUnit/SkippedTheoryTestCase.cs
new file mode 100644
index 00000000000..e91a8f762d5
--- /dev/null
+++ b/test/TestUtilities/XUnit/SkippedTheoryTestCase.cs
@@ -0,0 +1,49 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Threading;
+using System.Threading.Tasks;
+using Xunit.Abstractions;
+using Xunit.Sdk;
+
+namespace Microsoft.TestUtilities;
+
+/// <summary>
+/// A test case for ConditionalTheory that can handle runtime SkipTestException
+/// by wrapping the message bus with SkippedTestMessageBus. Tests that bus counts as skipped are moved from the run result's Failed total to its Skipped total.
+/// </summary>
+public class SkippedTheoryTestCase : XunitTestCase
+{
+ [Obsolete("Called by the de-serializer; should only be called by deriving classes for de-serialization purposes", error: true)]
+ public SkippedTheoryTestCase()
+ {
+ }
+
+ public SkippedTheoryTestCase(
+ IMessageSink diagnosticMessageSink,
+ TestMethodDisplay defaultMethodDisplay,
+ TestMethodDisplayOptions defaultMethodDisplayOptions,
+ ITestMethod testMethod,
+ object[]? testMethodArguments = null)
+ : base(diagnosticMessageSink, defaultMethodDisplay, defaultMethodDisplayOptions, testMethod, testMethodArguments)
+ {
+ }
+
+ public override async Task RunAsync(IMessageSink diagnosticMessageSink,
+ IMessageBus messageBus,
+ object[] constructorArguments,
+ ExceptionAggregator aggregator,
+ CancellationTokenSource cancellationTokenSource)
+ {
+ using SkippedTestMessageBus skipMessageBus = new(messageBus);
+ var result = await base.RunAsync(diagnosticMessageSink, skipMessageBus, constructorArguments, aggregator, cancellationTokenSource);
+ if (skipMessageBus.SkippedTestCount > 0)
+ {
+ result.Failed -= skipMessageBus.SkippedTestCount; // NOTE(review): presumably the base run tallied these tests as failures before the bus reported them as skipped — confirm
+ result.Skipped += skipMessageBus.SkippedTestCount;
+ }
+
+ return result;
+ }
+}
\ No newline at end of file