-
Notifications
You must be signed in to change notification settings - Fork 849
Add MarkItDownMcpReader for MCP server support #7025
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
3464cb2
9ee6a70
55eddcf
f91f70b
ab68e3a
b8e70dc
dceabf6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
…ew.3, add McpClientOptions, refactor code Co-authored-by: adamsitnik <[email protected]>
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,14 +18,17 @@ namespace Microsoft.Extensions.DataIngestion; | |
| public class MarkItDownMcpReader : IngestionDocumentReader | ||
| { | ||
| private readonly Uri _mcpServerUri; | ||
| private readonly McpClientOptions? _options; | ||
|
|
||
| /// <summary> | ||
| /// Initializes a new instance of the <see cref="MarkItDownMcpReader"/> class. | ||
| /// </summary> | ||
| /// <param name="mcpServerUri">The URI of the MarkItDown MCP server (e.g., http://localhost:3001/sse).</param> | ||
| public MarkItDownMcpReader(Uri mcpServerUri) | ||
| /// <param name="mcpServerUri">The URI of the MarkItDown MCP server (e.g., http://localhost:3001/mcp).</param> | ||
| /// <param name="options">Optional MCP client options for configuring the connection.</param> | ||
| public MarkItDownMcpReader(Uri mcpServerUri, McpClientOptions? options = null) | ||
| { | ||
| _mcpServerUri = Throw.IfNull(mcpServerUri); | ||
| _options = options; | ||
| } | ||
|
|
||
| /// <inheritdoc/> | ||
|
|
@@ -44,16 +47,14 @@ public override async Task<IngestionDocument> ReadAsync(FileInfo source, string | |
| byte[] fileBytes = await File.ReadAllBytesAsync(source.FullName, cancellationToken).ConfigureAwait(false); | ||
| #else | ||
| byte[] fileBytes; | ||
| using (FileStream fs = new(source.FullName, FileMode.Open, FileAccess.Read, FileShare.Read)) | ||
| using (FileStream fs = new(source.FullName, FileMode.Open, FileAccess.Read, FileShare.Read, 1, FileOptions.Asynchronous)) | ||
| { | ||
| using MemoryStream ms = new(); | ||
| await fs.CopyToAsync(ms).ConfigureAwait(false); | ||
| fileBytes = ms.ToArray(); | ||
| } | ||
| #endif | ||
| string base64Content = Convert.ToBase64String(fileBytes); | ||
| string mimeType = string.IsNullOrEmpty(mediaType) ? "application/octet-stream" : mediaType!; | ||
| string dataUri = $"data:{mimeType};base64,{base64Content}"; | ||
| string dataUri = CreateDataUri(fileBytes, mediaType); | ||
|
|
||
| string markdown = await ConvertToMarkdownAsync(dataUri, cancellationToken).ConfigureAwait(false); | ||
|
|
||
|
|
@@ -74,29 +75,30 @@ public override async Task<IngestionDocument> ReadAsync(Stream source, string id | |
| await source.CopyToAsync(ms).ConfigureAwait(false); | ||
| #endif | ||
| byte[] fileBytes = ms.ToArray(); | ||
| string base64Content = Convert.ToBase64String(fileBytes); | ||
| string mimeType = string.IsNullOrEmpty(mediaType) ? "application/octet-stream" : mediaType; | ||
| string dataUri = $"data:{mimeType};base64,{base64Content}"; | ||
| string dataUri = CreateDataUri(fileBytes, mediaType); | ||
|
|
||
| string markdown = await ConvertToMarkdownAsync(dataUri, cancellationToken).ConfigureAwait(false); | ||
|
|
||
| return MarkdownParser.Parse(markdown, identifier); | ||
| } | ||
|
|
||
| private static string CreateDataUri(byte[] fileBytes, string? mediaType) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @copilot, you already have a reference (indirectly) to the Microsoft.Extensions.AI.Abstractions package, which provides a `DataContent` type. `DataContent` implicitly supports data URIs. You could use it instead of building the data URI by hand, e.g. `new DataContent(fileBytes, mimeType).Uri`.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @copilot please send a new PR that addresses this feedback |
||
| { | ||
| string base64Content = Convert.ToBase64String(fileBytes); | ||
| string mimeType = string.IsNullOrEmpty(mediaType) ? "application/octet-stream" : mediaType!; | ||
| return $"data:{mimeType};base64,{base64Content}"; | ||
| } | ||
|
|
||
| private async Task<string> ConvertToMarkdownAsync(string dataUri, CancellationToken cancellationToken) | ||
| { | ||
| // Create HTTP client transport for MCP | ||
| #pragma warning disable CA2007 // Consider calling ConfigureAwait on the awaited task - await using pattern | ||
| await using var transport = new HttpClientTransport(new HttpClientTransportOptions | ||
| { | ||
| Endpoint = _mcpServerUri | ||
| }); | ||
| #pragma warning restore CA2007 | ||
|
|
||
| // Create MCP client | ||
| #pragma warning disable CA2007 // Consider calling ConfigureAwait on the awaited task | ||
| await using var client = await McpClient.CreateAsync(transport, cancellationToken: cancellationToken).ConfigureAwait(false); | ||
| #pragma warning restore CA2007 // Consider calling ConfigureAwait on the awaited task | ||
| await using var client = await McpClient.CreateAsync(transport, _options, loggerFactory: null, cancellationToken).ConfigureAwait(false); | ||
|
|
||
| // Build parameters for convert_to_markdown tool | ||
| var parameters = new Dictionary<string, object?> | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@copilot, once you switch to using `DataContent`, it accepts a `ReadOnlyMemory<byte>`, so rather than using `ToArray()`, you can pass in a `ReadOnlyMemory<byte>` created from the `MemoryStream`'s `GetBuffer()` and `Length`.