Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Address PR feedback: upgrade to ModelContextProtocol.Core 0.4.0-previ…
…ew.3, add McpClientOptions, refactor code

Co-authored-by: adamsitnik <[email protected]>
  • Loading branch information
Copilot and adamsitnik committed Nov 7, 2025
commit ab68e3acc37a87fdff6c5c1ab4e8e4de83164967
2 changes: 1 addition & 1 deletion eng/packages/General.props
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
<PackageVersion Include="Microsoft.Extensions.VectorData.Abstractions" Version="$(MicrosoftExtensionsVectorDataAbstractionsVersion)" />
<PackageVersion Include="Microsoft.IO.RecyclableMemoryStream" Version="3.0.0" />
<PackageVersion Include="Microsoft.ML.Tokenizers" Version="$(MicrosoftMLTokenizersVersion)" />
<PackageVersion Include="ModelContextProtocol" Version="0.4.0-preview.2" />
<PackageVersion Include="ModelContextProtocol.Core" Version="0.4.0-preview.3" />
<PackageVersion Include="Newtonsoft.Json" Version="13.0.3" />
<PackageVersion Include="OllamaSharp" Version="5.1.9" />
<PackageVersion Include="OpenAI" Version="2.6.0" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,17 @@ namespace Microsoft.Extensions.DataIngestion;
public class MarkItDownMcpReader : IngestionDocumentReader
{
private readonly Uri _mcpServerUri;
private readonly McpClientOptions? _options;

/// <summary>
/// Initializes a new instance of the <see cref="MarkItDownMcpReader"/> class.
/// </summary>
/// <param name="mcpServerUri">The URI of the MarkItDown MCP server (e.g., http://localhost:3001/sse).</param>
public MarkItDownMcpReader(Uri mcpServerUri)
/// <param name="mcpServerUri">The URI of the MarkItDown MCP server (e.g., http://localhost:3001/mcp).</param>
/// <param name="options">Optional MCP client options for configuring the connection.</param>
public MarkItDownMcpReader(Uri mcpServerUri, McpClientOptions? options = null)
{
_mcpServerUri = Throw.IfNull(mcpServerUri);
_options = options;
}

/// <inheritdoc/>
Expand All @@ -44,16 +47,14 @@ public override async Task<IngestionDocument> ReadAsync(FileInfo source, string
byte[] fileBytes = await File.ReadAllBytesAsync(source.FullName, cancellationToken).ConfigureAwait(false);
#else
byte[] fileBytes;
using (FileStream fs = new(source.FullName, FileMode.Open, FileAccess.Read, FileShare.Read))
using (FileStream fs = new(source.FullName, FileMode.Open, FileAccess.Read, FileShare.Read, 1, FileOptions.Asynchronous))
{
using MemoryStream ms = new();
await fs.CopyToAsync(ms).ConfigureAwait(false);
fileBytes = ms.ToArray();
}
#endif
string base64Content = Convert.ToBase64String(fileBytes);
string mimeType = string.IsNullOrEmpty(mediaType) ? "application/octet-stream" : mediaType!;
string dataUri = $"data:{mimeType};base64,{base64Content}";
string dataUri = CreateDataUri(fileBytes, mediaType);

string markdown = await ConvertToMarkdownAsync(dataUri, cancellationToken).ConfigureAwait(false);

Expand All @@ -74,29 +75,30 @@ public override async Task<IngestionDocument> ReadAsync(Stream source, string id
await source.CopyToAsync(ms).ConfigureAwait(false);
#endif
byte[] fileBytes = ms.ToArray();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot, once you switch to using DataContent, it accepts a ReadOnlyMemory<byte>, so rather than calling ToArray, you can pass in a ReadOnlyMemory<byte> created from the MemoryStream's GetBuffer() and Length.

string base64Content = Convert.ToBase64String(fileBytes);
string mimeType = string.IsNullOrEmpty(mediaType) ? "application/octet-stream" : mediaType;
string dataUri = $"data:{mimeType};base64,{base64Content}";
string dataUri = CreateDataUri(fileBytes, mediaType);

string markdown = await ConvertToMarkdownAsync(dataUri, cancellationToken).ConfigureAwait(false);

return MarkdownParser.Parse(markdown, identifier);
}

private static string CreateDataUri(byte[] fileBytes, string? mediaType)
Copy link
Member

@stephentoub stephentoub Nov 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot, you already have a reference (indirectly) to the Microsoft.Extensions.AI.Abstractions package, which provides a DataContent type. DataContent implicitly supports data URIs. You could use it instead, like new DataContent(bytes, mimeType).Uri.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot please send a new PR that addresses this feedback

{
string base64Content = Convert.ToBase64String(fileBytes);
string mimeType = string.IsNullOrEmpty(mediaType) ? "application/octet-stream" : mediaType!;
return $"data:{mimeType};base64,{base64Content}";
}

private async Task<string> ConvertToMarkdownAsync(string dataUri, CancellationToken cancellationToken)
{
// Create HTTP client transport for MCP
#pragma warning disable CA2007 // Consider calling ConfigureAwait on the awaited task - await using pattern
await using var transport = new HttpClientTransport(new HttpClientTransportOptions
{
Endpoint = _mcpServerUri
});
#pragma warning restore CA2007

// Create MCP client
#pragma warning disable CA2007 // Consider calling ConfigureAwait on the awaited task
await using var client = await McpClient.CreateAsync(transport, cancellationToken: cancellationToken).ConfigureAwait(false);
#pragma warning restore CA2007 // Consider calling ConfigureAwait on the awaited task
await using var client = await McpClient.CreateAsync(transport, _options, loggerFactory: null, cancellationToken).ConfigureAwait(false);

// Build parameters for convert_to_markdown tool
var parameters = new Dictionary<string, object?>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

<ItemGroup>
<PackageReference Include="Markdig.Signed" />
<PackageReference Include="ModelContextProtocol" />
<PackageReference Include="ModelContextProtocol.Core" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@ using Microsoft.Extensions.DataIngestion;

// Connect to a MarkItDown MCP server (e.g., running in Docker)
IngestionDocumentReader reader =
new MarkItDownMcpReader(new Uri("http://localhost:3001/sse"));
new MarkItDownMcpReader(new Uri("http://localhost:3001/mcp"));

using IngestionPipeline<string> pipeline = new(reader, CreateChunker(), CreateWriter());
```

The MarkItDown MCP server can be run using Docker:

```bash
docker run -p 3001:3001 mcp/markitdown
docker run -p 3001:3001 mcp/markitdown --http --host 0.0.0.0 --port 3001
```

Or installed via pip:
Expand All @@ -60,6 +60,32 @@ pip install markitdown-mcp-server
markitdown-mcp --http --host 0.0.0.0 --port 3001
```

### Integrating with Aspire

Aspire can be used for seamless integration with [MarkItDown MCP](https://github.com/microsoft/markitdown/tree/main/packages/markitdown-mcp). Sample AppHost logic:

```csharp
var builder = DistributedApplication.CreateBuilder(args);

var markitdown = builder.AddContainer("markitdown", "mcp/markitdown")
.WithArgs("--http", "--host", "0.0.0.0", "--port", "3001")
.WithHttpEndpoint(targetPort: 3001, name: "http");

var webApp = builder.AddProject("name");

webApp.WithEnvironment("MARKITDOWN_MCP_URL", markitdown.GetEndpoint("http"));

builder.Build().Run();
```

Sample Ingestion Service:

```csharp
string url = $"{Environment.GetEnvironmentVariable("MARKITDOWN_MCP_URL")}/mcp";

IngestionDocumentReader reader = new MarkItDownMcpReader(new Uri(url));
```

## Feedback & Contributing

We welcome feedback and contributions in [our GitHub repo](https://github.com/dotnet/extensions).
Loading