Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add query plan baseline tests for vector search
  • Loading branch information
neildsh committed Jun 10, 2024
commit 9fed309ccaead2b7a46840d652fe08b3e67155c7
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
<Results>
<Result>
<Input>
<Description>Euclidean Distance</Description>
<Query>SELECT TOP 10 c.title AS Title, VectorDistance(c.Embedding, @vectorEmbedding, true) AS SimilarityScore
FROM c
ORDER BY VectorDistance(c.Embedding, @vectorEmbedding, true)</Query>
<PartitionKeys>
<Key>/PartitionKey</Key>
</PartitionKeys>
<PartitionKeyType>Hash</PartitionKeyType>
<GeospatialType>Geography</GeospatialType>
<QueryParameters><![CDATA[[
{
"name": "@vectorEmbedding",
"value": [
0.0039695268496870995,
0.027338456362485886,
-0.005676387343555689,
-0.013547309674322605,
-0.002445543883368373,
0.01579204574227333,
-0.016796082258224487,
-0.012471556663513184
]
}
]]]></QueryParameters>
</Input>
<Output>
<PartitionedQueryExecutionInfoInternal>
<QueryInfo>
<DistinctType>None</DistinctType>
<Top>10</Top>
<Offset />
<Limit />
<GroupByExpressions />
<OrderBy>
<SortOrder>Descending</SortOrder>
</OrderBy>
<OrderByExpressions>
<OrderByExpression>VectorDistance(c.Embedding, @vectorEmbedding, true)</OrderByExpression>
</OrderByExpressions>
<Aggregates />
<GroupByAliasToAggregateType />
<GroupByAliases />
<HasSelectValue>False</HasSelectValue>
</QueryInfo>
<QueryRanges>
<Range>
<Range>[[],"Infinity")</Range>
</Range>
</QueryRanges>
<RewrittenQuery><![CDATA[SELECT TOP 10 c._rid, [{"item": VectorDistance(c.Embedding, @vectorEmbedding, true)}] AS orderByItems, {"Title": c.title, "SimilarityScore": VectorDistance(c.Embedding, @vectorEmbedding, true)} AS payload
FROM c
WHERE ({documentdb-formattableorderbyquery-filter})
ORDER BY VectorDistance(c.Embedding, @vectorEmbedding, true)]]></RewrittenQuery>
</PartitionedQueryExecutionInfoInternal>
</Output>
</Result>
<Result>
<Input>
<Description>Cosine Similarity</Description>
<Query>SELECT TOP 10 c.title AS Title, VectorDistance(c.Embedding, @vectorEmbedding, true) AS SimilarityScore
FROM c
ORDER BY VectorDistance(c.Embedding, @vectorEmbedding, true)</Query>
<PartitionKeys>
<Key>/PartitionKey</Key>
</PartitionKeys>
<PartitionKeyType>Hash</PartitionKeyType>
<GeospatialType>Geography</GeospatialType>
<QueryParameters><![CDATA[[
{
"name": "@vectorEmbedding",
"value": [
0.0039695268496870995,
0.027338456362485886,
-0.005676387343555689,
-0.013547309674322605,
-0.002445543883368373,
0.01579204574227333,
-0.016796082258224487,
-0.012471556663513184
]
}
]]]></QueryParameters>
</Input>
<Output>
<PartitionedQueryExecutionInfoInternal>
<QueryInfo>
<DistinctType>None</DistinctType>
<Top>10</Top>
<Offset />
<Limit />
<GroupByExpressions />
<OrderBy>
<SortOrder>Descending</SortOrder>
</OrderBy>
<OrderByExpressions>
<OrderByExpression>VectorDistance(c.Embedding, @vectorEmbedding, true)</OrderByExpression>
</OrderByExpressions>
<Aggregates />
<GroupByAliasToAggregateType />
<GroupByAliases />
<HasSelectValue>False</HasSelectValue>
</QueryInfo>
<QueryRanges>
<Range>
<Range>[[],"Infinity")</Range>
</Range>
</QueryRanges>
<RewrittenQuery><![CDATA[SELECT TOP 10 c._rid, [{"item": VectorDistance(c.Embedding, @vectorEmbedding, true)}] AS orderByItems, {"Title": c.title, "SimilarityScore": VectorDistance(c.Embedding, @vectorEmbedding, true)} AS payload
FROM c
WHERE ({documentdb-formattableorderbyquery-filter})
ORDER BY VectorDistance(c.Embedding, @vectorEmbedding, true)]]></RewrittenQuery>
</PartitionedQueryExecutionInfoInternal>
</Output>
</Result>
<Result>
<Input>
<Description>Dot Product</Description>
<Query>SELECT TOP 10 c.title AS Title, VectorDistance(c.Embedding, @vectorEmbedding, true) AS SimilarityScore
FROM c
ORDER BY VectorDistance(c.Embedding, @vectorEmbedding, true)</Query>
<PartitionKeys>
<Key>/PartitionKey</Key>
</PartitionKeys>
<PartitionKeyType>Hash</PartitionKeyType>
<GeospatialType>Geography</GeospatialType>
<QueryParameters><![CDATA[[
{
"name": "@vectorEmbedding",
"value": [
0.0039695268496870995,
0.027338456362485886,
-0.005676387343555689,
-0.013547309674322605,
-0.002445543883368373,
0.01579204574227333,
-0.016796082258224487,
-0.012471556663513184
]
}
]]]></QueryParameters>
</Input>
<Output>
<PartitionedQueryExecutionInfoInternal>
<QueryInfo>
<DistinctType>None</DistinctType>
<Top>10</Top>
<Offset />
<Limit />
<GroupByExpressions />
<OrderBy>
<SortOrder>Descending</SortOrder>
</OrderBy>
<OrderByExpressions>
<OrderByExpression>VectorDistance(c.Embedding, @vectorEmbedding, true)</OrderByExpression>
</OrderByExpressions>
<Aggregates />
<GroupByAliasToAggregateType />
<GroupByAliases />
<HasSelectValue>False</HasSelectValue>
</QueryInfo>
<QueryRanges>
<Range>
<Range>[[],"Infinity")</Range>
</Range>
</QueryRanges>
<RewrittenQuery><![CDATA[SELECT TOP 10 c._rid, [{"item": VectorDistance(c.Embedding, @vectorEmbedding, true)}] AS orderByItems, {"Title": c.title, "SimilarityScore": VectorDistance(c.Embedding, @vectorEmbedding, true)} AS payload
FROM c
WHERE ({documentdb-formattableorderbyquery-filter})
ORDER BY VectorDistance(c.Embedding, @vectorEmbedding, true)]]></RewrittenQuery>
</PartitionedQueryExecutionInfoInternal>
</Output>
</Result>
</Results>
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,9 @@
<None Update="BaselineTest\TestBaseline\QueryPlanBaselineTests.Top.xml">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="BaselineTest\TestBaseline\QueryPlanBaselineTests.VectorSearch.xml">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="BaselineTest\TestBaseline\OptimisticDirectExecutionQueryBaselineTests.PositiveOptimisticDirectExecutionOutput.xml">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1342,14 +1342,70 @@ public void Spatial()
PartitionKeyDefinition pkDefinitions = CreateHashPartitionKey("/key");
return new List<QueryPlanBaselineTestInput>
{
new QueryPlanBaselineTestInput($"{variation.Description} Geography", pkDefinitions, new SqlQuerySpec(variation.Query)) { GeospatialType = Cosmos.GeospatialType.Geography },
new QueryPlanBaselineTestInput($"{variation.Description} Geometry", pkDefinitions, new SqlQuerySpec(variation.Query)) { GeospatialType = Cosmos.GeospatialType.Geometry }
new QueryPlanBaselineTestInput($"{variation.Description} Geography", pkDefinitions, vectorEmbeddingPolicy: null, new SqlQuerySpec(variation.Query), Cosmos.GeospatialType.Geography),
new QueryPlanBaselineTestInput($"{variation.Description} Geometry", pkDefinitions, vectorEmbeddingPolicy : null, new SqlQuerySpec(variation.Query), Cosmos.GeospatialType.Geometry)
};
})
.ToList();

this.ExecuteTestSuite(testVariations);
}
}

/// <summary>
/// Baseline test for query plans of a TOP/ORDER BY VectorDistance query.
/// One test case is produced per distance function (Euclidean, cosine, dot product),
/// each built by <see cref="MakeVectorTest"/>.
/// </summary>
[TestMethod]
[Owner("ndeshpan")]
public void VectorSearch()
{
    List<QueryPlanBaselineTestInput> vectorSearchInputs = new List<QueryPlanBaselineTestInput>();

    // The description strings become the <Description> of each baseline entry, so they must stay stable.
    vectorSearchInputs.Add(MakeVectorTest("Euclidean Distance", Cosmos.DistanceFunction.Euclidean));
    vectorSearchInputs.Add(MakeVectorTest("Cosine Similarity", Cosmos.DistanceFunction.Cosine));
    vectorSearchInputs.Add(MakeVectorTest("Dot Product", Cosmos.DistanceFunction.DotProduct));

    this.ExecuteTestSuite(vectorSearchInputs);
}

/// <summary>
/// Builds a single vector search baseline input: a container partitioned on /PartitionKey with a
/// one-entry vector embedding policy, and a parameterized TOP 10 query ordered by VectorDistance.
/// </summary>
/// <param name="description">Description recorded for the test case.</param>
/// <param name="distanceFunction">Distance function declared on the embedding in the policy.</param>
/// <returns>The test input, with geospatial type set to Geography.</returns>
private static QueryPlanBaselineTestInput MakeVectorTest(string description, Cosmos.DistanceFunction distanceFunction)
{
    PartitionKeyDefinition hashPartitionKey = CreateHashPartitionKey("/PartitionKey");

    // NOTE(review): the policy path below is "/embedding" while the query reads c.Embedding (capital E),
    // and the recorded baseline shows the same Descending sort order for all three distance functions.
    // If paths are matched case-sensitively the policy may never apply to this query — confirm, and
    // regenerate the baseline if either the path or the query text is changed.
    Cosmos.Embedding embedding = new Cosmos.Embedding();
    embedding.Path = "/embedding";
    embedding.DataType = Cosmos.VectorDataType.Float32;
    embedding.Dimensions = 8;
    embedding.DistanceFunction = distanceFunction;

    Collection<Cosmos.Embedding> embeddings = new Collection<Cosmos.Embedding>();
    embeddings.Add(embedding);
    Cosmos.VectorEmbeddingPolicy policy = new Cosmos.VectorEmbeddingPolicy(embeddings);

    // Verbatim string: the continuation lines are deliberately unindented because the text is sent as-is.
    const string QueryText = @"SELECT TOP 10 c.title AS Title, VectorDistance(c.Embedding, @vectorEmbedding, true) AS SimilarityScore
FROM c
ORDER BY VectorDistance(c.Embedding, @vectorEmbedding, true)";

    SqlParameter embeddingParameter = new SqlParameter("@vectorEmbedding", VectorEmbedding);
    SqlParameter[] parameters = new SqlParameter[] { embeddingParameter };
    SqlQuerySpec querySpec = new SqlQuerySpec(QueryText, new SqlParameterCollection(parameters));

    return new QueryPlanBaselineTestInput(
        description,
        hashPartitionKey,
        policy,
        querySpec,
        Cosmos.GeospatialType.Geography);
}

// Query vector passed as the @vectorEmbedding parameter by MakeVectorTest.
// It has 8 components, matching the Dimensions = 8 declared on the embedding in that method's policy.
private static readonly double[] VectorEmbedding = new double[] {
0.0039695268496870995,
0.027338456362485886,
-0.005676387343555689,
-0.013547309674322605,
-0.002445543883368373,
0.01579204574227333,
-0.016796082258224487,
-0.012471556663513184 };

private static PartitionKeyDefinition CreateHashPartitionKey(
params string[] partitionKeys) => new PartitionKeyDefinition()
Expand Down Expand Up @@ -1440,7 +1496,7 @@ public override QueryPlanBaselineTestOutput ExecuteTest(QueryPlanBaselineTestInp
TryCatch<PartitionedQueryExecutionInfoInternal> info = QueryPartitionProviderTestInstance.Object.TryGetPartitionedQueryExecutionInfoInternal(
JsonConvert.SerializeObject(input.SqlQuerySpec),
input.PartitionKeyDefinition,
vectorEmbeddingPolicy: null,
input.VectorEmbeddingPolicy,
requireFormattableOrderByQuery: true,
isContinuationExpected: false,
allowNonValueAggregateQuery: true,
Expand All @@ -1460,18 +1516,36 @@ public override QueryPlanBaselineTestOutput ExecuteTest(QueryPlanBaselineTestInp

public sealed class QueryPlanBaselineTestInput : BaselineTestInput
{
internal PartitionKeyDefinition PartitionKeyDefinition { get; set; }
internal SqlQuerySpec SqlQuerySpec { get; set; }
internal Cosmos.GeospatialType? GeospatialType { get; set; }
internal PartitionKeyDefinition PartitionKeyDefinition { get; }

internal Cosmos.VectorEmbeddingPolicy VectorEmbeddingPolicy { get; }

internal SqlQuerySpec SqlQuerySpec { get; }

internal Cosmos.GeospatialType? GeospatialType { get; }

internal QueryPlanBaselineTestInput(
string description,
PartitionKeyDefinition partitionKeyDefinition,
SqlQuerySpec sqlQuerySpec)
: base(description)
: this(description, partitionKeyDefinition, vectorEmbeddingPolicy: null, sqlQuerySpec, geospatialType: null)
{
this.PartitionKeyDefinition = partitionKeyDefinition;
this.SqlQuerySpec = sqlQuerySpec;
}

/// <summary>
/// Creates a query plan baseline test input with every setting supplied explicitly.
/// </summary>
/// <param name="description">Description of the test case, forwarded to the base class.</param>
/// <param name="partitionKeyDefinition">Partition key definition stored on the input.</param>
/// <param name="vectorEmbeddingPolicy">Vector embedding policy stored on the input; callers in this file pass null when the query does not use one.</param>
/// <param name="sqlQuerySpec">Query text and parameters stored on the input.</param>
/// <param name="geospatialType">Geospatial type stored on the input; may be null.</param>
internal QueryPlanBaselineTestInput(
    string description,
    PartitionKeyDefinition partitionKeyDefinition,
    Cosmos.VectorEmbeddingPolicy vectorEmbeddingPolicy,
    SqlQuerySpec sqlQuerySpec,
    Cosmos.GeospatialType? geospatialType)
    : base(description)
{
    // Plain, independent property assignments; no validation is performed here.
    this.SqlQuerySpec = sqlQuerySpec;
    this.GeospatialType = geospatialType;
    this.VectorEmbeddingPolicy = vectorEmbeddingPolicy;
    this.PartitionKeyDefinition = partitionKeyDefinition;
}

public override void SerializeAsXml(XmlWriter xmlWriter)
Expand Down