Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
0269f52
Adding changes for vectorIndex and vectorEmbeddingPolicy
aayush3011 Mar 22, 2024
29ad391
Adding some necessary comments
aayush3011 Mar 27, 2024
cd4d8cf
Adding test case
aayush3011 Mar 28, 2024
a2f6a83
updating enum values
aayush3011 Mar 28, 2024
c4bc283
Updating test case
aayush3011 Mar 28, 2024
af99d7b
Updating test case
aayush3011 Mar 29, 2024
6d8fc9b
Updating test case
aayush3011 Mar 29, 2024
2f7112d
updating changelog
aayush3011 Mar 29, 2024
158880f
Merge branch 'main' into users/akataria/vectorindexing
aayush3011 Mar 29, 2024
bb85dd3
Updating test case
aayush3011 Mar 29, 2024
a7185d7
Merge branch 'users/akataria/vectorindexing' of https://github.com/aa…
aayush3011 Mar 29, 2024
f4c4012
Merge branch 'Azure:main' into users/akataria/vectorindexing
aayush3011 Apr 2, 2024
72a4bcd
Resolving comments
aayush3011 Apr 2, 2024
dfb3575
Resolving comments
aayush3011 Apr 2, 2024
67f51cb
Fixing test case
aayush3011 Apr 2, 2024
730f8c2
Resolving comments
aayush3011 Apr 23, 2024
ad3ac89
Resolving Comments
aayush3011 Apr 27, 2024
940c6af
Merge branch 'main' into users/akataria/vectorindexing
aayush3011 Apr 27, 2024
3eb77ea
Fixing build issues
aayush3011 Apr 27, 2024
44f4e07
Merge branch 'main' into users/akataria/vectorindexing
aayush3011 Apr 29, 2024
460f681
Resolving comments
aayush3011 Apr 30, 2024
5579dd1
Resolving Comments
aayush3011 May 1, 2024
54a2ce3
Merge branch 'users/akataria/vectorindexing' of https://github.com/aa…
aayush3011 May 1, 2024
528a0eb
[Cosmos][VectorIndex]Adding changes for vectorIndex and vectorEmbeddi…
aayush3011 May 2, 2024
148cba5
[Cosmos][VectorSearch] Non Streaming Order By Query (#40085)
aayush3011 May 8, 2024
df7e838
[Cosmos][VectorSearch] Non Streaming Order By Query (#40096)
aayush3011 May 9, 2024
c8de52f
[Cosmos][VectorSearch] Non Streaming Order By Query (#40098)
aayush3011 May 9, 2024
425b78f
Merge branch 'Azure:main' into users/akataria/vectorindexing
aayush3011 May 10, 2024
55efb81
Resolving comments
aayush3011 May 10, 2024
9a3b003
Resolving comments
aayush3011 May 10, 2024
52917f9
[Cosmos][VectorSearch] Non Streaming Order By Query (#40115)
aayush3011 May 10, 2024
72a7145
Merge branch 'feature/vector_search' into users/akataria/vectorindexing
aayush3011 May 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Resolving comments
  • Loading branch information
aayush3011 committed Apr 2, 2024
commit 72a4bcd0601bd5e36ae0e8593fefc7059dcb96f2
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

package com.azure.cosmos.rx;

import com.azure.cosmos.ConsistencyLevel;
Expand All @@ -11,19 +14,17 @@
import com.azure.cosmos.implementation.TestConfigurations;
import com.azure.cosmos.implementation.guava25.collect.ImmutableList;
import com.azure.cosmos.models.CosmosContainerProperties;
import com.azure.cosmos.models.DistanceFunction;
import com.azure.cosmos.models.Embedding;
import com.azure.cosmos.models.CosmosVectorDataType;
import com.azure.cosmos.models.CosmosVectorDistanceFunction;
import com.azure.cosmos.models.CosmosVectorEmbedding;
import com.azure.cosmos.models.ExcludedPath;
import com.azure.cosmos.models.IncludedPath;
import com.azure.cosmos.models.IndexingMode;
import com.azure.cosmos.models.IndexingPolicy;
import com.azure.cosmos.models.PartitionKeyDefinition;
import com.azure.cosmos.models.VectorDataType;
import com.azure.cosmos.models.VectorEmbeddingPolicy;
import com.azure.cosmos.models.VectorIndexSpec;
import com.azure.cosmos.models.VectorIndexType;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.fail;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.annotations.AfterClass;
Expand All @@ -37,6 +38,9 @@
import java.util.List;
import java.util.UUID;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.fail;

@Ignore("TODO: Ignore these test cases until the public emulator with vector indexes is released.")
public class VectorIndexTest extends TestSuiteBase {
protected static final int TIMEOUT = 30000;
Expand All @@ -48,7 +52,7 @@ public class VectorIndexTest extends TestSuiteBase {
private CosmosAsyncClient client;
private CosmosAsyncDatabase database;

@BeforeClass(groups = {"long"}, timeOut = SETUP_TIMEOUT)
@BeforeClass(groups = {"emulator"}, timeOut = SETUP_TIMEOUT)
public void before_UniqueIndexTest() {
// set up the client
client = new CosmosClientBuilder()
Expand All @@ -62,13 +66,13 @@ public void before_UniqueIndexTest() {
database = createDatabase(client, databaseId);
}

@AfterClass(groups = {"long"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true)
@AfterClass(groups = {"emulator"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true)
public void afterClass() {
// Class-level teardown for the fixtures assigned in the @BeforeClass method:
// remove the test database first, then close the client it was created with.
// NOTE(review): safeDeleteDatabase/safeClose are defined outside this view
// (presumably in TestSuiteBase) — confirm they tolerate a null argument in
// case setup failed before the fields were assigned.
safeDeleteDatabase(database);
safeClose(client);
}

@Test(groups = {"long"}, timeOut = TIMEOUT)
@Test(groups = {"emulator"}, timeOut = TIMEOUT)
public void shouldCreateVectorEmbeddingPolicy() {
PartitionKeyDefinition partitionKeyDef = new PartitionKeyDefinition();
ArrayList<String> paths = new ArrayList<String>();
Expand Down Expand Up @@ -99,7 +103,7 @@ public void shouldCreateVectorEmbeddingPolicy() {
validateCollectionProperties(collectionDefinition, collectionProperties);
}

@Test(groups = {"long"}, timeOut = TIMEOUT)
@Test(groups = {"emulator"}, timeOut = TIMEOUT)
public void shouldFailOnEmptyVectorEmbeddingPolicy() {
PartitionKeyDefinition partitionKeyDef = new PartitionKeyDefinition();
ArrayList<String> paths = new ArrayList<String>();
Expand All @@ -118,7 +122,7 @@ public void shouldFailOnEmptyVectorEmbeddingPolicy() {
indexingPolicy.setIncludedPaths(ImmutableList.of(includedPath1, includedPath2));

VectorIndexSpec vectorIndexSpec = new VectorIndexSpec("/vector1");
vectorIndexSpec.setType(VectorIndexType.FLAT.getValue());
vectorIndexSpec.setType(VectorIndexType.FLAT.toString());
indexingPolicy.setVectorIndexes(ImmutableList.of(vectorIndexSpec));

collectionDefinition.setIndexingPolicy(indexingPolicy);
Expand All @@ -132,7 +136,7 @@ public void shouldFailOnEmptyVectorEmbeddingPolicy() {
}
}

@Test(groups = {"long"}, timeOut = TIMEOUT)
@Test(groups = {"emulator"}, timeOut = TIMEOUT)
public void shouldFailOnWrongVectorEmbeddingPolicy() {
PartitionKeyDefinition partitionKeyDef = new PartitionKeyDefinition();
ArrayList<String> paths = new ArrayList<String>();
Expand All @@ -151,15 +155,15 @@ public void shouldFailOnWrongVectorEmbeddingPolicy() {
indexingPolicy.setIncludedPaths(ImmutableList.of(includedPath1, includedPath2));

VectorIndexSpec vectorIndexSpec = new VectorIndexSpec("/vector1");
vectorIndexSpec.setType(VectorIndexType.FLAT.getValue());
vectorIndexSpec.setType(VectorIndexType.FLAT.toString());
indexingPolicy.setVectorIndexes(ImmutableList.of(vectorIndexSpec));
collectionDefinition.setIndexingPolicy(indexingPolicy);

Embedding embedding = new Embedding();
embedding.setPath("/vector1");
embedding.setDistanceFunction(DistanceFunction.COSINE.getValue());
embedding.setDimensions(3L);
embedding.setVectorDataType("String");
CosmosVectorEmbedding embedding = new CosmosVectorEmbedding(
"/vector1",
CosmosVectorDistanceFunction.COSINE.toString(),
3L,
"String");

try {
VectorEmbeddingPolicy vectorEmbeddingPolicy = new VectorEmbeddingPolicy(ImmutableList.of(embedding));
Expand All @@ -178,7 +182,7 @@ public void shouldFailOnWrongVectorEmbeddingPolicy() {
assertThat(ex.getMessage()).isEqualTo("Vector data type cannot be empty for the vector embedding policy.");
}

embedding.setVectorDataType(VectorDataType.FLOAT32.getValue());
embedding.setVectorDataType(CosmosVectorDataType.FLOAT32.toString());
embedding.setDistanceFunction("COS");
try {
VectorEmbeddingPolicy vectorEmbeddingPolicy = new VectorEmbeddingPolicy(ImmutableList.of(embedding));
Expand All @@ -197,7 +201,7 @@ public void shouldFailOnWrongVectorEmbeddingPolicy() {
assertThat(ex.getMessage()).isEqualTo("Distance function cannot be empty for the vector embedding policy.");
}

embedding.setDistanceFunction(DistanceFunction.COSINE.getValue());
embedding.setDistanceFunction(CosmosVectorDistanceFunction.COSINE.toString());
embedding.setDimensions(-1L);
try {
VectorEmbeddingPolicy vectorEmbeddingPolicy = new VectorEmbeddingPolicy(ImmutableList.of(embedding));
Expand All @@ -208,7 +212,7 @@ public void shouldFailOnWrongVectorEmbeddingPolicy() {
}
}

@Test(groups = {"long"}, timeOut = TIMEOUT)
@Test(groups = {"emulator"}, timeOut = TIMEOUT)
public void shouldFailOnWrongVectorIndex() {
PartitionKeyDefinition partitionKeyDef = new PartitionKeyDefinition();
ArrayList<String> paths = new ArrayList<String>();
Expand All @@ -231,11 +235,11 @@ public void shouldFailOnWrongVectorIndex() {
indexingPolicy.setVectorIndexes(ImmutableList.of(vectorIndexSpec));
collectionDefinition.setIndexingPolicy(indexingPolicy);

Embedding embedding = new Embedding();
embedding.setPath("/vector1");
embedding.setDistanceFunction(DistanceFunction.COSINE.getValue());
embedding.setDimensions(3L);
embedding.setVectorDataType(VectorDataType.INT8.getValue());
CosmosVectorEmbedding embedding = new CosmosVectorEmbedding(
"/vector1",
CosmosVectorDistanceFunction.COSINE.toString(),
3L,
CosmosVectorDataType.INT8.toString());
VectorEmbeddingPolicy vectorEmbeddingPolicy = new VectorEmbeddingPolicy(ImmutableList.of(embedding));
collectionDefinition.setVectorEmbeddingPolicy(vectorEmbeddingPolicy);

Expand All @@ -248,7 +252,7 @@ public void shouldFailOnWrongVectorIndex() {
}
}

@Test(groups = {"long"}, timeOut = TIMEOUT)
@Test(groups = {"emulator"}, timeOut = TIMEOUT)
public void shouldCreateVectorIndexSimilarPathDifferentVectorType() {
PartitionKeyDefinition partitionKeyDef = new PartitionKeyDefinition();
ArrayList<String> paths = new ArrayList<String>();
Expand All @@ -270,7 +274,7 @@ public void shouldCreateVectorIndexSimilarPathDifferentVectorType() {
vectorIndexes.get(2).setPath("/vector2");
indexingPolicy.setVectorIndexes(vectorIndexes);

List<Embedding> embeddings = populateEmbeddings();
List<CosmosVectorEmbedding> embeddings = populateEmbeddings();
embeddings.get(2).setPath("/vector2");
VectorEmbeddingPolicy vectorEmbeddingPolicy = new VectorEmbeddingPolicy(embeddings);

Expand All @@ -286,7 +290,7 @@ public void shouldCreateVectorIndexSimilarPathDifferentVectorType() {
private void validateCollectionProperties(CosmosContainerProperties collectionDefinition, CosmosContainerProperties collectionProperties) {
assertThat(collectionProperties.getVectorEmbeddingPolicy()).isNotNull();
assertThat(collectionProperties.getVectorEmbeddingPolicy().getEmbeddings()).isNotNull();
List<Embedding> embeddings = collectionProperties.getVectorEmbeddingPolicy().getEmbeddings();
List<CosmosVectorEmbedding> embeddings = collectionProperties.getVectorEmbeddingPolicy().getEmbeddings();
assertThat(embeddings).hasSameSizeAs(collectionDefinition.getVectorEmbeddingPolicy().getEmbeddings());
for (int i = 0; i < embeddings.size(); i++) {
assertThat(embeddings.get(0).getPath()).isEqualTo(
Expand All @@ -304,35 +308,35 @@ private void validateCollectionProperties(CosmosContainerProperties collectionDe

private List<VectorIndexSpec> populateVectorIndexes() {
VectorIndexSpec vectorIndexSpec1 = new VectorIndexSpec("/vector1");
vectorIndexSpec1.setType(VectorIndexType.FLAT.getValue());
vectorIndexSpec1.setType(VectorIndexType.FLAT.toString());

VectorIndexSpec vectorIndexSpec2 = new VectorIndexSpec("/vector2");
vectorIndexSpec2.setType(VectorIndexType.QUANTIZED_FLAT.getValue());
vectorIndexSpec2.setType(VectorIndexType.QUANTIZED_FLAT.toString());

VectorIndexSpec vectorIndexSpec3 = new VectorIndexSpec("/vector3");
vectorIndexSpec3.setType(VectorIndexType.DISK_ANN.getValue());
vectorIndexSpec3.setType(VectorIndexType.DISK_ANN.toString());

return Arrays.asList(vectorIndexSpec1, vectorIndexSpec2, vectorIndexSpec3);
}

private List<Embedding> populateEmbeddings() {
Embedding embedding1 = new Embedding();
embedding1.setPath("/vector1");
embedding1.setDistanceFunction(DistanceFunction.COSINE.getValue());
embedding1.setDimensions(3L);
embedding1.setVectorDataType(VectorDataType.FLOAT32.getValue());

Embedding embedding2 = new Embedding();
embedding2.setPath("/vector2");
embedding2.setDistanceFunction(DistanceFunction.DOT_PRODUCT.getValue());
embedding2.setDimensions(3L);
embedding2.setVectorDataType(VectorDataType.INT8.getValue());

Embedding embedding3 = new Embedding();
embedding3.setPath("/vector3");
embedding3.setDistanceFunction(DistanceFunction.EUCLIDEAN.getValue());
embedding3.setDimensions(3L);
embedding3.setVectorDataType(VectorDataType.UINT8.getValue());
private List<CosmosVectorEmbedding> populateEmbeddings() {
CosmosVectorEmbedding embedding1 = new CosmosVectorEmbedding(
"/vector1",
CosmosVectorDistanceFunction.COSINE.toString(),
3L,
CosmosVectorDataType.FLOAT32.toString());

CosmosVectorEmbedding embedding2 = new CosmosVectorEmbedding(
"/vector2",
CosmosVectorDistanceFunction.DOT_PRODUCT.toString(),
3L,
CosmosVectorDataType.INT8.toString());

CosmosVectorEmbedding embedding3 = new CosmosVectorEmbedding(
"/vector3",
CosmosVectorDistanceFunction.EUCLIDEAN.toString(),
3L,
CosmosVectorDataType.UINT8.toString());
return Arrays.asList(embedding1, embedding2, embedding3);
}
}
1 change: 0 additions & 1 deletion sdk/cosmos/azure-cosmos/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
### 4.58.0-beta.1 (Unreleased)

#### Features Added
* Added public APIs `setMaxMicroBatchSize` and `getMaxMicroBatchSize` in `CosmosBulkExecutionOptions` - See [PR 39335](https://github.com/Azure/azure-sdk-for-java/pull/39335)
* Added `vectorEmbeddingPolicy` in `CosmosContainerProperties` and `vectorIndexes` in `IndexingPolicy` to support vector search in Cosmos DB - See [PR 39379](https://github.com/Azure/azure-sdk-for-java/pull/39379)

#### Breaking Changes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
/**
* Data types for the embeddings in the Cosmos DB database service.
*/
public enum VectorDataType {
public enum CosmosVectorDataType {
/**
* Represents an int8 data type.
*/
Expand All @@ -29,28 +29,12 @@ public enum VectorDataType {

private final String overWireValue;

VectorDataType(String overWireValue) {
CosmosVectorDataType(String overWireValue) {
this.overWireValue = overWireValue;
}

@Override
public String toString() {
return this.overWireValue;
}

/**
*
* @return value for the enum
*/
public String getValue() {
return this.overWireValue;
}

/**
*
* @return if the value for the enum is empty or not.
*/
public boolean isEmpty() {
return this.overWireValue.isEmpty();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
/**
* Distance Function for the embeddings in the Cosmos DB database service.
*/
public enum DistanceFunction {
public enum CosmosVectorDistanceFunction {
/**
* Represents the euclidean distance function.
*/
Expand All @@ -24,28 +24,12 @@ public enum DistanceFunction {

private final String overWireValue;

DistanceFunction(String overWireValue) {
CosmosVectorDistanceFunction(String overWireValue) {
this.overWireValue = overWireValue;
}

@Override
public String toString() {
return this.overWireValue;
}

/**
*
* @return value for the enum
*/
public String getValue() {
return this.overWireValue;
}

/**
*
* @return if the value for the enum is empty or not.
*/
public boolean isEmpty() {
return this.overWireValue.isEmpty();
}
}
Loading