Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
0269f52
Adding changes for vectorIndex and vectorEmbeddingPolicy
aayush3011 Mar 22, 2024
29ad391
Adding some necessary comments
aayush3011 Mar 27, 2024
cd4d8cf
Adding test case
aayush3011 Mar 28, 2024
a2f6a83
updating enum values
aayush3011 Mar 28, 2024
c4bc283
Updating test case
aayush3011 Mar 28, 2024
af99d7b
Updating test case
aayush3011 Mar 29, 2024
6d8fc9b
Updating test case
aayush3011 Mar 29, 2024
2f7112d
updating changelog
aayush3011 Mar 29, 2024
158880f
Merge branch 'main' into users/akataria/vectorindexing
aayush3011 Mar 29, 2024
bb85dd3
Updating test case
aayush3011 Mar 29, 2024
a7185d7
Merge branch 'users/akataria/vectorindexing' of https://github.com/aa…
aayush3011 Mar 29, 2024
f4c4012
Merge branch 'Azure:main' into users/akataria/vectorindexing
aayush3011 Apr 2, 2024
72a4bcd
Resolving comments
aayush3011 Apr 2, 2024
dfb3575
Resolving comments
aayush3011 Apr 2, 2024
67f51cb
Fixing test case
aayush3011 Apr 2, 2024
730f8c2
Resolving comments
aayush3011 Apr 23, 2024
ad3ac89
Resolving Comments
aayush3011 Apr 27, 2024
940c6af
Merge branch 'main' into users/akataria/vectorindexing
aayush3011 Apr 27, 2024
3eb77ea
Fixing build issues
aayush3011 Apr 27, 2024
44f4e07
Merge branch 'main' into users/akataria/vectorindexing
aayush3011 Apr 29, 2024
460f681
Resolving comments
aayush3011 Apr 30, 2024
5579dd1
Resolving Comments
aayush3011 May 1, 2024
54a2ce3
Merge branch 'users/akataria/vectorindexing' of https://github.com/aa…
aayush3011 May 1, 2024
528a0eb
[Cosmos][VectorIndex]Adding changes for vectorIndex and vectorEmbeddi…
aayush3011 May 2, 2024
148cba5
[Cosmos][VectorSearch] Non Streaming Order By Query (#40085)
aayush3011 May 8, 2024
df7e838
[Cosmos][VectorSearch] Non Streaming Order By Query (#40096)
aayush3011 May 9, 2024
c8de52f
[Cosmos][VectorSearch] Non Streaming Order By Query (#40098)
aayush3011 May 9, 2024
425b78f
Merge branch 'Azure:main' into users/akataria/vectorindexing
aayush3011 May 10, 2024
55efb81
Resolving comments
aayush3011 May 10, 2024
9a3b003
Resolving comments
aayush3011 May 10, 2024
52917f9
[Cosmos][VectorSearch] Non Streaming Order By Query (#40115)
aayush3011 May 10, 2024
72a7145
Merge branch 'feature/vector_search' into users/akataria/vectorindexing
aayush3011 May 10, 2024
43f1d83
Merge branch 'Azure:main' into users/akataria/vectorindexing
aayush3011 May 12, 2024
fd13d87
Merge branch 'Azure:main' into users/akataria/vectorindexing
aayush3011 May 14, 2024
4055fe8
Fixing merge issues
aayush3011 May 14, 2024
62931db
Fixing merge issues
aayush3011 May 14, 2024
f7f7d08
Fixing build
aayush3011 May 15, 2024
bce71de
Fixing build
aayush3011 May 15, 2024
0a0a26a
Merge branch 'main' into users/akataria/vectorindexing
aayush3011 May 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions sdk/cosmos/azure-cosmos/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
### 4.58.0-beta.1 (Unreleased)

#### Features Added
* Added public APIs `setMaxMicroBatchSize` and `getMaxMicroBatchSize` in `CosmosBulkExecutionOptions` - See [PR 39335](https://github.com/Azure/azure-sdk-for-java/pull/39335)
* Added `vectorEmbeddingPolicy` in `CosmosContainerProperties` and `vectorIndexes` in `IndexingPolicy` to support vector search in CosmosDB - See [PR 39379](https://github.com/Azure/azure-sdk-for-java/pull/39379)

#### Breaking Changes

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,15 @@ public static final class Properties {
public static final String SPATIAL_INDEXES = "spatialIndexes";
public static final String TYPES = "types";

// Vector Embedding Policy
// Property names used when reading/writing vector-search related settings.
// Property name under which DocumentCollection stores and reads the VectorEmbeddingPolicy.
public static final String VECTOR_EMBEDDING_POLICY = "vectorEmbeddingPolicy";
// Property name under which IndexingPolicy stores and reads its list of VectorIndexSpec.
public static final String VECTOR_INDEXES = "vectorIndexes";
// NOTE(review): presumably the list of embeddings inside the policy; usage is not visible here - confirm.
public static final String VECTOR_EMBEDDINGS = "vectorEmbeddings";
// NOTE(review): presumably the "type" field of a vector index entry; usage is not visible here - confirm.
public static final String VECTOR_INDEX_TYPE = "type";
// JSON property name of Embedding's vectorDataType field.
public static final String VECTOR_DATA_TYPE = "dataType";
// JSON property name of Embedding's dimensions field.
public static final String VECTOR_DIMENSIONS = "dimensions";
// JSON property name of Embedding's distanceFunction field.
public static final String DISTANCE_FUNCTION = "distanceFunction";

// Unique index.
public static final String UNIQUE_KEY_POLICY = "uniqueKeyPolicy";
public static final String UNIQUE_KEYS = "uniqueKeys";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import com.azure.cosmos.models.ModelBridgeInternal;
import com.azure.cosmos.models.PartitionKeyDefinition;
import com.azure.cosmos.models.UniqueKeyPolicy;
import com.azure.cosmos.models.VectorEmbeddingPolicy;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.databind.node.TextNode;
Expand Down Expand Up @@ -41,6 +42,7 @@ public final class DocumentCollection extends Resource {
private UniqueKeyPolicy uniqueKeyPolicy;
private PartitionKeyDefinition partitionKeyDefinition;
private ClientEncryptionPolicy clientEncryptionPolicyInternal;
private VectorEmbeddingPolicy vectorEmbeddingPolicy;

/**
* Constructor.
Expand Down Expand Up @@ -411,6 +413,36 @@ public void setClientEncryptionPolicy(ClientEncryptionPolicy value) {
setProperty(this, Constants.Properties.CLIENT_ENCRYPTION_POLICY, value);
}

/**
 * Returns the Vector Embedding Policy of this collection, i.e. the embedding paths together with their
 * path-specific settings that are used when performing vector search on the items of a collection in the
 * Azure CosmosDB database service.
 * <p>
 * The policy is cached in a field: when no value is cached yet and the resource carries a
 * {@code vectorEmbeddingPolicy} property, that property is read once and remembered.
 *
 * @return the Vector Embedding Policy, or {@code null} when none is cached and the property is not present.
 */
public VectorEmbeddingPolicy getVectorEmbeddingPolicy() {
    // Only consult the resource when nothing has been cached so far.
    if (this.vectorEmbeddingPolicy == null && super.has(Constants.Properties.VECTOR_EMBEDDING_POLICY)) {
        this.vectorEmbeddingPolicy =
            super.getObject(Constants.Properties.VECTOR_EMBEDDING_POLICY, VectorEmbeddingPolicy.class);
    }
    return this.vectorEmbeddingPolicy;
}

/**
 * Replaces the Vector Embedding Policy of this collection, i.e. the embedding paths together with their
 * path-specific settings that are used when performing vector search on the items of a collection in the
 * Azure CosmosDB database service.
 * <p>
 * The value is kept in the cached field and also written to the {@code vectorEmbeddingPolicy} property.
 *
 * @param value the Vector Embedding Policy; must not be {@code null}.
 * @throws IllegalArgumentException if {@code value} is {@code null}.
 */
public void setVectorEmbeddingPolicy(VectorEmbeddingPolicy value) {
    if (value == null) {
        throw new IllegalArgumentException("VectorEmbeddingPolicy cannot be null.");
    }

    // Update the cached copy first, then mirror the same instance into the resource property.
    this.vectorEmbeddingPolicy = value;
    setProperty(this, Constants.Properties.VECTOR_EMBEDDING_POLICY, this.vectorEmbeddingPolicy);
}

public void populatePropertyBag() {
super.populatePropertyBag();
if (this.indexingPolicy == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ public CompositePathSortOrder getOrder() {
}

/**
* Gets the sort order for the composite path.
* Sets the sort order for the composite path.
* <p>
* For example if you want to run the query "SELECT * FROM c ORDER BY c.age asc, c.height desc",
* then you need to make the order for "/age" "ascending" and the order for "/height" "descending".
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,28 @@ public CosmosContainerProperties setClientEncryptionPolicy(ClientEncryptionPolic
return this;
}

/**
* Gets the Vector Embedding Policy containing paths for embeddings along with path-specific settings for the item
* used in performing vector search on the items in a collection in the Azure CosmosDB database service.
* <p>
* The call is forwarded to the wrapped {@code documentCollection}.
*
* @return the Vector Embedding Policy as returned by the wrapped {@code documentCollection}.
*/
public VectorEmbeddingPolicy getVectorEmbeddingPolicy() {
return this.documentCollection.getVectorEmbeddingPolicy();
}

/**
* Sets the Vector Embedding Policy containing paths for embeddings along with path-specific settings for the item
* used in performing vector search on the items in a collection in the Azure CosmosDB database service.
* <p>
* The call is forwarded to the wrapped {@code documentCollection}.
*
* @param value the Vector Embedding Policy.
* @return the CosmosContainerProperties.
* @throws IllegalArgumentException if {@code value} is {@code null}; raised by the wrapped collection's setter.
*/
public CosmosContainerProperties setVectorEmbeddingPolicy(VectorEmbeddingPolicy value) {
this.documentCollection.setVectorEmbeddingPolicy(value);
return this;
}

/**
* Exposes the wrapped {@code documentCollection} as a {@code Resource}.
*
* @return the {@code documentCollection} instance held by these container properties.
*/
Resource getResource() {
return this.documentCollection;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

package com.azure.cosmos.models;

/**
 * Distance functions available for the embeddings in the Cosmos DB database service.
 * <p>
 * Every constant carries a string value that is returned by both {@link #getValue()} and
 * {@link #toString()}.
 */
public enum DistanceFunction {
    /**
     * Euclidean distance function; string value {@code "euclidean"}.
     */
    EUCLIDEAN("euclidean"),

    /**
     * Cosine distance function; string value {@code "cosine"}.
     */
    COSINE("cosine"),

    /**
     * Dot product distance function; string value {@code "dotproduct"}.
     */
    DOT_PRODUCT("dotproduct");

    private final String wireValue;

    DistanceFunction(String wireValue) {
        this.wireValue = wireValue;
    }

    /**
     * Gets the string value associated with this constant.
     *
     * @return value for the enum
     */
    public String getValue() {
        return this.wireValue;
    }

    /**
     * Gets the same string value as {@link #getValue()} instead of the constant's name.
     *
     * @return value for the enum
     */
    @Override
    public String toString() {
        return getValue();
    }

    /**
     * Tells whether the string value associated with this constant is the empty string.
     *
     * @return if the value for the enum is empty or not.
     */
    public boolean isEmpty() {
        return getValue().length() == 0;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

package com.azure.cosmos.models;

import com.azure.cosmos.implementation.Constants;
import com.fasterxml.jackson.annotation.JsonProperty;

/**
* Embedding settings within {@link VectorEmbeddingPolicy}
*/
public final class Embedding {
@JsonProperty(Constants.Properties.PATH)
private String path;
@JsonProperty(Constants.Properties.VECTOR_DATA_TYPE)
private String vectorDataType;
@JsonProperty(Constants.Properties.VECTOR_DIMENSIONS)
private Long dimensions;
@JsonProperty(Constants.Properties.DISTANCE_FUNCTION)
private String distanceFunction;

/**
* Gets the path for the embedding.
*
* @return path
*/
public String getPath() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have a use case to make the getters as public APIs? Usually we start with only setters as public and only make getters public if customers really need them.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Retaining the getters as they are essential for consistency. Therefore, even if getPath is not currently utilized, we should keep it here, as per Fabian's suggestion.

return path;
}

/**
* Sets the path for the embedding.
* <p>
* The value is stored as given; no validation is performed here.
*
* @param path the path for the embedding
* @return this {@code Embedding} instance, so that calls can be chained
*/
public Embedding setPath(String path) {
this.path = path;
return this;
}

/**
 * Returns the data type configured for the embedding, as a string.
 * <p>
 * This is the field mapped to the {@code dataType} JSON property.
 *
 * @return vectorDataType; may be {@code null} if it has not been set
 */
public String getVectorDataType() {
    return this.vectorDataType;
}

/**
* Sets the data type for the embedding.
*
* @param vectorDataType the data type for the embedding
* @return Embedding
*/
public Embedding setVectorDataType(String vectorDataType) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment as for DistanceFunction - we probably don't need to make VectorDataType as public class.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As with DistanceFunction, the VectorDataType class can be used to set the values when creating a new Embedding.

CosmosVectorEmbedding embedding1 = new CosmosVectorEmbedding(
            "/vector1",
            CosmosVectorDistanceFunction.COSINE.toString(),
            3L,
            CosmosVectorDataType.FLOAT32.toString());

this.vectorDataType = vectorDataType;
return this;
}

/**
 * Returns the number of dimensions configured for the embedding.
 * <p>
 * This is the field mapped to the {@code dimensions} JSON property.
 *
 * @return dimensions; may be {@code null} if it has not been set
 */
public Long getDimensions() {
    return this.dimensions;
}

/**
* Sets the dimensions for the embedding.
* <p>
* The value is stored as given; no validation is performed here.
*
* @param dimensions the dimensions for the embedding
* @return this {@code Embedding} instance, so that calls can be chained
*/
public Embedding setDimensions(Long dimensions) {
this.dimensions = dimensions;
return this;
}

/**
 * Returns the distance function configured for the embedding, as a string.
 * <p>
 * This is the field mapped to the {@code distanceFunction} JSON property.
 *
 * @return distanceFunction; may be {@code null} if it has not been set
 */
public String getDistanceFunction() {
    return this.distanceFunction;
}

/**
* Sets the distanceFunction for the embedding.
*
* @param distanceFunction the distanceFunction for the embedding
* @return Embedding
*/
public Embedding setDistanceFunction(String distanceFunction) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If setting and getting distanceFunction is being done by String, then why are we making DistanceFunction class public?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The DistanceFunction class can be used to set the values when creating a new Embedding.

CosmosVectorEmbedding embedding1 = new CosmosVectorEmbedding(
            "/vector1",
            CosmosVectorDistanceFunction.COSINE.toString(),
            3L,
            CosmosVectorDataType.FLOAT32.toString());

this.distanceFunction = distanceFunction;
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,12 @@
*/
public final class IndexingPolicy {
private static final String DEFAULT_PATH = "/*";

private final JsonSerializable jsonSerializable;
private List<IncludedPath> includedPaths;
private List<ExcludedPath> excludedPaths;
private List<List<CompositePath>> compositeIndexes;
private List<SpatialSpec> spatialIndexes;

private JsonSerializable jsonSerializable;
private List<VectorIndexSpec> vectorIndexes;

/**
* Constructor.
Expand Down Expand Up @@ -53,7 +52,7 @@ public IndexingPolicy() {
* </pre>
*
* @param defaultIndexOverrides comma separated set of indexes that serve as default index specifications for the
* root path.
* root path.
* @throws IllegalArgumentException throws when defaultIndexOverrides is null
*/
IndexingPolicy(Index[] defaultIndexOverrides) {
Expand Down Expand Up @@ -234,7 +233,7 @@ public IndexingPolicy setCompositeIndexes(List<List<CompositePath>> compositeInd
}

/**
* Sets the spatial indexes for additional indexes.
* Gets the spatial indexes for additional indexes.
*
* @return the spatial indexes.
*/
Expand Down Expand Up @@ -262,11 +261,55 @@ public IndexingPolicy setSpatialIndexes(List<SpatialSpec> spatialIndexes) {
return this;
}

/**
 * Gets the vector indexes.
 * <p>
 * The list is cached after the first call: it is read from the {@code vectorIndexes} property of the
 * underlying JSON, and an empty list is used when that read yields {@code null}.
 *
 * @return the vector indexes; never {@code null} once this method has returned normally
 */
public List<VectorIndexSpec> getVectorIndexes() {
    if (this.vectorIndexes != null) {
        return this.vectorIndexes;
    }

    List<VectorIndexSpec> parsed =
        this.jsonSerializable.getList(Constants.Properties.VECTOR_INDEXES, VectorIndexSpec.class);
    // Fall back to an empty list so callers always get a list to work with.
    this.vectorIndexes = parsed != null ? parsed : new ArrayList<VectorIndexSpec>();
    return this.vectorIndexes;
}

/**
 * Sets the vector indexes.
 * <p>
 * The given list is cached and also written to the {@code vectorIndexes} property of the underlying JSON.
 * <p>
 * Example of the vectorIndexes:
 * <pre>
 * "vectorIndexes": [
 *   {
 *     "path": "/vector1",
 *     "type": "diskANN"
 *   },
 *   {
 *     "path": "/vector1",
 *     "type": "flat"
 *   },
 *   {
 *     "path": "/vector2",
 *     "type": "quantizedFlat"
 *   }]
 * </pre>
 *
 * @param vectorIndexes the vector indexes
 * @return the Indexing Policy.
 */
public IndexingPolicy setVectorIndexes(List<VectorIndexSpec> vectorIndexes) {
    this.vectorIndexes = vectorIndexes;
    // The same list instance is stored in the field and handed to the JSON property.
    this.jsonSerializable.set(Constants.Properties.VECTOR_INDEXES, vectorIndexes);
    return this;
}

void populatePropertyBag() {
this.jsonSerializable.populatePropertyBag();
// If indexing mode is not 'none' and not paths are set, set them to the defaults
if (this.getIndexingMode() != IndexingMode.NONE && this.getIncludedPaths().size() == 0
&& this.getExcludedPaths().size() == 0) {
&& this.getExcludedPaths().size() == 0) {
IncludedPath includedPath = new IncludedPath(IndexingPolicy.DEFAULT_PATH);
this.getIncludedPaths().add(includedPath);
}
Expand All @@ -286,5 +329,7 @@ void populatePropertyBag() {
}
}

JsonSerializable getJsonSerializable() { return this.jsonSerializable; }
JsonSerializable getJsonSerializable() {
return this.jsonSerializable;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,8 @@ public static <T> void populatePropertyBag(T t) {
((PartitionKeyDefinition) t).populatePropertyBag();
} else if (t instanceof SpatialSpec) {
((SpatialSpec) t).populatePropertyBag();
} else if (t instanceof VectorIndexSpec) {
((VectorIndexSpec) t).populatePropertyBag();
} else if (t instanceof SqlParameter) {
((SqlParameter) t).populatePropertyBag();
} else if (t instanceof SqlQuerySpec) {
Expand Down Expand Up @@ -631,6 +633,8 @@ public static <T> JsonSerializable getJsonSerializable(T t) {
return ((PartitionKeyDefinition) t).getJsonSerializable();
} else if (t instanceof SpatialSpec) {
return ((SpatialSpec) t).getJsonSerializable();
} else if (t instanceof VectorIndexSpec) {
return ((VectorIndexSpec) t).getJsonSerializable();
} else if (t instanceof SqlParameter) {
return ((SqlParameter) t).getJsonSerializable();
} else if (t instanceof SqlQuerySpec) {
Expand Down
Loading