Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
0269f52
Adding changes for vectorIndex and vectorEmbeddingPolicy
aayush3011 Mar 22, 2024
29ad391
Adding some necessary comments
aayush3011 Mar 27, 2024
cd4d8cf
Adding test case
aayush3011 Mar 28, 2024
a2f6a83
updating enum values
aayush3011 Mar 28, 2024
c4bc283
Updating test case
aayush3011 Mar 28, 2024
af99d7b
Updating test case
aayush3011 Mar 29, 2024
6d8fc9b
Updating test case
aayush3011 Mar 29, 2024
2f7112d
updating changelog
aayush3011 Mar 29, 2024
158880f
Merge branch 'main' into users/akataria/vectorindexing
aayush3011 Mar 29, 2024
bb85dd3
Updating test case
aayush3011 Mar 29, 2024
a7185d7
Merge branch 'users/akataria/vectorindexing' of https://github.com/aa…
aayush3011 Mar 29, 2024
f4c4012
Merge branch 'Azure:main' into users/akataria/vectorindexing
aayush3011 Apr 2, 2024
72a4bcd
Resolving comments
aayush3011 Apr 2, 2024
dfb3575
Resolving comments
aayush3011 Apr 2, 2024
67f51cb
Fixing test case
aayush3011 Apr 2, 2024
730f8c2
Resolving comments
aayush3011 Apr 23, 2024
ad3ac89
Resolving Comments
aayush3011 Apr 27, 2024
940c6af
Merge branch 'main' into users/akataria/vectorindexing
aayush3011 Apr 27, 2024
3eb77ea
Fixing build issues
aayush3011 Apr 27, 2024
44f4e07
Merge branch 'main' into users/akataria/vectorindexing
aayush3011 Apr 29, 2024
460f681
Resolving comments
aayush3011 Apr 30, 2024
5579dd1
Resolving Comments
aayush3011 May 1, 2024
54a2ce3
Merge branch 'users/akataria/vectorindexing' of https://github.com/aa…
aayush3011 May 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion sdk/cosmos/azure-cosmos/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
### 4.59.0 (2024-04-27)

#### Features Added
* Added `cosmosVectorEmbeddingPolicy` in `cosmosContainerProperties` and `vectorIndexes` in `indexPolicy` to support vector search in CosmosDB - See [PR 39379](https://github.com/Azure/azure-sdk-for-java/pull/39379)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the PR reference here seems wrong - should be 40004?

* Added public APIs `getCustomItemSerializer` and `setCustomItemSerializer` to allow customers to specify custom payload transformations or serialization settings. - See [PR 38997](https://github.com/Azure/azure-sdk-for-java/pull/38997) and [PR 39933](https://github.com/Azure/azure-sdk-for-java/pull/39933)

#### Other Changes
* Load Blackbird or Afterburner into the ObjectMapper depending upon Java version and presence of modules in classpath. Make Afterburner and Blackbird optional maven dependencies. See - [PR 39689](https://github.com/Azure/azure-sdk-for-java/pull/39689)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,15 @@ public static final class Properties {
public static final String SPATIAL_INDEXES = "spatialIndexes";
public static final String TYPES = "types";

// Vector Embedding Policy
// Property-name constants for the vector search settings.
// Key under which a collection's vector embedding policy is stored and looked up.
public static final String VECTOR_EMBEDDING_POLICY = "vectorEmbeddingPolicy";
// NOTE(review): presumably the key of the vector index list inside the indexing policy - confirm against IndexingPolicy.
public static final String VECTOR_INDEXES = "vectorIndexes";
// NOTE(review): presumably the key of the embedding list inside the vector embedding policy - confirm.
public static final String VECTOR_EMBEDDINGS = "vectorEmbeddings";
// NOTE(review): presumably the "type" field of a single vector index entry - confirm.
public static final String VECTOR_INDEX_TYPE = "type";
// Per-embedding fields: element data type, number of dimensions and distance function.
public static final String VECTOR_DATA_TYPE = "dataType";
public static final String VECTOR_DIMENSIONS = "dimensions";
public static final String DISTANCE_FUNCTION = "distanceFunction";

// Unique index.
public static final String UNIQUE_KEY_POLICY = "uniqueKeyPolicy";
public static final String UNIQUE_KEYS = "uniqueKeys";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
import com.azure.cosmos.CosmosItemSerializer;
import com.azure.cosmos.implementation.apachecommons.lang.StringUtils;
import com.azure.cosmos.implementation.caches.SerializableWrapper;
import com.azure.cosmos.models.ClientEncryptionPolicy;
import com.azure.cosmos.models.ChangeFeedPolicy;
import com.azure.cosmos.models.ClientEncryptionPolicy;
import com.azure.cosmos.models.ComputedProperty;
import com.azure.cosmos.models.ConflictResolutionPolicy;
import com.azure.cosmos.models.CosmosVectorEmbeddingPolicy;
import com.azure.cosmos.models.IndexingPolicy;
import com.azure.cosmos.models.ModelBridgeInternal;
import com.azure.cosmos.models.PartitionKeyDefinition;
Expand All @@ -24,6 +25,8 @@
import java.util.Collection;
import java.util.Collections;

import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull;

/**
* Represents a document collection in the Azure Cosmos DB database service. A collection is a named logical container
* for documents.
Expand All @@ -40,6 +43,7 @@ public final class DocumentCollection extends Resource {
private UniqueKeyPolicy uniqueKeyPolicy;
private PartitionKeyDefinition partitionKeyDefinition;
private ClientEncryptionPolicy clientEncryptionPolicyInternal;
private CosmosVectorEmbeddingPolicy cosmosVectorEmbeddingPolicy;

/**
* Constructor.
Expand Down Expand Up @@ -410,6 +414,33 @@ public void setClientEncryptionPolicy(ClientEncryptionPolicy value) {
this.set(Constants.Properties.CLIENT_ENCRYPTION_POLICY, value, CosmosItemSerializer.DEFAULT_SERIALIZER);
}

/**
* Gets the Vector Embedding Policy containing paths for embeddings along with path-specific settings for the item
* used in performing vector search on the items in a collection in the Azure CosmosDB database service.
*
* @return the Vector Embedding Policy.
*/
public CosmosVectorEmbeddingPolicy getVectorEmbeddingPolicy() {
    // Serve the cached instance when one has already been materialized.
    if (this.cosmosVectorEmbeddingPolicy != null) {
        return this.cosmosVectorEmbeddingPolicy;
    }

    // Otherwise read it from the property bag, but only when the key is present;
    // an absent key leaves the cache empty and the method returns null.
    final boolean policyPresent = super.has(Constants.Properties.VECTOR_EMBEDDING_POLICY);
    if (policyPresent) {
        this.cosmosVectorEmbeddingPolicy = super.getObject(
            Constants.Properties.VECTOR_EMBEDDING_POLICY,
            CosmosVectorEmbeddingPolicy.class);
    }
    return this.cosmosVectorEmbeddingPolicy;
}

/**
* Sets the Vector Embedding Policy containing paths for embeddings along with path-specific settings for the item
* used in performing vector search on the items in a collection in the Azure CosmosDB database service.
*
* @param value the Vector Embedding Policy.
*/
public void setVectorEmbeddingPolicy(CosmosVectorEmbeddingPolicy value) {
    // Reject null up front; the policy can only be replaced, not cleared, through this setter.
    checkNotNull(value, "cosmosVectorEmbeddingPolicy cannot be null");
    this.set(Constants.Properties.VECTOR_EMBEDDING_POLICY, value, CosmosItemSerializer.DEFAULT_SERIALIZER);
    // Drop any instance cached by getVectorEmbeddingPolicy() so the next read is
    // re-materialized from the property bag instead of returning the previous policy.
    this.cosmosVectorEmbeddingPolicy = null;
}

public void populatePropertyBag() {
super.populatePropertyBag();
if (this.indexingPolicy == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ public CompositePathSortOrder getOrder() {
}

/**
* Gets the sort order for the composite path.
* Sets the sort order for the composite path.
* <p>
* For example if you want to run the query "SELECT * FROM c ORDER BY c.age asc, c.height desc",
* then you need to make the order for "/age" "ascending" and the order for "/height" "descending".
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,28 @@ public CosmosContainerProperties setClientEncryptionPolicy(ClientEncryptionPolic
return this;
}

/**
* Gets the Vector Embedding Policy containing paths for embeddings along with path-specific settings for the item
* used in performing vector search on the items in a collection in the Azure CosmosDB database service.
*
* @return the Vector Embedding Policy.
*/
public CosmosVectorEmbeddingPolicy getVectorEmbeddingPolicy() {
    // Pure delegation: the policy is held by the wrapped DocumentCollection.
    final CosmosVectorEmbeddingPolicy policy = this.documentCollection.getVectorEmbeddingPolicy();
    return policy;
}

/**
* Sets the Vector Embedding Policy containing paths for embeddings along with path-specific settings for the item
* used in performing vector search on the items in a collection in the Azure CosmosDB database service.
*
* @param value the Vector Embedding Policy.
* @return the CosmosContainerProperties.
*/
public CosmosContainerProperties setVectorEmbeddingPolicy(CosmosVectorEmbeddingPolicy value) {
    // Forward to the wrapped DocumentCollection, then hand back this instance for fluent chaining.
    final CosmosContainerProperties self = this;
    self.documentCollection.setVectorEmbeddingPolicy(value);
    return self;
}

Resource getResource() {
    // Exposes the wrapped DocumentCollection through its Resource type.
    final Resource resource = this.documentCollection;
    return resource;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

package com.azure.cosmos.models;

import com.fasterxml.jackson.annotation.JsonValue;

import java.util.Arrays;

/**
* Data types for the embeddings in Cosmos DB database service.
*/
public enum CosmosVectorDataType {
    /**
     * Signed 8-bit integer embedding elements ({@code int8}).
     */
    INT8("int8"),

    /**
     * Unsigned 8-bit integer embedding elements ({@code uint8}).
     */
    UINT8("uint8"),

    /**
     * 16-bit floating point embedding elements ({@code float16}).
     */
    FLOAT16("float16"),

    /**
     * 32-bit floating point embedding elements ({@code float32}).
     */
    FLOAT32("float32");

    /** Textual form of the constant; returned by {@link #toString()} and used as the JSON value. */
    private final String wireValue;

    CosmosVectorDataType(String wireValue) {
        this.wireValue = wireValue;
    }

    /**
     * Returns the textual form of this constant. Because of {@link JsonValue} the same
     * string is what Jackson writes when serializing the enum.
     *
     * @return the textual value of this constant
     */
    @JsonValue
    @Override
    public String toString() {
        return this.wireValue;
    }

    /**
     * Looks up the constant whose textual value equals {@code value}, ignoring case.
     *
     * @param value the textual value to resolve
     * @return the matching CosmosVectorDataType
     * @throws IllegalArgumentException if no constant matches, including when {@code value} is null
     */
    public static CosmosVectorDataType fromString(String value) {
        // Declaration order is preserved, so the first match wins.
        for (CosmosVectorDataType candidate : Arrays.asList(CosmosVectorDataType.values())) {
            if (candidate.toString().equalsIgnoreCase(value)) {
                return candidate;
            }
        }
        throw new IllegalArgumentException("Invalid vector data type for the vector embedding policy.");
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

package com.azure.cosmos.models;

import com.fasterxml.jackson.annotation.JsonValue;

import java.util.Arrays;

/**
* Distance Function for the embeddings in the Cosmos DB database service.
*/
public enum CosmosVectorDistanceFunction {
    /**
     * Represents the euclidean distance function.
     */
    EUCLIDEAN("euclidean"),

    /**
     * Represents the cosine distance function.
     */
    COSINE("cosine"),

    /**
     * Represents the dot product distance function.
     */
    DOT_PRODUCT("dotproduct");

    /** Textual form of the constant; returned by {@link #toString()} and used as the JSON value. */
    private final String overWireValue;

    CosmosVectorDistanceFunction(String overWireValue) {
        this.overWireValue = overWireValue;
    }

    /**
     * Returns the overWire value of this constant. Because of {@link JsonValue} the same
     * string is what Jackson writes when serializing the enum.
     *
     * @return the overWire value of this constant
     */
    @JsonValue
    @Override
    public String toString() {
        return this.overWireValue;
    }

    /**
     * Method to retrieve the enum constant by its overWireValue. The comparison ignores case.
     *
     * @param value the overWire value of the enum constant
     * @return the matching CosmosVectorDistanceFunction
     * @throws IllegalArgumentException if no matching enum constant is found, including when
     * {@code value} is null
     */
    public static CosmosVectorDistanceFunction fromString(String value) {
        return Arrays.stream(CosmosVectorDistanceFunction.values())
            // equalsIgnoreCase(null) is false, so a null input falls through to orElseThrow.
            .filter(vectorDistanceFunction -> vectorDistanceFunction.toString().equalsIgnoreCase(value))
            .findFirst()
            .orElseThrow(() -> new IllegalArgumentException("Invalid distance function for the vector embedding policy."));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

package com.azure.cosmos.models;

import com.azure.cosmos.implementation.Constants;
import com.azure.cosmos.implementation.JsonSerializable;
import com.azure.cosmos.implementation.apachecommons.lang.StringUtils;
import com.fasterxml.jackson.annotation.JsonProperty;
import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull;

/**
* Embedding settings within {@link CosmosVectorEmbeddingPolicy}
*/
public final class CosmosVectorEmbedding {
    @JsonProperty(Constants.Properties.PATH)
    private String path;
    // Stored as the enum's string form; converted back to the enum in getDataType().
    @JsonProperty(Constants.Properties.VECTOR_DATA_TYPE)
    private String dataType;
    @JsonProperty(Constants.Properties.VECTOR_DIMENSIONS)
    private Long dimensions;
    // Stored as the enum's string form; converted back to the enum in getDistanceFunction().
    @JsonProperty(Constants.Properties.DISTANCE_FUNCTION)
    private String distanceFunction;
    // NOTE(review): assigned in the constructor but not read anywhere in this class.
    private JsonSerializable jsonSerializable;

    /**
     * Constructor
     */
    public CosmosVectorEmbedding() {
        this.jsonSerializable = new JsonSerializable();
    }

    /**
     * Gets the path for the cosmosVectorEmbedding.
     *
     * @return path, or null if it has not been set
     */
    public String getPath() {
        return path;
    }

    /**
     * Sets the path for the cosmosVectorEmbedding.
     * <p>
     * The path must start with {@code '/'} and must not contain any further {@code '/'} characters.
     *
     * @param path the path for the cosmosVectorEmbedding
     * @return CosmosVectorEmbedding
     * @throws NullPointerException if {@code path} is null or empty
     * @throws IllegalArgumentException if {@code path} does not have the required shape
     */
    public CosmosVectorEmbedding setPath(String path) {
        if (StringUtils.isEmpty(path)) {
            throw new NullPointerException("embedding path is empty");
        }

        // The only '/' allowed is the leading one, i.e. its last occurrence must be at index 0.
        if (path.charAt(0) != '/' || path.lastIndexOf('/') != 0) {
            throw new IllegalArgumentException(
                "embedding path must start with '/' and must not contain any further '/' characters");
        }

        this.path = path;
        return this;
    }

    /**
     * Gets the data type for the cosmosVectorEmbedding.
     *
     * @return dataType
     * @throws IllegalArgumentException if the stored data type is unset or does not match any
     * {@link CosmosVectorDataType} constant
     */
    public CosmosVectorDataType getDataType() {
        return CosmosVectorDataType.fromString(dataType);
    }

    /**
     * Sets the data type for the cosmosVectorEmbedding.
     *
     * @param dataType the data type for the cosmosVectorEmbedding
     * @return CosmosVectorEmbedding
     * @throws NullPointerException if {@code dataType} is null
     */
    public CosmosVectorEmbedding setDataType(CosmosVectorDataType dataType) {
        checkNotNull(dataType, "cosmosVectorDataType cannot be null");
        this.dataType = dataType.toString();
        return this;
    }

    /**
     * Gets the dimensions for the cosmosVectorEmbedding.
     *
     * @return dimensions, or null if it has not been set
     */
    public Long getDimensions() {
        return dimensions;
    }

    /**
     * Sets the dimensions for the cosmosVectorEmbedding.
     *
     * @param dimensions the dimensions for the cosmosVectorEmbedding
     * @return CosmosVectorEmbedding
     * @throws NullPointerException if {@code dimensions} is null
     * @throws IllegalArgumentException if {@code dimensions} is less than 1
     */
    public CosmosVectorEmbedding setDimensions(Long dimensions) {
        checkNotNull(dimensions, "dimensions cannot be null");
        if (dimensions < 1) {
            throw new IllegalArgumentException("Dimensions for the embedding has to be a long value greater than 0 " +
                "for the vector embedding policy");
        }

        this.dimensions = dimensions;
        return this;
    }

    /**
     * Gets the distanceFunction for the cosmosVectorEmbedding.
     *
     * @return distanceFunction
     * @throws IllegalArgumentException if the stored distance function is unset or does not match any
     * {@link CosmosVectorDistanceFunction} constant
     */
    public CosmosVectorDistanceFunction getDistanceFunction() {
        return CosmosVectorDistanceFunction.fromString(distanceFunction);
    }

    /**
     * Sets the distanceFunction for the cosmosVectorEmbedding.
     *
     * @param distanceFunction the distanceFunction for the cosmosVectorEmbedding
     * @return CosmosVectorEmbedding
     * @throws NullPointerException if {@code distanceFunction} is null
     */
    public CosmosVectorEmbedding setDistanceFunction(CosmosVectorDistanceFunction distanceFunction) {
        checkNotNull(distanceFunction, "cosmosVectorDistanceFunction cannot be empty");
        this.distanceFunction = distanceFunction.toString();
        return this;
    }
}
Loading