Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
77 commits
Select commit Hold shift + click to select a range
d0fcdff
Update changelog (#35929)
xiangyan99 Jun 5, 2024
8c581a2
Autoinstrumentation rework (#35890)
jeremydvoss Jun 5, 2024
72919a9
switch to majority entra auth for tests (#35581)
kristapratico Jun 6, 2024
428ccf2
add a new parameter allow_roleassignment_on_rg to allow/disallow role…
wenjie1070116 Jun 6, 2024
61138a7
Increment package version after release of azure-core (#35950)
azure-sdk Jun 6, 2024
e6f98bc
[Event Hubs] Update URI used for consumer auth to include consumer gr…
swathipil Jun 6, 2024
697a9bf
Allow configuration of metric Views in distro (#35932)
lzchen Jun 6, 2024
bf4ee7f
[EventHub] Update README for enable logging section (#35955)
swathipil Jun 6, 2024
215bb40
[Storage] [STG 94] Merge STG 94 into `main` branch (#35888)
weirongw23-msft Jun 6, 2024
6e6648a
Sync eng/common directory with azure-sdk-tools for PR 8377 (#35915)
azure-sdk Jun 6, 2024
a73ca09
Distro release 1.6.0 (#35935)
jeremydvoss Jun 6, 2024
ae48eea
Python client for Model-as-a-Service (MaaS) / Model-as-a-Platform (Ma…
dargilco Jun 7, 2024
610da5d
[AutoRelease] t2-datafactory-2024-06-03-75602(can only be merged by S…
azure-sdk Jun 7, 2024
5b30781
Use DOTNET_ROLL_FORWARD: 'Major' for test-proxy (#35956)
azure-sdk Jun 7, 2024
acd606f
Pin pester version to 5.5.0 (#35967)
azure-sdk Jun 7, 2024
6bb9e47
Some minor updates to package & samples README.md files (#35971)
dargilco Jun 7, 2024
0b99ee1
add aoai assistants streaming/v2 tests (#35443)
kristapratico Jun 7, 2024
9b98575
[Identity] Allow use of client assertion in OBO cred (#35812)
pvaneck Jun 7, 2024
984542f
[Identity] Disable live service principal tests (#35958)
pvaneck Jun 7, 2024
a7cb46a
set storage account access to identity-based for feature store creati…
runhli Jun 7, 2024
9b6427c
Change Workspace related PR reviewer (#35921)
debuggerXi Jun 10, 2024
8abbc26
[EG] GA Namespaces (#35831)
l0lawrence Jun 10, 2024
f24b567
Increment package version after release of azure-monitor-opentelemetr…
azure-sdk Jun 10, 2024
c9b1e27
update test for new structure of custom blocklist (#36001)
kristapratico Jun 10, 2024
ec8190c
Update github-event-processor to 1.0.0-dev.20240610.2 (#36000)
azure-sdk Jun 10, 2024
81de947
[Identity] Minor doc updates (#35974)
pvaneck Jun 10, 2024
2ac0060
allow for futher embedded snippets (#36004)
l0lawrence Jun 10, 2024
40cf085
[Monitor Query + Ingestions] Update changelogs (#35942)
pvaneck Jun 10, 2024
cb065ac
[Identity] Managed identity bug fix (#36010)
pvaneck Jun 10, 2024
fe0e014
Added release dates (#36006)
vincenttran-msft Jun 10, 2024
5b55203
Remove MayankKumar91 (#35911)
lmazuel Jun 10, 2024
adbac73
Increment package version after release of azure-identity (#36015)
azure-sdk Jun 10, 2024
10c3c79
Always run analyze weekly (#35968)
kristapratico Jun 11, 2024
433b99a
move samples (#35966)
l0lawrence Jun 11, 2024
4356326
[Key Vault] Change location for weekly China cloud tests (#36018)
mccoyp Jun 11, 2024
5fd14fe
Fix Sphinx on azure-storage-blob-changefeed (#35975)
Jun 11, 2024
dff6744
update release date (#36028)
l0lawrence Jun 11, 2024
b052da8
azure-mgmt-core shouldn't use mgmt docs build (#35936)
kristapratico Jun 11, 2024
d97ff44
Identity credential unavailable error non json imds (#36016)
xiangyan99 Jun 11, 2024
e08b3b0
Update azure-ai-inference client library to support sending images as…
dargilco Jun 11, 2024
cf49b4e
Export InputTypes from constants (#35848)
emepetres Jun 11, 2024
a79c5ab
[EG] Eventgrid Release (#36030)
l0lawrence Jun 11, 2024
47fdf5d
Fix Sphinx on azure-storage-blob (#36014)
Jun 11, 2024
1552259
[Identity] Update AzurePipelinesCredential (#35858)
pvaneck Jun 11, 2024
01fa69c
upgrade autorest.python to `6.13.19` (#36024)
msyyc Jun 12, 2024
215eb63
Increment version for monitor releases (#36036)
azure-sdk Jun 12, 2024
bfd541b
compatible with new date format (#36049)
msyyc Jun 12, 2024
5b61bd4
[AutoRelease] t2-cdn-2024-06-12-45722(can only be merged by SDK owner…
azure-sdk Jun 12, 2024
614a928
[EG] link + patch update (#36045)
l0lawrence Jun 12, 2024
80ecdfb
async with (#36060)
l0lawrence Jun 12, 2024
2aba54e
Incremental (#36040)
vincenttran-msft Jun 12, 2024
379cfd3
typo (#36062)
l0lawrence Jun 12, 2024
c5e1659
Fix prepare-pipelines line wrapping (#36061)
azure-sdk Jun 12, 2024
fda24bd
[bct] Initial refactoring breaking changes tool (#36005)
catalinaperalta Jun 12, 2024
a642e74
Update swagger_to_sdk_config_dpg.json (#36068)
msyyc Jun 13, 2024
3ce8196
Increment package version after release of azure-eventgrid (#36063)
azure-sdk Jun 13, 2024
66d5de4
Sync eng/common directory with azure-sdk-tools for PR 8388 (#35970)
azure-sdk Jun 13, 2024
15bcb99
report number of breaking changes (#36067)
catalinaperalta Jun 13, 2024
0f27374
update codeowner (#36074)
xiangyan99 Jun 13, 2024
d7bfdb0
update strict-sphinx to v7 (#36075)
kristapratico Jun 13, 2024
df9c8c7
Update spelling dependencies (#36084)
azure-sdk Jun 14, 2024
3e7dff6
[DevCenter] Update release date (#36083)
drielenr Jun 14, 2024
147746b
[Identity] Add TSG section for AzurePipelinesCredential (#36048)
pvaneck Jun 14, 2024
c19f701
Support sending image data as part of a user message, using a new Ima…
dargilco Jun 14, 2024
ee65563
update (#36051)
msyyc Jun 14, 2024
892881a
Increment package version after release of azure-ai-inference (#36091)
azure-sdk Jun 14, 2024
c6383aa
address API review comments (#36058)
Adarsh-Ramanathan Jun 14, 2024
811dc0e
Update CodeownersLinter version to 1.0.0-dev.20240614.4 (#36093)
azure-sdk Jun 14, 2024
fe435b7
[AutoRelease] t2-mobilenetwork-2024-06-05-65505(can only be merged by…
azure-sdk Jun 17, 2024
a566320
[AutoRelease] t2-storagemover-2024-06-11-87054(can only be merged by …
azure-sdk Jun 17, 2024
698cd95
code and test (#35959)
azure-sdk Jun 17, 2024
40a2625
[AutoRelease] t2-web-2024-06-07-57417(can only be merged by SDK owner…
azure-sdk Jun 17, 2024
3c833e1
Update breaking_changes_allowlist.py (#36104)
msyyc Jun 17, 2024
323fdc7
appconfig mi test (#35842)
xiangyan99 Jun 17, 2024
c51ac91
Bugfix: None was being appended to output path for batch-endpoint inv…
nagkumar91 Jun 17, 2024
cd1725e
Sync eng/common directory with azure-sdk-tools for PR 8457 (#36113)
azure-sdk Jun 17, 2024
d791bc6
Merge branch 'main' into 1.17.0-core-main-merge
MilesHolland Jun 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Update azure-ai-inference client library to support sending images as…
… part of chat completions (#36022)
  • Loading branch information
dargilco authored Jun 11, 2024
commit e08b3b0f8843a38407515b91a72638aa3dd09447
10 changes: 7 additions & 3 deletions sdk/ai/azure-ai-inference/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,8 @@ print(response.choices[0].message.content)

<!-- END SNIPPET -->

The following types or messages are supported: `SystemMessage`,`UserMessage`, `AssistantMessage`, `ToolMessage` (See sample [sample_chat_completions_with_tools.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py) for usage of `ToolMessage`).
The following types of messages are supported: `SystemMessage`, `UserMessage`, `AssistantMessage`, `ToolMessage`. See sample [sample_chat_completions_with_tools.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py) for usage of `ToolMessage`. See [sample_chat_completions_with_images.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_images.py) for usage of `UserMessage` that
includes uploading an image.

Alternatively, you can provide the messages as a dictionary instead of using the strongly typed classes like `SystemMessage` and `UserMessage`:

Expand All @@ -232,7 +233,10 @@ response = client.complete(
"role": "assistant",
"content": "The main construction of the International Space Station (ISS) was completed between 1998 and 2011. During this period, more than 30 flights by US space shuttles and 40 by Russian rockets were conducted to transport components and modules to the station.",
},
{"role": "user", "content": "And what was the estimated cost to build it?"},
{
"role": "user",
"content": "And what was the estimated cost to build it?"
},
]
}
)
Expand Down Expand Up @@ -399,7 +403,7 @@ try:
result = client.complete( ... )
except HttpResponseError as e:
print(f"Status code: {e.status_code} ({e.reason})")
print(f"{e.message}")
print(e.message)
```

For example, when you provide a wrong authentication key:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -405,9 +405,6 @@ def _complete(
response. Required.
"model": "str", # The model used for the chat completion. Required.
"usage": {
"capacity_type": "str", # Indicates whether your capacity has been
affected by the usage amount (token count) reported here. Required. Known
values are: "usage" and "fixed".
"completion_tokens": 0, # The number of tokens generated across all
completions emissions. Required.
"prompt_tokens": 0, # The number of tokens in the provided prompts
Expand Down Expand Up @@ -678,9 +675,6 @@ def _embed(
"id": "str", # Unique identifier for the embeddings result. Required.
"model": "str", # The model ID used to generate this result. Required.
"usage": {
"capacity_type": "str", # Indicates whether your capacity has been
affected by the usage amount (token count) reported here. Required. Known
values are: "usage" and "fixed".
"input_tokens": 0, # Number of tokens in the request prompt.
Required.
"prompt_tokens": 0, # Number of tokens used for the prompt sent to
Expand Down Expand Up @@ -953,9 +947,6 @@ def _embed(
"id": "str", # Unique identifier for the embeddings result. Required.
"model": "str", # The model ID used to generate this result. Required.
"usage": {
"capacity_type": "str", # Indicates whether your capacity has been
affected by the usage amount (token count) reported here. Required. Known
values are: "usage" and "fixed".
"input_tokens": 0, # Number of tokens in the request prompt.
Required.
"prompt_tokens": 0, # Number of tokens used for the prompt sent to
Expand Down
31 changes: 9 additions & 22 deletions sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,10 @@ def load_client(
:raises ~azure.core.exceptions.HttpResponseError
"""

with ChatCompletionsClient(endpoint, credential, **kwargs) as client: # Pick any of the clients, it does not matter.
model_info = client.get_model_info() # type: ignore
with ChatCompletionsClient(
endpoint, credential, **kwargs
) as client: # Pick any of the clients, it does not matter.
model_info = client.get_model_info() # type: ignore

_LOGGER.info("model_info=%s", model_info)
if not model_info.model_type:
Expand Down Expand Up @@ -142,7 +144,6 @@ def __init__(self, endpoint: str, credential: Union[AzureKeyCredential, "TokenCr
self._model_info: Optional[_models.ModelInfo] = None
super().__init__(endpoint, credential, **kwargs)


@overload
def complete(
self,
Expand All @@ -164,9 +165,7 @@ def complete(
] = None,
seed: Optional[int] = None,
**kwargs: Any,
) -> _models.ChatCompletions:
...

) -> _models.ChatCompletions: ...

@overload
def complete(
Expand All @@ -189,9 +188,7 @@ def complete(
] = None,
seed: Optional[int] = None,
**kwargs: Any,
) -> _models.StreamingChatCompletions:
...

) -> _models.StreamingChatCompletions: ...

@overload
def complete(
Expand Down Expand Up @@ -535,7 +532,6 @@ def complete(

return _deserialize(_models._models.ChatCompletions, response.json()) # pylint: disable=protected-access


@distributed_trace
def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
# pylint: disable=line-too-long
Expand All @@ -546,15 +542,13 @@ def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
:raises ~azure.core.exceptions.HttpResponseError
"""
if not self._model_info:
self._model_info = self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init
self._model_info = self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init
return self._model_info


def __str__(self) -> str:
# pylint: disable=client-method-name-no-double-underscore
return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__()


# Remove this once https://github.com/Azure/autorest.python/issues/2619 is fixed,
# and you see the equivalent auto-generated method in _client.py return "Self"
def __enter__(self) -> Self:
Expand All @@ -581,7 +575,6 @@ def __init__(self, endpoint: str, credential: Union[AzureKeyCredential, "TokenCr
self._model_info: Optional[_models.ModelInfo] = None
super().__init__(endpoint, credential, **kwargs)


@overload
def embed(
self,
Expand Down Expand Up @@ -791,7 +784,6 @@ def embed(

return deserialized # type: ignore


@distributed_trace
def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
# pylint: disable=line-too-long
Expand All @@ -802,15 +794,13 @@ def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
:raises ~azure.core.exceptions.HttpResponseError
"""
if not self._model_info:
self._model_info = self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init
self._model_info = self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init
return self._model_info


def __str__(self) -> str:
# pylint: disable=client-method-name-no-double-underscore
return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__()


# Remove this once https://github.com/Azure/autorest.python/issues/2619 is fixed,
# and you see the equivalent auto-generated method in _client.py return "Self"
def __enter__(self) -> Self:
Expand Down Expand Up @@ -1046,7 +1036,6 @@ def embed(

return deserialized # type: ignore


@distributed_trace
def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
# pylint: disable=line-too-long
Expand All @@ -1057,15 +1046,13 @@ def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
:raises ~azure.core.exceptions.HttpResponseError
"""
if not self._model_info:
self._model_info = self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init
self._model_info = self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init
return self._model_info


def __str__(self) -> str:
# pylint: disable=client-method-name-no-double-underscore
return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__()


# Remove this once https://github.com/Azure/autorest.python/issues/2619 is fixed,
# and you see the equivalent auto-generated method in _client.py return "Self"
def __enter__(self) -> Self:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1441,7 +1441,7 @@ def _deserialize(self, target_obj, data):
elif isinstance(response, type) and issubclass(response, Enum):
return self.deserialize_enum(data, response)

if data is None:
if data is None or data is CoreNull:
return data
try:
attributes = response._attribute_map # type: ignore
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -274,9 +274,6 @@ async def _complete(
response. Required.
"model": "str", # The model used for the chat completion. Required.
"usage": {
"capacity_type": "str", # Indicates whether your capacity has been
affected by the usage amount (token count) reported here. Required. Known
values are: "usage" and "fixed".
"completion_tokens": 0, # The number of tokens generated across all
completions emissions. Required.
"prompt_tokens": 0, # The number of tokens in the provided prompts
Expand Down Expand Up @@ -547,9 +544,6 @@ async def _embed(
"id": "str", # Unique identifier for the embeddings result. Required.
"model": "str", # The model ID used to generate this result. Required.
"usage": {
"capacity_type": "str", # Indicates whether your capacity has been
affected by the usage amount (token count) reported here. Required. Known
values are: "usage" and "fixed".
"input_tokens": 0, # Number of tokens in the request prompt.
Required.
"prompt_tokens": 0, # Number of tokens used for the prompt sent to
Expand Down Expand Up @@ -822,9 +816,6 @@ async def _embed(
"id": "str", # Unique identifier for the embeddings result. Required.
"model": "str", # The model ID used to generate this result. Required.
"usage": {
"capacity_type": "str", # Indicates whether your capacity has been
affected by the usage amount (token count) reported here. Required. Known
values are: "usage" and "fixed".
"input_tokens": 0, # Number of tokens in the request prompt.
Required.
"prompt_tokens": 0, # Number of tokens used for the prompt sent to
Expand Down
32 changes: 10 additions & 22 deletions sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,10 @@ async def load_client(
:raises ~azure.core.exceptions.HttpResponseError
"""

async with ChatCompletionsClient(endpoint, credential, **kwargs) as client: # Pick any of the clients, it does not matter.
model_info = await client.get_model_info() # type: ignore
async with ChatCompletionsClient(
endpoint, credential, **kwargs
) as client: # Pick any of the clients, it does not matter.
model_info = await client.get_model_info() # type: ignore

_LOGGER.info("model_info=%s", model_info)
if not model_info.model_type:
Expand Down Expand Up @@ -151,9 +153,7 @@ async def complete(
] = None,
seed: Optional[int] = None,
**kwargs: Any,
) -> _models.ChatCompletions:
...

) -> _models.ChatCompletions: ...

@overload
async def complete(
Expand All @@ -177,9 +177,7 @@ async def complete(
] = None,
seed: Optional[int] = None,
**kwargs: Any,
) -> _models.AsyncStreamingChatCompletions:
...

) -> _models.AsyncStreamingChatCompletions: ...

@overload
async def complete(
Expand Down Expand Up @@ -539,7 +537,6 @@ async def complete(

return _deserialize(_models.ChatCompletions, response.json()) # pylint: disable=protected-access


@distributed_trace_async
async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
# pylint: disable=line-too-long
Expand All @@ -550,15 +547,13 @@ async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
:raises ~azure.core.exceptions.HttpResponseError
"""
if not self._model_info:
self._model_info = await self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init
self._model_info = await self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init
return self._model_info


def __str__(self) -> str:
# pylint: disable=client-method-name-no-double-underscore
return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__()


# Remove this once https://github.com/Azure/autorest.python/issues/2619 is fixed,
# and you see the equivalent auto-generated method in _client.py return "Self"
async def __aenter__(self) -> Self:
Expand Down Expand Up @@ -587,7 +582,6 @@ def __init__(
self._model_info: Optional[_models.ModelInfo] = None
super().__init__(endpoint=endpoint, credential=credential, **kwargs)


@overload
async def embed(
self,
Expand Down Expand Up @@ -797,7 +791,6 @@ async def embed(

return deserialized # type: ignore


@distributed_trace_async
async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
# pylint: disable=line-too-long
Expand All @@ -808,15 +801,13 @@ async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
:raises ~azure.core.exceptions.HttpResponseError
"""
if not self._model_info:
self._model_info = await self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init
self._model_info = await self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init
return self._model_info


def __str__(self) -> str:
# pylint: disable=client-method-name-no-double-underscore
return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__()


# Remove this once https://github.com/Azure/autorest.python/issues/2619 is fixed,
# and you see the equivalent auto-generated method in _client.py return "Self"
async def __aenter__(self) -> Self:
Expand Down Expand Up @@ -845,7 +836,6 @@ def __init__(
self._model_info: Optional[_models.ModelInfo] = None
super().__init__(endpoint=endpoint, credential=credential, **kwargs)


@overload
async def embed(
self,
Expand Down Expand Up @@ -1055,7 +1045,6 @@ async def embed(

return deserialized # type: ignore


@distributed_trace_async
async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
# pylint: disable=line-too-long
Expand All @@ -1066,21 +1055,20 @@ async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
:raises ~azure.core.exceptions.HttpResponseError
"""
if not self._model_info:
self._model_info = await self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init
self._model_info = await self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init
return self._model_info


def __str__(self) -> str:
# pylint: disable=client-method-name-no-double-underscore
return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__()


# Remove this once https://github.com/Azure/autorest.python/issues/2619 is fixed,
# and you see the equivalent auto-generated method in _client.py return "Self"
async def __aenter__(self) -> Self:
await self._client.__aenter__()
return self


__all__: List[str] = [
"load_client",
"ChatCompletionsClient",
Expand Down
Loading