From d7f8a462ec2d13b0c906cd546f6e3e57adbb9b2f Mon Sep 17 00:00:00 2001 From: David Wu Date: Tue, 15 Oct 2024 23:33:52 -0700 Subject: [PATCH 01/21] Prompty support within Azure AI Inference SDK --- .../azure/ai/inference/prompts/__init__.py | 23 + .../azure/ai/inference/prompts/_patch.py | 88 +++ .../azure/ai/inference/prompts/core.py | 329 +++++++++ .../azure/ai/inference/prompts/mustache.py | 684 ++++++++++++++++++ .../azure/ai/inference/prompts/parsers.py | 129 ++++ .../azure/ai/inference/prompts/py.typed | 1 + .../azure/ai/inference/prompts/renderers.py | 20 + .../azure/ai/inference/prompts/utils.py | 256 +++++++ .../samples/sample1.prompty | 30 + ...at_completions_from_input_prompt_string.py | 77 ++ ...ple_chat_completions_from_input_prompty.py | 69 ++ sdk/ai/azure-ai-inference/setup.py | 1 + .../azure-ai-inference/tests/test_prompts.py | 64 ++ 13 files changed, 1771 insertions(+) create mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py create mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py create mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/core.py create mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/mustache.py create mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/parsers.py create mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/py.typed create mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/renderers.py create mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/utils.py create mode 100644 sdk/ai/azure-ai-inference/samples/sample1.prompty create mode 100644 sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py create mode 100644 sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py create mode 100644 sdk/ai/azure-ai-inference/tests/test_prompts.py diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py new file mode 100644 index 000000000000..aee79879c6fe --- /dev/null +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py @@ -0,0 +1,23 @@ +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +from .core import InvokerFactory +from .core import Prompty + +from .renderers import MustacheRenderer +from .parsers import PromptyChatParser +from .utils import load +from ._patch import patch_sdk as _patch_sdk, PromptyTemplate + +# Register the Mustache renderer and parser +InvokerFactory().register_renderer("mustache", MustacheRenderer) +InvokerFactory().register_parser("prompty.chat", PromptyChatParser) + +__all__ = [ + "load", + "Prompty", + "PromptyTemplate", +] + +_patch_sdk() diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py new file mode 100644 index 000000000000..1dbaff7a0c09 --- /dev/null +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py @@ -0,0 +1,88 @@ +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +"""Customize generated code here. 
+ +Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize +""" + +from azure.ai.inference.models import ChatRequestMessage, SystemMessage +import azure.ai.inference.prompts as prompts +from .core import Prompty +from .utils import prepare +from .parsers import RoleMap + + +class PromptConfig: + def __init__( + self, + prompty: Prompty | None = None, + api: str | None = None, + prompt_template: str | None = None, + model_name: str | None = None, + ) -> None: + self.prompty = prompty + if self.prompty is not None: + self.model_name = prompty.model.configuration["azure_deployment"] + self.config = prompty.model.parameters + self._parameters = {} + elif prompt_template is not None and model_name is not None: + self.model_name = model_name + self.config = {} + # _parameters is a dict to hold the internal configuration + self._parameters = { + "api": api if api is not None else "chat", + "prompt_template": prompt_template + } + else: + raise ValueError("Please invalid arguments for PromptConfig") + + def render(self, input_variables: dict[str, any], format: str = "inference_sdk") -> list[ChatRequestMessage]: + if self.prompty is not None: + parsed = prepare(self.prompty, input_variables) + if format == "inference_sdk": + messages = [] + for message in parsed: + message_class = RoleMap.get_message_class(message["role"]) + messages.append(message_class(content=message["content"])) + return messages + elif format == "openai": + return parsed + else: + raise ValueError("Invalid message format") + + elif "prompt_template" in self._parameters: + system_prompt = self._parameters["prompt_template"].format(**input_variables) + if format == "inference_sdk": + return [SystemMessage(content=system_prompt)] + elif format == "openai": + return [{"role": "system", "content": system_prompt}] + else: + raise ValueError("Invalid message format") + + +class PromptyTemplate: + @staticmethod + def load(file_path: str) -> PromptConfig: + if not file_path: + raise ValueError("Please provide file_path") + prompty = prompts.load(file_path) + return PromptConfig(prompty=prompty) + + @staticmethod + def from_message( + model_name: str, + prompt_template: str, + api: str = "chat" + ) -> PromptConfig: + return PromptConfig(api=api, prompt_template=prompt_template, model_name=model_name, prompty=None) + + +def patch_sdk(): + """Do not remove from this file. 
+ + `patch_sdk` is a last resort escape hatch that allows you to do customizations + you can't accomplish using the techniques described in + https://aka.ms/azsdk/python/dpcodegen/python/customize + """ diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/core.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/core.py new file mode 100644 index 000000000000..f570be7c13cf --- /dev/null +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/core.py @@ -0,0 +1,329 @@ +""" +Adapted from https://github.com/langchain-ai/langchain +MIT License +""" +from __future__ import annotations + +import abc +import json +import os +import re +from pathlib import Path +from typing import Any, Dict, Generic, List, Literal, Optional, Type, TypeVar, Union + +import yaml +from pydantic import BaseModel, ConfigDict, Field, FilePath + +T = TypeVar("T") + + +class SimpleModel(BaseModel, Generic[T]): + """Simple model for a single item.""" + + item: T + + +class PropertySettings(BaseModel): + """Property settings for a prompty model.""" + + model_config = ConfigDict(arbitrary_types_allowed=True) + type: Literal["string", "number", "array", "object", "boolean"] + default: Union[str, int, float, List, Dict, bool] = Field(default=None) + description: str = Field(default="") + + +class ModelSettings(BaseModel): + """Model settings for a prompty model.""" + + api: str = Field(default="") + configuration: dict = Field(default={}) + parameters: dict = Field(default={}) + response: dict = Field(default={}) + + def model_dump_safe(self) -> dict: + d = self.model_dump() + d["configuration"] = { + k: "*" * len(v) if "key" in k.lower() or "secret" in k.lower() else v + for k, v in d["configuration"].items() + } + return d + + +class TemplateSettings(BaseModel): + """Template settings for a prompty model.""" + + type: str = Field(default="mustache") + parser: str = Field(default="") + + +class Prompty(BaseModel): + """Base Prompty model.""" + + # metadata + name: str = Field(default="") + description: str = Field(default="") + authors: List[str] = Field(default=[]) + tags: List[str] = Field(default=[]) + version: str = Field(default="") + base: str = Field(default="") + basePrompty: Optional[Prompty] = Field(default=None) + # model + model: ModelSettings = Field(default_factory=ModelSettings) + + # sample + sample: dict = Field(default={}) + + # input / output + inputs: Dict[str, PropertySettings] = Field(default={}) + outputs: Dict[str, PropertySettings] = Field(default={}) + + # template + template: TemplateSettings + + file: FilePath = Field(default="") + content: str = Field(default="") + + def to_safe_dict(self) -> Dict[str, Any]: + d = {} + for k, v in self: + if v != "" and v != {} and v != [] and v is not None: + if k == "model": + d[k] = v.model_dump_safe() + elif k == "template": + d[k] = v.model_dump() + elif k == "inputs" or k == "outputs": + d[k] = {k: v.model_dump() for k, v in v.items()} + elif k == "file": + d[k] = ( + str(self.file.as_posix()) + if isinstance(self.file, Path) + else self.file + ) + elif k == "basePrompty": + # no need to serialize basePrompty + continue + + else: + d[k] = v + return d + + # generate json representation of the prompty + def to_safe_json(self) -> str: + d = self.to_safe_dict() + return json.dumps(d) + + @staticmethod + def normalize(attribute: Any, parent: Path, env_error: bool = True) -> Any: + if isinstance(attribute, str): + attribute = attribute.strip() + if attribute.startswith("${") and attribute.endswith("}"): + variable = 
attribute[2:-1].split(":") + if variable[0] in os.environ.keys(): + return os.environ[variable[0]] + else: + if len(variable) > 1: + return variable[1] + else: + if env_error: + raise ValueError( + f"Variable {variable[0]} not found in environment" + ) + else: + return "" + elif ( + attribute.startswith("file:") + and Path(parent / attribute.split(":")[1]).exists() + ): + with open(parent / attribute.split(":")[1], "r") as f: + items = json.load(f) + if isinstance(items, list): + return [Prompty.normalize(value, parent) for value in items] + elif isinstance(items, dict): + return { + key: Prompty.normalize(value, parent) + for key, value in items.items() + } + else: + return items + else: + return attribute + elif isinstance(attribute, list): + return [Prompty.normalize(value, parent) for value in attribute] + elif isinstance(attribute, dict): + return { + key: Prompty.normalize(value, parent) + for key, value in attribute.items() + } + else: + return attribute + + +def param_hoisting( + top: Dict[str, Any], bottom: Dict[str, Any], top_key: Any = None +) -> Dict[str, Any]: + """Merge two dictionaries with hoisting of parameters from bottom to top. + + Args: + top: The top dictionary. + bottom: The bottom dictionary. + top_key: The key to hoist from the bottom to the top. + + Returns: + The merged dictionary. + """ + if top_key: + new_dict = {**top[top_key]} if top_key in top else {} + else: + new_dict = {**top} + for key, value in bottom.items(): + if key not in new_dict: + new_dict[key] = value + return new_dict + + +class Invoker(abc.ABC): + """Base class for all invokers.""" + + def __init__(self, prompty: Prompty) -> None: + self.prompty = prompty + + @abc.abstractmethod + def invoke(self, data: BaseModel) -> BaseModel: + pass + + def __call__(self, data: BaseModel) -> BaseModel: + return self.invoke(data) + + +class NoOpParser(Invoker): + """NoOp parser for invokers.""" + + def invoke(self, data: BaseModel) -> BaseModel: + return data + + +class InvokerFactory(object): + """Factory for creating invokers.""" + + _instance = None + _renderers: Dict[str, Type[Invoker]] = {} + _parsers: Dict[str, Type[Invoker]] = {} + _executors: Dict[str, Type[Invoker]] = {} + _processors: Dict[str, Type[Invoker]] = {} + + def __new__(cls) -> InvokerFactory: + if cls._instance is None: + cls._instance = super(InvokerFactory, cls).__new__(cls) + # Add NOOP invokers + cls._renderers["NOOP"] = NoOpParser + cls._parsers["NOOP"] = NoOpParser + cls._executors["NOOP"] = NoOpParser + cls._processors["NOOP"] = NoOpParser + return cls._instance + + def register( + self, + type: Literal["renderer", "parser", "executor", "processor"], + name: str, + invoker: Type[Invoker], + ) -> None: + if type == "renderer": + self._renderers[name] = invoker + elif type == "parser": + self._parsers[name] = invoker + elif type == "executor": + self._executors[name] = invoker + elif type == "processor": + self._processors[name] = invoker + else: + raise ValueError(f"Invalid type {type}") + + def register_renderer(self, name: str, renderer_class: Any) -> None: + self.register("renderer", name, renderer_class) + + def register_parser(self, name: str, parser_class: Any) -> None: + self.register("parser", name, parser_class) + + def register_executor(self, name: str, executor_class: Any) -> None: + self.register("executor", name, executor_class) + + def register_processor(self, name: str, processor_class: Any) -> None: + self.register("processor", name, processor_class) + + def __call__( + self, + type: Literal["renderer", "parser", 
"executor", "processor"], + name: str, + prompty: Prompty, + data: BaseModel, + ) -> Any: + if type == "renderer": + return self._renderers[name](prompty)(data) + elif type == "parser": + return self._parsers[name](prompty)(data) + elif type == "executor": + return self._executors[name](prompty)(data) + elif type == "processor": + return self._processors[name](prompty)(data) + else: + raise ValueError(f"Invalid type {type}") + + def to_dict(self) -> Dict[str, Any]: + return { + "renderers": { + k: f"{v.__module__}.{v.__name__}" for k, v in self._renderers.items() + }, + "parsers": { + k: f"{v.__module__}.{v.__name__}" for k, v in self._parsers.items() + }, + "executors": { + k: f"{v.__module__}.{v.__name__}" for k, v in self._executors.items() + }, + "processors": { + k: f"{v.__module__}.{v.__name__}" for k, v in self._processors.items() + }, + } + + def to_json(self) -> str: + return json.dumps(self.to_dict()) + + +class Frontmatter: + """Class for reading frontmatter from a string or file.""" + + _yaml_delim = r"(?:---|\+\+\+)" + _yaml = r"(.*?)" + _content = r"\s*(.+)$" + _re_pattern = r"^\s*" + _yaml_delim + _yaml + _yaml_delim + _content + _regex = re.compile(_re_pattern, re.S | re.M) + + @classmethod + def read_file(cls, path: str) -> dict[str, Any]: + """Reads file at path and returns dict with separated frontmatter. + See read() for more info on dict return value. + """ + with open(path, encoding="utf-8") as file: + file_contents = file.read() + return cls.read(file_contents) + + @classmethod + def read(cls, string: str) -> dict[str, Any]: + """Returns dict with separated frontmatter from string. + + Returned dict keys: + attributes -- extracted YAML attributes in dict form. + body -- string contents below the YAML separators + frontmatter -- string representation of YAML + """ + fmatter = "" + body = "" + result = cls._regex.search(string) + + if result: + fmatter = result.group(1) + body = result.group(2) + return { + "attributes": yaml.load(fmatter, Loader=yaml.FullLoader), + "body": body, + "frontmatter": fmatter, + } diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/mustache.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/mustache.py new file mode 100644 index 000000000000..ce175045e7a9 --- /dev/null +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/mustache.py @@ -0,0 +1,684 @@ +""" +Adapted from https://github.com/langchain-ai/langchain and https://github.com/noahmorrison/chevron +MIT License +""" + +from __future__ import annotations + +import logging +from collections.abc import Iterator, Mapping, Sequence +from types import MappingProxyType +from typing import ( + Any, + Literal, + Optional, + Union, + cast, +) + +from typing_extensions import TypeAlias + +logger = logging.getLogger(__name__) + + +Scopes: TypeAlias = list[Union[Literal[False, 0], Mapping[str, Any]]] + + +# Globals +_CURRENT_LINE = 1 +_LAST_TAG_LINE = None + + +class ChevronError(SyntaxError): + """Custom exception for Chevron errors.""" + + +# +# Helper functions +# + + +def grab_literal(template: str, l_del: str) -> tuple[str, str]: + """Parse a literal from the template. + + Args: + template: The template to parse. + l_del: The left delimiter. + + Returns: + Tuple[str, str]: The literal and the template. 
+ """ + + global _CURRENT_LINE + + try: + # Look for the next tag and move the template to it + literal, template = template.split(l_del, 1) + _CURRENT_LINE += literal.count("\n") + return (literal, template) + + # There are no more tags in the template? + except ValueError: + # Then the rest of the template is a literal + return (template, "") + + +def l_sa_check(template: str, literal: str, is_standalone: bool) -> bool: + """Do a preliminary check to see if a tag could be a standalone. + + Args: + template: The template. (Not used.) + literal: The literal. + is_standalone: Whether the tag is standalone. + + Returns: + bool: Whether the tag could be a standalone. + """ + + # If there is a newline, or the previous tag was a standalone + if literal.find("\n") != -1 or is_standalone: + padding = literal.split("\n")[-1] + + # If all the characters since the last newline are spaces + # Then the next tag could be a standalone + # Otherwise it can't be + return padding.isspace() or padding == "" + else: + return False + + +def r_sa_check(template: str, tag_type: str, is_standalone: bool) -> bool: + """Do a final check to see if a tag could be a standalone. + + Args: + template: The template. + tag_type: The type of the tag. + is_standalone: Whether the tag is standalone. + + Returns: + bool: Whether the tag could be a standalone. + """ + + # Check right side if we might be a standalone + if is_standalone and tag_type not in ["variable", "no escape"]: + on_newline = template.split("\n", 1) + + # If the stuff to the right of us are spaces we're a standalone + return on_newline[0].isspace() or not on_newline[0] + + # If we're a tag can't be a standalone + else: + return False + + +def parse_tag(template: str, l_del: str, r_del: str) -> tuple[tuple[str, str], str]: + """Parse a tag from a template. + + Args: + template: The template. + l_del: The left delimiter. + r_del: The right delimiter. + + Returns: + Tuple[Tuple[str, str], str]: The tag and the template. + + Raises: + ChevronError: If the tag is unclosed. + ChevronError: If the set delimiter tag is unclosed. + """ + global _CURRENT_LINE + global _LAST_TAG_LINE + + tag_types = { + "!": "comment", + "#": "section", + "^": "inverted section", + "/": "end", + ">": "partial", + "=": "set delimiter?", + "{": "no escape?", + "&": "no escape", + } + + # Get the tag + try: + tag, template = template.split(r_del, 1) + except ValueError as e: + msg = "unclosed tag " f"at line {_CURRENT_LINE}" + raise ChevronError(msg) from e + + # Find the type meaning of the first character + tag_type = tag_types.get(tag[0], "variable") + + # If the type is not a variable + if tag_type != "variable": + # Then that first character is not needed + tag = tag[1:] + + # If we might be a set delimiter tag + if tag_type == "set delimiter?": + # Double check to make sure we are + if tag.endswith("="): + tag_type = "set delimiter" + # Remove the equal sign + tag = tag[:-1] + + # Otherwise we should complain + else: + msg = "unclosed set delimiter tag\n" f"at line {_CURRENT_LINE}" + raise ChevronError(msg) + + elif ( + # If we might be a no html escape tag + tag_type == "no escape?" 
+ # And we have a third curly brace + # (And are using curly braces as delimiters) + and l_del == "{{" + and r_del == "}}" + and template.startswith("}") + ): + # Then we are a no html escape tag + template = template[1:] + tag_type = "no escape" + + # Strip the whitespace off the key and return + return ((tag_type, tag.strip()), template) + + +# +# The main tokenizing function +# + + +def tokenize( + template: str, def_ldel: str = "{{", def_rdel: str = "}}" +) -> Iterator[tuple[str, str]]: + """Tokenize a mustache template. + + Tokenizes a mustache template in a generator fashion, + using file-like objects. It also accepts a string containing + the template. + + + Arguments: + + template -- a file-like object, or a string of a mustache template + + def_ldel -- The default left delimiter + ("{{" by default, as in spec compliant mustache) + + def_rdel -- The default right delimiter + ("}}" by default, as in spec compliant mustache) + + + Returns: + + A generator of mustache tags in the form of a tuple + + -- (tag_type, tag_key) + + Where tag_type is one of: + * literal + * section + * inverted section + * end + * partial + * no escape + + And tag_key is either the key or in the case of a literal tag, + the literal itself. + """ + + global _CURRENT_LINE, _LAST_TAG_LINE + _CURRENT_LINE = 1 + _LAST_TAG_LINE = None + + is_standalone = True + open_sections = [] + l_del = def_ldel + r_del = def_rdel + + while template: + literal, template = grab_literal(template, l_del) + + # If the template is completed + if not template: + # Then yield the literal and leave + yield ("literal", literal) + break + + # Do the first check to see if we could be a standalone + is_standalone = l_sa_check(template, literal, is_standalone) + + # Parse the tag + tag, template = parse_tag(template, l_del, r_del) + tag_type, tag_key = tag + + # Special tag logic + + # If we are a set delimiter tag + if tag_type == "set delimiter": + # Then get and set the delimiters + dels = tag_key.strip().split(" ") + l_del, r_del = dels[0], dels[-1] + + # If we are a section tag + elif tag_type in ["section", "inverted section"]: + # Then open a new section + open_sections.append(tag_key) + _LAST_TAG_LINE = _CURRENT_LINE + + # If we are an end tag + elif tag_type == "end": + # Then check to see if the last opened section + # is the same as us + try: + last_section = open_sections.pop() + except IndexError as e: + msg = ( + f'Trying to close tag "{tag_key}"\n' + "Looks like it was not opened.\n" + f"line {_CURRENT_LINE + 1}" + ) + raise ChevronError(msg) from e + if tag_key != last_section: + # Otherwise we need to complain + msg = ( + f'Trying to close tag "{tag_key}"\n' + f'last open tag is "{last_section}"\n' + f"line {_CURRENT_LINE + 1}" + ) + raise ChevronError(msg) + + # Do the second check to see if we're a standalone + is_standalone = r_sa_check(template, tag_type, is_standalone) + + # Which if we are + if is_standalone: + # Remove the stuff before the newline + template = template.split("\n", 1)[-1] + + # Partials need to keep the spaces on their left + if tag_type != "partial": + # But other tags don't + literal = literal.rstrip(" ") + + # Start yielding + # Ignore literals that are empty + if literal != "": + yield ("literal", literal) + + # Ignore comments and set delimiters + if tag_type not in ["comment", "set delimiter?"]: + yield (tag_type, tag_key) + + # If there are any open sections when we're done + if open_sections: + # Then we need to complain + msg = ( + "Unexpected EOF\n" + f'the tag "{open_sections[-1]}" was 
never closed\n' + f"was opened at line {_LAST_TAG_LINE}" + ) + raise ChevronError(msg) + + +# +# Helper functions +# + + +def _html_escape(string: str) -> str: + """HTML escape all of these " & < >""" + + html_codes = { + '"': """, + "<": "<", + ">": ">", + } + + # & must be handled first + string = string.replace("&", "&") + for char in html_codes: + string = string.replace(char, html_codes[char]) + return string + + +def _get_key( + key: str, + scopes: Scopes, + warn: bool, + keep: bool, + def_ldel: str, + def_rdel: str, +) -> Any: + """Get a key from the current scope""" + + # If the key is a dot + if key == ".": + # Then just return the current scope + return scopes[0] + + # Loop through the scopes + for scope in scopes: + try: + # Return an empty string if falsy, with two exceptions + # 0 should return 0, and False should return False + if scope in (0, False): + return scope + + # For every dot separated key + for child in key.split("."): + # Return an empty string if falsy, with two exceptions + # 0 should return 0, and False should return False + if scope in (0, False): + return scope + # Move into the scope + try: + # Try subscripting (Normal dictionaries) + scope = cast(dict[str, Any], scope)[child] + except (TypeError, AttributeError): + try: + scope = getattr(scope, child) + except (TypeError, AttributeError): + # Try as a list + scope = scope[int(child)] # type: ignore + + try: + # This allows for custom falsy data types + # https://github.com/noahmorrison/chevron/issues/35 + if scope._CHEVRON_return_scope_when_falsy: # type: ignore + return scope + except AttributeError: + if scope in (0, False): + return scope + return scope or "" + except (AttributeError, KeyError, IndexError, ValueError): + # We couldn't find the key in the current scope + # We'll try again on the next pass + pass + + # We couldn't find the key in any of the scopes + + if warn: + logger.warn(f"Could not find key '{key}'") + + if keep: + return f"{def_ldel} {key} {def_rdel}" + + return "" + + +def _get_partial(name: str, partials_dict: Mapping[str, str]) -> str: + """Load a partial""" + try: + # Maybe the partial is in the dictionary + return partials_dict[name] + except KeyError: + return "" + + +# +# The main rendering function +# +g_token_cache: dict[str, list[tuple[str, str]]] = {} + +EMPTY_DICT: MappingProxyType[str, str] = MappingProxyType({}) + + +def render( + template: Union[str, list[tuple[str, str]]] = "", + data: Mapping[str, Any] = EMPTY_DICT, + partials_dict: Mapping[str, str] = EMPTY_DICT, + padding: str = "", + def_ldel: str = "{{", + def_rdel: str = "}}", + scopes: Optional[Scopes] = None, + warn: bool = False, + keep: bool = False, +) -> str: + """Render a mustache template. + + Renders a mustache template with a data scope and inline partial capability. + + Arguments: + + template -- A file-like object or a string containing the template. + + data -- A python dictionary with your data scope. + + partials_path -- The path to where your partials are stored. + If set to None, then partials won't be loaded from the file system + (defaults to '.'). + + partials_ext -- The extension that you want the parser to look for + (defaults to 'mustache'). + + partials_dict -- A python dictionary which will be search for partials + before the filesystem is. {'include': 'foo'} is the same + as a file called include.mustache + (defaults to {}). + + padding -- This is for padding partials, and shouldn't be used + (but can be if you really want to). 
+ + def_ldel -- The default left delimiter + ("{{" by default, as in spec compliant mustache). + + def_rdel -- The default right delimiter + ("}}" by default, as in spec compliant mustache). + + scopes -- The list of scopes that get_key will look through. + + warn -- Log a warning when a template substitution isn't found in the data + + keep -- Keep unreplaced tags when a substitution isn't found in the data. + + + Returns: + + A string containing the rendered template. + """ + + # If the template is a sequence but not derived from a string + if isinstance(template, Sequence) and not isinstance(template, str): + # Then we don't need to tokenize it + # But it does need to be a generator + tokens: Iterator[tuple[str, str]] = (token for token in template) + else: + if template in g_token_cache: + tokens = (token for token in g_token_cache[template]) + else: + # Otherwise make a generator + tokens = tokenize(template, def_ldel, def_rdel) + + output = "" + + if scopes is None: + scopes = [data] + + # Run through the tokens + for tag, key in tokens: + # Set the current scope + current_scope = scopes[0] + + # If we're an end tag + if tag == "end": + # Pop out of the latest scope + del scopes[0] + + # If the current scope is falsy and not the only scope + elif not current_scope and len(scopes) != 1: + if tag in ["section", "inverted section"]: + # Set the most recent scope to a falsy value + scopes.insert(0, False) + + # If we're a literal tag + elif tag == "literal": + # Add padding to the key and add it to the output + output += key.replace("\n", "\n" + padding) + + # If we're a variable tag + elif tag == "variable": + # Add the html escaped key to the output + thing = _get_key( + key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel + ) + if thing is True and key == ".": + # if we've coerced into a boolean by accident + # (inverted tags do this) + # then get the un-coerced object (next in the stack) + thing = scopes[1] + if not isinstance(thing, str): + thing = str(thing) + output += _html_escape(thing) + + # If we're a no html escape tag + elif tag == "no escape": + # Just lookup the key and add it + thing = _get_key( + key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel + ) + if not isinstance(thing, str): + thing = str(thing) + output += thing + + # If we're a section tag + elif tag == "section": + # Get the sections scope + scope = _get_key( + key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel + ) + + # If the scope is a callable (as described in + # https://mustache.github.io/mustache.5.html) + if callable(scope): + # Generate template text from tags + text = "" + tags: list[tuple[str, str]] = [] + for token in tokens: + if token == ("end", key): + break + + tags.append(token) + tag_type, tag_key = token + if tag_type == "literal": + text += tag_key + elif tag_type == "no escape": + text += f"{def_ldel}& {tag_key} {def_rdel}" + else: + text += "{}{} {}{}".format( + def_ldel, + { + "comment": "!", + "section": "#", + "inverted section": "^", + "end": "/", + "partial": ">", + "set delimiter": "=", + "no escape": "&", + "variable": "", + }[tag_type], + tag_key, + def_rdel, + ) + + g_token_cache[text] = tags + + rend = scope( + text, + lambda template, data=None: render( + template, + data={}, + partials_dict=partials_dict, + padding=padding, + def_ldel=def_ldel, + def_rdel=def_rdel, + scopes=data and [data] + scopes or scopes, + warn=warn, + keep=keep, + ), + ) + + output += rend + + # If the scope is a sequence, an iterator or 
generator but not + # derived from a string + elif isinstance(scope, (Sequence, Iterator)) and not isinstance(scope, str): + # Then we need to do some looping + + # Gather up all the tags inside the section + # (And don't be tricked by nested end tags with the same key) + # TODO: This feels like it still has edge cases, no? + tags = [] + tags_with_same_key = 0 + for token in tokens: + if token == ("section", key): + tags_with_same_key += 1 + if token == ("end", key): + tags_with_same_key -= 1 + if tags_with_same_key < 0: + break + tags.append(token) + + # For every item in the scope + for thing in scope: + # Append it as the most recent scope and render + new_scope = [thing] + scopes + rend = render( + template=tags, + scopes=new_scope, + padding=padding, + partials_dict=partials_dict, + def_ldel=def_ldel, + def_rdel=def_rdel, + warn=warn, + keep=keep, + ) + + output += rend + + else: + # Otherwise we're just a scope section + scopes.insert(0, scope) + + # If we're an inverted section + elif tag == "inverted section": + # Add the flipped scope to the scopes + scope = _get_key( + key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel + ) + scopes.insert(0, cast(Literal[False], not scope)) + + # If we're a partial + elif tag == "partial": + # Load the partial + partial = _get_partial(key, partials_dict) + + # Find what to pad the partial with + left = output.rpartition("\n")[2] + part_padding = padding + if left.isspace(): + part_padding += left + + # Render the partial + part_out = render( + template=partial, + partials_dict=partials_dict, + def_ldel=def_ldel, + def_rdel=def_rdel, + padding=part_padding, + scopes=scopes, + warn=warn, + keep=keep, + ) + + # If the partial was indented + if left.isspace(): + # then remove the spaces from the end + part_out = part_out.rstrip(" \t") + + # Add the partials output to the output + output += part_out + + return output diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/parsers.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/parsers.py new file mode 100644 index 000000000000..254b503d90f2 --- /dev/null +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/parsers.py @@ -0,0 +1,129 @@ +""" +Adapted from https://github.com/langchain-ai/langchain +MIT License +""" +import base64 +import re +from typing import Dict, List, Type, Union +from pydantic import BaseModel +from azure.ai.inference.models import ChatRequestMessage, SystemMessage, UserMessage, AssistantMessage, ToolMessage +from .core import Invoker, Prompty, SimpleModel + + +class RoleMap: + _ROLE_MAP: Dict[str, Type[ChatRequestMessage]] = { + "system": SystemMessage, + "user": UserMessage, + "human": UserMessage, + "assistant": AssistantMessage, + "ai": AssistantMessage, + "function": ToolMessage, + } + ROLES = _ROLE_MAP.keys() + + @classmethod + def get_message_class(cls, role: str) -> Type[ChatRequestMessage]: + return cls._ROLE_MAP[role] + + +class PromptyChatParser(Invoker): + """Parse a chat prompt into a list of messages.""" + + def __init__(self, prompty: Prompty) -> None: + self.prompty = prompty + self.roles = RoleMap.ROLES + self.path = self.prompty.file.parent + + def inline_image(self, image_item: str) -> str: + # pass through if it's a url or base64 encoded + if image_item.startswith("http") or image_item.startswith("data"): + return image_item + # otherwise, it's a local file - need to base64 encode it + else: + image_path = self.path / image_item + with open(image_path, "rb") as f: + base64_image = 
base64.b64encode(f.read()).decode("utf-8") + + if image_path.suffix == ".png": + return f"data:image/png;base64,{base64_image}" + elif image_path.suffix == ".jpg": + return f"data:image/jpeg;base64,{base64_image}" + elif image_path.suffix == ".jpeg": + return f"data:image/jpeg;base64,{base64_image}" + else: + raise ValueError( + f"Invalid image format {image_path.suffix} - currently only .png " + "and .jpg / .jpeg are supported." + ) + + def parse_content(self, content: str) -> Union[str, List]: + """for parsing inline images""" + # regular expression to parse markdown images + image = r"(?P!\[[^\]]*\])\((?P.*?)(?=\"|\))\)" + matches = re.findall(image, content, flags=re.MULTILINE) + if len(matches) > 0: + content_items = [] + content_chunks = re.split(image, content, flags=re.MULTILINE) + current_chunk = 0 + for i in range(len(content_chunks)): + # image entry + if ( + current_chunk < len(matches) + and content_chunks[i] == matches[current_chunk][0] + ): + content_items.append( + { + "type": "image_url", + "image_url": { + "url": self.inline_image( + matches[current_chunk][1].split(" ")[0].strip() + ) + }, + } + ) + # second part of image entry + elif ( + current_chunk < len(matches) + and content_chunks[i] == matches[current_chunk][1] + ): + current_chunk += 1 + # text entry + else: + if len(content_chunks[i].strip()) > 0: + content_items.append( + {"type": "text", "text": content_chunks[i].strip()} + ) + return content_items + else: + return content + + def invoke(self, data: BaseModel) -> BaseModel: + assert isinstance(data, SimpleModel) + messages = [] + separator = r"(?i)^\s*#?\s*(" + "|".join(self.roles) + r")\s*:\s*\n" + + # get valid chunks - remove empty items + chunks = [ + item + for item in re.split(separator, data.item, flags=re.MULTILINE) + if len(item.strip()) > 0 + ] + + # if no starter role, then inject system role + if chunks[0].strip().lower() not in self.roles: + chunks.insert(0, "system") + + # if last chunk is role entry, then remove (no content?) + if chunks[-1].strip().lower() in self.roles: + chunks.pop() + + if len(chunks) % 2 != 0: + raise ValueError("Invalid prompt format") + + # create messages + for i in range(0, len(chunks), 2): + role = chunks[i].strip().lower() + content = chunks[i + 1].strip() + messages.append({"role": role, "content": self.parse_content(content)}) + + return SimpleModel[list](item=messages) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/py.typed b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/py.typed new file mode 100644 index 000000000000..e5aff4f83af8 --- /dev/null +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/py.typed @@ -0,0 +1 @@ +# Marker file for PEP 561. 
\ No newline at end of file diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/renderers.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/renderers.py new file mode 100644 index 000000000000..aeaa09defa0e --- /dev/null +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/renderers.py @@ -0,0 +1,20 @@ +""" +Adapted from https://github.com/langchain-ai/langchain +MIT License +""" +from pydantic import BaseModel +from .mustache import render + +from .core import Invoker, Prompty, SimpleModel + + +class MustacheRenderer(Invoker): + """Render a mustache template.""" + + def __init__(self, prompty: Prompty) -> None: + self.prompty = prompty + + def invoke(self, data: BaseModel) -> BaseModel: + assert isinstance(data, SimpleModel) + generated = render(self.prompty.content, data.item) + return SimpleModel[str](item=generated) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/utils.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/utils.py new file mode 100644 index 000000000000..bc7abbcc7c7f --- /dev/null +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/utils.py @@ -0,0 +1,256 @@ +""" +Adapted from https://github.com/langchain-ai/langchain +MIT License +""" +import traceback +from pathlib import Path +from typing import Any, Dict, List, Union + +from .core import ( + Frontmatter, + InvokerFactory, + ModelSettings, + Prompty, + PropertySettings, + SimpleModel, + TemplateSettings, + param_hoisting, +) + + +def load(prompt_path: str, configuration: str = "default") -> Prompty: + """Load a prompty file and return a Prompty object. + + Args: + prompt_path: The path to the prompty file. + configuration: The configuration to use. Defaults to "default". + + Returns: + The Prompty object. 
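+
+    Example (an illustrative sketch; ``sample1.prompty`` is a placeholder path,
+    resolved relative to the calling module, and must exist there)::
+
+        from azure.ai.inference.prompts import load
+
+        prompty = load("sample1.prompty")
+        print(prompty.model.configuration, prompty.model.parameters)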
+ """ + file_path = Path(prompt_path) + if not file_path.is_absolute(): + # get caller's path (take into account trace frame) + caller = Path(traceback.extract_stack()[-3].filename) + file_path = Path(caller.parent / file_path).resolve().absolute() + + # load dictionary from prompty file + matter = Frontmatter.read_file(file_path.__fspath__()) + attributes = matter["attributes"] + content = matter["body"] + + # normalize attribute dictionary resolve keys and files + attributes = Prompty.normalize(attributes, file_path.parent) + + # load global configuration + if "model" not in attributes: + attributes["model"] = {} + + # pull model settings out of attributes + try: + model = ModelSettings(**attributes.pop("model")) + except Exception as e: + raise ValueError(f"Error in model settings: {e}") + + # pull template settings + try: + if "template" in attributes: + t = attributes.pop("template") + if isinstance(t, dict): + template = TemplateSettings(**t) + # has to be a string denoting the type + else: + template = TemplateSettings(type=t, parser="prompty") + else: + template = TemplateSettings(type="mustache", parser="prompty") + except Exception as e: + raise ValueError(f"Error in template loader: {e}") + + # formalize inputs and outputs + if "inputs" in attributes: + try: + inputs = { + k: PropertySettings(**v) for (k, v) in attributes.pop("inputs").items() + } + except Exception as e: + raise ValueError(f"Error in inputs: {e}") + else: + inputs = {} + if "outputs" in attributes: + try: + outputs = { + k: PropertySettings(**v) for (k, v) in attributes.pop("outputs").items() + } + except Exception as e: + raise ValueError(f"Error in outputs: {e}") + else: + outputs = {} + + # recursive loading of base prompty + if "base" in attributes: + # load the base prompty from the same directory as the current prompty + base = load(file_path.parent / attributes["base"]) + # hoist the base prompty's attributes to the current prompty + model.api = base.model.api if model.api == "" else model.api + model.configuration = param_hoisting( + model.configuration, base.model.configuration + ) + model.parameters = param_hoisting(model.parameters, base.model.parameters) + model.response = param_hoisting(model.response, base.model.response) + attributes["sample"] = param_hoisting(attributes, base.sample, "sample") + + p = Prompty( + **attributes, + model=model, + inputs=inputs, + outputs=outputs, + template=template, + content=content, + file=file_path, + basePrompty=base, + ) + else: + p = Prompty( + **attributes, + model=model, + inputs=inputs, + outputs=outputs, + template=template, + content=content, + file=file_path, + ) + return p + + +def prepare( + prompt: Prompty, + inputs: Dict[str, Any] = {}, +) -> Any: + """Prepare the inputs for the prompty. + + Args: + prompt: The Prompty object. + inputs: The inputs to the prompty. Defaults to {}. + + Returns: + The prepared inputs. 
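+
+    Example (an illustrative sketch; assumes the prompty was loaded with
+    ``load`` and that its template references an ``input`` variable)::
+
+        from azure.ai.inference.prompts import load
+        from azure.ai.inference.prompts.utils import prepare
+
+        prompty = load("sample1.prompty")
+        messages = prepare(prompty, {"input": "What time is checkout?"})
+        # With the default mustache renderer and "prompty.chat" parser this
+        # returns a list of {"role": ..., "content": ...} dictionaries.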
+ """ + invoker = InvokerFactory() + + inputs = param_hoisting(inputs, prompt.sample) + + if prompt.template.type == "NOOP": + render = prompt.content + else: + # render + result = invoker( + "renderer", + prompt.template.type, + prompt, + SimpleModel(item=inputs), + ) + render = result.item + + if prompt.template.parser == "NOOP": + result = render + else: + # parse + result = invoker( + "parser", + f"{prompt.template.parser}.{prompt.model.api}", + prompt, + SimpleModel(item=result.item), + ) + + if isinstance(result, SimpleModel): + return result.item + else: + return result + + +def run( + prompt: Prompty, + content: Union[Dict, List, str], + configuration: Dict[str, Any] = {}, + parameters: Dict[str, Any] = {}, + raw: bool = False, +) -> Any: + """Run the prompty. + + Args: + prompt: The Prompty object. + content: The content to run the prompty on. + configuration: The configuration to use. Defaults to {}. + parameters: The parameters to use. Defaults to {}. + raw: Whether to return the raw output. Defaults to False. + + Returns: + The result of running the prompty. + """ + invoker = InvokerFactory() + + if configuration != {}: + prompt.model.configuration = param_hoisting( + configuration, prompt.model.configuration + ) + + if parameters != {}: + prompt.model.parameters = param_hoisting(parameters, prompt.model.parameters) + + # execute + result = invoker( + "executor", + prompt.model.configuration["type"], + prompt, + SimpleModel(item=content), + ) + + # skip? + if not raw: + # process + result = invoker( + "processor", + prompt.model.configuration["type"], + prompt, + result, + ) + + if isinstance(result, SimpleModel): + return result.item + else: + return result + + +def execute( + prompt: Union[str, Prompty], + configuration: Dict[str, Any] = {}, + parameters: Dict[str, Any] = {}, + inputs: Dict[str, Any] = {}, + raw: bool = False, + connection: str = "default", +) -> Any: + """Execute a prompty. + + Args: + prompt: The prompt to execute. + Can be a path to a prompty file or a Prompty object. + configuration: The configuration to use. Defaults to {}. + parameters: The parameters to use. Defaults to {}. + inputs: The inputs to the prompty. Defaults to {}. + raw: Whether to return the raw output. Defaults to False. + connection: The connection to use. Defaults to "default". + + Returns: + The result of executing the prompty. + """ + + if isinstance(prompt, str): + prompt = load(prompt, connection) + + # prepare content + content = prepare(prompt, inputs) + + # run LLM model + result = run(prompt, content, configuration, parameters, raw) + + return result diff --git a/sdk/ai/azure-ai-inference/samples/sample1.prompty b/sdk/ai/azure-ai-inference/samples/sample1.prompty new file mode 100644 index 000000000000..973445961991 --- /dev/null +++ b/sdk/ai/azure-ai-inference/samples/sample1.prompty @@ -0,0 +1,30 @@ +--- +name: Basic Prompt +description: A basic prompt that uses the GPT-3 chat API to answer questions +authors: + - author_1 + - author_2 +model: + api: chat + configuration: + azure_deployment: gpt-4o-mini + parameters: + temperature: 1 + frequency_penalty: 0.5 + presence_penalty: 0.5 +sample: + firstName: Jane + lastName: Doe + question: What is the meaning of life? + chat_history: [] +--- +system: +You are an AI assistant in a hotel. You help guests with their requests and provide information about the hotel and its services. 
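+{{! Illustrative note: the section below replays prior turns from chat_history; each entry is expected to carry "role" and "content" values. }}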
+ +{{#chat_history}} +{{role}}: +{{content}} +{{/chat_history}} + +user: +{{input}} diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py new file mode 100644 index 000000000000..33812bb9a774 --- /dev/null +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py @@ -0,0 +1,77 @@ +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +""" +DESCRIPTION: + This sample demonstrates how to get a chat completions response from + the service using a synchronous client, and directly providing the + input in string format. + + This sample assumes the AI model is hosted on a Serverless API or + Managed Compute endpoint. For GitHub Models or Azure OpenAI endpoints, + the client constructor needs to be modified. See package documentation: + https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/README.md#key-concepts + +USAGE: + python sample_chat_completions_from_input_prompt_string.py + + Set these two environment variables before running the sample: + 1) AZURE_AI_CHAT_ENDPOINT - Your endpoint URL, in the form + https://..models.ai.azure.com + where `your-deployment-name` is your unique AI Model deployment name, and + `your-azure-region` is the Azure region where your model is deployed. + 2) AZURE_AI_CHAT_KEY - Your model key (a 32-character string). Keep it secret. +""" +# mypy: disable-error-code="union-attr" +# pyright: reportAttributeAccessIssue=false + + +def sample_chat_completions_from_input_prompt_string(): + import os + from azure.ai.inference import ChatCompletionsClient + from azure.ai.inference.prompts import PromptyTemplate + from azure.core.credentials import AzureKeyCredential + + try: + endpoint = os.environ["AZURE_AI_CHAT_ENDPOINT"] + key = os.environ["AZURE_AI_CHAT_KEY"] + except KeyError: + print("Missing environment variable 'AZURE_AI_CHAT_ENDPOINT' or 'AZURE_AI_CHAT_KEY'") + print("Set them before running this sample.") + exit() + + + prompt_template = """ +You are an AI assistant in a hotel. You help guests with their requests and provide information about the hotel and its services. + +{input} +""" + prompt_config = PromptyTemplate.from_message( + api = "chat", + model_name = "gpt-4o-mini", + prompt_template = prompt_template + ) + + input_variables = { + "input": "please tell me a joke about cats", + } + + messages = prompt_config.render(input_variables=input_variables) + # messages = prompt_config.render(input_variables=input_variables, format="openai") + + client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) + + # [START chat_completions] + response = client.complete( + { + "messages": messages, + } + ) + # [END chat_completions] + + print(response.choices[0].message.content) + + +if __name__ == "__main__": + sample_chat_completions_from_input_prompt_string() diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py new file mode 100644 index 000000000000..06b689805695 --- /dev/null +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py @@ -0,0 +1,69 @@ +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+# ------------------------------------ +""" +DESCRIPTION: + This sample demonstrates how to get a chat completions response from + the service using a synchronous client, and directly providing the + input in Prompty format. + + This sample assumes the AI model is hosted on a Serverless API or + Managed Compute endpoint. For GitHub Models or Azure OpenAI endpoints, + the client constructor needs to be modified. See package documentation: + https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/README.md#key-concepts + +USAGE: + python sample_chat_completions_from_input_prompty.py + + Set these two environment variables before running the sample: + 1) AZURE_AI_CHAT_ENDPOINT - Your endpoint URL, in the form + https://..models.ai.azure.com + where `your-deployment-name` is your unique AI Model deployment name, and + `your-azure-region` is the Azure region where your model is deployed. + 2) AZURE_AI_CHAT_KEY - Your model key (a 32-character string). Keep it secret. +""" +# mypy: disable-error-code="union-attr" +# pyright: reportAttributeAccessIssue=false + + +def sample_chat_completions_from_input_prompty(): + import os + from azure.ai.inference import ChatCompletionsClient + from azure.ai.inference.prompts import PromptyTemplate + from azure.core.credentials import AzureKeyCredential + + try: + endpoint = os.environ["AZURE_AI_CHAT_ENDPOINT"] + key = os.environ["AZURE_AI_CHAT_KEY"] + except KeyError: + print("Missing environment variable 'AZURE_AI_CHAT_ENDPOINT' or 'AZURE_AI_CHAT_KEY'") + print("Set them before running this sample.") + exit() + + + path = "./sample1.prompty" + prompt_config = PromptyTemplate.load(file_path=path) + + input_variables = { + "input": "please tell me a joke about cats", + } + + messages = prompt_config.render(input_variables=input_variables) + # messages = prompt_config.render(input_variables=input_variables, format="openai") + + client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) + + # [START chat_completions] + response = client.complete( + messages=messages, + model=prompt_config.model_name, + **prompt_config.config, + ) + # [END chat_completions] + + print(response.choices[0].message.content) + + +if __name__ == "__main__": + sample_chat_completions_from_input_prompty() diff --git a/sdk/ai/azure-ai-inference/setup.py b/sdk/ai/azure-ai-inference/setup.py index c264ae00239e..fdca440194fa 100644 --- a/sdk/ai/azure-ai-inference/setup.py +++ b/sdk/ai/azure-ai-inference/setup.py @@ -61,6 +61,7 @@ include_package_data=True, package_data={ "azure.ai.inference": ["py.typed"], + "azure.ai.prompty": ["py.typed"], }, install_requires=[ "isodate>=0.6.1", diff --git a/sdk/ai/azure-ai-inference/tests/test_prompts.py b/sdk/ai/azure-ai-inference/tests/test_prompts.py new file mode 100644 index 000000000000..fe27fb9d9d5c --- /dev/null +++ b/sdk/ai/azure-ai-inference/tests/test_prompts.py @@ -0,0 +1,64 @@ +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+# ------------------------------------ +import os +import json +import azure.ai.inference as sdk +import azure.ai.inference.prompts as prompts + +from model_inference_test_base import ( + ModelClientTestBase, + ServicePreparerChatCompletions, + ServicePreparerAOAIChatCompletions, + ServicePreparerEmbeddings, +) +from azure.core.pipeline.transport import RequestsTransport +from devtools_testutils import recorded_by_proxy +from azure.core.exceptions import AzureError, ServiceRequestError +from azure.core.credentials import AzureKeyCredential + + + +class TestModelClient(ModelClientTestBase): + + # ********************************************************************************** + # + # UNIT TESTS + # + # ********************************************************************************** + + def test_prompty(self, **kwargs): + path = "/Users/weiwu/Workspace/1_Testing/TestAI/test-prompty/test.prompty" + p = prompts.load(path) + + inputs = { + "input": "my first question", + } + + print(p) + + parsed = prompts.prepare(p, inputs) + + lc_messages = [] # TODO: will be removed + for message in parsed: + message_class = prompts.RoleMap.get_message_class(message["role"]) + lc_messages.append(message_class(content=message["content"])) + + print(lc_messages) + + assert True + + + def test_prompt_config(self, **kwargs): + path = "/Users/weiwu/Workspace/1_Testing/TestAI/test-prompty/test.prompty" + prompt_config = prompts.get_prompt_config(file_path=path) + + inputs = { + "input": "my first question", + } + + messages = prompt_config.render(inputs) + print(messages) + + assert True From 1e250757286ac41fd30e5fa7c75a27cc467f5062 Mon Sep 17 00:00:00 2001 From: David Wu Date: Tue, 15 Oct 2024 23:57:49 -0700 Subject: [PATCH 02/21] Fix unit test --- ...at_completions_from_input_prompt_string.py | 2 + .../azure-ai-inference/tests/sample1.prompty | 30 +++++++ .../azure-ai-inference/tests/test_prompts.py | 82 ++++++++----------- 3 files changed, 66 insertions(+), 48 deletions(-) create mode 100644 sdk/ai/azure-ai-inference/tests/sample1.prompty diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py index 33812bb9a774..9cf4794b47de 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py @@ -43,8 +43,10 @@ def sample_chat_completions_from_input_prompt_string(): prompt_template = """ +system: You are an AI assistant in a hotel. You help guests with their requests and provide information about the hotel and its services. +user: {input} """ prompt_config = PromptyTemplate.from_message( diff --git a/sdk/ai/azure-ai-inference/tests/sample1.prompty b/sdk/ai/azure-ai-inference/tests/sample1.prompty new file mode 100644 index 000000000000..973445961991 --- /dev/null +++ b/sdk/ai/azure-ai-inference/tests/sample1.prompty @@ -0,0 +1,30 @@ +--- +name: Basic Prompt +description: A basic prompt that uses the GPT-3 chat API to answer questions +authors: + - author_1 + - author_2 +model: + api: chat + configuration: + azure_deployment: gpt-4o-mini + parameters: + temperature: 1 + frequency_penalty: 0.5 + presence_penalty: 0.5 +sample: + firstName: Jane + lastName: Doe + question: What is the meaning of life? + chat_history: [] +--- +system: +You are an AI assistant in a hotel. 
You help guests with their requests and provide information about the hotel and its services. + +{{#chat_history}} +{{role}}: +{{content}} +{{/chat_history}} + +user: +{{input}} diff --git a/sdk/ai/azure-ai-inference/tests/test_prompts.py b/sdk/ai/azure-ai-inference/tests/test_prompts.py index fe27fb9d9d5c..6fca351cbc17 100644 --- a/sdk/ai/azure-ai-inference/tests/test_prompts.py +++ b/sdk/ai/azure-ai-inference/tests/test_prompts.py @@ -3,24 +3,11 @@ # Licensed under the MIT License. # ------------------------------------ import os -import json -import azure.ai.inference as sdk -import azure.ai.inference.prompts as prompts +from azure.ai.inference.prompts import PromptyTemplate +from devtools_testutils import AzureRecordedTestCase -from model_inference_test_base import ( - ModelClientTestBase, - ServicePreparerChatCompletions, - ServicePreparerAOAIChatCompletions, - ServicePreparerEmbeddings, -) -from azure.core.pipeline.transport import RequestsTransport -from devtools_testutils import recorded_by_proxy -from azure.core.exceptions import AzureError, ServiceRequestError -from azure.core.credentials import AzureKeyCredential - - -class TestModelClient(ModelClientTestBase): +class TestPrompts(AzureRecordedTestCase): # ********************************************************************************** # @@ -28,37 +15,36 @@ class TestModelClient(ModelClientTestBase): # # ********************************************************************************** - def test_prompty(self, **kwargs): - path = "/Users/weiwu/Workspace/1_Testing/TestAI/test-prompty/test.prompty" - p = prompts.load(path) - - inputs = { - "input": "my first question", + def test_prompt_config_from_prompty(self, **kwargs): + script_dir = os.path.dirname(os.path.abspath(__file__)) + prompty_file_path = os.path.join(script_dir, "sample1.prompty") + prompt_config = PromptyTemplate.load(prompty_file_path) + assert prompt_config.model_name == "gpt-4o-mini" + assert prompt_config.config["temperature"] == 1 + assert prompt_config.config["frequency_penalty"] == 0.5 + assert prompt_config.config["presence_penalty"] == 0.5 + input_variables = { + "input": "please tell me a joke about cats", } - - print(p) - - parsed = prompts.prepare(p, inputs) - - lc_messages = [] # TODO: will be removed - for message in parsed: - message_class = prompts.RoleMap.get_message_class(message["role"]) - lc_messages.append(message_class(content=message["content"])) - - print(lc_messages) - - assert True - - - def test_prompt_config(self, **kwargs): - path = "/Users/weiwu/Workspace/1_Testing/TestAI/test-prompty/test.prompty" - prompt_config = prompts.get_prompt_config(file_path=path) - - inputs = { - "input": "my first question", + messages = prompt_config.render(input_variables=input_variables) + assert len(messages) == 2 + assert messages[0]["role"] == "system" + assert messages[1]["role"] == "user" + assert messages[1]["content"] == "please tell me a joke about cats" + + def test_prompt_config_from_message(self, **kwargs): + prompt_config = PromptyTemplate.from_message( + api = "chat", + model_name = "gpt-4o-mini", + prompt_template = "system prompt template {input}" + ) + assert prompt_config.model_name == "gpt-4o-mini" + input_variables = { + "input": "please tell me a joke about cats", } - - messages = prompt_config.render(inputs) - print(messages) - - assert True + messages = prompt_config.render(input_variables=input_variables) + assert len(messages) == 1 + assert messages[0]["role"] == "system" + # TODO: need to separate the system prompt from the user 
input + # assert messages[1]["role"] == "user" + # assert messages[1]["content"] == "please tell me a joke about cats" From ffeaab821908b071207ec69a1338707c3194a53a Mon Sep 17 00:00:00 2001 From: David Wu Date: Fri, 18 Oct 2024 14:37:14 -0700 Subject: [PATCH 03/21] Address PR feedback with copyright, merge PromptConfig to PromptTemplate --- .../azure/ai/inference/prompts/__init__.py | 4 +- .../azure/ai/inference/prompts/_patch.py | 57 +++++++------------ .../azure/ai/inference/prompts/core.py | 4 ++ .../azure/ai/inference/prompts/mustache.py | 4 ++ .../azure/ai/inference/prompts/parsers.py | 4 ++ .../azure/ai/inference/prompts/renderers.py | 4 ++ .../azure/ai/inference/prompts/utils.py | 4 ++ ...at_completions_from_input_prompt_string.py | 23 ++++---- ...ple_chat_completions_from_input_prompty.py | 20 +++---- .../azure-ai-inference/tests/test_prompts.py | 27 ++++----- 10 files changed, 73 insertions(+), 78 deletions(-) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py index aee79879c6fe..87a0ca05574f 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py @@ -8,7 +8,7 @@ from .renderers import MustacheRenderer from .parsers import PromptyChatParser from .utils import load -from ._patch import patch_sdk as _patch_sdk, PromptyTemplate +from ._patch import patch_sdk as _patch_sdk, PromptTemplate # Register the Mustache renderer and parser InvokerFactory().register_renderer("mustache", MustacheRenderer) @@ -17,7 +17,7 @@ __all__ = [ "load", "Prompty", - "PromptyTemplate", + "PromptTemplate", ] _patch_sdk() diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py index 1dbaff7a0c09..628fcefb2686 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py @@ -7,14 +7,27 @@ Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize """ -from azure.ai.inference.models import ChatRequestMessage, SystemMessage import azure.ai.inference.prompts as prompts from .core import Prompty from .utils import prepare -from .parsers import RoleMap -class PromptConfig: +class PromptTemplate: + @staticmethod + def from_prompty(file_path: str): + if not file_path: + raise ValueError("Please provide file_path") + prompty = prompts.load(file_path) + return PromptTemplate(prompty=prompty) + + @staticmethod + def from_message( + model_name: str, + prompt_template: str, + api: str = "chat" + ): + return PromptTemplate(api=api, prompt_template=prompt_template, model_name=model_name, prompty=None) + def __init__( self, prompty: Prompty | None = None, @@ -38,45 +51,13 @@ def __init__( else: raise ValueError("Please invalid arguments for PromptConfig") - def render(self, input_variables: dict[str, any], format: str = "inference_sdk") -> list[ChatRequestMessage]: + def render(self, input_variables: dict[str, any]): if self.prompty is not None: parsed = prepare(self.prompty, input_variables) - if format == "inference_sdk": - messages = [] - for message in parsed: - message_class = RoleMap.get_message_class(message["role"]) - messages.append(message_class(content=message["content"])) - return messages - elif format == "openai": - return parsed - else: - raise ValueError("Invalid message format") - + return parsed elif "prompt_template" in 
self._parameters: system_prompt = self._parameters["prompt_template"].format(**input_variables) - if format == "inference_sdk": - return [SystemMessage(content=system_prompt)] - elif format == "openai": - return [{"role": "system", "content": system_prompt}] - else: - raise ValueError("Invalid message format") - - -class PromptyTemplate: - @staticmethod - def load(file_path: str) -> PromptConfig: - if not file_path: - raise ValueError("Please provide file_path") - prompty = prompts.load(file_path) - return PromptConfig(prompty=prompty) - - @staticmethod - def from_message( - model_name: str, - prompt_template: str, - api: str = "chat" - ) -> PromptConfig: - return PromptConfig(api=api, prompt_template=prompt_template, model_name=model_name, prompty=None) + return [{"role": "system", "content": system_prompt}] def patch_sdk(): diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/core.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/core.py index f570be7c13cf..0f3a39f8b319 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/core.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/core.py @@ -1,3 +1,7 @@ +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ """ Adapted from https://github.com/langchain-ai/langchain MIT License diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/mustache.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/mustache.py index ce175045e7a9..165d881b6807 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/mustache.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/mustache.py @@ -1,3 +1,7 @@ +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ """ Adapted from https://github.com/langchain-ai/langchain and https://github.com/noahmorrison/chevron MIT License diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/parsers.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/parsers.py index 254b503d90f2..54f793d87f1b 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/parsers.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/parsers.py @@ -1,3 +1,7 @@ +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ """ Adapted from https://github.com/langchain-ai/langchain MIT License diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/renderers.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/renderers.py index aeaa09defa0e..55d46f5d029c 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/renderers.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/renderers.py @@ -1,3 +1,7 @@ +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+# ------------------------------------ """ Adapted from https://github.com/langchain-ai/langchain MIT License diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/utils.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/utils.py index bc7abbcc7c7f..5c3b63d8839c 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/utils.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/utils.py @@ -1,3 +1,7 @@ +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ """ Adapted from https://github.com/langchain-ai/langchain MIT License diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py index 9cf4794b47de..9b53d244fccd 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py @@ -30,7 +30,7 @@ def sample_chat_completions_from_input_prompt_string(): import os from azure.ai.inference import ChatCompletionsClient - from azure.ai.inference.prompts import PromptyTemplate + from azure.ai.inference.prompts import PromptTemplate from azure.core.credentials import AzureKeyCredential try: @@ -42,35 +42,32 @@ def sample_chat_completions_from_input_prompt_string(): exit() - prompt_template = """ -system: -You are an AI assistant in a hotel. You help guests with their requests and provide information about the hotel and its services. + prompt_template_str = """ + system: + You are an AI assistant in a hotel. You help guests with their requests and provide information about the hotel and its services. -user: -{input} -""" - prompt_config = PromptyTemplate.from_message( + user: + {input} + """ + prompt_template = PromptTemplate.from_message( api = "chat", model_name = "gpt-4o-mini", - prompt_template = prompt_template + prompt_template = prompt_template_str ) input_variables = { "input": "please tell me a joke about cats", } - messages = prompt_config.render(input_variables=input_variables) - # messages = prompt_config.render(input_variables=input_variables, format="openai") + messages = prompt_template.render(input_variables=input_variables) client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) - # [START chat_completions] response = client.complete( { "messages": messages, } ) - # [END chat_completions] print(response.choices[0].message.content) diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py index 06b689805695..2114f5458965 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py @@ -6,7 +6,7 @@ DESCRIPTION: This sample demonstrates how to get a chat completions response from the service using a synchronous client, and directly providing the - input in Prompty format. + input in Prompty format from a Prompty file. Prompty website: https://prompty.ai This sample assumes the AI model is hosted on a Serverless API or Managed Compute endpoint. 
For GitHub Models or Azure OpenAI endpoints, @@ -30,7 +30,7 @@ def sample_chat_completions_from_input_prompty(): import os from azure.ai.inference import ChatCompletionsClient - from azure.ai.inference.prompts import PromptyTemplate + from azure.ai.inference.prompts import PromptTemplate from azure.core.credentials import AzureKeyCredential try: @@ -43,24 +43,24 @@ def sample_chat_completions_from_input_prompty(): path = "./sample1.prompty" - prompt_config = PromptyTemplate.load(file_path=path) + prompt_template = PromptTemplate.from_prompty(file_path=path) input_variables = { "input": "please tell me a joke about cats", } - messages = prompt_config.render(input_variables=input_variables) - # messages = prompt_config.render(input_variables=input_variables, format="openai") + messages = prompt_template.render(input_variables=input_variables) - client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) + client = ChatCompletionsClient( + endpoint=endpoint, + credential=AzureKeyCredential(key) + ) - # [START chat_completions] response = client.complete( messages=messages, - model=prompt_config.model_name, - **prompt_config.config, + model=prompt_template.model_name, + **prompt_template.config, ) - # [END chat_completions] print(response.choices[0].message.content) diff --git a/sdk/ai/azure-ai-inference/tests/test_prompts.py b/sdk/ai/azure-ai-inference/tests/test_prompts.py index 6fca351cbc17..b3317861afb0 100644 --- a/sdk/ai/azure-ai-inference/tests/test_prompts.py +++ b/sdk/ai/azure-ai-inference/tests/test_prompts.py @@ -3,7 +3,7 @@ # Licensed under the MIT License. # ------------------------------------ import os -from azure.ai.inference.prompts import PromptyTemplate +from azure.ai.inference.prompts import PromptTemplate from devtools_testutils import AzureRecordedTestCase @@ -15,36 +15,33 @@ class TestPrompts(AzureRecordedTestCase): # # ********************************************************************************** - def test_prompt_config_from_prompty(self, **kwargs): + def test_prompt_template_from_prompty(self, **kwargs): script_dir = os.path.dirname(os.path.abspath(__file__)) prompty_file_path = os.path.join(script_dir, "sample1.prompty") - prompt_config = PromptyTemplate.load(prompty_file_path) - assert prompt_config.model_name == "gpt-4o-mini" - assert prompt_config.config["temperature"] == 1 - assert prompt_config.config["frequency_penalty"] == 0.5 - assert prompt_config.config["presence_penalty"] == 0.5 + prompt_template = PromptTemplate.from_prompty(prompty_file_path) + assert prompt_template.model_name == "gpt-4o-mini" + assert prompt_template.config["temperature"] == 1 + assert prompt_template.config["frequency_penalty"] == 0.5 + assert prompt_template.config["presence_penalty"] == 0.5 input_variables = { "input": "please tell me a joke about cats", } - messages = prompt_config.render(input_variables=input_variables) + messages = prompt_template.render(input_variables=input_variables) assert len(messages) == 2 assert messages[0]["role"] == "system" assert messages[1]["role"] == "user" assert messages[1]["content"] == "please tell me a joke about cats" - def test_prompt_config_from_message(self, **kwargs): - prompt_config = PromptyTemplate.from_message( + def test_prompt_template_from_message(self, **kwargs): + prompt_template = PromptTemplate.from_message( api = "chat", model_name = "gpt-4o-mini", prompt_template = "system prompt template {input}" ) - assert prompt_config.model_name == "gpt-4o-mini" + assert prompt_template.model_name == 
"gpt-4o-mini" input_variables = { "input": "please tell me a joke about cats", } - messages = prompt_config.render(input_variables=input_variables) + messages = prompt_template.render(input_variables=input_variables) assert len(messages) == 1 assert messages[0]["role"] == "system" - # TODO: need to separate the system prompt from the user input - # assert messages[1]["role"] == "user" - # assert messages[1]["content"] == "please tell me a joke about cats" From 44d2f2c847d2b091ed5f67feedbe3df310819f10 Mon Sep 17 00:00:00 2001 From: David Wu Date: Fri, 18 Oct 2024 16:16:07 -0700 Subject: [PATCH 04/21] Add comment and set model_name as optional --- .../azure/ai/inference/prompts/_patch.py | 18 +++++++++++++++--- ...hat_completions_from_input_prompt_string.py | 1 - 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py index 628fcefb2686..6ebc17ab654c 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py @@ -13,6 +13,18 @@ class PromptTemplate: + """The helper class which takes varient of inputs, e.g. Prompty format or string, and returns the parsed prompt in an array. + + :param prompty: Prompty object which contains both model config and prompt template. + :type prompty: Prompty + :param prompt_template: The prompt template string. + :type prompt_template: str + :param api: The API type, e.g. "chat" or "completion". + :type api: str + :param model_name: The model name, e.g. "gpt-4o-mini". + :type model_name: str + """ + @staticmethod def from_prompty(file_path: str): if not file_path: @@ -22,9 +34,9 @@ def from_prompty(file_path: str): @staticmethod def from_message( - model_name: str, prompt_template: str, - api: str = "chat" + api: str = "chat", + model_name: str | None = None ): return PromptTemplate(api=api, prompt_template=prompt_template, model_name=model_name, prompty=None) @@ -40,7 +52,7 @@ def __init__( self.model_name = prompty.model.configuration["azure_deployment"] self.config = prompty.model.parameters self._parameters = {} - elif prompt_template is not None and model_name is not None: + elif prompt_template is not None: self.model_name = model_name self.config = {} # _parameters is a dict to hold the internal configuration diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py index 9b53d244fccd..6f56ef8d65d7 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py @@ -51,7 +51,6 @@ def sample_chat_completions_from_input_prompt_string(): """ prompt_template = PromptTemplate.from_message( api = "chat", - model_name = "gpt-4o-mini", prompt_template = prompt_template_str ) From 2d1d132f1126b5b313b3178331466241b2edc50a Mon Sep 17 00:00:00 2001 From: David Wu Date: Tue, 22 Oct 2024 08:48:57 -0700 Subject: [PATCH 05/21] Bug fixes --- .../azure/ai/inference/prompts/_patch.py | 5 +++-- .../azure/ai/inference/prompts/utils.py | 2 +- .../samples/sample1.prompty | 10 +++++----- ...at_completions_from_input_prompt_string.py | 19 +++++++++++++++++-- ...ple_chat_completions_from_input_prompty.py | 7 ++++++- 5 files changed, 32 insertions(+), 11 deletions(-) diff --git 
a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py index 6ebc17ab654c..5b90e8860e67 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py @@ -10,6 +10,7 @@ import azure.ai.inference.prompts as prompts from .core import Prompty from .utils import prepare +from .mustache import render class PromptTemplate: @@ -49,7 +50,7 @@ def __init__( ) -> None: self.prompty = prompty if self.prompty is not None: - self.model_name = prompty.model.configuration["azure_deployment"] + self.model_name = prompty.model.configuration["azure_deployment"] if "azure_deployment" in prompty.model.configuration else None self.config = prompty.model.parameters self._parameters = {} elif prompt_template is not None: @@ -68,7 +69,7 @@ def render(self, input_variables: dict[str, any]): parsed = prepare(self.prompty, input_variables) return parsed elif "prompt_template" in self._parameters: - system_prompt = self._parameters["prompt_template"].format(**input_variables) + system_prompt = render(self._parameters["prompt_template"], input_variables) return [{"role": "system", "content": system_prompt}] diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/utils.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/utils.py index 5c3b63d8839c..30add215d8b0 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/utils.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/utils.py @@ -48,7 +48,7 @@ def load(prompt_path: str, configuration: str = "default") -> Prompty: # load global configuration if "model" not in attributes: - attributes["model"] = {} + attributes["model"] = { "api": "chat" } # pull model settings out of attributes try: diff --git a/sdk/ai/azure-ai-inference/samples/sample1.prompty b/sdk/ai/azure-ai-inference/samples/sample1.prompty index 973445961991..6dbcbf40bc6f 100644 --- a/sdk/ai/azure-ai-inference/samples/sample1.prompty +++ b/sdk/ai/azure-ai-inference/samples/sample1.prompty @@ -12,15 +12,15 @@ model: temperature: 1 frequency_penalty: 0.5 presence_penalty: 0.5 -sample: - firstName: Jane - lastName: Doe - question: What is the meaning of life? - chat_history: [] --- system: You are an AI assistant in a hotel. You help guests with their requests and provide information about the hotel and its services. +# context +{{#rules}} +{{rule}} +{{/rules}} + {{#chat_history}} {{role}}: {{content}} diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py index 6f56ef8d65d7..b6dbe73ffd89 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py @@ -46,8 +46,18 @@ def sample_chat_completions_from_input_prompt_string(): system: You are an AI assistant in a hotel. You help guests with their requests and provide information about the hotel and its services. 
+ # context + {{#rules}} + {{rule}} + {{/rules}} + + {{#chat_history}} + {{role}}: + {{content}} + {{/chat_history}} + user: - {input} + {{input}} """ prompt_template = PromptTemplate.from_message( api = "chat", @@ -55,7 +65,12 @@ def sample_chat_completions_from_input_prompt_string(): ) input_variables = { - "input": "please tell me a joke about cats", + "input": "What's the checkin and checkout time?", + "rules": [ + { "rule": "The checkin time is 3pm" }, + { "rule": "The checkout time is 11am" }, + { "rule": "Breakfast is served from 7am to 10am" }, + ], } messages = prompt_template.render(input_variables=input_variables) diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py index 2114f5458965..030ed1926e95 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py @@ -46,7 +46,12 @@ def sample_chat_completions_from_input_prompty(): prompt_template = PromptTemplate.from_prompty(file_path=path) input_variables = { - "input": "please tell me a joke about cats", + "input": "What's the checkin and checkout time?", + "rules": [ + { "rule": "The checkin time is 3pm" }, + { "rule": "The checkout time is 11am" }, + { "rule": "Breakfast is served from 7am to 10am" }, + ], } messages = prompt_template.render(input_variables=input_variables) From 9f7b67970ea8ae75290386e733410d4e5d5e9fe6 Mon Sep 17 00:00:00 2001 From: David Wu Date: Tue, 22 Oct 2024 09:13:17 -0700 Subject: [PATCH 06/21] Updated parameter names from PM feedbacks --- .../azure/ai/inference/prompts/_patch.py | 13 ++++++---- ...at_completions_from_input_prompt_string.py | 24 +++++++------------ ...ple_chat_completions_from_input_prompty.py | 20 +++++++--------- 3 files changed, 24 insertions(+), 33 deletions(-) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py index 5b90e8860e67..b331e030bdc0 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py @@ -51,11 +51,11 @@ def __init__( self.prompty = prompty if self.prompty is not None: self.model_name = prompty.model.configuration["azure_deployment"] if "azure_deployment" in prompty.model.configuration else None - self.config = prompty.model.parameters + self.parameters = prompty.model.parameters self._parameters = {} elif prompt_template is not None: self.model_name = model_name - self.config = {} + self.parameters = {} # _parameters is a dict to hold the internal configuration self._parameters = { "api": api if api is not None else "chat", @@ -64,12 +64,15 @@ def __init__( else: raise ValueError("Please invalid arguments for PromptConfig") - def render(self, input_variables: dict[str, any]): + def render(self, data: dict[str, any] | None = None, **kwargs): + if data is None: + data = kwargs + if self.prompty is not None: - parsed = prepare(self.prompty, input_variables) + parsed = prepare(self.prompty, data) return parsed elif "prompt_template" in self._parameters: - system_prompt = render(self._parameters["prompt_template"], input_variables) + system_prompt = render(self._parameters["prompt_template"], data) return [{"role": "system", "content": system_prompt}] diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py 
b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py index b6dbe73ffd89..2c961b529c7e 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py @@ -64,24 +64,16 @@ def sample_chat_completions_from_input_prompt_string(): prompt_template = prompt_template_str ) - input_variables = { - "input": "What's the checkin and checkout time?", - "rules": [ - { "rule": "The checkin time is 3pm" }, - { "rule": "The checkout time is 11am" }, - { "rule": "Breakfast is served from 7am to 10am" }, - ], - } - - messages = prompt_template.render(input_variables=input_variables) + input = "What's the checkin and checkout time?" + rules = [ + { "rule": "The checkin time is 3pm" }, + { "rule": "The checkout time is 11am" }, + { "rule": "Breakfast is served from 7am to 10am" }, + ] + messages = prompt_template.render(input=input, rules=rules) client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) - - response = client.complete( - { - "messages": messages, - } - ) + response = client.complete(messages=messages) print(response.choices[0].message.content) diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py index 030ed1926e95..9d6b109350af 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py @@ -41,20 +41,16 @@ def sample_chat_completions_from_input_prompty(): print("Set them before running this sample.") exit() - path = "./sample1.prompty" prompt_template = PromptTemplate.from_prompty(file_path=path) - input_variables = { - "input": "What's the checkin and checkout time?", - "rules": [ - { "rule": "The checkin time is 3pm" }, - { "rule": "The checkout time is 11am" }, - { "rule": "Breakfast is served from 7am to 10am" }, - ], - } - - messages = prompt_template.render(input_variables=input_variables) + input = "What's the checkin and checkout time?" 
+ rules = [ + { "rule": "The checkin time is 3pm" }, + { "rule": "The checkout time is 11am" }, + { "rule": "Breakfast is served from 7am to 10am" }, + ] + messages = prompt_template.render(input=input, rules=rules) client = ChatCompletionsClient( endpoint=endpoint, @@ -64,7 +60,7 @@ def sample_chat_completions_from_input_prompty(): response = client.complete( messages=messages, model=prompt_template.model_name, - **prompt_template.config, + **prompt_template.parameters, ) print(response.choices[0].message.content) From 38eb25833261b71cae2b2e64583ff3318eb914ba Mon Sep 17 00:00:00 2001 From: David Wu Date: Mon, 28 Oct 2024 11:29:00 -0700 Subject: [PATCH 07/21] Improve sample code and unit tests --- ...at_completions_from_input_prompt_string.py | 8 ++- ...ple_chat_completions_from_input_prompty.py | 8 ++- sdk/ai/azure-ai-inference/setup.py | 1 - .../azure-ai-inference/tests/sample1.prompty | 10 +-- .../azure-ai-inference/tests/test_prompts.py | 71 +++++++++++++++---- 5 files changed, 73 insertions(+), 25 deletions(-) diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py index 2c961b529c7e..a7b6f158134f 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py @@ -64,13 +64,17 @@ def sample_chat_completions_from_input_prompt_string(): prompt_template = prompt_template_str ) - input = "What's the checkin and checkout time?" + input = "When I arrived, can I still have breakfast?" rules = [ { "rule": "The checkin time is 3pm" }, { "rule": "The checkout time is 11am" }, { "rule": "Breakfast is served from 7am to 10am" }, ] - messages = prompt_template.render(input=input, rules=rules) + chat_history = [ + { "role": "user", "content": "I'll arrive at 2pm. What's the checkin and checkout time?" }, + { "role": "system", "content": "The check-in time is 3 PM, and the checkout time is 11 AM." }, + ] + messages = prompt_template.render(input=input, rules=rules, chat_history=chat_history) client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) response = client.complete(messages=messages) diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py index 9d6b109350af..714c6f293581 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py @@ -44,13 +44,17 @@ def sample_chat_completions_from_input_prompty(): path = "./sample1.prompty" prompt_template = PromptTemplate.from_prompty(file_path=path) - input = "What's the checkin and checkout time?" + input = "When I arrived, can I still have breakfast?" rules = [ { "rule": "The checkin time is 3pm" }, { "rule": "The checkout time is 11am" }, { "rule": "Breakfast is served from 7am to 10am" }, ] - messages = prompt_template.render(input=input, rules=rules) + chat_history = [ + { "role": "user", "content": "I'll arrive at 2pm. What's the checkin and checkout time?" }, + { "role": "system", "content": "The check-in time is 3 PM, and the checkout time is 11 AM." 
}, + ] + messages = prompt_template.render(input=input, rules=rules, chat_history=chat_history) client = ChatCompletionsClient( endpoint=endpoint, diff --git a/sdk/ai/azure-ai-inference/setup.py b/sdk/ai/azure-ai-inference/setup.py index fdca440194fa..c264ae00239e 100644 --- a/sdk/ai/azure-ai-inference/setup.py +++ b/sdk/ai/azure-ai-inference/setup.py @@ -61,7 +61,6 @@ include_package_data=True, package_data={ "azure.ai.inference": ["py.typed"], - "azure.ai.prompty": ["py.typed"], }, install_requires=[ "isodate>=0.6.1", diff --git a/sdk/ai/azure-ai-inference/tests/sample1.prompty b/sdk/ai/azure-ai-inference/tests/sample1.prompty index 973445961991..6dbcbf40bc6f 100644 --- a/sdk/ai/azure-ai-inference/tests/sample1.prompty +++ b/sdk/ai/azure-ai-inference/tests/sample1.prompty @@ -12,15 +12,15 @@ model: temperature: 1 frequency_penalty: 0.5 presence_penalty: 0.5 -sample: - firstName: Jane - lastName: Doe - question: What is the meaning of life? - chat_history: [] --- system: You are an AI assistant in a hotel. You help guests with their requests and provide information about the hotel and its services. +# context +{{#rules}} +{{rule}} +{{/rules}} + {{#chat_history}} {{role}}: {{content}} diff --git a/sdk/ai/azure-ai-inference/tests/test_prompts.py b/sdk/ai/azure-ai-inference/tests/test_prompts.py index b3317861afb0..644bffb0c1b2 100644 --- a/sdk/ai/azure-ai-inference/tests/test_prompts.py +++ b/sdk/ai/azure-ai-inference/tests/test_prompts.py @@ -20,28 +20,69 @@ def test_prompt_template_from_prompty(self, **kwargs): prompty_file_path = os.path.join(script_dir, "sample1.prompty") prompt_template = PromptTemplate.from_prompty(prompty_file_path) assert prompt_template.model_name == "gpt-4o-mini" - assert prompt_template.config["temperature"] == 1 - assert prompt_template.config["frequency_penalty"] == 0.5 - assert prompt_template.config["presence_penalty"] == 0.5 - input_variables = { - "input": "please tell me a joke about cats", - } - messages = prompt_template.render(input_variables=input_variables) + assert prompt_template.parameters["temperature"] == 1 + assert prompt_template.parameters["frequency_penalty"] == 0.5 + assert prompt_template.parameters["presence_penalty"] == 0.5 + + input = "What's the checkin and checkout time?" + rules = [ + { "rule": "The checkin time is 3pm" }, + { "rule": "The checkout time is 11am" }, + { "rule": "Breakfast is served from 7am to 10am" }, + ] + messages = prompt_template.render(input=input, rules=rules) assert len(messages) == 2 assert messages[0]["role"] == "system" + assert "Breakfast is served from 7am to 10am" in messages[0]["content"] assert messages[1]["role"] == "user" - assert messages[1]["content"] == "please tell me a joke about cats" + assert messages[1]["content"] == "What's the checkin and checkout time?" 
def test_prompt_template_from_message(self, **kwargs): + prompt_template_str = "system prompt template text\nuser:\n{{input}}" prompt_template = PromptTemplate.from_message( api = "chat", - model_name = "gpt-4o-mini", - prompt_template = "system prompt template {input}" + prompt_template = prompt_template_str ) - assert prompt_template.model_name == "gpt-4o-mini" - input_variables = { - "input": "please tell me a joke about cats", - } - messages = prompt_template.render(input_variables=input_variables) + input = "user question input text" + messages = prompt_template.render(input=input) + assert len(messages) == 1 + assert messages[0]["role"] == "system" + assert "system prompt template text\nuser:\nuser question input text" == messages[0]["content"] + + def test_prompt_template_from_message_with_tags(self, **kwargs): + prompt_template_str = """ + system: + You are an AI assistant in a hotel. You help guests with their requests and provide information about the hotel and its services. + + # context + {{#rules}} + {{rule}} + {{/rules}} + + {{#chat_history}} + {{role}}: + {{content}} + {{/chat_history}} + + user: + {{input}} + """ + prompt_template = PromptTemplate.from_message( + api = "chat", + prompt_template = prompt_template_str + ) + input = "When I arrived, can I still have breakfast?" + rules = [ + { "rule": "The checkin time is 3pm" }, + { "rule": "The checkout time is 11am" }, + { "rule": "Breakfast is served from 7am to 10am" }, + ] + chat_history = [ + { "role": "user", "content": "I'll arrive at 2pm. What's the checkin and checkout time?" }, + { "role": "system", "content": "The check-in time is 3 PM, and the checkout time is 11 AM." }, + ] + messages = prompt_template.render(input=input, rules=rules, chat_history=chat_history) assert len(messages) == 1 assert messages[0]["role"] == "system" + assert "You are an AI assistant in a hotel." in messages[0]["content"] + assert "When I arrived, can I still have breakfast?" in messages[0]["content"] From aa28df45cb79f1924835fa97fcd058d15fd81039 Mon Sep 17 00:00:00 2001 From: David Wu Date: Mon, 28 Oct 2024 11:53:23 -0700 Subject: [PATCH 08/21] Update readme and comments --- sdk/ai/azure-ai-inference/samples/README.md | 2 ++ .../sample_chat_completions_from_input_prompt_string.py | 4 ++-- .../samples/sample_chat_completions_from_input_prompty.py | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/sdk/ai/azure-ai-inference/samples/README.md b/sdk/ai/azure-ai-inference/samples/README.md index b6704138e4b2..6054eaad5440 100644 --- a/sdk/ai/azure-ai-inference/samples/README.md +++ b/sdk/ai/azure-ai-inference/samples/README.md @@ -99,6 +99,8 @@ similarly for the other samples. |[sample_chat_completions_from_input_bytes.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_bytes.py) | One chat completion operation using a synchronous client, with input messages provided as `IO[bytes]`. 
| |[sample_chat_completions_from_input_json.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json.py) | One chat completion operation using a synchronous client, with input messages provided as a dictionary (type `MutableMapping[str, Any]`) | |[sample_chat_completions_from_input_json_with_image_url.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json_with_image_url.py) | One chat completion operation using a synchronous client, with input messages provided as a dictionary (type `MutableMapping[str, Any]`). Includes sending an input image URL. | +|[sample_chat_completions_from_input_prompt_string.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py) | One chat completion operation using a synchronous client, with input message template in string format. | +|[sample_chat_completions_from_input_prompty.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py) | One chat completion operation using a synchronous client, with the input in Prompty format from a Prompty file. Prompty website: https://prompty.ai | |[sample_chat_completions_with_tools.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py) | Shows how do use a tool (function) in chat completions, for an AI model that supports tools | |[sample_chat_completions_streaming_with_tools.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_tools.py) | Shows how do use a tool (function) in chat completions, with streaming response, for an AI model that supports tools | |[sample_load_client.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_load_client.py) | Shows how do use the function `load_client` to create the appropriate synchronous client based on the provided endpoint URL. In this example, it creates a synchronous `ChatCompletionsClient`. | diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py index a7b6f158134f..c1392cfe78b5 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py @@ -5,8 +5,8 @@ """ DESCRIPTION: This sample demonstrates how to get a chat completions response from - the service using a synchronous client, and directly providing the - input in string format. + the service using a synchronous client, with input message template + in string format. This sample assumes the AI model is hosted on a Serverless API or Managed Compute endpoint. 
For GitHub Models or Azure OpenAI endpoints, diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py index 714c6f293581..82fd26ea6027 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py @@ -5,8 +5,8 @@ """ DESCRIPTION: This sample demonstrates how to get a chat completions response from - the service using a synchronous client, and directly providing the - input in Prompty format from a Prompty file. Prompty website: https://prompty.ai + the service using a synchronous client, with the input in Prompty format + from a Prompty file. Prompty website: https://prompty.ai This sample assumes the AI model is hosted on a Serverless API or Managed Compute endpoint. For GitHub Models or Azure OpenAI endpoints, From 9a1eb7904b7de1ed4e8570d033c17cb8be92eb3c Mon Sep 17 00:00:00 2001 From: David Wu Date: Mon, 28 Oct 2024 12:34:37 -0700 Subject: [PATCH 09/21] Rename files --- .../azure/ai/inference/prompts/__init__.py | 10 +++++----- .../azure/ai/inference/prompts/{core.py => _core.py} | 0 .../ai/inference/prompts/{mustache.py => _mustache.py} | 0 .../ai/inference/prompts/{parsers.py => _parsers.py} | 2 +- .../azure/ai/inference/prompts/_patch.py | 6 +++--- .../inference/prompts/{renderers.py => _renderers.py} | 4 ++-- .../azure/ai/inference/prompts/{utils.py => _utils.py} | 2 +- .../azure/ai/inference/prompts/py.typed | 1 - 8 files changed, 12 insertions(+), 13 deletions(-) rename sdk/ai/azure-ai-inference/azure/ai/inference/prompts/{core.py => _core.py} (100%) rename sdk/ai/azure-ai-inference/azure/ai/inference/prompts/{mustache.py => _mustache.py} (100%) rename sdk/ai/azure-ai-inference/azure/ai/inference/prompts/{parsers.py => _parsers.py} (99%) rename sdk/ai/azure-ai-inference/azure/ai/inference/prompts/{renderers.py => _renderers.py} (88%) rename sdk/ai/azure-ai-inference/azure/ai/inference/prompts/{utils.py => _utils.py} (99%) delete mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/py.typed diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py index 87a0ca05574f..a3dd95ba1b09 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py @@ -2,12 +2,12 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
# ------------------------------------ -from .core import InvokerFactory -from .core import Prompty +from ._core import InvokerFactory +from ._core import Prompty -from .renderers import MustacheRenderer -from .parsers import PromptyChatParser -from .utils import load +from ._renderers import MustacheRenderer +from ._parsers import PromptyChatParser +from ._utils import load from ._patch import patch_sdk as _patch_sdk, PromptTemplate # Register the Mustache renderer and parser diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/core.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_core.py similarity index 100% rename from sdk/ai/azure-ai-inference/azure/ai/inference/prompts/core.py rename to sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_core.py diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/mustache.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py similarity index 100% rename from sdk/ai/azure-ai-inference/azure/ai/inference/prompts/mustache.py rename to sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/parsers.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py similarity index 99% rename from sdk/ai/azure-ai-inference/azure/ai/inference/prompts/parsers.py rename to sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py index 54f793d87f1b..811913e4a9ef 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/parsers.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py @@ -11,7 +11,7 @@ from typing import Dict, List, Type, Union from pydantic import BaseModel from azure.ai.inference.models import ChatRequestMessage, SystemMessage, UserMessage, AssistantMessage, ToolMessage -from .core import Invoker, Prompty, SimpleModel +from ._core import Invoker, Prompty, SimpleModel class RoleMap: diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py index b331e030bdc0..27a0042ff5e5 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py @@ -8,9 +8,9 @@ """ import azure.ai.inference.prompts as prompts -from .core import Prompty -from .utils import prepare -from .mustache import render +from ._core import Prompty +from ._utils import prepare +from ._mustache import render class PromptTemplate: diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/renderers.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_renderers.py similarity index 88% rename from sdk/ai/azure-ai-inference/azure/ai/inference/prompts/renderers.py rename to sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_renderers.py index 55d46f5d029c..31af7d10538e 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/renderers.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_renderers.py @@ -7,9 +7,9 @@ MIT License """ from pydantic import BaseModel -from .mustache import render +from ._mustache import render -from .core import Invoker, Prompty, SimpleModel +from ._core import Invoker, Prompty, SimpleModel class MustacheRenderer(Invoker): diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/utils.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py similarity index 99% rename from sdk/ai/azure-ai-inference/azure/ai/inference/prompts/utils.py rename to 
sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py index 30add215d8b0..18b56069f5bf 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/utils.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py @@ -10,7 +10,7 @@ from pathlib import Path from typing import Any, Dict, List, Union -from .core import ( +from ._core import ( Frontmatter, InvokerFactory, ModelSettings, diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/py.typed b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/py.typed deleted file mode 100644 index e5aff4f83af8..000000000000 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/py.typed +++ /dev/null @@ -1 +0,0 @@ -# Marker file for PEP 561. \ No newline at end of file From 1252b3aedb054616676c13ef4af7ecc271c490e1 Mon Sep 17 00:00:00 2001 From: David Wu Date: Tue, 29 Oct 2024 09:41:13 -0700 Subject: [PATCH 10/21] Address PR comment --- .../azure/ai/inference/prompts/__init__.py | 2 -- .../azure/ai/inference/prompts/_patch.py | 17 ++++++++++------- ...chat_completions_from_input_prompt_string.py | 2 -- ...ample_chat_completions_from_input_prompty.py | 2 -- 4 files changed, 10 insertions(+), 13 deletions(-) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py index a3dd95ba1b09..2b5f148ffd12 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py @@ -7,7 +7,6 @@ from ._renderers import MustacheRenderer from ._parsers import PromptyChatParser -from ._utils import load from ._patch import patch_sdk as _patch_sdk, PromptTemplate # Register the Mustache renderer and parser @@ -15,7 +14,6 @@ InvokerFactory().register_parser("prompty.chat", PromptyChatParser) __all__ = [ - "load", "Prompty", "PromptTemplate", ] diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py index 27a0042ff5e5..7dbfe9617dfe 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py @@ -7,9 +7,9 @@ Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize """ -import azure.ai.inference.prompts as prompts +from typing import Any, Dict, List from ._core import Prompty -from ._utils import prepare +from ._utils import load, prepare from ._mustache import render @@ -26,15 +26,16 @@ class PromptTemplate: :type model_name: str """ - @staticmethod - def from_prompty(file_path: str): + @classmethod + def from_prompty(cls, file_path: str): if not file_path: raise ValueError("Please provide file_path") - prompty = prompts.load(file_path) + prompty = load(file_path) return PromptTemplate(prompty=prompty) - @staticmethod + @classmethod def from_message( + cls, prompt_template: str, api: str = "chat", model_name: str | None = None @@ -64,7 +65,7 @@ def __init__( else: raise ValueError("Please invalid arguments for PromptConfig") - def render(self, data: dict[str, any] | None = None, **kwargs): + def render(self, data: dict[str, Any] | None = None, **kwargs) -> List[Dict[str, Any]]: if data is None: data = kwargs @@ -74,6 +75,8 @@ def render(self, data: dict[str, any] | None = None, **kwargs): elif "prompt_template" in self._parameters: system_prompt = render(self._parameters["prompt_template"], data) return [{"role": "system", "content": system_prompt}] + else: + raise 
ValueError("Please provide valid prompt template") def patch_sdk(): diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py index c1392cfe78b5..c31abaeb4173 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py @@ -23,8 +23,6 @@ `your-azure-region` is the Azure region where your model is deployed. 2) AZURE_AI_CHAT_KEY - Your model key (a 32-character string). Keep it secret. """ -# mypy: disable-error-code="union-attr" -# pyright: reportAttributeAccessIssue=false def sample_chat_completions_from_input_prompt_string(): diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py index 82fd26ea6027..42f68146f196 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py @@ -23,8 +23,6 @@ `your-azure-region` is the Azure region where your model is deployed. 2) AZURE_AI_CHAT_KEY - Your model key (a 32-character string). Keep it secret. """ -# mypy: disable-error-code="union-attr" -# pyright: reportAttributeAccessIssue=false def sample_chat_completions_from_input_prompty(): From b3e86162d03390a49aad51294ec59011102b692e Mon Sep 17 00:00:00 2001 From: David Wu Date: Tue, 29 Oct 2024 11:59:58 -0700 Subject: [PATCH 11/21] add Pydantic as dependency --- sdk/ai/azure-ai-inference/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/ai/azure-ai-inference/setup.py b/sdk/ai/azure-ai-inference/setup.py index c264ae00239e..fb369ec490b5 100644 --- a/sdk/ai/azure-ai-inference/setup.py +++ b/sdk/ai/azure-ai-inference/setup.py @@ -66,6 +66,7 @@ "isodate>=0.6.1", "azure-core>=1.30.0", "typing-extensions>=4.6.0", + "pydantic>=2.0.0", ], python_requires=">=3.8", extras_require={ From c43f88ed3394cb97140fcd92f973edc9d3033e69 Mon Sep 17 00:00:00 2001 From: David Wu Date: Tue, 29 Oct 2024 12:55:03 -0700 Subject: [PATCH 12/21] Fix type errors --- .../azure/ai/inference/prompts/_patch.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py index 7dbfe9617dfe..f9865b64c1ec 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py @@ -38,16 +38,16 @@ def from_message( cls, prompt_template: str, api: str = "chat", - model_name: str | None = None + model_name: str = None ): return PromptTemplate(api=api, prompt_template=prompt_template, model_name=model_name, prompty=None) def __init__( self, - prompty: Prompty | None = None, - api: str | None = None, - prompt_template: str | None = None, - model_name: str | None = None, + prompty: Prompty = None, + api: str = None, + prompt_template: str = None, + model_name: str = None, ) -> None: self.prompty = prompty if self.prompty is not None: @@ -65,7 +65,7 @@ def __init__( else: raise ValueError("Please invalid arguments for PromptConfig") - def render(self, data: dict[str, Any] | None = None, **kwargs) -> List[Dict[str, Any]]: + def render(self, data: dict[str, Any] = None, **kwargs) -> List[Dict[str, Any]]: if data is None: data = kwargs 
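Note (not part of any patch in this series): to make the API state after PATCH 12 concrete, here is a minimal usage sketch pieced together from the samples added above. The endpoint and key come from the AZURE_AI_CHAT_ENDPOINT / AZURE_AI_CHAT_KEY environment variables that the samples document; the template text is a placeholder, and, as the TODO in the tests points out, a plain string template currently renders to a single system message.

# Illustrative sketch only, assembled from the samples in this patch series.
import os

from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.prompts import PromptTemplate
from azure.core.credentials import AzureKeyCredential

# Same environment variables the samples in this series rely on.
endpoint = os.environ["AZURE_AI_CHAT_ENDPOINT"]
key = os.environ["AZURE_AI_CHAT_KEY"]

# Build a template from an inline Mustache-style prompt string (placeholder text, not from the PR).
prompt_template = PromptTemplate.from_message(
    api="chat",
    prompt_template="system:\nYou are a helpful assistant.\n\nuser:\n{{input}}",
)

# As of these patches a string template renders to a single {"role": "system", ...} dict.
messages = prompt_template.render(input="please tell me a joke about cats")

client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key))
response = client.complete(messages=messages)
print(response.choices[0].message.content)

A Prompty file can be used the same way via PromptTemplate.from_prompty(file_path), in which case model_name and parameters are populated from the file's front matter and passed to client.complete, as the sample_chat_completions_from_input_prompty.py sample above does.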
From e9cab12460409e6d4d85e91fd9fc40cca3815212 Mon Sep 17 00:00:00 2001 From: David Wu Date: Tue, 29 Oct 2024 13:55:49 -0700 Subject: [PATCH 13/21] Fix spelling issues --- .../azure/ai/inference/prompts/_patch.py | 2 +- sdk/ai/azure-ai-inference/cspell.json | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 sdk/ai/azure-ai-inference/cspell.json diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py index f9865b64c1ec..06ed29bca55c 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py @@ -14,7 +14,7 @@ class PromptTemplate: - """The helper class which takes varient of inputs, e.g. Prompty format or string, and returns the parsed prompt in an array. + """The helper class which takes variant of inputs, e.g. Prompty format or string, and returns the parsed prompt in an array. :param prompty: Prompty object which contains both model config and prompt template. :type prompty: Prompty diff --git a/sdk/ai/azure-ai-inference/cspell.json b/sdk/ai/azure-ai-inference/cspell.json new file mode 100644 index 000000000000..587698f9c2b1 --- /dev/null +++ b/sdk/ai/azure-ai-inference/cspell.json @@ -0,0 +1,10 @@ +{ + "ignoreWords": [ + "dels", + "fmatter", + "fspath", + "ldel", + "prompty", + "rdel" + ] +} \ No newline at end of file From 24c3ced9945db3c947762fc30782427ec74a9a2b Mon Sep 17 00:00:00 2001 From: David Wu Date: Tue, 29 Oct 2024 14:53:50 -0700 Subject: [PATCH 14/21] Address PR comments and fix linter issues --- .../azure/ai/inference/prompts/_core.py | 2 +- .../azure/ai/inference/prompts/_parsers.py | 8 +- .../azure/ai/inference/prompts/_patch.py | 83 ++++++++++++------- .../azure/ai/inference/prompts/_utils.py | 2 +- ...at_completions_from_input_prompt_string.py | 8 +- ...ple_chat_completions_from_input_prompty.py | 8 +- sdk/ai/azure-ai-inference/setup.py | 2 +- .../azure-ai-inference/tests/test_prompts.py | 16 ++-- 8 files changed, 79 insertions(+), 50 deletions(-) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_core.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_core.py index 0f3a39f8b319..8b1aab27361c 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_core.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_core.py @@ -327,7 +327,7 @@ def read(cls, string: str) -> dict[str, Any]: fmatter = result.group(1) body = result.group(2) return { - "attributes": yaml.load(fmatter, Loader=yaml.FullLoader), + "attributes": yaml.safe_load(fmatter), "body": body, "frontmatter": fmatter, } diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py index 811913e4a9ef..fc2e0a5e0cfa 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py @@ -10,7 +10,13 @@ import re from typing import Dict, List, Type, Union from pydantic import BaseModel -from azure.ai.inference.models import ChatRequestMessage, SystemMessage, UserMessage, AssistantMessage, ToolMessage +from azure.ai.inference.models import ( + ChatRequestMessage, + SystemMessage, + UserMessage, + AssistantMessage, + ToolMessage, +) from ._core import Invoker, Prompty, SimpleModel diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py 
index 06ed29bca55c..92f01fcb3582 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py @@ -7,51 +7,65 @@ Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize """ -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional, Self from ._core import Prompty from ._utils import load, prepare from ._mustache import render class PromptTemplate: - """The helper class which takes variant of inputs, e.g. Prompty format or string, and returns the parsed prompt in an array. - - :param prompty: Prompty object which contains both model config and prompt template. - :type prompty: Prompty - :param prompt_template: The prompt template string. - :type prompt_template: str - :param api: The API type, e.g. "chat" or "completion". - :type api: str - :param model_name: The model name, e.g. "gpt-4o-mini". - :type model_name: str - """ + """The helper class which takes variant of inputs, e.g. Prompty format or string, and returns the parsed prompt in an array.""" @classmethod - def from_prompty(cls, file_path: str): + def from_prompty(cls, file_path: str) -> Self: + """Initialize a PromptTemplate object from a prompty file. + + :param file_path: The path to the prompty file. + :type file_path: str + :return: The PromptTemplate object. + :rtype: PromptTemplate + """ if not file_path: raise ValueError("Please provide file_path") prompty = load(file_path) - return PromptTemplate(prompty=prompty) - + return cls(prompty=prompty) + @classmethod def from_message( - cls, - prompt_template: str, - api: str = "chat", - model_name: str = None - ): - return PromptTemplate(api=api, prompt_template=prompt_template, model_name=model_name, prompty=None) + cls, prompt_template: str, api: str = "chat", model_name: Optional[str] = None + ) -> Self: + """Initialize a PromptTemplate object from a message template. + + :param prompt_template: The prompt template string. + :type prompt_template: str + :param api: The API type, e.g. "chat" or "completion". + :type api: str + :param model_name: The model name, e.g. "gpt-4o-mini". + :type model_name: str + :return: The PromptTemplate object. 
+ :rtype: PromptTemplate + """ + return cls( + api=api, + prompt_template=prompt_template, + model_name=model_name, + prompty=None, + ) def __init__( - self, - prompty: Prompty = None, - api: str = None, - prompt_template: str = None, - model_name: str = None, + self, + prompty: Prompty = None, + prompt_template: str = None, + api: str = "chat", + model_name: Optional[str] = None, ) -> None: self.prompty = prompty if self.prompty is not None: - self.model_name = prompty.model.configuration["azure_deployment"] if "azure_deployment" in prompty.model.configuration else None + self.model_name = ( + prompty.model.configuration["azure_deployment"] + if "azure_deployment" in prompty.model.configuration + else None + ) self.parameters = prompty.model.parameters self._parameters = {} elif prompt_template is not None: @@ -60,12 +74,21 @@ def __init__( # _parameters is a dict to hold the internal configuration self._parameters = { "api": api if api is not None else "chat", - "prompt_template": prompt_template + "prompt_template": prompt_template, } else: - raise ValueError("Please invalid arguments for PromptConfig") + raise ValueError("Please invalid arguments for PromptTemplate") + + def render( + self, data: Optional[Dict[str, Any]] = None, **kwargs + ) -> List[Dict[str, Any]]: + """Render the prompt template with the given data. - def render(self, data: dict[str, Any] = None, **kwargs) -> List[Dict[str, Any]]: + :param data: The data to render the prompt template with. + :type data: Optional[Dict[str, Any]] + :return: The rendered prompt template. + :rtype: List[Dict[str, Any]] + """ if data is None: data = kwargs diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py index 18b56069f5bf..7ca2971ab438 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py @@ -48,7 +48,7 @@ def load(prompt_path: str, configuration: str = "default") -> Prompty: # load global configuration if "model" not in attributes: - attributes["model"] = { "api": "chat" } + attributes["model"] = {"api": "chat"} # pull model settings out of attributes try: diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py index c31abaeb4173..5887e2c74c01 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py @@ -64,13 +64,13 @@ def sample_chat_completions_from_input_prompt_string(): input = "When I arrived, can I still have breakfast?" rules = [ - { "rule": "The checkin time is 3pm" }, - { "rule": "The checkout time is 11am" }, + { "rule": "The check-in time is 3pm" }, + { "rule": "The check-out time is 11am" }, { "rule": "Breakfast is served from 7am to 10am" }, ] chat_history = [ - { "role": "user", "content": "I'll arrive at 2pm. What's the checkin and checkout time?" }, - { "role": "system", "content": "The check-in time is 3 PM, and the checkout time is 11 AM." }, + { "role": "user", "content": "I'll arrive at 2pm. What's the check-in and check-out time?" }, + { "role": "system", "content": "The check-in time is 3 PM, and the check-out time is 11 AM." 
}, ] messages = prompt_template.render(input=input, rules=rules, chat_history=chat_history) diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py index 42f68146f196..d72fe8f0a353 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py @@ -44,13 +44,13 @@ def sample_chat_completions_from_input_prompty(): input = "When I arrived, can I still have breakfast?" rules = [ - { "rule": "The checkin time is 3pm" }, - { "rule": "The checkout time is 11am" }, + { "rule": "The check-in time is 3pm" }, + { "rule": "The check-out time is 11am" }, { "rule": "Breakfast is served from 7am to 10am" }, ] chat_history = [ - { "role": "user", "content": "I'll arrive at 2pm. What's the checkin and checkout time?" }, - { "role": "system", "content": "The check-in time is 3 PM, and the checkout time is 11 AM." }, + { "role": "user", "content": "I'll arrive at 2pm. What's the check-in and check-out time?" }, + { "role": "system", "content": "The check-in time is 3 PM, and the check-out time is 11 AM." }, ] messages = prompt_template.render(input=input, rules=rules, chat_history=chat_history) diff --git a/sdk/ai/azure-ai-inference/setup.py b/sdk/ai/azure-ai-inference/setup.py index fb369ec490b5..0794fc44eff9 100644 --- a/sdk/ai/azure-ai-inference/setup.py +++ b/sdk/ai/azure-ai-inference/setup.py @@ -66,7 +66,7 @@ "isodate>=0.6.1", "azure-core>=1.30.0", "typing-extensions>=4.6.0", - "pydantic>=2.0.0", + "pydantic", ], python_requires=">=3.8", extras_require={ diff --git a/sdk/ai/azure-ai-inference/tests/test_prompts.py b/sdk/ai/azure-ai-inference/tests/test_prompts.py index 644bffb0c1b2..b3b1731819fc 100644 --- a/sdk/ai/azure-ai-inference/tests/test_prompts.py +++ b/sdk/ai/azure-ai-inference/tests/test_prompts.py @@ -24,10 +24,10 @@ def test_prompt_template_from_prompty(self, **kwargs): assert prompt_template.parameters["frequency_penalty"] == 0.5 assert prompt_template.parameters["presence_penalty"] == 0.5 - input = "What's the checkin and checkout time?" + input = "What's the check-in and check-out time?" rules = [ - { "rule": "The checkin time is 3pm" }, - { "rule": "The checkout time is 11am" }, + { "rule": "The check-in time is 3pm" }, + { "rule": "The check-out time is 11am" }, { "rule": "Breakfast is served from 7am to 10am" }, ] messages = prompt_template.render(input=input, rules=rules) @@ -35,7 +35,7 @@ def test_prompt_template_from_prompty(self, **kwargs): assert messages[0]["role"] == "system" assert "Breakfast is served from 7am to 10am" in messages[0]["content"] assert messages[1]["role"] == "user" - assert messages[1]["content"] == "What's the checkin and checkout time?" + assert messages[1]["content"] == "What's the check-in and check-out time?" def test_prompt_template_from_message(self, **kwargs): prompt_template_str = "system prompt template text\nuser:\n{{input}}" @@ -73,13 +73,13 @@ def test_prompt_template_from_message_with_tags(self, **kwargs): ) input = "When I arrived, can I still have breakfast?" rules = [ - { "rule": "The checkin time is 3pm" }, - { "rule": "The checkout time is 11am" }, + { "rule": "The check-in time is 3pm" }, + { "rule": "The check-out time is 11am" }, { "rule": "Breakfast is served from 7am to 10am" }, ] chat_history = [ - { "role": "user", "content": "I'll arrive at 2pm. What's the checkin and checkout time?" 
}, - { "role": "system", "content": "The check-in time is 3 PM, and the checkout time is 11 AM." }, + { "role": "user", "content": "I'll arrive at 2pm. What's the check-in and check-out time?" }, + { "role": "system", "content": "The check-in time is 3 PM, and the check-out time is 11 AM." }, ] messages = prompt_template.render(input=input, rules=rules, chat_history=chat_history) assert len(messages) == 1 From 19316b85cb082ad8eedbdb63e124122048d1a0e2 Mon Sep 17 00:00:00 2001 From: David Wu Date: Tue, 29 Oct 2024 19:55:58 -0700 Subject: [PATCH 15/21] Fix type import for "Self" --- sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py index 92f01fcb3582..b23afa000e2c 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py @@ -7,7 +7,8 @@ Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize """ -from typing import Any, Dict, List, Optional, Self +from typing import Any, Dict, List, Optional +from typing_extensions import Self from ._core import Prompty from ._utils import load, prepare from ._mustache import render From ed718cb2618c6086cc8a414ca512e49a16fce246 Mon Sep 17 00:00:00 2001 From: David Wu Date: Wed, 30 Oct 2024 10:41:56 -0700 Subject: [PATCH 16/21] Change to keyword-only constructor and fix linter issues --- .../azure/ai/inference/prompts/_mustache.py | 16 +++++++------- .../azure/ai/inference/prompts/_patch.py | 21 ++++++++++--------- .../azure/ai/inference/prompts/_utils.py | 2 +- ...at_completions_from_input_prompt_string.py | 4 +++- ...ple_chat_completions_from_input_prompty.py | 2 ++ sdk/ai/azure-ai-inference/setup.py | 3 ++- 6 files changed, 28 insertions(+), 20 deletions(-) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py index 165d881b6807..a807951fda43 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py @@ -14,6 +14,8 @@ from types import MappingProxyType from typing import ( Any, + Dict, + List, Literal, Optional, Union, @@ -25,7 +27,7 @@ logger = logging.getLogger(__name__) -Scopes: TypeAlias = list[Union[Literal[False, 0], Mapping[str, Any]]] +Scopes: TypeAlias = List[Union[Literal[False, 0], Mapping[str, Any]]] # Globals @@ -380,7 +382,7 @@ def _get_key( # Move into the scope try: # Try subscripting (Normal dictionaries) - scope = cast(dict[str, Any], scope)[child] + scope = cast(Dict[str, Any], scope)[child] except (TypeError, AttributeError): try: scope = getattr(scope, child) @@ -425,13 +427,13 @@ def _get_partial(name: str, partials_dict: Mapping[str, str]) -> str: # # The main rendering function # -g_token_cache: dict[str, list[tuple[str, str]]] = {} +g_token_cache: Dict[str, List[tuple[str, str]]] = {} EMPTY_DICT: MappingProxyType[str, str] = MappingProxyType({}) def render( - template: Union[str, list[tuple[str, str]]] = "", + template: Union[str, List[tuple[str, str]]] = "", data: Mapping[str, Any] = EMPTY_DICT, partials_dict: Mapping[str, str] = EMPTY_DICT, padding: str = "", @@ -559,7 +561,7 @@ def render( if callable(scope): # Generate template text from tags text = "" - tags: list[tuple[str, str]] = [] + tags: List[tuple[str, str]] = [] 
for token in tokens: if token == ("end", key): break @@ -604,7 +606,7 @@ def render( ), ) - output += rend + output += rend # type: ignore[reportOperatorIssue] # If the scope is a sequence, an iterator or generator but not # derived from a string @@ -644,7 +646,7 @@ def render( else: # Otherwise we're just a scope section - scopes.insert(0, scope) + scopes.insert(0, scope) # type: ignore[reportArgumentType] # If we're an inverted section elif tag == "inverted section": diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py index b23afa000e2c..49f91dcd5808 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py @@ -55,25 +55,26 @@ def from_message( def __init__( self, - prompty: Prompty = None, - prompt_template: str = None, + *, api: str = "chat", + prompty: Optional[Prompty] = None, + prompt_template: Optional[str] = None, model_name: Optional[str] = None, ) -> None: self.prompty = prompty if self.prompty is not None: self.model_name = ( - prompty.model.configuration["azure_deployment"] - if "azure_deployment" in prompty.model.configuration + self.prompty.model.configuration["azure_deployment"] + if "azure_deployment" in self.prompty.model.configuration else None ) - self.parameters = prompty.model.parameters - self._parameters = {} + self.parameters = self.prompty.model.parameters + self._config = {} elif prompt_template is not None: self.model_name = model_name self.parameters = {} - # _parameters is a dict to hold the internal configuration - self._parameters = { + # _config is a dict to hold the internal configuration + self._config = { "api": api if api is not None else "chat", "prompt_template": prompt_template, } @@ -96,8 +97,8 @@ def render( if self.prompty is not None: parsed = prepare(self.prompty, data) return parsed - elif "prompt_template" in self._parameters: - system_prompt = render(self._parameters["prompt_template"], data) + elif "prompt_template" in self._config: + system_prompt = render(self._config["prompt_template"], data) return [{"role": "system", "content": system_prompt}] else: raise ValueError("Please provide valid prompt template") diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py index 7ca2971ab438..bc96c0cb9990 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py @@ -163,7 +163,7 @@ def prepare( "parser", f"{prompt.template.parser}.{prompt.model.api}", prompt, - SimpleModel(item=result.item), + SimpleModel(item=result.item), # type: ignore[reportPossiblyUnboundVariable] ) if isinstance(result, SimpleModel): diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py index 5887e2c74c01..c6227166f252 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py @@ -23,6 +23,8 @@ `your-azure-region` is the Azure region where your model is deployed. 2) AZURE_AI_CHAT_KEY - Your model key (a 32-character string). Keep it secret. 
""" +# mypy: disable-error-code="union-attr" +# pyright: reportAttributeAccessIssue=false def sample_chat_completions_from_input_prompt_string(): @@ -75,7 +77,7 @@ def sample_chat_completions_from_input_prompt_string(): messages = prompt_template.render(input=input, rules=rules, chat_history=chat_history) client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) - response = client.complete(messages=messages) + response = client.complete(messages=messages) # type: ignore[reportCallIssue, reportArgumentType] print(response.choices[0].message.content) diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py index d72fe8f0a353..5a74a87283a3 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py @@ -23,6 +23,8 @@ `your-azure-region` is the Azure region where your model is deployed. 2) AZURE_AI_CHAT_KEY - Your model key (a 32-character string). Keep it secret. """ +# mypy: disable-error-code="union-attr" +# pyright: reportAttributeAccessIssue=false def sample_chat_completions_from_input_prompty(): diff --git a/sdk/ai/azure-ai-inference/setup.py b/sdk/ai/azure-ai-inference/setup.py index 0794fc44eff9..22ebe4d4a084 100644 --- a/sdk/ai/azure-ai-inference/setup.py +++ b/sdk/ai/azure-ai-inference/setup.py @@ -66,7 +66,8 @@ "isodate>=0.6.1", "azure-core>=1.30.0", "typing-extensions>=4.6.0", - "pydantic", + "pydantic>=2.0.0", + "types-PyYAML", ], python_requires=">=3.8", extras_require={ From ebfa1f80f4efbd15a7487eab4bc88c1e6a619458 Mon Sep 17 00:00:00 2001 From: David Wu Date: Fri, 1 Nov 2024 09:59:27 -0700 Subject: [PATCH 17/21] Rename function `from_message` to `from_str`; `render` to `create_messages` --- .../azure/ai/inference/prompts/_patch.py | 4 ++-- ...sample_chat_completions_from_input_prompt_string.py | 4 ++-- .../sample_chat_completions_from_input_prompty.py | 2 +- sdk/ai/azure-ai-inference/tests/test_prompts.py | 10 +++++----- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py index 49f91dcd5808..70885f1bb8b5 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py @@ -32,7 +32,7 @@ def from_prompty(cls, file_path: str) -> Self: return cls(prompty=prompty) @classmethod - def from_message( + def from_str( cls, prompt_template: str, api: str = "chat", model_name: Optional[str] = None ) -> Self: """Initialize a PromptTemplate object from a message template. @@ -81,7 +81,7 @@ def __init__( else: raise ValueError("Please invalid arguments for PromptTemplate") - def render( + def create_messages( self, data: Optional[Dict[str, Any]] = None, **kwargs ) -> List[Dict[str, Any]]: """Render the prompt template with the given data. 
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py index c6227166f252..3cee7d09003c 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py @@ -59,7 +59,7 @@ def sample_chat_completions_from_input_prompt_string(): user: {{input}} """ - prompt_template = PromptTemplate.from_message( + prompt_template = PromptTemplate.from_str( api = "chat", prompt_template = prompt_template_str ) @@ -74,7 +74,7 @@ def sample_chat_completions_from_input_prompt_string(): { "role": "user", "content": "I'll arrive at 2pm. What's the check-in and check-out time?" }, { "role": "system", "content": "The check-in time is 3 PM, and the check-out time is 11 AM." }, ] - messages = prompt_template.render(input=input, rules=rules, chat_history=chat_history) + messages = prompt_template.create_messages(input=input, rules=rules, chat_history=chat_history) client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) response = client.complete(messages=messages) # type: ignore[reportCallIssue, reportArgumentType] diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py index 5a74a87283a3..a1e71112ce61 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py @@ -54,7 +54,7 @@ def sample_chat_completions_from_input_prompty(): { "role": "user", "content": "I'll arrive at 2pm. What's the check-in and check-out time?" }, { "role": "system", "content": "The check-in time is 3 PM, and the check-out time is 11 AM." 
}, ] - messages = prompt_template.render(input=input, rules=rules, chat_history=chat_history) + messages = prompt_template.create_messages(input=input, rules=rules, chat_history=chat_history) client = ChatCompletionsClient( endpoint=endpoint, diff --git a/sdk/ai/azure-ai-inference/tests/test_prompts.py b/sdk/ai/azure-ai-inference/tests/test_prompts.py index b3b1731819fc..09cd24c83d56 100644 --- a/sdk/ai/azure-ai-inference/tests/test_prompts.py +++ b/sdk/ai/azure-ai-inference/tests/test_prompts.py @@ -30,7 +30,7 @@ def test_prompt_template_from_prompty(self, **kwargs): { "rule": "The check-out time is 11am" }, { "rule": "Breakfast is served from 7am to 10am" }, ] - messages = prompt_template.render(input=input, rules=rules) + messages = prompt_template.create_messages(input=input, rules=rules) assert len(messages) == 2 assert messages[0]["role"] == "system" assert "Breakfast is served from 7am to 10am" in messages[0]["content"] @@ -39,12 +39,12 @@ def test_prompt_template_from_prompty(self, **kwargs): def test_prompt_template_from_message(self, **kwargs): prompt_template_str = "system prompt template text\nuser:\n{{input}}" - prompt_template = PromptTemplate.from_message( + prompt_template = PromptTemplate.from_str( api = "chat", prompt_template = prompt_template_str ) input = "user question input text" - messages = prompt_template.render(input=input) + messages = prompt_template.create_messages(input=input) assert len(messages) == 1 assert messages[0]["role"] == "system" assert "system prompt template text\nuser:\nuser question input text" == messages[0]["content"] @@ -67,7 +67,7 @@ def test_prompt_template_from_message_with_tags(self, **kwargs): user: {{input}} """ - prompt_template = PromptTemplate.from_message( + prompt_template = PromptTemplate.from_str( api = "chat", prompt_template = prompt_template_str ) @@ -81,7 +81,7 @@ def test_prompt_template_from_message_with_tags(self, **kwargs): { "role": "user", "content": "I'll arrive at 2pm. What's the check-in and check-out time?" }, { "role": "system", "content": "The check-in time is 3 PM, and the check-out time is 11 AM." }, ] - messages = prompt_template.render(input=input, rules=rules, chat_history=chat_history) + messages = prompt_template.create_messages(input=input, rules=rules, chat_history=chat_history) assert len(messages) == 1 assert messages[0]["role"] == "system" assert "You are an AI assistant in a hotel." in messages[0]["content"] From 25a03655b52b1d5a429321d57c9431d35ce1458c Mon Sep 17 00:00:00 2001 From: David Wu Date: Fri, 1 Nov 2024 11:45:27 -0700 Subject: [PATCH 18/21] Change from `from_str` to `from_string` --- .../azure-ai-inference/azure/ai/inference/prompts/_patch.py | 2 +- .../sample_chat_completions_from_input_prompt_string.py | 6 +++--- sdk/ai/azure-ai-inference/tests/test_prompts.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py index 70885f1bb8b5..f05d1a2b5f7c 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py @@ -32,7 +32,7 @@ def from_prompty(cls, file_path: str) -> Self: return cls(prompty=prompty) @classmethod - def from_str( + def from_string( cls, prompt_template: str, api: str = "chat", model_name: Optional[str] = None ) -> Self: """Initialize a PromptTemplate object from a message template. 
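The rename to `from_string` does not change the call shape; a short end-to-end sketch, under the same assumptions as the samples (AZURE_AI_CHAT_ENDPOINT and AZURE_AI_CHAT_KEY environment variables, placeholder prompt text), looks roughly like:

import os
from azure.core.credentials import AzureKeyCredential
from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.prompts import PromptTemplate

template = PromptTemplate.from_string(
    api="chat",
    prompt_template="You are an AI assistant in a hotel.\nuser:\n{{input}}",
)
# create_messages returns a list of {"role": ..., "content": ...} dicts
messages = template.create_messages(input="When is breakfast served?")

client = ChatCompletionsClient(
    endpoint=os.environ["AZURE_AI_CHAT_ENDPOINT"],
    credential=AzureKeyCredential(os.environ["AZURE_AI_CHAT_KEY"]),
)
response = client.complete(messages=messages)
print(response.choices[0].message.content)
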
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py index 3cee7d09003c..55de9e004102 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py @@ -59,12 +59,12 @@ def sample_chat_completions_from_input_prompt_string(): user: {{input}} """ - prompt_template = PromptTemplate.from_str( + prompt_template = PromptTemplate.from_string( api = "chat", prompt_template = prompt_template_str ) - input = "When I arrived, can I still have breakfast?" + query = "When I arrived, can I still have breakfast?" rules = [ { "rule": "The check-in time is 3pm" }, { "rule": "The check-out time is 11am" }, @@ -74,7 +74,7 @@ def sample_chat_completions_from_input_prompt_string(): { "role": "user", "content": "I'll arrive at 2pm. What's the check-in and check-out time?" }, { "role": "system", "content": "The check-in time is 3 PM, and the check-out time is 11 AM." }, ] - messages = prompt_template.create_messages(input=input, rules=rules, chat_history=chat_history) + messages = prompt_template.create_messages(query=query, rules=rules, chat_history=chat_history) client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) response = client.complete(messages=messages) # type: ignore[reportCallIssue, reportArgumentType] diff --git a/sdk/ai/azure-ai-inference/tests/test_prompts.py b/sdk/ai/azure-ai-inference/tests/test_prompts.py index 09cd24c83d56..64f41f9f6e92 100644 --- a/sdk/ai/azure-ai-inference/tests/test_prompts.py +++ b/sdk/ai/azure-ai-inference/tests/test_prompts.py @@ -39,7 +39,7 @@ def test_prompt_template_from_prompty(self, **kwargs): def test_prompt_template_from_message(self, **kwargs): prompt_template_str = "system prompt template text\nuser:\n{{input}}" - prompt_template = PromptTemplate.from_str( + prompt_template = PromptTemplate.from_string( api = "chat", prompt_template = prompt_template_str ) @@ -67,7 +67,7 @@ def test_prompt_template_from_message_with_tags(self, **kwargs): user: {{input}} """ - prompt_template = PromptTemplate.from_str( + prompt_template = PromptTemplate.from_string( api = "chat", prompt_template = prompt_template_str ) From a7a0bf260ae5df9a3ad89e426e70f8033f12744c Mon Sep 17 00:00:00 2001 From: David Wu Date: Sun, 3 Nov 2024 11:58:02 -0800 Subject: [PATCH 19/21] Merge latest code from `microsoft/prompty` and resolve linter issues --- .../azure/ai/inference/_model_base.py | 2 +- .../azure/ai/inference/prompts/__init__.py | 15 +- .../azure/ai/inference/prompts/_core.py | 575 ++++++++--------- .../azure/ai/inference/prompts/_invoker.py | 295 +++++++++ .../azure/ai/inference/prompts/_mustache.py | 41 +- .../azure/ai/inference/prompts/_parsers.py | 137 +++-- .../azure/ai/inference/prompts/_patch.py | 23 +- .../ai/inference/prompts/_prompty_utils.py | 580 ++++++++++++++++++ .../azure/ai/inference/prompts/_renderers.py | 30 +- .../azure/ai/inference/prompts/_tracer.py | 316 ++++++++++ .../azure/ai/inference/prompts/_utils.py | 297 +++------ sdk/ai/azure-ai-inference/cspell.json | 3 + .../azure-ai-inference/dev_requirements.txt | 6 +- ...at_completions_from_input_prompt_string.py | 24 +- ...ple_chat_completions_from_input_prompty.py | 15 +- sdk/ai/azure-ai-inference/setup.py | 2 - .../tests/sample1_with_secrets.prompty | 34 + .../azure-ai-inference/tests/test_prompts.py | 36 +- 18 files changed, 1757 
insertions(+), 674 deletions(-) create mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_invoker.py create mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py create mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_tracer.py create mode 100644 sdk/ai/azure-ai-inference/tests/sample1_with_secrets.prompty diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_model_base.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_model_base.py index c4b1008c1e85..53305e2213a7 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_model_base.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_model_base.py @@ -674,7 +674,7 @@ def _get_deserialize_callable_from_annotation( # pylint: disable=R0911, R0915, except AttributeError: model_name = annotation if module is not None: - annotation = _get_model(module, model_name) + annotation = _get_model(module, model_name) # type: ignore try: if module and _is_model(annotation): diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py index 2b5f148ffd12..2e11b31cb6a4 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py @@ -2,20 +2,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # ------------------------------------ -from ._core import InvokerFactory -from ._core import Prompty - -from ._renderers import MustacheRenderer -from ._parsers import PromptyChatParser +# pylint: disable=unused-import from ._patch import patch_sdk as _patch_sdk, PromptTemplate -# Register the Mustache renderer and parser -InvokerFactory().register_renderer("mustache", MustacheRenderer) -InvokerFactory().register_parser("prompty.chat", PromptyChatParser) - -__all__ = [ - "Prompty", - "PromptTemplate", -] - _patch_sdk() diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_core.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_core.py index 8b1aab27361c..9d1baee3a033 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_core.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_core.py @@ -2,332 +2,351 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
# ------------------------------------ -""" -Adapted from https://github.com/langchain-ai/langchain -MIT License -""" +# mypy: disable-error-code="assignment,attr-defined,index,arg-type" +# pylint: disable=line-too-long,R,consider-iterating-dictionary,raise-missing-from,dangerous-default-value from __future__ import annotations - -import abc -import json import os -import re +from dataclasses import dataclass, field, asdict from pathlib import Path -from typing import Any, Dict, Generic, List, Literal, Optional, Type, TypeVar, Union +from typing import Any, AsyncIterator, Dict, Iterator, List, Literal, Union +from ._tracer import Tracer, to_dict +from ._utils import load_json, load_json_async + + +@dataclass +class ToolCall: + id: str + name: str + arguments: str + + +@dataclass +class PropertySettings: + """PropertySettings class to define the properties of the model + + Attributes + ---------- + type : str + The type of the property + default : Any + The default value of the property + description : str + The description of the property + """ -import yaml -from pydantic import BaseModel, ConfigDict, Field, FilePath + type: Literal["string", "number", "array", "object", "boolean"] + default: Union[str, int, float, List, Dict, bool, None] = field(default=None) + description: str = field(default="") + + +@dataclass +class ModelSettings: + """ModelSettings class to define the model of the prompty + + Attributes + ---------- + api : str + The api of the model + configuration : Dict + The configuration of the model + parameters : Dict + The parameters of the model + response : Dict + The response of the model + """ -T = TypeVar("T") + api: str = field(default="") + configuration: Dict = field(default_factory=dict) + parameters: Dict = field(default_factory=dict) + response: Dict = field(default_factory=dict) -class SimpleModel(BaseModel, Generic[T]): - """Simple model for a single item.""" +@dataclass +class TemplateSettings: + """TemplateSettings class to define the template of the prompty - item: T + Attributes + ---------- + type : str + The type of the template + parser : str + The parser of the template + """ + type: str = field(default="mustache") + parser: str = field(default="") + + +@dataclass +class Prompty: + """Prompty class to define the prompty + + Attributes + ---------- + name : str + The name of the prompty + description : str + The description of the prompty + authors : List[str] + The authors of the prompty + tags : List[str] + The tags of the prompty + version : str + The version of the prompty + base : str + The base of the prompty + basePrompty : Prompty + The base prompty + model : ModelSettings + The model of the prompty + sample : Dict + The sample of the prompty + inputs : Dict[str, PropertySettings] + The inputs of the prompty + outputs : Dict[str, PropertySettings] + The outputs of the prompty + template : TemplateSettings + The template of the prompty + file : FilePath + The file of the prompty + content : Union[str, List[str], Dict] + The content of the prompty + """ -class PropertySettings(BaseModel): - """Property settings for a prompty model.""" + # metadata + name: str = field(default="") + description: str = field(default="") + authors: List[str] = field(default_factory=list) + tags: List[str] = field(default_factory=list) + version: str = field(default="") + base: str = field(default="") + basePrompty: Union[Prompty, None] = field(default=None) + # model + model: ModelSettings = field(default_factory=ModelSettings) - model_config = 
ConfigDict(arbitrary_types_allowed=True) - type: Literal["string", "number", "array", "object", "boolean"] - default: Union[str, int, float, List, Dict, bool] = Field(default=None) - description: str = Field(default="") + # sample + sample: Dict = field(default_factory=dict) + # input / output + inputs: Dict[str, PropertySettings] = field(default_factory=dict) + outputs: Dict[str, PropertySettings] = field(default_factory=dict) -class ModelSettings(BaseModel): - """Model settings for a prompty model.""" + # template + template: TemplateSettings = field(default_factory=TemplateSettings) - api: str = Field(default="") - configuration: dict = Field(default={}) - parameters: dict = Field(default={}) - response: dict = Field(default={}) + file: Union[Path, str] = field(default="") + content: Union[str, List[str], Dict] = field(default="") - def model_dump_safe(self) -> dict: - d = self.model_dump() - d["configuration"] = { - k: "*" * len(v) if "key" in k.lower() or "secret" in k.lower() else v - for k, v in d["configuration"].items() - } + def to_safe_dict(self) -> Dict[str, Any]: + d = {} + if self.model: + d["model"] = asdict(self.model) + _mask_secrets(d, ["model", "configuration"]) + if self.template: + d["template"] = asdict(self.template) + if self.inputs: + d["inputs"] = {k: asdict(v) for k, v in self.inputs.items()} + if self.outputs: + d["outputs"] = {k: asdict(v) for k, v in self.outputs.items()} + if self.file: + d["file"] = str(self.file.as_posix()) if isinstance(self.file, Path) else self.file return d + @staticmethod + def hoist_base_prompty(top: Prompty, base: Prompty) -> Prompty: + top.name = base.name if top.name == "" else top.name + top.description = base.description if top.description == "" else top.description + top.authors = list(set(base.authors + top.authors)) + top.tags = list(set(base.tags + top.tags)) + top.version = base.version if top.version == "" else top.version -class TemplateSettings(BaseModel): - """Template settings for a prompty model.""" + top.model.api = base.model.api if top.model.api == "" else top.model.api + top.model.configuration = param_hoisting(top.model.configuration, base.model.configuration) + top.model.parameters = param_hoisting(top.model.parameters, base.model.parameters) + top.model.response = param_hoisting(top.model.response, base.model.response) - type: str = Field(default="mustache") - parser: str = Field(default="") + top.sample = param_hoisting(top.sample, base.sample) + top.basePrompty = base -class Prompty(BaseModel): - """Base Prompty model.""" + return top - # metadata - name: str = Field(default="") - description: str = Field(default="") - authors: List[str] = Field(default=[]) - tags: List[str] = Field(default=[]) - version: str = Field(default="") - base: str = Field(default="") - basePrompty: Optional[Prompty] = Field(default=None) - # model - model: ModelSettings = Field(default_factory=ModelSettings) - - # sample - sample: dict = Field(default={}) + @staticmethod + def _process_file(file: str, parent: Path) -> Any: + file_path = Path(parent / Path(file)).resolve().absolute() + if file_path.exists(): + items = load_json(file_path) + if isinstance(items, list): + return [Prompty.normalize(value, parent) for value in items] + elif isinstance(items, Dict): + return {key: Prompty.normalize(value, parent) for key, value in items.items()} + else: + return items + else: + raise FileNotFoundError(f"File {file} not found") - # input / output - inputs: Dict[str, PropertySettings] = Field(default={}) - outputs: Dict[str, 
PropertySettings] = Field(default={}) + @staticmethod + async def _process_file_async(file: str, parent: Path) -> Any: + file_path = Path(parent / Path(file)).resolve().absolute() + if file_path.exists(): + items = await load_json_async(file_path) + if isinstance(items, list): + return [Prompty.normalize(value, parent) for value in items] + elif isinstance(items, Dict): + return {key: Prompty.normalize(value, parent) for key, value in items.items()} + else: + return items + else: + raise FileNotFoundError(f"File {file} not found") - # template - template: TemplateSettings + @staticmethod + def _process_env(variable: str, env_error=True, default: Union[str, None] = None) -> Any: + if variable in os.environ.keys(): + return os.environ[variable] + else: + if default: + return default + if env_error: + raise ValueError(f"Variable {variable} not found in environment") - file: FilePath = Field(default="") - content: str = Field(default="") + return "" - def to_safe_dict(self) -> Dict[str, Any]: - d = {} - for k, v in self: - if v != "" and v != {} and v != [] and v is not None: - if k == "model": - d[k] = v.model_dump_safe() - elif k == "template": - d[k] = v.model_dump() - elif k == "inputs" or k == "outputs": - d[k] = {k: v.model_dump() for k, v in v.items()} - elif k == "file": - d[k] = ( - str(self.file.as_posix()) - if isinstance(self.file, Path) - else self.file + @staticmethod + def normalize(attribute: Any, parent: Path, env_error=True) -> Any: + if isinstance(attribute, str): + attribute = attribute.strip() + if attribute.startswith("${") and attribute.endswith("}"): + # check if env or file + variable = attribute[2:-1].split(":") + if variable[0] == "env" and len(variable) > 1: + return Prompty._process_env( + variable[1], + env_error, + variable[2] if len(variable) > 2 else None, ) - elif k == "basePrompty": - # no need to serialize basePrompty - continue - + elif variable[0] == "file" and len(variable) > 1: + return Prompty._process_file(variable[1], parent) else: - d[k] = v - return d - - # generate json representation of the prompty - def to_safe_json(self) -> str: - d = self.to_safe_dict() - return json.dumps(d) + raise ValueError(f"Invalid attribute format ({attribute})") + else: + return attribute + elif isinstance(attribute, list): + return [Prompty.normalize(value, parent) for value in attribute] + elif isinstance(attribute, Dict): + return {key: Prompty.normalize(value, parent) for key, value in attribute.items()} + else: + return attribute @staticmethod - def normalize(attribute: Any, parent: Path, env_error: bool = True) -> Any: + async def normalize_async(attribute: Any, parent: Path, env_error=True) -> Any: if isinstance(attribute, str): attribute = attribute.strip() if attribute.startswith("${") and attribute.endswith("}"): + # check if env or file variable = attribute[2:-1].split(":") - if variable[0] in os.environ.keys(): - return os.environ[variable[0]] + if variable[0] == "env" and len(variable) > 1: + return Prompty._process_env( + variable[1], + env_error, + variable[2] if len(variable) > 2 else None, + ) + elif variable[0] == "file" and len(variable) > 1: + return await Prompty._process_file_async(variable[1], parent) else: - if len(variable) > 1: - return variable[1] - else: - if env_error: - raise ValueError( - f"Variable {variable[0]} not found in environment" - ) - else: - return "" - elif ( - attribute.startswith("file:") - and Path(parent / attribute.split(":")[1]).exists() - ): - with open(parent / attribute.split(":")[1], "r") as f: - items = json.load(f) 
- if isinstance(items, list): - return [Prompty.normalize(value, parent) for value in items] - elif isinstance(items, dict): - return { - key: Prompty.normalize(value, parent) - for key, value in items.items() - } - else: - return items + raise ValueError(f"Invalid attribute format ({attribute})") else: return attribute elif isinstance(attribute, list): - return [Prompty.normalize(value, parent) for value in attribute] - elif isinstance(attribute, dict): - return { - key: Prompty.normalize(value, parent) - for key, value in attribute.items() - } + return [await Prompty.normalize_async(value, parent) for value in attribute] + elif isinstance(attribute, Dict): + return {key: await Prompty.normalize_async(value, parent) for key, value in attribute.items()} else: return attribute -def param_hoisting( - top: Dict[str, Any], bottom: Dict[str, Any], top_key: Any = None -) -> Dict[str, Any]: - """Merge two dictionaries with hoisting of parameters from bottom to top. - - Args: - top: The top dictionary. - bottom: The bottom dictionary. - top_key: The key to hoist from the bottom to the top. - - Returns: - The merged dictionary. - """ +def param_hoisting(top: Dict[str, Any], bottom: Dict[str, Any], top_key: Union[str, None] = None) -> Dict[str, Any]: if top_key: new_dict = {**top[top_key]} if top_key in top else {} else: new_dict = {**top} for key, value in bottom.items(): - if key not in new_dict: + if not key in new_dict: new_dict[key] = value return new_dict -class Invoker(abc.ABC): - """Base class for all invokers.""" - - def __init__(self, prompty: Prompty) -> None: - self.prompty = prompty - - @abc.abstractmethod - def invoke(self, data: BaseModel) -> BaseModel: - pass - - def __call__(self, data: BaseModel) -> BaseModel: - return self.invoke(data) - - -class NoOpParser(Invoker): - """NoOp parser for invokers.""" - - def invoke(self, data: BaseModel) -> BaseModel: - return data - - -class InvokerFactory(object): - """Factory for creating invokers.""" - - _instance = None - _renderers: Dict[str, Type[Invoker]] = {} - _parsers: Dict[str, Type[Invoker]] = {} - _executors: Dict[str, Type[Invoker]] = {} - _processors: Dict[str, Type[Invoker]] = {} - - def __new__(cls) -> InvokerFactory: - if cls._instance is None: - cls._instance = super(InvokerFactory, cls).__new__(cls) - # Add NOOP invokers - cls._renderers["NOOP"] = NoOpParser - cls._parsers["NOOP"] = NoOpParser - cls._executors["NOOP"] = NoOpParser - cls._processors["NOOP"] = NoOpParser - return cls._instance - - def register( - self, - type: Literal["renderer", "parser", "executor", "processor"], - name: str, - invoker: Type[Invoker], - ) -> None: - if type == "renderer": - self._renderers[name] = invoker - elif type == "parser": - self._parsers[name] = invoker - elif type == "executor": - self._executors[name] = invoker - elif type == "processor": - self._processors[name] = invoker - else: - raise ValueError(f"Invalid type {type}") - - def register_renderer(self, name: str, renderer_class: Any) -> None: - self.register("renderer", name, renderer_class) - - def register_parser(self, name: str, parser_class: Any) -> None: - self.register("parser", name, parser_class) - - def register_executor(self, name: str, executor_class: Any) -> None: - self.register("executor", name, executor_class) - - def register_processor(self, name: str, processor_class: Any) -> None: - self.register("processor", name, processor_class) - - def __call__( - self, - type: Literal["renderer", "parser", "executor", "processor"], - name: str, - prompty: Prompty, - data: 
BaseModel, - ) -> Any: - if type == "renderer": - return self._renderers[name](prompty)(data) - elif type == "parser": - return self._parsers[name](prompty)(data) - elif type == "executor": - return self._executors[name](prompty)(data) - elif type == "processor": - return self._processors[name](prompty)(data) - else: - raise ValueError(f"Invalid type {type}") - - def to_dict(self) -> Dict[str, Any]: - return { - "renderers": { - k: f"{v.__module__}.{v.__name__}" for k, v in self._renderers.items() - }, - "parsers": { - k: f"{v.__module__}.{v.__name__}" for k, v in self._parsers.items() - }, - "executors": { - k: f"{v.__module__}.{v.__name__}" for k, v in self._executors.items() - }, - "processors": { - k: f"{v.__module__}.{v.__name__}" for k, v in self._processors.items() - }, - } - - def to_json(self) -> str: - return json.dumps(self.to_dict()) - - -class Frontmatter: - """Class for reading frontmatter from a string or file.""" - - _yaml_delim = r"(?:---|\+\+\+)" - _yaml = r"(.*?)" - _content = r"\s*(.+)$" - _re_pattern = r"^\s*" + _yaml_delim + _yaml + _yaml_delim + _content - _regex = re.compile(_re_pattern, re.S | re.M) - - @classmethod - def read_file(cls, path: str) -> dict[str, Any]: - """Reads file at path and returns dict with separated frontmatter. - See read() for more info on dict return value. - """ - with open(path, encoding="utf-8") as file: - file_contents = file.read() - return cls.read(file_contents) - - @classmethod - def read(cls, string: str) -> dict[str, Any]: - """Returns dict with separated frontmatter from string. - - Returned dict keys: - attributes -- extracted YAML attributes in dict form. - body -- string contents below the YAML separators - frontmatter -- string representation of YAML - """ - fmatter = "" - body = "" - result = cls._regex.search(string) - - if result: - fmatter = result.group(1) - body = result.group(2) - return { - "attributes": yaml.safe_load(fmatter), - "body": body, - "frontmatter": fmatter, - } +class PromptyStream(Iterator): + """PromptyStream class to iterate over LLM stream. + Necessary for Prompty to handle streaming data when tracing.""" + + def __init__(self, name: str, iterator: Iterator): + self.name = name + self.iterator = iterator + self.items: List[Any] = [] + self.__name__ = "PromptyStream" + + def __iter__(self): + return self + + def __next__(self): + try: + # enumerate but add to list + o = self.iterator.__next__() + self.items.append(o) + return o + + except StopIteration: + # StopIteration is raised + # contents are exhausted + if len(self.items) > 0: + with Tracer.start("PromptyStream") as trace: + trace("signature", f"{self.name}.PromptyStream") + trace("inputs", "None") + trace("result", [to_dict(s) for s in self.items]) + + raise StopIteration + + +class AsyncPromptyStream(AsyncIterator): + """AsyncPromptyStream class to iterate over LLM stream. 
+ Necessary for Prompty to handle streaming data when tracing.""" + + def __init__(self, name: str, iterator: AsyncIterator): + self.name = name + self.iterator = iterator + self.items: List[Any] = [] + self.__name__ = "AsyncPromptyStream" + + def __aiter__(self): + return self + + async def __anext__(self): + try: + # enumerate but add to list + o = await self.iterator.__anext__() + self.items.append(o) + return o + + except StopAsyncIteration: + # StopIteration is raised + # contents are exhausted + if len(self.items) > 0: + with Tracer.start("AsyncPromptyStream") as trace: + trace("signature", f"{self.name}.AsyncPromptyStream") + trace("inputs", "None") + trace("result", [to_dict(s) for s in self.items]) + + raise StopAsyncIteration + + +def _mask_secrets(d: Dict[str, Any], path: list[str], patterns: list[str] = ["key", "secret"]) -> bool: + sub_d = d + for key in path: + if key not in sub_d: + return False + sub_d = sub_d[key] + + for k, v in sub_d.items(): + if any([pattern in k.lower() for pattern in patterns]): + sub_d[k] = "*" * len(v) + return True diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_invoker.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_invoker.py new file mode 100644 index 000000000000..d682662e7b01 --- /dev/null +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_invoker.py @@ -0,0 +1,295 @@ +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +# mypy: disable-error-code="return-value,operator" +# pylint: disable=line-too-long,R,docstring-missing-param,docstring-missing-return,docstring-missing-rtype,unnecessary-pass +import abc +from typing import Any, Callable, Dict, Literal +from ._tracer import trace +from ._core import Prompty + + +class Invoker(abc.ABC): + """Abstract class for Invoker + + Attributes + ---------- + prompty : Prompty + The prompty object + name : str + The name of the invoker + + """ + + def __init__(self, prompty: Prompty) -> None: + self.prompty = prompty + self.name = self.__class__.__name__ + + @abc.abstractmethod + def invoke(self, data: Any) -> Any: + """Abstract method to invoke the invoker + + Parameters + ---------- + data : Any + The data to be invoked + + Returns + ------- + Any + The invoked + """ + pass + + @abc.abstractmethod + async def invoke_async(self, data: Any) -> Any: + """Abstract method to invoke the invoker asynchronously + + Parameters + ---------- + data : Any + The data to be invoked + + Returns + ------- + Any + The invoked + """ + pass + + @trace + def run(self, data: Any) -> Any: + """Method to run the invoker + + Parameters + ---------- + data : Any + The data to be invoked + + Returns + ------- + Any + The invoked + """ + return self.invoke(data) + + @trace + async def run_async(self, data: Any) -> Any: + """Method to run the invoker asynchronously + + Parameters + ---------- + data : Any + The data to be invoked + + Returns + ------- + Any + The invoked + """ + return await self.invoke_async(data) + + +class InvokerFactory: + """Factory class for Invoker""" + + _renderers: Dict[str, Invoker] = {} + _parsers: Dict[str, Invoker] = {} + _executors: Dict[str, Invoker] = {} + _processors: Dict[str, Invoker] = {} + + @classmethod + def add_renderer(cls, name: str, invoker: Invoker) -> None: + cls._renderers[name] = invoker + + @classmethod + def add_parser(cls, name: str, invoker: Invoker) -> None: + cls._parsers[name] = invoker + + @classmethod + def add_executor(cls, name: str, 
invoker: Invoker) -> None: + cls._executors[name] = invoker + + @classmethod + def add_processor(cls, name: str, invoker: Invoker) -> None: + cls._processors[name] = invoker + + @classmethod + def register_renderer(cls, name: str) -> Callable: + def inner_wrapper(wrapped_class: Invoker) -> Callable: + cls._renderers[name] = wrapped_class + return wrapped_class # type: ignore + + return inner_wrapper + + @classmethod + def register_parser(cls, name: str) -> Callable: + def inner_wrapper(wrapped_class: Invoker) -> Callable: + cls._parsers[name] = wrapped_class + return wrapped_class # type: ignore + + return inner_wrapper + + @classmethod + def register_executor(cls, name: str) -> Callable: + def inner_wrapper(wrapped_class: Invoker) -> Callable: + cls._executors[name] = wrapped_class + return wrapped_class # type: ignore + + return inner_wrapper + + @classmethod + def register_processor(cls, name: str) -> Callable: + def inner_wrapper(wrapped_class: Invoker) -> Callable: + cls._processors[name] = wrapped_class + return wrapped_class # type: ignore + + return inner_wrapper + + @classmethod + def _get_name( + cls, + type: Literal["renderer", "parser", "executor", "processor"], + prompty: Prompty, + ) -> str: + if type == "renderer": + return prompty.template.type + elif type == "parser": + return f"{prompty.template.parser}.{prompty.model.api}" + elif type == "executor": + return prompty.model.configuration["type"] + elif type == "processor": + return prompty.model.configuration["type"] + else: + raise ValueError(f"Type {type} not found") + + @classmethod + def _get_invoker( + cls, + type: Literal["renderer", "parser", "executor", "processor"], + prompty: Prompty, + ) -> Invoker: + if type == "renderer": + name = prompty.template.type + if name not in cls._renderers: + raise ValueError(f"Renderer {name} not found") + + return cls._renderers[name](prompty) # type: ignore + + elif type == "parser": + name = f"{prompty.template.parser}.{prompty.model.api}" + if name not in cls._parsers: + raise ValueError(f"Parser {name} not found") + + return cls._parsers[name](prompty) # type: ignore + + elif type == "executor": + name = prompty.model.configuration["type"] + if name not in cls._executors: + raise ValueError(f"Executor {name} not found") + + return cls._executors[name](prompty) # type: ignore + + elif type == "processor": + name = prompty.model.configuration["type"] + if name not in cls._processors: + raise ValueError(f"Processor {name} not found") + + return cls._processors[name](prompty) # type: ignore + + else: + raise ValueError(f"Type {type} not found") + + @classmethod + def run( + cls, + type: Literal["renderer", "parser", "executor", "processor"], + prompty: Prompty, + data: Any, + default: Any = None, + ): + name = cls._get_name(type, prompty) + if name.startswith("NOOP") and default is not None: + return default + elif name.startswith("NOOP"): + return data + + invoker = cls._get_invoker(type, prompty) + value = invoker.run(data) + return value + + @classmethod + async def run_async( + cls, + type: Literal["renderer", "parser", "executor", "processor"], + prompty: Prompty, + data: Any, + default: Any = None, + ): + name = cls._get_name(type, prompty) + if name.startswith("NOOP") and default is not None: + return default + elif name.startswith("NOOP"): + return data + invoker = cls._get_invoker(type, prompty) + value = await invoker.run_async(data) + return value + + @classmethod + def run_renderer(cls, prompty: Prompty, data: Any, default: Any = None) -> Any: + return 
cls.run("renderer", prompty, data, default) + + @classmethod + async def run_renderer_async(cls, prompty: Prompty, data: Any, default: Any = None) -> Any: + return await cls.run_async("renderer", prompty, data, default) + + @classmethod + def run_parser(cls, prompty: Prompty, data: Any, default: Any = None) -> Any: + return cls.run("parser", prompty, data, default) + + @classmethod + async def run_parser_async(cls, prompty: Prompty, data: Any, default: Any = None) -> Any: + return await cls.run_async("parser", prompty, data, default) + + @classmethod + def run_executor(cls, prompty: Prompty, data: Any, default: Any = None) -> Any: + return cls.run("executor", prompty, data, default) + + @classmethod + async def run_executor_async(cls, prompty: Prompty, data: Any, default: Any = None) -> Any: + return await cls.run_async("executor", prompty, data, default) + + @classmethod + def run_processor(cls, prompty: Prompty, data: Any, default: Any = None) -> Any: + return cls.run("processor", prompty, data, default) + + @classmethod + async def run_processor_async(cls, prompty: Prompty, data: Any, default: Any = None) -> Any: + return await cls.run_async("processor", prompty, data, default) + + +class InvokerException(Exception): + """Exception class for Invoker""" + + def __init__(self, message: str, type: str) -> None: + super().__init__(message) + self.type = type + + def __str__(self) -> str: + return f"{super().__str__()}. Make sure to pip install any necessary package extras (i.e. could be something like `pip install prompty[{self.type}]`) for {self.type} as well as import the appropriate invokers (i.e. could be something like `import prompty.{self.type}`)." + + +@InvokerFactory.register_renderer("NOOP") +@InvokerFactory.register_parser("NOOP") +@InvokerFactory.register_executor("NOOP") +@InvokerFactory.register_processor("NOOP") +@InvokerFactory.register_parser("prompty.embedding") +@InvokerFactory.register_parser("prompty.image") +@InvokerFactory.register_parser("prompty.completion") +class NoOp(Invoker): + def invoke(self, data: Any) -> Any: + return data + + async def invoke_async(self, data: str) -> Any: + return self.invoke(data) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py index a807951fda43..f7a0c21d8bb8 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py @@ -2,26 +2,21 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
# ------------------------------------ -""" -Adapted from https://github.com/langchain-ai/langchain and https://github.com/noahmorrison/chevron -MIT License -""" - +# pylint: disable=line-too-long,R,consider-using-dict-items,docstring-missing-return,docstring-missing-rtype,docstring-missing-param,global-statement,unused-argument,global-variable-not-assigned,protected-access,logging-fstring-interpolation,deprecated-method from __future__ import annotations - import logging -from collections.abc import Iterator, Mapping, Sequence +from collections.abc import Iterator, Sequence from types import MappingProxyType from typing import ( Any, Dict, List, Literal, + Mapping, Optional, Union, cast, ) - from typing_extensions import TypeAlias logger = logging.getLogger(__name__) @@ -196,9 +191,7 @@ def parse_tag(template: str, l_del: str, r_del: str) -> tuple[tuple[str, str], s # -def tokenize( - template: str, def_ldel: str = "{{", def_rdel: str = "}}" -) -> Iterator[tuple[str, str]]: +def tokenize(template: str, def_ldel: str = "{{", def_rdel: str = "}}") -> Iterator[tuple[str, str]]: """Tokenize a mustache template. Tokenizes a mustache template in a generator fashion, @@ -281,11 +274,7 @@ def tokenize( try: last_section = open_sections.pop() except IndexError as e: - msg = ( - f'Trying to close tag "{tag_key}"\n' - "Looks like it was not opened.\n" - f"line {_CURRENT_LINE + 1}" - ) + msg = f'Trying to close tag "{tag_key}"\n' "Looks like it was not opened.\n" f"line {_CURRENT_LINE + 1}" raise ChevronError(msg) from e if tag_key != last_section: # Otherwise we need to complain @@ -527,9 +516,7 @@ def render( # If we're a variable tag elif tag == "variable": # Add the html escaped key to the output - thing = _get_key( - key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel - ) + thing = _get_key(key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel) if thing is True and key == ".": # if we've coerced into a boolean by accident # (inverted tags do this) @@ -542,9 +529,7 @@ def render( # If we're a no html escape tag elif tag == "no escape": # Just lookup the key and add it - thing = _get_key( - key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel - ) + thing = _get_key(key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel) if not isinstance(thing, str): thing = str(thing) output += thing @@ -552,9 +537,7 @@ def render( # If we're a section tag elif tag == "section": # Get the sections scope - scope = _get_key( - key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel - ) + scope = _get_key(key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel) # If the scope is a callable (as described in # https://mustache.github.io/mustache.5.html) @@ -606,7 +589,7 @@ def render( ), ) - output += rend # type: ignore[reportOperatorIssue] + output += rend # type: ignore[reportOperatorIssue] # If the scope is a sequence, an iterator or generator but not # derived from a string @@ -646,14 +629,12 @@ def render( else: # Otherwise we're just a scope section - scopes.insert(0, scope) # type: ignore[reportArgumentType] + scopes.insert(0, scope) # type: ignore[reportArgumentType] # If we're an inverted section elif tag == "inverted section": # Add the flipped scope to the scopes - scope = _get_key( - key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel - ) + scope = _get_key(key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel) scopes.insert(0, 
cast(Literal[False], not scope)) # If we're a partial diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py index fc2e0a5e0cfa..0e92e84667cd 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py @@ -2,49 +2,38 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # ------------------------------------ -""" -Adapted from https://github.com/langchain-ai/langchain -MIT License -""" -import base64 +# mypy: disable-error-code="union-attr,return-value" +# pylint: disable=line-too-long,R,consider-using-enumerate,docstring-missing-param,docstring-missing-return,docstring-missing-rtype import re -from typing import Dict, List, Type, Union -from pydantic import BaseModel -from azure.ai.inference.models import ( - ChatRequestMessage, - SystemMessage, - UserMessage, - AssistantMessage, - ToolMessage, -) -from ._core import Invoker, Prompty, SimpleModel - - -class RoleMap: - _ROLE_MAP: Dict[str, Type[ChatRequestMessage]] = { - "system": SystemMessage, - "user": UserMessage, - "human": UserMessage, - "assistant": AssistantMessage, - "ai": AssistantMessage, - "function": ToolMessage, - } - ROLES = _ROLE_MAP.keys() - - @classmethod - def get_message_class(cls, role: str) -> Type[ChatRequestMessage]: - return cls._ROLE_MAP[role] +import base64 +from pathlib import Path +from typing import Any +from ._core import Prompty +from ._invoker import Invoker, InvokerFactory +@InvokerFactory.register_parser("prompty.chat") class PromptyChatParser(Invoker): - """Parse a chat prompt into a list of messages.""" + """Prompty Chat Parser""" def __init__(self, prompty: Prompty) -> None: - self.prompty = prompty - self.roles = RoleMap.ROLES - self.path = self.prompty.file.parent + super().__init__(prompty) + self.roles = ["assistant", "function", "system", "user"] + self.path = Path(self.prompty.file).parent def inline_image(self, image_item: str) -> str: + """Inline Image + + Parameters + ---------- + image_item : str + The image item to inline + + Returns + ------- + str + The inlined image + """ # pass through if it's a url or base64 encoded if image_item.startswith("http") or image_item.startswith("data"): return image_item @@ -62,12 +51,22 @@ def inline_image(self, image_item: str) -> str: return f"data:image/jpeg;base64,{base64_image}" else: raise ValueError( - f"Invalid image format {image_path.suffix} - currently only .png " - "and .jpg / .jpeg are supported." + f"Invalid image format {image_path.suffix} - currently only .png and .jpg / .jpeg are supported." 
) - def parse_content(self, content: str) -> Union[str, List]: - """for parsing inline images""" + def parse_content(self, content: str): + """for parsing inline images + + Parameters + ---------- + content : str + The content to parse + + Returns + ------- + any + The parsed content + """ # regular expression to parse markdown images image = r"(?P!\[[^\]]*\])\((?P.*?)(?=\"|\))\)" matches = re.findall(image, content, flags=re.MULTILINE) @@ -77,50 +76,45 @@ def parse_content(self, content: str) -> Union[str, List]: current_chunk = 0 for i in range(len(content_chunks)): # image entry - if ( - current_chunk < len(matches) - and content_chunks[i] == matches[current_chunk][0] - ): + if current_chunk < len(matches) and content_chunks[i] == matches[current_chunk][0]: content_items.append( { "type": "image_url", - "image_url": { - "url": self.inline_image( - matches[current_chunk][1].split(" ")[0].strip() - ) - }, + "image_url": {"url": self.inline_image(matches[current_chunk][1].split(" ")[0].strip())}, } ) # second part of image entry - elif ( - current_chunk < len(matches) - and content_chunks[i] == matches[current_chunk][1] - ): + elif current_chunk < len(matches) and content_chunks[i] == matches[current_chunk][1]: current_chunk += 1 # text entry else: if len(content_chunks[i].strip()) > 0: - content_items.append( - {"type": "text", "text": content_chunks[i].strip()} - ) + content_items.append({"type": "text", "text": content_chunks[i].strip()}) return content_items else: return content - def invoke(self, data: BaseModel) -> BaseModel: - assert isinstance(data, SimpleModel) + def invoke(self, data: str) -> Any: + """Invoke the Prompty Chat Parser + + Parameters + ---------- + data : str + The data to parse + + Returns + ------- + str + The parsed data + """ messages = [] separator = r"(?i)^\s*#?\s*(" + "|".join(self.roles) + r")\s*:\s*\n" # get valid chunks - remove empty items - chunks = [ - item - for item in re.split(separator, data.item, flags=re.MULTILINE) - if len(item.strip()) > 0 - ] + chunks = [item for item in re.split(separator, data, flags=re.MULTILINE) if len(item.strip()) > 0] # if no starter role, then inject system role - if chunks[0].strip().lower() not in self.roles: + if not chunks[0].strip().lower() in self.roles: chunks.insert(0, "system") # if last chunk is role entry, then remove (no content?) @@ -136,4 +130,19 @@ def invoke(self, data: BaseModel) -> BaseModel: content = chunks[i + 1].strip() messages.append({"role": role, "content": self.parse_content(content)}) - return SimpleModel[list](item=messages) + return messages + + async def invoke_async(self, data: str) -> Any: + """Invoke the Prompty Chat Parser (Async) + + Parameters + ---------- + data : str + The data to parse + + Returns + ------- + str + The parsed data + """ + return self.invoke(data) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py index f05d1a2b5f7c..3a6ebf0d1872 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py @@ -2,16 +2,19 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # ------------------------------------ +# pylint: disable=line-too-long,R """Customize generated code here. 
Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize """ +import traceback +from pathlib import Path from typing import Any, Dict, List, Optional from typing_extensions import Self from ._core import Prompty -from ._utils import load, prepare from ._mustache import render +from ._prompty_utils import load, prepare class PromptTemplate: @@ -28,13 +31,19 @@ def from_prompty(cls, file_path: str) -> Self: """ if not file_path: raise ValueError("Please provide file_path") - prompty = load(file_path) + + # Get the absolute path of the file by `traceback.extract_stack()`, it's "-2" because: + # In the stack, the last function is the current function. + # The second last function is the caller function, which is the root of the file_path. + stack = traceback.extract_stack() + caller = Path(stack[-2].filename) + abs_file_path = Path(caller.parent / Path(file_path)).resolve().absolute() + + prompty = load(str(abs_file_path)) return cls(prompty=prompty) @classmethod - def from_string( - cls, prompt_template: str, api: str = "chat", model_name: Optional[str] = None - ) -> Self: + def from_string(cls, prompt_template: str, api: str = "chat", model_name: Optional[str] = None) -> Self: """Initialize a PromptTemplate object from a message template. :param prompt_template: The prompt template string. @@ -81,9 +90,7 @@ def __init__( else: raise ValueError("Please invalid arguments for PromptTemplate") - def create_messages( - self, data: Optional[Dict[str, Any]] = None, **kwargs - ) -> List[Dict[str, Any]]: + def create_messages(self, data: Optional[Dict[str, Any]] = None, **kwargs) -> List[Dict[str, Any]]: """Render the prompt template with the given data. :param data: The data to render the prompt template with. diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py new file mode 100644 index 000000000000..a921137482ac --- /dev/null +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py @@ -0,0 +1,580 @@ +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +# mypy: disable-error-code="assignment" +# pylint: disable=R,docstring-missing-param,docstring-missing-return,docstring-missing-rtype,dangerous-default-value,redefined-outer-name,unused-wildcard-import,wildcard-import,raise-missing-from +import traceback +from pathlib import Path +from typing import Any, Dict, List, Union +from ._tracer import trace +from ._invoker import InvokerFactory +from ._core import ( + ModelSettings, + Prompty, + PropertySettings, + TemplateSettings, + param_hoisting, +) +from ._utils import ( + load_global_config, + load_global_config_async, + load_prompty_async, + load_prompty, +) + +from ._renderers import * +from ._parsers import * + + +@trace(description="Create a headless prompty object for programmatic use.") +def headless( + api: str, + content: Union[str, List[str], dict], + configuration: Dict[str, Any] = {}, + parameters: Dict[str, Any] = {}, + connection: str = "default", +) -> Prompty: + """Create a headless prompty object for programmatic use. 
+ + Parameters + ---------- + api : str + The API to use for the model + content : Union[str, List[str], dict] + The content to process + configuration : Dict[str, Any], optional + The configuration to use, by default {} + parameters : Dict[str, Any], optional + The parameters to use, by default {} + connection : str, optional + The connection to use, by default "default" + + Returns + ------- + Prompty + The headless prompty object + + Example + ------- + >>> import prompty + >>> p = prompty.headless( + api="embedding", + configuration={"type": "azure", "azure_deployment": "text-embedding-ada-002"}, + content="hello world", + ) + >>> emb = prompty.execute(p) + + """ + + # get caller's path (to get relative path for prompty.json) + caller = Path(traceback.extract_stack()[-2].filename) + templateSettings = TemplateSettings(type="NOOP", parser="NOOP") + modelSettings = ModelSettings( + api=api, + configuration=Prompty.normalize( + param_hoisting(configuration, load_global_config(caller.parent, connection)), + caller.parent, + ), + parameters=parameters, + ) + + return Prompty(model=modelSettings, template=templateSettings, content=content) + + +@trace(description="Create a headless prompty object for programmatic use.") +async def headless_async( + api: str, + content: Union[str, List[str], dict], + configuration: Dict[str, Any] = {}, + parameters: Dict[str, Any] = {}, + connection: str = "default", +) -> Prompty: + """Create a headless prompty object for programmatic use. + + Parameters + ---------- + api : str + The API to use for the model + content : Union[str, List[str], dict] + The content to process + configuration : Dict[str, Any], optional + The configuration to use, by default {} + parameters : Dict[str, Any], optional + The parameters to use, by default {} + connection : str, optional + The connection to use, by default "default" + + Returns + ------- + Prompty + The headless prompty object + + Example + ------- + >>> import prompty + >>> p = await prompty.headless_async( + api="embedding", + configuration={"type": "azure", "azure_deployment": "text-embedding-ada-002"}, + content="hello world", + ) + >>> emb = prompty.execute(p) + + """ + + # get caller's path (to get relative path for prompty.json) + caller = Path(traceback.extract_stack()[-2].filename) + templateSettings = TemplateSettings(type="NOOP", parser="NOOP") + + global_config = await load_global_config_async(caller.parent, connection) + c = await Prompty.normalize_async(param_hoisting(configuration, global_config), caller.parent) + + modelSettings = ModelSettings( + api=api, + configuration=c, + parameters=parameters, + ) + + return Prompty(model=modelSettings, template=templateSettings, content=content) + + +def _load_raw_prompty(attributes: dict, content: str, p: Path, global_config: dict): + if "model" not in attributes: + attributes["model"] = {} + + if "configuration" not in attributes["model"]: + attributes["model"]["configuration"] = global_config + else: + attributes["model"]["configuration"] = param_hoisting( + attributes["model"]["configuration"], + global_config, + ) + + # pull model settings out of attributes + try: + model = ModelSettings(**attributes.pop("model")) + except Exception as e: + raise ValueError(f"Error in model settings: {e}") + + # pull template settings + try: + if "template" in attributes: + t = attributes.pop("template") + if isinstance(t, dict): + template = TemplateSettings(**t) + # has to be a string denoting the type + else: + template = TemplateSettings(type=t, parser="prompty") + 
else: + template = TemplateSettings(type="mustache", parser="prompty") + except Exception as e: + raise ValueError(f"Error in template loader: {e}") + + # formalize inputs and outputs + if "inputs" in attributes: + try: + inputs = {k: PropertySettings(**v) for (k, v) in attributes.pop("inputs").items()} + except Exception as e: + raise ValueError(f"Error in inputs: {e}") + else: + inputs = {} + if "outputs" in attributes: + try: + outputs = {k: PropertySettings(**v) for (k, v) in attributes.pop("outputs").items()} + except Exception as e: + raise ValueError(f"Error in outputs: {e}") + else: + outputs = {} + + prompty = Prompty( + **attributes, + model=model, + inputs=inputs, + outputs=outputs, + template=template, + content=content, + file=p, + ) + + return prompty + + +@trace(description="Load a prompty file.") +def load(prompty_file: Union[str, Path], configuration: str = "default") -> Prompty: + """Load a prompty file. + + Parameters + ---------- + prompty_file : str + The path to the prompty file + configuration : str, optional + The configuration to use, by default "default" + + Returns + ------- + Prompty + The loaded prompty object + + Example + ------- + >>> import prompty + >>> p = prompty.load("prompts/basic.prompty") + >>> print(p) + """ + + p = Path(prompty_file) + if not p.is_absolute(): + # get caller's path (take into account trace frame) + caller = Path(traceback.extract_stack()[-3].filename) + p = Path(caller.parent / p).resolve().absolute() + + # load dictionary from prompty file + matter = load_prompty(p) + + attributes = matter["attributes"] + content = matter["body"] + + # normalize attribute dictionary resolve keys and files + attributes = Prompty.normalize(attributes, p.parent) + + # load global configuration + global_config = Prompty.normalize(load_global_config(p.parent, configuration), p.parent) + + prompty = _load_raw_prompty(attributes, content, p, global_config) + + # recursive loading of base prompty + if "base" in attributes: + # load the base prompty from the same directory as the current prompty + base = load(p.parent / attributes["base"]) + prompty = Prompty.hoist_base_prompty(prompty, base) + + return prompty + + +@trace(description="Load a prompty file.") +async def load_async(prompty_file: Union[str, Path], configuration: str = "default") -> Prompty: + """Load a prompty file. 
+ + Parameters + ---------- + prompty_file : str + The path to the prompty file + configuration : str, optional + The configuration to use, by default "default" + + Returns + ------- + Prompty + The loaded prompty object + + Example + ------- + >>> import prompty + >>> p = prompty.load("prompts/basic.prompty") + >>> print(p) + """ + + p = Path(prompty_file) + if not p.is_absolute(): + # get caller's path (take into account trace frame) + caller = Path(traceback.extract_stack()[-3].filename) + p = Path(caller.parent / p).resolve().absolute() + + # load dictionary from prompty file + matter = await load_prompty_async(p) + + attributes = matter["attributes"] + content = matter["body"] + + # normalize attribute dictionary resolve keys and files + attributes = await Prompty.normalize_async(attributes, p.parent) + + # load global configuration + config = await load_global_config_async(p.parent, configuration) + global_config = await Prompty.normalize_async(config, p.parent) + + prompty = _load_raw_prompty(attributes, content, p, global_config) + + # recursive loading of base prompty + if "base" in attributes: + # load the base prompty from the same directory as the current prompty + base = await load_async(p.parent / attributes["base"]) + prompty = Prompty.hoist_base_prompty(prompty, base) + + return prompty + + +@trace(description="Prepare the inputs for the prompt.") +def prepare( + prompt: Prompty, + inputs: Dict[str, Any] = {}, +): + """Prepare the inputs for the prompt. + + Parameters + ---------- + prompt : Prompty + The prompty object + inputs : Dict[str, Any], optional + The inputs to the prompt, by default {} + + Returns + ------- + dict + The prepared and hidrated template shaped to the LLM model + + Example + ------- + >>> import prompty + >>> p = prompty.load("prompts/basic.prompty") + >>> inputs = {"name": "John Doe"} + >>> content = prompty.prepare(p, inputs) + """ + inputs = param_hoisting(inputs, prompt.sample) + + render = InvokerFactory.run_renderer(prompt, inputs, prompt.content) + result = InvokerFactory.run_parser(prompt, render) + + return result + + +@trace(description="Prepare the inputs for the prompt.") +async def prepare_async( + prompt: Prompty, + inputs: Dict[str, Any] = {}, +): + """Prepare the inputs for the prompt. + + Parameters + ---------- + prompt : Prompty + The prompty object + inputs : Dict[str, Any], optional + The inputs to the prompt, by default {} + + Returns + ------- + dict + The prepared and hidrated template shaped to the LLM model + + Example + ------- + >>> import prompty + >>> p = prompty.load("prompts/basic.prompty") + >>> inputs = {"name": "John Doe"} + >>> content = await prompty.prepare_async(p, inputs) + """ + inputs = param_hoisting(inputs, prompt.sample) + + render = await InvokerFactory.run_renderer_async(prompt, inputs, prompt.content) + result = await InvokerFactory.run_parser_async(prompt, render) + + return result + + +@trace(description="Run the prepared Prompty content against the model.") +def run( + prompt: Prompty, + content: Union[dict, list, str], + configuration: Dict[str, Any] = {}, + parameters: Dict[str, Any] = {}, + raw: bool = False, +): + """Run the prepared Prompty content. 
+ + Parameters + ---------- + prompt : Prompty + The prompty object + content : Union[dict, list, str] + The content to process + configuration : Dict[str, Any], optional + The configuration to use, by default {} + parameters : Dict[str, Any], optional + The parameters to use, by default {} + raw : bool, optional + Whether to skip processing, by default False + + Returns + ------- + Any + The result of the prompt + + Example + ------- + >>> import prompty + >>> p = prompty.load("prompts/basic.prompty") + >>> inputs = {"name": "John Doe"} + >>> content = prompty.prepare(p, inputs) + >>> result = prompty.run(p, content) + """ + + if configuration != {}: + prompt.model.configuration = param_hoisting(configuration, prompt.model.configuration) + + if parameters != {}: + prompt.model.parameters = param_hoisting(parameters, prompt.model.parameters) + + result = InvokerFactory.run_executor(prompt, content) + if not raw: + result = InvokerFactory.run_processor(prompt, result) + + return result + + +@trace(description="Run the prepared Prompty content against the model.") +async def run_async( + prompt: Prompty, + content: Union[dict, list, str], + configuration: Dict[str, Any] = {}, + parameters: Dict[str, Any] = {}, + raw: bool = False, +): + """Run the prepared Prompty content. + + Parameters + ---------- + prompt : Prompty + The prompty object + content : Union[dict, list, str] + The content to process + configuration : Dict[str, Any], optional + The configuration to use, by default {} + parameters : Dict[str, Any], optional + The parameters to use, by default {} + raw : bool, optional + Whether to skip processing, by default False + + Returns + ------- + Any + The result of the prompt + + Example + ------- + >>> import prompty + >>> p = prompty.load("prompts/basic.prompty") + >>> inputs = {"name": "John Doe"} + >>> content = await prompty.prepare_async(p, inputs) + >>> result = await prompty.run_async(p, content) + """ + + if configuration != {}: + prompt.model.configuration = param_hoisting(configuration, prompt.model.configuration) + + if parameters != {}: + prompt.model.parameters = param_hoisting(parameters, prompt.model.parameters) + + result = await InvokerFactory.run_executor_async(prompt, content) + if not raw: + result = await InvokerFactory.run_processor_async(prompt, result) + + return result + + +@trace(description="Execute a prompty") +def execute( + prompt: Union[str, Prompty], + configuration: Dict[str, Any] = {}, + parameters: Dict[str, Any] = {}, + inputs: Dict[str, Any] = {}, + raw: bool = False, + config_name: str = "default", +): + """Execute a prompty. 
+ + Parameters + ---------- + prompt : Union[str, Prompty] + The prompty object or path to the prompty file + configuration : Dict[str, Any], optional + The configuration to use, by default {} + parameters : Dict[str, Any], optional + The parameters to use, by default {} + inputs : Dict[str, Any], optional + The inputs to the prompt, by default {} + raw : bool, optional + Whether to skip processing, by default False + connection : str, optional + The connection to use, by default "default" + + Returns + ------- + Any + The result of the prompt + + Example + ------- + >>> import prompty + >>> inputs = {"name": "John Doe"} + >>> result = prompty.execute("prompts/basic.prompty", inputs=inputs) + """ + if isinstance(prompt, str): + path = Path(prompt) + if not path.is_absolute(): + # get caller's path (take into account trace frame) + caller = Path(traceback.extract_stack()[-3].filename) + path = Path(caller.parent / path).resolve().absolute() + prompt = load(path, config_name) + + # prepare content + content = prepare(prompt, inputs) + + # run LLM model + result = run(prompt, content, configuration, parameters, raw) + + return result + + +@trace(description="Execute a prompty") +async def execute_async( + prompt: Union[str, Prompty], + configuration: Dict[str, Any] = {}, + parameters: Dict[str, Any] = {}, + inputs: Dict[str, Any] = {}, + raw: bool = False, + config_name: str = "default", +): + """Execute a prompty. + + Parameters + ---------- + prompt : Union[str, Prompty] + The prompty object or path to the prompty file + configuration : Dict[str, Any], optional + The configuration to use, by default {} + parameters : Dict[str, Any], optional + The parameters to use, by default {} + inputs : Dict[str, Any], optional + The inputs to the prompt, by default {} + raw : bool, optional + Whether to skip processing, by default False + connection : str, optional + The connection to use, by default "default" + + Returns + ------- + Any + The result of the prompt + + Example + ------- + >>> import prompty + >>> inputs = {"name": "John Doe"} + >>> result = await prompty.execute_async("prompts/basic.prompty", inputs=inputs) + """ + if isinstance(prompt, str): + path = Path(prompt) + if not path.is_absolute(): + # get caller's path (take into account trace frame) + caller = Path(traceback.extract_stack()[-3].filename) + path = Path(caller.parent / path).resolve().absolute() + prompt = await load_async(path, config_name) + + # prepare content + content = await prepare_async(prompt, inputs) + + # run LLM model + result = await run_async(prompt, content, configuration, parameters, raw) + + return result diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_renderers.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_renderers.py index 31af7d10538e..0d682a7fe151 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_renderers.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_renderers.py @@ -2,23 +2,29 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
# ------------------------------------ -""" -Adapted from https://github.com/langchain-ai/langchain -MIT License -""" -from pydantic import BaseModel +# mypy: disable-error-code="union-attr,assignment,arg-type" +from pathlib import Path +from ._core import Prompty +from ._invoker import Invoker, InvokerFactory from ._mustache import render -from ._core import Invoker, Prompty, SimpleModel - +@InvokerFactory.register_renderer("mustache") class MustacheRenderer(Invoker): """Render a mustache template.""" def __init__(self, prompty: Prompty) -> None: - self.prompty = prompty + super().__init__(prompty) + self.templates = {} + cur_prompt = self.prompty + while cur_prompt: + self.templates[Path(cur_prompt.file).name] = cur_prompt.content + cur_prompt = cur_prompt.basePrompty + self.name = Path(self.prompty.file).name + + def invoke(self, data: str) -> str: + generated = render(self.prompty.content, data) # type: ignore + return generated - def invoke(self, data: BaseModel) -> BaseModel: - assert isinstance(data, SimpleModel) - generated = render(self.prompty.content, data.item) - return SimpleModel[str](item=generated) + async def invoke_async(self, data: str) -> str: + return self.invoke(data) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_tracer.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_tracer.py new file mode 100644 index 000000000000..24f800b465f4 --- /dev/null +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_tracer.py @@ -0,0 +1,316 @@ +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +# mypy: disable-error-code="union-attr,arg-type,misc,return-value,assignment,func-returns-value" +# pylint: disable=R,redefined-outer-name,bare-except,unspecified-encoding +import os +import json +import inspect +import traceback +import importlib +import contextlib +from pathlib import Path +from numbers import Number +from datetime import datetime +from functools import wraps, partial +from typing import Any, Callable, Dict, Iterator, List, Union + + +# clean up key value pairs for sensitive values +def sanitize(key: str, value: Any) -> Any: + if isinstance(value, str) and any([s in key.lower() for s in ["key", "token", "secret", "password", "credential"]]): + return len(str(value)) * "*" + + if isinstance(value, dict): + return {k: sanitize(k, v) for k, v in value.items()} + + return value + + +class Tracer: + _tracers: Dict[str, Callable[[str], Iterator[Callable[[str, Any], None]]]] = {} + + @classmethod + def add(cls, name: str, tracer: Callable[[str], Iterator[Callable[[str, Any], None]]]) -> None: + cls._tracers[name] = tracer + + @classmethod + def clear(cls) -> None: + cls._tracers = {} + + @classmethod + @contextlib.contextmanager + def start(cls, name: str) -> Iterator[Callable[[str, Any], None]]: + with contextlib.ExitStack() as stack: + traces: List[Any] = [stack.enter_context(tracer(name)) for tracer in cls._tracers.values()] # type: ignore + yield lambda key, value: [ # type: ignore + # normalize and sanitize any trace values + trace(key, sanitize(key, to_dict(value))) + for trace in traces + ] + + +def to_dict(obj: Any) -> Union[Dict[str, Any], List[Dict[str, Any]], str, Number, bool]: + # simple json types + if isinstance(obj, str) or isinstance(obj, Number) or isinstance(obj, bool): + return obj + + # datetime + if isinstance(obj, datetime): + return obj.isoformat() + + # safe Prompty obj serialization + if type(obj).__name__ 
== "Prompty": + return obj.to_safe_dict() + + # safe PromptyStream obj serialization + if type(obj).__name__ == "PromptyStream": + return "PromptyStream" + + if type(obj).__name__ == "AsyncPromptyStream": + return "AsyncPromptyStream" + + # recursive list and dict + if isinstance(obj, List): + return [to_dict(item) for item in obj] # type: ignore + + if isinstance(obj, Dict): + return {k: v if isinstance(v, str) else to_dict(v) for k, v in obj.items()} + + if isinstance(obj, Path): + return str(obj) + + # cast to string otherwise... + return str(obj) + + +def _name(func: Callable, args): + if hasattr(func, "__qualname__"): + signature = f"{func.__module__}.{func.__qualname__}" + else: + signature = f"{func.__module__}.{func.__name__}" + + # core invoker gets special treatment prompty.invoker.Invoker + core_invoker = signature.startswith("prompty.invoker.Invoker.run") + if core_invoker: + name = type(args[0]).__name__ + if signature.endswith("async"): + signature = f"{args[0].__module__}.{args[0].__class__.__name__}.invoke_async" + else: + signature = f"{args[0].__module__}.{args[0].__class__.__name__}.invoke" + else: + name = func.__name__ + + return name, signature + + +def _inputs(func: Callable, args, kwargs) -> dict: + ba = inspect.signature(func).bind(*args, **kwargs) + ba.apply_defaults() + + inputs = {k: to_dict(v) for k, v in ba.arguments.items() if k != "self"} + + return inputs + + +def _results(result: Any) -> Union[Dict, List[Dict], str, Number, bool]: + return to_dict(result) if result is not None else "None" + + +def _trace_sync(func: Union[Callable, None] = None, **okwargs: Any) -> Callable: + + @wraps(func) # type: ignore + def wrapper(*args, **kwargs): + name, signature = _name(func, args) # type: ignore + with Tracer.start(name) as trace: + trace("signature", signature) + + # support arbitrary keyword + # arguments for trace decorator + for k, v in okwargs.items(): + trace(k, to_dict(v)) + + inputs = _inputs(func, args, kwargs) # type: ignore + trace("inputs", inputs) + + try: + result = func(*args, **kwargs) # type: ignore + trace("result", _results(result)) + except Exception as e: + trace( + "result", + { + "exception": { + "type": type(e), + "traceback": (traceback.format_tb(tb=e.__traceback__) if e.__traceback__ else None), + "message": str(e), + "args": to_dict(e.args), + } + }, + ) + raise e + + return result + + return wrapper + + +def _trace_async(func: Union[Callable, None] = None, **okwargs: Any) -> Callable: + + @wraps(func) # type: ignore + async def wrapper(*args, **kwargs): + name, signature = _name(func, args) # type: ignore + with Tracer.start(name) as trace: + trace("signature", signature) + + # support arbitrary keyword + # arguments for trace decorator + for k, v in okwargs.items(): + trace(k, to_dict(v)) + + inputs = _inputs(func, args, kwargs) # type: ignore + trace("inputs", inputs) + try: + result = await func(*args, **kwargs) # type: ignore + trace("result", _results(result)) + except Exception as e: + trace( + "result", + { + "exception": { + "type": type(e), + "traceback": (traceback.format_tb(tb=e.__traceback__) if e.__traceback__ else None), + "message": str(e), + "args": to_dict(e.args), + } + }, + ) + raise e + + return result + + return wrapper + + +def trace(func: Union[Callable, None] = None, **kwargs: Any) -> Callable: + if func is None: + return partial(trace, **kwargs) + wrapped_method = _trace_async if inspect.iscoroutinefunction(func) else _trace_sync + return wrapped_method(func, **kwargs) + + +class PromptyTracer: + def 
__init__(self, output_dir: Union[str, None] = None) -> None: + if output_dir: + self.output = Path(output_dir).resolve().absolute() + else: + self.output = Path(Path(os.getcwd()) / ".runs").resolve().absolute() + + if not self.output.exists(): + self.output.mkdir(parents=True, exist_ok=True) + + self.stack: List[Dict[str, Any]] = [] + + @contextlib.contextmanager + def tracer(self, name: str) -> Iterator[Callable[[str, Any], None]]: + try: + self.stack.append({"name": name}) + frame = self.stack[-1] + frame["__time"] = { + "start": datetime.now(), + } + + def add(key: str, value: Any) -> None: + if key not in frame: + frame[key] = value + # multiple values creates list + else: + if isinstance(frame[key], list): + frame[key].append(value) + else: + frame[key] = [frame[key], value] + + yield add + finally: + frame = self.stack.pop() + start: datetime = frame["__time"]["start"] + end: datetime = datetime.now() + + # add duration to frame + frame["__time"] = { + "start": start.strftime("%Y-%m-%dT%H:%M:%S.%f"), + "end": end.strftime("%Y-%m-%dT%H:%M:%S.%f"), + "duration": int((end - start).total_seconds() * 1000), + } + + # hoist usage to parent frame + if "result" in frame and isinstance(frame["result"], dict): + if "usage" in frame["result"]: + frame["__usage"] = self.hoist_item( + frame["result"]["usage"], + frame["__usage"] if "__usage" in frame else {}, + ) + + # streamed results may have usage as well + if "result" in frame and isinstance(frame["result"], list): + for result in frame["result"]: + if isinstance(result, dict) and "usage" in result and isinstance(result["usage"], dict): + frame["__usage"] = self.hoist_item( + result["usage"], + frame["__usage"] if "__usage" in frame else {}, + ) + + # add any usage frames from below + if "__frames" in frame: + for child in frame["__frames"]: + if "__usage" in child: + frame["__usage"] = self.hoist_item( + child["__usage"], + frame["__usage"] if "__usage" in frame else {}, + ) + + # if stack is empty, dump the frame + if len(self.stack) == 0: + self.write_trace(frame) + # otherwise, append the frame to the parent + else: + if "__frames" not in self.stack[-1]: + self.stack[-1]["__frames"] = [] + self.stack[-1]["__frames"].append(frame) + + def hoist_item(self, src: Dict[str, Any], cur: Dict[str, Any]) -> Dict[str, Any]: + for key, value in src.items(): + if value is None or isinstance(value, list) or isinstance(value, dict): + continue + try: + if key not in cur: + cur[key] = value + else: + cur[key] += value + except: + continue + + return cur + + def write_trace(self, frame: Dict[str, Any]) -> None: + trace_file = self.output / f"{frame['name']}.{datetime.now().strftime('%Y%m%d.%H%M%S')}.tracy" + + v = importlib.metadata.version("prompty") # type: ignore + enriched_frame = { + "runtime": "python", + "version": v, + "trace": frame, + } + + with open(trace_file, "w") as f: + json.dump(enriched_frame, f, indent=4) + + +@contextlib.contextmanager +def console_tracer(name: str) -> Iterator[Callable[[str, Any], None]]: + try: + print(f"Starting {name}") + yield lambda key, value: print(f"{key}:\n{json.dumps(to_dict(value), indent=4)}") + finally: + print(f"Ending {name}") diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py index bc96c0cb9990..ec04be0c73f4 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py @@ -2,259 +2,106 @@ # Copyright (c) Microsoft Corporation. 
# Licensed under the MIT License. # ------------------------------------ -""" -Adapted from https://github.com/langchain-ai/langchain -MIT License -""" -import traceback +# mypy: disable-error-code="import-untyped,return-value" +# pylint: disable=line-too-long,R,wrong-import-order,global-variable-not-assigned) +import re +import yaml +import json +import aiofiles +from typing import Any, Dict, Union from pathlib import Path -from typing import Any, Dict, List, Union -from ._core import ( - Frontmatter, - InvokerFactory, - ModelSettings, - Prompty, - PropertySettings, - SimpleModel, - TemplateSettings, - param_hoisting, +_yaml_regex = re.compile( + r"^\s*" + r"(?:---|\+\+\+)" + r"(.*?)" + r"(?:---|\+\+\+)" + r"\s*(.+)$", + re.S | re.M, ) -def load(prompt_path: str, configuration: str = "default") -> Prompty: - """Load a prompty file and return a Prompty object. +def load_text(file_path, encoding="utf-8"): + with open(file_path, "r", encoding=encoding) as file: + return file.read() - Args: - prompt_path: The path to the prompty file. - configuration: The configuration to use. Defaults to "default". - Returns: - The Prompty object. - """ - file_path = Path(prompt_path) - if not file_path.is_absolute(): - # get caller's path (take into account trace frame) - caller = Path(traceback.extract_stack()[-3].filename) - file_path = Path(caller.parent / file_path).resolve().absolute() +async def load_text_async(file_path, encoding="utf-8"): + async with aiofiles.open(file_path, mode="r", encoding=encoding) as f: + content = await f.read() + return content - # load dictionary from prompty file - matter = Frontmatter.read_file(file_path.__fspath__()) - attributes = matter["attributes"] - content = matter["body"] - # normalize attribute dictionary resolve keys and files - attributes = Prompty.normalize(attributes, file_path.parent) +def load_json(file_path, encoding="utf-8"): + return json.loads(load_text(file_path, encoding=encoding)) - # load global configuration - if "model" not in attributes: - attributes["model"] = {"api": "chat"} - # pull model settings out of attributes - try: - model = ModelSettings(**attributes.pop("model")) - except Exception as e: - raise ValueError(f"Error in model settings: {e}") +async def load_json_async(file_path, encoding="utf-8"): + # async file open + content = await load_text_async(file_path, encoding=encoding) + return json.loads(content) - # pull template settings - try: - if "template" in attributes: - t = attributes.pop("template") - if isinstance(t, dict): - template = TemplateSettings(**t) - # has to be a string denoting the type - else: - template = TemplateSettings(type=t, parser="prompty") - else: - template = TemplateSettings(type="mustache", parser="prompty") - except Exception as e: - raise ValueError(f"Error in template loader: {e}") - # formalize inputs and outputs - if "inputs" in attributes: - try: - inputs = { - k: PropertySettings(**v) for (k, v) in attributes.pop("inputs").items() - } - except Exception as e: - raise ValueError(f"Error in inputs: {e}") - else: - inputs = {} - if "outputs" in attributes: - try: - outputs = { - k: PropertySettings(**v) for (k, v) in attributes.pop("outputs").items() - } - except Exception as e: - raise ValueError(f"Error in outputs: {e}") - else: - outputs = {} +def _find_global_config(prompty_path: Path = Path.cwd()) -> Union[Path, None]: + prompty_config = list(Path.cwd().glob("**/prompty.json")) - # recursive loading of base prompty - if "base" in attributes: - # load the base prompty from the same 
directory as the current prompty - base = load(file_path.parent / attributes["base"]) - # hoist the base prompty's attributes to the current prompty - model.api = base.model.api if model.api == "" else model.api - model.configuration = param_hoisting( - model.configuration, base.model.configuration - ) - model.parameters = param_hoisting(model.parameters, base.model.parameters) - model.response = param_hoisting(model.response, base.model.response) - attributes["sample"] = param_hoisting(attributes, base.sample, "sample") - - p = Prompty( - **attributes, - model=model, - inputs=inputs, - outputs=outputs, - template=template, - content=content, - file=file_path, - basePrompty=base, - ) + if len(prompty_config) > 0: + return sorted( + [c for c in prompty_config if len(c.parent.parts) <= len(prompty_path.parts)], + key=lambda p: len(p.parts), + )[-1] else: - p = Prompty( - **attributes, - model=model, - inputs=inputs, - outputs=outputs, - template=template, - content=content, - file=file_path, - ) - return p - + return None -def prepare( - prompt: Prompty, - inputs: Dict[str, Any] = {}, -) -> Any: - """Prepare the inputs for the prompty. - Args: - prompt: The Prompty object. - inputs: The inputs to the prompty. Defaults to {}. +def load_global_config(prompty_path: Path = Path.cwd(), configuration: str = "default") -> Dict[str, Any]: + # prompty.config laying around? + config = _find_global_config(prompty_path) - Returns: - The prepared inputs. - """ - invoker = InvokerFactory() - - inputs = param_hoisting(inputs, prompt.sample) - - if prompt.template.type == "NOOP": - render = prompt.content - else: - # render - result = invoker( - "renderer", - prompt.template.type, - prompt, - SimpleModel(item=inputs), - ) - render = result.item - - if prompt.template.parser == "NOOP": - result = render - else: - # parse - result = invoker( - "parser", - f"{prompt.template.parser}.{prompt.model.api}", - prompt, - SimpleModel(item=result.item), # type: ignore[reportPossiblyUnboundVariable] - ) - - if isinstance(result, SimpleModel): - return result.item - else: - return result - - -def run( - prompt: Prompty, - content: Union[Dict, List, str], - configuration: Dict[str, Any] = {}, - parameters: Dict[str, Any] = {}, - raw: bool = False, -) -> Any: - """Run the prompty. - - Args: - prompt: The Prompty object. - content: The content to run the prompty on. - configuration: The configuration to use. Defaults to {}. - parameters: The parameters to use. Defaults to {}. - raw: Whether to return the raw output. Defaults to False. - - Returns: - The result of running the prompty. - """ - invoker = InvokerFactory() + # if there is one load it + if config is not None: + c = load_json(config) + if configuration in c: + return c[configuration] + else: + raise ValueError(f'Item "{configuration}" not found in "{config}"') - if configuration != {}: - prompt.model.configuration = param_hoisting( - configuration, prompt.model.configuration - ) + return {} - if parameters != {}: - prompt.model.parameters = param_hoisting(parameters, prompt.model.parameters) - # execute - result = invoker( - "executor", - prompt.model.configuration["type"], - prompt, - SimpleModel(item=content), - ) +async def load_global_config_async(prompty_path: Path = Path.cwd(), configuration: str = "default") -> Dict[str, Any]: + # prompty.config laying around? + config = _find_global_config(prompty_path) - # skip? 
- if not raw: - # process - result = invoker( - "processor", - prompt.model.configuration["type"], - prompt, - result, - ) + # if there is one load it + if config is not None: + c = await load_json_async(config) + if configuration in c: + return c[configuration] + else: + raise ValueError(f'Item "{configuration}" not found in "{config}"') - if isinstance(result, SimpleModel): - return result.item - else: - return result + return {} -def execute( - prompt: Union[str, Prompty], - configuration: Dict[str, Any] = {}, - parameters: Dict[str, Any] = {}, - inputs: Dict[str, Any] = {}, - raw: bool = False, - connection: str = "default", -) -> Any: - """Execute a prompty. +def load_prompty(file_path, encoding="utf-8") -> Dict[str, Any]: + contents = load_text(file_path, encoding=encoding) + return parse(contents) - Args: - prompt: The prompt to execute. - Can be a path to a prompty file or a Prompty object. - configuration: The configuration to use. Defaults to {}. - parameters: The parameters to use. Defaults to {}. - inputs: The inputs to the prompty. Defaults to {}. - raw: Whether to return the raw output. Defaults to False. - connection: The connection to use. Defaults to "default". - Returns: - The result of executing the prompty. - """ +async def load_prompty_async(file_path, encoding="utf-8"): + contents = await load_text_async(file_path, encoding=encoding) + return parse(contents) - if isinstance(prompt, str): - prompt = load(prompt, connection) - # prepare content - content = prepare(prompt, inputs) +def parse(contents): + global _yaml_regex - # run LLM model - result = run(prompt, content, configuration, parameters, raw) + fmatter = "" + body = "" + result = _yaml_regex.search(contents) - return result + if result: + fmatter = result.group(1) + body = result.group(2) + return { + "attributes": yaml.load(fmatter, Loader=yaml.SafeLoader), + "body": body, + "frontmatter": fmatter, + } diff --git a/sdk/ai/azure-ai-inference/cspell.json b/sdk/ai/azure-ai-inference/cspell.json index 587698f9c2b1..a6eee0bd7318 100644 --- a/sdk/ai/azure-ai-inference/cspell.json +++ b/sdk/ai/azure-ai-inference/cspell.json @@ -1,9 +1,12 @@ { "ignoreWords": [ + "aiofiles", "dels", "fmatter", "fspath", + "fstring", "ldel", + "okwargs", "prompty", "rdel" ] diff --git a/sdk/ai/azure-ai-inference/dev_requirements.txt b/sdk/ai/azure-ai-inference/dev_requirements.txt index 9c82a165e327..6df9573fbc63 100644 --- a/sdk/ai/azure-ai-inference/dev_requirements.txt +++ b/sdk/ai/azure-ai-inference/dev_requirements.txt @@ -3,4 +3,8 @@ ../../core/azure-core-tracing-opentelemetry ../../monitor/azure-monitor-opentelemetry aiohttp -opentelemetry-sdk \ No newline at end of file +opentelemetry-sdk +aiofiles +dataclasses +types-pyyaml +types-aiofiles \ No newline at end of file diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py index 55de9e004102..29d02753c649 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py @@ -23,7 +23,7 @@ `your-azure-region` is the Azure region where your model is deployed. 2) AZURE_AI_CHAT_KEY - Your model key (a 32-character string). Keep it secret. 
""" -# mypy: disable-error-code="union-attr" +# mypy: disable-error-code="union-attr,arg-type" # pyright: reportAttributeAccessIssue=false @@ -41,7 +41,6 @@ def sample_chat_completions_from_input_prompt_string(): print("Set them before running this sample.") exit() - prompt_template_str = """ system: You are an AI assistant in a hotel. You help guests with their requests and provide information about the hotel and its services. @@ -59,25 +58,22 @@ def sample_chat_completions_from_input_prompt_string(): user: {{input}} """ - prompt_template = PromptTemplate.from_string( - api = "chat", - prompt_template = prompt_template_str - ) + prompt_template = PromptTemplate.from_string(api="chat", prompt_template=prompt_template_str) - query = "When I arrived, can I still have breakfast?" + input = "When I arrived, can I still have breakfast?" rules = [ - { "rule": "The check-in time is 3pm" }, - { "rule": "The check-out time is 11am" }, - { "rule": "Breakfast is served from 7am to 10am" }, + {"rule": "The check-in time is 3pm"}, + {"rule": "The check-out time is 11am"}, + {"rule": "Breakfast is served from 7am to 10am"}, ] chat_history = [ - { "role": "user", "content": "I'll arrive at 2pm. What's the check-in and check-out time?" }, - { "role": "system", "content": "The check-in time is 3 PM, and the check-out time is 11 AM." }, + {"role": "user", "content": "I'll arrive at 2pm. What's the check-in and check-out time?"}, + {"role": "system", "content": "The check-in time is 3 PM, and the check-out time is 11 AM."}, ] - messages = prompt_template.create_messages(query=query, rules=rules, chat_history=chat_history) + messages = prompt_template.create_messages(input=input, rules=rules, chat_history=chat_history) client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) - response = client.complete(messages=messages) # type: ignore[reportCallIssue, reportArgumentType] + response = client.complete(messages=messages) # type: ignore[reportCallIssue, reportArgumentType] print(response.choices[0].message.content) diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py index a1e71112ce61..bb5b671e894d 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py @@ -46,20 +46,17 @@ def sample_chat_completions_from_input_prompty(): input = "When I arrived, can I still have breakfast?" rules = [ - { "rule": "The check-in time is 3pm" }, - { "rule": "The check-out time is 11am" }, - { "rule": "Breakfast is served from 7am to 10am" }, + {"rule": "The check-in time is 3pm"}, + {"rule": "The check-out time is 11am"}, + {"rule": "Breakfast is served from 7am to 10am"}, ] chat_history = [ - { "role": "user", "content": "I'll arrive at 2pm. What's the check-in and check-out time?" }, - { "role": "system", "content": "The check-in time is 3 PM, and the check-out time is 11 AM." }, + {"role": "user", "content": "I'll arrive at 2pm. 
What's the check-in and check-out time?"}, + {"role": "system", "content": "The check-in time is 3 PM, and the check-out time is 11 AM."}, ] messages = prompt_template.create_messages(input=input, rules=rules, chat_history=chat_history) - client = ChatCompletionsClient( - endpoint=endpoint, - credential=AzureKeyCredential(key) - ) + client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) response = client.complete( messages=messages, diff --git a/sdk/ai/azure-ai-inference/setup.py b/sdk/ai/azure-ai-inference/setup.py index 6b5a4d66a8e7..f6a2bea03eb4 100644 --- a/sdk/ai/azure-ai-inference/setup.py +++ b/sdk/ai/azure-ai-inference/setup.py @@ -66,8 +66,6 @@ "isodate>=0.6.1", "azure-core>=1.30.0", "typing-extensions>=4.6.0", - "pydantic>=2.0.0", - "types-PyYAML", ], python_requires=">=3.8", extras_require={"opentelemetry": ["azure-core-tracing-opentelemetry"]}, diff --git a/sdk/ai/azure-ai-inference/tests/sample1_with_secrets.prompty b/sdk/ai/azure-ai-inference/tests/sample1_with_secrets.prompty new file mode 100644 index 000000000000..8451c02b942e --- /dev/null +++ b/sdk/ai/azure-ai-inference/tests/sample1_with_secrets.prompty @@ -0,0 +1,34 @@ +--- +name: Basic Prompt +description: A basic prompt that uses the GPT-3 chat API to answer questions +authors: + - author_1 + - author_2 +model: + api: chat + configuration: + azure_deployment: gpt-4o-mini + type: azure_openai + api_version: test_version + api_key: test_key + api_secret: test_secret + parameters: + temperature: 1 + frequency_penalty: 0.5 + presence_penalty: 0.5 +--- +system: +You are an AI assistant in a hotel. You help guests with their requests and provide information about the hotel and its services. + +# context +{{#rules}} +{{rule}} +{{/rules}} + +{{#chat_history}} +{{role}}: +{{content}} +{{/chat_history}} + +user: +{{input}} diff --git a/sdk/ai/azure-ai-inference/tests/test_prompts.py b/sdk/ai/azure-ai-inference/tests/test_prompts.py index 64f41f9f6e92..b64969f29988 100644 --- a/sdk/ai/azure-ai-inference/tests/test_prompts.py +++ b/sdk/ai/azure-ai-inference/tests/test_prompts.py @@ -26,9 +26,9 @@ def test_prompt_template_from_prompty(self, **kwargs): input = "What's the check-in and check-out time?" rules = [ - { "rule": "The check-in time is 3pm" }, - { "rule": "The check-out time is 11am" }, - { "rule": "Breakfast is served from 7am to 10am" }, + {"rule": "The check-in time is 3pm"}, + {"rule": "The check-out time is 11am"}, + {"rule": "Breakfast is served from 7am to 10am"}, ] messages = prompt_template.create_messages(input=input, rules=rules) assert len(messages) == 2 @@ -37,12 +37,19 @@ def test_prompt_template_from_prompty(self, **kwargs): assert messages[1]["role"] == "user" assert messages[1]["content"] == "What's the check-in and check-out time?" 
+ def test_prompt_template_from_prompty_with_masked_secrets(self, **kwargs): + script_dir = os.path.dirname(os.path.abspath(__file__)) + prompty_file_path = os.path.join(script_dir, "sample1_with_secrets.prompty") + prompt_template = PromptTemplate.from_prompty(prompty_file_path) + assert prompt_template.prompty.model.configuration["api_key"] == "test_key" + assert prompt_template.prompty.model.configuration["api_secret"] == "test_secret" + telemetry_dict = prompt_template.prompty.to_safe_dict() + assert telemetry_dict["model"]["configuration"]["api_key"] == "********" + assert telemetry_dict["model"]["configuration"]["api_secret"] == "***********" + def test_prompt_template_from_message(self, **kwargs): prompt_template_str = "system prompt template text\nuser:\n{{input}}" - prompt_template = PromptTemplate.from_string( - api = "chat", - prompt_template = prompt_template_str - ) + prompt_template = PromptTemplate.from_string(api="chat", prompt_template=prompt_template_str) input = "user question input text" messages = prompt_template.create_messages(input=input) assert len(messages) == 1 @@ -67,19 +74,16 @@ def test_prompt_template_from_message_with_tags(self, **kwargs): user: {{input}} """ - prompt_template = PromptTemplate.from_string( - api = "chat", - prompt_template = prompt_template_str - ) + prompt_template = PromptTemplate.from_string(api="chat", prompt_template=prompt_template_str) input = "When I arrived, can I still have breakfast?" rules = [ - { "rule": "The check-in time is 3pm" }, - { "rule": "The check-out time is 11am" }, - { "rule": "Breakfast is served from 7am to 10am" }, + {"rule": "The check-in time is 3pm"}, + {"rule": "The check-out time is 11am"}, + {"rule": "Breakfast is served from 7am to 10am"}, ] chat_history = [ - { "role": "user", "content": "I'll arrive at 2pm. What's the check-in and check-out time?" }, - { "role": "system", "content": "The check-in time is 3 PM, and the check-out time is 11 AM." }, + {"role": "user", "content": "I'll arrive at 2pm. 
What's the check-in and check-out time?"},
+            {"role": "system", "content": "The check-in time is 3 PM, and the check-out time is 11 AM."},
+        ]
+        messages = prompt_template.create_messages(input=input, rules=rules, chat_history=chat_history)
+        assert len(messages) == 1

From 4b43b460168526a2d0efffeee35eed7e62c28cd5 Mon Sep 17 00:00:00 2001
From: David Wu
Date: Mon, 4 Nov 2024 12:01:53 -0800
Subject: [PATCH 20/21] Fix PR comment

---
 sdk/ai/azure-ai-inference/tests/test_prompts.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sdk/ai/azure-ai-inference/tests/test_prompts.py b/sdk/ai/azure-ai-inference/tests/test_prompts.py
index b64969f29988..0e001e282839 100644
--- a/sdk/ai/azure-ai-inference/tests/test_prompts.py
+++ b/sdk/ai/azure-ai-inference/tests/test_prompts.py
@@ -4,10 +4,9 @@
 # ------------------------------------
 import os
 from azure.ai.inference.prompts import PromptTemplate
-from devtools_testutils import AzureRecordedTestCase


-class TestPrompts(AzureRecordedTestCase):
+class TestPrompts:

     # **********************************************************************************
     #

From 633c84fae171c478ff7716d004e13424b3dd9476 Mon Sep 17 00:00:00 2001
From: David Wu
Date: Mon, 4 Nov 2024 23:55:18 -0800
Subject: [PATCH 21/21] Fix PR comments

---
 .vscode/cspell.json                          |  18 +-
 .../azure/ai/inference/prompts/_core.py      |  42 +----
 .../azure/ai/inference/prompts/_patch.py     |   2 +-
 .../ai/inference/prompts/_prompty_utils.py   | 167 +-----------------
 .../azure/ai/inference/prompts/_utils.py     |  33 ----
 sdk/ai/azure-ai-inference/cspell.json        |  13 --
 .../azure-ai-inference/dev_requirements.txt  |   3 -
 sdk/ai/azure-ai-inference/setup.py           |   6 +-
 .../azure-ai-inference/tests/test_prompts.py |   5 +
 9 files changed, 22 insertions(+), 267 deletions(-)
 delete mode 100644 sdk/ai/azure-ai-inference/cspell.json

diff --git a/.vscode/cspell.json b/.vscode/cspell.json
index f1e127fcbc04..c604c50b847f 100644
--- a/.vscode/cspell.json
+++ b/.vscode/cspell.json
@@ -1323,12 +1323,20 @@
     {
       "filename": "sdk/ai/azure-ai-inference/**",
       "words": [
-        "ubinary",
-        "mros",
-        "Nify",
         "ctxt",
-        "wday",
-        "dtype"
+        "ctxt",
+        "dels",
+        "dtype",
+        "fmatter",
+        "fspath",
+        "fstring",
+        "ldel",
+        "mros",
+        "nify",
+        "okwargs",
+        "prompty",
+        "rdel",
+        "ubinary",
+        "wday"
       ]
     },
     {
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_core.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_core.py
index 9d1baee3a033..ec6702995149 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_core.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_core.py
@@ -10,7 +10,7 @@
 from pathlib import Path
 from typing import Any, AsyncIterator, Dict, Iterator, List, Literal, Union
 from ._tracer import Tracer, to_dict
-from ._utils import load_json, load_json_async
+from ._utils import load_json


 @dataclass
@@ -185,20 +185,6 @@ def _process_file(file: str, parent: Path) -> Any:
         else:
             raise FileNotFoundError(f"File {file} not found")

-    @staticmethod
-    async def _process_file_async(file: str, parent: Path) -> Any:
-        file_path = Path(parent / Path(file)).resolve().absolute()
-        if file_path.exists():
-            items = await load_json_async(file_path)
-            if isinstance(items, list):
-                return [Prompty.normalize(value, parent) for value in items]
-            elif isinstance(items, Dict):
-                return {key: Prompty.normalize(value, parent) for key, value in items.items()}
-            else:
-                return items
-        else:
-            raise FileNotFoundError(f"File {file} not found")
-
     @staticmethod
     def _process_env(variable: str, env_error=True, default: Union[str, None] = None) -> Any:
         if variable in os.environ.keys():
@@ -237,32 +223,6 @@ def normalize(attribute: Any, parent: Path, env_error=True) -> Any:
         else:
             return attribute

-    @staticmethod
-    async def normalize_async(attribute: Any, parent: Path, env_error=True) -> Any:
-        if isinstance(attribute, str):
-            attribute = attribute.strip()
-            if attribute.startswith("${") and attribute.endswith("}"):
-                # check if env or file
-                variable = attribute[2:-1].split(":")
-                if variable[0] == "env" and len(variable) > 1:
-                    return Prompty._process_env(
-                        variable[1],
-                        env_error,
-                        variable[2] if len(variable) > 2 else None,
-                    )
-                elif variable[0] == "file" and len(variable) > 1:
-                    return await Prompty._process_file_async(variable[1], parent)
-                else:
-                    raise ValueError(f"Invalid attribute format ({attribute})")
-            else:
-                return attribute
-        elif isinstance(attribute, list):
-            return [await Prompty.normalize_async(value, parent) for value in attribute]
-        elif isinstance(attribute, Dict):
-            return {key: await Prompty.normalize_async(value, parent) for key, value in attribute.items()}
-        else:
-            return attribute
-

 def param_hoisting(top: Dict[str, Any], bottom: Dict[str, Any], top_key: Union[str, None] = None) -> Dict[str, Any]:
     if top_key:
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py
index 3a6ebf0d1872..8689d1bd56b4 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py
@@ -88,7 +88,7 @@ def __init__(
                 "prompt_template": prompt_template,
             }
         else:
-            raise ValueError("Please invalid arguments for PromptTemplate")
+            raise ValueError("Please pass valid arguments for PromptTemplate")

     def create_messages(self, data: Optional[Dict[str, Any]] = None, **kwargs) -> List[Dict[str, Any]]:
         """Render the prompt template with the given data.
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py
index a921137482ac..5ea38bda6229 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py
@@ -18,8 +18,6 @@
 )
 from ._utils import (
     load_global_config,
-    load_global_config_async,
-    load_prompty_async,
     load_prompty,
 )

@@ -82,62 +80,6 @@ def headless(
     return Prompty(model=modelSettings, template=templateSettings, content=content)


-@trace(description="Create a headless prompty object for programmatic use.")
-async def headless_async(
-    api: str,
-    content: Union[str, List[str], dict],
-    configuration: Dict[str, Any] = {},
-    parameters: Dict[str, Any] = {},
-    connection: str = "default",
-) -> Prompty:
-    """Create a headless prompty object for programmatic use.
-
-    Parameters
-    ----------
-    api : str
-        The API to use for the model
-    content : Union[str, List[str], dict]
-        The content to process
-    configuration : Dict[str, Any], optional
-        The configuration to use, by default {}
-    parameters : Dict[str, Any], optional
-        The parameters to use, by default {}
-    connection : str, optional
-        The connection to use, by default "default"
-
-    Returns
-    -------
-    Prompty
-        The headless prompty object
-
-    Example
-    -------
-    >>> import prompty
-    >>> p = await prompty.headless_async( api="embedding", configuration={"type": "azure", "azure_deployment": "text-embedding-ada-002"}, content="hello world", )
-    >>> emb = prompty.execute(p)
-
-    """
-
-    # get caller's path (to get relative path for prompty.json)
-    caller = Path(traceback.extract_stack()[-2].filename)
-    templateSettings = TemplateSettings(type="NOOP", parser="NOOP")
-
-    global_config = await load_global_config_async(caller.parent, connection)
-    c = await Prompty.normalize_async(param_hoisting(configuration, global_config), caller.parent)
-
-    modelSettings = ModelSettings(
-        api=api,
-        configuration=c,
-        parameters=parameters,
-    )
-
-    return Prompty(model=modelSettings, template=templateSettings, content=content)
-
-
 def _load_raw_prompty(attributes: dict, content: str, p: Path, global_config: dict):
     if "model" not in attributes:
         attributes["model"] = {}
@@ -205,7 +147,7 @@ def load(prompty_file: Union[str, Path], configuration: str = "default") -> Prom

     Parameters
     ----------
-    prompty_file : str
+    prompty_file : Union[str, Path]
         The path to the prompty file
     configuration : str, optional
         The configuration to use, by default "default"
@@ -251,59 +193,6 @@ def load(prompty_file: Union[str, Path], configuration: str = "default") -> Prom

     return prompty


-@trace(description="Load a prompty file.")
-async def load_async(prompty_file: Union[str, Path], configuration: str = "default") -> Prompty:
-    """Load a prompty file.
-
-    Parameters
-    ----------
-    prompty_file : str
-        The path to the prompty file
-    configuration : str, optional
-        The configuration to use, by default "default"
-
-    Returns
-    -------
-    Prompty
-        The loaded prompty object
-
-    Example
-    -------
-    >>> import prompty
-    >>> p = prompty.load("prompts/basic.prompty")
-    >>> print(p)
-    """
-
-    p = Path(prompty_file)
-    if not p.is_absolute():
-        # get caller's path (take into account trace frame)
-        caller = Path(traceback.extract_stack()[-3].filename)
-        p = Path(caller.parent / p).resolve().absolute()
-
-    # load dictionary from prompty file
-    matter = await load_prompty_async(p)
-
-    attributes = matter["attributes"]
-    content = matter["body"]
-
-    # normalize attribute dictionary resolve keys and files
-    attributes = await Prompty.normalize_async(attributes, p.parent)
-
-    # load global configuration
-    config = await load_global_config_async(p.parent, configuration)
-    global_config = await Prompty.normalize_async(config, p.parent)
-
-    prompty = _load_raw_prompty(attributes, content, p, global_config)
-
-    # recursive loading of base prompty
-    if "base" in attributes:
-        # load the base prompty from the same directory as the current prompty
-        base = await load_async(p.parent / attributes["base"])
-        prompty = Prompty.hoist_base_prompty(prompty, base)
-
-    return prompty
-
-
 @trace(description="Prepare the inputs for the prompt.")
 def prepare(
     prompt: Prompty,
@@ -524,57 +413,3 @@ def execute(
     result = run(prompt, content, configuration, parameters, raw)

     return result
-
-
-@trace(description="Execute a prompty")
-async def execute_async(
-    prompt: Union[str, Prompty],
-    configuration: Dict[str, Any] = {},
-    parameters: Dict[str, Any] = {},
-    inputs: Dict[str, Any] = {},
-    raw: bool = False,
-    config_name: str = "default",
-):
-    """Execute a prompty.
-
-    Parameters
-    ----------
-    prompt : Union[str, Prompty]
-        The prompty object or path to the prompty file
-    configuration : Dict[str, Any], optional
-        The configuration to use, by default {}
-    parameters : Dict[str, Any], optional
-        The parameters to use, by default {}
-    inputs : Dict[str, Any], optional
-        The inputs to the prompt, by default {}
-    raw : bool, optional
-        Whether to skip processing, by default False
-    connection : str, optional
-        The connection to use, by default "default"
-
-    Returns
-    -------
-    Any
-        The result of the prompt
-
-    Example
-    -------
-    >>> import prompty
-    >>> inputs = {"name": "John Doe"}
-    >>> result = await prompty.execute_async("prompts/basic.prompty", inputs=inputs)
-    """
-    if isinstance(prompt, str):
-        path = Path(prompt)
-        if not path.is_absolute():
-            # get caller's path (take into account trace frame)
-            caller = Path(traceback.extract_stack()[-3].filename)
-            path = Path(caller.parent / path).resolve().absolute()
-        prompt = await load_async(path, config_name)
-
-    # prepare content
-    content = await prepare_async(prompt, inputs)
-
-    # run LLM model
-    result = await run_async(prompt, content, configuration, parameters, raw)
-
-    return result
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py
index ec04be0c73f4..45f0ac97a7e3 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py
@@ -7,7 +7,6 @@
 import re
 import yaml
 import json
-import aiofiles
 from typing import Any, Dict, Union
 from pathlib import Path

@@ -22,22 +21,10 @@ def load_text(file_path, encoding="utf-8"):
         return file.read()


-async def load_text_async(file_path, encoding="utf-8"):
-    async with aiofiles.open(file_path, mode="r", encoding=encoding) as f:
-        content = await f.read()
-    return content
-
-
 def load_json(file_path, encoding="utf-8"):
     return json.loads(load_text(file_path, encoding=encoding))


-async def load_json_async(file_path, encoding="utf-8"):
-    # async file open
-    content = await load_text_async(file_path, encoding=encoding)
-    return json.loads(content)
-
-
 def _find_global_config(prompty_path: Path = Path.cwd()) -> Union[Path, None]:
     prompty_config = list(Path.cwd().glob("**/prompty.json"))

@@ -65,31 +52,11 @@ def load_global_config(prompty_path: Path = Path.cwd(), configuration: str = "de

     return {}


-async def load_global_config_async(prompty_path: Path = Path.cwd(), configuration: str = "default") -> Dict[str, Any]:
-    # prompty.config laying around?
-    config = _find_global_config(prompty_path)
-
-    # if there is one load it
-    if config is not None:
-        c = await load_json_async(config)
-        if configuration in c:
-            return c[configuration]
-        else:
-            raise ValueError(f'Item "{configuration}" not found in "{config}"')
-
-    return {}
-
-
 def load_prompty(file_path, encoding="utf-8") -> Dict[str, Any]:
     contents = load_text(file_path, encoding=encoding)
     return parse(contents)


-async def load_prompty_async(file_path, encoding="utf-8"):
-    contents = await load_text_async(file_path, encoding=encoding)
-    return parse(contents)
-
-
 def parse(contents):
     global _yaml_regex

diff --git a/sdk/ai/azure-ai-inference/cspell.json b/sdk/ai/azure-ai-inference/cspell.json
deleted file mode 100644
index a6eee0bd7318..000000000000
--- a/sdk/ai/azure-ai-inference/cspell.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
-    "ignoreWords": [
-        "aiofiles",
-        "dels",
-        "fmatter",
-        "fspath",
-        "fstring",
-        "ldel",
-        "okwargs",
-        "prompty",
-        "rdel"
-    ]
-}
\ No newline at end of file
diff --git a/sdk/ai/azure-ai-inference/dev_requirements.txt b/sdk/ai/azure-ai-inference/dev_requirements.txt
index 6df9573fbc63..b8f68ea98ffc 100644
--- a/sdk/ai/azure-ai-inference/dev_requirements.txt
+++ b/sdk/ai/azure-ai-inference/dev_requirements.txt
@@ -4,7 +4,4 @@
 ../../monitor/azure-monitor-opentelemetry
 aiohttp
 opentelemetry-sdk
-aiofiles
-dataclasses
 types-pyyaml
-types-aiofiles
\ No newline at end of file
diff --git a/sdk/ai/azure-ai-inference/setup.py b/sdk/ai/azure-ai-inference/setup.py
index f6a2bea03eb4..7e30f3716b8f 100644
--- a/sdk/ai/azure-ai-inference/setup.py
+++ b/sdk/ai/azure-ai-inference/setup.py
@@ -62,11 +62,7 @@
     package_data={
         "azure.ai.inference": ["py.typed"],
     },
-    install_requires=[
-        "isodate>=0.6.1",
-        "azure-core>=1.30.0",
-        "typing-extensions>=4.6.0",
-    ],
+    install_requires=["isodate>=0.6.1", "azure-core>=1.30.0", "typing-extensions>=4.6.0"],
     python_requires=">=3.8",
     extras_require={"opentelemetry": ["azure-core-tracing-opentelemetry"]},
 )
diff --git a/sdk/ai/azure-ai-inference/tests/test_prompts.py b/sdk/ai/azure-ai-inference/tests/test_prompts.py
index 0e001e282839..73c5341729fa 100644
--- a/sdk/ai/azure-ai-inference/tests/test_prompts.py
+++ b/sdk/ai/azure-ai-inference/tests/test_prompts.py
@@ -28,11 +28,16 @@ def test_prompt_template_from_prompty(self, **kwargs):
             {"rule": "The check-in time is 3pm"},
             {"rule": "The check-out time is 11am"},
             {"rule": "Breakfast is served from 7am to 10am"},
+            {"rule": 'The hotel website is https://www.myhotel.com?key1=param1&key2=param"2&key3=param<3>'},
         ]
         messages = prompt_template.create_messages(input=input, rules=rules)
         assert len(messages) == 2
         assert messages[0]["role"] == "system"
         assert "Breakfast is served from 7am to 10am" in messages[0]["content"]
+        assert (
+            'The hotel website is https://www.myhotel.com?key1=param1&key2=param"2&key3=param<3>'
+            in messages[0]["content"]
+        )
         assert messages[1]["role"] == "user"
         assert messages[1]["content"] == "What's the check-in and check-out time?"
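
---
Reviewer note (not part of the patches): the tests above only exercise message rendering, so here is a minimal sketch of how the feature added by this series might be driven end to end. It is illustrative only. The "sample1.prompty" path, the endpoint/key environment variables, and the first_name/last_name template inputs are placeholders, and PromptTemplate.from_prompty / create_messages are assumed from the test and sample names in this series rather than taken from released documentation.

# Illustrative sketch only -- not part of the patch series.
# Assumes "sample1.prompty" sits next to this script, the two environment
# variables below are set, and the template declares first_name/last_name inputs.
import os

from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.prompts import PromptTemplate
from azure.core.credentials import AzureKeyCredential

# Render the .prompty template into chat messages (a list of role/content dicts).
prompt_template = PromptTemplate.from_prompty("sample1.prompty")
messages = prompt_template.create_messages(first_name="Jane", last_name="Doe")

# Send the rendered messages to a model behind an Azure AI model inference endpoint.
client = ChatCompletionsClient(
    endpoint=os.environ["AZURE_AI_CHAT_ENDPOINT"],
    credential=AzureKeyCredential(os.environ["AZURE_AI_CHAT_KEY"]),
)
response = client.complete(messages=messages)
print(response.choices[0].message.content)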