diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2cfc9ca9..7e7003a8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+- Added support for Amazon Nova models (amazon.nova-micro-v1:0, amazon.nova-lite-v1:0, amazon.nova-pro-v1:0)
+- Updated Anthropic Claude models to the latest 3.5 versions (anthropic.claude-3-5-haiku-20241022-v1:0, anthropic.claude-3-5-sonnet-20241022-v2:0)
+- Removed older Amazon Titan and Anthropic Claude models (amazon.titan-text-express-v1, anthropic.claude-v1, anthropic.claude-instant-v1, anthropic.claude-v2)
+- Refactored Bedrock calls to use the Converse API, eliminating the need for custom model-specific payloads
+- Refactored all model invocation to use inference profiles; this is required for Nova models and is also applied to Anthropic models for consistency and improved scalability
+- Added adaptive retry configuration to Bedrock API calls to tolerate quota throttling exceptions (at the expense of latency)
+
 ## [0.7.11] - 2024-10-09
 
 ### Added
diff --git a/pca-main-nokendra.template b/pca-main-nokendra.template
index 9a6e808d..93c52758 100644
--- a/pca-main-nokendra.template
+++ b/pca-main-nokendra.template
@@ -384,14 +384,17 @@ Parameters:
 
   GenAIQueryBedrockModelId:
     Type: String
-    Default: anthropic.claude-3-haiku-20240307-v1:0
+    Default: us.amazon.nova-lite-v1:0
     AllowedValues:
       - anthropic.claude-3-haiku-20240307-v1:0
       - anthropic.claude-3-sonnet-20240229-v1:0
-      - amazon.titan-text-express-v1
-      - anthropic.claude-v1
-      - anthropic.claude-instant-v1
-      - anthropic.claude-v2
+      - us.amazon.nova-micro-v1:0
+      - us.amazon.nova-lite-v1:0
+      - us.amazon.nova-pro-v1:0
+      - us.anthropic.claude-3-5-haiku-20241022-v1:0
+      - us.anthropic.claude-3-5-sonnet-20241022-v2:0
+      - eu.anthropic.claude-3-5-sonnet-20240620-v1:0
+      - apac.anthropic.claude-3-5-sonnet-20240620-v1:0
     Description: (Optional) If 'GenAIQuery' is BEDROCK, which Bedrock model to use.
 
   CallSummarization:
@@ -417,14 +420,17 @@ Parameters:
 
   SummarizationBedrockModelId:
     Type: String
-    Default: anthropic.claude-3-haiku-20240307-v1:0
+    Default: us.amazon.nova-lite-v1:0
     AllowedValues:
       - anthropic.claude-3-haiku-20240307-v1:0
       - anthropic.claude-3-sonnet-20240229-v1:0
-      - amazon.titan-text-express-v1
-      - anthropic.claude-v1
-      - anthropic.claude-instant-v1
-      - anthropic.claude-v2
+      - us.amazon.nova-micro-v1:0
+      - us.amazon.nova-lite-v1:0
+      - us.amazon.nova-pro-v1:0
+      - us.anthropic.claude-3-5-haiku-20241022-v1:0
+      - us.anthropic.claude-3-5-sonnet-20241022-v2:0
+      - eu.anthropic.claude-3-5-sonnet-20240620-v1:0
+      - apac.anthropic.claude-3-5-sonnet-20240620-v1:0
     Description: (Optional) If 'CallSummarization' is BEDROCK, which Bedrock model to use.
 
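The refactor summarized in the CHANGELOG replaces the per-provider request builders with a single Converse call. A minimal standalone sketch of the request/response shape the templates below embed (the region, profile ID, and prompt are illustrative; the retry settings mirror the ones used throughout this diff):

```python
# Minimal sketch of the Converse API usage this change standardizes on.
# Region, model/profile ID, and prompt are illustrative values.
import boto3
from botocore.config import Config

client = boto3.client(
    "bedrock-runtime",
    region_name="us-east-1",
    config=Config(retries={"max_attempts": 50, "mode": "adaptive"}),
)

response = client.converse(
    modelId="us.amazon.nova-lite-v1:0",  # inference profile ID, not a bare model ID
    messages=[{"role": "user", "content": [{"text": "Summarize this transcript..."}]}],
    inferenceConfig={"maxTokens": 128, "temperature": 0},
)

# One uniform response shape, regardless of the underlying provider
print(response["output"]["message"]["content"][0]["text"])
```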
   TestBedrockModelId:
@@ -714,6 +720,12 @@ Resources:
               Resource:
                 - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
                 - !Sub "arn:${AWS::Partition}:bedrock:*:${AWS::AccountId}:custom-model/*"
+                - !Sub "arn:aws:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
+            - Effect: Allow
+              Action:
+                - "bedrock:GetInferenceProfile"
+              Resource:
+                - !Sub "arn:aws:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
       PolicyName: BedrockPolicy
 
   TestBedrockModelFunction:
@@ -736,76 +748,38 @@ Resources:
             subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--target', '/tmp', 'boto3'])
             sys.path.insert(0,'/tmp')
             import boto3
-
+            from botocore.config import Config
+
             # Defaults
             AWS_REGION = os.environ["AWS_REGION_OVERRIDE"] if "AWS_REGION_OVERRIDE" in os.environ else os.environ["AWS_REGION"]
             ENDPOINT_URL = os.environ.get("ENDPOINT_URL", f'https://bedrock-runtime.{AWS_REGION}.amazonaws.com')
             DEFAULT_MAX_TOKENS = 128
 
-            def get_request_body(modelId, parameters, prompt):
-                provider = modelId.split(".")[0]
-                request_body = None
-                if provider == "anthropic":
-                    if 'claude-3' in modelId:
-                        request_body = {
-                            "max_tokens": DEFAULT_MAX_TOKENS,
-                            "messages": [{"role": "user", "content": prompt}],
-                            "anthropic_version": "bedrock-2023-05-31"
-                        }
-                    else:
-                        request_body = {
-                            "prompt": prompt,
-                            "max_tokens_to_sample": DEFAULT_MAX_TOKENS
-                        }
-
-                    request_body.update(parameters)
-                elif provider == "ai21":
-                    request_body = {
-                        "prompt": prompt,
-                        "maxTokens": DEFAULT_MAX_TOKENS
+            def get_generate_text(response):
+                return response["output"]["message"]["content"][0]["text"]
+
+            def call_llm(prompt, modelId):
+                client = boto3.client(
+                    service_name='bedrock-runtime',
+                    region_name=AWS_REGION,
+                    endpoint_url=ENDPOINT_URL,
+                    config=Config(retries={'max_attempts': 50, 'mode': 'adaptive'})
+                )
+
+                message = {
+                    'role': 'user',
+                    'content': [{'text': prompt}]
+                }
+
+                response = client.converse(
+                    modelId=modelId,
+                    messages=[message],
+                    inferenceConfig={
+                        'maxTokens': DEFAULT_MAX_TOKENS,
+                        'temperature': 0
                     }
-                    request_body.update(parameters)
-                elif provider == "amazon":
-                    textGenerationConfig = {
-                        "maxTokenCount": DEFAULT_MAX_TOKENS
-                    }
-                    textGenerationConfig.update(parameters)
-                    request_body = {
-                        "inputText": prompt,
-                        "textGenerationConfig": textGenerationConfig
-                    }
-                else:
-                    raise Exception("Unsupported provider: ", provider)
-                return request_body
-
-            def get_generate_text(modelId, response):
-                provider = modelId.split(".")[0]
-                generated_text = None
-                if provider == "anthropic":
-                    if 'claude-3' in modelId:
-                        response_raw = json.loads(response.get("body").read().decode())
-                        generated_text = response_raw.get('content')[0].get('text')
-
-                    else:
-                        response_body = json.loads(response.get("body").read().decode())
-                        generated_text = response_body.get("completion")
-                elif provider == "ai21":
-                    response_body = json.loads(response.get("body").read())
-                    generated_text = response_body.get("completions")[0].get("data").get("text")
-                elif provider == "amazon":
-                    response_body = json.loads(response.get("body").read())
-                    generated_text = response_body.get("results")[0].get("outputText")
-                else:
-                    raise Exception("Unsupported provider: ", provider)
-                return generated_text
-
-            def call_llm(parameters, prompt):
-                modelId = parameters.pop("modelId")
-                body = get_request_body(modelId, parameters, prompt)
-                print("ModelId", modelId, "- Body: ", body)
-                client = boto3.client(service_name='bedrock-runtime', region_name=AWS_REGION, endpoint_url=ENDPOINT_URL)
-                response = client.invoke_model(body=json.dumps(body),
-                    modelId=modelId, accept='application/json', contentType='application/json')
-                generated_text = get_generate_text(modelId, response)
+                )
+                generated_text = get_generate_text(response)
                 return generated_text
 
             def lambda_handler(event, context):
@@ -821,12 +795,8 @@ Resources:
                 # Test LLMModel
                 llmModelId = event['ResourceProperties'].get('LLMModelId', '')
                 modelId = llmModelId
-                parameters = {
-                    "modelId": modelId,
-                    "temperature": 0
-                }
                 print(f"Testing {modelId}")
-                call_llm(parameters, prompt)
+                call_llm(prompt, modelId)
             except Exception as e:
                 status = cfnresponse.FAILED
                 reason = f"Exception thrown testing ModelId='{modelId}'. Check that Amazon Bedrock is available in your region, and that model is activated in your Amazon Bedrock account - {e}"
diff --git a/pca-main.template b/pca-main.template
index 428e65af..05b2bb4b 100644
--- a/pca-main.template
+++ b/pca-main.template
@@ -425,14 +425,17 @@ Parameters:
 
   GenAIQueryBedrockModelId:
     Type: String
-    Default: anthropic.claude-3-haiku-20240307-v1:0
+    Default: us.amazon.nova-lite-v1:0
     AllowedValues:
       - anthropic.claude-3-haiku-20240307-v1:0
       - anthropic.claude-3-sonnet-20240229-v1:0
-      - amazon.titan-text-express-v1
-      - anthropic.claude-v1
-      - anthropic.claude-instant-v1
-      - anthropic.claude-v2
+      - us.amazon.nova-micro-v1:0
+      - us.amazon.nova-lite-v1:0
+      - us.amazon.nova-pro-v1:0
+      - us.anthropic.claude-3-5-haiku-20241022-v1:0
+      - us.anthropic.claude-3-5-sonnet-20241022-v2:0
+      - eu.anthropic.claude-3-5-sonnet-20240620-v1:0
+      - apac.anthropic.claude-3-5-sonnet-20240620-v1:0
     Description: (Optional) If 'GenAIQuery' is BEDROCK, which Bedrock model to use.
 
   CallSummarization:
@@ -458,14 +461,17 @@ Parameters:
 
   SummarizationBedrockModelId:
     Type: String
-    Default: anthropic.claude-3-haiku-20240307-v1:0
+    Default: us.amazon.nova-lite-v1:0
     AllowedValues:
       - anthropic.claude-3-haiku-20240307-v1:0
       - anthropic.claude-3-sonnet-20240229-v1:0
-      - amazon.titan-text-express-v1
-      - anthropic.claude-v1
-      - anthropic.claude-instant-v1
-      - anthropic.claude-v2
+      - us.amazon.nova-micro-v1:0
+      - us.amazon.nova-lite-v1:0
+      - us.amazon.nova-pro-v1:0
+      - us.anthropic.claude-3-5-haiku-20241022-v1:0
+      - us.anthropic.claude-3-5-sonnet-20241022-v2:0
+      - eu.anthropic.claude-3-5-sonnet-20240620-v1:0
+      - apac.anthropic.claude-3-5-sonnet-20240620-v1:0
     Description: (Optional) If 'CallSummarization' is BEDROCK, which Bedrock model to use.
 
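Both main templates additionally grant bedrock:GetInferenceProfile on the inference-profile ARNs. A hypothetical sketch of the kind of check that permission enables, using the control-plane `bedrock` client rather than `bedrock-runtime`; this check is not part of the PR itself and the profile ID is illustrative:

```python
# Hypothetical pre-flight check enabled by the new
# bedrock:GetInferenceProfile permission. Uses the control-plane
# 'bedrock' client, not 'bedrock-runtime'; the profile ID is illustrative.
import boto3

bedrock = boto3.client("bedrock")
profile = bedrock.get_inference_profile(
    inferenceProfileIdentifier="us.amazon.nova-lite-v1:0"
)
print(profile["inferenceProfileArn"], profile["status"])
```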
   TestBedrockModelId:
@@ -897,6 +903,12 @@ Resources:
               Resource:
                 - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
                 - !Sub "arn:${AWS::Partition}:bedrock:*:${AWS::AccountId}:custom-model/*"
+                - !Sub "arn:aws:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
+            - Effect: Allow
+              Action:
+                - "bedrock:GetInferenceProfile"
+              Resource:
+                - !Sub "arn:aws:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
       PolicyName: BedrockPolicy
 
   TestBedrockModelFunction:
@@ -919,76 +931,38 @@ Resources:
             subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--target', '/tmp', 'boto3'])
             sys.path.insert(0,'/tmp')
             import boto3
-
+            from botocore.config import Config
+
             # Defaults
             AWS_REGION = os.environ["AWS_REGION_OVERRIDE"] if "AWS_REGION_OVERRIDE" in os.environ else os.environ["AWS_REGION"]
             ENDPOINT_URL = os.environ.get("ENDPOINT_URL", f'https://bedrock-runtime.{AWS_REGION}.amazonaws.com')
             DEFAULT_MAX_TOKENS = 128
 
-            def get_request_body(modelId, parameters, prompt):
-                provider = modelId.split(".")[0]
-                request_body = None
-                if provider == "anthropic":
-                    if 'claude-3' in modelId:
-                        request_body = {
-                            "max_tokens": DEFAULT_MAX_TOKENS,
-                            "messages": [{"role": "user", "content": prompt}],
-                            "anthropic_version": "bedrock-2023-05-31"
-                        }
-                    else:
-                        request_body = {
-                            "prompt": prompt,
-                            "max_tokens_to_sample": DEFAULT_MAX_TOKENS
-                        }
-
-                    request_body.update(parameters)
-                elif provider == "ai21":
-                    request_body = {
-                        "prompt": prompt,
-                        "maxTokens": DEFAULT_MAX_TOKENS
-                    }
-                    request_body.update(parameters)
-                elif provider == "amazon":
-                    textGenerationConfig = {
-                        "maxTokenCount": DEFAULT_MAX_TOKENS
+            def get_generate_text(response):
+                return response["output"]["message"]["content"][0]["text"]
+
+            def call_llm(prompt, modelId):
+                client = boto3.client(
+                    service_name='bedrock-runtime',
+                    region_name=AWS_REGION,
+                    endpoint_url=ENDPOINT_URL,
+                    config=Config(retries={'max_attempts': 50, 'mode': 'adaptive'})
+                )
+
+                message = {
+                    'role': 'user',
+                    'content': [{'text': prompt}]
+                }
+
+                response = client.converse(
+                    modelId=modelId,
+                    messages=[message],
+                    inferenceConfig={
+                        'maxTokens': DEFAULT_MAX_TOKENS,
+                        'temperature': 0
                     }
-                    textGenerationConfig.update(parameters)
-                    request_body = {
-                        "inputText": prompt,
-                        "textGenerationConfig": textGenerationConfig
-                    }
-                else:
-                    raise Exception("Unsupported provider: ", provider)
-                return request_body
-
-            def get_generate_text(modelId, response):
-                provider = modelId.split(".")[0]
-                generated_text = None
-                if provider == "anthropic":
-                    if 'claude-3' in modelId:
-                        response_raw = json.loads(response.get("body").read().decode())
-                        generated_text = response_raw.get('content')[0].get('text')
-
-                    else:
-                        response_body = json.loads(response.get("body").read().decode())
-                        generated_text = response_body.get("completion")
-                elif provider == "ai21":
-                    response_body = json.loads(response.get("body").read())
-                    generated_text = response_body.get("completions")[0].get("data").get("text")
-                elif provider == "amazon":
-                    response_body = json.loads(response.get("body").read())
-                    generated_text = response_body.get("results")[0].get("outputText")
-                else:
-                    raise Exception("Unsupported provider: ", provider)
-                return generated_text
-
-            def call_llm(parameters, prompt):
-                modelId = parameters.pop("modelId")
-                body = get_request_body(modelId, parameters, prompt)
-                print("ModelId", modelId, "- Body: ", body)
-                client = boto3.client(service_name='bedrock-runtime', region_name=AWS_REGION, endpoint_url=ENDPOINT_URL)
-                response = client.invoke_model(body=json.dumps(body),
-                    modelId=modelId, accept='application/json', contentType='application/json')
-                generated_text = get_generate_text(modelId, response)
+                )
+                generated_text = get_generate_text(response)
                 return generated_text
 
             def lambda_handler(event, context):
@@ -1004,12 +978,9 @@ Resources:
                 # Test LLMModel
                 llmModelId = event['ResourceProperties'].get('LLMModelId', '')
                 modelId = llmModelId
-                parameters = {
-                    "modelId": modelId,
-                    "temperature": 0
-                }
+
                 print(f"Testing {modelId}")
-                call_llm(parameters, prompt)
+                call_llm(prompt, modelId)
             except Exception as e:
                 status = cfnresponse.FAILED
                 reason = f"Exception thrown testing ModelId='{modelId}'. Check that Amazon Bedrock is available in your region, and that model is activated in your Amazon Bedrock account - {e}"
diff --git a/pca-server/cfn/lib/pca.template b/pca-server/cfn/lib/pca.template
index 37a21559..9e4075fa 100644
--- a/pca-server/cfn/lib/pca.template
+++ b/pca-server/cfn/lib/pca.template
@@ -322,6 +322,13 @@ Resources:
               Resource:
                 - !Sub arn:${AWS::Partition}:bedrock:*::foundation-model/*
                 - !Sub arn:${AWS::Partition}:bedrock:*:${AWS::AccountId}:custom-model/*
+                - !Sub arn:aws:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*
+            - Sid: BedrockGetInferenceProfile
+              Effect: Allow
+              Action:
+                - bedrock:GetInferenceProfile
+              Resource:
+                - !Sub arn:aws:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*
             - !If
               - HasAnthropicSummary
               - Sid: SecretsManagerPolicy
diff --git a/pca-server/cfn/pca-server.template b/pca-server/cfn/pca-server.template
index d73a0869..2bf16292 100644
--- a/pca-server/cfn/pca-server.template
+++ b/pca-server/cfn/pca-server.template
@@ -40,14 +40,17 @@ Parameters:
 
   SummarizationBedrockModelId:
     Type: String
-    Default: anthropic.claude-3-haiku-20240307-v1:0
+    Default: us.amazon.nova-lite-v1:0
    AllowedValues:
       - anthropic.claude-3-haiku-20240307-v1:0
       - anthropic.claude-3-sonnet-20240229-v1:0
-      - amazon.titan-text-express-v1
-      - anthropic.claude-v1
-      - anthropic.claude-instant-v1
-      - anthropic.claude-v2
+      - us.amazon.nova-micro-v1:0
+      - us.amazon.nova-lite-v1:0
+      - us.amazon.nova-pro-v1:0
+      - us.anthropic.claude-3-5-haiku-20241022-v1:0
+      - us.anthropic.claude-3-5-sonnet-20241022-v2:0
+      - eu.anthropic.claude-3-5-sonnet-20240620-v1:0
+      - apac.anthropic.claude-3-5-sonnet-20240620-v1:0
     Description: (Optional) If 'CallSummarization' is BEDROCK, which Bedrock model to use.
 
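Every Bedrock client in this changeset is now built with the same retry configuration. Shown standalone below; 'adaptive' mode layers client-side rate limiting on top of the standard retry behavior, which is the throttling-tolerance-for-latency trade described in the CHANGELOG entry:

```python
# The retry configuration applied to every Bedrock client in this PR.
# 'adaptive' mode adds client-side rate limiting, so throttled requests
# back off and retry (up to 50 attempts) instead of failing fast.
import boto3
from botocore.config import Config

retry_config = Config(retries={"max_attempts": 50, "mode": "adaptive"})
client = boto3.client("bedrock-runtime", config=retry_config)
```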
   SummarizationSageMakerInitialInstanceCount:
diff --git a/pca-server/src/pca/pca-aws-sf-summarize.py b/pca-server/src/pca/pca-aws-sf-summarize.py
index a66d6c8c..92081df2 100644
--- a/pca-server/src/pca/pca-aws-sf-summarize.py
+++ b/pca-server/src/pca/pca-aws-sf-summarize.py
@@ -35,13 +35,6 @@ s3Client = boto3.client('s3')
 
 dynamodb_client = boto3.client('dynamodb')
 
-config = Config(
-    retries = {
-        'max_attempts': 100,
-        'mode': 'adaptive'
-    }
-)
-
 def get_third_party_llm_secret():
     print("Getting API key from Secrets Manager")
     secrets_client = boto3.client('secretsmanager')
@@ -56,76 +49,43 @@ def get_third_party_llm_secret():
 
 def get_bedrock_client():
     print("Connecting to Bedrock Service: ", BEDROCK_ENDPOINT_URL)
-    client = boto3.client(service_name='bedrock-runtime', region_name=AWS_REGION, endpoint_url=BEDROCK_ENDPOINT_URL, config=config)
+    client = boto3.client(
+        service_name='bedrock-runtime',
+        region_name=AWS_REGION,
+        endpoint_url=BEDROCK_ENDPOINT_URL,
+        config=Config(retries={'max_attempts': 50, 'mode': 'adaptive'})
+    )
     return client
-
-def get_bedrock_request_body(modelId, parameters, prompt):
-    provider = modelId.split(".")[0]
-    request_body = None
-    if provider == "anthropic":
-        if 'claude-3' in modelId:
-            request_body = {
-                "max_tokens": MAX_TOKENS,
-                "messages": [{"role": "user", "content": prompt}],
-                "anthropic_version": "bedrock-2023-05-31"
-            }
-        else:
-            request_body = {
-                "prompt": prompt,
-                "max_tokens_to_sample": MAX_TOKENS
-            }
-        request_body.update(parameters)
-    elif provider == "ai21":
-        request_body = {
-            "prompt": prompt,
-            "maxTokens": MAX_TOKENS
-        }
-        request_body.update(parameters)
-    elif provider == "amazon":
-        textGenerationConfig = {
-            "maxTokenCount": MAX_TOKENS
-        }
-        textGenerationConfig.update(parameters)
-        request_body = {
-            "inputText": prompt,
-            "textGenerationConfig": textGenerationConfig
-        }
-    else:
-        raise Exception("Unsupported provider: ", provider)
-    return request_body
-
-def get_bedrock_generate_text(modelId, response):
-    print("generating response with ", modelId)
-    provider = modelId.split(".")[0]
-    generated_text = None
-    if provider == "anthropic":
-        if 'claude-3' in modelId:
-            response_raw = json.loads(response.get("body").read().decode())
-            generated_text = response_raw.get('content')[0].get('text')
-
-        else:
-            response_body = json.loads(response.get("body").read().decode())
-            generated_text = response_body.get("completion")
-    elif provider == "ai21":
-        response_body = json.loads(response.get("body").read())
-        generated_text = response_body.get("completions")[0].get("data").get("text")
-    elif provider == "amazon":
-        response_body = json.loads(response.get("body").read())
-        generated_text = response_body.get("results")[0].get("outputText")
-    else:
-        raise Exception("Unsupported provider: ", provider)
+
+def get_bedrock_generate_text(response):
+    generated_text = response["output"]["message"]["content"][0]["text"]
     generated_text = generated_text.replace('```','')
     return generated_text
 
-def call_bedrock(parameters, prompt):
+def call_bedrock(prompt, temperature, max_tokens):
     global bedrock_client
     modelId = BEDROCK_MODEL_ID
-    body = get_bedrock_request_body(modelId, parameters, prompt)
-    print("ModelId", modelId, "- Body: ", body)
+
+    print(f"Bedrock request - ModelId: {modelId} Temperature: {temperature} Max Tokens: {max_tokens}")
+
     if (bedrock_client is None):
         bedrock_client = get_bedrock_client()
-    response = bedrock_client.invoke_model(body=json.dumps(body), modelId=modelId, accept='application/json', contentType='application/json')
-    generated_text = get_bedrock_generate_text(modelId, response)
+
+    message = {
+        "role": "user",
+        "content": [{"text": prompt}]
+    }
+
+    response = bedrock_client.converse(
+        modelId=modelId,
+        messages=[message],
+        inferenceConfig={
+            "maxTokens": max_tokens,
+            "temperature": temperature
+        }
+    )
+
+    generated_text = get_bedrock_generate_text(response)
     return generated_text
 
 def generate_sagemaker_summary(transcript):
@@ -224,10 +184,7 @@ def generate_bedrock_summary(transcript, api_mode):
                 continue
             else:
                 prompt = prompt.replace("{transcript}", transcript)
-                parameters = {
-                    "temperature": 0
-                }
-                generated_text = call_bedrock(parameters, prompt)
+                generated_text = call_bedrock(prompt, 0, MAX_TOKENS)
                 result[key] = generated_text
     if len(result.keys()) == 1:
         # This is a single node JSON with value that can be either:
diff --git a/pca-server/src/pca/pcaconfiguration.py b/pca-server/src/pca/pcaconfiguration.py
index 47134b2d..7b7b5cf8 100644
--- a/pca-server/src/pca/pcaconfiguration.py
+++ b/pca-server/src/pca/pcaconfiguration.py
@@ -7,54 +7,58 @@ SPDX-License-Identifier: Apache-2.0
 """
 import boto3
+import os
 from botocore.config import Config
 
+# Get the stack name from environment variable
+STACK_NAME = os.environ.get('STACK_NAME')
+
 # Parameter Store Field Names used by main workflow
-CONF_COMP_LANGS = "ComprehendLanguages"
-CONF_REDACTION_LANGS = "ContentRedactionLanguages"
-CONF_CONVO_LOCATION = "ConversationLocation"
-CONF_ENTITYENDPOINT = "EntityRecognizerEndpoint"
-CONF_ENTITY_FILE = "EntityStringMap"
-CONF_ENTITYCONF = "EntityThreshold"
-CONF_ENTITY_TYPES = "EntityTypes"
-CONF_PREFIX_AUDIO_PLAYBACK = "InputBucketAudioPlayback"
-CONF_S3BUCKET_INPUT = "InputBucketName"
-CONF_PREFIX_RAW_AUDIO = "InputBucketRawAudio"
-CONF_PREFIX_FAILED_AUDIO = "InputBucketFailedTranscriptions"
-CONF_PREFIX_INPUT_TRANSCRIPTS = "InputBucketOrigTranscripts"
-CONF_MAX_SPEAKERS = "MaxSpeakers"
-CONF_MINNEGATIVE = "MinSentimentNegative"
-CONF_MINPOSITIVE = "MinSentimentPositive"
-CONF_S3BUCKET_OUTPUT = "OutputBucketName"
-CONF_PREFIX_TRANSCRIBE_RESULTS = "OutputBucketTranscribeResults"
-CONF_PREFIX_PARSED_RESULTS = "OutputBucketParsedResults"
-CONF_SPEAKER_NAMES = "SpeakerNames"
-CONF_SPEAKER_MODE = "SpeakerSeparationType"
-COMP_SFN_NAME = "StepFunctionName"
-CONF_SUPPORT_BUCKET = "SupportFilesBucketName"
-CONF_TRANSCRIBE_LANG = "TranscribeLanguages"
-CONF_TELEPHONY_CTR = "TelephonyCTRType"
-CONF_TELEPHONY_CTR_SUFFIX = "TelephonyCTRFileSuffix"
-CONF_VOCABNAME = "VocabularyName"
-CONF_CLMNAME = "CustomLangModelName"
-CONF_FILENAME_DATETIME_REGEX = "FilenameDatetimeRegex"
-CONF_FILENAME_DATETIME_FIELDMAP = "FilenameDatetimeFieldMap"
-CONF_FILENAME_GUID_REGEX = "FilenameGUIDRegex"
-CONF_FILENAME_AGENT_REGEX = "FilenameAgentRegex"
-CONF_FILENAME_CUST_REGEX = "FilenameCustRegex"
-CONF_FILTER_MODE = "VocabFilterMode"
-CONF_FILTER_NAME = "VocabFilterName"
-CONF_KENDRA_INDEX_ID = "KendraIndexId"
-CONF_WEB_URI = "WebUiUri"
-CONF_TRANSCRIBE_API = "TranscribeApiMode"
-CONF_REDACTION_TRANSCRIPT = "CallRedactionTranscript"
-CONF_REDACTION_AUDIO = "CallRedactionAudio"
-CONF_CALL_SUMMARIZATION = "CallSummarization"
+CONF_COMP_LANGS = f"{STACK_NAME}-ComprehendLanguages"
+CONF_REDACTION_LANGS = f"{STACK_NAME}-ContentRedactionLanguages"
+CONF_CONVO_LOCATION = f"{STACK_NAME}-ConversationLocation"
+CONF_ENTITYENDPOINT = f"{STACK_NAME}-EntityRecognizerEndpoint"
+CONF_ENTITY_FILE = f"{STACK_NAME}-EntityStringMap"
+CONF_ENTITYCONF = f"{STACK_NAME}-EntityThreshold"
+CONF_ENTITY_TYPES = f"{STACK_NAME}-EntityTypes"
+CONF_PREFIX_AUDIO_PLAYBACK = f"{STACK_NAME}-InputBucketAudioPlayback"
+CONF_S3BUCKET_INPUT = f"{STACK_NAME}-InputBucketName"
+CONF_PREFIX_RAW_AUDIO = f"{STACK_NAME}-InputBucketRawAudio"
+CONF_PREFIX_FAILED_AUDIO = f"{STACK_NAME}-InputBucketFailedTranscriptions"
+CONF_PREFIX_INPUT_TRANSCRIPTS = f"{STACK_NAME}-InputBucketOrigTranscripts"
+CONF_MAX_SPEAKERS = f"{STACK_NAME}-MaxSpeakers"
+CONF_MINNEGATIVE = f"{STACK_NAME}-MinSentimentNegative"
+CONF_MINPOSITIVE = f"{STACK_NAME}-MinSentimentPositive"
+CONF_S3BUCKET_OUTPUT = f"{STACK_NAME}-OutputBucketName"
+CONF_PREFIX_TRANSCRIBE_RESULTS = f"{STACK_NAME}-OutputBucketTranscribeResults"
+CONF_PREFIX_PARSED_RESULTS = f"{STACK_NAME}-OutputBucketParsedResults"
+CONF_SPEAKER_NAMES = f"{STACK_NAME}-SpeakerNames"
+CONF_SPEAKER_MODE = f"{STACK_NAME}-SpeakerSeparationType"
+COMP_SFN_NAME = f"{STACK_NAME}-StepFunctionName"
+CONF_SUPPORT_BUCKET = f"{STACK_NAME}-SupportFilesBucketName"
+CONF_TRANSCRIBE_LANG = f"{STACK_NAME}-TranscribeLanguages"
+CONF_TELEPHONY_CTR = f"{STACK_NAME}-TelephonyCTRType"
+CONF_TELEPHONY_CTR_SUFFIX = f"{STACK_NAME}-TelephonyCTRFileSuffix"
+CONF_VOCABNAME = f"{STACK_NAME}-VocabularyName"
+CONF_CLMNAME = f"{STACK_NAME}-CustomLangModelName"
+CONF_FILENAME_DATETIME_REGEX = f"{STACK_NAME}-FilenameDatetimeRegex"
+CONF_FILENAME_DATETIME_FIELDMAP = f"{STACK_NAME}-FilenameDatetimeFieldMap"
+CONF_FILENAME_GUID_REGEX = f"{STACK_NAME}-FilenameGUIDRegex"
+CONF_FILENAME_AGENT_REGEX = f"{STACK_NAME}-FilenameAgentRegex"
+CONF_FILENAME_CUST_REGEX = f"{STACK_NAME}-FilenameCustRegex"
+CONF_FILTER_MODE = f"{STACK_NAME}-VocabFilterMode"
+CONF_FILTER_NAME = f"{STACK_NAME}-VocabFilterName"
+CONF_KENDRA_INDEX_ID = f"{STACK_NAME}-KendraIndexId"
+CONF_WEB_URI = f"{STACK_NAME}-WebUiUri"
+CONF_TRANSCRIBE_API = f"{STACK_NAME}-TranscribeApiMode"
+CONF_REDACTION_TRANSCRIPT = f"{STACK_NAME}-CallRedactionTranscript"
+CONF_REDACTION_AUDIO = f"{STACK_NAME}-CallRedactionAudio"
+CONF_CALL_SUMMARIZATION = f"{STACK_NAME}-CallSummarization"
 
 # Parameter store fieldnames used by bulk import
-BULK_S3_BUCKET = "BulkUploadBucket"
-BULK_JOB_LIMIT = "BulkUploadMaxTranscribeJobs"
-BULK_MAX_DRIP_RATE = "BulkUploadMaxDripRate"
+BULK_S3_BUCKET = f"{STACK_NAME}-BulkUploadBucket"
+BULK_JOB_LIMIT = f"{STACK_NAME}-BulkUploadMaxTranscribeJobs"
+BULK_MAX_DRIP_RATE = f"{STACK_NAME}-BulkUploadMaxDripRate"
 
 # Transcribe API Modes
 API_STANDARD = "standard"
diff --git a/pca-ui/cfn/lib/api.template b/pca-ui/cfn/lib/api.template
index d30f42a1..2e357068 100644
--- a/pca-ui/cfn/lib/api.template
+++ b/pca-ui/cfn/lib/api.template
@@ -33,14 +33,17 @@ Parameters:
 
   GenAIQueryBedrockModelId:
     Type: String
-    Default: anthropic.claude-3-haiku-20240307-v1:0
+    Default: us.amazon.nova-lite-v1:0
     AllowedValues:
       - anthropic.claude-3-haiku-20240307-v1:0
       - anthropic.claude-3-sonnet-20240229-v1:0
-      - amazon.titan-text-express-v1
-      - anthropic.claude-v1
-      - anthropic.claude-instant-v1
-      - anthropic.claude-v2
+      - us.amazon.nova-micro-v1:0
+      - us.amazon.nova-lite-v1:0
+      - us.amazon.nova-pro-v1:0
+      - us.anthropic.claude-3-5-haiku-20241022-v1:0
+      - us.anthropic.claude-3-5-sonnet-20241022-v2:0
+      - eu.anthropic.claude-3-5-sonnet-20240620-v1:0
+      - apac.anthropic.claude-3-5-sonnet-20240620-v1:0
     Description: (Optional) If 'GenAIQuery' is BEDROCK, which Bedrock model to use.
       (Bedrock preview access only)
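With every Parameter Store name now prefixed by the stack name, multiple PCA stacks can coexist in one region without their parameters colliding. A sketch of how a consumer would resolve one of these parameters, assuming a hypothetical stack name (pcaconfiguration.py itself reads STACK_NAME from the Lambda environment):

```python
# Sketch: resolving a stack-prefixed Parameter Store value.
# "PCA" is a hypothetical stack name used for illustration.
import boto3

ssm = boto3.client("ssm")
stack_name = "PCA"
param = ssm.get_parameter(Name=f"{stack_name}-InputBucketName")
print(param["Parameter"]["Value"])
```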
 
   FetchTranscriptArn:
@@ -301,11 +304,18 @@ Resources:
                   - 'dynamodb:GetItem'
               - Sid: InvokeBedrock
                 Effect: Allow
-                Action: 
+                Action:
                   - bedrock:InvokeModel
                 Resource:
-                  - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
-                  - !Sub "arn:${AWS::Partition}:bedrock:*:${AWS::AccountId}:custom-model/*"
+                  - !Sub arn:${AWS::Partition}:bedrock:*::foundation-model/*
+                  - !Sub arn:${AWS::Partition}:bedrock:*:${AWS::AccountId}:custom-model/*
+                  - !Sub arn:aws:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*
+              - Sid: BedrockGetInferenceProfile
+                Effect: Allow
+                Action:
+                  - bedrock:GetInferenceProfile
+                Resource:
+                  - !Sub arn:aws:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*
               - !If
                 - HasAnthropicSummary
                 - Sid: SecretsManagerPolicy
diff --git a/pca-ui/cfn/pca-ui.template b/pca-ui/cfn/pca-ui.template
index 094bb9cb..cca29860 100644
--- a/pca-ui/cfn/pca-ui.template
+++ b/pca-ui/cfn/pca-ui.template
@@ -3,18 +3,22 @@ AWSTemplateFormatVersion: "2010-09-09"
 Description: Amazon Transcribe Post Call Analytics - PCA UI
 
 Parameters:
+  ParentStackName:
+    Type: String
+    Description: Name of the parent stack
+
   AdminUsername:
     Type: String
     Default: "admin"
     Description: (Required) Username for admin user
-
+
   AdminEmail:
-    Type: String
-    Description: >-
-      (Required) Email address for the admin user. Will be used for logging in and for setting the admin password.
-      This email will receive the temporary password for the admin user.
-    AllowedPattern: ".+\\@.+\\..+"
-    ConstraintDescription: Must be valid email address eg. johndoe@example.com
+    Type: String
+    Description: >-
+      (Required) Email address for the admin user. Will be used for logging in and for setting the admin password.
+      This email will receive the temporary password for the admin user.
+    AllowedPattern: ".+\\@.+\\..+"
+    ConstraintDescription: Must be valid email address eg. johndoe@example.com
 
   AllowedSignUpEmailDomain:
     Type: String
@@ -26,7 +30,7 @@ Parameters:
       If left empty, signup via the web UI is disabled and users will have to be created using
       Cognito.
 
-    AllowedPattern: '^(\*||([\w-]+\.)+[\w-]{2,6}(, *([\w-]+\.)+[\w-]{2,6})*)$'
+    AllowedPattern: '^(\*||([\w-]+\.)+[\w-]{2,6}(, *([\w-]+\.)+[\w-]{2,6})*)$'
 
   AudioBucket:
     Type: String
@@ -64,17 +68,20 @@ Parameters:
       - 'LAMBDA'
       - 'ANTHROPIC'
     Description: This is what model to use for GenAIQuery.
-
+
   GenAIQueryBedrockModelId:
     Type: String
-    Default: anthropic.claude-3-haiku-20240307-v1:0
+    Default: us.amazon.nova-lite-v1:0
     AllowedValues:
       - anthropic.claude-3-haiku-20240307-v1:0
       - anthropic.claude-3-sonnet-20240229-v1:0
-      - amazon.titan-text-express-v1
-      - anthropic.claude-v1
-      - anthropic.claude-instant-v1
-      - anthropic.claude-v2
+      - us.amazon.nova-micro-v1:0
+      - us.amazon.nova-lite-v1:0
+      - us.amazon.nova-pro-v1:0
+      - us.anthropic.claude-3-5-haiku-20241022-v1:0
+      - us.anthropic.claude-3-5-sonnet-20241022-v2:0
+      - eu.anthropic.claude-3-5-sonnet-20240620-v1:0
+      - apac.anthropic.claude-3-5-sonnet-20240620-v1:0
     Description: (Optional) If 'GenAIQuery' is BEDROCK, which Bedrock model to use.
 
   LLMThirdPartyApiKey:
@@ -97,7 +104,7 @@ Parameters:
     Default: ''
     Type: String
     Description: External Boto3 Layer Arn to use.
       If none is provided, one will be created automatically via boto3.template
-
+
   PyUtilsLayerArn:
     Default: ''
     Type: String
@@ -143,7 +150,7 @@ Resources:
         AllowedSignUpEmailDomain: !Ref AllowedSignUpEmailDomain
         WebUri: !GetAtt Web.Outputs.Uri
         Environment: !Ref Environment
-        Name: 
+        Name:
           !If
           - isMainStackNameEmpty
          - !Ref AWS::StackName
@@ -193,7 +200,7 @@ Resources:
         DeployCountName: !Ref DeployCount
         DeployCountValue: !Ref DeployCount
         # DistributionId: !GetAtt Web.Outputs.DistributionId
-        EnableGenAIQuery: !If 
+        EnableGenAIQuery: !If
           - ShouldEnableGenAIQuery
           - 'true'
          - 'false'
@@ -201,7 +208,7 @@ Resources:
   WebUriParameter:
     Type: "AWS::SSM::Parameter"
     Properties:
-      Name: WebUiUri
+      Name: !Sub ${ParentStackName}-WebUiUri
       Type: String
       Value: !GetAtt Web.Outputs.Uri
       Description: PCA Web Application URI
diff --git a/pca-ui/src/genai/index.py b/pca-ui/src/genai/index.py
index a04893ba..35e9a063 100644
--- a/pca-ui/src/genai/index.py
+++ b/pca-ui/src/genai/index.py
@@ -12,6 +12,7 @@ import requests
 import urllib.parse
 
 from botocore.exceptions import ClientError
+from botocore.config import Config
 
 AWS_REGION = os.environ["AWS_REGION_OVERRIDE"] if "AWS_REGION_OVERRIDE" in os.environ else os.environ["AWS_REGION"]
 QUERY_TYPE = os.getenv('QUERY_TYPE', 'DISABLED')
@@ -44,77 +45,43 @@ def get_third_party_llm_secret():
 
 def get_bedrock_client():
     print("Connecting to Bedrock Service: ", BEDROCK_ENDPOINT_URL)
-    client = boto3.client(service_name='bedrock-runtime', region_name=AWS_REGION, endpoint_url=BEDROCK_ENDPOINT_URL)
+    client = boto3.client(
+        service_name='bedrock-runtime',
+        region_name=AWS_REGION,
+        endpoint_url=BEDROCK_ENDPOINT_URL,
+        config=Config(retries={'max_attempts': 50, 'mode': 'adaptive'})
+    )
     return client
-
-def get_bedrock_request_body(modelId, parameters, prompt):
-    provider = modelId.split(".")[0]
-    request_body = None
-    if provider == "anthropic":
-        print(modelId)
-        if 'claude-3' in modelId:
-            request_body = {
-                "max_tokens": MAX_TOKENS,
-                "messages": [{"role": "user", "content": prompt}],
-                "anthropic_version": "bedrock-2023-05-31"
-            }
-        else:
-            request_body = {
-                "prompt": prompt,
-                "max_tokens_to_sample": MAX_TOKENS
-            }
-        request_body.update(parameters)
-    elif provider == "ai21":
-        request_body = {
-            "prompt": prompt,
-            "maxTokens": MAX_TOKENS
-        }
-        request_body.update(parameters)
-    elif provider == "amazon":
-        textGenerationConfig = {
-            "maxTokenCount": MAX_TOKENS
-        }
-        textGenerationConfig.update(parameters)
-        request_body = {
-            "inputText": prompt,
-            "textGenerationConfig": textGenerationConfig
-        }
-    else:
-        raise Exception("Unsupported provider: ", provider)
-    return request_body
-
-def get_bedrock_generate_text(modelId, response):
-    print("generating response with ", modelId)
-    provider = modelId.split(".")[0]
-    generated_text = None
-    if provider == "anthropic":
-        if 'claude-3' in modelId:
-            response_raw = json.loads(response.get("body").read().decode())
-            generated_text = response_raw.get('content')[0].get('text')
-        else:
-            response_body = json.loads(response.get("body").read().decode())
-            generated_text = response_body.get("completion")
-    elif provider == "ai21":
-        response_body = json.loads(response.get("body").read())
-        generated_text = response_body.get("completions")[0].get("data").get("text")
-    elif provider == "amazon":
-        response_body = json.loads(response.get("body").read())
-        generated_text = response_body.get("results")[0].get("outputText")
-    else:
-        raise Exception("Unsupported provider: ", provider)
+def get_bedrock_generate_text(response):
response["output"]["message"]["content"][0]["text"] generated_text = generated_text.replace('```','') return generated_text -def call_bedrock(parameters, prompt): +def call_bedrock(prompt, temperature, max_tokens): global bedrock_client modelId = BEDROCK_MODEL_ID - body = get_bedrock_request_body(modelId, parameters, prompt) - print("ModelId", modelId, "- Body: ", body) + + print(f"Bedrock request - ModelId: {modelId} Temperature: {temperature} Max Tokens: {max_tokens}") + if (bedrock_client is None): bedrock_client = get_bedrock_client() - response = bedrock_client.invoke_model(body=json.dumps(body), modelId=modelId, accept='application/json', contentType='application/json') - generated_text = get_bedrock_generate_text(modelId, response) + + message = { + "role": "user", + "content": [{"text": prompt}] + } + + response = bedrock_client.converse( + modelId=modelId, + messages=[message], + inferenceConfig={ + "maxTokens": max_tokens, + "temperature": temperature + } + ) + + generated_text = get_bedrock_generate_text(response) return generated_text def get_template_from_dynamodb(): @@ -164,10 +131,8 @@ def generate_bedrock_query(transcript, question): prompt = prompt.replace("{transcript}", transcript) prompt = prompt.replace("{question}", question) - parameters = { - "temperature": 0 - } - generated_text = call_bedrock(parameters, prompt) + + generated_text = call_bedrock(prompt, 0, MAX_TOKENS) return generated_text