Merged (changes from 1 commit)
10 changes: 5 additions & 5 deletions lib/llm-events/aws-bedrock/bedrock-command.js
@@ -37,7 +37,7 @@ class BedrockCommand {
result = this.#body.max_tokens_to_sample
} else if (this.isClaude3() === true || this.isCohere() === true) {
result = this.#body.max_tokens
- } else if (this.isLlama2() === true) {
+ } else if (this.isLlama() === true) {
result = this.#body.max_gen_length
} else if (this.isTitan() === true) {
result = this.#body.textGenerationConfig?.maxTokenCount
@@ -80,7 +80,7 @@ class BedrockCommand {
this.isClaude() === true ||
this.isAi21() === true ||
this.isCohere() === true ||
- this.isLlama2() === true
+ this.isLlama() === true
) {
result = this.#body.prompt
} else if (this.isClaude3() === true) {
@@ -104,7 +104,7 @@
this.isClaude3() === true ||
this.isAi21() === true ||
this.isCohere() === true ||
- this.isLlama2() === true
+ this.isLlama() === true
) {
result = this.#body.temperature
}
@@ -131,8 +131,8 @@
return this.#modelId.startsWith('cohere.embed')
}

- isLlama2() {
-   return this.#modelId.startsWith('meta.llama2')
+ isLlama() {
+   return this.#modelId.startsWith('meta.llama')
}

isTitan() {
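For context on the rename: `isLlama()` is a plain prefix check, so it now matches both llama2 and llama3 model identifiers. A minimal sketch, assuming the constructor shape used in the unit tests further down (the require path is illustrative):

```js
// Both generations share the 'meta.llama' prefix, so both satisfy isLlama().
const BedrockCommand = require('./lib/llm-events/aws-bedrock/bedrock-command') // path assumed

const llama2Cmd = new BedrockCommand({
  modelId: 'meta.llama2-13b-chat-v1',
  body: JSON.stringify({ prompt: 'who are you' })
})
const llama3Cmd = new BedrockCommand({
  modelId: 'meta.llama3-8b-instruct-v1:0',
  body: JSON.stringify({ prompt: 'who are you' })
})
console.log(llama2Cmd.isLlama(), llama3Cmd.isLlama()) // true true
```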
4 changes: 2 additions & 2 deletions lib/llm-events/aws-bedrock/bedrock-response.js
@@ -70,7 +70,7 @@ class BedrockResponse {
} else if (cmd.isCohere() === true) {
this.#completions = body.generations?.map((g) => g.text) ?? []
this.#id = body.id
- } else if (cmd.isLlama2() === true) {
+ } else if (cmd.isLlama() === true) {
body.generation && this.#completions.push(body.generation)
} else if (cmd.isTitan() === true) {
this.#completions = body.results?.map((r) => r.outputText) ?? []
@@ -107,7 +107,7 @@ class BedrockResponse {
result = this.#parsedBody.stop_reason
} else if (cmd.isCohere() === true) {
result = this.#parsedBody.generations?.find((r) => r.finish_reason !== null)?.finish_reason
- } else if (cmd.isLlama2() === true) {
+ } else if (cmd.isLlama() === true) {
result = this.#parsedBody.stop_reason
} else if (cmd.isTitan() === true) {
result = this.#parsedBody.results?.find((r) => r.completionReason !== null)?.completionReason
6 changes: 3 additions & 3 deletions lib/llm-events/aws-bedrock/stream-handler.js
@@ -114,9 +114,9 @@ class StreamHandler {
} else if (bedrockCommand.isCohereEmbed() === true) {
this.stopReasonKey = 'nr_none'
this.generator = handleCohereEmbed
- } else if (bedrockCommand.isLlama2() === true) {
+ } else if (bedrockCommand.isLlama() === true) {
this.stopReasonKey = 'stop_reason'
- this.generator = handleLlama2
+ this.generator = handleLlama
} else if (bedrockCommand.isTitan() === true) {
this.stopReasonKey = 'completionReason'
this.generator = handleTitan
@@ -271,7 +271,7 @@ async function* handleCohereEmbed() {
}
}

- async function* handleLlama2() {
+ async function* handleLlama() {
let currentBody = {}
let generation = ''

6 changes: 6 additions & 0 deletions test/lib/aws-server-stubs/ai-server/index.js
@@ -119,6 +119,12 @@ function handler(req, res) {
break
}

+ case 'meta.llama3-8b-instruct-v1:0':
+ case 'meta.llama3-70b-instruct-v1:0': {
+   response = responses.llama3.get(payload.prompt)
[Review comment, Member] since llama3 responses are identical you don't really need mocks. you could just return the llama2 data

+   break
+ }

default: {
response = { statusCode: 418, body: {} }
}
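A minimal sketch of the reviewer's suggestion, for illustration only: rather than adding a `responses/llama3.js` fixture map, the llama3 cases could reuse the llama2 data. This assumes the fixture keys differ from the incoming llama3 prompts only in the model name, so the prompt is remapped before the lookup:

```js
// Hypothetical alternative to a dedicated llama3 fixture map (per the review
// comment). Assumes llama2 fixtures are keyed like 'text llama2 ultimate
// question' while llama3 test prompts say 'llama3'.
case 'meta.llama3-8b-instruct-v1:0':
case 'meta.llama3-70b-instruct-v1:0': {
  response = responses.llama2.get(payload.prompt.replace('llama3', 'llama2'))
  break
}
```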
4 changes: 3 additions & 1 deletion test/lib/aws-server-stubs/ai-server/responses/index.js
@@ -11,12 +11,14 @@ const claude = require('./claude')
const claude3 = require('./claude3')
const cohere = require('./cohere')
const llama2 = require('./llama2')
+ const llama3 = require('./llama3')

module.exports = {
ai21,
amazon,
claude,
claude3,
cohere,
- llama2
+ llama2,
+ llama3
}
84 changes: 84 additions & 0 deletions test/lib/aws-server-stubs/ai-server/responses/llama3.js
@@ -0,0 +1,84 @@
/*
* Copyright 2024 New Relic Corporation. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/

'use strict'

const responses = new Map()
const { contentType, reqId } = require('./constants')

responses.set('text llama3 ultimate question', {
headers: {
'content-type': contentType,
'x-amzn-requestid': reqId,
'x-amzn-bedrock-invocation-latency': 9677,
'x-amzn-bedrock-output-token-count': 4,
'x-amzn-bedrock-input-token-count': 8
},
statusCode: 200,
body: {
generation: '42',
prompt_token_count: 14,
generation_token_count: 205,
stop_reason: 'endoftext'
}
})

responses.set('text llama3 ultimate question streamed', {
headers: {
'content-type': 'application/vnd.amazon.eventstream',
'x-amzn-requestid': reqId,
'x-amzn-bedrock-content-type': contentType
},
statusCode: 200,
chunks: [
{
headers: {
':event-type': { type: 'string', value: 'chunk' },
':content-type': { type: 'string', value: 'application/json' },
':message-type': { type: 'string', value: 'event' }
},
body: {
generation: '42',
prompt_token_count: null,
generation_token_count: 211,
stop_reason: null
}
},
{
headers: {
':event-type': { type: 'string', value: 'chunk' },
':content-type': { type: 'string', value: 'application/json' },
':message-type': { type: 'string', value: 'event' }
},
body: {
'generation': '',
'prompt_token_count': null,
'generation_token_count': 212,
'stop_reason': 'endoftext',
'amazon-bedrock-invocationMetrics': {
inputTokenCount: 8,
outputTokenCount: 4,
invocationLatency: 9825,
firstByteLatency: 283
}
}
}
]
})

responses.set('text llama3 ultimate question error', {
headers: {
'content-type': contentType,
'x-amzn-requestid': reqId,
'x-amzn-errortype': 'ValidationException:http://internal.amazon.com/coral/com.amazon.bedrock/'
},
statusCode: 400,
body: {
message:
'Malformed input request: 2 schema violations found, please reformat your input and try again.'
}
})

module.exports = responses
38 changes: 35 additions & 3 deletions test/unit/llm-events/aws-bedrock/bedrock-command.test.js
@@ -52,6 +52,13 @@ const llama2 = {
}
}

+ const llama3 = {
+   modelId: 'meta.llama3-8b-instruct-v1:0',
+   body: {
+     prompt: 'who are you'
+   }
+ }

const titan = {
modelId: 'amazon.titan-text-lite-v1',
body: {
@@ -85,7 +92,7 @@ tap.test('non-conforming command is handled gracefully', async (t) => {
'Claude3',
'Cohere',
'CohereEmbed',
- 'Llama2',
+ 'Llama',
'Titan',
'TitanEmbed'
]) {
@@ -212,7 +219,7 @@ tap.test('cohere embed minimal command works', async (t) => {
tap.test('llama2 minimal command works', async (t) => {
t.context.updatePayload(structuredClone(llama2))
const cmd = new BedrockCommand(t.context.input)
- t.equal(cmd.isLlama2(), true)
+ t.equal(cmd.isLlama(), true)
t.equal(cmd.maxTokens, undefined)
t.equal(cmd.modelId, llama2.modelId)
t.equal(cmd.modelType, 'completion')
@@ -226,7 +233,32 @@ tap.test('llama2 complete command works', async (t) => {
payload.body.temperature = 0.5
t.context.updatePayload(payload)
const cmd = new BedrockCommand(t.context.input)
- t.equal(cmd.isLlama2(), true)
+ t.equal(cmd.isLlama(), true)
t.equal(cmd.maxTokens, 25)
t.equal(cmd.modelId, payload.modelId)
t.equal(cmd.modelType, 'completion')
t.equal(cmd.prompt, payload.body.prompt)
t.equal(cmd.temperature, payload.body.temperature)
})

+ tap.test('llama3 minimal command works', async (t) => {
+   t.context.updatePayload(structuredClone(llama3))
+   const cmd = new BedrockCommand(t.context.input)
+   t.equal(cmd.isLlama(), true)
+   t.equal(cmd.maxTokens, undefined)
+   t.equal(cmd.modelId, llama3.modelId)
+   t.equal(cmd.modelType, 'completion')
+   t.equal(cmd.prompt, llama3.body.prompt)
+   t.equal(cmd.temperature, undefined)
+ })
+
+ tap.test('llama3 complete command works', async (t) => {
+   const payload = structuredClone(llama3)
+   payload.body.max_gen_length = 25
+   payload.body.temperature = 0.5
+   t.context.updatePayload(payload)
+   const cmd = new BedrockCommand(t.context.input)
+   t.equal(cmd.isLlama(), true)
+   t.equal(cmd.maxTokens, 25)
+   t.equal(cmd.modelId, payload.modelId)
+   t.equal(cmd.modelType, 'completion')
18 changes: 9 additions & 9 deletions test/unit/llm-events/aws-bedrock/bedrock-response.test.js
@@ -38,8 +38,8 @@ const cohere = {
]
}

- const llama2 = {
-   generation: 'llama2-response',
+ const llama = {
+   generation: 'llama-response',
stop_reason: 'done'
}

@@ -79,7 +79,7 @@ tap.beforeEach((t) => {
isCohere() {
return false
},
- isLlama2() {
+ isLlama() {
return false
},
isTitan() {
@@ -172,8 +172,8 @@ tap.test('cohere complete responses work', async (t) => {
t.equal(res.statusCode, 200)
})

- tap.test('llama2 malformed responses work', async (t) => {
-   t.context.bedrockCommand.isLlama2 = () => true
+ tap.test('llama malformed responses work', async (t) => {
+   t.context.bedrockCommand.isLlama = () => true
const res = new BedrockResponse(t.context)
t.same(res.completions, [])
t.equal(res.finishReason, undefined)
@@ -183,11 +183,11 @@ tap.test('llama2 malformed responses work', async (t) => {
t.equal(res.statusCode, 200)
})

- tap.test('llama2 complete responses work', async (t) => {
-   t.context.bedrockCommand.isLlama2 = () => true
-   t.context.updatePayload(structuredClone(llama2))
+ tap.test('llama complete responses work', async (t) => {
+   t.context.bedrockCommand.isLlama = () => true
+   t.context.updatePayload(structuredClone(llama))
const res = new BedrockResponse(t.context)
- t.same(res.completions, ['llama2-response'])
+ t.same(res.completions, ['llama-response'])
t.equal(res.finishReason, 'done')
t.same(res.headers, t.context.response.response.headers)
t.equal(res.id, undefined)
10 changes: 5 additions & 5 deletions test/unit/llm-events/aws-bedrock/stream-handler.test.js
@@ -45,7 +45,7 @@ tap.beforeEach((t) => {
isClaude3() {
return false
},
- isLlama2() {
+ isLlama() {
return false
},
isTitan() {
@@ -242,15 +242,15 @@ tap.test('handles cohere embedding streams', async (t) => {
t.equal(br.statusCode, 200)
})

- tap.test('handles llama2 streams', async (t) => {
-   t.context.passThroughParams.bedrockCommand.isLlama2 = () => true
+ tap.test('handles llama streams', async (t) => {
+   t.context.passThroughParams.bedrockCommand.isLlama = () => true
t.context.chunks = [
{ generation: '1', stop_reason: null },
{ generation: '2', stop_reason: 'done', ...t.context.metrics }
]
const handler = new StreamHandler(t.context)

- t.equal(handler.generator.name, 'handleLlama2')
+ t.equal(handler.generator.name, 'handleLlama')
for await (const event of handler.generator()) {
t.type(event.chunk.bytes, Uint8Array)
}
@@ -267,7 +267,7 @@ tap.test('handles llama2 streams', async (t) => {
})

const bc = new BedrockCommand({
- modelId: 'meta.llama2',
+ modelId: 'meta.llama',
body: JSON.stringify({
prompt: 'prompt',
max_gen_length: 5
7 changes: 6 additions & 1 deletion test/versioned/aws-sdk-v3/bedrock-chat-completions.tap.js
@@ -51,6 +51,10 @@ const requests = {
llama2: (prompt, modelId) => ({
body: JSON.stringify({ prompt, max_gen_length: 100, temperature: 0.5 }),
modelId
}),
+ llama3: (prompt, modelId) => ({
+   body: JSON.stringify({ prompt, max_gen_length: 100, temperature: 0.5 }),
+   modelId
+ })
}

@@ -98,7 +102,8 @@ tap.afterEach(async (t) => {
{ modelId: 'anthropic.claude-v2', resKey: 'claude' },
{ modelId: 'anthropic.claude-3-haiku-20240307-v1:0', resKey: 'claude3' },
{ modelId: 'cohere.command-text-v14', resKey: 'cohere' },
- { modelId: 'meta.llama2-13b-chat-v1', resKey: 'llama2' }
+ { modelId: 'meta.llama2-13b-chat-v1', resKey: 'llama2' },
+ { modelId: 'meta.llama3-8b-instruct-v1:0', resKey: 'llama3' }
].forEach(({ modelId, resKey }) => {
tap.test(`${modelId}: should properly create completion segment`, (t) => {
const { bedrock, client, responses, agent, expectedExternalPath } = t.context
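For completeness, a hedged sketch of how a versioned test might exercise the new `llama3` entry against the stub server (client construction, agent transaction wrapping, and assertions omitted; `requests` is the map defined earlier in this file, and `client` is assumed to be a `BedrockRuntimeClient` pointed at the stub):

```js
// Inside an async test body. Illustrative only.
const { InvokeModelCommand } = require('@aws-sdk/client-bedrock-runtime')

const prompt = 'text llama3 ultimate question'
const command = new InvokeModelCommand(
  requests.llama3(prompt, 'meta.llama3-8b-instruct-v1:0')
)
const response = await client.send(command)
const body = JSON.parse(new TextDecoder().decode(response.body))
// body.generation === '42', per the llama3 stub response above
```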