diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/Dockerfile b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/Dockerfile
new file mode 100644
index 0000000..7f024da
--- /dev/null
+++ b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/Dockerfile
@@ -0,0 +1,22 @@
+# --- Build stage ---
+FROM eclipse-temurin:21-jdk AS builder
+
+WORKDIR /app
+COPY . .
+
+# Use parallel threads and configure Gradle for speed
+RUN ./gradlew :artificial-intelligence:context-enabled-semantic-caching-with-spring-ai:bootJar \
+    --no-daemon \
+    --parallel \
+    --build-cache \
+    --configuration-cache \
+    --max-workers=$(nproc)
+
+# --- Runtime stage ---
+FROM eclipse-temurin:21-jre
+
+WORKDIR /app
+COPY --from=builder /app/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/build/libs/*.jar app.jar
+
+EXPOSE 8080
+ENTRYPOINT ["java", "-jar", "app.jar"]
\ No newline at end of file
diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/README.md b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/README.md
new file mode 100644
index 0000000..1fa460d
--- /dev/null
+++ b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/README.md
@@ -0,0 +1,276 @@
+### Context-Enabled Semantic Caching with Spring AI Demo
+
+Semantic Caching is a technique that enhances Large Language Model (LLM) applications by caching responses based on the semantic meaning of queries rather than exact matches.
+
+Even though Semantic Caching can help us save costs and time, it may come with downsides depending on the business to which it's applied.
+
+Sometimes, prompts may be similar but refer to different contexts. For example: `What kind of beer goes well with meat?` and `What kind of beer goes well with pizza?`
+
+These two prompts are semantically similar but refer to two different contexts: `pizza` and `meat`. This is where Context-Enabled Semantic Caching can help.
+
+Instead of relying solely on semantic caching, we can pass the cached response, together with the newly provided information, to a less capable, cheaper, and faster model so that it can generate a response that satisfies the new prompt while preserving the information, tone, and other characteristics that came from the more capable model (see the flow sketch in the How It Is Implemented section below).
+
+This demo showcases how to implement Context-Enabled Semantic Caching using Spring AI and Redis Vector Store to improve performance and reduce costs in a beer recommendation system.
+
+## Learning resources
+
+- Video: [What is semantic caching?](https://www.youtube.com/watch?v=AtVTT_s8AGc)
+- Video: [What is an embedding model?](https://youtu.be/0U1S0WSsPuE)
+- Video: [Exact vs Approximate Nearest Neighbors - What's the difference?](https://youtu.be/9NvO-VdjY80)
+- Video: [What is a vector database?](https://youtu.be/Yhv19le0sBw)
+
+## Requirements
+
+To run this demo, you’ll need the following installed on your system:
+- Docker – [Install Docker](https://docs.docker.com/get-docker/)
+- Docker Compose – Included with Docker Desktop or available via the CLI installation guide
+- An OpenAI API Key – You can get one from [platform.openai.com](https://platform.openai.com)
+
+## Running the demo
+
+The easiest way to run the demo is with Docker Compose, which sets up all required services in one command.
+
+### Step 1: Clone the repository
+
+If you haven’t already:
+
+```bash
+git clone https://github.com/redis-developer/redis-springboot-recipes.git
+cd redis-springboot-recipes/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai
+```
+
+### Step 2: Configure your environment
+
+You can pass your OpenAI API key in two ways:
+
+#### Option 1: Export the key via terminal
+
+```bash
+export OPENAI_API_KEY=sk-your-api-key
+```
+
+#### Option 2: Use a .env file
+
+Create a `.env` file in the same directory as the `docker-compose.yml` file:
+
+```env
+OPENAI_API_KEY=sk-your-api-key
+```
+
+### Step 3: Start the services
+
+```bash
+docker compose up --build
+```
+
+This will start:
+
+- redis: for storing the vector embeddings and the semantic cache
+- redis-insight: a UI to explore the Redis data
+- context-enabled-semantic-caching-app: the Spring Boot app that implements the RAG application
+
+## Using the demo
+
+When all of your services are up and running, go to `localhost:8080` to access the demo.
+
+![Screenshot of a web app titled “Semantic Caching with Spring AI.” It features a Beer Knowledge Assistant chat interface with a welcome message, input box, and “Start New Chat” and “Clear Chat” buttons. The footer displays “Powered by Redis.”](readme-assets/1_home.png)
+
+If you click on `Start New Chat`, it may be that the embeddings are still being created, in which case you will see a message asking you to wait for this operation to complete. This is the operation where the documents we'll search through are turned into vectors and stored in the database. It runs only the first time the app starts up and is required regardless of the vector database you use.
+
+![Popup message stating that embeddings are still being created (14,472 of 20,000 completed), with an estimated duration of three minutes and a “Close” button.](readme-assets/2_embeddings_being_created.png)
+
+Once all the embeddings have been created, you can start asking your chatbot questions. It will semantically search through the documents we have stored, try to find the best answer for your questions, and cache the responses semantically in Redis:
+
+![Animated screen recording of a user typing “What kind of beer goes well with smoked meat?” into the Beer Knowledge Assistant in the Semantic Caching with Spring AI demo. The interface shows the question being sent, demonstrating semantic search in action.](readme-assets/3_asking_a_question.gif)
+
+If you ask something similar to a question that has already been asked, the chatbot will retrieve the cached answer from Redis and use a cheaper, faster model to adapt it to the new context instead of sending the query to the more capable model, returning an answer much faster.
+
+![Animated screen recording showing a user asking a similar follow-up question, “What type of beer is a good combination with smoked beef?” The assistant instantly retrieves a cached answer from Redis, demonstrating faster response through semantic caching.](readme-assets/4_retrieving_from_cache.gif)
+
+## How It Is Implemented
+
+The application uses Spring AI's `RedisVectorStore` to store and retrieve responses from a semantic cache.
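+
+At a high level, each request follows the flow below. This is a condensed sketch of the logic in `RagService` (shown in full later in this README), not additional code in the project; `promptWith`, `cheapModel`, and `expensiveModel` abbreviate the prompt assembly and model beans described in the sections that follow:
+
+```kotlin
+fun answer(message: String): String {
+    // 1. Retrieve relevant documents from the beer vector store
+    val docs = beerVectorStore.similaritySearch(message)
+
+    // 2. Look for a semantically similar prompt in the semantic cache
+    val (cachedPrompt, cachedAnswer) = semanticCachingService.getFromCache(message, 0.8)
+
+    // 3. Cache hit: the cheap model adapts the cached answer to the new context.
+    //    Cache miss: the expensive model answers from scratch.
+    val response = if (cachedPrompt != null && cachedAnswer != null) {
+        cheapModel.call(promptWith(docs, message, cachedPrompt, cachedAnswer))
+    } else {
+        expensiveModel.call(promptWith(docs, message))
+    }
+
+    // 4. Cache the new prompt/answer pair for future queries
+    semanticCachingService.storeInCache(message, response)
+    return response
+}
+```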
+
+### Configuring the Chat Models
+
+```kotlin
+@Bean
+fun openAiExpensiveChatModel(): OpenAiChatModel {
+    val modelName = "gpt-5-2025-08-07"
+    return openAiChatModel(modelName)
+}
+
+@Bean
+fun openAiCheapChatModel(): OpenAiChatModel {
+    val modelName = "gpt-4.1-nano"
+    return openAiChatModel(modelName)
+}
+
+private fun openAiChatModel(modelName: String): OpenAiChatModel {
+    val openAiApi = OpenAiApi.builder()
+        .apiKey(System.getenv("OPENAI_API_KEY"))
+        .build()
+    val openAiChatOptions = OpenAiChatOptions.builder()
+        .model(modelName)
+        .temperature(1.0)
+        .build()
+
+    return OpenAiChatModel.builder()
+        .openAiApi(openAiApi)
+        .defaultOptions(openAiChatOptions)
+        .build()
+}
+```
+
+### Configuring the Semantic Cache
+
+```kotlin
+@Bean
+fun semanticCachingVectorStore(
+    embeddingModel: TransformersEmbeddingModel,
+    jedisPooled: JedisPooled
+): RedisVectorStore {
+    return RedisVectorStore.builder(jedisPooled, embeddingModel)
+        .indexName("semanticCachingIdx")
+        .contentFieldName("content")
+        .embeddingFieldName("embedding")
+        .metadataFields(
+            RedisVectorStore.MetadataField("answer", Schema.FieldType.TEXT),
+        )
+        .prefix("semantic-caching:")
+        .initializeSchema(true)
+        .vectorAlgorithm(RedisVectorStore.Algorithm.HSNW)
+        .build()
+}
+```
+
+Let's break this down:
+
+- **Index Name**: `semanticCachingIdx` - Redis will create an index with this name for searching cached responses
+- **Content Field**: `content` - The raw prompt that will be embedded
+- **Embedding Field**: `embedding` - The field that will store the resulting vector embedding
+- **Metadata Fields**: `answer` - A TEXT field to store the LLM's response
+- **Prefix**: `semantic-caching:` - All keys in Redis will be prefixed with this to organize the data
+- **Vector Algorithm**: `HSNW` - Spring AI's enum constant for the HNSW (Hierarchical Navigable Small World) algorithm, used for efficient approximate nearest-neighbor search
+
+### Storing Responses in the Semantic Cache
+
+When a user asks a question and the system generates a response, it stores the prompt and response in the semantic cache:
+
+```kotlin
+fun storeInCache(prompt: String, answer: String) {
+    semanticCachingVectorStore.add(listOf(Document(
+        prompt,
+        mapOf(
+            "answer" to answer
+        )
+    )))
+}
+```
+
+This method:
+1. Creates a `Document` with the prompt as the content
+2. Adds the answer as metadata
+3. Stores the document in the vector store, which automatically generates and stores the embedding
+
+### Retrieving Responses from the Semantic Cache
+
+When a user asks a question, the system first checks if there's a semantically similar question in the cache:
+
+```kotlin
+fun getFromCache(prompt: String, similarityThreshold: Double): Pair<String?, String?> {
+    val results = semanticCachingVectorStore.similaritySearch(
+        SearchRequest.builder()
+            .query(prompt)
+            .topK(1)
+            .build()
+    )
+
+    if (results?.isNotEmpty() == true) {
+        if (similarityThreshold < (results[0].score ?: 0.0)) {
+            logger.info("Returning cached answer. Similarity score: ${results[0].score}")
+            return Pair(results[0].text as String, results[0].metadata["answer"] as String)
+        }
+    }
+
+    return Pair(null, null)
+}
+```
+
+This method:
+1. Performs a vector similarity search for the most similar prompt in the cache
+2. Checks whether the similarity score is above the threshold (0.8 in this demo)
+3. If a match is found, returns both the cached prompt and its answer; the RAG service then uses the cheaper model to compute a new response based on the new documents and the previously generated answer
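+
+You can check what actually lands in the cache with `redis-cli`, or through the RedisInsight UI at `localhost:5540`. The commands below are illustrative; the exact document layout depends on the Spring AI version in use:
+
+```bash
+# Inspect the cache index schema and its document count
+redis-cli FT.INFO semanticCachingIdx
+
+# List a few cached entries (keys use the "semantic-caching:" prefix)
+redis-cli FT.SEARCH semanticCachingIdx "*" RETURN 2 content answer LIMIT 0 3
+```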
+
+### Integrating with the RAG System
+
+The RAG service integrates the semantic cache with the RAG system:
+
+```kotlin
+// Regular prompt, plus a prompt suffix used in case of a cache hit
+
+private val systemBeerPrompt = """
+    You're assisting with questions about products in a beer catalog.
+    Use the information from the DOCUMENTS section to provide accurate answers.
+    If the answer involves referring to the ABV or IBU of the beer, include the beer name in the response.
+    If unsure, simply state that you don't know.
+
+    DOCUMENTS:
+    {documents}
+    """.trimIndent()
+
+private val semanticCachedAnswerPromptSuffix = """
+    A similar prompt has been processed before. Use it as the base for your response with the new document selection and new prompt:
+
+    SIMILAR PROMPT ALREADY PROCESSED:
+    SIMILAR PROMPT:
+    {similarPrompt}
+
+    SIMILAR ANSWER:
+    {similarAnswer}
+    """.trimIndent()
+
+fun retrieve(message: String): RagResult {
+    // Get documents
+    val docs = getDocuments(message)
+
+    // Get potential cached answer
+    val (cachedQuestion, cachedAnswer) = semanticCachingService.getFromCache(message, 0.8)
+
+    // Generate the system prompt
+    val systemMessage = if (cachedQuestion != null && cachedAnswer != null) {
+        getSystemMessage(docs, cachedQuestion, cachedAnswer)
+    } else {
+        getSystemMessage(docs)
+    }
+
+    val userMessage = UserMessage(message)
+
+    val prompt = Prompt(listOf(systemMessage, userMessage))
+
+    // Call the expensive or cheap model accordingly
+    val response: ChatResponse = if (cachedQuestion != null && cachedAnswer != null) {
+        openAiCheapChatModel.call(prompt)
+    } else {
+        openAiExpensiveChatModel.call(prompt)
+    }
+
+    // Store in the semantic cache
+    semanticCachingService.storeInCache(message, response.result.output.text.toString())
+
+    return RagResult(
+        generation = response.result
+    )
+}
+```
+
+This orchestrates the entire process:
+1. Retrieve relevant documents using vector similarity search
+2. Check if there's a semantically similar prompt in the cache
+3. If found, ask the cheaper model to adapt the cached answer to the new prompt and document selection
+4. If not found, ask the more capable model to generate a response from scratch
+5. Store the prompt and response in the semantic cache for future use
+
+This approach significantly improves performance and reduces costs by routing semantically similar queries to a cheaper, faster model, while still providing accurate and contextually relevant responses.
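+
+## Calling the API directly
+
+If you want to exercise the endpoints without the UI, the two routes exposed by `RagController` can be called with `curl`. The JSON bodies shown as comments are abbreviated examples of the response shapes defined in the controller:
+
+```bash
+# Start a chat session (returns HTTP 503 while embeddings are still being created)
+curl -X POST localhost:8080/chat/startChat
+# {"message":"<chat-id>"}
+
+# Ask a question within that session
+curl -X POST localhost:8080/chat/<chat-id> \
+  -H "Content-Type: application/json" \
+  -d '{"prompt": "What kind of beer goes well with smoked meat?"}'
+# {"message":"...","metrics":{"embeddingTimeMs":...,"searchTimeMs":...,"llmTimeMs":...,"cachingTimeMs":...,"wasCached":false,"model":"..."}}
+```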
diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/build.gradle.kts b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/build.gradle.kts
new file mode 100644
index 0000000..b99fcaf
--- /dev/null
+++ b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/build.gradle.kts
@@ -0,0 +1,53 @@
+plugins {
+    kotlin("jvm") version "1.9.25"
+    kotlin("plugin.spring") version "1.9.25"
+    id("org.springframework.boot") version "3.5.5"
+    id("io.spring.dependency-management") version "1.1.7"
+}
+
+group = "com.redis"
+version = "0.0.1-SNAPSHOT"
+description = "context-enabled-semantic-caching"
+
+java {
+    toolchain {
+        languageVersion = JavaLanguageVersion.of(21)
+    }
+}
+
+repositories {
+    mavenCentral()
+}
+
+extra["springAiVersion"] = "1.0.1"
+
+dependencies {
+    implementation("org.springframework.boot:spring-boot-starter")
+    implementation("org.springframework.boot:spring-boot-starter-web")
+    // Versions of the Spring AI artifacts come from the BOM imported below
+    implementation("org.springframework.ai:spring-ai-transformers")
+    implementation("org.springframework.ai:spring-ai-starter-vector-store-redis")
+    implementation("org.springframework.ai:spring-ai-starter-model-openai")
+
+    implementation("com.redis.om:redis-om-spring:1.0.0")
+
+    implementation("org.jetbrains.kotlin:kotlin-reflect")
+    testImplementation("org.springframework.boot:spring-boot-starter-test")
+    testImplementation("org.jetbrains.kotlin:kotlin-test-junit5")
+    testRuntimeOnly("org.junit.platform:junit-platform-launcher")
+}
+
+dependencyManagement {
+    imports {
+        mavenBom("org.springframework.ai:spring-ai-bom:${property("springAiVersion")}")
+    }
+}
+
+kotlin {
+    compilerOptions {
+        freeCompilerArgs.addAll("-Xjsr305=strict")
+    }
+}
+
+tasks.withType<Test> {
+    useJUnitPlatform()
+}
diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/docker-compose.yml b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/docker-compose.yml
new file mode 100644
index 0000000..a07c8cc
--- /dev/null
+++ b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/docker-compose.yml
@@ -0,0 +1,48 @@
+version: '3.8'
+
+services:
+  redis:
+    image: redis:latest
+    ports:
+      - "6379:6379"
+    volumes:
+      - redis-data:/data
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 5s
+      timeout: 5s
+      retries: 5
+
+  redis-insight:
+    image: redis/redisinsight:latest
+    ports:
+      - "5540:5540"
+    environment:
+      - RI_REDIS_HOST=redis
+      - RI_REDIS_PORT=6379
+      - RI_REDIS_ALIAS=Local Redis
+      - RI_REDIS_USERNAME=default
+      - RI_REDIS_PASSWORD=
+      - RI_REDIS_TLS=FALSE
+    volumes:
+      - redis-insight-data:/db
+    depends_on:
+      - redis
+
+  context-enabled-semantic-caching-app:
+    build:
+      context: ../..
+      dockerfile: artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/Dockerfile
+    ports:
+      - "8080:8080"
+    environment:
+      - SPRING_DATA_REDIS_HOST=redis
+      - SPRING_DATA_REDIS_PORT=6379
+      - OPENAI_API_KEY=${OPENAI_API_KEY:-demo}
+    depends_on:
+      redis:
+        condition: service_healthy
+
+volumes:
+  redis-data:
+  redis-insight-data:
diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/ContextEnabledSemanticCachingApplication.kt b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/ContextEnabledSemanticCachingApplication.kt
new file mode 100644
index 0000000..6cbe361
--- /dev/null
+++ b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/ContextEnabledSemanticCachingApplication.kt
@@ -0,0 +1,11 @@
+package com.redis.contextenabledsemanticcaching
+
+import org.springframework.boot.autoconfigure.SpringBootApplication
+import org.springframework.boot.runApplication
+
+@SpringBootApplication
+class ContextEnabledSemanticCachingApplication
+
+fun main(args: Array<String>) {
+    runApplication<ContextEnabledSemanticCachingApplication>(*args)
+}
diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/RagConfiguration.kt b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/RagConfiguration.kt
new file mode 100644
index 0000000..31c0f0b
--- /dev/null
+++ b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/RagConfiguration.kt
@@ -0,0 +1,26 @@
+package com.redis.contextenabledsemanticcaching
+
+import org.springframework.ai.transformers.TransformersEmbeddingModel
+import org.springframework.ai.vectorstore.redis.RedisVectorStore
+import org.springframework.context.annotation.Bean
+import org.springframework.context.annotation.Configuration
+import redis.clients.jedis.JedisPooled
+
+@Configuration
+class RagConfiguration {
+
+    @Bean
+    fun beerVectorStore(
+        embeddingModel: TransformersEmbeddingModel,
+        jedisPooled: JedisPooled
+    ): RedisVectorStore {
+        return RedisVectorStore.builder(jedisPooled, embeddingModel)
+            .indexName("beerIdx")
+            .contentFieldName("content")
+            .embeddingFieldName("embedding")
+            .prefix("beer:")
+            .initializeSchema(true)
+            .vectorAlgorithm(RedisVectorStore.Algorithm.HSNW)
+            .build()
+    }
+}
\ No newline at end of file
diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/RagController.kt b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/RagController.kt
new file mode 100644
index 0000000..1710de2
--- /dev/null
+++ b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/RagController.kt
@@ -0,0 +1,66 @@
+package com.redis.contextenabledsemanticcaching
+
+import com.redis.contextenabledsemanticcaching.embeddingmodel.EmbeddingStatusService
+import org.springframework.http.HttpStatus
+import org.springframework.http.ResponseEntity
+import org.springframework.stereotype.Controller
+import org.springframework.web.bind.annotation.*
+import java.util.*
+
+@Controller
+class RagController(
+    private val ragService: RagService,
+    private val embeddingStatusService: EmbeddingStatusService
+) {
+
+    @PostMapping("/chat/startChat")
+    @ResponseBody
+    fun startChat(): ResponseEntity<Any> {
+        val embeddedDocs = embeddingStatusService.getTotalDocNum()
+        if (embeddedDocs < 20000) {
+            return ResponseEntity
+                .status(HttpStatus.SERVICE_UNAVAILABLE)
+                .body(ErrorResponse("Embeddings are still being created ($embeddedDocs of 20000 already created). This operation takes around three minutes to complete. Please try again later."))
+        }
+        return ResponseEntity.ok(Message(UUID.randomUUID().toString()))
+    }
+
+    @PostMapping("/chat/{chatId}")
+    @ResponseBody
+    fun chatMessage(@PathVariable chatId: String, @RequestBody prompt: Prompt): ChatResponse {
+        val result = ragService.retrieve(prompt.prompt)
+        return ChatResponse(
+            message = result.generation.output.text.toString(),
+            metrics = MetricsResponse(
+                embeddingTimeMs = result.metrics.embeddingTimeMs,
+                searchTimeMs = result.metrics.searchTimeMs,
+                llmTimeMs = result.metrics.llmTimeMs,
+                cachingTimeMs = result.metrics.cachingTimeMs,
+                wasCached = result.metrics.wasCached,
+                model = result.metrics.modelUsed
+            )
+        )
+    }
+}
+
+data class Message(val message: String = "")
+
+data class Prompt(val prompt: String = "")
+
+data class MetricsResponse(
+    val embeddingTimeMs: Long,
+    val searchTimeMs: Long,
+    val llmTimeMs: Long,
+    val cachingTimeMs: Long = 0,
+    val wasCached: Boolean = false,
+    val model: String? = null
+)
+
+data class ChatResponse(
+    val message: String,
+    val metrics: MetricsResponse
+)
+
+data class ErrorResponse(
+    val error: String
+)
\ No newline at end of file
diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/RagDataLoader.kt b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/RagDataLoader.kt
new file mode 100644
index 0000000..5fc29d4
--- /dev/null
+++ b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/RagDataLoader.kt
@@ -0,0 +1,52 @@
+package com.redis.contextenabledsemanticcaching
+
+import org.slf4j.LoggerFactory
+import org.springframework.ai.reader.JsonReader
+import org.springframework.ai.vectorstore.redis.RedisVectorStore
+import org.springframework.beans.factory.annotation.Value
+import org.springframework.boot.ApplicationArguments
+import org.springframework.boot.ApplicationRunner
+import org.springframework.core.io.InputStreamResource
+import org.springframework.core.io.Resource
+import org.springframework.stereotype.Component
+import java.util.zip.GZIPInputStream
+
+@Component
+class RagDataLoader(
+    private val beerVectorStore: RedisVectorStore
+) : ApplicationRunner {
+
+    @Value("classpath:/data/beers.json.gz")
+    private lateinit var data: Resource
+
+    override fun run(args: ApplicationArguments) {
+        // Skip loading if the index already holds the full dataset
+        // (num_docs is the same figure EmbeddingStatusService checks)
+        val indexInfo = beerVectorStore.jedis.ftInfo("beerIdx")
+        if (indexInfo["num_docs"] as Long >= 20000) {
+            logger.info("Embeddings already loaded. Skipping.")
Skipping") + return + } + + var file: Resource = data + if (data.filename?.endsWith(".gz") == true) { + val inputStream = GZIPInputStream(data.inputStream) + file = InputStreamResource(inputStream, "beers.json.gz") + } + + logger.info("Creating Embeddings (May take around 3 minutes...)") + val loader = JsonReader(file, *KEYS) + val documents = loader.get() + val batchSize = 500 + + documents.chunked(batchSize).forEachIndexed { index, batch -> + beerVectorStore.add(batch) + logger.info("Inserted batch ${index + 1} with ${batch.size} documents") + } + + logger.info("${documents.size} embeddings created.") + } + + companion object { + private val logger = LoggerFactory.getLogger(RagDataLoader::class.java) + private val KEYS = arrayOf("name", "abv", "ibu", "description") + } +} \ No newline at end of file diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/RagService.kt b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/RagService.kt new file mode 100644 index 0000000..243dd69 --- /dev/null +++ b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/RagService.kt @@ -0,0 +1,159 @@ +package com.redis.contextenabledsemanticcaching + +import com.redis.contextenabledsemanticcaching.semanticaching.SemanticCachingService +import org.slf4j.LoggerFactory +import org.springframework.ai.chat.messages.Message +import org.springframework.ai.chat.messages.UserMessage +import org.springframework.ai.chat.model.ChatModel +import org.springframework.ai.chat.model.ChatResponse +import org.springframework.ai.chat.model.Generation +import org.springframework.ai.chat.prompt.Prompt +import org.springframework.ai.chat.prompt.SystemPromptTemplate +import org.springframework.ai.document.Document +import org.springframework.ai.vectorstore.SearchRequest +import org.springframework.ai.vectorstore.VectorStore +import org.springframework.beans.factory.annotation.Value +import org.springframework.stereotype.Service + +@Service +class RagService( + private val openAiExpensiveChatModel: ChatModel, + private val openAiCheapChatModel: ChatModel, + private val beerVectorStore: VectorStore, + private val semanticCachingService: SemanticCachingService +) { + + private val logger = LoggerFactory.getLogger(RagService::class.java) + + private val systemBeerPrompt = """ + You're assisting with questions about products in a beer catalog. + Use the information from the DOCUMENTS section to provide accurate answers. + The answer involves referring to the ABV or IBU of the beer, include the beer name in the response. + If unsure, simply state that you don't know. + + DOCUMENTS: + {documents} + """.trimIndent() + + private val semanticCachedAnswerPromptSuffix = """ + A similar prompt has been processed before. 
+
+        SIMILAR PROMPT ALREADY PROCESSED:
+        SIMILAR PROMPT:
+        {similarPrompt}
+
+        SIMILAR ANSWER:
+        {similarAnswer}
+        """.trimIndent()
+
+    @Value("\${topk:10}")
+    private var topK: Int = 10
+
+    data class RagMetrics(
+        val embeddingTimeMs: Long,
+        val searchTimeMs: Long,
+        val llmTimeMs: Long,
+        val cachingTimeMs: Long = 0,
+        val wasCached: Boolean = false,
+        val modelUsed: String = ""
+    )
+
+    data class RagResult(
+        val generation: Generation,
+        val metrics: RagMetrics
+    )
+
+    fun retrieve(message: String): RagResult {
+        // Get documents, measuring augmentation time (embedding + search)
+        val (embeddingTimeMs, searchTimeMs, docs) = getDocuments(message)
+
+        // Get potential cached answer
+        val startCachingTime = System.currentTimeMillis()
+        val (cachedQuestion, cachedAnswer) = semanticCachingService.getFromCache(message, 0.8)
+        val cachingTimeMs = System.currentTimeMillis() - startCachingTime
+
+        // Generate the system prompt, including the cached answer on a cache hit
+        val systemMessage = if (cachedQuestion != null && cachedAnswer != null) {
+            getSystemMessage(docs, cachedQuestion, cachedAnswer)
+        } else {
+            getSystemMessage(docs)
+        }
+
+        val userMessage = UserMessage(message)
+
+        val prompt = Prompt(listOf(systemMessage, userMessage))
+
+        // Call the expensive or cheap model accordingly
+        val startLlmTime = System.currentTimeMillis()
+        val response: ChatResponse = if (cachedQuestion != null && cachedAnswer != null) {
+            openAiCheapChatModel.call(prompt)
+        } else {
+            openAiExpensiveChatModel.call(prompt)
+        }
+        val llmTimeMs = System.currentTimeMillis() - startLlmTime
+
+        semanticCachingService.storeInCache(message, response.result.output.text.toString())
+
+        return RagResult(
+            generation = response.result,
+            metrics = RagMetrics(
+                embeddingTimeMs = embeddingTimeMs,
+                searchTimeMs = searchTimeMs,
+                llmTimeMs = llmTimeMs,
+                cachingTimeMs = cachingTimeMs,
+                wasCached = cachedQuestion != null,
+                modelUsed = if (cachedQuestion != null) openAiCheapChatModel.defaultOptions.model.toString() else openAiExpensiveChatModel.defaultOptions.model.toString()
+            )
+        )
+    }
+
+    private fun getDocuments(message: String): Triple<Long, Long, List<Document>> {
+        val request = SearchRequest
+            .builder()
+            .query(message)
+            .topK(topK)
+            .build()
+
+        // Measure total search time (includes embedding)
+        val startSearchTime = System.currentTimeMillis()
+        val documents = beerVectorStore.similaritySearch(request) ?: emptyList()
+        val totalSearchTime = System.currentTimeMillis() - startSearchTime
+
+        // Estimate embedding time as a portion of search time
+        // In a real implementation, you might want to measure this directly if possible
+        val embeddingTimeMs = (totalSearchTime * 0.7).toLong() // Assuming embedding is ~70% of search time
+        val searchTimeMs = totalSearchTime - embeddingTimeMs
+
+        return Triple(embeddingTimeMs, searchTimeMs, documents)
+    }
+
+    private fun getSystemMessage(docsForAugmentation: List<Document>): Message {
+        val documents = docsForAugmentation.joinToString("\n") { it.text.toString() }
+
+        logger.info("Retrieved documents: {}", documents)
+
+        val systemPromptTemplate = SystemPromptTemplate(systemBeerPrompt)
+        return systemPromptTemplate.createMessage(mapOf("documents" to documents))
+    }
+
+    private fun getSystemMessage(
+        docsForAugmentation: List<Document>,
+        cachedPrompt: String,
+        cachedAnswer: String
+    ): Message {
+        val documents =
docsForAugmentation.joinToString("\n") { it.text.toString() } + + logger.info("Retrieved documents: {}", documents) + + val systemPromptTemplate = SystemPromptTemplate(systemBeerPrompt + "\n" + semanticCachedAnswerPromptSuffix) + return systemPromptTemplate.createMessage( + mapOf("documents" to documents, "similarPrompt" to cachedPrompt, "similarAnswer" to cachedAnswer) + ) + } +} diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/RedisConfig.kt b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/RedisConfig.kt new file mode 100644 index 0000000..edef4c9 --- /dev/null +++ b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/RedisConfig.kt @@ -0,0 +1,65 @@ +package com.redis.contextenabledsemanticcaching + +import com.redis.om.spring.RedisOMProperties +import org.springframework.context.annotation.Bean +import org.springframework.context.annotation.Configuration +import org.springframework.data.redis.connection.RedisPassword +import org.springframework.data.redis.connection.jedis.JedisClientConfiguration +import org.springframework.data.redis.connection.jedis.JedisConnectionFactory +import redis.clients.jedis.* + +@Configuration +class RedisConfig { + + @Bean + fun jedisPooled( + jedisConnectionFactory: JedisConnectionFactory + ): JedisPooled { + val cc = jedisConnectionFactory.clientConfiguration + val hostAndPort = HostAndPort(jedisConnectionFactory.hostName, jedisConnectionFactory.port) + val standaloneConfig = jedisConnectionFactory.standaloneConfiguration + val username = standaloneConfig?.username + val password = standaloneConfig?.password + val jedisClientConfig = createClientConfig(jedisConnectionFactory.database, username, password!!, cc) + + return JedisPooled( + jedisConnectionFactory.getPoolConfig(), + hostAndPort, + jedisClientConfig + ) + } + + private fun createClientConfig( + database: Int, + username: String?, + password: RedisPassword, + clientConfiguration: JedisClientConfiguration + ): JedisClientConfig { + val jedisConfigBuilder = DefaultJedisClientConfig.builder() + + clientConfiguration.clientName.ifPresent { jedisConfigBuilder.clientName(it) } + jedisConfigBuilder.connectionTimeoutMillis(clientConfiguration.connectTimeout.toMillis().toInt()) + jedisConfigBuilder.socketTimeoutMillis(clientConfiguration.readTimeout.toMillis().toInt()) + jedisConfigBuilder.database(database) + + jedisConfigBuilder.clientSetInfoConfig( + ClientSetInfoConfig.withLibNameSuffix("redis-om-spring_v${RedisOMProperties.ROMS_VERSION}") + ) + + if (!username.isNullOrEmpty()) { + jedisConfigBuilder.user(username) + } + + password.toOptional().map { it.toString() }.ifPresent { jedisConfigBuilder.password(it) } + + if (clientConfiguration.isUseSsl) { + jedisConfigBuilder.ssl(true) + + clientConfiguration.sslSocketFactory.ifPresent { jedisConfigBuilder.sslSocketFactory(it) } + clientConfiguration.hostnameVerifier.ifPresent { jedisConfigBuilder.hostnameVerifier(it) } + clientConfiguration.sslParameters.ifPresent { jedisConfigBuilder.sslParameters(it) } + } + + return jedisConfigBuilder.build() + } +} \ No newline at end of file diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/chatmodel/ChatModelConfiguration.kt 
b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/chatmodel/ChatModelConfiguration.kt
new file mode 100644
index 0000000..1b8be65
--- /dev/null
+++ b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/chatmodel/ChatModelConfiguration.kt
@@ -0,0 +1,49 @@
+package com.redis.contextenabledsemanticcaching.chatmodel
+
+import org.springframework.ai.openai.OpenAiChatModel
+import org.springframework.ai.openai.OpenAiChatOptions
+import org.springframework.ai.openai.api.OpenAiApi
+import org.springframework.context.annotation.Bean
+import org.springframework.context.annotation.Configuration
+import org.springframework.http.client.SimpleClientHttpRequestFactory
+import org.springframework.web.client.RestClient
+import java.time.Duration
+
+@Configuration
+class ChatModelConfiguration {
+    @Bean
+    fun openAiExpensiveChatModel(): OpenAiChatModel {
+        val modelName = "gpt-5-2025-08-07"
+        return openAiChatModel(modelName)
+    }
+
+    @Bean
+    fun openAiCheapChatModel(): OpenAiChatModel {
+        val modelName = "gpt-4.1-nano"
+        return openAiChatModel(modelName)
+    }
+
+    private fun openAiChatModel(modelName: String): OpenAiChatModel {
+        // Allow up to two minutes for slow LLM responses
+        val factory = SimpleClientHttpRequestFactory()
+        factory.setReadTimeout(Duration.ofSeconds(120))
+
+        val openAiApi = OpenAiApi.builder()
+            .apiKey(System.getenv("OPENAI_API_KEY"))
+            .restClientBuilder(
+                RestClient
+                    .builder().requestFactory(factory)
+            )
+            .build()
+        val openAiChatOptions = OpenAiChatOptions.builder()
+            .model(modelName)
+            .temperature(1.0)
+            .build()
+
+        return OpenAiChatModel.builder()
+            .openAiApi(openAiApi)
+            .defaultOptions(openAiChatOptions)
+            .build()
+    }
+}
\ No newline at end of file
diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/embeddingmodel/EmbeddingModelConfiguration.kt b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/embeddingmodel/EmbeddingModelConfiguration.kt
new file mode 100644
index 0000000..306cfab
--- /dev/null
+++ b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/embeddingmodel/EmbeddingModelConfiguration.kt
@@ -0,0 +1,13 @@
+package com.redis.contextenabledsemanticcaching.embeddingmodel
+
+import org.springframework.ai.transformers.TransformersEmbeddingModel
+import org.springframework.context.annotation.Bean
+import org.springframework.context.annotation.Configuration
+
+@Configuration
+class EmbeddingModelConfiguration {
+    @Bean
+    fun transformersEmbeddingClient(): TransformersEmbeddingModel {
+        return TransformersEmbeddingModel()
+    }
+}
\ No newline at end of file
diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/embeddingmodel/EmbeddingStatusService.kt b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/embeddingmodel/EmbeddingStatusService.kt
new file mode 100644
index 0000000..b055b20
--- /dev/null
+++ b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/embeddingmodel/EmbeddingStatusService.kt
@@ -0,0 +1,24 @@
+package com.redis.contextenabledsemanticcaching.embeddingmodel
+
+import org.slf4j.LoggerFactory
+import org.springframework.ai.vectorstore.redis.RedisVectorStore
+import org.springframework.stereotype.Service
+
+@Service
+class EmbeddingStatusService(
+    private val beerVectorStore: RedisVectorStore
+) {
+    private val logger = LoggerFactory.getLogger(EmbeddingStatusService::class.java)
+
+    fun getTotalDocNum(): Long {
+        try {
+            val indexInfo = beerVectorStore.jedis.ftInfo("beerIdx")
+            val numDocs = indexInfo["num_docs"] as Long
+            logger.info("Number of documents in beerIdx: $numDocs")
+            return numDocs
+        } catch (e: Exception) {
+            logger.error("Error checking embedding status", e)
+            return 0
+        }
+    }
+}
\ No newline at end of file
diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/semanticaching/SemanticCachingConfiguration.kt b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/semanticaching/SemanticCachingConfiguration.kt
new file mode 100644
index 0000000..4906b4f
--- /dev/null
+++ b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/semanticaching/SemanticCachingConfiguration.kt
@@ -0,0 +1,29 @@
+package com.redis.contextenabledsemanticcaching.semanticaching
+
+import org.springframework.ai.transformers.TransformersEmbeddingModel
+import org.springframework.ai.vectorstore.redis.RedisVectorStore
+import org.springframework.context.annotation.Bean
+import org.springframework.context.annotation.Configuration
+import redis.clients.jedis.JedisPooled
+import redis.clients.jedis.search.Schema
+
+@Configuration
+class SemanticCachingConfiguration {
+    @Bean
+    fun semanticCachingVectorStore(
+        embeddingModel: TransformersEmbeddingModel,
+        jedisPooled: JedisPooled
+    ): RedisVectorStore {
+        return RedisVectorStore.builder(jedisPooled, embeddingModel)
+            .indexName("semanticCachingIdx")
+            .contentFieldName("content")
+            .embeddingFieldName("embedding")
+            .metadataFields(
+                RedisVectorStore.MetadataField("answer", Schema.FieldType.TEXT),
+            )
+            .prefix("semantic-caching:")
+            .initializeSchema(true)
+            .vectorAlgorithm(RedisVectorStore.Algorithm.HSNW)
+            .build()
+    }
+}
\ No newline at end of file
diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/semanticaching/SemanticCachingService.kt b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/semanticaching/SemanticCachingService.kt
new file mode 100644
index 0000000..940a5d6
--- /dev/null
+++ b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/kotlin/com/redis/contextenabledsemanticcaching/semanticaching/SemanticCachingService.kt
@@ -0,0 +1,43 @@
+package com.redis.contextenabledsemanticcaching.semanticaching
+
+import org.slf4j.LoggerFactory
+import org.springframework.ai.document.Document
+import org.springframework.ai.vectorstore.SearchRequest
+import org.springframework.ai.vectorstore.redis.RedisVectorStore
+import org.springframework.stereotype.Service
+
+@Service
+class SemanticCachingService(
+    private val semanticCachingVectorStore: RedisVectorStore
+) {
+
+    private val logger = LoggerFactory.getLogger(javaClass)
+
+    fun storeInCache(prompt: String, answer: String) {
+        semanticCachingVectorStore.add(listOf(Document(
+            prompt,
+            mapOf(
+                "answer" to answer
+            )
+        )))
+    }
+
+    fun getFromCache(prompt: String, similarityThreshold: Double): Pair<String?, String?> {
+        val results = semanticCachingVectorStore.similaritySearch(
+            SearchRequest.builder()
+                .query(prompt)
+                .topK(1)
+                .build()
+        )
+
+        if (results.isNotEmpty()) {
+            if (similarityThreshold < (results[0].score ?: 0.0)) {
+                logger.info("Returning cached answer. Similarity score: ${results[0].score}")
+                return Pair(results[0].text as String, results[0].metadata["answer"] as String)
+            }
+        }
+
+        return Pair(null, null)
+    }
+
+}
\ No newline at end of file
diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/resources/application.properties b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/resources/application.properties
new file mode 100644
index 0000000..5c02405
--- /dev/null
+++ b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/resources/application.properties
@@ -0,0 +1,6 @@
+spring.application.name=context-enabled-semantic-caching-with-spring-ai
+
+# Spring AI configuration
+spring.ai.openai.api-key=${OPENAI_API_KEY:demo}
+spring.ai.openai.chat.options.model=gpt-4o-mini
+spring.ai.openai.chat.options.temperature=1
\ No newline at end of file
diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/resources/data/beers.json.gz b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/resources/data/beers.json.gz
new file mode 100644
index 0000000..e32d6b0
Binary files /dev/null and b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/resources/data/beers.json.gz differ
diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/resources/static/css/styles.css b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/resources/static/css/styles.css
new file mode 100644
index 0000000..4bc840a
--- /dev/null
+++ b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/resources/static/css/styles.css
@@ -0,0 +1,233 @@
+/* General Styles */
+html {
+    height: 100%;
+    font-family: "Space Grotesk", sans-serif;
+}
+
+body {
+    height: 100%;
+    font-family: "Space Grotesk", sans-serif;
+    background-color: #f8f9fa;
+    color: #333;
+    margin: 0;
+    padding: 0;
+    display: flex;
+    flex-direction: column;
+}
+
+.wrapper {
+    flex: 1;
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+    padding: 0 20px;
+    max-width: 1200px;
+    margin: 0 auto;
+    width: 100%;
+}
+
+h1 {
+    text-align: center;
+    color: #d92b2b;
+    font-size: 2rem;
+    margin-top: 20px;
+}
+
+h2 {
+    color: #d92b2b;
+    margin-bottom: 15px;
+}
+
+h3 {
+    color: #333;
+    margin-bottom: 10px;
+}
+
+.intro-text {
+    text-align: center;
+    max-width: 800px;
+    margin: 0 auto 30px;
+    line-height: 1.5;
+    color: #555;
+}
+
+/* App Container */
+.app-container {
+    display: flex;
+    width: 100%;
+    max-width: 1200px;
+    margin: 20px 0;
+    height: 600px;
+    border-radius: 8px;
+    overflow: hidden;
+    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
+}
+
+/* Chat Container */
+.chat-container {
+    flex: 1;
+    display: flex;
+    flex-direction: column;
+    background-color: white;
+}
+
+.chat-header {
+    padding: 15px;
+    background-color: #f8f8f8;
+    border-bottom: 1px solid #ddd;
+    display: flex;
+    justify-content: space-between;
+    align-items: center;
+}
+
+.chat-header h2 {
+    margin: 0;
+    font-size: 1.2rem;
+}
+
+.user-info {
+    display: flex;
+    align-items: center;
+    gap: 10px;
+}
+
+.chat-messages {
+    flex: 1;
+    padding: 15px;
+    overflow-y: auto;
+    display: flex;
+    flex-direction: column;
+    gap: 15px;
+}
+
+.system-message {
+    background-color: #f0f0f0;
+    padding: 10px 15px;
+    border-radius: 8px;
+    align-self: center;
+    max-width: 80%;
+}
+
+.user-message {
+    background-color: #e6f7ff;
+    padding: 10px 15px;
+    border-radius: 8px;
+    align-self: flex-end;
+    max-width: 70%;
+}
+
+.assistant-message {
+    background-color: #f6ffed;
+    padding: 10px 15px;
+    border-radius: 8px;
+    align-self: flex-start;
+    max-width: 70%;
+}
+
+.chat-input {
+    padding: 15px;
+    border-top: 1px solid #ddd;
+    display: flex;
+    gap: 10px;
+}
+
+.chat-input textarea {
+    flex: 1;
+    padding: 10px;
+    border: 1px solid #ccc;
+    border-radius: 4px;
+    resize: none;
+    height: 60px;
+}
+
+.chat-input button {
+    align-self: flex-end;
+}
+
+button {
+    background-color: #d92b2b;
+    color: white;
+    border: none;
+    padding: 8px 15px;
+    border-radius: 4px;
+    font-size: 14px;
+    cursor: pointer;
+    transition: background-color 0.3s ease-in-out;
+}
+
+button:hover {
+    background-color: #b22222;
+}
+
+button:disabled {
+    background-color: #cccccc;
+    cursor: not-allowed;
+}
+
+/* Footer */
+footer {
+    background-color: #d92b2b;
+    color: white;
+    text-align: center;
+    padding: 15px;
+    margin-top: auto;
+}
+
+footer img {
+    width: 100px;
+    vertical-align: middle;
+}
+
+/* Metrics Styles */
+.metrics-container {
+    margin-top: 10px;
+    padding-top: 8px;
+    border-top: 1px dashed #ccc;
+    font-size: 0.85rem;
+}
+
+.metrics-title {
+    margin: 0 0 5px 0;
+    font-weight: bold;
+    color: #666;
+}
+
+.metrics-list {
+    margin: 0;
+    padding-left: 20px;
+    color: #777;
+}
+
+.metrics-list li {
+    margin-bottom: 2px;
+}
+
+/* Error and Loading Styles */
+.error-message {
+    color: #d92b2b;
+    padding: 10px;
+    background-color: #ffebeb;
+    border-radius: 4px;
+    margin-top: 10px;
+}
+
+.loading-message {
+    opacity: 0.7;
+}
+
+/* Responsive */
+@media (max-width: 768px) {
+    .app-container {
+        flex-direction: column;
+        height: auto;
+    }
+
+    .chat-container {
+        height: 500px;
+    }
+
+    .user-info {
+        flex-direction: column;
+        align-items: flex-start;
+    }
+}
diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/resources/static/images/redis-logo.png b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/resources/static/images/redis-logo.png
new file mode 100644
index 0000000..d32cd86
Binary files /dev/null and b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/resources/static/images/redis-logo.png differ
diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/resources/static/index.html b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/resources/static/index.html
new file mode 100644
index 0000000..67e7f5a
--- /dev/null
+++ b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/resources/static/index.html
@@ -0,0 +1,45 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Semantic Caching with Spring AI</title>
+    <link rel="stylesheet" href="css/styles.css">
+</head>
+<body>
+<div class="wrapper">
+    <h1>Semantic Caching with Spring AI</h1>
+
+    <p class="intro-text">This demo showcases a Retrieval-Augmented Generation (RAG) system with support for Semantic Caching using Spring AI and Redis. Ask questions about beer products to get accurate information retrieved from a knowledge base.</p>
+
+    <div class="app-container">
+        <div class="chat-container">
+            <div class="chat-header">
+                <h2>Beer Knowledge Assistant</h2>
+                <div class="user-info">
+                    <button id="start-chat-btn">Start New Chat</button>
+                    <button id="clear-chat-btn">Clear Chat</button>
+                </div>
+            </div>
+
+            <div class="chat-messages" id="chat-messages">
+                <div class="system-message">
+                    <p>Welcome to the Beer Knowledge Assistant! I can help you with questions about beer products. Try asking about specific beers, their ABV, IBU, or other characteristics.</p>
+                </div>
+            </div>
+
+            <div class="chat-input">
+                <textarea id="user-message" placeholder="Type your question..." disabled></textarea>
+                <button id="send-message-btn" disabled>Send</button>
+            </div>
+        </div>
+    </div>
+</div>
+
+<footer>
+    <p>Powered by <img src="images/redis-logo.png" alt="Redis Logo"></p>
+</footer>
+<script src="js/script.js"></script>
+</body>
+</html>
\ No newline at end of file
diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/resources/static/js/script.js b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/resources/static/js/script.js
new file mode 100644
index 0000000..c6e622c
--- /dev/null
+++ b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/main/resources/static/js/script.js
@@ -0,0 +1,198 @@
+// Variables to store the current state
+let currentChatId = '';
+
+// Document ready function
+document.addEventListener('DOMContentLoaded', function() {
+    // Initialize chat functionality
+    initializeChat();
+});
+
+// Function to initialize chat functionality
+function initializeChat() {
+    const sendButton = document.getElementById('send-message-btn');
+    const messageInput = document.getElementById('user-message');
+    const chatMessages = document.getElementById('chat-messages');
+    const startChatButton = document.getElementById('start-chat-btn');
+    const clearChatButton = document.getElementById('clear-chat-btn');
+
+    // Start chat button click handler
+    startChatButton.addEventListener('click', function() {
+        // Start a new chat session
+        fetch('/chat/startChat', {
+            method: 'POST'
+        })
+        .then(response => {
+            if (!response.ok) {
+                throw new Error(`HTTP error! status: ${response.status}`);
+            }
+            return response.json();
+        })
+        .then(data => {
+            console.log('Chat started:', data);
+
+            // Set the current chat ID
+            currentChatId = data.message;
+
+            // Enable the message input and send button
+            messageInput.disabled = false;
+            sendButton.disabled = false;
+
+            // Focus on the message input
+            messageInput.focus();
+
+            // Clear previous messages
+            chatMessages.innerHTML = `
+                <div class="system-message">
+                    <p>Welcome to the Beer Knowledge Assistant! I can help you with questions about beer products. Try asking about specific beers, their ABV, IBU, or other characteristics.</p>
+                </div>
+            `;
+        })
+        .catch(error => {
+            console.error('Error starting chat:', error);
+
+            // Show error message
+            chatMessages.innerHTML += `
+                <div class="error-message">
+                    <p>Error starting chat: ${error.message}</p>
+                </div>
+            `;
+        });
+    });
+
+    // Clear chat button click handler
+    clearChatButton.addEventListener('click', function() {
+        // Reset the chat
+        currentChatId = '';
+
+        // Disable the message input and send button
+        messageInput.disabled = true;
+        sendButton.disabled = true;
+
+        // Clear the chat messages
+        chatMessages.innerHTML = `
+            <div class="system-message">
+                <p>Chat cleared. Click "Start New Chat" to begin a new conversation.</p>
+            </div>
+        `;
+    });
+
+    // Send button click handler
+    sendButton.addEventListener('click', function() {
+        sendMessage();
+    });
+
+    // Enter key press handler
+    messageInput.addEventListener('keypress', function(e) {
+        if (e.key === 'Enter' && !e.shiftKey) {
+            e.preventDefault();
+            sendMessage();
+        }
+    });
+
+    // Function to send a message
+    function sendMessage() {
+        const message = messageInput.value.trim();
+
+        if (!message) {
+            return;
+        }
+
+        if (!currentChatId) {
+            alert('Please start a new chat first.');
+            return;
+        }
+
+        // Add user message to the chat
+        chatMessages.innerHTML += `
+            <div class="user-message">
+                <p>${message}</p>
+            </div>
+        `;
+
+        // Clear the message input
+        messageInput.value = '';
+
+        // Scroll to the bottom of the chat
+        chatMessages.scrollTop = chatMessages.scrollHeight;
+
+        // Add loading indicator
+        const loadingId = 'loading-' + Date.now();
+        chatMessages.innerHTML += `
+            <div class="assistant-message loading-message" id="${loadingId}">
+                <p>Thinking...</p>
+            </div>
+        `;
+
+        // Scroll to the bottom of the chat
+        chatMessages.scrollTop = chatMessages.scrollHeight;
+
+        // Send the message to the server
+        fetch(`/chat/${currentChatId}`, {
+            method: 'POST',
+            headers: {
+                'Content-Type': 'application/json'
+            },
+            body: JSON.stringify({ prompt: message })
+        })
+        .then(response => {
+            if (!response.ok) {
+                throw new Error(`HTTP error! status: ${response.status}`);
+            }
+            return response.json();
+        })
+        .then(data => {
+            console.log('Message sent:', data);
+
+            // Remove the loading indicator
+            const loadingElement = document.getElementById(loadingId);
+            if (loadingElement) {
+                loadingElement.remove();
+            }
+
+            // Add assistant message to the chat with metrics
+            chatMessages.innerHTML += `
+                <div class="assistant-message">
+                    <p>${data.message}</p>
+                    <div class="metrics-container">
+                        <p class="metrics-title">Processing Times:</p>
+                        <ul class="metrics-list">
+                            ${data.metrics.cachingTimeMs > 0 ? `<li>Semantic Cache: ${data.metrics.cachingTimeMs}ms</li>` : ''}
+                            <li>From cache: ${data.metrics.wasCached}</li>
+                            <li>Embedding: ${data.metrics.embeddingTimeMs}ms</li>
+                            <li>Search: ${data.metrics.searchTimeMs}ms</li>
+                            <li>LLM: ${data.metrics.llmTimeMs}ms</li>
+                            <li>Model: ${data.metrics.model}</li>
+                            <li>Total: ${data.metrics.embeddingTimeMs + data.metrics.searchTimeMs + data.metrics.llmTimeMs + data.metrics.cachingTimeMs}ms</li>
+                        </ul>
+                    </div>
+                </div>
+            `;
+
+            // Scroll to the bottom of the chat
+            chatMessages.scrollTop = chatMessages.scrollHeight;
+        })
+        .catch(error => {
+            console.error('Error sending message:', error);
+
+            // Remove the loading indicator
+            const loadingElement = document.getElementById(loadingId);
+            if (loadingElement) {
+                loadingElement.remove();
+            }
+
+            // Add error message to the chat
+            chatMessages.innerHTML += `
+                <div class="error-message">
+                    <p>Error sending message: ${error.message}</p>
+                </div>
+ `; + + // Scroll to the bottom of the chat + chatMessages.scrollTop = chatMessages.scrollHeight; + }); + } + + // Initially disable the message input and send button + messageInput.disabled = true; + sendButton.disabled = true; +} diff --git a/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/test/kotlin/com/redis/contextenabledsemanticcaching/ContextEnabledSemanticCachingApplicationTests.kt b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/test/kotlin/com/redis/contextenabledsemanticcaching/ContextEnabledSemanticCachingApplicationTests.kt new file mode 100644 index 0000000..8eddc0d --- /dev/null +++ b/artificial-intelligence/context-enabled-semantic-caching-with-spring-ai/src/test/kotlin/com/redis/contextenabledsemanticcaching/ContextEnabledSemanticCachingApplicationTests.kt @@ -0,0 +1,13 @@ +package com.redis.contextenabledsemanticcaching + +import org.junit.jupiter.api.Test +import org.springframework.boot.test.context.SpringBootTest + +@SpringBootTest +class ContextEnabledSemanticCachingApplicationTests { + + @Test + fun contextLoads() { + } + +} diff --git a/settings.gradle.kts b/settings.gradle.kts index a12d978..c3d06dd 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -2,6 +2,7 @@ rootProject.name = "redis-springboot-recipes" include("artificial-intelligence") include("artificial-intelligence:agent-memory-with-spring-ai") +include("artificial-intelligence:context-enabled-semantic-caching-with-spring-ai") include("artificial-intelligence:rag-with-spring-ai") include("artificial-intelligence:semantic-caching-with-spring-ai")