From 922f04a95f57d0b8293e50f82c1ed97a60b0009a Mon Sep 17 00:00:00 2001 From: Julien Veyssier Date: Fri, 4 Jul 2025 17:47:01 +0200 Subject: [PATCH] feat(audio-chat): adjust UI to really submit, handle audio attachments in the backend, schedule proper audioChat task Signed-off-by: Julien Veyssier --- appinfo/info.xml | 2 +- appinfo/routes.php | 1 + lib/Controller/ChattyLLMController.php | 105 +++++++++++++++++- lib/Db/ChattyLLM/Message.php | 8 ++ lib/Listener/ChattyLLMTaskListener.php | 19 +++- .../Version020600Date20250704145036.php | 43 +++++++ psalm.xml | 1 + .../ChattyLLM/ChattyLLMInputForm.vue | 34 +++++- src/components/ChattyLLM/ConversationBox.vue | 2 +- src/components/ChattyLLM/Message.vue | 17 ++- src/components/fields/AudioDisplay.vue | 27 +++-- src/components/fields/ListOfMediaField.vue | 1 + src/components/fields/MediaField.vue | 1 + 13 files changed, 236 insertions(+), 25 deletions(-) create mode 100644 lib/Migration/Version020600Date20250704145036.php diff --git a/appinfo/info.xml b/appinfo/info.xml index 0d0227a3..1f9ad679 100644 --- a/appinfo/info.xml +++ b/appinfo/info.xml @@ -62,7 +62,7 @@ Known providers: More details on how to set this up in the [admin docs](https://docs.nextcloud.com/server/latest/admin_manual/ai/index.html) ]]> - 2.5.0 + 2.6.0 agpl Julien Veyssier Assistant diff --git a/appinfo/routes.php b/appinfo/routes.php index ccfccd77..343c3a69 100644 --- a/appinfo/routes.php +++ b/appinfo/routes.php @@ -42,6 +42,7 @@ ['name' => 'chattyLLM#newMessage', 'url' => '/chat/new_message', 'verb' => 'PUT'], ['name' => 'chattyLLM#deleteMessage', 'url' => '/chat/delete_message', 'verb' => 'DELETE'], ['name' => 'chattyLLM#getMessages', 'url' => '/chat/messages', 'verb' => 'GET'], + ['name' => 'chattyLLM#getMessage', 'url' => '/chat/sessions/{sessionId}/messages/{messageId}', 'verb' => 'GET'], ['name' => 'chattyLLM#generateForSession', 'url' => '/chat/generate', 'verb' => 'GET'], ['name' => 'chattyLLM#regenerateForSession', 'url' => '/chat/regenerate', 'verb' => 'GET'], ['name' => 'chattyLLM#checkSession', 'url' => '/chat/check_session', 'verb' => 'GET'], diff --git a/lib/Controller/ChattyLLMController.php b/lib/Controller/ChattyLLMController.php index c7d260f4..991ddcd6 100644 --- a/lib/Controller/ChattyLLMController.php +++ b/lib/Controller/ChattyLLMController.php @@ -249,6 +249,7 @@ public function getSessions(): JSONResponse { * @param string $role Role of the message (human, assistant etc...) * @param string $content Content of the message * @param int $timestamp Date of the message + * @param ?array $attachments List of attachment objects * @param bool $firstHumanMessage Is it the first human message of the session? * @return JSONResponse|JSONResponse * @@ -259,7 +260,9 @@ public function getSessions(): JSONResponse { */ #[NoAdminRequired] #[OpenAPI(scope: OpenAPI::SCOPE_DEFAULT, tags: ['chat_api'])] - public function newMessage(int $sessionId, string $role, string $content, int $timestamp, bool $firstHumanMessage = false): JSONResponse { + public function newMessage( + int $sessionId, string $role, string $content, int $timestamp, ?array $attachments = null, bool $firstHumanMessage = false, + ): JSONResponse { if ($this->userId === null) { return new JSONResponse(['error' => $this->l10n->t('User not logged in')], Http::STATUS_UNAUTHORIZED); } @@ -270,10 +273,13 @@ public function newMessage(int $sessionId, string $role, string $content, int $t return new JSONResponse(['error' => $this->l10n->t('Session not found')], Http::STATUS_NOT_FOUND); } + // refuse empty text content if context agent is not available (we do classic chat) AND there is no attachment + // in other words: accept empty content if we are using agency OR there are attachments $content = trim($content); if (empty($content) && (!class_exists('OCP\\TaskProcessing\\TaskTypes\\ContextAgentInteraction') || !isset($this->taskProcessingManager->getAvailableTaskTypes()[\OCP\TaskProcessing\TaskTypes\ContextAgentInteraction::ID])) + && $attachments === null ) { return new JSONResponse(['error' => $this->l10n->t('Message content is empty')], Http::STATUS_BAD_REQUEST); } @@ -284,6 +290,13 @@ public function newMessage(int $sessionId, string $role, string $content, int $t $message->setContent($content); $message->setTimestamp($timestamp); $message->setSources('[]'); + $message->setAttachments('[]'); + if ($attachments !== null) { + $encodedAttachments = json_encode($attachments); + if ($encodedAttachments !== false) { + $message->setAttachments($encodedAttachments); + } + } $this->messageMapper->insert($message); if ($firstHumanMessage) { @@ -342,6 +355,44 @@ public function getMessages(int $sessionId, int $limit = 20, int $cursor = 0): J } } + /** + * Get a message + * + * Get a chat message in a session + * + * @param int $sessionId The session ID + * @param int $messageId The message ID + * @return JSONResponse|JSONResponse + * + * 200: The message has been successfully obtained + * 401: Not logged in + * 404: The session or the message was not found + */ + #[NoAdminRequired] + #[OpenAPI(scope: OpenAPI::SCOPE_DEFAULT, tags: ['chat_api'])] + public function getMessage(int $sessionId, int $messageId): JSONResponse { + if ($this->userId === null) { + return new JSONResponse(['error' => $this->l10n->t('User not logged in')], Http::STATUS_UNAUTHORIZED); + } + + try { + $sessionExists = $this->sessionMapper->exists($this->userId, $sessionId); + if (!$sessionExists) { + return new JSONResponse(['error' => $this->l10n->t('Session not found')], Http::STATUS_NOT_FOUND); + } + + $message = $this->messageMapper->getMessageById($messageId); + if ($message->getSessionId() !== $sessionId) { + return new JSONResponse(['error' => $this->l10n->t('Message not found')], Http::STATUS_NOT_FOUND); + } + + return new JSONResponse($message->jsonSerialize()); + } catch (\OCP\DB\Exception $e) { + $this->logger->warning('Failed to get chat messages', ['exception' => $e]); + return new JSONResponse(['error' => $this->l10n->t('Failed to get chat message')], Http::STATUS_INTERNAL_SERVER_ERROR); + } + } + /** * Delete a message * @@ -430,16 +481,39 @@ public function generateForSession(int $sessionId, int $agencyConfirm = 0): JSON $lastUserMessage = array_pop($history); } while ($lastUserMessage->getRole() !== 'human'); // history is a list of JSON strings + // we ignore audio attachments here because they are supposed to have been transcribed, the content is the transcription + // this makes the history smaller $history = array_map(static function (Message $message) { return json_encode([ 'role' => $message->getRole(), 'content' => $message->getContent(), ]); }, $history); - try { - $taskId = $this->scheduleLLMChatTask($lastUserMessage->getContent(), $systemPrompt, $history, $sessionId); - } catch (\Exception $e) { - return new JSONResponse(['error' => $e->getMessage()], Http::STATUS_BAD_REQUEST); + + $lastAttachments = $lastUserMessage->jsonSerialize()['attachments']; + $audioAttachment = $lastAttachments[0] ?? null; + $audioAttachment = $audioAttachment['type'] === 'Audio' ? $audioAttachment : null; + /* php 8.4 allows: + $audioAttachment = array_find($lastAttachments, static function (array $attachment) { + return $attachment['type'] === 'Audio'; + }); + */ + if ($audioAttachment !== null + && class_exists('OCP\\TaskProcessing\\TaskTypes\\AudioToAudioChat') + && isset($this->taskProcessingManager->getAvailableTaskTypes()[\OCP\TaskProcessing\TaskTypes\AudioToAudioChat::ID]) + ) { + $fileId = $audioAttachment['fileId']; + try { + $taskId = $this->scheduleAudioChatTask($fileId, $systemPrompt, $history, $sessionId, $lastUserMessage->getId()); + } catch (\Exception $e) { + return new JSONResponse(['error' => $e->getMessage()], Http::STATUS_BAD_REQUEST); + } + } else { + try { + $taskId = $this->scheduleLLMChatTask($lastUserMessage->getContent(), $systemPrompt, $history, $sessionId); + } catch (\Exception $e) { + return new JSONResponse(['error' => $e->getMessage()], Http::STATUS_BAD_REQUEST); + } } } @@ -848,4 +922,25 @@ private function scheduleAgencyTask(string $content, int $confirmation, string $ $this->taskProcessingManager->scheduleTask($task); return $task->getId() ?? 0; } + + private function scheduleAudioChatTask( + int $audioFileId, string $systemPrompt, array $history, int $sessionId, int $queryMessageId, + ): int { + $customId = 'chatty-llm:' . $sessionId . ':' . $queryMessageId; + $this->checkIfSessionIsThinking($customId); + $input = [ + 'input' => $audioFileId, + 'system_prompt' => $systemPrompt, + 'history' => $history, + ]; + $task = new Task( + \OCP\TaskProcessing\TaskTypes\AudioToAudioChat::ID, + $input, + Application::APP_ID . ':chatty-llm', + $this->userId, + $customId, + ); + $this->taskProcessingManager->scheduleTask($task); + return $task->getId() ?? 0; + } } diff --git a/lib/Db/ChattyLLM/Message.php b/lib/Db/ChattyLLM/Message.php index d2b9dba4..a25def6c 100644 --- a/lib/Db/ChattyLLM/Message.php +++ b/lib/Db/ChattyLLM/Message.php @@ -25,6 +25,8 @@ * @method \void setOcpTaskId(int $ocpTaskId) * @method \string getSources() * @method \void setSources(string $sources) + * @method \string getAttachments() + * @method \void setAttachments(string $attachments) */ class Message extends Entity implements \JsonSerializable { /** @var int */ @@ -39,6 +41,8 @@ class Message extends Entity implements \JsonSerializable { protected $ocpTaskId; /** @var string */ protected $sources; + /** @var string */ + protected $attachments; public static $columns = [ 'id', @@ -48,6 +52,7 @@ class Message extends Entity implements \JsonSerializable { 'timestamp', 'ocp_task_id', 'sources', + 'attachments', ]; public static $fields = [ 'id', @@ -57,6 +62,7 @@ class Message extends Entity implements \JsonSerializable { 'timestamp', 'ocpTaskId', 'sources', + 'attachments', ]; public function __construct() { @@ -66,6 +72,7 @@ public function __construct() { $this->addType('timestamp', Types::INTEGER); $this->addType('ocp_task_id', Types::INTEGER); $this->addType('sources', Types::STRING); + $this->addType('attachments', Types::STRING); } #[\ReturnTypeWillChange] @@ -78,6 +85,7 @@ public function jsonSerialize() { 'timestamp' => $this->timestamp, 'ocp_task_id' => $this->ocpTaskId, 'sources' => $this->sources, + 'attachments' => json_decode($this->attachments, true) ?: [], ]; } } diff --git a/lib/Listener/ChattyLLMTaskListener.php b/lib/Listener/ChattyLLMTaskListener.php index 0f60a5c3..6fdd0800 100644 --- a/lib/Listener/ChattyLLMTaskListener.php +++ b/lib/Listener/ChattyLLMTaskListener.php @@ -52,17 +52,30 @@ public function handle(Event $event): void { } // message generation - if (preg_match('/^chatty-llm:(\d+)$/', $customId, $matches)) { + if (preg_match('/^chatty-llm:(\d+)/', $customId, $matches)) { $sessionId = (int)$matches[1]; $message = new Message(); $message->setSessionId($sessionId); $message->setOcpTaskId($task->getId()); $message->setRole('assistant'); - $message->setContent(trim($task->getOutput()['output'] ?? '')); $message->setTimestamp(time()); $sources = json_encode($task->getOutput()['sources'] ?? []); - $message->setSources($sources ? $sources : '[]'); + $message->setSources($sources ?: '[]'); + if (class_exists('OCP\\TaskProcessing\\TaskTypes\\AudioToAudioChat') + && $taskTypeId === \OCP\TaskProcessing\TaskTypes\AudioToAudioChat::ID) { + $message->setContent(trim($task->getOutput()['output_transcript'] ?? '')); + $message->setAttachments('[{"type":"Audio","fileId":' . $task->getOutput()['output'] . '}]'); + // now we have the transcription of the user audio input + if (preg_match('/^chatty-llm:\d+:(\d+)$/', $customId, $matches)) { + $queryMessageId = (int)$matches[1]; + $queryMessage = $this->messageMapper->getMessageById($queryMessageId); + $queryMessage->setContent(trim($task->getOutput()['input_transcript'] ?? '')); + $this->messageMapper->update($queryMessage); + } + } else { + $message->setContent(trim($task->getOutput()['output'] ?? '')); + } try { $this->messageMapper->insert($message); } catch (\OCP\DB\Exception $e) { diff --git a/lib/Migration/Version020600Date20250704145036.php b/lib/Migration/Version020600Date20250704145036.php new file mode 100644 index 00000000..eaec5000 --- /dev/null +++ b/lib/Migration/Version020600Date20250704145036.php @@ -0,0 +1,43 @@ +hasTable('assistant_chat_msgs')) { + $table = $schema->getTable('assistant_chat_msgs'); + if (!$table->hasColumn('attachments')) { + $table->addColumn('attachments', Types::TEXT, [ + 'notnull' => true, + 'default' => '[]', + ]); + $schemaChanged = true; + } + } + + return $schemaChanged ? $schema : null; + } +} diff --git a/psalm.xml b/psalm.xml index c1a048aa..df053955 100644 --- a/psalm.xml +++ b/psalm.xml @@ -39,6 +39,7 @@ + diff --git a/src/components/ChattyLLM/ChattyLLMInputForm.vue b/src/components/ChattyLLM/ChattyLLMInputForm.vue index d20d70cd..fbf61fa9 100644 --- a/src/components/ChattyLLM/ChattyLLMInputForm.vue +++ b/src/components/ChattyLLM/ChattyLLMInputForm.vue @@ -183,6 +183,7 @@ import axios from '@nextcloud/axios' import { showError } from '@nextcloud/dialogs' import { generateUrl, generateOcsUrl } from '@nextcloud/router' import moment from 'moment' +import { SHAPE_TYPE_NAMES } from '../../constants.js' // future: type (text, image, file, etc), attachments, etc support @@ -431,7 +432,7 @@ export default { this.active.agencyAnswered = true } - this.messages.push({ role, content, timestamp }) + this.messages.push({ role, content, timestamp, session_id: this.active.id }) this.chatContent = '' this.scrollToBottom() await this.newMessage(role, content, timestamp, this.active.id) @@ -440,8 +441,9 @@ export default { async handleSubmitAudio(fileId) { console.debug('[Assistant] submit audio', fileId) const role = Roles.HUMAN - const content = 'lala' + fileId + const content = '' const timestamp = +new Date() / 1000 | 0 + const attachments = [{ type: SHAPE_TYPE_NAMES.Audio, fileId }] if (this.active === null) { await this.newSession() @@ -453,10 +455,10 @@ export default { this.active.agencyAnswered = true } - this.messages.push({ role, content, timestamp }) + this.messages.push({ role, content, timestamp, session_id: this.active.id, attachments }) this.chatContent = '' this.scrollToBottom() - await this.newMessage(role, content, timestamp, this.active.id) + await this.newMessage(role, content, timestamp, this.active.id, attachments) }, onLoadOlderMessages() { @@ -596,7 +598,7 @@ export default { } }, - async newMessage(role, content, timestamp, sessionId, replaceLastMessage = true, agencyConfirm = null) { + async newMessage(role, content, timestamp, sessionId, attachments = null, replaceLastMessage = true, agencyConfirm = null) { try { this.loading.newHumanMessage = true const firstHumanMessage = this.messages.length === 1 && this.messages[0].role === Roles.HUMAN @@ -605,6 +607,7 @@ export default { sessionId, role, content, + attachments, timestamp, firstHumanMessage, }) @@ -612,6 +615,9 @@ export default { console.debug('newMessage response:', newMessageResponseData) this.loading.newHumanMessage = false + // we need the ID of the messages, even right after they have been added + this.messages[this.messages.length - 1].id = newMessageResponseData.id + if (replaceLastMessage) { // replace the last message with the response that contains the id this.messages[this.messages.length - 1] = newMessageResponseData @@ -716,6 +722,10 @@ export default { if (sessionId === this.active.id) { this.active.sessionAgencyPendingActions = responseData.sessionAgencyPendingActions this.active.agencyAnswered = false + // update content of previous message if we receive an audio message from the assistant + if (responseData.role === Roles.ASSISTANT && responseData.attachments.find(a => a.type === SHAPE_TYPE_NAMES.Audio)) { + this.updateLastHumanMessageContent() + } resolve(responseData) } else { console.debug('Ignoring received message for session ' + sessionId + ' that is not selected anymore') @@ -735,6 +745,18 @@ export default { }) }, + async updateLastHumanMessageContent() { + const lastHumanMessage = this.messages + .filter(m => m.role === Roles.HUMAN) + .pop() + if (lastHumanMessage) { + const updatedMessage = await axios.get( + getChatURL(`/sessions/${lastHumanMessage.session_id}/messages/${lastHumanMessage.id}`), + ) + lastHumanMessage.content = updatedMessage.data.content + } + }, + async pollTitleGenerationTask(taskId, sessionId) { return new Promise((resolve, reject) => { this.pollTitleGenerationTimerId = setInterval(() => { @@ -781,7 +803,7 @@ export default { // this.messages.push({ role, content, timestamp }) this.chatContent = '' this.scrollToBottom() - await this.newMessage(role, content, timestamp, this.active.id, false, confirm) + await this.newMessage(role, content, timestamp, this.active.id, null, false, confirm) }, async saveLastSelectedTaskType(taskType) { diff --git a/src/components/ChattyLLM/ConversationBox.vue b/src/components/ChattyLLM/ConversationBox.vue index 916ffa3e..c6c099bb 100644 --- a/src/components/ChattyLLM/ConversationBox.vue +++ b/src/components/ChattyLLM/ConversationBox.vue @@ -61,7 +61,7 @@ export default { }, props: { - // [{ id: number, session_id: number, role: string, content: string, timestamp: number, sources: string }] + // [{ id: number, session_id: number, role: string, content: string, timestamp: number, sources: string, attachments: array }] messages: { type: Array, default: null, diff --git a/src/components/ChattyLLM/Message.vue b/src/components/ChattyLLM/Message.vue index ca7acdf0..6bf987c8 100644 --- a/src/components/ChattyLLM/Message.vue +++ b/src/components/ChattyLLM/Message.vue @@ -3,7 +3,7 @@ - SPDX-License-Identifier: AGPL-3.0-or-later --> @@ -77,11 +83,13 @@ import { NcRichText } from '@nextcloud/vue/components/NcRichText' import InformationBox from 'vue-material-design-icons/InformationBox.vue' import MessageActions from './MessageActions.vue' +import AudioDisplay from '../fields/AudioDisplay.vue' import { getCurrentUser } from '@nextcloud/auth' import { showSuccess } from '@nextcloud/dialogs' import { generateOcsUrl } from '@nextcloud/router' import axios from '@nextcloud/axios' +import { SHAPE_TYPE_NAMES } from '../../constants.js' const PLAIN_URL_PATTERN = /(?:\s|^|\()((?:https?:\/\/)(?:[-A-Z0-9+_.]+(?::[0-9]+)?(?:\/[-A-Z0-9+&@#%?=~_|!:,.;()]*)*))(?:\s|$|\))/ig const MARKDOWN_LINK_PATTERN = /\[[-A-Z0-9+&@#%?=~_|!:,.;()]+\]\(((?:https?:\/\/)(?:[-A-Z0-9+_.]+(?::[0-9]+)?(?:\/[-A-Z0-9+&@#%?=~_|!:,.;]*)*))\)/ig @@ -90,6 +98,7 @@ export default { name: 'Message', components: { + AudioDisplay, AssistantIcon, NcAvatar, @@ -155,6 +164,12 @@ export default { parsedSources = parsedSources.map((source) => this.getSourceString(source)) return [...new Set(parsedSources)] }, + hasAttachments() { + return this.message.attachments?.length > 0 + }, + audioAttachments() { + return this.message.attachments?.filter(a => a.type === SHAPE_TYPE_NAMES.Audio) ?? [] + }, }, mounted() { diff --git a/src/components/fields/AudioDisplay.vue b/src/components/fields/AudioDisplay.vue index 97820957..f9407bbc 100644 --- a/src/components/fields/AudioDisplay.vue +++ b/src/components/fields/AudioDisplay.vue @@ -5,12 +5,13 @@