diff --git a/appinfo/info.xml b/appinfo/info.xml index 0d0227a3..1f9ad679 100644 --- a/appinfo/info.xml +++ b/appinfo/info.xml @@ -62,7 +62,7 @@ Known providers: More details on how to set this up in the [admin docs](https://docs.nextcloud.com/server/latest/admin_manual/ai/index.html) ]]> - 2.5.0 + 2.6.0 agpl Julien Veyssier Assistant diff --git a/appinfo/routes.php b/appinfo/routes.php index ccfccd77..343c3a69 100644 --- a/appinfo/routes.php +++ b/appinfo/routes.php @@ -42,6 +42,7 @@ ['name' => 'chattyLLM#newMessage', 'url' => '/chat/new_message', 'verb' => 'PUT'], ['name' => 'chattyLLM#deleteMessage', 'url' => '/chat/delete_message', 'verb' => 'DELETE'], ['name' => 'chattyLLM#getMessages', 'url' => '/chat/messages', 'verb' => 'GET'], + ['name' => 'chattyLLM#getMessage', 'url' => '/chat/sessions/{sessionId}/messages/{messageId}', 'verb' => 'GET'], ['name' => 'chattyLLM#generateForSession', 'url' => '/chat/generate', 'verb' => 'GET'], ['name' => 'chattyLLM#regenerateForSession', 'url' => '/chat/regenerate', 'verb' => 'GET'], ['name' => 'chattyLLM#checkSession', 'url' => '/chat/check_session', 'verb' => 'GET'], diff --git a/composer.lock b/composer.lock index e9bc2df6..fdce7262 100644 --- a/composer.lock +++ b/composer.lock @@ -776,12 +776,12 @@ "source": { "type": "git", "url": "https://github.com/nextcloud-deps/ocp.git", - "reference": "e2304c4f0f5ecf7fe16a3d3745c63b5f019dc718" + "reference": "833fdb31f00799d699dcc4ac65b057dd89588891" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/nextcloud-deps/ocp/zipball/e2304c4f0f5ecf7fe16a3d3745c63b5f019dc718", - "reference": "e2304c4f0f5ecf7fe16a3d3745c63b5f019dc718", + "url": "https://api.github.com/repos/nextcloud-deps/ocp/zipball/833fdb31f00799d699dcc4ac65b057dd89588891", + "reference": "833fdb31f00799d699dcc4ac65b057dd89588891", "shasum": "" }, "require": { @@ -817,7 +817,7 @@ "issues": "https://github.com/nextcloud-deps/ocp/issues", "source": "https://github.com/nextcloud-deps/ocp/tree/master" }, - "time": "2025-04-09T00:48:03+00:00" + "time": "2025-07-01T15:03:36+00:00" }, { "name": "nextcloud/openapi-extractor", @@ -2823,7 +2823,7 @@ "platform": { "php": "^8.1" }, - "platform-dev": [], + "platform-dev": {}, "platform-overrides": { "php": "8.1" }, diff --git a/lib/AppInfo/Application.php b/lib/AppInfo/Application.php index 6a87bb90..0d0d2175 100644 --- a/lib/AppInfo/Application.php +++ b/lib/AppInfo/Application.php @@ -22,6 +22,8 @@ use OCA\Assistant\Reference\SpeechToTextReferenceProvider; use OCA\Assistant\Reference\TaskOutputFileReferenceProvider; use OCA\Assistant\Reference\Text2ImageReferenceProvider; +use OCA\Assistant\TaskProcessing\AudioToAudioChatProvider; +use OCA\Assistant\TaskProcessing\ContextAgentAudioInteractionProvider; use OCP\AppFramework\App; use OCP\AppFramework\Bootstrap\IBootContext; @@ -70,6 +72,13 @@ public function register(IRegistrationContext $context): void { $context->registerNotifierService(Notifier::class); $context->registerEventListener(AddContentSecurityPolicyEvent::class, CSPListener::class); + + if (class_exists('OCP\\TaskProcessing\\TaskTypes\\AudioToAudioChat')) { + $context->registerTaskProcessingProvider(AudioToAudioChatProvider::class); + } + if (class_exists('OCP\\TaskProcessing\\TaskTypes\\ContextAgentAudioInteraction')) { + $context->registerTaskProcessingProvider(ContextAgentAudioInteractionProvider::class); + } } public function boot(IBootContext $context): void { diff --git a/lib/Controller/ChattyLLMController.php b/lib/Controller/ChattyLLMController.php index e0957b8b..0697306a 100644 --- a/lib/Controller/ChattyLLMController.php +++ b/lib/Controller/ChattyLLMController.php @@ -145,6 +145,7 @@ public function newSession(int $timestamp, ?string $title = null): JSONResponse $systemMsg = new Message(); $systemMsg->setSessionId($session->getId()); $systemMsg->setRole('system'); + $systemMsg->setAttachments('[]'); $systemMsg->setContent($userInstructions); $systemMsg->setTimestamp($session->getTimestamp()); $systemMsg->setSources('[]'); @@ -206,6 +207,7 @@ public function deleteSession(int $sessionId): JSONResponse { } try { + $this->deleteSessionTasks($this->userId, $sessionId); $this->sessionMapper->deleteSession($this->userId, $sessionId); $this->messageMapper->deleteMessagesBySession($sessionId); return new JSONResponse(); @@ -215,6 +217,27 @@ public function deleteSession(int $sessionId): JSONResponse { } } + private function deleteSessionTasks(string $userId, int $sessionId): void { + $sessionExists = $this->sessionMapper->exists($this->userId, $sessionId); + if (!$sessionExists) { + return; + } + $messages = $this->messageMapper->getMessages($sessionId, 0, 0); + foreach ($messages as $message) { + $ocpTaskId = $message->getOcpTaskId(); + if ($ocpTaskId !== 0) { + try { + $task = $this->taskProcessingManager->getTask($ocpTaskId); + $this->taskProcessingManager->deleteTask($task); + } catch (\OCP\TaskProcessing\Exception\Exception) { + // silent failure here because: + // if the task is not found: all good nothing to delete + // if the task couldn't be deleted, it will be deleted by the task processing cleanup job later anyway + } + } + } + } + /** * Get chat sessions * @@ -250,6 +273,7 @@ public function getSessions(): JSONResponse { * @param string $role Role of the message (human, assistant etc...) * @param string $content Content of the message * @param int $timestamp Date of the message + * @param ?list $attachments List of attachment objects * @param bool $firstHumanMessage Is it the first human message of the session? * @return JSONResponse|JSONResponse * @@ -260,7 +284,9 @@ public function getSessions(): JSONResponse { */ #[NoAdminRequired] #[OpenAPI(scope: OpenAPI::SCOPE_DEFAULT, tags: ['chat_api'])] - public function newMessage(int $sessionId, string $role, string $content, int $timestamp, bool $firstHumanMessage = false): JSONResponse { + public function newMessage( + int $sessionId, string $role, string $content, int $timestamp, ?array $attachments = null, bool $firstHumanMessage = false, + ): JSONResponse { if ($this->userId === null) { return new JSONResponse(['error' => $this->l10n->t('User not logged in')], Http::STATUS_UNAUTHORIZED); } @@ -271,10 +297,13 @@ public function newMessage(int $sessionId, string $role, string $content, int $t return new JSONResponse(['error' => $this->l10n->t('Session not found')], Http::STATUS_NOT_FOUND); } + // refuse empty text content if context agent is not available (we do classic chat) AND there is no attachment + // in other words: accept empty content if we are using agency OR there are attachments $content = trim($content); if (empty($content) && (!class_exists('OCP\\TaskProcessing\\TaskTypes\\ContextAgentInteraction') || !isset($this->taskProcessingManager->getAvailableTaskTypes()[\OCP\TaskProcessing\TaskTypes\ContextAgentInteraction::ID])) + && $attachments === null ) { return new JSONResponse(['error' => $this->l10n->t('Message content is empty')], Http::STATUS_BAD_REQUEST); } @@ -285,6 +314,13 @@ public function newMessage(int $sessionId, string $role, string $content, int $t $message->setContent($content); $message->setTimestamp($timestamp); $message->setSources('[]'); + $message->setAttachments('[]'); + if ($attachments !== null) { + $encodedAttachments = json_encode($attachments); + if ($encodedAttachments !== false) { + $message->setAttachments($encodedAttachments); + } + } $this->messageMapper->insert($message); if ($firstHumanMessage) { @@ -343,6 +379,41 @@ public function getMessages(int $sessionId, int $limit = 20, int $cursor = 0): J } } + /** + * Get a message + * + * Get a chat message in a session + * + * @param int $sessionId The session ID + * @param int $messageId The message ID + * @return JSONResponse|JSONResponse + * + * 200: The message has been successfully obtained + * 401: Not logged in + * 404: The session or the message was not found + */ + #[NoAdminRequired] + #[OpenAPI(scope: OpenAPI::SCOPE_DEFAULT, tags: ['chat_api'])] + public function getMessage(int $sessionId, int $messageId): JSONResponse { + if ($this->userId === null) { + return new JSONResponse(['error' => $this->l10n->t('User not logged in')], Http::STATUS_UNAUTHORIZED); + } + + try { + $sessionExists = $this->sessionMapper->exists($this->userId, $sessionId); + if (!$sessionExists) { + return new JSONResponse(['error' => $this->l10n->t('Session not found')], Http::STATUS_NOT_FOUND); + } + + $message = $this->messageMapper->getMessageById($sessionId, $messageId); + + return new JSONResponse($message->jsonSerialize()); + } catch (\OCP\DB\Exception $e) { + $this->logger->warning('Failed to get chat messages', ['exception' => $e]); + return new JSONResponse(['error' => $this->l10n->t('Failed to get chat message')], Http::STATUS_INTERNAL_SERVER_ERROR); + } + } + /** * Delete a message * @@ -368,8 +439,19 @@ public function deleteMessage(int $messageId, int $sessionId): JSONResponse { if (!$sessionExists) { return new JSONResponse(['error' => $this->l10n->t('Session not found')], Http::STATUS_NOT_FOUND); } + $message = $this->messageMapper->getMessageById($sessionId, $messageId); + $ocpTaskId = $message->getOcpTaskId(); - $this->messageMapper->deleteMessageById($messageId); + $this->messageMapper->deleteMessageById($sessionId, $messageId); + + // delete the related task + if ($ocpTaskId !== 0) { + try { + $task = $this->taskProcessingManager->getTask($ocpTaskId); + $this->taskProcessingManager->deleteTask($task); + } catch (\OCP\TaskProcessing\Exception\Exception) { + } + } return new JSONResponse(); } catch (\OCP\DB\Exception|\RuntimeException $e) { $this->logger->warning('Failed to delete a chat message', ['exception' => $e]); @@ -410,14 +492,32 @@ public function generateForSession(int $sessionId, int $agencyConfirm = 0): JSON if (class_exists('OCP\\TaskProcessing\\TaskTypes\\ContextAgentInteraction') && isset($this->taskProcessingManager->getAvailableTaskTypes()[\OCP\TaskProcessing\TaskTypes\ContextAgentInteraction::ID]) ) { - $message = $this->messageMapper->getLastHumanMessage($sessionId); - $prompt = $message->getContent(); + $lastUserMessage = $this->messageMapper->getLastHumanMessage($sessionId); $session = $this->sessionMapper->getUserSession($this->userId, $sessionId); $lastConversationToken = $session->getAgencyConversationToken() ?? '{}'; - try { - $taskId = $this->scheduleAgencyTask($prompt, $agencyConfirm, $lastConversationToken, $sessionId); - } catch (\Exception $e) { - return new JSONResponse(['error' => $e->getMessage()], Http::STATUS_BAD_REQUEST); + + $lastAttachments = $lastUserMessage->jsonSerialize()['attachments']; + $audioAttachment = $lastAttachments[0] ?? null; + $audioAttachment = $audioAttachment['type'] === 'Audio' ? $audioAttachment : null; + if ($audioAttachment !== null + && class_exists('OCP\\TaskProcessing\\TaskTypes\\ContextAgentAudioInteraction') + && isset($this->taskProcessingManager->getAvailableTaskTypes()[\OCP\TaskProcessing\TaskTypes\ContextAgentAudioInteraction::ID]) + ) { + // audio agency + $fileId = $audioAttachment['file_id']; + try { + $taskId = $this->scheduleAgencyAudioTask($fileId, $agencyConfirm, $lastConversationToken, $sessionId, $lastUserMessage->getId()); + } catch (\Exception $e) { + return new JSONResponse(['error' => $e->getMessage()], Http::STATUS_BAD_REQUEST); + } + } else { + // classic agency + $prompt = $lastUserMessage->getContent(); + try { + $taskId = $this->scheduleAgencyTask($prompt, $agencyConfirm, $lastConversationToken, $sessionId); + } catch (\Exception $e) { + return new JSONResponse(['error' => $e->getMessage()], Http::STATUS_BAD_REQUEST); + } } } else { // classic chat @@ -430,23 +530,75 @@ public function generateForSession(int $sessionId, int $agencyConfirm = 0): JSON do { $lastUserMessage = array_pop($history); } while ($lastUserMessage->getRole() !== 'human'); - // history is a list of JSON strings - $history = array_map(static function (Message $message) { - return json_encode([ - 'role' => $message->getRole(), - 'content' => $message->getContent(), - ]); - }, $history); - try { - $taskId = $this->scheduleLLMChatTask($lastUserMessage->getContent(), $systemPrompt, $history, $sessionId); - } catch (\Exception $e) { - return new JSONResponse(['error' => $e->getMessage()], Http::STATUS_BAD_REQUEST); + + $lastAttachments = $lastUserMessage->jsonSerialize()['attachments']; + $audioAttachment = $lastAttachments[0] ?? null; + $audioAttachment = $audioAttachment['type'] === 'Audio' ? $audioAttachment : null; + /* php 8.4 allows: + $audioAttachment = array_find($lastAttachments, static function (array $attachment) { + return $attachment['type'] === 'Audio'; + }); + */ + if ($audioAttachment !== null + && class_exists('OCP\\TaskProcessing\\TaskTypes\\AudioToAudioChat') + && isset($this->taskProcessingManager->getAvailableTaskTypes()[\OCP\TaskProcessing\TaskTypes\AudioToAudioChat::ID]) + ) { + // for an audio chat task, let's try to get the remote audio IDs for all the previous audio messages + $history = $this->getAudioHistory($history); + $fileId = $audioAttachment['file_id']; + try { + $taskId = $this->scheduleAudioChatTask($fileId, $systemPrompt, $history, $sessionId, $lastUserMessage->getId()); + } catch (\Exception $e) { + return new JSONResponse(['error' => $e->getMessage()], Http::STATUS_BAD_REQUEST); + } + } else { + // for a text chat task, let's only use text in the history + $history = array_map(static function (Message $message) { + return json_encode([ + 'role' => $message->getRole(), + 'content' => $message->getContent(), + ]); + }, $history); + try { + $taskId = $this->scheduleLLMChatTask($lastUserMessage->getContent(), $systemPrompt, $history, $sessionId); + } catch (\Exception $e) { + return new JSONResponse(['error' => $e->getMessage()], Http::STATUS_BAD_REQUEST); + } } } return new JSONResponse(['taskId' => $taskId]); } + private function getAudioHistory(array $history): array { + // history is a list of JSON strings + // The content is the remote audio ID (or the transcription as fallback) + // We only use the audio ID for assistant messages, if we have one and if it's not expired + // The audio ID is found in integration_openai's AudioToAudioChat response for example + // It is an optional output of AudioToAudioChat tasks + return array_map(static function (Message $message) { + $entry = [ + 'role' => $message->getRole(), + ]; + $attachments = $message->jsonSerialize()['attachments']; + if ($message->getRole() === 'assistant' + && count($attachments) > 0 + && $attachments[0]['type'] === 'Audio' + && isset($attachments[0]['remote_audio_id']) + ) { + if (!isset($attachments[0]['remote_audio_expires_at']) + || time() < $attachments[0]['remote_audio_expires_at'] + ) { + $entry['audio'] = ['id' => $attachments[0]['remote_audio_id']]; + return json_encode($entry); + } + } + + $entry['content'] = $message->getContent(); + return json_encode($entry); + }, $history); + } + /** * Regenerate response for a message * @@ -478,13 +630,25 @@ public function regenerateForSession(int $sessionId, int $messageId): JSONRespon return new JSONResponse(['error' => $this->l10n->t('Session not found')], Http::STATUS_NOT_FOUND); } + $message = $this->messageMapper->getMessageById($sessionId, $messageId); + $ocpTaskId = $message->getOcpTaskId(); + try { - $this->messageMapper->deleteMessageById($messageId); + $this->messageMapper->deleteMessageById($sessionId, $messageId); } catch (\OCP\DB\Exception|\RuntimeException $e) { $this->logger->warning('Failed to delete the last message', ['exception' => $e]); return new JSONResponse(['error' => $this->l10n->t('Failed to delete the last message')], Http::STATUS_INTERNAL_SERVER_ERROR); } + // delete the related task + if ($ocpTaskId !== 0) { + try { + $task = $this->taskProcessingManager->getTask($ocpTaskId); + $this->taskProcessingManager->deleteTask($task); + } catch (\OCP\TaskProcessing\Exception\Exception) { + } + } + return $this->generateForSession($sessionId); } @@ -496,7 +660,6 @@ public function regenerateForSession(int $sessionId, int $messageId): JSONRespon * @param int $taskId The message generation task ID * @param int $sessionId The chat session ID * @return JSONResponse|JSONResponse|JSONResponse - * @throws DoesNotExistException * @throws MultipleObjectsReturnedException * @throws \OCP\DB\Exception * @@ -538,8 +701,11 @@ public function checkMessageGenerationTask(int $taskId, int $sessionId): JSONRes // do not insert here, it is done by the listener return new JSONResponse($jsonMessage); } catch (\OCP\DB\Exception $e) { - $this->logger->warning('Failed to add a chat message into DB', ['exception' => $e]); + $this->logger->warning('Failed to add a chat message into the DB', ['exception' => $e]); return new JSONResponse(['error' => $this->l10n->t('Failed to add a chat message into DB')], Http::STATUS_INTERNAL_SERVER_ERROR); + } catch (DoesNotExistException $e) { + $this->logger->debug('Task finished successfully but failed to find the chat message in the DB. It should be created soon.', ['exception' => $e]); + return new JSONResponse(['task_status' => $task->getstatus()], Http::STATUS_EXPECTATION_FAILED); } } elseif ($task->getstatus() === Task::STATUS_RUNNING || $task->getstatus() === Task::STATUS_SCHEDULED) { return new JSONResponse(['task_status' => $task->getstatus()], Http::STATUS_EXPECTATION_FAILED); @@ -849,4 +1015,46 @@ private function scheduleAgencyTask(string $content, int $confirmation, string $ $this->taskProcessingManager->scheduleTask($task); return $task->getId() ?? 0; } + + private function scheduleAudioChatTask( + int $audioFileId, string $systemPrompt, array $history, int $sessionId, int $queryMessageId, + ): int { + $customId = 'chatty-llm:' . $sessionId . ':' . $queryMessageId; + $this->checkIfSessionIsThinking($customId); + $input = [ + 'input' => $audioFileId, + 'system_prompt' => $systemPrompt, + 'history' => $history, + ]; + $task = new Task( + \OCP\TaskProcessing\TaskTypes\AudioToAudioChat::ID, + $input, + Application::APP_ID . ':chatty-llm', + $this->userId, + $customId, + ); + $this->taskProcessingManager->scheduleTask($task); + return $task->getId() ?? 0; + } + + private function scheduleAgencyAudioTask( + int $audioFileId, int $confirmation, string $conversationToken, int $sessionId, int $queryMessageId, + ): int { + $customId = 'chatty-llm:' . $sessionId . ':' . $queryMessageId; + $this->checkIfSessionIsThinking($customId); + $taskInput = [ + 'input' => $audioFileId, + 'confirmation' => $confirmation, + 'conversation_token' => $conversationToken, + ]; + $task = new Task( + \OCP\TaskProcessing\TaskTypes\ContextAgentAudioInteraction::ID, + $taskInput, + Application::APP_ID . ':chatty-llm', + $this->userId, + $customId + ); + $this->taskProcessingManager->scheduleTask($task); + return $task->getId() ?? 0; + } } diff --git a/lib/Db/ChattyLLM/Message.php b/lib/Db/ChattyLLM/Message.php index d2b9dba4..a25def6c 100644 --- a/lib/Db/ChattyLLM/Message.php +++ b/lib/Db/ChattyLLM/Message.php @@ -25,6 +25,8 @@ * @method \void setOcpTaskId(int $ocpTaskId) * @method \string getSources() * @method \void setSources(string $sources) + * @method \string getAttachments() + * @method \void setAttachments(string $attachments) */ class Message extends Entity implements \JsonSerializable { /** @var int */ @@ -39,6 +41,8 @@ class Message extends Entity implements \JsonSerializable { protected $ocpTaskId; /** @var string */ protected $sources; + /** @var string */ + protected $attachments; public static $columns = [ 'id', @@ -48,6 +52,7 @@ class Message extends Entity implements \JsonSerializable { 'timestamp', 'ocp_task_id', 'sources', + 'attachments', ]; public static $fields = [ 'id', @@ -57,6 +62,7 @@ class Message extends Entity implements \JsonSerializable { 'timestamp', 'ocpTaskId', 'sources', + 'attachments', ]; public function __construct() { @@ -66,6 +72,7 @@ public function __construct() { $this->addType('timestamp', Types::INTEGER); $this->addType('ocp_task_id', Types::INTEGER); $this->addType('sources', Types::STRING); + $this->addType('attachments', Types::STRING); } #[\ReturnTypeWillChange] @@ -78,6 +85,7 @@ public function jsonSerialize() { 'timestamp' => $this->timestamp, 'ocp_task_id' => $this->ocpTaskId, 'sources' => $this->sources, + 'attachments' => json_decode($this->attachments, true) ?: [], ]; } } diff --git a/lib/Db/ChattyLLM/MessageMapper.php b/lib/Db/ChattyLLM/MessageMapper.php index b2c619a7..44ff6192 100644 --- a/lib/Db/ChattyLLM/MessageMapper.php +++ b/lib/Db/ChattyLLM/MessageMapper.php @@ -12,6 +12,7 @@ use OCP\AppFramework\Db\DoesNotExistException; use OCP\AppFramework\Db\MultipleObjectsReturnedException; use OCP\AppFramework\Db\QBMapper; +use OCP\DB\Exception; use OCP\DB\QueryBuilder\IQueryBuilder; use OCP\IDBConnection; @@ -62,6 +63,19 @@ public function getLastHumanMessage(int $sessionId): Message { return $this->findEntity($qb); } + public function getLastNonEmptyHumanMessage(int $sessionId): Message { + $qb = $this->db->getQueryBuilder(); + $qb->select(Message::$columns) + ->from($this->getTableName()) + ->where($qb->expr()->eq('session_id', $qb->createPositionalParameter($sessionId, IQueryBuilder::PARAM_INT))) + ->andWhere($qb->expr()->eq('role', $qb->createPositionalParameter('human', IQueryBuilder::PARAM_STR))) + ->andWhere($qb->expr()->nonEmptyString('content')) + ->orderBy('timestamp', 'DESC') + ->setMaxResults(1); + + return $this->findEntity($qb); + } + /** * @param int $sessionId * @param int $cursor @@ -86,17 +100,37 @@ public function getMessages(int $sessionId, int $cursor, int $limit): array { } /** + * @param int $sessionId + * @param int $maxTimestamp + * @return array + * @throws Exception + */ + public function getMessagesBefore(int $sessionId, int $maxTimestamp): array { + $qb = $this->db->getQueryBuilder(); + $qb->select(Message::$columns) + ->from($this->getTableName()) + ->where($qb->expr()->eq('session_id', $qb->createPositionalParameter($sessionId, IQueryBuilder::PARAM_INT))) + ->andWhere($qb->expr()->lt('timestamp', $qb->createPositionalParameter($maxTimestamp, IQueryBuilder::PARAM_INT))) + ->orderBy('id', 'DESC'); + + $messages = $this->findEntities($qb); + return array_reverse($messages); + } + + /** + * @param int $sessionId * @param integer $messageId * @return Message - * @throws \OCP\DB\Exception - * @throws MultipleObjectsReturnedException * @throws DoesNotExistException + * @throws Exception + * @throws MultipleObjectsReturnedException */ - public function getMessageById(int $messageId): Message { + public function getMessageById(int $sessionId, int $messageId): Message { $qb = $this->db->getQueryBuilder(); $qb->select(Message::$columns) ->from($this->getTableName()) - ->where($qb->expr()->eq('id', $qb->createPositionalParameter($messageId, IQueryBuilder::PARAM_INT))); + ->where($qb->expr()->eq('id', $qb->createPositionalParameter($messageId, IQueryBuilder::PARAM_INT))) + ->andWhere($qb->expr()->eq('session_id', $qb->createPositionalParameter($sessionId, IQueryBuilder::PARAM_INT))); return $this->findEntity($qb); } @@ -134,15 +168,16 @@ public function deleteMessagesBySession(int $sessionId): void { } /** + * @param int $sessionId * @param integer $messageId - * @throws \OCP\DB\Exception - * @throws \RuntimeException * @return void + * @throws Exception */ - public function deleteMessageById(int $messageId): void { + public function deleteMessageById(int $sessionId, int $messageId): void { $qb = $this->db->getQueryBuilder(); $qb->delete($this->getTableName()) - ->where($qb->expr()->eq('id', $qb->createPositionalParameter($messageId, IQueryBuilder::PARAM_INT))); + ->where($qb->expr()->eq('id', $qb->createPositionalParameter($messageId, IQueryBuilder::PARAM_INT))) + ->andWhere($qb->expr()->eq('session_id', $qb->createPositionalParameter($sessionId, IQueryBuilder::PARAM_INT))); $qb->executeStatement(); } diff --git a/lib/Listener/BeforeTemplateRenderedListener.php b/lib/Listener/BeforeTemplateRenderedListener.php index 245c5623..7f7686c3 100644 --- a/lib/Listener/BeforeTemplateRenderedListener.php +++ b/lib/Listener/BeforeTemplateRenderedListener.php @@ -66,6 +66,9 @@ public function handle(Event $event): void { $indexingComplete = $this->appConfig->getValueInt('context_chat', 'last_indexed_time', 0) !== 0; $this->initialStateService->provideInitialState('contextChatIndexingComplete', $indexingComplete); $this->initialStateService->provideInitialState('contextAgentToolSources', $this->assistantService->informationSources); + $this->initialStateService->provideInitialState('audio_chat_available', $this->assistantService->isAudioChatAvailable()); + $autoplayAudioChat = $this->config->getUserValue($this->userId, Application::APP_ID, 'autoplay_audio_chat', '1') === '1'; + $this->initialStateService->provideInitialState('autoplay_audio_chat', $autoplayAudioChat); } if (class_exists(\OCA\Viewer\Event\LoadViewer::class)) { $this->eventDispatcher->dispatchTyped(new \OCA\Viewer\Event\LoadViewer()); diff --git a/lib/Listener/ChattyLLMTaskListener.php b/lib/Listener/ChattyLLMTaskListener.php index 0f60a5c3..47f552ef 100644 --- a/lib/Listener/ChattyLLMTaskListener.php +++ b/lib/Listener/ChattyLLMTaskListener.php @@ -13,9 +13,11 @@ use OCA\Assistant\Db\ChattyLLM\Message; use OCA\Assistant\Db\ChattyLLM\MessageMapper; use OCA\Assistant\Db\ChattyLLM\SessionMapper; +use OCA\Assistant\Service\TaskProcessingService; use OCP\EventDispatcher\Event; use OCP\EventDispatcher\IEventListener; use OCP\TaskProcessing\Events\TaskSuccessfulEvent; +use OCP\TaskProcessing\Task; use Psr\Log\LoggerInterface; /** @@ -26,6 +28,7 @@ class ChattyLLMTaskListener implements IEventListener { public function __construct( private MessageMapper $messageMapper, private SessionMapper $sessionMapper, + private TaskProcessingService $taskProcessingService, private LoggerInterface $logger, ) { } @@ -52,17 +55,58 @@ public function handle(Event $event): void { } // message generation - if (preg_match('/^chatty-llm:(\d+)$/', $customId, $matches)) { + if (preg_match('/^chatty-llm:(\d+)/', $customId, $matches)) { $sessionId = (int)$matches[1]; + $isAgency = class_exists('OCP\\TaskProcessing\\TaskTypes\\ContextAgentInteraction') + && $taskTypeId === \OCP\TaskProcessing\TaskTypes\ContextAgentInteraction::ID; + $isAudioChat = class_exists('OCP\\TaskProcessing\\TaskTypes\\AudioToAudioChat') + && $taskTypeId === \OCP\TaskProcessing\TaskTypes\AudioToAudioChat::ID; + $isAgencyAudioChat = class_exists('OCP\\TaskProcessing\\TaskTypes\\ContextAgentAudioInteraction') + && $taskTypeId === \OCP\TaskProcessing\TaskTypes\ContextAgentAudioInteraction::ID; + + $taskOutput = $task->getOutput(); + $message = new Message(); $message->setSessionId($sessionId); $message->setOcpTaskId($task->getId()); $message->setRole('assistant'); - $message->setContent(trim($task->getOutput()['output'] ?? '')); $message->setTimestamp(time()); - $sources = json_encode($task->getOutput()['sources'] ?? []); - $message->setSources($sources ? $sources : '[]'); + $sources = json_encode($taskOutput['sources'] ?? []); + $message->setSources($sources ?: '[]'); + $message->setAttachments('[]'); + if ($isAudioChat || $isAgencyAudioChat) { + $outputTranscript = trim($taskOutput['output_transcript'] ?? ''); + $message->setContent($outputTranscript); + // agency might not return any output but just ask for confirmation + if ($outputTranscript !== '') { + $attachment = ['type' => 'Audio', 'file_id' => $taskOutput['output']]; + if (isset($taskOutput['audio_id'])) { + $attachment['remote_audio_id'] = $taskOutput['audio_id']; + if (isset($taskOutput['audio_expires_at'])) { + $attachment['remote_audio_expires_at'] = $taskOutput['audio_expires_at']; + } + } + $message->setAttachments(json_encode([$attachment])); + } + // now we have the transcription of the user audio input + if (preg_match('/^chatty-llm:\d+:(\d+)$/', $customId, $matches)) { + $queryMessageId = (int)$matches[1]; + $queryMessage = $this->messageMapper->getMessageById($sessionId, $queryMessageId); + $queryMessageContent = trim($taskOutput['input_transcript'] ?? ''); + $queryMessage->setContent($queryMessageContent); + $this->messageMapper->update($queryMessage); + // update session title if it's the first message + $olderMessages = $this->messageMapper->getMessagesBefore($sessionId, $queryMessage->getTimestamp()); + if (count($olderMessages) === 0) { + $this->sessionMapper->updateSessionTitle($task->getUserId(), $sessionId, $queryMessageContent); + } + } + } else { + $content = trim($taskOutput['output'] ?? ''); + $message->setContent($content); + $this->runTtsIfNeeded($sessionId, $message, $taskTypeId, $task->getUserId()); + } try { $this->messageMapper->insert($message); } catch (\OCP\DB\Exception $e) { @@ -70,15 +114,71 @@ public function handle(Event $event): void { } // store the conversation token and the actions if we are using the agency feature - if (class_exists('OCP\\TaskProcessing\\TaskTypes\\ContextAgentInteraction') - && $taskTypeId === \OCP\TaskProcessing\TaskTypes\ContextAgentInteraction::ID) { + if ($isAgency || $isAgencyAudioChat) { $session = $this->sessionMapper->getUserSession($task->getUserId(), $sessionId); - $conversationToken = ($task->getOutput()['conversation_token'] ?? null) ?: null; - $pendingActions = ($task->getOutput()['actions'] ?? null) ?: null; + $conversationToken = ($taskOutput['conversation_token'] ?? null) ?: null; + $pendingActions = ($taskOutput['actions'] ?? null) ?: null; $session->setAgencyConversationToken($conversationToken); $session->setAgencyPendingActions($pendingActions); $this->sessionMapper->update($session); } } } + + /** + * Run TTS on the response of an agency confirmation message + * + * @param int $sessionId + * @param Message $message + * @param string $taskTypeId + * @param string|null $userId + * @return void + */ + private function runTtsIfNeeded(int $sessionId, Message $message, string $taskTypeId, ?string $userId): void { + if ($taskTypeId !== \OCP\TaskProcessing\TaskTypes\ContextAgentInteraction::ID) { + return; + } + // is the last non-empty user message an audio one? + $lastNonEmptyMessage = $this->messageMapper->getLastNonEmptyHumanMessage($sessionId); + $attachments = $lastNonEmptyMessage->jsonSerialize()['attachments'] ?? []; + foreach ($attachments as $attachment) { + if ($attachment['type'] === 'Audio') { + // we found an audio attachment, response should be audio + $this->runTtsTask($message, $userId); + return; + } + } + } + + /** + * @param Message $message + * @param string|null $userId + * @return void + */ + private function runTtsTask(Message $message, ?string $userId): void { + try { + $task = new Task( + \OCP\TaskProcessing\TaskTypes\TextToSpeech::ID, + ['input' => $message->getContent()], + Application::APP_ID . ':internal', + $userId, + ); + $ttsTaskOutput = $this->taskProcessingService->runTaskProcessingTask($task); + } catch (\OCP\TaskProcessing\Exception\Exception $e) { + $this->logger->warning('TTS sub-task failed for chat message.', [ + 'exception' => $e, + 'messageId' => $message->getId(), + ]); + return; + } + $speechFileId = $ttsTaskOutput['speech']; + // we need to set "ocp_task_id" here because the file is not an output of the task that produced the message + // and we need the task ID + the file ID to load the audio file in the frontend + $attachment = [ + 'type' => 'Audio', + 'file_id' => $speechFileId, + 'ocp_task_id' => $task->getId(), + ]; + $message->setAttachments(json_encode([$attachment])); + } } diff --git a/lib/Migration/Version020600Date20250704145036.php b/lib/Migration/Version020600Date20250704145036.php new file mode 100644 index 00000000..eaec5000 --- /dev/null +++ b/lib/Migration/Version020600Date20250704145036.php @@ -0,0 +1,43 @@ +hasTable('assistant_chat_msgs')) { + $table = $schema->getTable('assistant_chat_msgs'); + if (!$table->hasColumn('attachments')) { + $table->addColumn('attachments', Types::TEXT, [ + 'notnull' => true, + 'default' => '[]', + ]); + $schemaChanged = true; + } + } + + return $schemaChanged ? $schema : null; + } +} diff --git a/lib/ResponseDefinitions.php b/lib/ResponseDefinitions.php index 4fbceec3..51aff909 100644 --- a/lib/ResponseDefinitions.php +++ b/lib/ResponseDefinitions.php @@ -63,6 +63,7 @@ * role: string, * content: string, * timestamp: int, + * attachments: list, * ocp_task_id: int, * sources: string, * } diff --git a/lib/Service/AssistantService.php b/lib/Service/AssistantService.php index 63915339..d94b3082 100644 --- a/lib/Service/AssistantService.php +++ b/lib/Service/AssistantService.php @@ -212,6 +212,17 @@ public function cancelNotifyWhenReady(int $taskId, string $userId): void { $this->taskNotificationMapper->deleteByTaskId($taskId); } + public function isAudioChatAvailable(): bool { + $availableTaskTypes = $this->taskProcessingManager->getAvailableTaskTypes(); + // we have at least the simple audio chat task type and the 3 sub task types available + return class_exists('OCP\\TaskProcessing\\TaskTypes\\AudioToAudioChat') + && array_key_exists(\OCP\TaskProcessing\TaskTypes\AudioToAudioChat::ID, $availableTaskTypes) + && array_key_exists(AudioToText::ID, $availableTaskTypes) + && class_exists('OCP\\TaskProcessing\\TaskTypes\\TextToSpeech') + && array_key_exists(\OCP\TaskProcessing\TaskTypes\TextToSpeech::ID, $availableTaskTypes) + && array_key_exists(TextToTextChat::ID, $availableTaskTypes); + } + /** * @return array */ @@ -281,6 +292,14 @@ public function getAvailableTaskTypes(): array { && $typeId === \OCP\TaskProcessing\TaskTypes\ContextAgentInteraction::ID) { continue; } + if (class_exists('OCP\\TaskProcessing\\TaskTypes\\ContextAgentAudioInteraction') + && $typeId === \OCP\TaskProcessing\TaskTypes\ContextAgentAudioInteraction::ID) { + continue; + } + if (class_exists('OCP\\TaskProcessing\\TaskTypes\\AudioToAudioChat') + && $typeId === \OCP\TaskProcessing\TaskTypes\AudioToAudioChat::ID) { + continue; + } } if ($typeId === TextToTextChat::ID) { // add the chattyUI virtual task type diff --git a/lib/Service/TaskProcessingService.php b/lib/Service/TaskProcessingService.php new file mode 100644 index 00000000..b14665b9 --- /dev/null +++ b/lib/Service/TaskProcessingService.php @@ -0,0 +1,69 @@ +taskProcessingManager->runTask($task); + $taskOutput = $task->getOutput(); + if ($taskOutput === null) { + throw new RuntimeException('Task with id ' . $task->getId() . ' does not have any output'); + } + return $taskOutput; + } + + /** + * @param int $fileId + * @return string + * @throws NotFoundException + * @throws GenericFileException + * @throws NotPermittedException + * @throws LockedException + */ + public function getOutputFileContent(int $fileId): string { + $node = $this->rootFolder->getFirstNodeById($fileId); + if ($node === null) { + $node = $this->rootFolder->getFirstNodeByIdInPath($fileId, '/' . $this->rootFolder->getAppDataDirectoryName() . '/'); + if (!$node instanceof File) { + throw new NotFoundException('Node is not a file'); + } + } elseif (!$node instanceof File) { + throw new NotFoundException('Node is not a file'); + } + return $node->getContent(); + } +} diff --git a/lib/Settings/Personal.php b/lib/Settings/Personal.php index 59bc112e..ecdd92ac 100644 --- a/lib/Settings/Personal.php +++ b/lib/Settings/Personal.php @@ -38,9 +38,13 @@ public function getForm(): TemplateResponse { $taskProcessingAvailable = $this->taskProcessingManager->hasProviders(); - $freePromptTaskTypeAvailable = in_array(TextToText::ID, $availableTaskTypes); - $speechToTextAvailable = in_array(AudioToText::ID, $availableTaskTypes); - $textToImageAvailable = in_array(TextToImage::ID, $availableTaskTypes); + $freePromptTaskTypeAvailable = array_key_exists(TextToText::ID, $availableTaskTypes); + $speechToTextAvailable = array_key_exists(AudioToText::ID, $availableTaskTypes); + $textToImageAvailable = array_key_exists(TextToImage::ID, $availableTaskTypes); + + $audioChatAvailable = (class_exists('OCP\\TaskProcessing\\TaskTypes\\AudioToAudioChat') && array_key_exists(\OCP\TaskProcessing\TaskTypes\AudioToAudioChat::ID, $availableTaskTypes)) + || (class_exists('OCP\\TaskProcessing\\TaskTypes\\ContextAgentAudioInteraction') && array_key_exists(\OCP\TaskProcessing\TaskTypes\ContextAgentAudioInteraction::ID, $availableTaskTypes)); + $autoplayAudioChat = $this->config->getUserValue($this->userId, Application::APP_ID, 'autoplay_audio_chat', '1') === '1'; $assistantAvailable = $taskProcessingAvailable && $this->appConfig->getValueString(Application::APP_ID, 'assistant_enabled', '1') === '1'; $assistantEnabled = $this->config->getUserValue($this->userId, Application::APP_ID, 'assistant_enabled', '1') === '1'; @@ -63,6 +67,8 @@ public function getForm(): TemplateResponse { 'free_prompt_picker_enabled' => $freePromptPickerEnabled, 'speech_to_text_picker_available' => $speechToTextPickerAvailable, 'speech_to_text_picker_enabled' => $speechToTextPickerEnabled, + 'audio_chat_available' => $audioChatAvailable, + 'autoplay_audio_chat' => $autoplayAudioChat, ]; $this->initialStateService->provideInitialState('config', $userConfig); diff --git a/lib/TaskProcessing/AudioToAudioChatProvider.php b/lib/TaskProcessing/AudioToAudioChatProvider.php new file mode 100644 index 00000000..b2d74a4c --- /dev/null +++ b/lib/TaskProcessing/AudioToAudioChatProvider.php @@ -0,0 +1,156 @@ +l->t('Assistant fallback'); + } + + public function getTaskTypeId(): string { + return AudioToAudioChat::ID; + } + + public function getExpectedRuntime(): int { + return 60; + } + + public function getInputShapeEnumValues(): array { + return []; + } + + public function getInputShapeDefaults(): array { + return []; + } + + + public function getOptionalInputShape(): array { + return []; + } + + public function getOptionalInputShapeEnumValues(): array { + return []; + } + + public function getOptionalInputShapeDefaults(): array { + return []; + } + + public function getOutputShapeEnumValues(): array { + return []; + } + + public function getOptionalOutputShape(): array { + return []; + } + + public function getOptionalOutputShapeEnumValues(): array { + return []; + } + + public function process(?string $userId, array $input, callable $reportProgress): array { + if (!isset($input['input']) || !$input['input'] instanceof File || !$input['input']->isReadable()) { + throw new RuntimeException('Invalid input file'); + } + $inputFile = $input['input']; + + if (!isset($input['system_prompt']) || !is_string($input['system_prompt'])) { + throw new RuntimeException('Invalid system_prompt'); + } + $systemPrompt = $input['system_prompt']; + + if (!isset($input['history']) || !is_array($input['history'])) { + throw new RuntimeException('Invalid history'); + } + /** @var list $history */ + $history = $input['history']; + + //////////////// 3 steps: STT -> LLM -> TTS + // speech to text + try { + $task = new Task( + AudioToText::ID, + ['input' => $inputFile->getId()], + Application::APP_ID . ':internal', + $userId, + ); + $taskOutput = $this->taskProcessingService->runTaskProcessingTask($task); + $inputTranscription = $taskOutput['output']; + } catch (Exception $e) { + $this->logger->warning('Transcription task failed with: ' . $e->getMessage(), ['exception' => $e]); + throw new RuntimeException('Transcription sub task failed with: ' . $e->getMessage()); + } + + // free prompt + try { + $task = new Task( + TextToTextChat::ID, + [ + 'input' => $inputTranscription, + 'system_prompt' => $systemPrompt, + 'history' => $history, + ], + Application::APP_ID . ':internal', + $userId, + ); + $taskOutput = $this->taskProcessingService->runTaskProcessingTask($task); + $llmResult = $taskOutput['output']; + } catch (Exception $e) { + throw new RuntimeException('TextToText sub task failed: ' . $e->getMessage()); + } + + // text to speech + try { + $task = new Task( + TextToSpeech::ID, + ['input' => $llmResult], + Application::APP_ID . ':internal', + $userId, + ); + $taskOutput = $this->taskProcessingService->runTaskProcessingTask($task); + $outputAudioFileId = $taskOutput['speech']; + + return [ + 'output' => $this->taskProcessingService->getOutputFileContent($outputAudioFileId), + 'output_transcript' => $llmResult, + 'input_transcript' => $inputTranscription, + ]; + } catch (\Exception $e) { + $this->logger->warning('Text to speech generation failed with: ' . $e->getMessage(), ['exception' => $e]); + throw new RuntimeException('Text to speech sub task failed with: ' . $e->getMessage()); + } + } +} diff --git a/lib/TaskProcessing/AudioToAudioChatTaskType.php b/lib/TaskProcessing/AudioToAudioChatTaskType.php new file mode 100644 index 00000000..94b5bcd2 --- /dev/null +++ b/lib/TaskProcessing/AudioToAudioChatTaskType.php @@ -0,0 +1,92 @@ +l->t('Voice chat'); + } + + /** + * @inheritDoc + */ + public function getDescription(): string { + return $this->l->t('Voice chat with the assistant'); + } + + /** + * @return string + */ + public function getId(): string { + return self::ID; + } + + /** + * @return ShapeDescriptor[] + */ + public function getInputShape(): array { + return [ + 'system_prompt' => new ShapeDescriptor( + $this->l->t('System prompt'), + $this->l->t('Define rules and assumptions that the assistant should follow during the conversation.'), + EShapeType::Text, + ), + 'input' => new ShapeDescriptor( + $this->l->t('Chat voice message'), + $this->l->t('Describe a task that you want the assistant to do or ask a question'), + EShapeType::Audio, + ), + 'history' => new ShapeDescriptor( + $this->l->t('Chat history'), + $this->l->t('The history of chat messages before the current message, starting with a message by the user'), + EShapeType::ListOfTexts, + ), + ]; + } + + /** + * @return ShapeDescriptor[] + */ + public function getOutputShape(): array { + return [ + 'input_transcript' => new ShapeDescriptor( + $this->l->t('Input transcript'), + $this->l->t('Transcription of the audio input'), + EShapeType::Text, + ), + 'output' => new ShapeDescriptor( + $this->l->t('Response voice message'), + $this->l->t('The generated voice response as part of the conversation'), + EShapeType::Audio + ), + 'output_transcript' => new ShapeDescriptor( + $this->l->t('Output transcript'), + $this->l->t('Transcription of the audio output'), + EShapeType::Text, + ), + ]; + } +} diff --git a/lib/TaskProcessing/ContextAgentAudioInteractionProvider.php b/lib/TaskProcessing/ContextAgentAudioInteractionProvider.php new file mode 100644 index 00000000..5d57865a --- /dev/null +++ b/lib/TaskProcessing/ContextAgentAudioInteractionProvider.php @@ -0,0 +1,162 @@ +l->t('Assistant'); + } + + public function getTaskTypeId(): string { + return ContextAgentAudioInteraction::ID; + } + + public function getExpectedRuntime(): int { + return 60; + } + + public function getInputShapeEnumValues(): array { + return []; + } + + public function getInputShapeDefaults(): array { + return []; + } + + + public function getOptionalInputShape(): array { + return []; + } + + public function getOptionalInputShapeEnumValues(): array { + return []; + } + + public function getOptionalInputShapeDefaults(): array { + return []; + } + + public function getOutputShapeEnumValues(): array { + return []; + } + + public function getOptionalOutputShape(): array { + return []; + } + + public function getOptionalOutputShapeEnumValues(): array { + return []; + } + + public function process(?string $userId, array $input, callable $reportProgress): array { + if (!isset($input['input']) || !$input['input'] instanceof File || !$input['input']->isReadable()) { + throw new RuntimeException('Invalid input file'); + } + $inputFile = $input['input']; + + if (!isset($input['confirmation']) || !is_numeric($input['confirmation'])) { + throw new RuntimeException('Invalid confirmation'); + } + $confirmation = $input['confirmation']; + + if (!isset($input['conversation_token']) || !is_string($input['conversation_token'])) { + throw new RuntimeException('Invalid conversation_token'); + } + $conversationToken = $input['conversation_token']; + + //////////////// 3 steps: STT -> Agency -> TTS + // speech to text + try { + $task = new Task( + AudioToText::ID, + ['input' => $inputFile->getId()], + Application::APP_ID . ':internal', + $userId, + ); + $taskOutput = $this->taskProcessingService->runTaskProcessingTask($task); + $inputTranscription = $taskOutput['output']; + } catch (Exception $e) { + $this->logger->warning('Transcription task failed with: ' . $e->getMessage(), ['exception' => $e]); + throw new RuntimeException('Transcription sub task failed with: ' . $e->getMessage()); + } + + // context agent + try { + $task = new Task( + ContextAgentInteraction::ID, + [ + 'input' => $inputTranscription, + 'confirmation' => $confirmation, + 'conversation_token' => $conversationToken, + ], + Application::APP_ID . ':internal', + $userId, + ); + $agencyTaskOutput = $this->taskProcessingService->runTaskProcessingTask($task); + } catch (Exception $e) { + throw new RuntimeException('Agency sub task failed: ' . $e->getMessage()); + } + + // the agent might only ask for confirmation + if ($agencyTaskOutput['output'] !== '') { + // text to speech + try { + $task = new Task( + TextToSpeech::ID, + ['input' => $agencyTaskOutput['output']], + Application::APP_ID . ':internal', + $userId, + ); + $ttsTaskOutput = $this->taskProcessingService->runTaskProcessingTask($task); + $outputAudioFileId = $ttsTaskOutput['speech']; + $outputAudioFileContent = $this->taskProcessingService->getOutputFileContent($outputAudioFileId); + } catch (\Exception $e) { + $this->logger->warning('Text to speech generation failed with: ' . $e->getMessage(), ['exception' => $e]); + throw new RuntimeException('Text to speech sub task failed with: ' . $e->getMessage()); + } + } else { + $outputAudioFileContent = ''; + } + + return [ + 'output' => $outputAudioFileContent, + 'output_transcript' => $agencyTaskOutput['output'], + 'input_transcript' => $inputTranscription, + 'conversation_token' => $agencyTaskOutput['conversation_token'], + 'actions' => $agencyTaskOutput['actions'], + ]; + } +} diff --git a/openapi.json b/openapi.json index 83c5d54a..71748b4e 100644 --- a/openapi.json +++ b/openapi.json @@ -49,6 +49,7 @@ "role", "content", "timestamp", + "attachments", "ocp_task_id", "sources" ], @@ -71,6 +72,25 @@ "type": "integer", "format": "int64" }, + "attachments": { + "type": "array", + "items": { + "type": "object", + "required": [ + "type", + "fileId" + ], + "properties": { + "type": { + "type": "string" + }, + "fileId": { + "type": "integer", + "format": "int64" + } + } + } + }, "ocp_task_id": { "type": "integer", "format": "int64" @@ -2479,6 +2499,27 @@ "format": "int64", "description": "Date of the message" }, + "attachments": { + "type": "array", + "nullable": true, + "description": "List of attachment objects", + "items": { + "type": "object", + "required": [ + "type", + "file_id" + ], + "properties": { + "type": { + "type": "string" + }, + "file_id": { + "type": "integer", + "format": "int64" + } + } + } + }, "firstHumanMessage": { "type": "boolean", "default": false, @@ -2832,6 +2873,122 @@ } } }, + "/ocs/v2.php/apps/assistant/chat/sessions/{sessionId}/messages/{messageId}": { + "get": { + "operationId": "chattyllm-get-message", + "summary": "Get a message", + "description": "Get a chat message in a session", + "tags": [ + "chat_api" + ], + "security": [ + { + "bearer_auth": [] + }, + { + "basic_auth": [] + } + ], + "parameters": [ + { + "name": "sessionId", + "in": "path", + "description": "The session ID", + "required": true, + "schema": { + "type": "integer", + "format": "int64" + } + }, + { + "name": "messageId", + "in": "path", + "description": "The message ID", + "required": true, + "schema": { + "type": "integer", + "format": "int64" + } + }, + { + "name": "OCS-APIRequest", + "in": "header", + "description": "Required to be true for the API request to pass", + "required": true, + "schema": { + "type": "boolean", + "default": true + } + } + ], + "responses": { + "200": { + "description": "The message has been successfully obtained", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ChatMessage" + } + } + } + }, + "500": { + "description": "", + "content": { + "application/json": { + "schema": { + "type": "object", + "required": [ + "error" + ], + "properties": { + "error": { + "type": "string" + } + } + } + } + } + }, + "401": { + "description": "Not logged in", + "content": { + "application/json": { + "schema": { + "type": "object", + "required": [ + "error" + ], + "properties": { + "error": { + "type": "string" + } + } + } + } + } + }, + "404": { + "description": "The session or the message was not found", + "content": { + "application/json": { + "schema": { + "type": "object", + "required": [ + "error" + ], + "properties": { + "error": { + "type": "string" + } + } + } + } + } + } + } + } + }, "/ocs/v2.php/apps/assistant/chat/generate": { "get": { "operationId": "chattyllm-generate-for-session", diff --git a/psalm.xml b/psalm.xml index 9de193f7..11000267 100644 --- a/psalm.xml +++ b/psalm.xml @@ -38,6 +38,9 @@ + + + diff --git a/src/assistant.js b/src/assistant.js index 039261ac..8e0c6499 100644 --- a/src/assistant.js +++ b/src/assistant.js @@ -135,7 +135,10 @@ export async function openAssistantForm({ view.outputs = finishedTask?.output } } else if (finishedTask.status === TASK_STATUS_STRING.failed) { - showError(t('assistant', 'The server failed to process your task with ID {id}. Please inform the server administrators of this issue.', { id: finishedTask.id })) + showError( + t('assistant', 'The server failed to process your task with ID {id}', { id: finishedTask.id }) + + '. ' + t('assistant', 'Please inform the server administrators of this issue.'), + ) console.error('[assistant] Task failed', finishedTask) view.outputs = null } @@ -210,7 +213,10 @@ export async function openAssistantForm({ view.outputs = finishedTask?.output view.selectedTaskId = finishedTask?.id } else if (finishedTask.status === TASK_STATUS_STRING.failed) { - showError(t('assistant', 'The server failed to process your task with ID {id}. Please inform the server administrators of this issue.', { id: finishedTask.id })) + showError( + t('assistant', 'The server failed to process your task with ID {id}', { id: finishedTask.id }) + + '. ' + t('assistant', 'Please inform the server administrators of this issue.'), + ) console.error('[assistant] Task failed', finishedTask) view.outputs = null } @@ -497,7 +503,10 @@ export async function openAssistantTask( .catch(error => { view.unmount() console.error('Assistant scheduling error', error) - showError(t('assistant', 'Assistant failed to schedule your task. Please try again and inform the server administrators if this issue persists.')) + showError( + t('assistant', 'Assistant failed to schedule your task') + + '. ' + t('assistant', 'Please try again and inform the server administrators if this issue persists.'), + ) }) }) const syncSubmit = (inputs, taskTypeId, newTaskCustomId = '') => { @@ -518,7 +527,10 @@ export async function openAssistantTask( if (finishedTask.status === TASK_STATUS_STRING.successful) { view.outputs = finishedTask?.output } else if (finishedTask.status === TASK_STATUS_STRING.failed) { - showError(t('assistant', 'The server failed to process your task with ID {id}. Please inform the server administrators of this issue.', { id: finishedTask.id })) + showError( + t('assistant', 'The server failed to process your task with ID {id}', { id: finishedTask.id }) + + '. ' + t('assistant', 'Please inform the server administrators of this issue.'), + ) console.error('[assistant] Task failed', finishedTask) view.outputs = null } @@ -591,7 +603,10 @@ export async function openAssistantTask( view.outputs = finishedTask?.output view.selectedTaskId = finishedTask?.id } else if (finishedTask.status === TASK_STATUS_STRING.failed) { - showError(t('assistant', 'The server failed to process your task with ID {id}. Please inform the server administrators of this issue.', { id: finishedTask.id })) + showError( + t('assistant', 'The server failed to process your task with ID {id}', { id: finishedTask.id }) + + '. ' + t('assistant', 'Please inform the server administrators of this issue.'), + ) console.error('[assistant] Task failed', finishedTask) view.outputs = null } diff --git a/src/components/AssistantTextProcessingForm.vue b/src/components/AssistantTextProcessingForm.vue index 69e64e61..a155b9db 100644 --- a/src/components/AssistantTextProcessingForm.vue +++ b/src/components/AssistantTextProcessingForm.vue @@ -280,9 +280,11 @@ export default { hasOptionalInputOutputShape() { const taskType = this.selectedTaskType console.debug('[assistant] selected taskType', taskType) + console.debug('[assistant] selected taskType', Object.keys(taskType.optionalInputShape).length) + console.debug('[assistant] selected taskType', Object.keys(taskType.optionalOutputShape).length) if (taskType) { return (taskType.optionalInputShape && Object.keys(taskType.optionalInputShape).length > 0) - || (taskType.optionalOutputShape && Object.keys(taskType.optionalOutputShape.length) > 0) + || (taskType.optionalOutputShape && Object.keys(taskType.optionalOutputShape).length > 0) } return false }, diff --git a/src/components/ChattyLLM/ChattyLLMInputForm.vue b/src/components/ChattyLLM/ChattyLLMInputForm.vue index 5a4b05fe..876d4321 100644 --- a/src/components/ChattyLLM/ChattyLLMInputForm.vue +++ b/src/components/ChattyLLM/ChattyLLMInputForm.vue @@ -128,7 +128,8 @@ v-model:chat-content="chatContent" class="session-area__input-area" :loading="loading" - @submit="handleSubmit" /> + @submit="handleSubmit" + @submit-audio="handleSubmitAudio" /> { this.pollMessageGenerationTimerId = setInterval(() => { - if (sessionId !== this.active.id) { + if (this.active === null || sessionId !== this.active.id) { console.debug('Stop polling messages for session ' + sessionId + ' because it is not selected anymore') clearInterval(this.pollMessageGenerationTimerId) return @@ -693,6 +724,17 @@ export default { if (sessionId === this.active.id) { this.active.sessionAgencyPendingActions = responseData.sessionAgencyPendingActions this.active.agencyAnswered = false + // update content of previous message if we receive an audio message from the assistant + // or if the last human message had an audio attachment + if (this.doesLastHumanMessageHaveAudio() + || (responseData.role === Roles.ASSISTANT && responseData.attachments.find(a => a.type === SHAPE_TYPE_NAMES.Audio)) + ) { + this.updateLastHumanMessageContent() + } + if (this.autoplayAudioChat) { + // auto play fresh messages + responseData.autoPlay = true + } resolve(responseData) } else { console.debug('Ignoring received message for session ' + sessionId + ' that is not selected anymore') @@ -712,10 +754,40 @@ export default { }) }, + getLastHumanMessage() { + return this.messages + .filter(m => m.role === Roles.HUMAN) + .pop() + }, + + doesLastHumanMessageHaveAudio() { + const lastHumanMessage = this.getLastHumanMessage() + if (lastHumanMessage) { + return lastHumanMessage.attachments.find(a => a.type === SHAPE_TYPE_NAMES.Audio) + } + return false + }, + + async updateLastHumanMessageContent() { + const lastHumanMessage = this.getLastHumanMessage() + if (lastHumanMessage) { + const updatedMessage = await axios.get( + getChatURL(`/sessions/${lastHumanMessage.session_id}/messages/${lastHumanMessage.id}`), + ) + lastHumanMessage.content = updatedMessage.data.content + // update session title (just in the frontend data, the db session is updated in the backend listener) + const isFirstHumanMessage = this.messages.filter(m => m.role === Roles.HUMAN).length === 1 + if (isFirstHumanMessage) { + const session = this.sessions.find((session) => session.id === lastHumanMessage.session_id) + session.title = updatedMessage.data.content + } + } + }, + async pollTitleGenerationTask(taskId, sessionId) { return new Promise((resolve, reject) => { this.pollTitleGenerationTimerId = setInterval(() => { - if (sessionId !== this.active.id) { + if (this.active === null || sessionId !== this.active.id) { console.debug('Stop polling title for session ' + sessionId + ' because it is not selected anymore') clearInterval(this.pollTitleGenerationTimerId) return @@ -758,7 +830,7 @@ export default { // this.messages.push({ role, content, timestamp }) this.chatContent = '' this.scrollToBottom() - await this.newMessage(role, content, timestamp, this.active.id, false, confirm) + await this.newMessage(role, content, timestamp, this.active.id, null, false, confirm) }, async saveLastSelectedTaskType(taskType) { diff --git a/src/components/ChattyLLM/ConversationBox.vue b/src/components/ChattyLLM/ConversationBox.vue index 916ffa3e..c6c099bb 100644 --- a/src/components/ChattyLLM/ConversationBox.vue +++ b/src/components/ChattyLLM/ConversationBox.vue @@ -61,7 +61,7 @@ export default { }, props: { - // [{ id: number, session_id: number, role: string, content: string, timestamp: number, sources: string }] + // [{ id: number, session_id: number, role: string, content: string, timestamp: number, sources: string, attachments: array }] messages: { type: Array, default: null, diff --git a/src/components/ChattyLLM/InputArea.vue b/src/components/ChattyLLM/InputArea.vue index 94d3bf34..5f24a732 100644 --- a/src/components/ChattyLLM/InputArea.vue +++ b/src/components/ChattyLLM/InputArea.vue @@ -18,7 +18,8 @@ @update:model-value="$emit('update:chatContent', $event)" @submit="$emit('submit', $event)" /> - + @@ -34,11 +40,17 @@ diff --git a/src/components/ChattyLLM/Message.vue b/src/components/ChattyLLM/Message.vue index ca7acdf0..d2111805 100644 --- a/src/components/ChattyLLM/Message.vue +++ b/src/components/ChattyLLM/Message.vue @@ -3,7 +3,7 @@ - SPDX-License-Identifier: AGPL-3.0-or-later --> - @@ -61,6 +61,13 @@ :reference-limit="1" :references="references" :autolink="true" /> + @@ -77,11 +84,13 @@ import { NcRichText } from '@nextcloud/vue/components/NcRichText' import InformationBox from 'vue-material-design-icons/InformationBox.vue' import MessageActions from './MessageActions.vue' +import AudioDisplay from '../fields/AudioDisplay.vue' import { getCurrentUser } from '@nextcloud/auth' import { showSuccess } from '@nextcloud/dialogs' import { generateOcsUrl } from '@nextcloud/router' import axios from '@nextcloud/axios' +import { SHAPE_TYPE_NAMES } from '../../constants.js' const PLAIN_URL_PATTERN = /(?:\s|^|\()((?:https?:\/\/)(?:[-A-Z0-9+_.]+(?::[0-9]+)?(?:\/[-A-Z0-9+&@#%?=~_|!:,.;()]*)*))(?:\s|$|\))/ig const MARKDOWN_LINK_PATTERN = /\[[-A-Z0-9+&@#%?=~_|!:,.;()]+\]\(((?:https?:\/\/)(?:[-A-Z0-9+_.]+(?::[0-9]+)?(?:\/[-A-Z0-9+&@#%?=~_|!:,.;]*)*))\)/ig @@ -90,6 +99,7 @@ export default { name: 'Message', components: { + AudioDisplay, AssistantIcon, NcAvatar, @@ -155,6 +165,12 @@ export default { parsedSources = parsedSources.map((source) => this.getSourceString(source)) return [...new Set(parsedSources)] }, + hasAttachments() { + return this.message.attachments?.length > 0 + }, + audioAttachments() { + return this.message.attachments?.filter(a => a.type === SHAPE_TYPE_NAMES.Audio) ?? [] + }, }, mounted() { diff --git a/src/components/PersonalSettings.vue b/src/components/PersonalSettings.vue index 90fb230d..13afa72f 100644 --- a/src/components/PersonalSettings.vue +++ b/src/components/PersonalSettings.vue @@ -16,6 +16,13 @@ {{ t('assistant', 'Enable Nextcloud Assistant in header') }} + + + {{ t('assistant', 'Auto-play audio chat responses') }} + + diff --git a/src/components/fields/AudioDisplay.vue b/src/components/fields/AudioDisplay.vue index 97820957..787dee59 100644 --- a/src/components/fields/AudioDisplay.vue +++ b/src/components/fields/AudioDisplay.vue @@ -5,12 +5,13 @@