Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
3274c7f
feat(audiochat): implement generic audio chat provider that schedules…
julien-nc Jul 3, 2025
b98e717
feat(audio-chat): record audio, store file, submit new fake message
julien-nc Jul 3, 2025
285b40d
feat(audio-chat): adjust UI to really submit, handle audio attachment…
julien-nc Jul 4, 2025
0c0ed92
feat(audio-chat): auto play new assistant audio messages
julien-nc Jul 4, 2025
2d53275
feat(audio-chat): generate openAPI specs
julien-nc Jul 7, 2025
3a706b8
feat(audio-chat): remove fallback task type, register provider only i…
julien-nc Jul 7, 2025
ffe2bcf
feat(audio-chat): filter out the audio chat task type in the assistan…
julien-nc Jul 7, 2025
7594d46
feat(audio-chat): implement ContextAgentAudioInteraction provider and…
julien-nc Jul 7, 2025
9a72e81
feat(audio-chat): add personal setting to toggle autoplay, fix person…
julien-nc Jul 8, 2025
bb6124e
feat(audio-chat): make sure attachments are always set in the entity
julien-nc Jul 8, 2025
e609d1d
feat(audio-chat): perform TTS for response after agency confirmation
julien-nc Jul 8, 2025
ba4ea60
feat(audio-chat): add support for optional remote_audio_id which we c…
julien-nc Jul 9, 2025
4cecfca
fix(chat): delete related tasks when deleting a session or a message,…
julien-nc Jul 10, 2025
cb6529a
feat(audio-chat): store and use potential remote_audio_expires_at
julien-nc Jul 10, 2025
6d85f2d
fix(audio-chat): prevent crash when trying to delete non existing task
julien-nc Jul 10, 2025
330296c
check if audio chat is available to show the record button, filter ou…
julien-nc Jul 11, 2025
4eb342f
clarify message mapper, use sessionId in deleteMessageById and getMes…
julien-nc Jul 11, 2025
891bb75
in regenerateForSession, delete task related with the deleted message
julien-nc Jul 11, 2025
003cdb3
rename initial state, mention integration_openai in comments, add mis…
julien-nc Jul 11, 2025
5cc7c59
fix typos
julien-nc Jul 11, 2025
7afb3e1
fix(audio-chat): update session title if the first message was audio,…
julien-nc Jul 11, 2025
77c2647
enh(audio-chat): add warning log if TTS of agency post-confirmation m…
julien-nc Jul 15, 2025
cf6648a
enh(audio-chat): add comments and adjust frontend error messages
julien-nc Jul 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
feat(audio-chat): perform TTS for response after agency confirmation
Signed-off-by: Julien Veyssier <[email protected]>
  • Loading branch information
julien-nc committed Jul 15, 2025
commit e609d1df0d1648f2a6e010f58d83d186da2daec0
12 changes: 7 additions & 5 deletions lib/Controller/ChattyLLMController.php
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ public function getSessions(): JSONResponse {
* @param string $role Role of the message (human, assistant etc...)
* @param string $content Content of the message
* @param int $timestamp Date of the message
* @param ?list<array{type: string, fileId: int}> $attachments List of attachment objects
* @param ?list<array{type: string, file_id: int}> $attachments List of attachment objects
* @param bool $firstHumanMessage Is it the first human message of the session?
* @return JSONResponse<Http::STATUS_OK, AssistantChatMessage, array{}>|JSONResponse<Http::STATUS_INTERNAL_SERVER_ERROR|Http::STATUS_UNAUTHORIZED|Http::STATUS_BAD_REQUEST|Http::STATUS_NOT_FOUND, array{error: string}, array{}>
*
Expand Down Expand Up @@ -474,7 +474,7 @@ public function generateForSession(int $sessionId, int $agencyConfirm = 0): JSON
&& isset($this->taskProcessingManager->getAvailableTaskTypes()[\OCP\TaskProcessing\TaskTypes\ContextAgentAudioInteraction::ID])
) {
// audio agency
$fileId = $audioAttachment['fileId'];
$fileId = $audioAttachment['file_id'];
try {
$taskId = $this->scheduleAgencyAudioTask($fileId, $agencyConfirm, $lastConversationToken, $sessionId, $lastUserMessage->getId());
} catch (\Exception $e) {
Expand Down Expand Up @@ -522,7 +522,7 @@ public function generateForSession(int $sessionId, int $agencyConfirm = 0): JSON
&& class_exists('OCP\\TaskProcessing\\TaskTypes\\AudioToAudioChat')
&& isset($this->taskProcessingManager->getAvailableTaskTypes()[\OCP\TaskProcessing\TaskTypes\AudioToAudioChat::ID])
) {
$fileId = $audioAttachment['fileId'];
$fileId = $audioAttachment['file_id'];
try {
$taskId = $this->scheduleAudioChatTask($fileId, $systemPrompt, $history, $sessionId, $lastUserMessage->getId());
} catch (\Exception $e) {
Expand Down Expand Up @@ -589,7 +589,6 @@ public function regenerateForSession(int $sessionId, int $messageId): JSONRespon
* @param int $taskId The message generation task ID
* @param int $sessionId The chat session ID
* @return JSONResponse<Http::STATUS_OK, AssistantChatAgencyMessage, array{}>|JSONResponse<Http::STATUS_EXPECTATION_FAILED, array{task_status: int}, array{}>|JSONResponse<Http::STATUS_INTERNAL_SERVER_ERROR|Http::STATUS_UNAUTHORIZED|Http::STATUS_BAD_REQUEST|Http::STATUS_NOT_FOUND, array{error: string}, array{}>
* @throws DoesNotExistException
* @throws MultipleObjectsReturnedException
* @throws \OCP\DB\Exception
*
Expand Down Expand Up @@ -631,8 +630,11 @@ public function checkMessageGenerationTask(int $taskId, int $sessionId): JSONRes
// do not insert here, it is done by the listener
return new JSONResponse($jsonMessage);
} catch (\OCP\DB\Exception $e) {
$this->logger->warning('Failed to add a chat message into DB', ['exception' => $e]);
$this->logger->warning('Failed to add a chat message into the DB', ['exception' => $e]);
return new JSONResponse(['error' => $this->l10n->t('Failed to add a chat message into DB')], Http::STATUS_INTERNAL_SERVER_ERROR);
} catch (DoesNotExistException $e) {
$this->logger->debug('Task finished successfully but failed to find the chat message in the DB. It should be created soon.', ['exception' => $e]);
return new JSONResponse(['task_status' => $task->getstatus()], Http::STATUS_EXPECTATION_FAILED);
}
} elseif ($task->getstatus() === Task::STATUS_RUNNING || $task->getstatus() === Task::STATUS_SCHEDULED) {
return new JSONResponse(['task_status' => $task->getstatus()], Http::STATUS_EXPECTATION_FAILED);
Expand Down
13 changes: 13 additions & 0 deletions lib/Db/ChattyLLM/MessageMapper.php
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,19 @@ public function getLastHumanMessage(int $sessionId): Message {
return $this->findEntity($qb);
}

/**
 * Get the most recent human message of a session whose content is not empty.
 *
 * The query filters on the session ID, on role = 'human' and on a non-empty
 * "content" column, sorts by timestamp (newest first) and keeps one row.
 *
 * NOTE(review): findEntity() is inherited and not visible here; it presumably
 * throws DoesNotExistException when no row matches - confirm against QBMapper.
 *
 * @param int $sessionId ID of the chat session to search in
 * @return Message The entity built by findEntity() from the single matching row
 */
public function getLastNonEmptyHumanMessage(int $sessionId): Message {
	$query = $this->db->getQueryBuilder();
	$expr = $query->expr();

	// positional parameters are registered in the order they appear in the WHERE clause
	$sessionParam = $query->createPositionalParameter($sessionId, IQueryBuilder::PARAM_INT);
	$roleParam = $query->createPositionalParameter('human', IQueryBuilder::PARAM_STR);

	$query->select(Message::$columns);
	$query->from($this->getTableName());
	$query->where($expr->eq('session_id', $sessionParam));
	$query->andWhere($expr->eq('role', $roleParam));
	$query->andWhere($expr->nonEmptyString('content'));
	$query->orderBy('timestamp', 'DESC');
	$query->setMaxResults(1);

	return $this->findEntity($query);
}

/**
* @param int $sessionId
* @param int $cursor
Expand Down
54 changes: 52 additions & 2 deletions lib/Listener/ChattyLLMTaskListener.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,12 @@
use OCA\Assistant\Db\ChattyLLM\Message;
use OCA\Assistant\Db\ChattyLLM\MessageMapper;
use OCA\Assistant\Db\ChattyLLM\SessionMapper;
use OCA\Assistant\Service\TaskProcessingService;
use OCP\EventDispatcher\Event;
use OCP\EventDispatcher\IEventListener;
use OCP\TaskProcessing\Events\TaskSuccessfulEvent;
use OCP\TaskProcessing\Task;
use OCP\TaskProcessing\TaskTypes\TextToSpeech;
use Psr\Log\LoggerInterface;

/**
Expand All @@ -26,6 +29,7 @@ class ChattyLLMTaskListener implements IEventListener {
/**
 * All dependencies are injected as promoted private properties; the constructor body is empty.
 *
 * @param MessageMapper $messageMapper Mapper used to look up, insert and update chat messages
 * @param SessionMapper $sessionMapper Mapper for chat sessions (its usage is not visible in this excerpt)
 * @param TaskProcessingService $taskProcessingService Service used to run the text-to-speech task for a response
 * @param LoggerInterface $logger PSR logger
 */
public function __construct(
private MessageMapper $messageMapper,
private SessionMapper $sessionMapper,
private TaskProcessingService $taskProcessingService,
private LoggerInterface $logger,
) {
}
Expand Down Expand Up @@ -75,7 +79,7 @@ public function handle(Event $event): void {
$message->setContent($outputTranscript);
// agency might not return any output but just ask for confirmation
if ($outputTranscript !== '') {
$message->setAttachments('[{"type":"Audio","fileId":' . $task->getOutput()['output'] . '}]');
$message->setAttachments('[{"type":"Audio","file_id":' . $task->getOutput()['output'] . '}]');
}
// now we have the transcription of the user audio input
if (preg_match('/^chatty-llm:\d+:(\d+)$/', $customId, $matches)) {
Expand All @@ -85,7 +89,9 @@ public function handle(Event $event): void {
$this->messageMapper->update($queryMessage);
}
} else {
$message->setContent(trim($task->getOutput()['output'] ?? ''));
$content = trim($task->getOutput()['output'] ?? '');
$message->setContent($content);
$this->runTtsIfNeeded($sessionId, $message, $taskTypeId, $task->getUserId());
}
try {
$this->messageMapper->insert($message);
Expand All @@ -104,4 +110,48 @@ public function handle(Event $event): void {
}
}
}

/**
 * Run TTS on the response of an agency confirmation message, if the conversation is an audio one.
 *
 * Nothing happens unless the task type is ContextAgentInteraction. In that case the last
 * non-empty human message of the session is looked up; if it carries an attachment of
 * type "Audio", a text-to-speech task is run for the given assistant message.
 *
 * This is a best-effort step: the caller inserts the message right after this method
 * returns, so lookup or TTS failures are logged and swallowed here instead of being thrown.
 * Otherwise the text response would never be stored.
 *
 * @param int $sessionId ID of the chat session the message belongs to
 * @param Message $message The assistant message (not inserted yet) that may receive an audio attachment
 * @param string $taskTypeId ID of the task type that produced the message
 * @param string|null $userId ID of the user the TTS task is run for
 * @return void
 */
private function runTtsIfNeeded(int $sessionId, Message $message, string $taskTypeId, ?string $userId): void {
	if ($taskTypeId !== \OCP\TaskProcessing\TaskTypes\ContextAgentInteraction::ID) {
		return;
	}

	// is the last non-empty user message an audio one?
	try {
		$lastNonEmptyMessage = $this->messageMapper->getLastNonEmptyHumanMessage($sessionId);
	} catch (\OCP\AppFramework\Db\DoesNotExistException|\OCP\AppFramework\Db\MultipleObjectsReturnedException|\OCP\DB\Exception $e) {
		// without a reference message we cannot tell if the conversation is an audio one: keep the text-only response
		$this->logger->warning('Could not get the last non-empty human message, skipping TTS of the response.', [
			'exception' => $e,
			'sessionId' => $sessionId,
		]);
		return;
	}

	$attachments = $lastNonEmptyMessage->jsonSerialize()['attachments'] ?? [];
	if (!is_array($attachments)) {
		return;
	}

	foreach ($attachments as $attachment) {
		// tolerate malformed attachment entries instead of triggering an undefined index warning
		if (!is_array($attachment) || ($attachment['type'] ?? null) !== 'Audio') {
			continue;
		}
		// we found an audio attachment
		try {
			$this->runTtsTask($message, $userId);
		} catch (\Exception $e) {
			// do not rethrow: the message must still be inserted by the caller, just without audio
			$this->logger->warning('TTS sub-task failed for chat message.', [
				'exception' => $e,
				'sessionId' => $sessionId,
			]);
		}
		return;
	}
}

/**
* @param Message $message
* @param string|null $userId
* @return void
*/
private function runTtsTask(Message $message, ?string $userId): void {
$task = new Task(
TextToSpeech::ID,
['input' => $message->getContent()],
Application::APP_ID . ':internal',
$userId,
);
$ttsTaskOutput = $this->taskProcessingService->runTaskProcessingTask($task);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be a good idea to catch exceptions thrown by runTaskProcessingTask:

private function runTtsTask(Message $message, ?string $userId): void {
    try {
        $task = new Task(/* ... */);
        $ttsTaskOutput = $this->taskProcessingService->runTaskProcessingTask($task);
        // ... set attachments
    } catch (\Exception $e) {
        $this->logger->warning('TTS sub-task failed for chat message.', [
            'exception' => $e,
            'messageId' => $message->getId(),
        ]);
    }
}

Copy link
Member Author

@julien-nc julien-nc Jul 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. If the task fails, I think we need to not throw the exception we caught to make sure we don't interrupt the rest of what happens in this listener (the last part: store the conversation token and the actions...)

$speechFileId = $ttsTaskOutput['speech'];
// we need to set "ocp_task_id" here because the file is not an output of the task that produced the message
// and we need the task ID + the file ID to load the audio file in the frontend
$message->setAttachments('[{"type":"Audio","file_id":' . $speechFileId . ',"ocp_task_id":' . $task->getId() . '}]');
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public function __construct(
}

public function getId(): string {
return Application::APP_ID . '-audio2audio:chat';
return Application::APP_ID . '-contextagent:audio-interaction';
}

public function getName(): string {
Expand Down
4 changes: 2 additions & 2 deletions openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -2507,13 +2507,13 @@
"type": "object",
"required": [
"type",
"fileId"
"file_id"
],
"properties": {
"type": {
"type": "string"
},
"fileId": {
"file_id": {
"type": "integer",
"format": "int64"
}
Expand Down
2 changes: 1 addition & 1 deletion src/components/ChattyLLM/ChattyLLMInputForm.vue
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ export default {
const role = Roles.HUMAN
const content = ''
const timestamp = +new Date() / 1000 | 0
const attachments = [{ type: SHAPE_TYPE_NAMES.Audio, fileId }]
const attachments = [{ type: SHAPE_TYPE_NAMES.Audio, file_id: fileId }]

if (this.active === null) {
await this.newSession()
Expand Down
6 changes: 3 additions & 3 deletions src/components/ChattyLLM/Message.vue
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@
:references="references"
:autolink="true" />
<AudioDisplay v-for="a in audioAttachments"
:key="a.type + '-' + a.fileId"
:key="a.type + '-' + a.file_id"
class="message__content"
:autoplay="message.autoPlay"
:file-id="a.fileId"
:task-id="message.role === 'human' ? undefined : message.ocp_task_id"
:file-id="a.file_id"
:task-id="message.role === 'human' ? undefined : (a.ocp_task_id ?? message.ocp_task_id)"
:is-output="message.role === 'assistant'" />
</div>
</template>
Expand Down