From 2da3f450fa7b3b15bad62183054b5dd8da3d2bc1 Mon Sep 17 00:00:00 2001 From: Julien Veyssier Date: Mon, 7 Jul 2025 15:28:15 +0200 Subject: [PATCH] feat(TaskProcessing): add agency audio-to-audio task type Signed-off-by: Julien Veyssier --- lib/composer/composer/autoload_classmap.php | 1 + lib/composer/composer/autoload_static.php | 1 + lib/private/TaskProcessing/Manager.php | 1 + .../TaskTypes/AudioToAudioChat.php | 6 +- .../ContextAgentAudioInteraction.php | 118 ++++++++++++++++++ 5 files changed, 124 insertions(+), 3 deletions(-) create mode 100644 lib/public/TaskProcessing/TaskTypes/ContextAgentAudioInteraction.php diff --git a/lib/composer/composer/autoload_classmap.php b/lib/composer/composer/autoload_classmap.php index 32334c1d3e32c..0c97cdd2a3052 100644 --- a/lib/composer/composer/autoload_classmap.php +++ b/lib/composer/composer/autoload_classmap.php @@ -844,6 +844,7 @@ 'OCP\\TaskProcessing\\Task' => $baseDir . '/lib/public/TaskProcessing/Task.php', 'OCP\\TaskProcessing\\TaskTypes\\AudioToAudioChat' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/AudioToAudioChat.php', 'OCP\\TaskProcessing\\TaskTypes\\AudioToText' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/AudioToText.php', + 'OCP\\TaskProcessing\\TaskTypes\\ContextAgentAudioInteraction' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/ContextAgentAudioInteraction.php', 'OCP\\TaskProcessing\\TaskTypes\\ContextAgentInteraction' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/ContextAgentInteraction.php', 'OCP\\TaskProcessing\\TaskTypes\\ContextWrite' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/ContextWrite.php', 'OCP\\TaskProcessing\\TaskTypes\\GenerateEmoji' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/GenerateEmoji.php', diff --git a/lib/composer/composer/autoload_static.php b/lib/composer/composer/autoload_static.php index f748b36408952..294916ce970af 100644 --- a/lib/composer/composer/autoload_static.php +++ b/lib/composer/composer/autoload_static.php @@ -885,6 +885,7 @@ class ComposerStaticInit749170dad3f5e7f9ca158f5a9f04f6a2 'OCP\\TaskProcessing\\Task' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/Task.php', 'OCP\\TaskProcessing\\TaskTypes\\AudioToAudioChat' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/AudioToAudioChat.php', 'OCP\\TaskProcessing\\TaskTypes\\AudioToText' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/AudioToText.php', + 'OCP\\TaskProcessing\\TaskTypes\\ContextAgentAudioInteraction' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/ContextAgentAudioInteraction.php', 'OCP\\TaskProcessing\\TaskTypes\\ContextAgentInteraction' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/ContextAgentInteraction.php', 'OCP\\TaskProcessing\\TaskTypes\\ContextWrite' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/ContextWrite.php', 'OCP\\TaskProcessing\\TaskTypes\\GenerateEmoji' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/GenerateEmoji.php', diff --git a/lib/private/TaskProcessing/Manager.php b/lib/private/TaskProcessing/Manager.php index c6d580a18b2b5..60e0eea8ede45 100644 --- a/lib/private/TaskProcessing/Manager.php +++ b/lib/private/TaskProcessing/Manager.php @@ -590,6 +590,7 @@ private function _getTaskTypes(): array { \OCP\TaskProcessing\TaskTypes\TextToTextProofread::ID => \OCP\Server::get(\OCP\TaskProcessing\TaskTypes\TextToTextProofread::class), \OCP\TaskProcessing\TaskTypes\TextToSpeech::ID => \OCP\Server::get(\OCP\TaskProcessing\TaskTypes\TextToSpeech::class), \OCP\TaskProcessing\TaskTypes\AudioToAudioChat::ID => \OCP\Server::get(\OCP\TaskProcessing\TaskTypes\AudioToAudioChat::class), + \OCP\TaskProcessing\TaskTypes\ContextAgentAudioInteraction::ID => \OCP\Server::get(\OCP\TaskProcessing\TaskTypes\ContextAgentAudioInteraction::class), ]; foreach ($context->getTaskProcessingTaskTypes() as $providerServiceRegistration) { diff --git a/lib/public/TaskProcessing/TaskTypes/AudioToAudioChat.php b/lib/public/TaskProcessing/TaskTypes/AudioToAudioChat.php index c141180f53544..c862437e86b27 100644 --- a/lib/public/TaskProcessing/TaskTypes/AudioToAudioChat.php +++ b/lib/public/TaskProcessing/TaskTypes/AudioToAudioChat.php @@ -16,7 +16,7 @@ use OCP\TaskProcessing\ShapeDescriptor; /** - * This is the task processing task type for text chat + * This is the task processing task type for audio chat * @since 32.0.0 */ class AudioToAudioChat implements ITaskType { @@ -75,12 +75,12 @@ public function getInputShape(): array { ), 'input' => new ShapeDescriptor( $this->l->t('Chat voice message'), - $this->l->t('Describe a task that you want the assistant to do or ask a question'), + $this->l->t('Describe a task that you want the assistant to do or ask a question.'), EShapeType::Audio ), 'history' => new ShapeDescriptor( $this->l->t('Chat history'), - $this->l->t('The history of chat messages before the current message, starting with a message by the user'), + $this->l->t('The history of chat messages before the current message, starting with a message by the user.'), EShapeType::ListOfTexts ) ]; diff --git a/lib/public/TaskProcessing/TaskTypes/ContextAgentAudioInteraction.php b/lib/public/TaskProcessing/TaskTypes/ContextAgentAudioInteraction.php new file mode 100644 index 0000000000000..6cd358040b716 --- /dev/null +++ b/lib/public/TaskProcessing/TaskTypes/ContextAgentAudioInteraction.php @@ -0,0 +1,118 @@ +l = $l10nFactory->get('lib'); + } + + /** + * @inheritDoc + * @since 32.0.0 + */ + public function getName(): string { + return 'ContextAgent audio'; // We do not translate this + } + + /** + * @inheritDoc + * @since 32.0.0 + */ + public function getDescription(): string { + return $this->l->t('Chat by voice with an agent'); + } + + /** + * @return string + * @since 32.0.0 + */ + public function getId(): string { + return self::ID; + } + + /** + * @return ShapeDescriptor[] + * @since 32.0.0 + */ + public function getInputShape(): array { + return [ + 'input' => new ShapeDescriptor( + $this->l->t('Chat voice message'), + $this->l->t('Describe a task that you want the agent to do or ask a question.'), + EShapeType::Audio + ), + 'confirmation' => new ShapeDescriptor( + $this->l->t('Confirmation'), + $this->l->t('Whether to confirm previously requested actions: 0 for denial and 1 for confirmation.'), + EShapeType::Number + ), + 'conversation_token' => new ShapeDescriptor( + $this->l->t('Conversation token'), + $this->l->t('A token representing the conversation.'), + EShapeType::Text + ), + ]; + } + + /** + * @return ShapeDescriptor[] + * @since 32.0.0 + */ + public function getOutputShape(): array { + return [ + 'input_transcript' => new ShapeDescriptor( + $this->l->t('Input transcript'), + $this->l->t('Transcription of the audio input'), + EShapeType::Text, + ), + 'output' => new ShapeDescriptor( + $this->l->t('Response voice message'), + $this->l->t('The generated voice response as part of the conversation'), + EShapeType::Audio + ), + 'output_transcript' => new ShapeDescriptor( + $this->l->t('Output transcript'), + $this->l->t('Transcription of the audio output'), + EShapeType::Text, + ), + 'conversation_token' => new ShapeDescriptor( + $this->l->t('The new conversation token'), + $this->l->t('Send this along with the next interaction.'), + EShapeType::Text + ), + 'actions' => new ShapeDescriptor( + $this->l->t('Requested actions by the agent'), + $this->l->t('Actions that the agent would like to carry out in JSON format.'), + EShapeType::Text + ), + ]; + } +}