Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/composer/composer/autoload_classmap.php
Original file line number Diff line number Diff line change
Expand Up @@ -842,6 +842,7 @@
'OCP\\TaskProcessing\\ShapeDescriptor' => $baseDir . '/lib/public/TaskProcessing/ShapeDescriptor.php',
'OCP\\TaskProcessing\\ShapeEnumValue' => $baseDir . '/lib/public/TaskProcessing/ShapeEnumValue.php',
'OCP\\TaskProcessing\\Task' => $baseDir . '/lib/public/TaskProcessing/Task.php',
'OCP\\TaskProcessing\\TaskTypes\\AudioToAudioChat' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/AudioToAudioChat.php',
'OCP\\TaskProcessing\\TaskTypes\\AudioToText' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/AudioToText.php',
'OCP\\TaskProcessing\\TaskTypes\\ContextAgentInteraction' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/ContextAgentInteraction.php',
'OCP\\TaskProcessing\\TaskTypes\\ContextWrite' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/ContextWrite.php',
Expand Down
1 change: 1 addition & 0 deletions lib/composer/composer/autoload_static.php
Original file line number Diff line number Diff line change
Expand Up @@ -883,6 +883,7 @@ class ComposerStaticInit749170dad3f5e7f9ca158f5a9f04f6a2
'OCP\\TaskProcessing\\ShapeDescriptor' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/ShapeDescriptor.php',
'OCP\\TaskProcessing\\ShapeEnumValue' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/ShapeEnumValue.php',
'OCP\\TaskProcessing\\Task' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/Task.php',
'OCP\\TaskProcessing\\TaskTypes\\AudioToAudioChat' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/AudioToAudioChat.php',
'OCP\\TaskProcessing\\TaskTypes\\AudioToText' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/AudioToText.php',
'OCP\\TaskProcessing\\TaskTypes\\ContextAgentInteraction' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/ContextAgentInteraction.php',
'OCP\\TaskProcessing\\TaskTypes\\ContextWrite' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/ContextWrite.php',
Expand Down
1 change: 1 addition & 0 deletions lib/private/TaskProcessing/Manager.php
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,7 @@ private function _getTaskTypes(): array {
\OCP\TaskProcessing\TaskTypes\ContextAgentInteraction::ID => \OCP\Server::get(\OCP\TaskProcessing\TaskTypes\ContextAgentInteraction::class),
\OCP\TaskProcessing\TaskTypes\TextToTextProofread::ID => \OCP\Server::get(\OCP\TaskProcessing\TaskTypes\TextToTextProofread::class),
\OCP\TaskProcessing\TaskTypes\TextToSpeech::ID => \OCP\Server::get(\OCP\TaskProcessing\TaskTypes\TextToSpeech::class),
\OCP\TaskProcessing\TaskTypes\AudioToAudioChat::ID => \OCP\Server::get(\OCP\TaskProcessing\TaskTypes\AudioToAudioChat::class),
];

foreach ($context->getTaskProcessingTaskTypes() as $providerServiceRegistration) {
Expand Down
112 changes: 112 additions & 0 deletions lib/public/TaskProcessing/TaskTypes/AudioToAudioChat.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
<?php

declare(strict_types=1);

/**
* SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors
* SPDX-License-Identifier: AGPL-3.0-or-later
*/

namespace OCP\TaskProcessing\TaskTypes;

use OCP\IL10N;
use OCP\L10N\IFactory;
use OCP\TaskProcessing\EShapeType;
use OCP\TaskProcessing\ITaskType;
use OCP\TaskProcessing\ShapeDescriptor;

/**
* This is the task processing task type for audio chat (voice conversation with the assistant)
* @since 32.0.0
*/
class AudioToAudioChat implements ITaskType {
/**
* @since 32.0.0
*/
public const ID = 'core:audio2audio:chat';

private IL10N $l;

/**
* @param IFactory $l10nFactory
* @since 32.0.0
*/
public function __construct(IFactory $l10nFactory) {
	// Obtain the IL10N instance the factory returns for 'lib' once, up front;
	// every user-facing string in this class is translated through it.
	$translator = $l10nFactory->get('lib');
	$this->l = $translator;
}


/**
* @inheritDoc
* @since 32.0.0
*/
public function getName(): string {
	// Translated, human-readable label of this task type.
	$label = $this->l->t('Audio chat');

	return $label;
}

/**
* @inheritDoc
* @since 32.0.0
*/
public function getDescription(): string {
	// Translated one-line summary of what this task type does.
	$summary = $this->l->t('Voice chat with the assistant');

	return $summary;
}

/**
* @return string
* @since 32.0.0
*/
public function getId(): string {
	// The identifier is the class constant ID declared on this class.
	$taskTypeId = self::ID;

	return $taskTypeId;
}

/**
* @return ShapeDescriptor[]
* @since 32.0.0
*/
public function getInputShape(): array {
// Describes the three input slots of this task type, keyed by slot name.
// Each ShapeDescriptor receives a translated name, a translated description
// and the EShapeType of the slot.
return [
// Text slot: rules and assumptions for the assistant.
'system_prompt' => new ShapeDescriptor(
$this->l->t('System prompt'),
$this->l->t('Define rules and assumptions that the assistant should follow during the conversation.'),
EShapeType::Text
),
// Audio slot: the current voice message.
'input' => new ShapeDescriptor(
$this->l->t('Chat voice message'),
$this->l->t('Describe a task that you want the assistant to do or ask a question'),
EShapeType::Audio
),
// List-of-texts slot: the messages that preceded the current one.
'history' => new ShapeDescriptor(
$this->l->t('Chat history'),
$this->l->t('The history of chat messages before the current message, starting with a message by the user'),
EShapeType::ListOfTexts
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where do we get the texts from?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From the chat conversation history in the chat UI.
Ideally we need a mixed list of text and audio but since we will get the transcription of audio responses (as optional output), my plan is to store that in a chat message so the history can be text-only for now. Wdyt?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. But if the transcription is an optional output, we may not get it when other providers implement this task type. Do we schedule another transcription task then?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think so, yes.
I even think the text output could be mandatory and part of the task type output shape (because we need it for the history). Wdyt?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I was thinking that as well, let's make it mandatory

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The input transcription is optional though. Or is it? We need it too for the history.

)
];
}

/**
* @return ShapeDescriptor[]
* @since 32.0.0
*/
public function getOutputShape(): array {
	// Build each descriptor in a named local first (same order as the
	// resulting array), then assemble the keyed output shape.

	// Text slot: transcription of the audio that was sent in.
	$inputTranscript = new ShapeDescriptor(
		$this->l->t('Input transcript'),
		$this->l->t('Transcription of the audio input'),
		EShapeType::Text,
	);

	// Audio slot: the generated voice response.
	$voiceResponse = new ShapeDescriptor(
		$this->l->t('Response voice message'),
		$this->l->t('The generated voice response as part of the conversation'),
		EShapeType::Audio
	);

	// Text slot: transcription of the generated voice response.
	$outputTranscript = new ShapeDescriptor(
		$this->l->t('Output transcript'),
		$this->l->t('Transcription of the audio output'),
		EShapeType::Text,
	);

	return [
		'input_transcript' => $inputTranscript,
		'output' => $voiceResponse,
		'output_transcript' => $outputTranscript,
	];
}
}
Loading