diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index 7bc1dd37..1ce03b9c 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -42,7 +42,7 @@ jobs:
matrix:
php-versions: [ '8.1' ]
databases: [ 'sqlite' ]
- server-versions: [ 'master', 'stable28', 'stable29' ]
+ server-versions: [ 'master' ]
name: Integration test on ${{ matrix.server-versions }} php@${{ matrix.php-versions }}
@@ -123,7 +123,7 @@ jobs:
php -S localhost:8080 &
- name: Enable context_chat and app_api
- run: ./occ app:enable -vvv -f context_chat app_api
+ run: ./occ app:enable -vvv -f context_chat app_api testing
- name: Checkout documentation
uses: actions/checkout@v4
@@ -166,6 +166,8 @@ jobs:
- name: Run the prompts
run: |
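+ # start two TaskProcessing workers in the background so the scheduled text2text tasks get picked up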
+ ./occ background-job:worker 'OC\TaskProcessing\SynchronousBackgroundJob' &
+ ./occ background-job:worker 'OC\TaskProcessing\SynchronousBackgroundJob' &
./occ context_chat:prompt admin "Which factors are taken into account for the Ethical AI Rating?"
./occ context_chat:prompt admin "Welche Faktoren beeinflussen das Ethical AI Rating?"
diff --git a/appinfo/info.xml b/appinfo/info.xml
index e6c71b20..38ecbd19 100644
--- a/appinfo/info.xml
+++ b/appinfo/info.xml
@@ -25,7 +25,7 @@ Install the given apps for Context Chat to work as desired **in the given order*
https://github.com/nextcloud/context_chat_backend/issues
https://github.com/nextcloud/context_chat_backend.git
-
+
diff --git a/config.cpu.yaml b/config.cpu.yaml
index 9bc217bf..4bb73c40 100644
--- a/config.cpu.yaml
+++ b/config.cpu.yaml
@@ -40,6 +40,8 @@ embedding:
device: cpu
llm:
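+ # leave nc_texttotext empty to use Nextcloud's TaskProcessing text2text provider (no local model is downloaded)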
+ nc_texttotext:
+
llama:
model_path: dolphin-2.2.1-mistral-7b.Q5_K_M.gguf
n_batch: 512
diff --git a/config.gpu.yaml b/config.gpu.yaml
index a4ea45ab..e8cff474 100644
--- a/config.gpu.yaml
+++ b/config.gpu.yaml
@@ -40,6 +40,8 @@ embedding:
device: cuda
llm:
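+ # leave nc_texttotext empty to use Nextcloud's TaskProcessing text2text provider (no local model is downloaded)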
+ nc_texttotext:
+
llama:
model_path: dolphin-2.2.1-mistral-7b.Q5_K_M.gguf
n_batch: 512
diff --git a/context_chat_backend/chain/query_proc.py b/context_chat_backend/chain/query_proc.py
index a5db2dc3..b6ebfb87 100644
--- a/context_chat_backend/chain/query_proc.py
+++ b/context_chat_backend/chain/query_proc.py
@@ -25,7 +25,7 @@ def get_pruned_query(llm: LLM, config: TConfig, query: str, template: str, text_
or llm_config.get('config', {}).get('max_new_tokens') \
or max(
llm_config.get('pipeline_kwargs', {}).get('config', {}).get('max_new_tokens', 0),
- llm_config.get('pipeline_kwargs', {}).get('config', {}).get('max_length')
+ llm_config.get('pipeline_kwargs', {}).get('config', {}).get('max_length', 0)
) \
or 4096
diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index c820df31..0b67bda3 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -12,6 +12,7 @@
from .config_parser import get_config
from .download import background_init, ensure_models
from .dyn_loader import EmbeddingModelLoader, LLMModelLoader, LoaderException, VectorDBLoader
+from .models import LlmException
from .ocs_utils import AppAPIAuthMiddleware
from .setup_functions import ensure_config_file, repair_run, setup_env_vars
from .utils import JSONResponse, enabled_guard, update_progress, value_of
@@ -105,6 +106,11 @@ async def _(request: Request, exc: ValueError):
return JSONResponse(str(exc), 400)
+@app.exception_handler(LlmException)
+async def _(request: Request, exc: LlmException):
+ log_error(f'Llm Error: {request.url.path}:', exc)
+ return JSONResponse(str(exc), 400)
+
# routes
@app.get('/')
diff --git a/context_chat_backend/download.py b/context_chat_backend/download.py
index 090e79d6..e90362cb 100644
--- a/context_chat_backend/download.py
+++ b/context_chat_backend/download.py
@@ -202,7 +202,8 @@ def background_init(app: FastAPI):
for model_type in ('embedding', 'llm'):
model_name = _get_model_name_or_path(config, model_type)
if model_name is None:
- raise Exception(f'Error: Model name/path not found for {model_type}')
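+ # nothing to download for providers without a local model (e.g. nc_texttotext); just advance the progress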
+ update_progress(app, progress := progress + 50)
+ continue
if not _download_model(model_name):
raise Exception(f'Error: Model download failed for {model_name}')
@@ -220,7 +221,7 @@ def ensure_models(app: FastAPI) -> bool:
for model_type in ('embedding', 'llm'):
model_name = _get_model_name_or_path(app.extra['CONFIG'], model_type)
if model_name is None:
- return False
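+ # providers without a local model (e.g. nc_texttotext) need no downloaded files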
+ return True
if not _model_exists(model_name):
return False
diff --git a/context_chat_backend/models/__init__.py b/context_chat_backend/models/__init__.py
index 5bee22be..33b1c254 100644
--- a/context_chat_backend/models/__init__.py
+++ b/context_chat_backend/models/__init__.py
@@ -4,53 +4,56 @@
from langchain.llms.base import LLM
from langchain.schema.embeddings import Embeddings
-_embedding_models = ['llama', 'hugging_face', 'instructor']
-_llm_models = ['llama', 'hugging_face', 'ctransformer']
+_embedding_models = ["llama", "hugging_face", "instructor"]
+_llm_models = ["nc_texttotext", "llama", "hugging_face", "ctransformer"]
models = {
- 'embedding': _embedding_models,
- 'llm': _llm_models,
+ "embedding": _embedding_models,
+ "llm": _llm_models,
}
-__all__ = ['init_model', 'load_model', 'models']
+__all__ = ["init_model", "load_model", "models", "LlmException"]
def load_model(model_type: str, model_info: tuple[str, dict]) -> Embeddings | LLM | None:
- model_name, model_config = model_info
+ model_name, model_config = model_info
- try:
- module = import_module(f'.{model_name}', 'context_chat_backend.models')
- except Exception as e:
- raise AssertionError(f'Error: could not load {model_name} model from context_chat_backend/models') from e
+ try:
+ module = import_module(f".{model_name}", "context_chat_backend.models")
+ except Exception as e:
+ raise AssertionError(f"Error: could not load {model_name} model from context_chat_backend/models") from e
- if module is None or not hasattr(module, 'get_model_for'):
- raise AssertionError(f'Error: could not load {model_name} model')
+ if module is None or not hasattr(module, "get_model_for"):
+ raise AssertionError(f"Error: could not load {model_name} model")
- get_model_for = module.get_model_for
+ get_model_for = module.get_model_for
- if not isinstance(get_model_for, Callable):
- raise AssertionError(f'Error: {model_name} does not have a valid loader function')
+ if not isinstance(get_model_for, Callable):
+ raise AssertionError(f"Error: {model_name} does not have a valid loader function")
- return get_model_for(model_type, model_config)
+ return get_model_for(model_type, model_config)
def init_model(model_type: str, model_info: tuple[str, dict]):
- '''
- Initializes a given model. This function assumes that the model is implemented in a module with
- the same name as the model in the models dir.
- '''
- model_name, _ = model_info
- available_models = models.get(model_type, [])
+ """
+ Initializes a given model. This function assumes that the model is implemented in a module with
+ the same name as the model in the models dir.
+ """
+ model_name, _ = model_info
+ available_models = models.get(model_type, [])
- if model_name not in available_models:
- raise AssertionError(f'Error: {model_type}_model should be one of {available_models}')
+ if model_name not in available_models:
+ raise AssertionError(f"Error: {model_type}_model should be one of {available_models}")
- try:
- model = load_model(model_type, model_info)
- except Exception as e:
- raise AssertionError(f'Error: {model_name} failed to load') from e
+ try:
+ model = load_model(model_type, model_info)
+ except Exception as e:
+ raise AssertionError(f"Error: {model_name} failed to load") from e
- if model_type == 'llm' and not isinstance(model, LLM):
- raise AssertionError(f'Error: {model} does not implement "llm" type or has returned an invalid object')
+ if model_type == "llm" and not isinstance(model, LLM):
+ raise AssertionError(f'Error: {model} does not implement "llm" type or has returned an invalid object')
- return model
+ return model
+
+
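+# Raised by LLM backends on runtime failures; the controller maps it to a 400 response.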
+class LlmException(Exception): ...
diff --git a/context_chat_backend/models/ctransformer.py b/context_chat_backend/models/ctransformer.py
index c9f693b6..f75fafb2 100644
--- a/context_chat_backend/models/ctransformer.py
+++ b/context_chat_backend/models/ctransformer.py
@@ -4,16 +4,16 @@
def get_model_for(model_type: str, model_config: dict):
- model_dir = getenv('MODEL_DIR', 'persistent_storage/model_files')
- if str(model_config.get('model')).startswith('/'):
- model_dir = ''
+ model_dir = getenv("MODEL_DIR", "persistent_storage/model_files")
+ if str(model_config.get("model")).startswith("/"):
+ model_dir = ""
- model_path = path.join(model_dir, model_config.get('model', ''))
+ model_path = path.join(model_dir, model_config.get("model", ""))
- if model_config is None:
- return None
+ if model_config is None:
+ return None
- if model_type == 'llm':
- return CTransformers(**{ **model_config, 'model': model_path })
+ if model_type == "llm":
+ return CTransformers(**{**model_config, "model": model_path})
- return None
+ return None
diff --git a/context_chat_backend/models/hugging_face.py b/context_chat_backend/models/hugging_face.py
index 54888807..651936d2 100644
--- a/context_chat_backend/models/hugging_face.py
+++ b/context_chat_backend/models/hugging_face.py
@@ -5,22 +5,22 @@
def get_model_for(model_type: str, model_config: dict):
- if model_config.get('model_path') is not None:
- model_dir = getenv('MODEL_DIR', 'persistent_storage/model_files')
- if str(model_config.get('model_path')).startswith('/'):
- model_dir = ''
+ if model_config.get("model_path") is not None:
+ model_dir = getenv("MODEL_DIR", "persistent_storage/model_files")
+ if str(model_config.get("model_path")).startswith("/"):
+ model_dir = ""
- model_path = path.join(model_dir, model_config.get('model_path', ''))
- else:
- model_path = model_config.get('model_id', '')
+ model_path = path.join(model_dir, model_config.get("model_path", ""))
+ else:
+ model_path = model_config.get("model_id", "")
- if model_config is None:
- return None
+ if model_config is None:
+ return None
- if model_type == 'embedding':
- return HuggingFaceEmbeddings(**model_config)
+ if model_type == "embedding":
+ return HuggingFaceEmbeddings(**model_config)
- if model_type == 'llm':
- return HuggingFacePipeline.from_model_id(**{ **model_config, 'model_id': model_path })
+ if model_type == "llm":
+ return HuggingFacePipeline.from_model_id(**{**model_config, "model_id": model_path})
- return None
+ return None
diff --git a/context_chat_backend/models/instructor.py b/context_chat_backend/models/instructor.py
index e55ed6dd..f6a01ff2 100644
--- a/context_chat_backend/models/instructor.py
+++ b/context_chat_backend/models/instructor.py
@@ -2,10 +2,10 @@
def get_model_for(model_type: str, model_config: dict):
- if model_config is None:
- return None
+ if model_config is None:
+ return None
- if model_type == 'embedding':
- return HuggingFaceInstructEmbeddings(**model_config)
+ if model_type == "embedding":
+ return HuggingFaceInstructEmbeddings(**model_config)
- return None
+ return None
diff --git a/context_chat_backend/models/llama.py b/context_chat_backend/models/llama.py
index 25cc8177..1cbcd8f3 100644
--- a/context_chat_backend/models/llama.py
+++ b/context_chat_backend/models/llama.py
@@ -5,19 +5,19 @@
def get_model_for(model_type: str, model_config: dict):
- model_dir = getenv('MODEL_DIR', 'persistent_storage/model_files')
- if str(model_config.get('model_path')).startswith('/'):
- model_dir = ''
+ model_dir = getenv("MODEL_DIR", "persistent_storage/model_files")
+ if str(model_config.get("model_path")).startswith("/"):
+ model_dir = ""
- model_path = path.join(model_dir, model_config.get('model_path', ''))
+ model_path = path.join(model_dir, model_config.get("model_path", ""))
- if model_config is None:
- return None
+ if model_config is None:
+ return None
- if model_type == 'embedding':
- return LlamaCppEmbeddings(**{ **model_config, 'model_path': model_path })
+ if model_type == "embedding":
+ return LlamaCppEmbeddings(**{**model_config, "model_path": model_path})
- if model_type == 'llm':
- return LlamaCpp(**{ **model_config, 'model_path': model_path })
+ if model_type == "llm":
+ return LlamaCpp(**{**model_config, "model_path": model_path})
- return None
+ return None
diff --git a/context_chat_backend/models/nc_texttotext.py b/context_chat_backend/models/nc_texttotext.py
new file mode 100644
index 00000000..4b609cca
--- /dev/null
+++ b/context_chat_backend/models/nc_texttotext.py
@@ -0,0 +1,100 @@
+import json
+import time
+from typing import Any
+
+from langchain_core.callbacks.manager import CallbackManagerForLLMRun
+from langchain_core.language_models.llms import LLM
+from nc_py_api import Nextcloud
+from pydantic import BaseModel, ValidationError
+
+from context_chat_backend.models import LlmException
+
+
+def get_model_for(model_type: str, model_config: dict):
+ if model_config is None:
+ return None
+
+ if model_type == "llm":
+ return CustomLLM()
+
+ return None
+
+
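+# Minimal model of an OCS TaskProcessing task, limited to the fields this client uses.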
+class Task(BaseModel):
+ id: int
+ status: str
+ output: dict[str, str] | None = None
+
+
+class CustomLLM(LLM):
+ """A custom chat model that queries Nextcloud's TextToText provider"""
+
+ def _call(
+ self,
+ prompt: str,
+ stop: list[str] | None = None,
+ run_manager: CallbackManagerForLLMRun | None = None,
+ **kwargs: Any,
+ ) -> str:
+ """Run the LLM on the given input.
+
+ Override this method to implement the LLM logic.
+
+ Args:
+ prompt: The prompt to generate from.
+ stop: Stop words to use when generating. Model output is cut off at the
+ first occurrence of any of the stop substrings.
+ If stop tokens are not supported consider raising NotImplementedError.
+ run_manager: Callback manager for the run.
+ **kwargs: Arbitrary additional keyword arguments. These are usually passed
+ to the model provider API call.
+
+ Returns:
+ The model output as a string. Actual completions SHOULD NOT include the prompt.
+ """
+ nc = Nextcloud()
+
+ print(json.dumps(prompt))
+
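+ # schedule a text2text generation task via the OCS TaskProcessing API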
+ response = nc.ocs(
+ "POST",
+ "/ocs/v1.php/taskprocessing/schedule",
+ json={"type": "core:text2text", "appId": "context_chat_backend", "input": {"input": prompt}},
+ )
+
+ try:
+ task = Task.model_validate(response["task"])
+
+ print(task)
+
+ i = 0
+ # poll every 5 seconds, for up to 30 minutes
+ while task.status not in ("STATUS_SUCCESSFUL", "STATUS_FAILED") and i < 60 * 6:
+ time.sleep(5)
+ i += 1
+ response = nc.ocs("GET", f"/ocs/v1.php/taskprocessing/task/{task.id}")
+ task = Task.model_validate(response["task"])
+ print(task)
+ except ValidationError as e:
+ raise LlmException("Failed to parse Nextcloud TaskProcessing task result") from e
+
+ if task.status != "STATUS_SUCCESSFUL" or task.output is None:
+ raise LlmException("Nextcloud TaskProcessing task failed or returned no output")
+
+ return task.output["output"]
+
+ @property
+ def _identifying_params(self) -> dict[str, Any]:
+ """Return a dictionary of identifying parameters."""
+ return {
+ # The model name lets LLM monitoring applications (e.g. LangSmith)
+ # apply per-model token counting and cost tracking.
+ "model_name": "NextcloudTextToTextProvider",
+ }
+
+ @property
+ def _llm_type(self) -> str:
+ """Get the type of language model used by this chat model. Used for logging purposes only."""
+ return "nc_texttotetx"
diff --git a/requirements.in.txt b/requirements.in.txt
index 008ee8d9..a5169b9e 100644
--- a/requirements.in.txt
+++ b/requirements.in.txt
@@ -29,3 +29,4 @@ unstructured @ git+https://github.com/kyteinsky/unstructured@d3a404cfb541dae8e16
unstructured-client
weaviate-client
xlrd
+nc_py_api
diff --git a/requirements.txt b/requirements.txt
index 3148a878..f644a6ba 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -82,6 +82,7 @@ mpmath==1.3.0
msg-parser==1.2.0
multidict==6.0.5
mypy-extensions==1.0.0
+nc-py-api==0.14.0
nest-asyncio==1.6.0
networkx==3.3
nltk==3.8.1
@@ -189,5 +190,6 @@ websockets==12.0
wrapt==1.16.0
xlrd==2.0.1
XlsxWriter==3.2.0
+xmltodict==0.13.0
yarl==1.9.4
zipp==3.19.2