diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index 7bc1dd37..1ce03b9c 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -42,7 +42,7 @@ jobs:
       matrix:
         php-versions: [ '8.1' ]
         databases: [ 'sqlite' ]
-        server-versions: [ 'master', 'stable28', 'stable29' ]
+        server-versions: [ 'master' ]

     name: Integration test on ${{ matrix.server-versions }} php@${{ matrix.php-versions }}

@@ -123,7 +123,7 @@ jobs:
           php -S localhost:8080 &

       - name: Enable context_chat and app_api
-        run: ./occ app:enable -vvv -f context_chat app_api
+        run: ./occ app:enable -vvv -f context_chat app_api testing

       - name: Checkout documentation
         uses: actions/checkout@v4
@@ -166,6 +166,8 @@ jobs:

       - name: Run the prompts
         run: |
+          ./occ background-job:worker 'OC\TaskProcessing\SynchronousBackgroundJob' &
+          ./occ background-job:worker 'OC\TaskProcessing\SynchronousBackgroundJob' &
           ./occ context_chat:prompt admin "Which factors are taken into account for the Ethical AI Rating?"
           ./occ context_chat:prompt admin "Welche Faktoren beeinflussen das Ethical AI Rating?"
diff --git a/appinfo/info.xml b/appinfo/info.xml
index e6c71b20..38ecbd19 100644
--- a/appinfo/info.xml
+++ b/appinfo/info.xml
@@ -25,7 +25,7 @@ Install the given apps for Context Chat to work as desired **in the given order*
 	<bugs>https://github.com/nextcloud/context_chat_backend/issues</bugs>
 	<repository>https://github.com/nextcloud/context_chat_backend.git</repository>
-
+
diff --git a/config.cpu.yaml b/config.cpu.yaml
index 9bc217bf..4bb73c40 100644
--- a/config.cpu.yaml
+++ b/config.cpu.yaml
@@ -40,6 +40,8 @@ embedding:
     device: cpu

 llm:
+  nc_texttotext:
+
   llama:
     model_path: dolphin-2.2.1-mistral-7b.Q5_K_M.gguf
     n_batch: 512
diff --git a/config.gpu.yaml b/config.gpu.yaml
index a4ea45ab..e8cff474 100644
--- a/config.gpu.yaml
+++ b/config.gpu.yaml
@@ -40,6 +40,8 @@ embedding:
     device: cuda

 llm:
+  nc_texttotext:
+
   llama:
     model_path: dolphin-2.2.1-mistral-7b.Q5_K_M.gguf
     n_batch: 512
diff --git a/context_chat_backend/chain/query_proc.py b/context_chat_backend/chain/query_proc.py
index a5db2dc3..b6ebfb87 100644
--- a/context_chat_backend/chain/query_proc.py
+++ b/context_chat_backend/chain/query_proc.py
@@ -25,7 +25,7 @@ def get_pruned_query(llm: LLM, config: TConfig, query: str, template: str, text_
 		or llm_config.get('config', {}).get('max_new_tokens') \
 		or max(
 			llm_config.get('pipeline_kwargs', {}).get('config', {}).get('max_new_tokens', 0),
-			llm_config.get('pipeline_kwargs', {}).get('config', {}).get('max_length')
+			llm_config.get('pipeline_kwargs', {}).get('config', {}).get('max_length', 0)
 		) \
 		or 4096

diff --git a/context_chat_backend/controller.py b/context_chat_backend/controller.py
index c820df31..0b67bda3 100644
--- a/context_chat_backend/controller.py
+++ b/context_chat_backend/controller.py
@@ -12,6 +12,7 @@ from .config_parser import get_config
 from .download import background_init, ensure_models
 from .dyn_loader import EmbeddingModelLoader, LLMModelLoader, LoaderException, VectorDBLoader
+from .models import LlmException
 from .ocs_utils import AppAPIAuthMiddleware
 from .setup_functions import ensure_config_file, repair_run, setup_env_vars
 from .utils import JSONResponse, enabled_guard, update_progress, value_of

@@ -105,6 +106,11 @@ async def _(request: Request, exc: ValueError):
 	return JSONResponse(str(exc), 400)


+@app.exception_handler(LlmException)
+async def _(request: Request, exc: LlmException):
+	log_error(f'Llm Error: {request.url.path}:', exc)
+	return JSONResponse(str(exc), 400)
+
 # routes
 @app.get('/')
diff --git a/context_chat_backend/download.py b/context_chat_backend/download.py
index 090e79d6..e90362cb 100644
--- a/context_chat_backend/download.py
+++ b/context_chat_backend/download.py
@@ -202,7 +202,8 @@ def background_init(app: FastAPI):
 	for model_type in ('embedding', 'llm'):
 		model_name = _get_model_name_or_path(config, model_type)
 		if model_name is None:
-			raise Exception(f'Error: Model name/path not found for {model_type}')
+			update_progress(app, progress := progress + 50)
+			continue

 		if not _download_model(model_name):
 			raise Exception(f'Error: Model download failed for {model_name}')
@@ -220,7 +221,7 @@ def ensure_models(app: FastAPI) -> bool:
 	for model_type in ('embedding', 'llm'):
 		model_name = _get_model_name_or_path(app.extra['CONFIG'], model_type)
 		if model_name is None:
-			return False
+			return True

 		if not _model_exists(model_name):
 			return False
diff --git a/context_chat_backend/models/__init__.py b/context_chat_backend/models/__init__.py
index 5bee22be..33b1c254 100644
--- a/context_chat_backend/models/__init__.py
+++ b/context_chat_backend/models/__init__.py
@@ -4,53 +4,56 @@
 from langchain.llms.base import LLM
 from langchain.schema.embeddings import Embeddings

-_embedding_models = ['llama', 'hugging_face', 'instructor']
-_llm_models = ['llama', 'hugging_face', 'ctransformer']
+_embedding_models = ["llama", "hugging_face", "instructor"]
+_llm_models = ["nc_texttotext", "llama", "hugging_face", "ctransformer"]

 models = {
-	'embedding': _embedding_models,
-	'llm': _llm_models,
+    "embedding": _embedding_models,
+    "llm": _llm_models,
 }

-__all__ = ['init_model', 'load_model', 'models']
+__all__ = ["init_model", "load_model", "models", "LlmException"]


 def load_model(model_type: str, model_info: tuple[str, dict]) -> Embeddings | LLM | None:
-	model_name, model_config = model_info
+    model_name, model_config = model_info

-	try:
-		module = import_module(f'.{model_name}', 'context_chat_backend.models')
-	except Exception as e:
-		raise AssertionError(f'Error: could not load {model_name} model from context_chat_backend/models') from e
+    try:
+        module = import_module(f".{model_name}", "context_chat_backend.models")
+    except Exception as e:
+        raise AssertionError(f"Error: could not load {model_name} model from context_chat_backend/models") from e

-	if module is None or not hasattr(module, 'get_model_for'):
-		raise AssertionError(f'Error: could not load {model_name} model')
+    if module is None or not hasattr(module, "get_model_for"):
+        raise AssertionError(f"Error: could not load {model_name} model")

-	get_model_for = module.get_model_for
+    get_model_for = module.get_model_for

-	if not isinstance(get_model_for, Callable):
-		raise AssertionError(f'Error: {model_name} does not have a valid loader function')
+    if not isinstance(get_model_for, Callable):
+        raise AssertionError(f"Error: {model_name} does not have a valid loader function")

-	return get_model_for(model_type, model_config)
+    return get_model_for(model_type, model_config)


 def init_model(model_type: str, model_info: tuple[str, dict]):
-	'''
-	Initializes a given model. This function assumes that the model is implemented in a module with
-	the same name as the model in the models dir.
-	'''
-	model_name, _ = model_info
-	available_models = models.get(model_type, [])
+    """
+    Initializes a given model. This function assumes that the model is implemented in a module with
+    the same name as the model in the models dir.
+ """ + model_name, _ = model_info + available_models = models.get(model_type, []) - if model_name not in available_models: - raise AssertionError(f'Error: {model_type}_model should be one of {available_models}') + if model_name not in available_models: + raise AssertionError(f"Error: {model_type}_model should be one of {available_models}") - try: - model = load_model(model_type, model_info) - except Exception as e: - raise AssertionError(f'Error: {model_name} failed to load') from e + try: + model = load_model(model_type, model_info) + except Exception as e: + raise AssertionError(f"Error: {model_name} failed to load") from e - if model_type == 'llm' and not isinstance(model, LLM): - raise AssertionError(f'Error: {model} does not implement "llm" type or has returned an invalid object') + if model_type == "llm" and not isinstance(model, LLM): + raise AssertionError(f'Error: {model} does not implement "llm" type or has returned an invalid object') - return model + return model + + +class LlmException(Exception): ... diff --git a/context_chat_backend/models/ctransformer.py b/context_chat_backend/models/ctransformer.py index c9f693b6..f75fafb2 100644 --- a/context_chat_backend/models/ctransformer.py +++ b/context_chat_backend/models/ctransformer.py @@ -4,16 +4,16 @@ def get_model_for(model_type: str, model_config: dict): - model_dir = getenv('MODEL_DIR', 'persistent_storage/model_files') - if str(model_config.get('model')).startswith('/'): - model_dir = '' + model_dir = getenv("MODEL_DIR", "persistent_storage/model_files") + if str(model_config.get("model")).startswith("/"): + model_dir = "" - model_path = path.join(model_dir, model_config.get('model', '')) + model_path = path.join(model_dir, model_config.get("model", "")) - if model_config is None: - return None + if model_config is None: + return None - if model_type == 'llm': - return CTransformers(**{ **model_config, 'model': model_path }) + if model_type == "llm": + return CTransformers(**{**model_config, "model": model_path}) - return None + return None diff --git a/context_chat_backend/models/hugging_face.py b/context_chat_backend/models/hugging_face.py index 54888807..651936d2 100644 --- a/context_chat_backend/models/hugging_face.py +++ b/context_chat_backend/models/hugging_face.py @@ -5,22 +5,22 @@ def get_model_for(model_type: str, model_config: dict): - if model_config.get('model_path') is not None: - model_dir = getenv('MODEL_DIR', 'persistent_storage/model_files') - if str(model_config.get('model_path')).startswith('/'): - model_dir = '' + if model_config.get("model_path") is not None: + model_dir = getenv("MODEL_DIR", "persistent_storage/model_files") + if str(model_config.get("model_path")).startswith("/"): + model_dir = "" - model_path = path.join(model_dir, model_config.get('model_path', '')) - else: - model_path = model_config.get('model_id', '') + model_path = path.join(model_dir, model_config.get("model_path", "")) + else: + model_path = model_config.get("model_id", "") - if model_config is None: - return None + if model_config is None: + return None - if model_type == 'embedding': - return HuggingFaceEmbeddings(**model_config) + if model_type == "embedding": + return HuggingFaceEmbeddings(**model_config) - if model_type == 'llm': - return HuggingFacePipeline.from_model_id(**{ **model_config, 'model_id': model_path }) + if model_type == "llm": + return HuggingFacePipeline.from_model_id(**{**model_config, "model_id": model_path}) - return None + return None diff --git a/context_chat_backend/models/instructor.py 
index e55ed6dd..f6a01ff2 100644
--- a/context_chat_backend/models/instructor.py
+++ b/context_chat_backend/models/instructor.py
@@ -2,10 +2,10 @@


 def get_model_for(model_type: str, model_config: dict):
-	if model_config is None:
-		return None
+    if model_config is None:
+        return None

-	if model_type == 'embedding':
-		return HuggingFaceInstructEmbeddings(**model_config)
+    if model_type == "embedding":
+        return HuggingFaceInstructEmbeddings(**model_config)

-	return None
+    return None
diff --git a/context_chat_backend/models/llama.py b/context_chat_backend/models/llama.py
index 25cc8177..1cbcd8f3 100644
--- a/context_chat_backend/models/llama.py
+++ b/context_chat_backend/models/llama.py
@@ -5,19 +5,19 @@


 def get_model_for(model_type: str, model_config: dict):
-	model_dir = getenv('MODEL_DIR', 'persistent_storage/model_files')
-	if str(model_config.get('model_path')).startswith('/'):
-		model_dir = ''
+    model_dir = getenv("MODEL_DIR", "persistent_storage/model_files")
+    if str(model_config.get("model_path")).startswith("/"):
+        model_dir = ""

-	model_path = path.join(model_dir, model_config.get('model_path', ''))
+    model_path = path.join(model_dir, model_config.get("model_path", ""))

-	if model_config is None:
-		return None
+    if model_config is None:
+        return None

-	if model_type == 'embedding':
-		return LlamaCppEmbeddings(**{ **model_config, 'model_path': model_path })
+    if model_type == "embedding":
+        return LlamaCppEmbeddings(**{**model_config, "model_path": model_path})

-	if model_type == 'llm':
-		return LlamaCpp(**{ **model_config, 'model_path': model_path })
+    if model_type == "llm":
+        return LlamaCpp(**{**model_config, "model_path": model_path})

-	return None
+    return None
diff --git a/context_chat_backend/models/nc_texttotext.py b/context_chat_backend/models/nc_texttotext.py
new file mode 100644
index 00000000..4b609cca
--- /dev/null
+++ b/context_chat_backend/models/nc_texttotext.py
@@ -0,0 +1,100 @@
+import json
+import time
+from typing import Any
+
+from langchain_core.callbacks.manager import CallbackManagerForLLMRun
+from langchain_core.language_models.llms import LLM
+from nc_py_api import Nextcloud
+from pydantic import BaseModel, ValidationError
+
+from context_chat_backend.models import LlmException
+
+
+def get_model_for(model_type: str, model_config: dict):
+    if model_config is None:
+        return None
+
+    if model_type == "llm":
+        return CustomLLM()
+
+    return None
+
+
+class Task(BaseModel):
+    id: int
+    status: str
+    output: dict[str, str] | None = None
+
+
+class CustomLLM(LLM):
+    """A custom chat model that queries Nextcloud's TextToText provider"""
+
+    def _call(
+        self,
+        prompt: str,
+        stop: list[str] | None = None,
+        run_manager: CallbackManagerForLLMRun | None = None,
+        **kwargs: Any,
+    ) -> str:
+        """Run the LLM on the given input.
+
+        Override this method to implement the LLM logic.
+
+        Args:
+            prompt: The prompt to generate from.
+            stop: Stop words to use when generating. Model output is cut off at the
+                first occurrence of any of the stop substrings.
+                If stop tokens are not supported consider raising NotImplementedError.
+            run_manager: Callback manager for the run.
+            **kwargs: Arbitrary additional keyword arguments. These are usually passed
+                to the model provider API call.
+
+        Returns:
+            The model output as a string. Actual completions SHOULD NOT include the prompt.
+ """ + nc = Nextcloud() + + print(json.dumps(prompt)) + + response = nc.ocs( + "POST", + "/ocs/v1.php/taskprocessing/schedule", + json={"type": "core:text2text", "appId": "context_chat_backend", "input": {"input": prompt}}, + ) + + try: + task = Task.model_validate(response["task"]) + + print(task) + + i = 0 + # wait for 30 minutes + while task.status != "STATUS_SUCCESSFUL" and task.status != "STATUS_FAILED" and i < 60 * 6: + time.sleep(5) + i += 1 + response = nc.ocs("GET", f"/ocs/v1.php/taskprocessing/task/{task.id}") + task = Task.model_validate(response["task"]) + print(task) + except ValidationError as e: + raise LlmException("Failed to parse Nextcloud TaskProcessing task result") from e + + if task.status != "STATUS_SUCCESSFUL": + raise LlmException("Nextcloud TaskProcessing Task failed") + + return task.output["output"] + + @property + def _identifying_params(self) -> dict[str, Any]: + """Return a dictionary of identifying parameters.""" + return { + # The model name allows users to specify custom token counting + # rules in LLM monitoring applications (e.g., in LangSmith users + # can provide per token pricing for their model and monitor + # costs for the given LLM.) + "model_name": "NextcloudTextToTextProvider", + } + + @property + def _llm_type(self) -> str: + """Get the type of language model used by this chat model. Used for logging purposes only.""" + return "nc_texttotetx" diff --git a/requirements.in.txt b/requirements.in.txt index 008ee8d9..a5169b9e 100644 --- a/requirements.in.txt +++ b/requirements.in.txt @@ -29,3 +29,4 @@ unstructured @ git+https://github.com/kyteinsky/unstructured@d3a404cfb541dae8e16 unstructured-client weaviate-client xlrd +nc_py_api diff --git a/requirements.txt b/requirements.txt index 3148a878..f644a6ba 100644 --- a/requirements.txt +++ b/requirements.txt @@ -82,6 +82,7 @@ mpmath==1.3.0 msg-parser==1.2.0 multidict==6.0.5 mypy-extensions==1.0.0 +nc-py-api==0.14.0 nest-asyncio==1.6.0 networkx==3.3 nltk==3.8.1 @@ -189,5 +190,6 @@ websockets==12.0 wrapt==1.16.0 xlrd==2.0.1 XlsxWriter==3.2.0 +xmltodict==0.13.0 yarl==1.9.4 zipp==3.19.2