From b435536f93bd1841a581e3359caa9a6ae628428d Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 24 Sep 2025 13:43:51 +0000
Subject: [PATCH] Deployed f4b0864 with MkDocs version: 1.6.1

123 files changed, 133633 insertions(+)
AB testing lets you compare different versions of your app to see which one performs better.
AB testing is a method used to compare two versions of a product to determine which performs better.
Comparing on a single criterion is hard, especially for LLM apps: the performance of a product can be measured in many ways.
In phospho, AB testing works by comparing the analytics distributions of two versions: the candidate and the control.
You need to have set up event detection in your project. This runs the analytics used to measure the performance of your app:
Click on the "Create an AB test" button on the phospho platform. If you want, customize the version_id, which is the name of the test.
Send data to the platform using an SDK, an integration, a file, or more. All new incoming messages will be tagged with the version_id.
Specify the version_id in your code
Alternatively, you can specify the version_id in your code. This will override the version_id set in the platform.
When logging to phospho, add a field version_id with the name of your version in metadata. See the example below:
curl -X POST https://api.phospho.ai/v2/log/$PHOSPHO_PROJECT_ID \
-H "Authorization: Bearer $PHOSPHO_API_KEY" \
-H "Content-Type: application/json" \
-d '{
  "batched_log_events": [
    {
      "input": "your_input",
      "output": "your_output",
      "metadata": {
        "version_id": "YOUR_VERSION_ID"
      }
    }
  ]
}'
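The same logging can be done from the Python SDK; a minimal sketch, assuming phospho.init() picks up PHOSPHO_PROJECT_ID and PHOSPHO_API_KEY from the environment:

import phospho

# Assumes PHOSPHO_PROJECT_ID and PHOSPHO_API_KEY are set in the environment
phospho.init()

# Tag this interaction with the candidate version of your app
phospho.log(
    input="your_input",
    output="your_output",
    metadata={"version_id": "YOUR_VERSION_ID"},
)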
If you want to run offline tests, you can use the phospho command line interface. Results of the offline tests are also available in the AB test tab.
phospho CLI
Learn more about the phospho command line interface
Clustering lets you group user messages based on their intention. This is great for getting a feel for "what are my users talking about?" and for identifying the most common topics.
phospho clustering uses a combination of user intent embeddings and unsupervised clustering algorithms to group messages together.
A user intent embedding is a representation of the user's intention in a high-dimensional space. This representation is generated using a deep learning model trained on a large dataset of user messages. Learn more here.
We are constantly evaluating and improving the clustering algorithms to provide the best results.
To use the clustering feature, you need a phospho account and an API key. You can get one by signing up on phospho.ai.
Import data. If not already done, import your data and set up a payment method.
Configure clustering. Go to the Clusters tab and click on the Configure clustering detection button. Select the scope of data to cluster: either messages or sessions. Filter the data by setting a date range, a specific tag, and more.
Run clustering. Click on the Run cluster analysis button to start the clustering process. Depending on the number of messages, it can take a few minutes.
The clustering results are presented in two formats:
3D Dot Cloud Graph: Each point in the graph corresponds to an embedding of a message (or a session). Clusters are distinct groups of these points.
Cluster Cards: Each cluster is also displayed as a card. The card shows the cluster size and an automatic summary of a random sample of messages. Click on "Explore" in any card to view the messages in the cluster.
By default, the clustering is run based on: user intent.
You can, however, modify this instruction in Advanced settings.
Change the clustering instruction to refine how messages are grouped and get insights more aligned with your needs. Just enter the topic you want to cluster on.
Examples of what you can enter:
- For a medical chatbot: type of disease
- For a customer support chatbot: type of issue (refund, delivery, etc.)
- For an e-commerce chatbot: product mentioned
You can use the user intent embeddings to run your own clustering algorithms. The embeddings are available through the API (see the sketch below). Learn more here.
Based on the clusters, define more analytics to run on your data so you never miss a beat on what your users are talking about. Check the event detection page for more information.
+Every time you log a task, phospho will automatically evaluate the success of the task.
+The evaluation is based on LLM self-critique.
+The evaluation leverages the following sources of information:
+- The tasks annotated in the phospho webapp, by you and your team
+- The user feedbacks sent to phospho
+- The system_prompt (str)
parameter in metadata
when logging
+- Previous tasks in the same session
If the information are not available, phospho will use default heuristics.
+To improve the automatic evaluation, you can:
+- Label tasks in the phospho webapp. Invite your team members to help you!
+- Gather user feedback
+- Pass the system_prompt (str)
parameter in metadata
when logging
+- Group tasks in sessions
+- Override the task evaluations with the analytics endpoints
In the phospho dashboard, you can annotate tasks as a success or a failure.
+In the Transcript tab, view tasks to access the thumbs up and thumbs down buttons. +- A thumbs up means that the task was successful. +- A thumbs down means that the task failed.
+Update the evaluation by clicking on the thumbs.
+The button changes color to mark that this task was evaluated by a human, and not by phospho.
+Add notes and any kind of text with the Notes button next to the thumbs.
+If there is a note already written, the color of the button changes.
+You can gather annotations any way you want. For example, if you have your own tool to collect feedback (such as thumbs up/thumbs down in your chat interface), you can chose to use the phospho API.
+Trigger the API endpoint to send your annotations to phospho at scale.
+Read the full guide about user feedback to learn more.
+Visualize the aggregated results of the evaluations in the Dashboard tab of the phospho webapp.
+You can also visualize the results for each task in the Sessions tab. Click on a session to see the list of tasks in the session.
+A green thumbs up means that the task was successful. A red thumbs down means that the task failed. Improve the automatic evaluation by clicking on the thumbs to annotate the task if needed.
+ + + + + + + + + + + + + + + + +Learn how to define and run events in phospho, and also how they work under the hood and how to improve them.
+Events are actions or behaviours that you want to track in your data. There are three types of events:
+An event is a specific interaction between a user and the system you want to track.
+To define an event, go to the Events tab in the phospho platform and click on the Add button.
+ +In this tab you can setup events in natural language, in this image, we have setup an event to detect when the system is unable to answer the user's question.
+By default, events are detected on all the newly imported data, but not on the past data. You need to run the events on the past data to get insights.
+Once you've defined your events, you need to run them on past data.
+Click on the Detect events button in the Events tab to run an event on your data.
+ +Every message logged to phospho goes through an analytics pipeline. In this pipeline, phospho looks for events defined in your project settings.
+This pipeline uses a combination of rules, machine learning, and large language models to detect events. The rules are defined in the Analytics tab of the phospho dashboard.
+To help you keep track and improve the event detection, phospho enables you annotate and validate the events detected in your data.
+Click on an event in the Transcripts to annotate it. This will display a dropdown where you can validate, remove or edit the event.
+Advanced performance metrics (F1 Score, Accuracy, Recall, Precision, R-squared, MSE) are available when you click on an event in the Analytics tab.
+The event detection models are automatically improved and updated using your feedback.
+Click on an event in the Transcripts to annotate it. This displays a dropdown where you can validate, remove or edit the event.
+We are constantly improving our algorithms to provide the best results. We're an open source project, so feel free to open an issue on our GitHub or contribute to the codebase. We would love to hear from you!
+ + + + + + + + + + + + + + + + +To fine-tune a model for event detection, you need to prepare a csv
dataset that contains the following columns:
detection_scope
(Literal
): can only be one of the following values: task_input_only
or task_output_only
task_input
(str
): the input text for a task (uusually the user input)task_output
(str
): the output text for a task (usually the assistant response)event_description
(str
): the event description, like the prompt you use to define the event you want to dectect while using phospholabel
(bool
): True if the event is indeed present in the text, False otherwiseA good dataset size is at least 2000 examples.
+To upload the dataset to phospho, use directly the API. Don't forget to set your API key in the Authorization
header.
curl -X 'POST' \
+ 'https://api.phospho.ai/v2/files' \
+ -H 'accept: application/json' \
+ -H 'Authorization: Bearer $PHOSPHO_API_KEY' \
+ -H 'Content-Type: multipart/form-data' \
+ -F 'file=@/path/to/your/local/file.csv.csv;type=text/csv'
+
Keep the file_id
returned by the API, you will need it to fine-tune the model.
We recomend using the mistralai/Mistral-7B-Instruct-v0.1
model for event detection.
+Once the dataset is uploaded, you can fine-tune the model using the following API call:
curl -X 'POST' \
+ 'https://api.phospho.ai/v2/fine_tuning/jobs' \
+ -H 'accept: application/json' \
+ -H 'Authorization: Bearer $PHOSPHO_API_KEY' \
+ -H 'Content-Type: application/json' \
+ -d '{
+ "file_id": "YOUR_FILE_ID",
+ "parameters": {"detection_scope": "YOUR_DETECTION_SCOPE", "event_description": "YOUR EVENT DESCRIPTION HERE"},
+ "model": "mistralai/Mistral-7B-Instruct-v0.1"
+}'
+
Note the fine-tuning id returned by the API, you will need it to check the status of the job. It should take approximately 20 minutes to complete.
+The finetuning job will take some time to complete. You can check the status of the job using the following API call:
+curl -X 'GET' \
+ 'https://api.phospho.ai/v2/fine_tuning/jobs/FINE_TUNING_JOB_ID' \
+ -H 'accept: application/json' \
+ -H 'Authorization: Bearer $PHOSPHO_API_KEY'
+
When the fine-tuning job is completed, you can get the fine-tuned model id in the fine_tuned_model
field of the response.
You can now use the fine-tuned model to detect events in your text. To do so, update the configs.
+First, get your current project settings:
+curl -X 'GET' \
+ 'https://api.phospho.ai/v2/projects/YOUR_PROJECT_ID' \
+ -H 'accept: application/json' \
+ -H 'Authorization: Bearer $PHOSPHO_API_KEY'
+
In the settings object, add (or change) the detection_engine
to the fine_tuned_model
id you got from the fine-tuning job. Then, update the project settings:
curl -X 'POST' \
+ 'https://api.phospho.ai/v2/projects/YOUR_PROJECT_ID' \
+ -H 'accept: application/json' \
+ -H 'Authorization: Bearer $PHOSPHO_API_KEY' \
+ -H 'Content-Type: application/json' \
+ -d '{
+ "settings": YOUR_UPDATED_SETTINGS_OBJECT
+}'
+
You're all set! You can now use the fine-tuned model to detect events in your text.
+ + + + + + + + + + + + + + + + +Detect what language your users are speaking in. This lets you analyze in what language your users are interacting with your assistant, and improve it accordingly.
+Language detection is based on the user message, so the interaction below will be flagged as english, despite the assistant answering in French.
+User | +Assistant | +
---|---|
What can you do? | +Je ne peux pas répondre en anglais | +
The language detection method is based on keywords. If the input is very short, the language detection might not be accurate.
+In the Transcripts, you can filter by language.
+ + + + + + + + + + + + + + + + +Detect the sentiment of your users. An automatic sentiment analysis is performed on the user message. This lets you know whether your users are happy, sad, or neutral.
+The sentiment and its magnitude are score. This corresponds to a negative or positive sentiment and how strong it is.
+We then translate this data into a simple, readable label for you: Positive, Neutral, Mixed and Negative.
+You can also filter your data by sentiment in the Transcripts.
+ + + + + + + + + + + + + + + + +A task is a single operation made by the user. For example, a user sending a question to ChatGPT and receiving an answer is a task.
+A session groups multiple tasks that happen in the same context. For example, multiple messages in the same ChatGPT chat is a session.
+A user is the end user of your LLM app. For example, the human chatting with ChatGPT.
+Info
+Tasks, sessions and users are just abstractions. They are meant to help you understand the context of a log. You can use them as you want.
+For example, +- A task can be "Fetch documents in a database" for a RAG. +- A session can be "The code completions in a single file" for a coding copilot. +- A user can be "The microservice querying the API" for a question answering model.
+A task is made of an input
and an optional output
, which are text readable by humans. Think of them as the messages in a chat.
On top of that, you can pass a raw_input
and a raw_output
. Those are the raw data that your LLM app received and produced. They are mostly meant for the developers of your LLM app.
To help you understand the context of a task, you can pass a metadata dict to your tasks.
+For example, the version of the model used, the generation time, the system prompt, the user_id, etc.
+import { phospho } from "phospho";
+
+phospho.init();
+
+phospho.log({
+ input: "What is the meaning of life?",
+ output: "42",
+ // Metadata
+ raw_input={"chat_history": ...},
+ metadata={
+ "system_prompt": "You are a helpful assistant.",
+ "version_id": "1.0.0",
+ "generation_time": 0.1,
+ },
+});
+
The metadata is a dictionary that can contain any key-value pair. We recommend sticking to str keys and str or float values.
Note that the output is optional, but the input is required.
- system_prompt: The prompt used to generate the output. It will be displayed separately in the UI.
- version_id: The version of the app. Used for AB testing.
- user_id: The id of the user. Used for user analytics.
A task can be a call to an LLM, but it can also be something completely different.
For example, a task can be a call to a database, or the result of a complex chain of thought.
Tasks are an abstraction that you can use as you want.
By default, when logging, a task id is automatically generated for you.
Generating your own task id is useful for attaching user feedback later on (on this topic, see User Feedback).
If you're using phospho in a conversational app such as a chatbot, group tasks together into sessions.
To create sessions, pass a session_id when logging.
The session id can be any string. However, we recommend using a UUID generated by a random hash function. We provide a helper function to generate a session id.
+from phospho.integrations import PhosphoLangchainCallbackHandler
+
+session_id = phospho.new_session()
+
+response = retrieval_chain.invoke(
+ "Chain input",
+ config={"callbacks": [
+ # Pass the session_id to the callback
+ PhosphoLangchainCallbackHandler(session_id=session_id)
+ ]}
+)
+
Sessions are useful for insights about short term user behavior. +- Monitor for how long a user chats with your LLM app before disconnecting +- Compute the average number of messages per session +- Discover what kind of messages ends a session.
+Find out how specific users interact with your LLM app by logging the user id.
+To do so, attach tasks and sessions to a user_id
when logging. The user id can be any string.
User analytics are available in the tabs Insights/Users. +- Discover aggregated metrics (number of tasks, average session duration, etc.) +- Access the tasks and sessions of a user by clicking on the corresponding row.
+Monitoring users helps you discover power users of your app, abusive users, or users who are struggling with your LLM app.
+ + + + + + + + + + + + + + + + +Every message logged to phospho goes through an analytics pipeline. In this pipeline, phospho looks for tags defined in your project settings.
+Tags are described in natural language. Create tags to detect topics, hallucinations, behaviours, intents, or any other concept you want to track.
+Tags are displayed on the platform and you can use them to filter data.
+Be notified when a tag is detected with webhooks.
+Go to the Analytics tab of the phospho dashboard, and click Add Tagger on the right.
+You will find some event templates like Coherence and Plausibility to get you started.
+ +The event description is a natural language description of the tag. Explain how to detect the tag in an interaction as if you were explaining it to a 5 years old or an alien.
+In the description, refer to your user as "the user" and to your LLM app as "the assistant".
+Example of an event description
+++The user is trying to book a flight. The user asked a question about a flight. +Don't include fight suggestions from the agent if the user didn't ask for it.
+
Manage Tags in the Analytics tab. Click delete to delete a tag detector.
+Note that you can also use the magic wand button on any session to get a suggestion for a possible tag that has been detected in the session.
+ +The button is right next to "Events" in the Session tab.
+Add an optional webhook to be notified when an event is detected. Click on Additional settings to add the webhook URL and the eventual Authorization header.
+Webhooks are automated messages sent from apps when something happens. They have a payload and are sent to a unique URL, which is like an app's phone number or address.
+If you have an LLM app with a backend, you can create webhooks.
+Every time the event is detected, phospho will send a POST
request to the webhook with this payload:
{
+ "id": "xxxxxxxxx", // Unique identifier of the detected event
+ "created_at": 13289238198, // Unix timestamp (in seconds)
+ "event_name": "privacy_policy", // The name of the event, as written in the dashboard
+ "task_id": "xxxxxxx", // The task id where the event was detected
+ "session_id": "xxxxxxx", // The session id where the event was detected
+ "project_id": "xxxxxxx", // The project id where the event was detected
+ "org_id": "xxxxxxx", // The organization id where the event was detected
+ "webhook": "https://your-webhook-url.com", // The webhook URL
+ "source": "phospho-unknown", // Starts with phospho if detected by phospho
+}
+
Retrieve the messages using the task_id
and the phospho API.
Use webhooks to send slack notifications, emails, SMS, notifications, UI updates, or to trigger a function in your backend.
+ + + + + + + + + + + + + + + + +This documents documents the usage based
billing plan of the hosted phospho platform.
Every analytics run on phospho consumes a certain amount of credits.
+At the end of the month, the total credits consumed by all the analytics runs are calculated and the user is billed based on the total credits consumed.
+The cost per credit depends on the plan you are on.
+Analytics run | +Credits consumed | +
---|---|
Logging 1 Task | +0 | +
Event detection on 1 Task: Tagger | +1 | +
Event detection on 1 Task: Scorer | +1 | +
Event detection on 1 Task: Classifier | +1 | +
Clustering on 1 Task | +2 | +
Event detection on 1 Session: Tagger | +1 * number of tasks in the session | +
Event detection on 1 Session: Scorer | +1 * number of tasks in the session | +
Event detection on 1 Session: Classifier | +1 * number of tasks in the session | +
Clustering on 1 Session | +2 * number of tasks in the session | +
Language detection on 1 Task | +1 | +
Sentiment detection on 1 Task | +1 | +
Logging user feedback is a crucial part of evaluating an LLM app. Even though user feedback is subjective and biased towards negative, it is a valuable source of information to improve the quality of your app.
+Setup user feedback in your app to log the user feedback to phospho, review it in the webapp, improve the automatic evaluations, and make your app better.
+In your app, you should collect user feedback after having logged a task to phospho. Every task logged to phospho is identified by a unique task_id.
+For phospho to know what task the user is giving feedback on, you need to keep track of the task_id.
+There are two ways to manage the task_id: frontend or backend.
+Any way you chose, there are helpers in the phospho package to make it easier.
+The phospho package provides multiple helpers to manage the task_id.
+ +Make sure you have initialized the phospho package with your project_id and api_key somewhere in your app.
+ +You can fetch the task_id generated by phospho.log
:
logged_content = phospho.log(input="question", output="answer")
+task_id: str = logged_content["task_id"]
+
To generate a new task_id, you can use the new_task
function.
task_id: str = phospho.new_task()
+
+# Pass it to phospho.log to create a task with this id
+phospho.log(input="question", output="answer", task_id=task_id)
+
To get the latest task_id, you can use the latest_task_id
variable.
The phospho package provides multiple helpers to manage the task_id.
+ +Make sure you have initialized the phospho package with your project_id and api_key somewhere in your app.
+import { phospho } from "phospho";
+phospho.init({ projectId: "your_project_id", apiKey: "your_api_key" });
+
You can fetch the task_id generated by phospho.log
:
const loggedContent = await phospho.log({
+ input: "question",
+ output: "answer",
+});
+const taskId: string = loggedContent.task_id;
+
To generate a new task_id, you can use the newTask
function.
const taskId = phospho.newTask();
+
+// Pass it to phospho.log to create a task with this id
+phospho.log({ input: "question", output: "answer", taskId: taskId });
+
To get the latest task_id, you can use the latestTaskId
variable.
When using the API directly, you need to manage the task_id by yourself.
+Create a task_id by generating a string hash. It needs to be unique for each task.
+ +Pass this task_id to the log
endpoint.
Once your backend has executed the task and logged it to phospho with a known task_id, send the task_id back to your frontend.
+In your frontend, using the task_id, you can collect user feedback and send it to phospho.
+We provide React components to kickstart your user feedback collection in your app.
+ +import "./App.css";
+import { FeedbackDrawer, Feedback } from "phospho-ui-react";
+import "phospho-ui-react/dist/index.css";
+
+function App() {
+ return (
+ <div className="App">
+ <header className="App-header">
+ <FeedbackDrawer
+ // Get your project_id on phospho
+ projectId="..."
+ // The task_id logged to phospho. Fetch it from your backend after logging
+ taskId="..."
+ // Source will be also logged to phospho
+ source={"user_feedback"}
+ // Customize the drawer
+ title="Send Feedback"
+ description="Help us improve our product."
+ onSubmit={(feedback: Feedback) =>
+ console.log("Submitted: ", feedback)
+ }
+ onClose={(feedback: Feedback) => console.log("Closed: ", feedback)}
+ />
+ </header>
+ </div>
+ );
+}
+
+export default App;
+
In the browser, use the sendUserFeedback
function. This function doesn't need your phospho api key. This is done to avoid leaking your phospho API key. However, this function still requires the projectId
.
Here is how to use the sendUserFeedback
function.
import { sendUserFeedback } from "phospho";
+
+// Handle logging in your backend, and send the task_id to the browser
+const taskId = await fetch("https://your-backend.com/some-endpoint", {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ },
+ body: JSON.stringify({
+ your: "stuff",
+ }),
+})
+ .then((res) => res.json())
+ .then((data) => data.task_id);
+
+// When you collect feedback, send it to phospho
+// For example, when the user clicks on a button
+sendUserFeedback({
+ projectId: "your_project_id",
+ tastId: taskId,
+ flag: "success", // or "failure"
+ source: "user",
+ notes: "Some notes (can be None)",
+});
+
If you are using a different language or a different way to manage the frontend, you can use the API endpoint tasks/{task-id}/flag
directly.
This endpoint is public. You only need to pass the task_id and project_id. This is done to avoid leaking your phospho API key.
+ +If you don't want to collect user feedback in the frontend, you can instead create an endpoint in your backend and collect user feedback there.
+The phospho python package provides a user_feedback
function to log user feedback.
The phospho javascript module provides a userFeedback
function to log user feedback.
You can use the API endpoint tasks/{task-id}/flag
directly.
Most phospho features are available through the API. The base URL of the phospho API is https://api.phospho.ai/v3
.
If you do not want to use the API directly, we provide several SDKs to make it easier to integrate phospho into your products:
+ +The API full reference is available here
+Contact us at contact@phospho.ai to discuss integrating phospho into your products through dedicated endpoints, allowing seamless, behind-the-scenes functionality for your customers.
+ + + + + + + + + + + + + + + + +