Commit 3245c44

Integrate practice assessment flow
1 parent 420740b commit 3245c44

File tree: 3 files changed, +250 -74 lines changed

sandbox/ai_insights_server.py

Lines changed: 3 additions & 4 deletions

@@ -11,7 +11,7 @@
 from learnosity_sdk.request import Init
 from docs.quickstart import config

-from sandbox.utils.llm_utils import get_llm_feedback
+from sandbox.utils.llm_utils import generate_practice_activity, get_llm_feedback
 from sandbox.utils.lrn_api_utils import get_report_data


@@ -52,8 +52,6 @@ def build_report_request(user_id: str, session_id: str):
     # Build reports init per request using query parameters
     generated_request_Items = initItems.generate()

-with open('sandbox/json/activity_payload.json', 'r', encoding='utf-8') as f:
-    assess_request = json.loads(f.read())

 class Server(BaseHTTPRequestHandler):
     def _ok(self, body: str):
@@ -119,7 +117,8 @@ def do_GET(self):
             "domain": host,
             "user_id": str(uuid4())
         }
-        initAssess = Init("assess", assess_security, config.consumer_secret, request=assess_request)
+        assess_data = generate_practice_activity()
+        initAssess = Init("assess", assess_security, config.consumer_secret, request=assess_data)
         generated_request_Assess = initAssess.generate()

         with open('sandbox/views/assess.html', 'r', encoding='utf-8') as f:
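
For orientation, a minimal sketch of what the new /assess wiring amounts to once the static activity_payload.json load is gone. Only the identifiers visible in the diff come from the commit; the helper name `build_assess_request`, the `consumer_key` lookup, and the surrounding handler shape are illustrative assumptions.

```python
# Sketch only: mirrors the diff's /assess wiring; names outside the diff are assumed.
from uuid import uuid4

from learnosity_sdk.request import Init

from docs.quickstart import config
from sandbox.utils.llm_utils import generate_practice_activity


def build_assess_request(host: str) -> str:
    """Hypothetical helper showing how the practice page request is now built."""
    assess_security = {
        "consumer_key": config.consumer_key,  # assumed; the diff only shows domain and user_id
        "domain": host,
        "user_id": str(uuid4()),
    }
    # The assess request body now comes from the LLM-generated practice activity
    # instead of the static sandbox/json/activity_payload.json file.
    assess_data = generate_practice_activity()
    init_assess = Init("assess", assess_security, config.consumer_secret, request=assess_data)
    return init_assess.generate()
```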

sandbox/utils/llm_utils.py

Lines changed: 244 additions & 70 deletions

@@ -1,99 +1,273 @@
+from copy import deepcopy
 import json
 import os
 from openai import OpenAI
 from dotenv import load_dotenv

-_SYSTEM_PROMPT = (
-    """
-# ROLE
-You are an expert AI Educational Assessor. Your goal is to analyze student test data, compare their answers against valid keys, and generate constructive, personalized pedagogical feedback.
-
-# CONTEXT
-You will be provided with a JSON dataset representing a student's test session.
-The data contains two specific question types:
-1. "clozetext": Fill-in-the-blank questions.
-2. "classification": Sorting items into categories.
-
-# INSTRUCTIONS FOR DATA INTERPRETATION
-
-## 1. How to Grade "clozetext"
-- Look at `stimulus` to understand the sentence context.
-- Compare the student's `response.value` list against the `validation.valid_response.value` AND `validation.alt_responses`.
-- If the student's answer matches *any* valid or alternative response, mark it correct.
-- If it does not match, identify the grammatical or factual error.
-
-## 2. How to Grade "classification"
-- This type relies on **Index Mapping**.
-- The `possible_responses` list contains the actual words (e.g., ["Noise", "Annoys", ...]).
-- The `response.value` contains arrays of integers. These integers are **indices** referring to the `possible_responses`.
-- **Example Logic:** If `response.value` is `[[6], [0, 2]]`:
-  - Column 1 contains `possible_responses[6]`.
-  - Column 2 contains `possible_responses[0]` and `possible_responses[2]`.
-- Compare the student's grouping against the `validation.valid_response` grouping to determine accuracy.
-
-# TASK
-For each question in the dataset:
-1. Determine if the student was correct, partially correct, or incorrect.
-2. Generate 4 specific insights:
-  - **Summary:** A brief description of what the student did (e.g., "Correctly identified all nouns and verbs").
-  - **Strength:** What specific concept has the student mastered? (e.g., "Strong command of subject-verb agreement").
-  - **Weakness:** Where did they struggle? If the answer is 100% correct, state "None observed."
-  - **Recommendation:** A specific next step or study tip. If 100% correct, suggest a more advanced challenge.
-
-3. Generate an **Overall Session Assessment** aggregating the performance across all questions.
-
-# CONSTRAINTS
-- Output **ONLY** valid JSON. Do not include markdown formatting (like ```json) or conversational text.
-- The keys for the specific questions must be dynamic based on the Question ID (e.g., `que_01`, `que_02`).
-- Tone: Encouraging, professional, and objective.
-
-# OUTPUT FORMAT
-Your output must strictly follow this schema:
-
-{
-"que_[ID]": [
-{"type": "summary", "comment": "..."},
-{"type": "strength", "comment": "..."},
-{"type": "weakness", "comment": "..."},
-{"type": "recommendation", "comment": "..."}
-],
-... (repeat for all questions),
-"Overall_Question_Answers": [
-{"type": "overall_question_item_summary", "comment": "..."},
-{"type": "overall_question_item_strength", "comment": "..."},
-{"type": "overall_question_item_weakness", "comment": "..."},
-{"type": "overall_question_item_recommendation", "comment": "..."}
-]
+_FEEDBACK_SYSTEM_PROMPT = (
+    """
+# ROLE
+You are an expert AI Educational Assessor. Your goal is to analyze student test data, compare their answers against valid keys, and generate constructive, personalized pedagogical feedback.
+
+# CONTEXT
+You will be provided with a JSON dataset representing a student's test session.
+The data contains two specific question types:
+1. "clozetext": Fill-in-the-blank questions.
+2. "classification": Sorting items into categories.
+
+# INSTRUCTIONS FOR DATA INTERPRETATION
+
+## 1. How to Grade "clozetext"
+- Look at `stimulus` to understand the sentence context.
+- Compare the student's `response.value` list against the `validation.valid_response.value` AND `validation.alt_responses`.
+- If the student's answer matches *any* valid or alternative response, mark it correct.
+- If it does not match, identify the grammatical or factual error.
+
+## 2. How to Grade "classification"
+- This type relies on **Index Mapping**.
+- The `possible_responses` list contains the actual words (e.g., ["Noise", "Annoys", ...]).
+- The `response.value` contains arrays of integers. These integers are **indices** referring to the `possible_responses`.
+- **Example Logic:** If `response.value` is `[[6], [0, 2]]`:
+  - Column 1 contains `possible_responses[6]`.
+  - Column 2 contains `possible_responses[0]` and `possible_responses[2]`.
+- Compare the student's grouping against the `validation.valid_response` grouping to determine accuracy.
+
+# TASK
+For each question in the dataset:
+1. Determine if the student was correct, partially correct, or incorrect.
+2. Generate 4 specific insights:
+  - **Summary:** A brief description of what the student did (e.g., "Correctly identified all nouns and verbs").
+  - **Strength:** What specific concept has the student mastered? (e.g., "Strong command of subject-verb agreement").
+  - **Weakness:** Where did they struggle? If the answer is 100% correct, state "None observed."
+  - **Recommendation:** A specific next step or study tip. If 100% correct, suggest a more advanced challenge.
+
+3. Generate an **Overall Session Assessment** aggregating the performance across all questions.
+
+# CONSTRAINTS
+- Output **ONLY** valid JSON. Do not include markdown formatting (like ```json) or conversational text.
+- The keys for the specific questions must be dynamic based on the Question ID (e.g., `que_01`, `que_02`).
+- Tone: Encouraging, professional, and objective.
+
+# OUTPUT FORMAT
+Your output must strictly follow this schema:
+
+{
+"que_[ID]": [
+{"type": "summary", "comment": "..."},
+{"type": "strength", "comment": "..."},
+{"type": "weakness", "comment": "..."},
+{"type": "recommendation", "comment": "..."}
+],
+... (repeat for all questions),
+"Overall_Question_Answers": [
+{"type": "overall_question_item_summary", "comment": "..."},
+{"type": "overall_question_item_strength", "comment": "..."},
+{"type": "overall_question_item_weakness", "comment": "..."},
+{"type": "overall_question_item_recommendation", "comment": "..."}
+]
+}
+"""
+)
+
+_ACTIVITY_GENERATION_SYSTEM_PROMPT = (
+    """
+# ROLE
+You are an expert Adaptive Learning Content Designer. Your goal is to generate new, targeted practice questions based on specific feedback about a student's previous performance.
+
+# CONTEXT
+You will be provided with a **Feedback JSON** containing analysis of a student's strengths, weaknesses, and recommendations for improvement.
+The feedback keys (e.g., `que_01`, `que_05`) correspond to questions of varying types (`clozetext` or `classification`) in no specific order.
+Your job is to create a **New Question Set** (in JSON format) that specifically addresses the "recommendation" and "weakness" fields found in the feedback.
+
+# TASK
+1. **Analyze & Infer Question Type:**
+  - Iterate through every question key in the input JSON (ignoring `Overall_Question_Answers`).
+  - Read the `summary` and `recommendation` comments to **infer** the question type:
+    - **Clozetext:** Look for keywords like "blank", "gap", "sentence", "tense", "grammar", "verb".
+    - **Classification:** Look for keywords like "classify", "group", "sort", "category", "match", "columns".
+  - Extract the specific **recommendation** to understand the skill gap (e.g., "Practice irregular verbs" or "Distinguish between nouns and adjectives").
+
+2. **Generate New Content:**
+  - For each processed key, generate **one** new question of the **inferred type**.
+  - **If Clozetext:** Create a new sentence with blanks that specifically targets the recommended skill.
+  - **If Classification:** Create a new grouping task with categories and items that address the specific confusion identified.
+  - Ensure the `id` of the new question corresponds to the feedback key (e.g., if feedback was for `que_01`, the new question `id` is "01").
+
+3. **Format the Output:**
+  - You must strictly follow the provided schema.
+  - **Important for Classification:** You must generate the `possible_responses` list (the words/items) AND the `valid_response` (the correct grouping).
+  - **Crucial Logic:** The `valid_response` values are **indices**. If "Apple" is the first word in `possible_responses` (index 0) and it belongs in Column 1, then `valid_response[0]` must contain `0`.
+
+# SCHEMA & CONSTRAINTS
+Output a single JSON array containing the new questions. Adhere strictly to this structure:
+
+```json
+[
+{
+"id": "01",
+"question": {
+"type": "clozetext",
+"metadata": { "valid_response_count": 1 },
+"instant_feedback": true,
+"stimulus": "<p>[Insert instruction, e.g., 'Fill in the blanks using the correct past tense form.']</p>",
+"template": "<p>[Insert sentence with {{response}} placeholders]</p>",
+"max_length": 15,
+"validation": {
+"scoring_type": "exactMatch",
+"alt_responses": [],
+"valid_response": {
+"score": 1,
+"value": ["[Correct Answer 1]", "[Correct Answer 2]"]
 }
-"""
+}
+},
+"response": { "value": [] }
+},
+{
+"id": "02",
+"question": {
+"type": "classification",
+"metadata": { "valid_response_count": 1 },
+"instant_feedback": true,
+"stimulus": "<p>[Insert instruction, e.g., 'Classify the following words...']</p>",
+"ui_style": {
+"column_count": 2,
+"column_titles": ["[Category A]", "[Category B]"]
+},
+"validation": {
+"scoring_type": "exactMatch",
+"valid_response": {
+"score": 1,
+"value": [
+[0, 2],
+[1, 3]
+]
+}
+},
+"possible_responses": ["Word A", "Word B", "Word C", "Word D"]
+},
+"response": { "value": [] }
+}
+]```
+"""
 )

-def get_llm_feedback(report_data):
+activity_feedback = None
+
+def _configure_llm_client() -> tuple[OpenAI, str]:
     load_dotenv()
+
     api_key = os.getenv("OPENAI_API_KEY")
     if not api_key:
         raise RuntimeError("OPENAI_API_KEY not set; update your .env file before running this script.")
-    model_name = os.getenv("OPENAI_MODEL", "gpt-4o")
+
     client = OpenAI(api_key=api_key)
+    model_name = os.getenv("OPENAI_MODEL")
+    return client, model_name

-    user_message = f"""
+def _build_user_prompt(user_data):
+    return f"""
 # INPUT DATA
 ###
-{report_data}
+{user_data}
 ###
 """

+def _send_llm_request(system_prompt, user_prompt, temperature=1.0):
+    client, model_name = _configure_llm_client()
     response = client.chat.completions.create(
         model=model_name,
         messages=[
-            {"role": "system", "content": _SYSTEM_PROMPT},
-            {"role": "user", "content": user_message}
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
         ],
-        response_format={"type": "json_object"}
+        temperature=temperature,
+        response_format={"type": "json_object"},
     )

     content = response.choices[0].message.content
     if content is None:
         raise RuntimeError("The model returned an empty response; try a different model or rerun the request.")
+
+    return json.loads(content)
+
+def _extract_question_entries(raw_payload):
+    """Return a list of question entries from various payload layouts."""
+
+    candidates = None
+
+    if isinstance(raw_payload, list):
+        candidates = raw_payload
+    elif isinstance(raw_payload, dict):
+        for key in ("questions", "result", "items", "data"):
+            value = raw_payload.get(key)
+            if isinstance(value, list):
+                candidates = value
+                break
+    else:
+        raise ValueError("Unsupported payload type; expected dict or list.")
+
+    if candidates is None:
+        raise ValueError("Payload must include a list under 'questions', 'result', 'items', or 'data'.")
+
+    for entry in candidates:
+        if not isinstance(entry, dict):
+            raise ValueError("Each question entry must be a dictionary.")
+
+    return candidates
+
+def _convert_questions_to_activity(new_questions_payload):
+    """Convert generated question data into a Learnosity activity payload."""
+
+    items = []
+    questions = []
+
+    for question_entry in _extract_question_entries(new_questions_payload):
+        question_id = question_entry.get("id")
+        question_data = question_entry.get("question")
+
+        if not question_id or not isinstance(question_id, str):
+            raise ValueError("Each question entry requires an 'id' string.")
+        if not isinstance(question_data, dict):
+            raise ValueError("Each question entry requires a 'question' dictionary.")
+
+        response_id = f"generated_{question_id}"
+
+        items.append(
+            {
+                "content": f"<span class='learnosity-response question-{response_id}'></span>",
+                "response_ids": [response_id],
+                "workflow": "",
+                "reference": f"item-{question_id}",
+            }
+        )
+
+        question_payload = deepcopy(question_data)
+        question_payload["response_id"] = response_id
+        question_payload.setdefault("description", "")
+        questions.append(question_payload)
+
+    return {
+        "items": items,
+        "questionsApiActivity": {
+            "consumer_key": os.getenv("LEARNOSITY_CONSUMER_KEY", "INSERT_CONSUMER_KEY_HERE"),
+            "timestamp": os.getenv("LEARNOSITY_TIMESTAMP", "INSERT_CURRENT_TIMESTAMP_HERE"),
+            "signature": os.getenv("LEARNOSITY_SIGNATURE", "INSERT_GENERATED_SIGNATURE_HERE"),
+            "user_id": os.getenv("LEARNOSITY_USER_ID", "demo_user"),
+            "type": "submit_practice",
+            "state": "initial",
+            "id": os.getenv("LEARNOSITY_ACTIVITY_ID", "generated_practice"),
+            "name": os.getenv("LEARNOSITY_ACTIVITY_NAME", "Generated Practice"),
+            "questions": questions,
+        },
+    }
+
+def get_llm_feedback(report_data):
+    global activity_feedback
+    activity_feedback = _send_llm_request(system_prompt=_FEEDBACK_SYSTEM_PROMPT, user_prompt=_build_user_prompt(report_data))
+    return activity_feedback

-    return json.loads(content)
+def generate_practice_activity():
+    new_questions = _send_llm_request(system_prompt=_ACTIVITY_GENERATION_SYSTEM_PROMPT, user_prompt=_build_user_prompt(activity_feedback))
+    new_activity = _convert_questions_to_activity(new_questions)
+    return new_activity
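
Both system prompts lean on the same index-mapping convention for classification questions: `response.value` and `valid_response.value` hold integer indices into `possible_responses`, grouped per column. A minimal decode sketch (the word list here is made up for illustration):

```python
# Decode a classification response into words, per the prompts' index-mapping rule.
possible_responses = ["Noise", "Annoys", "Quiet", "Calms", "Racket", "Soothes", "Din"]
response_value = [[6], [0, 2]]  # one list of indices per column

decoded_columns = [[possible_responses[i] for i in column] for column in response_value]
print(decoded_columns)  # [['Din'], ['Noise', 'Quiet']]
```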

sandbox/views/report_feedback.html

Lines changed: 3 additions & 0 deletions

@@ -58,6 +58,9 @@ <h2>Per-Question Feedback</h2>
       {% else %}
         <em>No LLM feedback available.</em>
       {% endif %}
+      <button onclick="window.location.href='/assess'" style="margin-top: 16px; padding: 10px 20px; font-size: 16px; cursor: pointer;">
+        Generate practice activity
+      </button>
     </div>
   </div>
   <script src="https://reports.learnosity.com?latest-lts"></script>
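
Taken together, the three changes close the loop from feedback to new practice: the report page's button navigates to /assess, and the server builds that assessment from the feedback cached by the earlier report request. A rough sketch of the intended call order, assuming the handlers invoke these utilities as the diff suggests (the `get_report_data` arguments are placeholders; the real server derives them from query parameters):

```python
# Illustrative call order only; in the sandbox these calls happen inside the
# HTTP server's GET handlers rather than a standalone script.
from sandbox.utils.llm_utils import generate_practice_activity, get_llm_feedback
from sandbox.utils.lrn_api_utils import get_report_data

report_data = get_report_data("demo_user", "demo_session")  # placeholder arguments
feedback = get_llm_feedback(report_data)    # also cached module-side as activity_feedback
activity = generate_practice_activity()     # turns the cached feedback into an assess request
```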
