Skip to content

Commit c3a6edd

Browse files
committed
fix: add transformers for summarization; the backend can now take the full text and summarize it
1 parent b467d10 commit c3a6edd

File tree

1 file changed

+77
-2
lines changed

1 file changed

+77
-2
lines changed

backend/main.py

Lines changed: 77 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,40 @@
11
import os
2+
from dotenv import load_dotenv
23
import asyncio
4+
from pydantic import BaseModel
5+
from typing import Optional
36
import queue
47
import threading
5-
from fastapi import FastAPI
8+
from fastapi import FastAPI, HTTPException
69
from fastapi.middleware.cors import CORSMiddleware
710
import socketio
811
from google.cloud import speech
912
from settings import *
1013

14+
load_dotenv()
15+
16+
from transformers import pipeline
17+
18+
19+
# Hugging Face summarization pipeline (facebook/bart-large-cnn), built once at
# import time and reused by the /summary endpoint for every request.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
20+
21+
22+
1123
# Google Cloud Speech-to-Text client
1224
speech_client = speech.SpeechClient()
1325

1426
# FastAPI app
1527
app = FastAPI()
1628

29+
# Origins allowed by the CORS middleware. Every entry must be a complete
# origin (scheme://host[:port]) because it is matched against the browser's
# Origin header. The former bare "localhost:5173" entry had no scheme and
# therefore could never match, so it has been dropped.
origins = [
    "http://localhost:5173",
]
33+
1734
# CORS middleware
1835
app.add_middleware(
1936
CORSMiddleware,
20-
allow_origins=["*"], # Allow all origins (adjust for production)
37+
allow_origins=origins,  # Only the origins listed in `origins` are allowed (extend for production)
2138
allow_credentials=True,
2239
allow_methods=["*"],
2340
allow_headers=["*"],
@@ -30,6 +47,8 @@
3047
# Store client data
3148
clients = {}
3249

50+
# Request body accepted by POST /summary.
class TranscriptRequest(BaseModel):
    # Full transcript text to summarize. The endpoint reports "too_short"
    # when this is under 50 characters after stripping surrounding whitespace.
    text: str
3352

3453
class ClientData:
3554
def __init__(self, sid, conn, config):
@@ -129,6 +148,62 @@ async def end_stream(sid):
129148
if sid in clients:
130149
clients[sid].stop_transcription()
131150

151+
def create_note_taking_prompt(text):
    """Wrap *text* in the note-taking instructions sent to the summarizer.

    The prompt lists six numbered focus areas, then the text under a
    ``Text:`` heading, and ends with a trailing ``Notes:`` cue.

    Args:
        text: Transcript to embed in the prompt; inserted verbatim.

    Returns:
        The complete prompt string.
    """
    focus_points = (
        "Main topic and key themes",
        "Important points and arguments",
        "Examples or evidence provided",
        "Technical details (if any)",
        "Challenges or limitations",
        "Conclusions or takeaways",
    )
    numbered_points = "\n".join(
        f"{position}. {point}"
        for position, point in enumerate(focus_points, start=1)
    )
    sections = [
        "Summarize the following text into concise bullet-point notes. Focus on:",
        numbered_points,
        "",
        "Text:",
        text,
        "",
        "Notes:",
    ]
    return "\n".join(sections)
164+
165+
# Serializes access to the shared summarizer. Inference runs on a worker
# thread (see summarize), and allowing one run at a time matches how the
# previous in-loop blocking call behaved.
_summarizer_lock = threading.Lock()


def _run_summarizer(prompt):
    """Run the shared summarization pipeline on *prompt* and return the summary text."""
    with _summarizer_lock:
        # truncation=True clips inputs longer than the model's maximum input
        # length instead of letting the pipeline fail on long transcripts.
        summary_result = summarizer(
            prompt,
            max_length=250,
            min_length=50,
            do_sample=False,
            truncation=True,
        )
    return summary_result[0]['summary_text']


@app.post("/summary")
async def summarize(request: TranscriptRequest):
    """Summarize a transcript into notes.

    Args:
        request: Body carrying the transcript in ``request.text``.

    Returns:
        A dict with keys ``status`` ("too_short", "success" or "error"),
        ``message``, ``summary`` and ``can_summarize``. Failures are reported
        in the payload rather than raised, so the response shape is the same
        in every case.
    """
    # Basic validation
    if len(request.text.strip()) < 50:  # Very minimal length check
        return {
            "status": "too_short",
            "message": "The transcription is too short to summarize.",
            "summary": "Transcription is too short to generate a summary.",
            "can_summarize": False
        }

    try:
        # NOTE(review): facebook/bart-large-cnn is a plain summarization model,
        # not an instruction-following one, so the instructions in the prompt
        # are presumably summarized along with the transcript — confirm that
        # wrapping the text in a prompt is intended.
        prompt = create_note_taking_prompt(request.text)

        # Model inference is blocking and CPU-bound. Run it on a worker thread
        # so the event loop stays free for other requests while it completes.
        loop = asyncio.get_running_loop()
        summary = await loop.run_in_executor(None, _run_summarizer, prompt)
        print("summary created")

        return {
            "status": "success",
            "summary": summary,
            "message": summary,
            "can_summarize": True
        }

    except Exception as e:
        # Endpoint boundary: report the failure in the same response shape
        # instead of letting it surface as an unhandled server error.
        return {
            "status": "error",
            "message": str(e),
            "summary": "An error occurred while generating the summary.",
            "can_summarize": False
        }
202+
203+
@app.get("/health")
async def health_check():
    """Liveness probe: always reports the service as healthy."""
    health_payload = {"status": "healthy"}
    return health_payload
206+
132207
# Run the app
133208
if __name__ == "__main__":
134209
import uvicorn

0 commit comments

Comments
 (0)