33from langchain .chat_models import ChatOpenAI
44from langchain .prompts import PromptTemplate
55from langchain .docstore .document import Document
6- from langchain .chains .summarize import load_summarize_chain
6+ from langchain .text_splitter import CharacterTextSplitter
7+ from langchain .chains import LLMChain , ReduceDocumentsChain , MapReduceDocumentsChain
8+ from langchain .chains .combine_documents .stuff import StuffDocumentsChain
79
# Configure the root logger at import time with an INFO threshold.
logging.basicConfig(level=logging.INFO)
# Module-wide logger; it uses the fixed name "GenerateSummary" rather than
# the module's __name__.
logger = logging.getLogger("GenerateSummary")
@@ -14,18 +16,19 @@ class LLM_Summarize:
1416
def __init__(self, llm_token):
    """Create the chat model and store the two prompt templates.

    Args:
        llm_token: API key forwarded to ``ChatOpenAI`` as
            ``openai_api_key``.

    Sets:
        self.llm: ``ChatOpenAI`` instance built with ``temperature=0.1``.
        self.code_summary_prompt: map-stage template text containing the
            ``{codes}`` placeholder.
        self.all_summary_prompt: reduce-stage template text containing the
            ``{summary_list}`` placeholder.
    """
    self.llm = ChatOpenAI(temperature=0.1, openai_api_key=llm_token)

    # The placeholder is spelled exactly ``{codes}`` (no inner whitespace):
    # format-style templates treat everything between the braces as the
    # variable name, and summarize_repo binds documents to "codes".
    # Line breaks follow the original literal; the indentation that the
    # original triple-quoted string carried is not reproduced.
    self.code_summary_prompt = (
        "You are an elite programmer who can understand Github Repository code give to you in text very\n"
        "well and summarize what is written in it.\n"
        "\n"
        "Code : {codes}\n"
        "\n"
        "Summarize the above list of codes present between delimiters in 50-70 words each and in paragraph.\n"
        "Store it in a list."
    )

    # ``{summary_list}`` is the variable name summarize_repo passes to the
    # combine/reduce step.
    self.all_summary_prompt = (
        "You are great at understanding bigger picture of a codebase by looking at summary of different code\n"
        "files. Given the following summaries and you have to tell in detail what does the project do.\n"
        "\n"
        "Summaries : {summary_list}\n"
        "\n"
        "Limit final summary to 2000-3000 words. Provide an elegant answer highlighting its purpose,\n"
        "main features, and key technologies used. Include 2-3 emojis."
    )
3033
3134 def summarize_repo (self , code_list ):
@@ -36,28 +39,40 @@ def summarize_repo(self, code_list):
3639
3740 code_list = [Document (page_content = code ) for code in code_list ]
3841
39- # Prompt to use in map and reduce stages
40- CODE_SUMMARY = PromptTemplate (
41- template = self .code_summmary_prompt , input_variables = ["text" ]
42+ # Map
43+ MAP_PROMPT = PromptTemplate .from_template (template = self .code_summary_prompt )
44+ map_chain = LLMChain (llm = self .llm , prompt = MAP_PROMPT )
45+
46+ # Reduce
47+ REDUCE_PROMPT = PromptTemplate .from_template (template = self .all_summary_prompt )
48+ reduce_chain = LLMChain (llm = self .llm , prompt = REDUCE_PROMPT )
49+
50+ logger .info ("Prompt Ready" )
51+
52+ combine_documents_chain = StuffDocumentsChain (
53+ llm_chain = reduce_chain , document_variable_name = "summary_list"
4254 )
43- ALL_SUMMARY = PromptTemplate (
44- template = self .all_summary_prompt , input_variables = ["summary_list" ]
55+ reduce_documents_chain = ReduceDocumentsChain (
56+ combine_documents_chain = combine_documents_chain ,
57+ collapse_documents_chain = combine_documents_chain ,
58+ token_max = 4000 ,
4559 )
4660
47- logger .info ("Prompt Ready" )
61+ map_reduce_chain = MapReduceDocumentsChain (
62+ llm_chain = map_chain ,
63+ reduce_documents_chain = reduce_documents_chain ,
64+ document_variable_name = "codes" ,
65+ return_intermediate_steps = False ,
66+ )
4867
49- chain = load_summarize_chain (
50- self .llm ,
51- chain_type = "map_reduce" ,
52- map_prompt = CODE_SUMMARY ,
53- combine_prompt = ALL_SUMMARY ,
54- combine_document_variable_name = "summary_list" ,
68+ # Split text
69+ text_splitter = CharacterTextSplitter .from_tiktoken_encoder (
70+ chunk_size = 1000 , chunk_overlap = 0
5571 )
56- logger . info ( "Running LLM" )
72+ split_docs = text_splitter . split_documents ( code_list )
5773
58- result = chain ({"input_documents" : code_list }, return_only_outputs = True )[
59- "output_text"
60- ]
74+ logger .info ("Running LLM" )
75+ result = map_reduce_chain .run (split_docs )
6176
6277 # Configuring according to HTML page
6378 result = result .replace ("\n " , "<br>" )
0 commit comments