Commit b67745e

chore: convert prompts to JSON format
1 parent e6e04a0

File tree

4 files changed: +27 -41 lines changed

config.yaml

Lines changed: 5 additions & 11 deletions
@@ -1,14 +1,15 @@
 # * Settings marked with * are advanced settings that won't appear in the Streamlit page and can only be modified manually in config.py
 # recommend to set in streamlit page
-version: "2.2.3"
+version: "3.0.0"
 ## ======================== Basic Settings ======================== ##
 display_language: "zh-CN"

 # API settings
 api:
-  key: 'your_api_key'
-  base_url: 'https://api.302.ai'
-  model: 'gemini-2.0-flash'
+  key: 'your-api-key'
+  base_url: 'https://openrouter.ai/api'
+  model: 'deepseek/deepseek-chat-v3-0324'
+  llm_support_json: true

 # Language settings, written into the prompt, can be described in natural language
 target_language: '简体中文'
@@ -145,13 +146,6 @@ allowed_audio_formats:
 - 'flac'
 - 'm4a'

-# LLMs that support returning JSON format
-llm_support_json:
-- 'gpt-4o'
-- 'gpt-4o-mini'
-- 'gemini-2.0-flash'
-- 'deepseek-chat'
-
 # Spacy models
 spacy_model_map:
   en: 'en_core_web_md'
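With this change the per-model llm_support_json whitelist is gone; whether the configured model can return strict JSON is now a single boolean nested under api, read as load_key("api.llm_support_json"). As a rough illustration of the new layout only, a dotted-key lookup could work like the hypothetical helper below (assumes PyYAML; the repo's real load_key may be implemented differently):

# Hypothetical dotted-key lookup over config.yaml; assumes PyYAML is installed.
# Illustrates the new nested layout only, not the repo's actual implementation.
import yaml

def load_key(dotted_key, path="config.yaml"):
    with open(path, "r", encoding="utf-8") as f:
        node = yaml.safe_load(f)
    for part in dotted_key.split("."):
        node = node[part]  # e.g. "api" -> "llm_support_json"
    return node

# With the config above this now returns a plain bool instead of a model list:
# load_key("api.llm_support_json")  -> True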

core/_4_2_translate.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
 console = Console()

 # Function to split text into chunks
-def split_chunks_by_chars(chunk_size=400, max_i=8):
+def split_chunks_by_chars(chunk_size, max_i):
     """Split text into chunks based on character count, return a list of multi-line text chunks"""
     with open(_3_2_SPLIT_BY_MEANING, "r", encoding="utf-8") as file:
         sentences = file.read().strip().split('\n')
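Since the defaults (chunk_size=400, max_i=8) are removed, every caller now has to pass both values explicitly. A hypothetical call site, shown only to illustrate the new signature (the values mirror the removed defaults and are not taken from this commit):

# Hypothetical caller of the new signature; values simply mirror the old defaults.
chunks = split_chunks_by_chars(chunk_size=400, max_i=8)
for chunk in chunks:
    print(len(chunk), "characters in this chunk")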

core/prompts.py

Lines changed: 20 additions & 28 deletions
@@ -223,55 +223,47 @@ def get_prompt_expressiveness(faithfulness_result, lines, shared_prompt):
 ## ================================================================
 # @ step6_splitforsub.py
 def get_align_prompt(src_sub, tr_sub, src_part):
-    TARGET_LANGUAGE = load_key("target_language")
-    src_language = load_key("whisper.detected_language")
+    targ_lang = load_key("target_language")
+    src_lang = load_key("whisper.detected_language")
     src_splits = src_part.split('\n')
     num_parts = len(src_splits)
     src_part = src_part.replace('\n', ' [br] ')
-    align_prompt = '''
+    align_parts_json = ','.join(
+        f'''
+    {{
+        "src_part_{i+1}": "{src_splits[i]}",
+        "target_part_{i+1}": "Corresponding aligned {targ_lang} subtitle part"
+    }}''' for i in range(num_parts)
+    )
+
+    align_prompt = f'''
 ## Role
-You are a Netflix subtitle alignment expert fluent in both {src_language} and {target_language}.
+You are a Netflix subtitle alignment expert fluent in both {src_lang} and {targ_lang}.

 ## Task
-We have {src_language} and {target_language} original subtitles for a Netflix program, as well as a pre-processed split version of {src_language} subtitles. Your task is to create the best splitting scheme for the {target_language} subtitles based on this information.
+We have {src_lang} and {targ_lang} original subtitles for a Netflix program, as well as a pre-processed split version of {src_lang} subtitles. Your task is to create the best splitting scheme for the {targ_lang} subtitles based on this information.

-1. Analyze the word order and structural correspondence between {src_language} and {target_language} subtitles
-2. Split the {target_language} subtitles according to the pre-processed {src_language} split version
+1. Analyze the word order and structural correspondence between {src_lang} and {targ_lang} subtitles
+2. Split the {targ_lang} subtitles according to the pre-processed {src_lang} split version
 3. Never leave empty lines. If it's difficult to split based on meaning, you may appropriately rewrite the sentences that need to be aligned
 4. Do not add comments or explanations in the translation, as the subtitles are for the audience to read

 ## INPUT
 <subtitles>
-{src_language} Original: "{src_sub}"
-{target_language} Original: "{tr_sub}"
-Pre-processed {src_language} Subtitles ([br] indicates split points): {src_part}
+{src_lang} Original: "{src_sub}"
+{targ_lang} Original: "{tr_sub}"
+Pre-processed {src_lang} Subtitles ([br] indicates split points): {src_part}
 </subtitles>

 ## Output in only JSON format
 {{
-    "analysis": "Brief analysis of word order, structure, and semantic correspondence between {src_language} and {target_language} subtitles",
+    "analysis": "Brief analysis of word order, structure, and semantic correspondence between two subtitles",
     "align": [
         {align_parts_json}
     ]
 }}
 '''
-
-    align_parts_json = ','.join(
-        f'''
-    {{
-        "src_part_{i+1}": "{src_splits[i]}",
-        "target_part_{i+1}": "Corresponding aligned {TARGET_LANGUAGE} subtitle part"
-    }}''' for i in range(num_parts)
-    )
-
-    return align_prompt.format(
-        src_language=src_language,
-        target_language=TARGET_LANGUAGE,
-        src_sub=src_sub,
-        tr_sub=tr_sub,
-        src_part=src_part,
-        align_parts_json=align_parts_json,
-    )
+    return align_prompt

 ## ================================================================
 # @ step8_gen_audio_task.py @ step10_gen_audio.py
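The rewritten get_align_prompt builds align_parts_json first and then interpolates it directly into an f-string, so the trailing .format(...) call and its keyword plumbing disappear. A self-contained sketch of the same pattern, with hard-coded stand-ins instead of load_key and the real subtitles (illustration only, not the repo's code):

# Minimal, self-contained sketch of the f-string prompt pattern used above.
# src_lang, targ_lang and src_splits are hard-coded stand-ins for illustration.
src_lang, targ_lang = "English", "简体中文"
src_splits = ["Hello there,", "how are you today?"]

align_parts_json = ','.join(
    f'''
    {{
        "src_part_{i+1}": "{src_splits[i]}",
        "target_part_{i+1}": "Corresponding aligned {targ_lang} subtitle part"
    }}''' for i in range(len(src_splits))
)

align_prompt = f'''
## Role
You are a Netflix subtitle alignment expert fluent in both {src_lang} and {targ_lang}.

## Output in only JSON format
{{
    "align": [
        {align_parts_json}
    ]
}}
'''
print(align_prompt)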

core/utils/ask_gpt.py

Lines changed: 1 addition & 1 deletion
@@ -58,7 +58,7 @@ def ask_gpt(prompt, resp_type=None, valid_def=None, log_title="default"):
     elif 'v1' not in base_url:
         base_url = base_url.strip('/') + '/v1'
     client = OpenAI(api_key=load_key("api.key"), base_url=base_url)
-    response_format = {"type": "json_object"} if resp_type == "json" and model in load_key("llm_support_json") else None
+    response_format = {"type": "json_object"} if resp_type == "json" and load_key("api.llm_support_json") else None

     messages = [{"role": "user", "content": prompt}]
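After this change, JSON mode no longer depends on a model whitelist: it is requested whenever resp_type == "json" and the single api.llm_support_json flag is true. A hedged sketch of how that flag reaches an OpenAI-compatible endpoint (openai>=1.x client; the key, base_url and model values below just mirror the sample config above and are not required):

# Sketch only: shows how the boolean gates OpenAI-style JSON mode.
# Assumes the openai>=1.x client; values mirror the sample config.yaml above.
from openai import OpenAI

client = OpenAI(api_key="your-api-key", base_url="https://openrouter.ai/api/v1")

supports_json = True  # i.e. what load_key("api.llm_support_json") would return
resp_type = "json"

kwargs = {}
if resp_type == "json" and supports_json:
    kwargs["response_format"] = {"type": "json_object"}  # same condition as in ask_gpt

response = client.chat.completions.create(
    model="deepseek/deepseek-chat-v3-0324",
    messages=[{"role": "user", "content": 'Return {"ok": true} as a JSON object.'}],
    **kwargs,
)
print(response.choices[0].message.content)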