@@ -30,9 +30,9 @@ async def detect_language(self, text: str) -> Tuple[str, float]:
3030 except UnicodeEncodeError :
3131 return "unknown" , 0.4
3232
33- async def translate (self , text : str , target_language : str ) -> Tuple [str , float ]:
33+ async def translate (self , text : str , _target_language : str ) -> Tuple [str , float ]:
3434 # No translation performed
35- return text , 1 .0
35+ return text , 0 .0
3636
3737
3838try :
@@ -58,22 +58,42 @@ async def detect_language(self, text: str) -> Tuple[str, float]:
5858 import openai
5959
6060 class OpenAIProvider :
61- def __init__ (self ):
61+ def __init__ (self , model : Optional [str ] = None , timeout : float = 30.0 ):
62+ # Prefer modern client; fall back to legacy globals.
63+ self .model = model or os .getenv ("OPENAI_TRANSLATE_MODEL" , "gpt-4o-mini" )
64+ self .timeout = float (os .getenv ("OPENAI_TIMEOUT" , timeout ))
6265 key = os .getenv ("OPENAI_API_KEY" )
63- if key :
66+ # If the new client exists, use it; otherwise fall back to global api_key
67+ self ._client = getattr (openai , "OpenAI" , None )
68+ if self ._client :
69+ # instantiate client with key if provided
70+ self ._client = self ._client (api_key = key ) if key else self ._client ()
71+ elif key and hasattr (openai , "api_key" ):
6472 openai .api_key = key
6573
6674 async def detect_language (self , text : str ) -> Tuple [str , float ]:
6775 try :
68- resp = await asyncio .to_thread (
69- openai .ChatCompletion .create ,
70- model = "gpt-3.5-turbo" ,
71- messages = [
72- {"role" : "system" , "content" : "You are a language detection assistant." },
73- {"role" : "user" , "content" : f"What language is this? Reply with 'lang: <code>' and 'confidence: <0-1>'\n Text:\n { text [:1000 ]} " },
74- ],
75- max_tokens = 20 ,
76- )
76+ if self ._client :
77+ resp = await asyncio .to_thread (
78+ self ._client .chat .completions .create ,
79+ model = self .model ,
80+ messages = [
81+ {"role" : "system" , "content" : "You are a language detection assistant." },
82+ {"role" : "user" , "content" : f"What language is this? Reply with 'lang: <code>' and 'confidence: <0-1>'\n Text:\n { text [:1000 ]} " },
83+ ],
84+ timeout = self .timeout ,
85+ )
86+ else :
87+ resp = await asyncio .to_thread (
88+ openai .ChatCompletion .create ,
89+ model = self .model ,
90+ messages = [
91+ {"role" : "system" , "content" : "You are a language detection assistant." },
92+ {"role" : "user" , "content" : f"What language is this? Reply with 'lang: <code>' and 'confidence: <0-1>'\n Text:\n { text [:1000 ]} " },
93+ ],
94+ max_tokens = 20 ,
95+ request_timeout = self .timeout ,
96+ )
7797 out = resp .choices [0 ].message .content or ""
7898 # naive parse
7999 lang = "unknown"
@@ -92,15 +112,27 @@ async def detect_language(self, text: str) -> Tuple[str, float]:
92112
93113 async def translate (self , text : str , target_language : str ) -> Tuple [str , float ]:
94114 try :
95- resp = await asyncio .to_thread (
96- openai .ChatCompletion .create ,
97- model = "gpt-3.5-turbo" ,
98- messages = [
99- {"role" : "system" , "content" : "You are a helpful translator. Translate the user text to the target language exactly and nothing else." },
100- {"role" : "user" , "content" : f"Translate to { target_language } :\n \n { text [:3000 ]} " },
101- ],
102- max_tokens = 1000 ,
103- )
115+ if self ._client :
116+ resp = await asyncio .to_thread (
117+ self ._client .chat .completions .create ,
118+ model = self .model ,
119+ messages = [
120+ {"role" : "system" , "content" : "You are a helpful translator. Translate the user text to the target language exactly and nothing else." },
121+ {"role" : "user" , "content" : f"Translate to { target_language } :\n \n { text [:3000 ]} " },
122+ ],
123+ timeout = self .timeout ,
124+ )
125+ else :
126+ resp = await asyncio .to_thread (
127+ openai .ChatCompletion .create ,
128+ model = self .model ,
129+ messages = [
130+ {"role" : "system" , "content" : "You are a helpful translator. Translate the user text to the target language exactly and nothing else." },
131+ {"role" : "user" , "content" : f"Translate to { target_language } :\n \n { text [:3000 ]} " },
132+ ],
133+ max_tokens = 1000 ,
134+ request_timeout = self .timeout ,
135+ )
104136 translated = (resp .choices [0 ].message .content or "" ).strip ()
105137 return translated , 0.9
106138 except Exception :
@@ -112,12 +144,12 @@ async def translate(self, text: str, target_language: str) -> Tuple[str, float]:
112144
113145def _get_provider (name : str ) -> TranslationProvider :
114146 """Get translation provider by name."""
147+ name = (name or "noop" ).lower ()
115148 if name == "openai" and OpenAIProvider is not None :
116149 return OpenAIProvider ()
117- elif name == "langdetect" :
150+ if name == "langdetect" :
118151 return LangDetectProvider ()
119- else :
120- return NoOpProvider ()
152+ return NoOpProvider ()
121153
122154
123155async def translate_content (
@@ -148,7 +180,7 @@ async def translate_content(
148180 try :
149181 lang , conf = await provider .detect_language (text )
150182 except Exception :
151- logger .exception ("language detection failed" )
183+ logger .exception ("language detection failed for content_id=%s" , content_id )
152184 lang , conf = "unknown" , 0.0
153185
154186 requires_translation = (lang != target_language ) and (conf >= confidence_threshold )
@@ -162,7 +194,7 @@ async def translate_content(
162194
163195 # attach language metadata to chunk.metadata
164196 chunk .metadata = getattr (chunk , "metadata" , {}) or {}
165- chunk .metadata ["language" ] = lang_meta
197+ chunk .metadata ["language" ] = lang_meta . model_dump ()
166198
167199 # perform translation when necessary
168200 if requires_translation :
@@ -181,10 +213,15 @@ async def translate_content(
181213 translation_provider = translation_provider ,
182214 confidence_score = t_conf ,
183215 )
184- chunk .metadata ["translation" ] = trans
185-
186- # Use translated content for subsequent tasks
187- chunk .text = translated_text
216+ if translated_text != text :
217+ chunk .metadata ["translation" ] = trans .model_dump ()
218+ # Use translated content for subsequent tasks
219+ chunk .text = translated_text
220+ else :
221+ logger .info (
222+ "Skipping translation metadata; provider returned unchanged text for content_id=%s" ,
223+ content_id ,
224+ )
188225
189226 enhanced .append (chunk )
190227
0 commit comments