import os
import shutil
import subprocess
import sys
import tarfile
import zipfile
from pathlib import Path

# --- Configuration ---
# Fix 1: Define the path to your main analyzer script.
# This assumes the benchmark script is in the same directory as main.py
MAIN_PY = Path("main.py")

BENCHMARK_DIR = Path("benchmark_suite")
HALTING_DIR = BENCHMARK_DIR / "halting"
NON_HALTING_DIR = BENCHMARK_DIR / "non-halting"
COMPLEX_DIR = BENCHMARK_DIR / "complex"

STDLIB_DEST = HALTING_DIR / "stdlib"
PYPI_DEST = HALTING_DIR / "pypi_sources"
SYNTHETIC_DEST = NON_HALTING_DIR / "synthetic"
PARADOXES_DEST = NON_HALTING_DIR / "paradoxes"

# List of PyPI packages to download
PYPI_PACKAGES = [
    "requests", "numpy", "pandas", "flask", "django",
    "scikit-learn", "matplotlib", "beautifulsoup4", "sqlalchemy", "celery"
]

# Number of synthetic non-halting files to generate
NUM_SYNTHETIC = 50

# Patterns for non-halting code
NON_HALTING_PATTERNS = [
    ("while_true", "while True:\n    pass"),
    ("unbounded_inc", "x = 0\nwhile x >= 0:\n    x += 1"),
    ("unbounded_dec", "x = 0\nwhile x <= 0:\n    x -= 1"),
    ("simple_recursion", "def f():\n    f()\nf()"),
    ("mutual_recursion", "def f():\n    g()\ndef g():\n    f()\nf()"),
]
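# Each pattern string is written verbatim as its own standalone module, so the
# recursion patterns end with a top-level call; without it the generated file
# would only define a function and trivially halt.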

# Path to your project's scripts directory for paradoxes (adjust as needed)
PROJECT_SCRIPTS_DIR = Path("scripts")

# --- Helper Functions ---
def create_directory(path: Path):
    path.mkdir(parents=True, exist_ok=True)
    print(f"Created/Ensured directory: {path}")

def collect_stdlib():
    """Copies all .py files from the standard library."""
    create_directory(STDLIB_DEST)
    stdlib_path = Path(shutil.__file__).parent
    print(f"Found standard library at: {stdlib_path}")
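    # Note: deriving the stdlib location from shutil.__file__'s parent is a
    # heuristic; if it points somewhere unexpected on an unusual install,
    # sysconfig.get_paths()["stdlib"] is a more direct alternative.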

    file_count = 0
    for root, _, files in os.walk(stdlib_path):
        for file in files:
            if file.endswith(".py"):
                source = Path(root) / file
                try:
                    shutil.copy(source, STDLIB_DEST)
                    file_count += 1
                except Exception as e:
                    print(f"Could not copy {source}: {e}")

    print(f"Successfully copied {file_count} stdlib files.")

def download_and_unpack_pypi():
    """Downloads PyPI packages and unpacks their .py files."""
    create_directory(PYPI_DEST)

    # --- Step 1: Download packages using pip ---
    print("Downloading PyPI packages...")
    subprocess.run(
        [sys.executable, "-m", "pip", "download", "--no-deps", "--dest", str(PYPI_DEST), *PYPI_PACKAGES],
        check=True, capture_output=True
    )
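    # check=True raises CalledProcessError if any download fails. Depending on
    # the package and platform, pip may fetch either an sdist (.tar.gz) or a
    # wheel (.whl), which is why both archive formats are handled below.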

    # --- Step 2: Unpack all archives first ---
    unpacked_dir = PYPI_DEST / "unpacked"
    create_directory(unpacked_dir)

    print("Unpacking archives...")
    archives_to_delete = []
    for archive in PYPI_DEST.iterdir():
        # Only process archive files, ignore directories
        if archive.is_file():
            try:
                # Path.suffix only yields the last extension (".gz"), so match
                # sdists on the full filename instead.
                if archive.name.endswith((".tar.gz", ".tgz")):
                    with tarfile.open(archive, "r:gz") as tar:
                        tar.extractall(path=unpacked_dir)
                    archives_to_delete.append(archive)
                elif archive.suffix == ".whl":
                    with zipfile.ZipFile(archive, "r") as zip_ref:
                        zip_ref.extractall(path=unpacked_dir)
                    archives_to_delete.append(archive)
            except (tarfile.ReadError, zipfile.BadZipFile, EOFError) as e:
                print(f"Warning: Could not unpack {archive.name}: {e}. Skipping.")

    # --- Step 3: Collect all .py files from the unpacked directory ---
    print("Collecting .py files...")
    file_count = 0
    for root, _, files in os.walk(unpacked_dir):
        for file in files:
            if file.endswith(".py"):
                source = Path(root) / file
                # Use a unique name to prevent overwriting files with the same name from different packages
                unique_name = f"{source.parent.name}_{source.name}"
                dest = PYPI_DEST / unique_name
                try:
                    if not dest.exists():
                        shutil.copy(source, dest)
                        file_count += 1
                except Exception as e:
                    print(f"Could not copy {source}: {e}")
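    # Note: package layouts are flattened and uniqueness is keyed only on the
    # immediate parent directory name, so same-named files from different
    # packages can still collide; the dest.exists() check keeps the first copy seen.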

    # --- Step 4: Clean up archives and temporary directory *after* all operations are done ---
    print("Cleaning up temporary files...")
    for archive in archives_to_delete:
        try:
            archive.unlink()
        except PermissionError as e:
            print(f"Warning: Could not delete archive {archive.name} immediately: {e}")

    try:
        shutil.rmtree(unpacked_dir)
    except PermissionError as e:
        print(f"Warning: Could not delete temporary directory {unpacked_dir} immediately: {e}")

    print(f"Successfully unpacked and collected {file_count} PyPI .py files.")

def generate_synthetic_non_halting():
    """Generates synthetic non-halting Python scripts."""
    create_directory(SYNTHETIC_DEST)
    file_count = 0
    for i in range(NUM_SYNTHETIC):
        pattern_name, code = NON_HALTING_PATTERNS[i % len(NON_HALTING_PATTERNS)]
        file_path = SYNTHETIC_DEST / f"{pattern_name}_{i}.py"
        with open(file_path, 'w') as f:
            f.write(code)
        file_count += 1
    print(f"Successfully generated {file_count} synthetic non-halting files.")

def copy_paradoxes_and_classify():
    """Copies user's scripts, classifying halting/non-halting/complex."""
    create_directory(HALTING_DIR)
    create_directory(PARADOXES_DEST)
    create_directory(COMPLEX_DIR)

    if not PROJECT_SCRIPTS_DIR.exists():
        print(f"Warning: Scripts directory {PROJECT_SCRIPTS_DIR} not found. Skipping copy.")
        return

    # These lists now only contain the base names
    halting_scripts = [
        "bounded_loop.py", "dynamic_input.py", "halting.py",
        "self_referential.py", "simple_halting.py"
    ]
    non_halting_scripts = [
        "complex_non_halting.py", "final_paradox.py", "mutating_paradox_A.py",
        "mutating_paradox_A_revised.py", "mutating_paradox_final_test.py",
        "non_halting.py", "obfuscated_paradox.py", "paradox.py",
        "polymorphic_termination_paradox.py", "semantic_paradox_A.py",
        "truly_obfuscated_paradox.py"
    ]
    complex_scripts = ["ackermann.py", "collatz_conjecture.py"]

    file_count = 0
    for file in PROJECT_SCRIPTS_DIR.iterdir():
        if file.suffix == ".py":
            dest_dir = None
            if file.name in halting_scripts:
                dest_dir = HALTING_DIR
            elif file.name in complex_scripts:
                dest_dir = COMPLEX_DIR
            elif file.name in non_halting_scripts:
                dest_dir = PARADOXES_DEST

            if dest_dir:
                shutil.copy(file, dest_dir / file.name)
                file_count += 1

    print(f"Successfully copied and classified {file_count} user scripts.")

# Fix 2: Create the placeholder function for clarity
def setup_complex():
    """Placeholder for manual complex files."""
    create_directory(COMPLEX_DIR)
    print("Complex directory created. If you have curated complex cases (e.g., a Turing machine), add them manually.")

def run_halting_analyzer():
    """Runs the main.py on the benchmark suite and collects results."""
    if not MAIN_PY.exists():
        print(f"Error: main.py not found at {MAIN_PY}. Ensure this script is in the project root.")
        return {}

    # Modify the main.py content in memory to point to the correct directory
    with open(MAIN_PY, 'r') as f:
        main_code = f.read()

    # This replacement is fragile but works for this specific main.py
    modified_code = main_code.replace(
        "scripts_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'scripts')",
        f"scripts_dir = r'{BENCHMARK_DIR.resolve()}'"
    )
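    # If main.py's scripts_dir line differs even by whitespace, the replace is a
    # silent no-op and the analyzer will scan its original scripts folder.
    # Passing the directory as a command-line argument would be more robust if
    # main.py ever grows that option (not assumed here).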

    temp_main_path = BENCHMARK_DIR / "temp_main.py"
    with open(temp_main_path, 'w') as f:
        f.write(modified_code)

    print("Running analyzer on the entire corpus... this may take several minutes.")
    try:
        # Run the modified main script
        process = subprocess.run(
            [sys.executable, str(temp_main_path)],
            capture_output=True, text=True, timeout=600  # 10-min timeout
        )

        results = {}
        current_script = None
        output = process.stdout
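        # The parser below assumes main.py prints one "[Analyzing]: <path>" line
        # followed by a "Result: <verdict>" line per script; results are keyed by
        # basename, so same-named files in different categories overwrite each other.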
        for line in output.splitlines():
            if line.startswith("[Analyzing]: "):
                # Extract just the filename from the path
                full_path = line.split("[Analyzing]: ")[1].strip()
                current_script = os.path.basename(full_path)
            elif line.startswith("Result: ") and current_script:
                result = line.split("Result: ")[1].strip()
                results[current_script] = result
                current_script = None

        if process.stderr:
            print("\n--- Analyzer Errors (stderr) ---")
            print(process.stderr)
            print("------------------------------\n")

        return results
    except subprocess.TimeoutExpired:
        print("ERROR: The benchmark run timed out. The corpus may be too large or a script caused a severe hang.")
        return {}
    except Exception as e:
        print(f"An unexpected error occurred while running the analyzer: {e}")
        return {}
    finally:
        if temp_main_path.exists():
            temp_main_path.unlink()

def calculate_percentage(results):
    """Calculates success percentage based on expected behaviors."""
    total = 0
    success = 0

    print("\n--- Verifying Benchmark Results ---")
    for category_dir, expected in [
        (HALTING_DIR, "halts"),
        (NON_HALTING_DIR, "does not halt"),
        (COMPLEX_DIR, "impossible to determine"),
    ]:
        for root, _, files in os.walk(category_dir):
            for file in files:
                if file.endswith(".py"):
                    total += 1
                    analyzer_result = results.get(file, "error (not found in output)")

                    is_success = False
                    if category_dir.name == "halting":
                        if analyzer_result == expected:
                            is_success = True
                    elif category_dir.name == "non-halting":
                        # Success if it correctly says "does not halt" OR safely defers
                        if analyzer_result in ["does not halt", "impossible to determine"]:
                            is_success = True
                    elif category_dir.name == "complex":
                        # Success if it safely defers OR reaches a decisive verdict
                        # (Ackermann does halt in theory; a conservative "does not halt" is tolerated here)
                        if analyzer_result in ["impossible to determine", "halts", "does not halt"]:
                            is_success = True
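                    # Note: the complex category accepts every verdict, so its only
                    # failure mode is a script missing from the analyzer output.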

                    if is_success:
                        success += 1
                    else:
                        print(f"MISMATCH in {category_dir.name}: {file} -> Expected '{expected}', Got '{analyzer_result}'")

    if total > 0:
        percentage = (success / total) * 100
        print(f"\n--- Practical Success Rate: {percentage:.2f}% ({success}/{total}) ---")
    else:
        print("No files were analyzed.")

# --- Main Execution ---
if __name__ == "__main__":
    create_directory(BENCHMARK_DIR)

    print("--- Phase 1: Collecting Corpus ---")
    collect_stdlib()
    download_and_unpack_pypi()
    generate_synthetic_non_halting()
    copy_paradoxes_and_classify()
    setup_complex()

    print("\n--- Phase 2: Running Halting Analyzer ---")
    analysis_results = run_halting_analyzer()

    print("\n--- Phase 3: Calculating Final Score ---")
    calculate_percentage(analysis_results)

    print("\n--- Automation Complete ---")