
Commit 18198d4

Merge branch 'main' into fix/callable-evaluator-cross-process
2 parents: 79b72b0 + 80945ed · commit 18198d4

3 files changed

Lines changed: 168 additions & 70 deletions

File tree

openevolve/_version.py
openevolve/api.py
tests/test_api.py

openevolve/_version.py

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
 """Version information for openevolve package."""
 
-__version__ = "0.2.26"
+__version__ = "0.2.27"

openevolve/api.py

Lines changed: 97 additions & 65 deletions
@@ -142,6 +142,13 @@ async def _run_evolution_async(
     # Process evaluator
     evaluator_path = _prepare_evaluator(evaluator, temp_dir, temp_files)
 
+    # Auto-disable cascade evaluation if the evaluator doesn't define stage functions
+    if config_obj.evaluator.cascade_evaluation:
+        with open(evaluator_path, "r") as f:
+            eval_content = f.read()
+        if "evaluate_stage1" not in eval_content:
+            config_obj.evaluator.cascade_evaluation = False
+
     # Create and run controller
     controller = OpenEvolve(
         initial_program_path=program_path,
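
For reference, a minimal sketch of the kind of evaluator file this check looks for. Only the evaluate_stage1 name comes from the check above; the signatures and return values below are assumptions for illustration, not OpenEvolve's documented cascade API.

# Hypothetical evaluator file: defining evaluate_stage1 keeps cascade
# evaluation enabled; a file that only defines evaluate() now triggers the
# auto-disable above instead of failing. Return shapes are assumptions.

def evaluate_stage1(program_path):
    # cheap first-pass check, e.g. "does the program import and run at all?"
    return {"combined_score": 1.0}

def evaluate(program_path):
    # full evaluation, run after the cheap stage passes
    return {"combined_score": 1.0}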
@@ -349,57 +356,67 @@ def initial_sort(arr):
         lines.insert(func_end + 1, " " * (indent + 4) + "# EVOLVE-BLOCK-END")
     func_source = "\n".join(lines)
 
-    # Create evaluator that tests the function
-    def evaluator(program_path):
-        import importlib.util
-        import sys
+    # Create a self-contained evaluator as a code string so it works in subprocesses.
+    # Closure-based evaluators fail with process-based parallelism because subprocess
+    # workers cannot access the parent process's memory.
+    evaluator_code = f"""
+import importlib.util
+import copy
 
-        # Load the evolved program
-        spec = importlib.util.spec_from_file_location("evolved", program_path)
-        if spec is None or spec.loader is None:
-            return {"score": 0.0, "error": "Failed to load program"}
+FUNC_NAME = {func_name!r}
+TEST_CASES = {test_cases!r}
 
-        module = importlib.util.module_from_spec(spec)
+def evaluate(program_path):
+    '''Auto-generated evaluator for evolve_function'''
+    # Load the evolved program
+    spec = importlib.util.spec_from_file_location("evolved", program_path)
+    if spec is None or spec.loader is None:
+        return {{"combined_score": 0.0, "score": 0.0, "error": "Failed to load program"}}
 
-        try:
-            spec.loader.exec_module(module)
-        except Exception as e:
-            return {"score": 0.0, "error": f"Failed to execute program: {str(e)}"}
+    module = importlib.util.module_from_spec(spec)
+
+    try:
+        spec.loader.exec_module(module)
+    except Exception as e:
+        return {{"combined_score": 0.0, "score": 0.0, "error": f"Failed to execute program: {{str(e)}}"}}
 
-        if not hasattr(module, func_name):
-            return {"score": 0.0, "error": f"Function '{func_name}' not found"}
+    if not hasattr(module, FUNC_NAME):
+        return {{"combined_score": 0.0, "score": 0.0, "error": f"Function '{{FUNC_NAME}}' not found"}}
 
-        evolved_func = getattr(module, func_name)
-        correct = 0
-        total = len(test_cases)
-        errors = []
+    evolved_func = getattr(module, FUNC_NAME)
+    correct = 0
+    total = len(TEST_CASES)
+    errors = []
 
-        for input_val, expected in test_cases:
-            try:
-                # Handle case where input is a list/mutable - make a copy
-                if isinstance(input_val, list):
-                    test_input = input_val.copy()
-                else:
-                    test_input = input_val
-
-                result = evolved_func(test_input)
-                if result == expected:
-                    correct += 1
-                else:
-                    errors.append(f"Input {input_val}: expected {expected}, got {result}")
-            except Exception as e:
-                errors.append(f"Input {input_val}: {str(e)}")
-
-        return {
-            "score": correct / total,
-            "test_pass_rate": correct / total,
-            "tests_passed": correct,
-            "total_tests": total,
-            "errors": errors[:3],  # Limit error details
-        }
+    for input_val, expected in TEST_CASES:
+        try:
+            # Handle case where input is a list/mutable - make a copy
+            if isinstance(input_val, list):
+                test_input = input_val.copy()
+            else:
+                test_input = input_val
+
+            result = evolved_func(test_input)
+            if result == expected:
+                correct += 1
+            else:
+                errors.append(f"Input {{input_val}}: expected {{expected}}, got {{result}}")
+        except Exception as e:
+            errors.append(f"Input {{input_val}}: {{str(e)}}")
+
+    score = correct / total if total > 0 else 0.0
+    return {{
+        "combined_score": score,
+        "score": score,
+        "test_pass_rate": score,
+        "tests_passed": correct,
+        "total_tests": total,
+        "errors": errors[:3],
+    }}
+"""
 
     return run_evolution(
-        initial_program=func_source, evaluator=evaluator, iterations=iterations, **kwargs
+        initial_program=func_source, evaluator=evaluator_code, iterations=iterations, **kwargs
     )
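
The comment in the diff states the underlying limitation briefly; here is a standalone sketch of it, not part of the diff and with all names hypothetical: a closure-based evaluator cannot be pickled for a process-based worker, while a self-contained evaluator module only needs its file path.

import pickle

def make_closure_evaluator(test_cases):
    def evaluator(program_path):
        # captures test_cases from the enclosing scope
        return {"score": float(len(test_cases))}
    return evaluator

ev = make_closure_evaluator([([3, 1], [1, 3])])
try:
    pickle.dumps(ev)  # what handing it to a subprocess worker would require
except Exception as exc:
    print(f"cannot pickle closure evaluator: {exc}")

# A code-string evaluator, by contrast, is written to a file and reloaded in
# the worker via importlib, so nothing from the parent process's memory is
# needed.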

@@ -461,36 +478,51 @@ def benchmark_sort(instance):
         lines.append(" " * (indent + 4) + "# EVOLVE-BLOCK-END")
     class_source = "\n".join(lines)
 
-    # Create evaluator
-    def evaluator(program_path):
-        import importlib.util
+    # Create a self-contained evaluator as a code string so it works in subprocesses.
+    import textwrap
 
-        # Load the evolved program
-        spec = importlib.util.spec_from_file_location("evolved", program_path)
-        if spec is None or spec.loader is None:
-            return {"score": 0.0, "error": "Failed to load program"}
+    class_name = algorithm_class.__name__
+    benchmark_source = textwrap.dedent(inspect.getsource(benchmark))
 
-        module = importlib.util.module_from_spec(spec)
+    evaluator_code = f"""
+import importlib.util
 
-        try:
-            spec.loader.exec_module(module)
-        except Exception as e:
-            return {"score": 0.0, "error": f"Failed to execute program: {str(e)}"}
+CLASS_NAME = {class_name!r}
+
+{benchmark_source}
 
-        if not hasattr(module, algorithm_class.__name__):
-            return {"score": 0.0, "error": f"Class '{algorithm_class.__name__}' not found"}
+def evaluate(program_path):
+    '''Auto-generated evaluator for evolve_algorithm'''
+    spec = importlib.util.spec_from_file_location("evolved", program_path)
+    if spec is None or spec.loader is None:
+        return {{"combined_score": 0.0, "score": 0.0, "error": "Failed to load program"}}
 
-        AlgorithmClass = getattr(module, algorithm_class.__name__)
+    module = importlib.util.module_from_spec(spec)
 
-        try:
-            instance = AlgorithmClass()
-            metrics = benchmark(instance)
-            return metrics if isinstance(metrics, dict) else {"score": metrics}
-        except Exception as e:
-            return {"score": 0.0, "error": str(e)}
+    try:
+        spec.loader.exec_module(module)
+    except Exception as e:
+        return {{"combined_score": 0.0, "score": 0.0, "error": f"Failed to execute program: {{str(e)}}"}}
+
+    if not hasattr(module, CLASS_NAME):
+        return {{"combined_score": 0.0, "score": 0.0, "error": f"Class '{{CLASS_NAME}}' not found"}}
+
+    AlgorithmClass = getattr(module, CLASS_NAME)
+
+    try:
+        instance = AlgorithmClass()
+        metrics = {benchmark.__name__}(instance)
+        if not isinstance(metrics, dict):
+            metrics = {{"score": metrics}}
+        if "combined_score" not in metrics:
+            metrics["combined_score"] = metrics.get("score", 0.0)
+        return metrics
+    except Exception as e:
+        return {{"combined_score": 0.0, "score": 0.0, "error": str(e)}}
+"""
 
     return run_evolution(
-        initial_program=class_source, evaluator=evaluator, iterations=iterations, **kwargs
+        initial_program=class_source, evaluator=evaluator_code, iterations=iterations, **kwargs
     )
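
For context on the new approach, a small sketch (not from the diff, names hypothetical) of how the user's benchmark is captured for embedding: inspect.getsource pulls its text, and textwrap.dedent strips the indentation it carries when the benchmark was defined inside another function or a test method.

import inspect
import textwrap

def benchmark_sort(instance):
    # a user-supplied benchmark returning a metrics dict
    return {"score": 1.0}

benchmark_source = textwrap.dedent(inspect.getsource(benchmark_sort))
print(benchmark_source)
# Pasting this text at module level of the generated evaluator lets the
# subprocess call benchmark_sort(instance) by name, so no callable has to
# cross the process boundary.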

tests/test_api.py

Lines changed: 70 additions & 4 deletions
@@ -159,12 +159,12 @@ def initial_sort(arr):
                     if arr[j] > arr[j+1]:
                         arr[j], arr[j+1] = arr[j+1], arr[j]
             return arr
-
+
         test_cases = [
             ([3, 1, 2], [1, 2, 3]),
             ([5, 2], [2, 5]),
         ]
-
+
         # Mock the async controller to avoid actual evolution
         with unittest.mock.patch('openevolve.api._run_evolution_async') as mock_async:
            mock_async.return_value = EvolutionResult(
@@ -174,12 +174,78 @@ def initial_sort(arr):
                 metrics={"score": 1.0, "test_pass_rate": 1.0},
                 output_dir=None
             )
-
+
             result = evolve_function(initial_sort, test_cases, iterations=1)
-
+
             self.assertIsInstance(result, EvolutionResult)
             self.assertEqual(result.best_score, 1.0)
             mock_async.assert_called_once()
+
+    def test_evolve_function_evaluator_works_in_subprocess(self):
+        """Test that evolve_function generates an evaluator that works in a subprocess.
+
+        This is a regression test for the bug where callable evaluators stored in
+        globals() could not be accessed by process-based worker subprocesses.
+        """
+        import subprocess
+        import sys
+
+        def bubble_sort(arr):
+            for i in range(len(arr)):
+                for j in range(len(arr) - 1):
+                    if arr[j] > arr[j + 1]:
+                        arr[j], arr[j + 1] = arr[j + 1], arr[j]
+            return arr
+
+        test_cases = [([3, 1, 2], [1, 2, 3]), ([5, 2, 8], [2, 5, 8])]
+
+        # Call evolve_function but intercept the evaluator code it generates
+        # by capturing what gets passed to run_evolution
+        with unittest.mock.patch('openevolve.api.run_evolution') as mock_run:
+            mock_run.return_value = EvolutionResult(
+                best_program=None, best_score=1.0,
+                best_code="", metrics={}, output_dir=None
+            )
+            evolve_function(bubble_sort, test_cases, iterations=1)
+
+        # Extract the evaluator code string passed to run_evolution
+        call_kwargs = mock_run.call_args
+        evaluator_code = call_kwargs.kwargs.get('evaluator') or call_kwargs[1].get('evaluator')
+
+        self.assertIsInstance(evaluator_code, str, "evolve_function should pass evaluator as code string")
+        self.assertIn("def evaluate(program_path)", evaluator_code)
+        self.assertIn("combined_score", evaluator_code)
+
+        # Write the evaluator to a file
+        eval_file = os.path.join(self.temp_dir, "eval_test.py")
+        with open(eval_file, 'w') as f:
+            f.write(evaluator_code)
+
+        # Write a correct program for the evaluator to test
+        program_file = os.path.join(self.temp_dir, "program.py")
+        with open(program_file, 'w') as f:
+            f.write("def bubble_sort(arr):\n    return sorted(arr)\n")
+
+        # Run in a subprocess to verify it works across process boundaries
+        test_script = os.path.join(self.temp_dir, "run_eval.py")
+        with open(test_script, 'w') as f:
+            f.write(f"""
+import importlib.util
+spec = importlib.util.spec_from_file_location("evaluator", {eval_file!r})
+mod = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(mod)
+result = mod.evaluate({program_file!r})
+assert result["combined_score"] == 1.0, f"Expected 1.0, got {{result['combined_score']}}"
+assert result["tests_passed"] == 2, f"Expected 2, got {{result['tests_passed']}}"
+print("OK")
+""")
+
+        proc = subprocess.run(
+            [sys.executable, test_script],
+            capture_output=True, text=True, timeout=10
+        )
+        self.assertEqual(proc.returncode, 0, f"Subprocess failed: {proc.stderr}")
+        self.assertIn("OK", proc.stdout)
 
     def test_evolve_algorithm_basic(self):
         """Test evolve_algorithm with simple class"""
