
Commit 18198d4

Merge branch 'main' into fix/callable-evaluator-cross-process
2 parents: 79b72b0 + 80945ed · commit 18198d4

3 files changed

Lines changed: 168 additions & 70 deletions

File tree

openevolve/_version.py
openevolve/api.py
tests/test_api.py

openevolve/_version.py

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
 """Version information for openevolve package."""
 
-__version__ = "0.2.26"
+__version__ = "0.2.27"

openevolve/api.py

Lines changed: 97 additions & 65 deletions
@@ -142,6 +142,13 @@ async def _run_evolution_async(
     # Process evaluator
     evaluator_path = _prepare_evaluator(evaluator, temp_dir, temp_files)
 
+    # Auto-disable cascade evaluation if the evaluator doesn't define stage functions
+    if config_obj.evaluator.cascade_evaluation:
+        with open(evaluator_path, "r") as f:
+            eval_content = f.read()
+        if "evaluate_stage1" not in eval_content:
+            config_obj.evaluator.cascade_evaluation = False
+
     # Create and run controller
     controller = OpenEvolve(
         initial_program_path=program_path,
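
For reference, a minimal sketch of the kind of evaluator file this check looks for. Only the evaluate_stage1 name comes from the check above; the signatures and return values below are assumptions for illustration, not OpenEvolve's documented cascade API.

# Hypothetical evaluator file: defining evaluate_stage1 keeps cascade
# evaluation enabled; a file that only defines evaluate() now triggers the
# auto-disable above instead of failing. Return shapes are assumptions.

def evaluate_stage1(program_path):
    # cheap first-pass check, e.g. "does the program import and run at all?"
    return {"combined_score": 1.0}

def evaluate(program_path):
    # full evaluation, run after the cheap stage passes
    return {"combined_score": 1.0}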
@@ -349,57 +356,67 @@ def initial_sort(arr):
         lines.insert(func_end + 1, " " * (indent + 4) + "# EVOLVE-BLOCK-END")
     func_source = "\n".join(lines)
 
-    # Create evaluator that tests the function
-    def evaluator(program_path):
-        import importlib.util
-        import sys
+    # Create a self-contained evaluator as a code string so it works in subprocesses.
+    # Closure-based evaluators fail with process-based parallelism because subprocess
+    # workers cannot access the parent process's memory.
+    evaluator_code = f"""
+import importlib.util
+import copy
 
-        # Load the evolved program
-        spec = importlib.util.spec_from_file_location("evolved", program_path)
-        if spec is None or spec.loader is None:
-            return {"score": 0.0, "error": "Failed to load program"}
+FUNC_NAME = {func_name!r}
+TEST_CASES = {test_cases!r}
 
-        module = importlib.util.module_from_spec(spec)
+def evaluate(program_path):
+    '''Auto-generated evaluator for evolve_function'''
+    # Load the evolved program
+    spec = importlib.util.spec_from_file_location("evolved", program_path)
+    if spec is None or spec.loader is None:
+        return {{"combined_score": 0.0, "score": 0.0, "error": "Failed to load program"}}
 
-        try:
-            spec.loader.exec_module(module)
-        except Exception as e:
-            return {"score": 0.0, "error": f"Failed to execute program: {str(e)}"}
+    module = importlib.util.module_from_spec(spec)
+
+    try:
+        spec.loader.exec_module(module)
+    except Exception as e:
+        return {{"combined_score": 0.0, "score": 0.0, "error": f"Failed to execute program: {{str(e)}}"}}
 
-        if not hasattr(module, func_name):
-            return {"score": 0.0, "error": f"Function '{func_name}' not found"}
+    if not hasattr(module, FUNC_NAME):
+        return {{"combined_score": 0.0, "score": 0.0, "error": f"Function '{{FUNC_NAME}}' not found"}}
 
-        evolved_func = getattr(module, func_name)
-        correct = 0
-        total = len(test_cases)
-        errors = []
+    evolved_func = getattr(module, FUNC_NAME)
+    correct = 0
+    total = len(TEST_CASES)
+    errors = []
 
-        for input_val, expected in test_cases:
-            try:
-                # Handle case where input is a list/mutable - make a copy
-                if isinstance(input_val, list):
-                    test_input = input_val.copy()
-                else:
-                    test_input = input_val
-
-                result = evolved_func(test_input)
-                if result == expected:
-                    correct += 1
-                else:
-                    errors.append(f"Input {input_val}: expected {expected}, got {result}")
-            except Exception as e:
-                errors.append(f"Input {input_val}: {str(e)}")
-
-        return {
-            "score": correct / total,
-            "test_pass_rate": correct / total,
-            "tests_passed": correct,
-            "total_tests": total,
-            "errors": errors[:3],  # Limit error details
-        }
+    for input_val, expected in TEST_CASES:
+        try:
+            # Handle case where input is a list/mutable - make a copy
+            if isinstance(input_val, list):
+                test_input = input_val.copy()
+            else:
+                test_input = input_val
+
+            result = evolved_func(test_input)
+            if result == expected:
+                correct += 1
+            else:
+                errors.append(f"Input {{input_val}}: expected {{expected}}, got {{result}}")
+        except Exception as e:
+            errors.append(f"Input {{input_val}}: {{str(e)}}")
+
+    score = correct / total if total > 0 else 0.0
+    return {{
+        "combined_score": score,
+        "score": score,
+        "test_pass_rate": score,
+        "tests_passed": correct,
+        "total_tests": total,
+        "errors": errors[:3],
+    }}
+"""
 
     return run_evolution(
-        initial_program=func_source, evaluator=evaluator, iterations=iterations, **kwargs
+        initial_program=func_source, evaluator=evaluator_code, iterations=iterations, **kwargs
     )
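
The comment in the diff states the underlying limitation briefly; here is a standalone sketch of it, not part of the diff and with all names hypothetical: a closure-based evaluator cannot be pickled for a process-based worker, while a self-contained evaluator module only needs its file path.

import pickle

def make_closure_evaluator(test_cases):
    def evaluator(program_path):
        # captures test_cases from the enclosing scope
        return {"score": float(len(test_cases))}
    return evaluator

ev = make_closure_evaluator([([3, 1], [1, 3])])
try:
    pickle.dumps(ev)  # what handing it to a subprocess worker would require
except Exception as exc:
    print(f"cannot pickle closure evaluator: {exc}")

# A code-string evaluator, by contrast, is written to a file and reloaded in
# the worker via importlib, so nothing from the parent process's memory is
# needed.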

@@ -461,36 +478,51 @@ def benchmark_sort(instance):
         lines.append(" " * (indent + 4) + "# EVOLVE-BLOCK-END")
     class_source = "\n".join(lines)
 
-    # Create evaluator
-    def evaluator(program_path):
-        import importlib.util
+    # Create a self-contained evaluator as a code string so it works in subprocesses.
+    import textwrap
 
-        # Load the evolved program
-        spec = importlib.util.spec_from_file_location("evolved", program_path)
-        if spec is None or spec.loader is None:
-            return {"score": 0.0, "error": "Failed to load program"}
+    class_name = algorithm_class.__name__
+    benchmark_source = textwrap.dedent(inspect.getsource(benchmark))
 
-        module = importlib.util.module_from_spec(spec)
+    evaluator_code = f"""
+import importlib.util
 
-        try:
-            spec.loader.exec_module(module)
-        except Exception as e:
-            return {"score": 0.0, "error": f"Failed to execute program: {str(e)}"}
+CLASS_NAME = {class_name!r}
+
+{benchmark_source}
 
-        if not hasattr(module, algorithm_class.__name__):
-            return {"score": 0.0, "error": f"Class '{algorithm_class.__name__}' not found"}
+def evaluate(program_path):
+    '''Auto-generated evaluator for evolve_algorithm'''
+    spec = importlib.util.spec_from_file_location("evolved", program_path)
+    if spec is None or spec.loader is None:
+        return {{"combined_score": 0.0, "score": 0.0, "error": "Failed to load program"}}
 
-        AlgorithmClass = getattr(module, algorithm_class.__name__)
+    module = importlib.util.module_from_spec(spec)
 
-        try:
-            instance = AlgorithmClass()
-            metrics = benchmark(instance)
-            return metrics if isinstance(metrics, dict) else {"score": metrics}
-        except Exception as e:
-            return {"score": 0.0, "error": str(e)}
+    try:
+        spec.loader.exec_module(module)
+    except Exception as e:
+        return {{"combined_score": 0.0, "score": 0.0, "error": f"Failed to execute program: {{str(e)}}"}}
+
+    if not hasattr(module, CLASS_NAME):
+        return {{"combined_score": 0.0, "score": 0.0, "error": f"Class '{{CLASS_NAME}}' not found"}}
+
+    AlgorithmClass = getattr(module, CLASS_NAME)
+
+    try:
+        instance = AlgorithmClass()
+        metrics = {benchmark.__name__}(instance)
+        if not isinstance(metrics, dict):
+            metrics = {{"score": metrics}}
+        if "combined_score" not in metrics:
+            metrics["combined_score"] = metrics.get("score", 0.0)
+        return metrics
+    except Exception as e:
+        return {{"combined_score": 0.0, "score": 0.0, "error": str(e)}}
+"""
 
     return run_evolution(
-        initial_program=class_source, evaluator=evaluator, iterations=iterations, **kwargs
+        initial_program=class_source, evaluator=evaluator_code, iterations=iterations, **kwargs
     )
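
For context on the new approach, a small sketch (not from the diff, names hypothetical) of how the user's benchmark is captured for embedding: inspect.getsource pulls its text, and textwrap.dedent strips the indentation it carries when the benchmark was defined inside another function or a test method.

import inspect
import textwrap

def benchmark_sort(instance):
    # a user-supplied benchmark returning a metrics dict
    return {"score": 1.0}

benchmark_source = textwrap.dedent(inspect.getsource(benchmark_sort))
print(benchmark_source)
# Pasting this text at module level of the generated evaluator lets the
# subprocess call benchmark_sort(instance) by name, so no callable has to
# cross the process boundary.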

tests/test_api.py

Lines changed: 70 additions & 4 deletions
@@ -159,12 +159,12 @@ def initial_sort(arr):
                     if arr[j] > arr[j+1]:
                         arr[j], arr[j+1] = arr[j+1], arr[j]
             return arr
-
+
         test_cases = [
             ([3, 1, 2], [1, 2, 3]),
             ([5, 2], [2, 5]),
         ]
-
+
         # Mock the async controller to avoid actual evolution
         with unittest.mock.patch('openevolve.api._run_evolution_async') as mock_async:
            mock_async.return_value = EvolutionResult(
@@ -174,12 +174,78 @@ def initial_sort(arr):
                 metrics={"score": 1.0, "test_pass_rate": 1.0},
                 output_dir=None
             )
-
+
             result = evolve_function(initial_sort, test_cases, iterations=1)
-
+
             self.assertIsInstance(result, EvolutionResult)
             self.assertEqual(result.best_score, 1.0)
             mock_async.assert_called_once()
+
+    def test_evolve_function_evaluator_works_in_subprocess(self):
+        """Test that evolve_function generates an evaluator that works in a subprocess.
+
+        This is a regression test for the bug where callable evaluators stored in
+        globals() could not be accessed by process-based worker subprocesses.
+        """
+        import subprocess
+        import sys
+
+        def bubble_sort(arr):
+            for i in range(len(arr)):
+                for j in range(len(arr) - 1):
+                    if arr[j] > arr[j + 1]:
+                        arr[j], arr[j + 1] = arr[j + 1], arr[j]
+            return arr
+
+        test_cases = [([3, 1, 2], [1, 2, 3]), ([5, 2, 8], [2, 5, 8])]
+
+        # Call evolve_function but intercept the evaluator code it generates
+        # by capturing what gets passed to run_evolution
+        with unittest.mock.patch('openevolve.api.run_evolution') as mock_run:
+            mock_run.return_value = EvolutionResult(
+                best_program=None, best_score=1.0,
+                best_code="", metrics={}, output_dir=None
+            )
+            evolve_function(bubble_sort, test_cases, iterations=1)
+
+        # Extract the evaluator code string passed to run_evolution
+        call_kwargs = mock_run.call_args
+        evaluator_code = call_kwargs.kwargs.get('evaluator') or call_kwargs[1].get('evaluator')
+
+        self.assertIsInstance(evaluator_code, str, "evolve_function should pass evaluator as code string")
+        self.assertIn("def evaluate(program_path)", evaluator_code)
+        self.assertIn("combined_score", evaluator_code)
+
+        # Write the evaluator to a file
+        eval_file = os.path.join(self.temp_dir, "eval_test.py")
+        with open(eval_file, 'w') as f:
+            f.write(evaluator_code)
+
+        # Write a correct program for the evaluator to test
+        program_file = os.path.join(self.temp_dir, "program.py")
+        with open(program_file, 'w') as f:
+            f.write("def bubble_sort(arr):\n    return sorted(arr)\n")
+
+        # Run in a subprocess to verify it works across process boundaries
+        test_script = os.path.join(self.temp_dir, "run_eval.py")
+        with open(test_script, 'w') as f:
+            f.write(f"""
+import importlib.util
+spec = importlib.util.spec_from_file_location("evaluator", {eval_file!r})
+mod = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(mod)
+result = mod.evaluate({program_file!r})
+assert result["combined_score"] == 1.0, f"Expected 1.0, got {{result['combined_score']}}"
+assert result["tests_passed"] == 2, f"Expected 2, got {{result['tests_passed']}}"
+print("OK")
+""")
+
+        proc = subprocess.run(
+            [sys.executable, test_script],
+            capture_output=True, text=True, timeout=10
+        )
+        self.assertEqual(proc.returncode, 0, f"Subprocess failed: {proc.stderr}")
+        self.assertIn("OK", proc.stdout)
 
     def test_evolve_algorithm_basic(self):
         """Test evolve_algorithm with simple class"""
