|
11 | 11 | from concurrent.futures import ( |
12 | 12 | FIRST_COMPLETED, |
13 | 13 | ProcessPoolExecutor, |
| 14 | + ThreadPoolExecutor, |
14 | 15 | wait, |
15 | 16 | ) |
16 | 17 | from dataclasses import dataclass |
@@ -312,15 +313,13 @@ def _load_tasks_serial(self, kinds, kind_graph, parameters): |
312 | 313 |
|
313 | 314 | return all_tasks |
314 | 315 |
|
315 | | - def _load_tasks_parallel(self, kinds, kind_graph, parameters): |
| 316 | + def _load_tasks_parallel(self, kinds, kind_graph, parameters, executor): |
316 | 317 | all_tasks = {} |
317 | 318 | futures_to_kind = {} |
318 | 319 | futures = set() |
319 | 320 | edges = set(kind_graph.edges) |
320 | 321 |
|
321 | | - with ProcessPoolExecutor( |
322 | | - mp_context=multiprocessing.get_context("fork") |
323 | | - ) as executor: |
| 322 | + with executor: |
324 | 323 |
|
325 | 324 | def submit_ready_kinds(): |
326 | 325 | """Create the next batch of tasks for kinds without dependencies.""" |
@@ -434,29 +433,50 @@ def _run(self): |
434 | 433 | yield "kind_graph", kind_graph |
435 | 434 |
|
436 | 435 | logger.info("Generating full task set") |
437 | | - # The short version of the below is: we only support parallel kind |
438 | | - # processing on Linux. |
| 436 | + |
| 437 | + # The next block deals with enabling parallel kind processing, which |
| 438 | + # currently has different support on different platforms. In summary: |
| 439 | + # * Parallel kind processing is supported and enabled by default on |
| 440 | + # Linux. We use multiple processes by default, but experimental |
| 441 | + # support for multiple threads can be enabled instead. |
| 442 | + # * On other platforms, we have experimental support for parallel |
| 443 | + # kind processing with multiple threads. |
439 | 444 | # |
440 | | - # Current parallel generation relies on multiprocessing, and more |
441 | | - # specifically: the "fork" multiprocessing method. This is not supported |
442 | | - # at all on Windows (it uses "spawn"). Forking is supported on macOS, |
443 | | - # but no longer works reliably in all cases, and our usage of it here |
444 | | - # causes crashes. See https://github.com/python/cpython/issues/77906 |
445 | | - # and http://sealiesoftware.com/blog/archive/2017/6/5/Objective-C_and_fork_in_macOS_1013.html |
446 | | - # for more details on that. |
447 | | - # Other methods of multiprocessing (both "spawn" and "forkserver") |
448 | | - # do not work for our use case, because they cause global variables |
449 | | - # to be reinitialized, which are sometimes modified earlier in graph |
450 | | - # generation. These issues can theoretically be worked around by |
451 | | - # eliminating all reliance on globals as part of task generation, but |
452 | | - # is far from a small amount of work in users like Gecko/Firefox. |
453 | | - # In the long term, the better path forward is likely to be switching |
454 | | - # to threading with a free-threaded python to achieve similar parallel |
455 | | - # processing. |
456 | | - if platform.system() != "Linux" or os.environ.get("TASKGRAPH_SERIAL"): |
457 | | - all_tasks = self._load_tasks_serial(kinds, kind_graph, parameters) |
458 | | - else: |
459 | | - all_tasks = self._load_tasks_parallel(kinds, kind_graph, parameters) |
| 445 | + # On all platforms serial kind processing can be enabled by setting |
| 446 | + # TASKGRAPH_SERIAL in the environment. |
| 447 | + # |
| 448 | + # On all platforms, multiple threads can be enabled by setting |
| 449 | + # TASKGRAPH_USE_THREADS in the environment. Taskgraph must be running |
| 450 | + # from a free-threaded Python build to see any performance benefits. |
| 451 | + # |
| 452 | + # In the long term, the goal is to enable parallel kind processing by |
| 453 | + # default on all platforms using threads, and remove support for multiple |
| 454 | + # processes altogether. |
| 455 | + def load_tasks(): |
| 456 | + if platform.system() == "Linux": |
| 457 | + if os.environ.get("TASKGRAPH_SERIAL"): |
| 458 | + return self._load_tasks_serial(kinds, kind_graph, parameters) |
| 459 | + elif os.environ.get("TASKGRAPH_USE_THREADS"): |
| 460 | + executor = ThreadPoolExecutor(max_workers=os.process_cpu_count()) |
| 461 | + else: |
| 462 | + executor = ProcessPoolExecutor( |
| 463 | + mp_context=multiprocessing.get_context("fork") |
| 464 | + ) |
| 465 | + return self._load_tasks_parallel( |
| 466 | + kinds, kind_graph, parameters, executor |
| 467 | + ) |
| 468 | + else: |
| 469 | + if os.environ.get("TASKGRAPH_SERIAL") or not os.environ.get( |
| 470 | + "TASKGRAPH_USE_THREADS" |
| 471 | + ): |
| 472 | + return self._load_tasks_serial(kinds, kind_graph, parameters) |
| 473 | + else: |
| 474 | + executor = ThreadPoolExecutor(max_workers=os.process_cpu_count()) |
| 475 | + return self._load_tasks_parallel( |
| 476 | + kinds, kind_graph, parameters, executor |
| 477 | + ) |
| 478 | + |
| 479 | + all_tasks = load_tasks() |
460 | 480 |
|
461 | 481 | full_task_set = TaskGraph(all_tasks, Graph(frozenset(all_tasks), frozenset())) |
462 | 482 | yield self.verify("full_task_set", full_task_set, graph_config, parameters) |
|
0 commit comments