SimHumanity/model1_foundation.slim at main · bhaller/SimHumanity · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
// model1_foundation.slim
//
// By Ben Haller, 3 June 2025
// Messer Lab, Cornell University

// This simulates humans with full genomes.  This version simulates all
// mutations including neutral mutations, so it is pretty slow.

// This model requires SLiM 5.0 or later to run.  For further information,
// see the publication associated with this repository.


// This user-defined function sets up paths for our data repository
// NOTE: the base repository path needs to be configured for your setup here!
function (void)defineRepositoryPaths(void)
{
	// Set the working directory to our data repository
	defineConstant("REPO_PATH", "/path/to/SimHumanity/");
	if (!fileExists(REPO_PATH))
		stop("incorrect repository path");
	setwd(REPO_PATH);

	// This is the subpath to the nuclear chromosome data files
	defineConstant("CHR_PATH", "stdpopsim extraction/extracted/");
	if (!fileExists(CHR_PATH))
		stop("malformed repository; missing extracted data");

	// This is the subpath to the mtDNA data files
	defineConstant("MT_PATH", "mtDNA info/");
	if (!fileExists(MT_PATH))
		stop("malformed repository; missing mitochondrial data");
}

// This loads genetic element data from `path` using readCSV() to configure
// the chromosome as a mosaic of coding (g1) and non-coding (g0) regions
function (void)initializeGenomicElementsFromFile(string$ gpath)
{
	ge_df = readCSV(gpath, colNames=c("id", "start", "end"));
	ge_ids = ge_df.getValue("id");
	ge_starts = ge_df.getValue("start");
	ge_ends = ge_df.getValue("end");
	initializeGenomicElement(ge_ids, ge_starts, ge_ends);
}

initialize() {
	// Find our data repository and subpaths within it
	defineRepositoryPaths();

	// Enable modeling of explicit nucleotides
	initializeSLiMOptions(nucleotideBased=T);

	// Enable separate sexes; we want to simulate sex chromosomes
	initializeSex();

	// Population size N
	defineConstant("N", 1000);

	// Define constants for the DFE that stdpopsim calls "PosNeg_R24",
	// from Rodrigues et al., 2024 (https://doi.org/10.1093/genetics/iyae006)
	// MU_NEUTR is the neutral mutation rate within coding regions; in
	// non-coding regions all mutations are neutral so it uses MU_TOTAL
	defineConstant("MU_TOTAL", 2.0e-8);
	defineConstant("MU_BENEF", 1e-12);
	defineConstant("MU_DELET", 1.2e-8);
	defineConstant("MU_NEUTR", MU_TOTAL - (MU_BENEF + MU_DELET));

	// Mutation type m0: neutral mutations
	// Mutation type m1: deleterious mutations
	// Mutation type m2: beneficial mutations
	initializeMutationTypeNuc("m0", 0.5, "f", 0.0);
	initializeMutationTypeNuc("m1", 0.5, "g", -0.03, 0.16);
	initializeMutationTypeNuc("m2", 0.5, "e", 0.01);

	// We use the Jukes-Cantor mutational model with a constant mutation rate
	mutMatrix = mmJukesCantor(MU_TOTAL / 3.0);

	// Genomic element type g0: non-coding regions
	// Genomic element type g1: coding regions
	initializeGenomicElementType("g0", m0, 1.0, mutMatrix);
	initializeGenomicElementType("g1", c(m0, m1, m2),
		c(MU_NEUTR, MU_DELET, MU_BENEF), mutMatrix);

	// Define the ids, symbols, types, and lengths of all nuclear chromosomes
	ids = 1:24;
	symbols = c(1:22, "X", "Y");
	types = c(rep("A", 22), "X", "Y");
	lengths = c(248956422, 242193529, 198295559, 190214555, 181538259,
		170805979, 159345973, 145138636, 138394717, 133797422, 135086622,
		133275309, 114364328, 107043718, 101991189, 90338345, 83257441,
		80373285, 58617616, 64444167, 46709983, 50818468, 156040895,
		57227415);

	for (id in ids, symbol in symbols, type in types, length in lengths)
	{
		initializeChromosome(id, length, type, symbol);

		// Use a random initial nucleotide sequence; this could be read from FASTA
		initializeAncestralNucleotides(randomNucleotides(length));

		// Read the recombination rate map from a file
		rpath = CHR_PATH + "chr" + symbol + "_recombination.txt";
		initializeRecombinationRateFromFile(rpath, length-1, scale=1.0, sep=",");

		// Read the genomic element structure of coding vs. non-coding regions
		// from another file, which gives genomic element types and positions
		gpath = CHR_PATH + "chr" + symbol + "_genomic_elements.txt";
		initializeGenomicElementsFromFile(gpath);
	}

	// Handle the mtDNA separately since its data is in a different place
	// It also loads a FASTA file for its sequence, as a proof of concept
	initializeChromosome(25, 16569, "HF", "MT");
	initializeAncestralNucleotides(MT_PATH + "mtDNA_MOD.FASTA");
	initializeRecombinationRate(0.0);
	initializeHotspotMap(20.0);
	initializeGenomicElementsFromFile(MT_PATH + "chrMT_genomic_elements.txt");
}

1 early() {
	// This version just simulates N individuals in a single subpop
	sim.addSubpop("p1", N);
}

10*N late() {
}