-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodel1_foundation.slim
More file actions
125 lines (102 loc) · 4.56 KB
/
model1_foundation.slim
File metadata and controls
125 lines (102 loc) · 4.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
// model1_foundation.slim
//
// By Ben Haller, 3 June 2025
// Messer Lab, Cornell University
// This simulates humans with full genomes. This version simulates all
// mutations including neutral mutations, so it is pretty slow.
// This model requires SLiM 5.0 or later to run. For further information,
// see the publication associated with this repository.
// This user-defined function sets up paths for our data repository
// NOTE: the base repository path needs to be configured for your setup here!
function (void)defineRepositoryPaths(void)
{
// Set the working directory to our data repository
defineConstant("REPO_PATH", "/path/to/SimHumanity/");
if (!fileExists(REPO_PATH))
stop("incorrect repository path");
setwd(REPO_PATH);
// This is the subpath to the nuclear chromosome data files
defineConstant("CHR_PATH", "stdpopsim extraction/extracted/");
if (!fileExists(CHR_PATH))
stop("malformed repository; missing extracted data");
// This is the subpath to the mtDNA data files
defineConstant("MT_PATH", "mtDNA info/");
if (!fileExists(MT_PATH))
stop("malformed repository; missing mitochondrial data");
}
// This loads genetic element data from `path` using readCSV() to configure
// the chromosome as a mosaic of coding (g1) and non-coding (g0) regions
function (void)initializeGenomicElementsFromFile(string$ gpath)
{
ge_df = readCSV(gpath, colNames=c("id", "start", "end"));
ge_ids = ge_df.getValue("id");
ge_starts = ge_df.getValue("start");
ge_ends = ge_df.getValue("end");
initializeGenomicElement(ge_ids, ge_starts, ge_ends);
}
initialize() {
// Find our data repository and subpaths within it
defineRepositoryPaths();
// Enable modeling of explicit nucleotides
initializeSLiMOptions(nucleotideBased=T);
// Enable separate sexes; we want to simulate sex chromosomes
initializeSex();
// Population size N
defineConstant("N", 1000);
// Define constants for the DFE that stdpopsim calls "PosNeg_R24",
// from Rodrigues et al., 2024 (https://doi.org/10.1093/genetics/iyae006)
// MU_NEUTR is the neutral mutation rate within coding regions; in
// non-coding regions all mutations are neutral so it uses MU_TOTAL
defineConstant("MU_TOTAL", 2.0e-8);
defineConstant("MU_BENEF", 1e-12);
defineConstant("MU_DELET", 1.2e-8);
defineConstant("MU_NEUTR", MU_TOTAL - (MU_BENEF + MU_DELET));
// Mutation type m0: neutral mutations
// Mutation type m1: deleterious mutations
// Mutation type m2: beneficial mutations
initializeMutationTypeNuc("m0", 0.5, "f", 0.0);
initializeMutationTypeNuc("m1", 0.5, "g", -0.03, 0.16);
initializeMutationTypeNuc("m2", 0.5, "e", 0.01);
// We use the Jukes-Cantor mutational model with a constant mutation rate
mutMatrix = mmJukesCantor(MU_TOTAL / 3.0);
// Genomic element type g0: non-coding regions
// Genomic element type g1: coding regions
initializeGenomicElementType("g0", m0, 1.0, mutMatrix);
initializeGenomicElementType("g1", c(m0, m1, m2),
c(MU_NEUTR, MU_DELET, MU_BENEF), mutMatrix);
// Define the ids, symbols, types, and lengths of all nuclear chromosomes
ids = 1:24;
symbols = c(1:22, "X", "Y");
types = c(rep("A", 22), "X", "Y");
lengths = c(248956422, 242193529, 198295559, 190214555, 181538259,
170805979, 159345973, 145138636, 138394717, 133797422, 135086622,
133275309, 114364328, 107043718, 101991189, 90338345, 83257441,
80373285, 58617616, 64444167, 46709983, 50818468, 156040895,
57227415);
for (id in ids, symbol in symbols, type in types, length in lengths)
{
initializeChromosome(id, length, type, symbol);
// Use a random initial nucleotide sequence; this could be read from FASTA
initializeAncestralNucleotides(randomNucleotides(length));
// Read the recombination rate map from a file
rpath = CHR_PATH + "chr" + symbol + "_recombination.txt";
initializeRecombinationRateFromFile(rpath, length-1, scale=1.0, sep=",");
// Read the genomic element structure of coding vs. non-coding regions
// from another file, which gives genomic element types and positions
gpath = CHR_PATH + "chr" + symbol + "_genomic_elements.txt";
initializeGenomicElementsFromFile(gpath);
}
// Handle the mtDNA separately since its data is in a different place
// It also loads a FASTA file for its sequence, as a proof of concept
initializeChromosome(25, 16569, "HF", "MT");
initializeAncestralNucleotides(MT_PATH + "mtDNA_MOD.FASTA");
initializeRecombinationRate(0.0);
initializeHotspotMap(20.0);
initializeGenomicElementsFromFile(MT_PATH + "chrMT_genomic_elements.txt");
}
1 early() {
// This version just simulates N individuals in a single subpop
sim.addSubpop("p1", N);
}
10*N late() {
}