-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodel1_foundation_FIGURE.slim
More file actions
221 lines (181 loc) · 7.94 KB
/
model1_foundation_FIGURE.slim
File metadata and controls
221 lines (181 loc) · 7.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
// model1_foundation_FIGURE.slim
//
// By Ben Haller, 3 June 2025
// Messer Lab, Cornell University
// This simulates humans with full genomes. This version simulates all
// mutations including neutral mutations, so it is pretty slow.
// This model requires SLiM 5.0 or later to run. For further information,
// see the publication associated with this repository.
// This user-defined function sets up paths for our data repository
// NOTE: the base repository path needs to be configured for your setup here!
function (void)defineRepositoryPaths(void)
{
// Set the working directory to our data repository
defineConstant("REPO_PATH", "/path/to/SimHumanity/");
if (!fileExists(REPO_PATH))
stop("incorrect repository path");
setwd(REPO_PATH);
// This is the subpath to the nuclear chromosome data files
defineConstant("CHR_PATH", "stdpopsim extraction/extracted/");
if (!fileExists(CHR_PATH))
stop("malformed repository; missing extracted data");
// This is the subpath to the mtDNA data files
defineConstant("MT_PATH", "mtDNA info/");
if (!fileExists(MT_PATH))
stop("malformed repository; missing mitochondrial data");
}
// This loads genetic element data from `path` using readCSV() to configure
// the chromosome as a mosaic of coding (g1) and non-coding (g0) regions
function (void)initializeGenomicElementsFromFile(string$ gpath)
{
ge_df = readCSV(gpath, colNames=c("id", "start", "end"));
ge_ids = ge_df.getValue("id");
ge_starts = ge_df.getValue("start");
ge_ends = ge_df.getValue("end");
initializeGenomicElement(ge_ids, ge_starts, ge_ends);
}
initialize() {
// this seed value was used to produce Fig. 1 in SLiM 5.0
setSeed(1);
// Find our data repository and subpaths within it
defineRepositoryPaths();
// Enable modeling of explicit nucleotides
initializeSLiMOptions(nucleotideBased=T);
// Enable separate sexes; we want to simulate sex chromosomes
initializeSex();
// Population size N
defineConstant("N", 1000);
// Define constants for the DFE that stdpopsim calls "PosNeg_R24",
// from Rodrigues et al., 2024 (https://doi.org/10.1093/genetics/iyae006)
// MU_NEUTR is the neutral mutation rate within coding regions; in
// non-coding regions all mutations are neutral so it uses MU_TOTAL
defineConstant("MU_TOTAL", 2.0e-8);
defineConstant("MU_BENEF", 1e-12);
defineConstant("MU_DELET", 1.2e-8);
defineConstant("MU_NEUTR", MU_TOTAL - (MU_BENEF + MU_DELET));
// Mutation type m0: neutral mutations
// Mutation type m1: deleterious mutations
// Mutation type m2: beneficial mutations
initializeMutationTypeNuc("m0", 0.5, "f", 0.0);
initializeMutationTypeNuc("m1", 0.5, "g", -0.03, 0.16);
initializeMutationTypeNuc("m2", 0.5, "e", 0.01);
// We use the Jukes-Cantor mutational model with a constant mutation rate
mutMatrix = mmJukesCantor(MU_TOTAL / 3.0);
// Genomic element type g0: non-coding regions
// Genomic element type g1: coding regions
initializeGenomicElementType("g0", m0, 1.0, mutMatrix);
initializeGenomicElementType("g1", c(m0, m1, m2),
c(MU_NEUTR, MU_DELET, MU_BENEF), mutMatrix);
// Define the ids, symbols, types, and lengths of all nuclear chromosomes
ids = 1:24;
symbols = c(1:22, "X", "Y");
types = c(rep("A", 22), "X", "Y");
lengths = c(248956422, 242193529, 198295559, 190214555, 181538259,
170805979, 159345973, 145138636, 138394717, 133797422, 135086622,
133275309, 114364328, 107043718, 101991189, 90338345, 83257441,
80373285, 58617616, 64444167, 46709983, 50818468, 156040895,
57227415);
for (id in ids, symbol in symbols, type in types, length in lengths)
{
initializeChromosome(id, length, type, symbol);
// Use a random initial nucleotide sequence; this could be read from FASTA
initializeAncestralNucleotides(randomNucleotides(length));
// Read the recombination rate map from a file
rpath = CHR_PATH + "chr" + symbol + "_recombination.txt";
initializeRecombinationRateFromFile(rpath, length-1, scale=1.0, sep=",");
// Read the genomic element structure of coding vs. non-coding regions
// from another file, which gives genomic element types and positions
gpath = CHR_PATH + "chr" + symbol + "_genomic_elements.txt";
initializeGenomicElementsFromFile(gpath);
}
// Handle the mtDNA separately since its data is in a different place
// It also loads a FASTA file for its sequence, as a proof of concept
initializeChromosome(25, 16569, "HF", "MT");
initializeAncestralNucleotides(MT_PATH + "mtDNA_MOD.FASTA");
initializeRecombinationRate(0.0);
initializeHotspotMap(20.0);
initializeGenomicElementsFromFile(MT_PATH + "chrMT_genomic_elements.txt");
}
1 early() {
// This version just simulates N individuals in a single subpop
sim.addSubpop("p1", N);
}
10*N late() {
}
// Additional code to produce Fig. 1
// This normalizes an SFS to convert it to density
function (numeric)normalizeSFS(numeric sfs)
{
return sfs / sum(sfs);
}
// This combines adjacent SFS bins to decrease noise
function (numeric)rebinSFS(numeric sfs, integer$ binWidth)
{
originalCount = length(sfs);
newCount = integerDiv(originalCount, binWidth); // rounds down
rebinned = rep(0, newCount);
for (mod in 0:(binWidth - 1))
rebinned = rebinned + sfs[mod + (0:(newCount-1)) * binWidth];
return rebinned;
}
1 late() {
if (!exists("slimgui"))
stop("This version of the model must be run under SLiMgui.");
// The original SFS has count bins from 1 to 2N-1; we combine adjacent bins
defineConstant("SFS_MAX_COUNT", 2*N - 1);
defineConstant("SFS_COUNTS", 1:SFS_MAX_COUNT);
defineConstant("BIN_WIDTH", 10);
defineConstant("BIN_COUNT", integerDiv(SFS_MAX_COUNT, BIN_WIDTH));
defineConstant("SFS_BINS", 1:BIN_COUNT);
// Ticks at which to plot
defineConstant("PLOT_TICKS", seq(from=2*N, to=10*N, by=2*N));
// Calculate the expected SFS
expected = 1 / (1:SFS_MAX_COUNT);
expected_density = normalizeSFS(expected);
expected_rebin = normalizeSFS(rebinSFS(expected, BIN_WIDTH));
// Create our first plot window (not published)
// This is an untransformed SFS
plot1 = slimgui.createPlot(title="SFS ~ Generation",
xrange=c(1, 10), yrange=c(0, 0.20),
xlab="Allele count", ylab="Density",
width=600, height=400, axisLabelSize=20, tickLabelSize=13);
plot1.axis(1, 1:10);
plot1.axis(2, c(0, 0.1, 0.2));
plot1.addLegend("topRight", labelSize=13);
plot1.lines(x=SFS_COUNTS, y=expected_density, color="black", lwd=4.0);
plot1.legendLineEntry("Expected SFS", color="black", lwd=4.0);
// Create our second plot window (published)
// This is a log-transformed SFS
plot2 = slimgui.createPlot(title="SFS ~ Generation (log-transformed)",
xrange=c(1, BIN_COUNT), yrange=c(-11, 0),
xlab="Allele frequency (binned)", ylab="Density (log-transformed)",
width=600, height=400, axisLabelSize=20, tickLabelSize=13);
plot2.axis(1, c(1, BIN_COUNT), c("lowest", "highest"));
plot2.axis(2, seq(-10, 0, by=2));
plot2.addLegend("topRight", labelSize=13);
plot2.lines(x=SFS_BINS, y=log(expected_rebin), color="black", lwd=4.0);
plot2.legendLineEntry("Expected SFS", color="black", lwd=4.0);
}
PLOT_TICKS late() {
// Figure out the color we want to plot in, and calculate our color
index = which(community.tick == PLOT_TICKS);
color = colors(length(PLOT_TICKS) + 2, "turbo")[index + 1]; // exclude ends
// Calculate the SFS, across autosomes, and normalize to get densities
sfs = rep(0, SFS_MAX_COUNT);
for (chr in sim.chromosomesOfType("A"))
{
haplosomes = p1.haplosomesForChromosomes(chr);
muts = sim.subsetMutations(mutType=m0, chromosome=chr);
chr_sfs = calcSFS(NULL, haplosomes, muts, "count");
sfs = sfs + chr_sfs;
}
sfs_density = normalizeSFS(sfs);
sfs_rebin = normalizeSFS(rebinSFS(sfs, BIN_WIDTH));
// Add a line to the plots
plot1 = slimgui.plotWithTitle("SFS ~ Generation");
plot1.lines(x=SFS_COUNTS, y=sfs_density, color=color, lwd=2.0);
plot1.legendLineEntry("Gen. " + community.tick, color=color, lwd=2.0);
plot2 = slimgui.plotWithTitle("SFS ~ Generation (log-transformed)");
plot2.lines(x=SFS_BINS, y=log(sfs_rebin), color=color, lwd=2.0);
plot2.legendLineEntry("Gen. " + community.tick, color=color, lwd=2.0);
}