-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodel2_sweeps.R
More file actions
158 lines (110 loc) · 15.7 KB
/
model2_sweeps.R
File metadata and controls
158 lines (110 loc) · 15.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# model2_sweeps.R
#
# Benjamin C. Haller, 22 June 2025
#
# This R script has the raw data for all beneficial substitutions from model 2 (as shown in Fig. 2).
# After getting the data into a usable form, maybe we can find something to do with it...? This
# really goes beyond the scope of the paper, though; we're not trying to find interesting results,
# we're just trying to show off the models themselves. So this is not for publication, just for fun.
library("viridis")
# raw data from the end of model 2, for all beneficial substitutions
positions <- as.integer(unlist(strsplit("114676411 150520165 394924 61280748 58580333 111708052 38085347 1388547 201781011 79783761 77005745 24414584 197535579 24369867 33519869 48176110 53255957 157139188 47112487 77048386 93068511 65581567 31843498 101347919 6088071 93934725 39458263 32092885 34393922 45417520 46550586 111756134 100655163 1792126 76482806 108852513 100407221 1757192 189738726 32625132 165300422 6601042 19306037 67505256 69730629 17104796 93294552 183982867 131675149 67494350 61118455 39381704 122936760 41014134 36289099 83462046 20801338 42250471 8396938 27963651 33180313 78100970 43615495 50925101 157646725 74843352 11217031 50989720 31662059 39061861 22917812 62746969 116699397 39199299 36864957 32078891 112907061 63493272 112235530 41887258 138837935 236183861 73422623 93702782 47334729 156737454 64010105 6750303 109423902 70664282 114295692 103153885 74733944 46581137 73425483 43576475 66364421 77044770 32515681 57600890 109837815 178730334 166849459 32249926 144054731 128818706 63045499 89585808 154540643 203107489 37008154 44782472 71069309 41479275 109870234 129267074 28191978 13390675 19047205 33153483 172441861 71668798 44780965 76627450 29733252 25038983 32149628 31620606 87257932 43583070 152681271 22492567 62585209 220164775 16006199 30759074 38771830 61380260 70970021 45561464 108010799 155071315 8890900 31305244 14516891 50779908 119638276 62759865 85621550 49962142 152177277 71510075 150951528 27717474 41076221 39584147 24678387 70156303 2667128 55181257 66920459 58549823 53014837 86548568 42180327 1054609 28701069 17486752 203395748 18461843 41965196 58330154 62547054 72358396 146144485 66987660 207946433 169186005 44884413 13622422 122514288 56640407 88334977 119626637 103940843 135572821 100819582 184326528 142559352 67843199 233671996 123090238 44176470 26541339 44073270 29299275 1754256 48973274 51452463 178731530 815815 47247434 158934438 15751811 69895998 17331345 32434783 78513194 16698262 36245587 9919689 78122686 146515179 77768792 63297996 47121198 148469714 131981569 146014024 27623755 4575825 15161326 68836996 201786549 44154575 62790845 20742997 100594306 15266 30202775 78025420 75493859 99982767 76166298 178795251 32529568 91295556 56717627 75049695 138131230 53854658 47751166 157766904 45472903 147620067 21906265 31996215 165299606 183095471 65737272 32818817 8229964 21430089 152974929 112134561 13504251 24055046 152148154 232806290 9743172 143698911 30019609 5163955 43773223 4316936 80363161 88857529 17530800 85700538 54157239 113195613 86942616 248917635 75890042 1103729 60892878 100997011 101356565 69586283 58455397 68229259 31455280 85633235 30612209 46662249 66176986 140679838 177294686 153261019 51345286 12610596 28064587 122910903 9204267 100400938 14158584 127388908 184672209 74804545 121496882 48954930 98600124 103687486 127708866 35556942 87887302 747520 30881265 45168013 51943442 129476708 95522055 114463434 84149572 98080970", " ", fixed=T)))
originTick <- as.integer(unlist(strsplit("739 1215 2165 2035 264 1497 2317 3027 3727 2991 2087 3263 3440 2608 3404 3511 5541 2246 4778 5679 6393 5760 4548 6657 5267 5376 6945 6755 9081 8567 4417 9660 10957 10258 9673 8543 11246 10612 10684 12065 11376 12014 12179 12657 7883 11460 12908 13082 13464 13245 12311 13776 13569 12586 15207 13708 13711 8329 16649 16102 17461 15356 17785 16256 16971 18296 19156 19480 20092 19315 18682 19802 16122 19017 21019 18510 19849 20803 20197 21801 21920 21562 22224 21833 22804 22750 23156 23490 24091 22020 23864 24784 23946 24640 24143 23978 25353 25536 21643 21488 25839 26443 25086 26299 26602 24873 27781 28062 28130 28344 28062 27551 27916 28039 28646 28682 28338 28860 29726 30920 30043 29605 31085 31325 29233 31874 32370 32014 33188 33913 33786 34815 32572 34646 34665 35006 35615 35292 34779 35766 34051 36102 36468 35470 31957 36077 38455 38448 35115 20442 38675 38754 39409 38708 35492 38862 39846 39621 39570 39771 35078 41197 41627 42120 41119 42030 42367 41671 37351 42162 43260 41766 42666 41598 43022 43688 42823 44126 44049 42380 44029 43714 44388 44608 46428 47717 45423 42473 46861 47443 47758 45078 49184 48074 48206 49982 50410 51582 52005 52345 52466 50125 50056 49997 52848 52691 49089 52097 49102 49772 51585 53435 52992 49032 48078 53566 51122 54300 53412 55211 55291 55244 55723 55588 55131 55989 55727 57060 56947 56295 57744 59953 59763 52261 57085 59466 60208 60819 51397 61033 59378 61635 60969 60513 62096 60234 59901 62351 62535 61803 63349 58417 62327 63521 62530 63915 63715 62845 64493 64710 64113 63093 65042 66682 66803 66673 67393 64084 67472 68596 68942 68996 68124 69327 69372 69935 69354 69923 67910 70794 66395 70195 69688 70049 67067 71226 71776 72222 72365 72476 69413 71890 73334 73088 73659 74105 73343 74871 73830 71908 75812 74969 75515 73796 75819 74238 72237 74335 76698 75310 77286 76498 75819 77260 77174", " ", fixed=T)))
fixationTick <- as.integer(unlist(strsplit("1366 2032 3022 3051 3521 3668 4169 4186 4696 4800 4816 4977 5072 5570 5669 5893 5909 5915 6205 6355 7035 7065 7176 7333 7342 7439 7559 8054 9754 9818 10711 10902 11518 11554 11861 11869 12017 12049 12337 12700 12794 13130 13197 13497 13806 14398 14515 14593 14612 14665 14724 15144 15169 15607 16295 16487 16972 17674 17716 17924 18270 18555 18712 18783 18961 19701 20279 20422 20485 20612 20646 20935 21136 21238 21514 21727 21743 21783 21833 22839 22884 23046 23544 23588 23836 23857 23946 23957 24620 24744 24826 25340 25648 25652 25701 26046 26198 26252 26734 26872 26913 26943 27023 27735 28258 28417 28540 28725 29027 29087 29504 29621 29626 29866 30399 30426 30672 30840 31384 31690 31850 32145 32372 32425 32439 33358 33582 33628 33767 34802 35604 35671 35729 35973 36128 36217 36339 36611 36682 36882 37044 37323 37353 37444 38538 38673 38930 39116 39560 39765 39937 39938 39989 40043 40303 40836 40876 40886 41456 41848 42207 42279 42518 42895 42976 43110 43126 43209 43245 43446 43815 43823 43824 43866 44034 44581 44800 44906 45094 45114 45365 45378 45542 45624 47683 48245 48331 48477 48949 49586 49758 49896 50000 50573 50975 51090 51661 52307 52604 52983 53059 53081 53189 53383 53384 53462 53669 53735 53840 54395 54408 54692 54779 54870 54975 55170 55342 56149 56158 56300 56428 56538 56695 56874 56926 57622 58191 58377 58389 59212 59426 60307 60328 60826 60960 60969 60977 62095 62390 62459 62490 62800 62898 62943 63599 63613 63733 63767 63796 63917 63949 63970 64044 64671 64682 64736 65065 65204 65376 65447 65601 65875 66476 67427 67940 67992 68318 68420 68603 69787 69832 69956 70129 70428 70519 70834 70924 71269 71317 71578 71751 71978 72155 72181 72406 72486 72940 73029 73242 73530 73539 73713 74137 74567 74974 75340 75739 76146 76347 76442 76454 76711 76871 77046 77094 77163 77535 77699 77713 77801 77820 78222 78279 78761 79006", " ", fixed=T)))
chromosome_id <- as.integer(unlist(strsplit("12 7 6 14 18 3 22 19 1 17 4 22 2 14 1 4 23 5 19 4 10 11 10 3 17 14 4 18 19 3 21 1 3 16 7 1 23 1 3 15 6 12 1 4 4 22 14 3 6 15 11 22 5 4 21 11 2 19 4 12 6 15 15 12 5 11 19 17 12 14 15 17 9 21 22 13 12 20 12 13 7 1 8 11 3 1 14 12 11 16 3 5 7 3 6 15 11 12 2 12 2 2 1 2 8 9 2 15 4 2 13 19 16 21 4 3 15 1 16 3 1 2 19 17 15 22 6 22 6 6 2 7 3 1 1 16 19 2 4 13 2 7 8 17 21 16 2 17 14 12 7 5 7 3 20 15 15 8 17 14 9 19 19 16 8 19 17 19 2 19 7 14 12 5 5 9 2 2 7 6 10 6 11 8 14 23 7 3 3 16 2 5 19 22 7 22 19 20 13 2 19 3 7 3 18 19 11 15 23 21 17 17 5 7 20 3 2 12 5 1 16 19 18 1 17 11 7 23 25 16 15 17 13 4 2 3 8 12 2 3 2 12 1 19 1 12 6 4 2 9 3 17 7 6 6 3 22 7 1 3 4 22 9 15 3 17 15 19 11 20 12 10 1 4 10 10 7 12 5 19 4 22 9 14 16 8 5 5 1 19 19 8 9 11 3 3 9 4 5 11 19 12 1 2 19 9 11 21 15 3 3 5 1 6 9", " ", fixed=T)))
selCoeff <- as.numeric(unlist(strsplit("0.037493 0.034885 0.028166 0.0244732 0.00704684 0.00990825 0.0137227 0.0233991 0.0236335 0.0118358 0.00801849 0.0159076 0.014355 0.00807745 0.00881123 0.0118945 0.0526664 0.00613699 0.0164754 0.0452474 0.0389108 0.0242344 0.012785 0.0328862 0.0145231 0.00936877 0.045823 0.0180606 0.0398249 0.0179892 0.00287251 0.0187479 0.0526677 0.0232279 0.010986 0.00458119 0.025549 0.0203322 0.0126405 0.0383543 0.0158676 0.0255987 0.0231843 0.0258271 0.00518204 0.00825622 0.0105126 0.0150474 0.0211461 0.0168396 0.00571129 0.0174764 0.0156775 0.00588018 0.0206585 0.00585712 0.00444942 0.00179349 0.0242555 0.01386 0.0236266 0.00771808 0.0297327 0.00963609 0.0116747 0.0204557 0.0191064 0.0332295 0.0881813 0.0289701 0.0102825 0.0158924 0.00646827 0.00712189 0.067582 0.00711562 0.010252 0.0245564 0.0183476 0.0293946 0.0227836 0.0130147 0.0146367 0.0139848 0.0167586 0.028291 0.0323965 0.0582903 0.0574125 0.00747642 0.0214688 0.0509715 0.00951984 0.0234714 0.0129071 0.0106526 0.044281 0.0344711 0.00484513 0.00262852 0.0201896 0.0621104 0.0132607 0.0219918 0.00948051 0.00716249 0.0333566 0.0369632 0.02842 0.0298786 0.0122819 0.011747 0.0216918 0.0102286 0.0117511 0.0169153 0.00771501 0.0126875 0.0133993 0.031811 0.0108951 0.0144323 0.0239621 0.024426 0.00513396 0.012363 0.0231366 0.0130525 0.0373894 0.0242695 0.010142 0.0279515 0.00816289 0.0187372 0.0167987 0.0227722 0.0253807 0.0199648 0.0158316 0.0245341 0.00622701 0.0202904 0.0324305 0.0102048 0.00296684 0.00832112 0.0525002 0.0436065 0.00511477 0.00130251 0.0202928 0.0253378 0.0496292 0.0205447 0.00509261 0.0127598 0.024653 0.016261 0.0118908 0.0131789 0.000997967 0.0225996 0.0276101 0.0379099 0.0116473 0.0263648 0.0299251 0.0175366 0.0021735 0.0186054 0.0437532 0.0107095 0.0232802 0.00903989 0.0277932 0.030729 0.0110819 0.0288013 0.0255838 0.00994139 0.017256 0.0185731 0.0210422 0.0220007 0.0183403 0.0354131 0.00866172 0.0054164 0.0079521 0.00997876 0.0141851 0.00504528 0.0252636 0.00996444 0.00694358 0.0205679 0.0195551 0.0393085 0.0443456 0.0513162 0.0455407 0.00981883 0.00660554 0.00760727 0.0490033 0.0311178 0.00330865 0.0141921 0.00177555 0.00302315 0.00745162 0.0246739 0.0165225 0.00265399 0.00365134 0.015026 0.00539455 0.0107187 0.00845039 0.0193136 0.0230748 0.0212432 0.0272943 0.021206 0.0147797 0.0143222 0.00839303 0.0109409 0.00375551 0.00857228 0.0145817 0.0761828 0.0432245 0.000987702 0.00611502 0.0153649 0.0365279 0.0224146 0.00108753 0.020755 0.00631029 0.0207228 0.013635 0.00873725 0.0176926 0.00659318 0.00628273 0.0190094 0.0252738 0.011699 0.0414709 0.00372836 0.0120818 0.0157737 0.0125845 0.0317258 0.0220791 0.0089842 0.0351921 0.0389333 0.0208295 0.00666618 0.0176063 0.0291547 0.0225827 0.0234811 0.024568 0.00352159 0.0216144 0.01466 0.0295314 0.0306855 0.0118617 0.0228048 0.0255205 0.0271305 0.0146444 0.0123325 0.00611845 0.0303277 0.00357175 0.0149346 0.0122669 0.00894748 0.00276144 0.0210147 0.0225453 0.0302374 0.0310278 0.0323406 0.00439292 0.0130176 0.032826 0.0171394 0.0197658 0.0240607 0.00961653 0.0222112 0.0108054 0.00666862 0.0426327 0.0263675 0.0182871 0.00737623 0.0234922 0.00715158 0.00393342 0.00712909 0.0330494 0.0101293 0.057158 0.0158086 0.0105199 0.024849 0.021941", " ", fixed=T)))
# plot the duration of a sweep against its selection coefficient; a larger selection coefficient should tend to produce a faster sweep
duration <- fixationTick - originTick
plot(x=selCoeff, y=duration, pch=19, cex=0.3, col="red")
# add colors to the previous plot indicating how many other sweeps were happening at the same time
duration <- fixationTick - originTick
sweepCount <- NULL
for (i in seq_along(duration))
{
this_origin <- originTick[i]
this_fixation <- fixationTick[i]
simultaneous <- (originTick <= this_fixation) & (fixationTick >= this_origin) & (seq_along(duration) != i)
sweepCount <- c(sweepCount, sum(simultaneous))
}
maxCount <- max(sweepCount)
colors <- plasma(maxCount + 1)
plot(x=selCoeff, y=duration, pch=19, cex=0.3, col=colors[sweepCount + 1])
# instead, color by how much time is spent shared with other sweeps, weighted by the selection coefficients of those sweeps
duration <- fixationTick - originTick
sweepWeight <- NULL
for (i in seq_along(duration))
{
this_origin <- originTick[i]
this_fixation <- fixationTick[i]
is_simultaneous <- (originTick <= this_fixation) & (fixationTick >= this_origin) & (seq_along(duration) != i)
# this narrows down to just sweeps on the same chromosome; physical linkage should be important
is_simultaneous <- is_simultaneous & (chromosome_id == chromosome_id[i])
simul_origin <- originTick[is_simultaneous]
simul_fix <- fixationTick[is_simultaneous]
overlap <- pmin(this_fixation, simul_fix) - pmax(this_origin, simul_origin)
weighted_overlap <- overlap * selCoeff[is_simultaneous]
sweepWeight <- c(sweepWeight, sum(weighted_overlap))
}
# scale sweepWeight values by the duration of the focal mutation; the question is, *per* unit time
# spent sweeping, how many other sweeps are you sharing your duration with?
sweepWeight <- sweepWeight / (fixationTick - originTick)
# let's try a sqrt transform to bring out the detail at the low end more
sweepWeight <- sqrt(sweepWeight)
maxWeight <- max(sweepWeight)
colors <- plasma(100)
plot(x=selCoeff, y=duration, pch=19, cex=0.3, col=colors[(sweepWeight / maxWeight) * 99 + 1])
# this adds a color stripe showing the scale of the sweepWeight colors; toward black is a very low
# sweepWeight value, indicating that the sweep proceeded by itself, whereas towards yellow is a
# high sweepWeight value, indicating that the sweep was accompanied by many other sweeps
for (i in 0:99)
{
bottom <- 3000 + (9000-3000) * (i / 100)
top <- 3000 + (9000-3000) * ((i + 1) / 100)
rect(xleft=0.10, ybottom=bottom, xright=0.105, ytop=top, col=colors[i+1], border=NA)
}
rect(xleft=0.10, ybottom=3000, xright=0.105, ytop=9000, border="black")
# this function is thanks to P. Messer, adapted a bit for use here; it simulates a bunch of sweep
# trajectories for the given parameters and returns the mean fixation time across 1000 runs
mean_fixation_time <- function(N, h, s)
{
times <- c()
i<-0
while(i<1000)
{
x <- 1/(2*N)
t <- 1
while(x>0 && x<1-1.000001/(2*N)) # allow for a bit of numerical error
{
#p <- ((1+s)*x^2 + (1+h*s)*x*(1-x)) / ((1+s)*x^2 + (1+h*s)*2*x*(1-x) + (1-x)^2)
p <- (1+h*s)*x/(1+h*s*x)
n <- rbinom(1,2*N,p)
x <- n/(2*N)
t <- t+1
}
if(x>0)
{
i <- i+1
times <- c(times,t)
}
}
return(mean(times));
}
time <- mean_fixation_time(N=7310, h=0.5, s=0.11)
print(time)
# use the above function to simulate sweeps for the range of s values spanning the plot
# note that this takes some time to complete; be patient! :->
Ne <- 7310 # the value of N for the bulk of the African population stage
selCoeffs <- seq(from=0.001, to=0.11, by=0.001)
expectedDurations <- sapply(selCoeffs, FUN = function(s) { mean_fixation_time(Ne, 0.5, s) })
lines(x=selCoeffs, y=expectedDurations, lwd=1.0, col="red")
# So, some partial conclusions. It is certainly apparent that the low-s mutations are more likely to
# sweep together with other mutations. But is this because for low s, you're likely to be lost unless
# you get helped by another sweep? Or is this because for low s, your sweep duration is longer, and so
# you share your duration with more other sweeps just due to that? I added the line:
#
# sweepWeight <- sweepWeight / (fixationTick - originTick)
#
# to answer this, and with that line added, the pattern seems to me to disappear, although one would
# have to do stats to be sure. But it doesn't look like there's a strong effect of needing help to
# complete for small s, nor does it look like there's a strong pattern of sweeping faster with other
# sweeps than by yourself. Maybe a little bit, but not super obviously.