1+ import array;
2+ import string;
3+
// Parameters of the linear congruential generator used by fasta_rand:
//   seed = (seed * IA + IC) % IM
let IM = 139968;   // modulus
let IA = 3877;     // multiplier
let IC = 29573;    // increment
let SEED = 42;     // initial generator state

// Mutable generator state, advanced by every call to fasta_rand.
let seed = SEED;
10+
// Advances the shared generator state `seed` by one step and returns the
// new state scaled by max / IM.
//
// max: upper bound used to scale the result.
// Side effect: overwrites the file-level `seed`.
function fasta_rand(max) {
    let next = (seed * IA + IC) % IM;
    seed = next;
    return max * next / IM;
}
15+
// Source sequence for the ">ONE" section: the pieces below are concatenated
// into one string and then split into an array of characters, which
// repeat_fasta cycles through.
let ALU = string.chars(
    string.concat(
        "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG",
        "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA",
        "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT",
        "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA",
        "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG",
        "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC",
        "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA",
    )
);
25+
// Symbol table for the ">TWO" section. IUB_P[k] is the probability of
// emitting IUB[k]; the two arrays are parallel and have the same length.
let IUB = string.chars("acgtBDHKMNRSVWY");

let IUB_P = [
    0.27,
    0.12,
    0.12,
    0.27,
    0.02, 0.02, 0.02, 0.02, 0.02, 0.02,
    0.02, 0.02, 0.02, 0.02, 0.02
];

// Symbol table for the ">THREE" section, again as parallel arrays of
// symbols and their probabilities.
let HomoSapiens = string.chars("acgt");
let HomoSapiens_P = [
    0.3029549426680, 0.1979883004921,
    0.1975473066391, 0.3015094502008
];

// Number of symbols printed per output line.
let LINELEN = 60;
42+
// Prints the first n symbols of the endless repetition of `seq`, wrapped
// into lines of LINELEN symbols; a final shorter line is printed when n is
// not a multiple of LINELEN.
//
// seq: array of characters to cycle through.
// n:   total number of symbols to print.
function repeat_fasta(seq, n) {
    let period = array.length(seq);
    let line = array.new(LINELEN);
    // Next free slot in `line`; always equal to (symbols emitted) % LINELEN.
    let col = 0;
    for (let k = 0; k < n; k = k + 1) {
        line[col] = seq[k % period];
        col = col + 1;
        if (col == LINELEN) {
            // Buffer is full: flush it as one complete line and start over.
            println(string.from_chars(line));
            col = 0;
        }
    }
    if (col != 0) {
        // Flush the partially filled last line.
        println(string.from_chars(array.slice(line, 0, col)));
    }
}
57+
// Prints n randomly chosen symbols from `seq`, wrapped into lines of
// LINELEN symbols; a final shorter line is printed when n is not a
// multiple of LINELEN.
//
// seq:         array of candidate characters.
// probability: array parallel to `seq`; probability[k] is the weight of
//              seq[k]. The weights are expected to sum to (about) 1.
// n:           total number of symbols to print.
//
// Each symbol is chosen by drawing fasta_rand(1.0) and subtracting the
// weights in order until the remainder goes negative. Every draw advances
// the shared generator state.
function random_fasta(seq, probability, n) {
    let length = array.length(seq);
    let i;
    let b = array.new(LINELEN);
    for (i = 0; i < n; i = i + 1) {
        let v = fasta_rand(1.0);
        // The index is declared outside the scan loop because it is read
        // after the loop ends.
        let j;
        // Only the first length - 1 symbols are tested: if none of them is
        // selected, j stops at the last symbol, which absorbs whatever
        // probability is left. This keeps j in bounds even when rounding
        // makes the weights sum to slightly less than the drawn value.
        for (j = 0; j < length - 1; j = j + 1) {
            v = v - probability[j];
            if (v < 0) {
                break;
            }
        }
        b[i % LINELEN] = seq[j];
        if (i % LINELEN == LINELEN - 1) {
            println(string.from_chars(b));
        }
    }
    if (i % LINELEN != 0) {
        // Flush the partially filled last line.
        println(string.from_chars(array.slice(b, 0, i % LINELEN)));
    }
}
79+
// Scale factor for the three sections; 1000 unless the first command-line
// argument supplies another integer.
let n = 1000;
// The argument count is compared explicitly so the check does not depend on
// how the language converts an integer to a boolean.
if (0 < array.length(argv())) {
    n = string.parse_integer(argv()[0]);
}

// Section one: 2n symbols cycling through ALU.
println(">ONE Homo sapiens alu");
repeat_fasta(ALU, n * 2);

// Section two: 3n symbols drawn from the IUB table.
println(">TWO IUB ambiguity codes");
random_fasta(IUB, IUB_P, n * 3);

// Section three: 5n symbols drawn from the HomoSapiens table. The generator
// state carries over from section two.
println(">THREE Homo sapiens frequency");
random_fasta(HomoSapiens, HomoSapiens_P, n * 5);
0 commit comments