-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathmle.py
More file actions
executable file
·113 lines (100 loc) · 3.63 KB
/
mle.py
File metadata and controls
executable file
·113 lines (100 loc) · 3.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env python
#=========================================================================
# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
# License (GPL) version 3, as described at www.opensource.org.
# Copyright (C)2021 William H. Majoros <bmajoros@alumni.duke.edu>
#=========================================================================
from __future__ import (absolute_import, division, print_function,
unicode_literals, generators, nested_scopes, with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii,
chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3. You might need to update your version of module "future".
import sys
import math
import numpy as np
from scipy.optimize import minimize
from scipy.optimize import Bounds
import ProgramName
import gzip
from Rex import Rex
rex=Rex()
global numDna
global numRna
def log(x):
return math.log(x)
def lgamma(x):
return math.lgamma(x)
def logLik(sumX,numX,rna,theta,alpha,beta,sumDnaLibs,RnaLibs):
#print("theta=",theta)
numX=1
total=0
for i in range(numRna):
Yj=rna[i]
libRatio=RnaLibs[i]/sumDnaLibs
thetaL=theta*libRatio
# With Jacobian correction:
LL=(sumX+alpha)*log(beta+numX)+(Yj-1)*log(thetaL)+lgamma(Yj+sumX+alpha)+\
-lgamma(sumX+alpha)-lgamma(Yj+1)-\
(Yj+sumX+alpha)*log(thetaL+beta+numX)
# Without Jacobian correction:
LL=(sumX+alpha)*log(beta+numX)+Yj*log(thetaL)+lgamma(Yj+sumX+alpha)+\
-lgamma(sumX+alpha)-lgamma(Yj+1)-\
(Yj+sumX+alpha)*log(thetaL+beta+numX)
total+=LL
#print(LL)
return total
def getClosure(data):
def f(theta):
theta=theta[0]
totalLogLik=0
for case in data:
totalLogLik+=logLik(case.dna,numDna,case.rna,theta,1e-10,
1e-10,case.dnaLibSum,case.rnaLibs)
return -totalLogLik
return f
class Case:
def __init__(self,dna,rna,dnaLibSum,rnaLibs):
self.dna=dna
self.rna=rna
self.dnaLibSum=dnaLibSum
self.rnaLibs=rnaLibs
#=========================================================================
# main()
#=========================================================================
if(len(sys.argv)!=3):
exit(ProgramName.get()+" <in-counts.txt.gz> <max-data>\n")
(inCountsFile,maxData)=sys.argv[1:]
maxData=int(maxData)
# Read data
data=[]
lineNum=1
with gzip.open(inCountsFile,"rt") as IN:
header=IN.readline()
if(not rex.find("DNA=(\\d+)\\s+RNA=(\\d+)",header)):
raise Exception("expecting RNA=# DNA=# header")
numDna=int(rex[1]); numRna=int(rex[2])
firstLib=numDna+numRna
for line in IN:
fields=line.rstrip().split()
fields=[int(x) for x in fields]
dnaSum=float(sum(fields[:numDna]))
dnaLibSum=sum(fields[firstLib:(firstLib+numDna)])
rnaLibs=fields[(firstLib+numDna):]
rna=fields[numDna:(numDna+numRna)]
#for i in range(numRna):
# naive=(rna[i]/rnaLibs[i])/(dnaSum/dnaLibSum)
# print("naive =",naive)
case=Case(dnaSum,rna,dnaLibSum,rnaLibs)
data.append(case)
lineNum+=1
if(lineNum>=maxData): break
# Minimize the negative log likelihood
theta0 = np.array([0.5])
bounds = Bounds(0.0001, 1000)
opt = minimize(getClosure(data), theta0, method="trust-constr",
#method='nelder-mead',
#options={'xatol': 1e-8, 'disp': True})
bounds=bounds,
options={'disp': True})
print("theta =",opt.x[0])