in MHP.py [0:0]
def EM(self, Ahat, mhat, omega, seq=[], smx=None, tmx=None, regularize=False,
Tm=-1, maxiter=100, epsilon=0.01, verbose=True):
'''implements MAP EM. Optional to regularize with `smx` and `tmx` matrix (shape=(dim,dim)).
In general, the `tmx` matrix is a pseudocount of parent events from column j,
and the `smx` matrix is a pseudocount of child events from column j -> i,
however, for more details/usage see https://stmorse.github.io/docs/orc-thesis.pdf'''
# if no sequence passed, uses class instance data
if len(seq) == 0:
seq = self.data
N = len(seq)
dim = mhat.shape[0]
Tm = float(seq[-1, 0]) if Tm < 0 else float(Tm)
sequ = seq[:, 1].astype(int)
p_ii = np.random.uniform(0.01, 0.99, size=N)
p_ij = np.random.uniform(0.01, 0.99, size=(N, N))
# PRECOMPUTATIONS
# diffs[i,j] = t_i - t_j for j < i (o.w. zero)
diffs = pairwise_distances(np.array([seq[:, 0]]).T, metric='euclidean')
diffs[np.triu_indices(N)] = 0
# kern[i,j] = omega*np.exp(-omega*diffs[i,j])
kern = omega * np.exp(-omega * diffs)
colidx = np.tile(sequ.reshape((1, N)), (N, 1))
rowidx = np.tile(sequ.reshape((N, 1)), (1, N))
# approx of Gt sum in a_{uu'} denom
seqcnts = np.array([len(np.where(sequ == i)[0]) for i in range(dim)])
seqcnts = np.tile(seqcnts, (dim, 1))
# returns sum of all pmat vals where u_i=a, u_j=b
# *IF* pmat upper tri set to zero, this is
# \sum_{u_i=u}\sum_{u_j=u', j<i} p_{ij}
def sum_pij(a, b):
c = cartesian([np.where(seq[:, 1] == int(a))[0], np.where(seq[:, 1] == int(b))[0]])
return np.sum(p_ij[c[:, 0], c[:, 1]])
vp = np.vectorize(sum_pij)
# \int_0^t g(t') dt' with g(t)=we^{-wt}
# def G(t): return 1 - np.exp(-omega * t)
# vg = np.vectorize(G)
# Gdenom = np.array([np.sum(vg(diffs[-1,np.where(seq[:,1]==i)])) for i in range(dim)])
k = 0
old_LL = -10000
START = T.time()
while k < maxiter:
Auu = Ahat[rowidx, colidx]
ag = np.multiply(Auu, kern)
ag[np.triu_indices(N)] = 0
# compute m_{u_i}
mu = mhat[sequ]
# compute total rates of u_i at time i
rates = mu + np.sum(ag, axis=1)
# compute matrix of p_ii and p_ij (keep separate for later computations)
p_ij = np.divide(ag, np.tile(np.array([rates]).T, (1, N)))
p_ii = np.divide(mu, rates)
# compute mhat: mhat_u = (\sum_{u_i=u} p_ii) / T
mhat = np.array([np.sum(p_ii[np.where(seq[:, 1] == i)])
for i in range(dim)]) / Tm
# ahat_{u,u'} = (\sum_{u_i=u}\sum_{u_j=u', j<i} p_ij) / \sum_{u_j=u'} G(T-t_j)
# approximate with G(T-T_j) = 1
if regularize:
Ahat = np.divide(np.fromfunction(lambda i, j: vp(i, j), (dim, dim)) + (smx - 1),
seqcnts + tmx)
else:
Ahat = np.divide(np.fromfunction(lambda i, j: vp(i, j), (dim, dim)),
seqcnts)
if k % 10 == 0:
try:
term1 = np.sum(np.log(rates))
except:
print('Log error!')
term2 = Tm * np.sum(mhat)
term3 = np.sum(np.sum(Ahat[u, int(seq[j, 1])] for j in range(N)) for u in range(dim))
#new_LL = (1./N) * (term1 - term2 - term3)
new_LL = (1. / N) * (term1 - term3)
if abs(new_LL - old_LL) <= epsilon:
if verbose:
print('Reached stopping criterion. (Old: %1.3f New: %1.3f)' % (old_LL, new_LL))
return Ahat, mhat
if verbose:
print('After ITER %d (old: %1.3f new: %1.3f)' % (k, old_LL, new_LL))
print(' terms %1.4f, %1.4f, %1.4f' % (term1, term2, term3))
old_LL = new_LL
k += 1
if verbose:
print('Reached max iter (%d).' % maxiter)
self.Ahat = Ahat
self.mhat = mhat
return Ahat, mhat