from scipy.optimize import minimize_scalar
import tensorly as tl
import numpy as np
[docs]
def estimate_ranks(layer):
"""
Estimate the optimal ranks for Tucker decomposition of a tensor using Variational Bayesian Matrix Factorization (VBMF).
This function unfolds the input tensor along two modes (rows and columns) and applies VBMF to estimate
the optimal ranks of the matrices corresponding to each mode. The estimated ranks are then returned
as a list.
Args:
layer (torch.nn.Module): A neural network layer whose weights are represented as a tensor.
Returns:
list: A list containing the estimated ranks for the decomposition along the first and second modes.
"""
weights = layer.weight.data.numpy()
if weights.shape[0]==1 and weights.shape[1]>1:
return [1, weights.shape[1]//2]
if weights.shape[0]>1 and weights.shape[1]==1:
return [ weights.shape[0]//2, 1]
if weights.shape[0]==1 and weights.shape[1]==1:
return [1, 1]
unfold_0 = tl.base.unfold(weights, 0)
unfold_1 = tl.base.unfold(weights, 1)
_, diag_0, _, _ = EVBMF(unfold_0)
_, diag_1, _, _ = EVBMF(unfold_1)
ranks = [diag_0.shape[0], diag_1.shape[1]]
return ranks
[docs]
def VBMF(Y, cacb, sigma2=None, H=None):
"""Implementation of the analytical solution to Variational Bayes Matrix Factorization.
This function can be used to calculate the analytical solution to VBMF.
This is based on the paper and MatLab code by Nakajima et al.:
"Global analytic solution of fully-observed variational Bayesian matrix factorization."
Args:
Y (numpy-array): Input matrix that is to be factorized. Y has shape (L,M), where L<=M.
cacb (int): Product of the prior variances of the matrices that factorize the input.
sigma2 (int or None): Variance of the noise on Y.
H (int or None): Maximum rank of the factorized matrices.
Returns:
U (numpy-array): Left-singular vectors.
S (numpy-array): Diagonal matrix of singular values.
V (numpy-array): Right-singular vectors.
(post dictionary): Dictionary containing the computed posterior values.
References:
[1] Nakajima, Shinichi, et al. "Global analytic solution of fully-observed variational Bayesian matrix factorization." Journal of Machine Learning Research 14.Jan (2013): 1-37.
[2] Nakajima, Shinichi, et al. "Perfect dimensionality recovery by variational Bayesian PCA." Advances in Neural Information Processing Systems. 2012.
"""
L,M = Y.shape # has to be L<=M
if H is None:
H = L
# SVD of the input matrix, max rank of H
U,s,V = np.linalg.svd(Y)
U = U[:,:H]
s = s[:H]
V = V[:H].T
# Calculate residual
residual = 0.
if H<L:
residual = np.sum(np.sum(Y**2)-np.sum(s**2))
# Estimation of the variance when sigma2 is unspecified
if sigma2 is None:
upper_bound = (np.sum(s**2)+ residual)/(L+M)
if L==H:
lower_bound = s[-1]**2/M
else:
lower_bound = residual/((L-H)*M)
sigma2_opt = minimize_scalar(VBsigma2, args=(L,M,cacb,s,residual), bounds=[lower_bound, upper_bound], method='bounded')
sigma2 = sigma2_opt.x
print("Estimated sigma2: ", sigma2)
# Threshold gamma term
# Formula above (21) from [1]
thresh_term = (L+M + sigma2/cacb**2)/2
threshold = np.sqrt( sigma2 * (thresh_term + np.sqrt(thresh_term**2 - L*M) ))
# Number of singular values where gamma>threshold
pos = np.sum(s>threshold)
# Formula (10) from [2]
d = np.multiply(s[:pos],
1 - np.multiply(sigma2/(2*s[:pos]**2),
L+M+np.sqrt( (M-L)**2 + 4*s[:pos]**2/cacb**2 )))
# Computation of the posterior
post = {}
zeta = sigma2/(2*L*M) * (L+M+sigma2/cacb**2 - np.sqrt((L+M+sigma2/cacb**2)**2 - 4*L*M))
post['ma'] = np.zeros(H)
post['mb'] = np.zeros(H)
post['sa2'] = cacb * (1-L*zeta/sigma2) * np.ones(H)
post['sb2'] = cacb * (1-M*zeta/sigma2) * np.ones(H)
delta = cacb/sigma2 * (s[:pos]-d- L*sigma2/s[:pos])
post['ma'][:pos] = np.sqrt(np.multiply(d, delta))
post['mb'][:pos] = np.sqrt(np.divide(d, delta))
post['sa2'][:pos] = np.divide(sigma2*delta, s[:pos])
post['sb2'][:pos] = np.divide(sigma2, np.multiply(delta, s[:pos]))
post['sigma2'] = sigma2
post['F'] = 0.5*(L*M*np.log(2*np.pi*sigma2) + (residual+np.sum(s**2))/sigma2 - (L+M)*H
+ np.sum(M*np.log(cacb/post['sa2']) + L*np.log(cacb/post['sb2'])
+ (post['ma']**2 + M*post['sa2'])/cacb + (post['mb']**2 + L*post['sb2'])/cacb
+ (-2 * np.multiply(np.multiply(post['ma'], post['mb']), s)
+ np.multiply(post['ma']**2 + M*post['sa2'],post['mb']**2 + L*post['sb2']))/sigma2))
return U[:,:pos], np.diag(d), V[:,:pos], post
[docs]
def VBsigma2(sigma2, L, M, cacb, s, residual):
"""
Computes the free energy of the Variational Bayesian Matrix Factorization (VBMF) model
to optimize the noise variance parameter.
Args:
sigma2 (float): Variance of the noise in the model.
L (int): Number of rows in the input matrix.
M (int): Number of columns in the input matrix.
cacb (float): Multiplicative factor derived from prior hyperparameters.
s (numpy.ndarray): Singular values of the matrix to factorize.
residual (float): Residual sum of squares from truncated singular values.
Returns:
float: Free energy of the VBMF model for the given noise variance.
"""
H = len(s)
thresh_term = (L + M + sigma2 / cacb ** 2) / 2
threshold = np.sqrt(sigma2 * (thresh_term + np.sqrt(thresh_term ** 2 - L * M)))
pos = np.sum(s > threshold)
d = np.multiply(s[:pos],
1 - np.multiply(sigma2 / (2 * s[:pos] ** 2),
L + M + np.sqrt((M - L) ** 2 + 4 * s[:pos] ** 2 / cacb ** 2)))
zeta = sigma2 / (2 * L * M) * (L + M + sigma2 / cacb ** 2 - np.sqrt((L + M + sigma2 / cacb ** 2) ** 2 - 4 * L * M))
post_ma = np.zeros(H)
post_mb = np.zeros(H)
post_sa2 = cacb * (1 - L * zeta / sigma2) * np.ones(H)
post_sb2 = cacb * (1 - M * zeta / sigma2) * np.ones(H)
delta = cacb / sigma2 * (s[:pos] - d - L * sigma2 / s[:pos])
post_ma[:pos] = np.sqrt(np.multiply(d, delta))
post_mb[:pos] = np.sqrt(np.divide(d, delta))
post_sa2[:pos] = np.divide(sigma2 * delta, s[:pos])
post_sb2[:pos] = np.divide(sigma2, np.multiply(delta, s[:pos]))
F = 0.5 * (L * M * np.log(2 * np.pi * sigma2) + (residual + np.sum(s ** 2)) / sigma2 - (L + M) * H
+ np.sum(M * np.log(cacb / post_sa2) + L * np.log(cacb / post_sb2)
+ (post_ma ** 2 + M * post_sa2) / cacb + (post_mb ** 2 + L * post_sb2) / cacb
+ (-2 * np.multiply(np.multiply(post_ma, post_mb), s)
+ np.multiply(post_ma ** 2 + M * post_sa2, post_mb ** 2 + L * post_sb2)) / sigma2))
return F
[docs]
def EVBMF(Y, sigma2=None, H=None):
"""
Analytical implementation of Empirical Variational Bayesian Matrix Factorization (VBMF).
Args:
Y (numpy.ndarray): Input matrix to factorize with shape (L, M), where L <= M.
sigma2 (float, optional): Noise variance. If None, it is estimated.
H (int, optional): Maximum rank of the factorized matrices. If None, set to L.
Returns:
tuple:
- U (numpy.ndarray): Left singular vectors of the factorized matrix.
- S (numpy.ndarray): Diagonal matrix of singular values.
- V (numpy.ndarray): Right singular vectors of the factorized matrix.
- post (dict): Dictionary containing posterior values including 'sigma2' and 'F'.
References:
- Nakajima, Shinichi, et al. "Global analytic solution of fully-observed variational
Bayesian matrix factorization." Journal of Machine Learning Research 14.Jan (2013).
"""
L, M = Y.shape # has to be L <= M
if H is None:
H = L
alpha = L / M
tauubar = 2.5129 * np.sqrt(alpha)
# SVD of the input matrix, max rank of H
U, s, V = np.linalg.svd(Y)
U = U[:, :H]
s = s[:H]
V = V[:H].T
# Calculate residual
residual = 0.
if H < L:
residual = np.sum(np.sum(Y ** 2) - np.sum(s ** 2))
# Estimation of the variance when sigma2 is unspecified
if sigma2 is None:
xubar = (1 + tauubar) * (1 + alpha / tauubar)
eH_ub = int(np.min([np.ceil(L / (1 + alpha)) - 1, H])) - 1
upper_bound = (np.sum(s ** 2) + residual) / (L * M)
lower_bound = np.max([s[eH_ub + 1] ** 2 / (M * xubar), np.mean(s[eH_ub + 1:] ** 2) / M])
scale = 1. # / lower_bound
s = s * np.sqrt(scale)
residual = residual * scale
lower_bound = lower_bound * scale
upper_bound = upper_bound * scale
sigma2_opt = minimize_scalar(EVBsigma2, args=(L, M, s, residual, xubar), bounds=[lower_bound, upper_bound], method='Bounded')
sigma2 = sigma2_opt.x *0.5
# Threshold gamma term
threshold = np.sqrt(M * sigma2 * (1 + tauubar) * (1 + alpha / tauubar))
pos = np.sum(s > threshold)
# Formula (15) from [2]
d = np.multiply(s[:pos] / 2, 1 - np.divide((L + M) * sigma2, s[:pos] ** 2) + np.sqrt((1 - np.divide((L + M) * sigma2, s[:pos] ** 2)) ** 2 - 4 * L * M * sigma2 ** 2 / s[:pos] ** 4))
# Computation of the posterior
post = {}
post['ma'] = np.zeros(H)
post['mb'] = np.zeros(H)
post['sa2'] = np.zeros(H)
post['sb2'] = np.zeros(H)
post['cacb'] = np.zeros(H)
tau = np.multiply(d, s[:pos]) / (M * sigma2)
delta = np.multiply(np.sqrt(np.divide(M * d, L * s[:pos])), 1 + alpha / tau)
post['ma'][:pos] = np.sqrt(np.multiply(d, delta))
post['mb'][:pos] = np.sqrt(np.divide(d, delta))
post['sa2'][:pos] = np.divide(sigma2 * delta, s[:pos])
post['sb2'][:pos] = np.divide(sigma2, np.multiply(delta, s[:pos]))
post['cacb'][:pos] = np.sqrt(np.multiply(d, s[:pos]) / (L * M))
post['sigma2'] = sigma2
post['F'] = 0.5 * (L * M * np.log(2 * np.pi * sigma2) + (residual + np.sum(s ** 2)) / sigma2
+ np.sum(M * np.log(tau + 1) + L * np.log(tau / alpha + 1) - M * tau))
return U[:, :pos], np.diag(d), V[:, :pos], post
[docs]
def EVBsigma2(sigma2, L, M, s, residual, xubar):
"""
Compute the objective function for optimizing the noise variance (sigma2) in Empirical Variational Bayesian Matrix Factorization (EVBMF).
Args:
sigma2 (float): Noise variance to optimize.
L (int): Number of rows in the input matrix.
M (int): Number of columns in the input matrix.
s (numpy.ndarray): Singular values of the matrix to factorize.
residual (float): Residual sum of squares from truncated singular values.
xubar (float): Threshold for singular values based on prior knowledge.
Returns:
float: Objective function value for the given noise variance.
"""
H = len(s)
alpha = L / M
x = s ** 2 / (M * sigma2)
z1 = x[x > xubar]
z2 = x[x <= xubar]
tau_z1 = tau(z1, alpha)
term1 = np.sum(z2 - np.log(z2))
term2 = np.sum(z1 - tau_z1)
term3 = np.sum(np.log(np.divide(tau_z1 + 1, z1)))
term4 = alpha * np.sum(np.log(tau_z1 / alpha + 1))
obj = term1 + term2 + term3 + term4 + residual / (M * sigma2) + (L - H) * np.log(sigma2)
return obj
[docs]
def phi0(x):
"""
Calculate the phi0 function for EVBMF.
Args:
x (float or numpy.ndarray): Input value(s).
Returns:
float or numpy.ndarray: Result of the phi0 calculation.
"""
return x - np.log(x)
[docs]
def phi1(x, alpha):
"""
Calculate the phi1 function for EVBMF.
Args:
x (float or numpy.ndarray): Input value(s).
alpha (float): Aspect ratio of the input matrix (L / M).
Returns:
float or numpy.ndarray: Result of the phi1 calculation.
"""
return np.log(tau(x, alpha) + 1) + alpha * np.log(tau(x, alpha) / alpha + 1) - tau(x, alpha)
[docs]
def tau(x, alpha):
"""
Compute the tau function, which is part of the EVBMF framework.
Args:
x (float or numpy.ndarray): Input value(s).
alpha (float): Aspect ratio of the input matrix (L / M).
Returns:
float or numpy.ndarray: Computed tau values.
"""
return 0.5 * (x - (1 + alpha) + np.sqrt((x - (1 + alpha)) ** 2 - 4 * alpha))