/* Copyright (C) 1999, 2000, 2001, 2002, Massachusetts Institute of Technology.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "../config.h"
#include <check.h>
#include "matrices.h"
#include "blasglue.h"
/* Simple operations on sqmatrices. Also, some not-so-simple operations,
like inversion and eigenvalue decomposition. */
/* A = B */
void sqmatrix_copy(sqmatrix A, sqmatrix B)
{
CHECK(A.p == B.p, "arrays not conformant");
blasglue_copy(A.p * A.p, B.data, 1, A.data, 1);
}
/* Resize A from its current size to a pxp matrix, assuming that
A was initially allocated to hold at least this big a matrix.
If preserve_data is nonzero, copies the existing data in A (or
a subset of it, if the matrix is shrinking) to the corresponding
entries of the resized matrix. */
void sqmatrix_resize(sqmatrix *A, int p, short preserve_data)
{
CHECK(p <= A->alloc_p, "tried to resize beyond allocated limit");
if (preserve_data) {
int i, j;
if (p < A->p) {
for (i = 0; i < p; ++i)
for (j = 0; j < p; ++j)
A->data[i*p + j] = A->data[i*A->p + j];
}
else {
for (i = A->p-1; i >= 0; --i)
for (j = A->p-1; j >= 0; --j)
A->data[i*p + j] = A->data[i*A->p + j];
}
}
A->p = p;
}
/* U contains the upper triangle of a Hermitian matrix; we copy this
to F and also fill in the lower triangle with the adjoint of the upper. */
void sqmatrix_copy_upper2full(sqmatrix F, sqmatrix U)
{
int i, j;
CHECK(F.p == U.p, "arrays not conformant");
for (i = 0; i < U.p; ++i) {
for (j = 0; j < i; ++j) {
ASSIGN_CONJ(F.data[i*U.p + j], U.data[j*U.p + i]);
}
for (; j < U.p; ++j)
F.data[i*U.p + j] = U.data[i*U.p + j];
}
}
/* Asym = (A + adjoint(A)) / 2. Asym is thus Hermitian. */
void sqmatrix_symmetrize(sqmatrix Asym, sqmatrix A)
{
int i, j;
CHECK(Asym.p == A.p, "arrays not conformant");
for (i = 0; i < A.p; ++i)
for (j = 0; j < A.p; ++j) {
int ij = i * A.p + j, ji = j * A.p + i;
ASSIGN_SCALAR(Asym.data[ij],
0.5 * (SCALAR_RE(A.data[ij]) +
SCALAR_RE(A.data[ji])),
0.5 * (SCALAR_IM(A.data[ij]) -
SCALAR_IM(A.data[ji])));
}
}
/* trace(U) */
scalar sqmatrix_trace(sqmatrix U)
{
int i;
scalar trace = SCALAR_INIT_ZERO;
for (i = 0; i < U.p; ++i)
ACCUMULATE_SUM(trace, U.data[i*U.p + i]);
return trace;
}
/* compute trace(adjoint(A) * B) */
scalar sqmatrix_traceAtB(sqmatrix A, sqmatrix B)
{
scalar trace;
CHECK(A.p == B.p, "matrices not conformant");
trace = blasglue_dotc(A.p * A.p, A.data, 1, B.data, 1);
return trace;
}
/* A = B * C. If bdagger != 0, then adjoint(B) is used; similarly for C.
A must be distinct from B and C. Note that since the matrices
are stored in row-major order, the most efficient operation should
be B * adjoint(C), assuming the BLAS is sane. i.e. if C is hermitian,
you should use cdagger = 1. Conversely, the worst operation is
probably adjoint(B) * C. */
void sqmatrix_AeBC(sqmatrix A, sqmatrix B, short bdagger,
sqmatrix C, short cdagger)
{
CHECK(A.p == B.p && A.p == C.p, "matrices not conformant");
blasglue_gemm(bdagger ? 'C' : 'N', cdagger ? 'C' : 'N', A.p, A.p, A.p,
1.0, B.data, B.p, C.data, C.p, 0.0, A.data, A.p);
}
/* A += a B * C. bdagger, cdagger are as for sqmatrix_AeBC, above. */
void sqmatrix_ApaBC(sqmatrix A, real a, sqmatrix B, short bdagger,
sqmatrix C, short cdagger)
{
CHECK(A.p == B.p && A.p == C.p, "matrices not conformant");
blasglue_gemm(bdagger ? 'C' : 'N', cdagger ? 'C' : 'N', A.p, A.p, A.p,
a, B.data, B.p, C.data, C.p, 1.0, A.data, A.p);
}
/* A += a B */
void sqmatrix_ApaB(sqmatrix A, real a, sqmatrix B)
{
CHECK(A.p == B.p, "matrices not conformant");
blasglue_axpy(A.p * A.p, a, B.data, 1, A.data, 1);
}
/* compute A = a*A + b*B; A and B may be equal. */
void sqmatrix_aApbB(real a, sqmatrix A, real b, sqmatrix B)
{
CHECK(A.p == B.p, "arrays not conformant");
if (a != 1.0)
blasglue_rscal(A.p * A.p, a, A.data, 1);
blasglue_axpy(A.p * A.p, b, B.data, 1, A.data, 1);
}
/* U <- 1/U. U must be Hermitian and, if positive_definite != 0,
positive-definite (e.g. U = Yt*Y). Work is a scratch matrix. */
void sqmatrix_invert(sqmatrix U, short positive_definite,
sqmatrix Work)
{
int i, j;
if (positive_definite) {
/* factorize U: */
lapackglue_potrf('U', U.p, U.data, U.p);
/* QUESTION: would it be more efficient to stop here,
returning the Cholesky factorization of U? This
could then be used to multiply by 1/U without
ever calculating the inverse explicitely. It
would probably be more numerically stable, but
how do the computational costs compare? */
/* Compute 1/U (upper half only) */
lapackglue_potri('U', U.p, U.data, U.p);
}
else {
int *ipiv;
CHK_MALLOC(ipiv, int, U.p);
CHECK(Work.p * Work.p >= U.p, "scratch matrix is too small");
/* factorize U: */
lapackglue_hetrf('U', U.p, U.data, U.p,
ipiv, Work.data, Work.p * Work.p);
/* Compute 1/U (upper half only) */
lapackglue_hetri('U', U.p, U.data, U.p, ipiv, Work.data);
free(ipiv);
}
/* Now, copy the conjugate of the upper half
onto the lower half of U */
for (i = 0; i < U.p; ++i)
for (j = i + 1; j < U.p; ++j) {
ASSIGN_CONJ(U.data[j * U.p + i], U.data[i * U.p + j]);
}
}
/* U <- eigenvectors of U. U must be Hermitian. eigenvals <- eigenvalues.
W is a work array. The columns of adjoint(U') are the eigenvectors, so that
U == adjoint(U') D U', with D = diag(eigenvals).
The eigenvalues are returned in ascending order. */
void sqmatrix_eigensolve(sqmatrix U, real *eigenvals, sqmatrix W)
{
real *work;
CHK_MALLOC(work, real, 3*U.p - 2);
if (W.p * W.p >= 3 * U.p - 1)
lapackglue_heev('V', 'U', U.p, U.data, U.p, eigenvals,
W.data, W.p * W.p, work);
else {
scalar *morework;
CHK_MALLOC(morework, scalar, 3 * U.p - 1);
lapackglue_heev('V', 'U', U.p, U.data, U.p, eigenvals,
morework, 3 * U.p - 1, work);
free(morework);
}
free(work);
}
/* Compute Usqrt <- sqrt(U), where U must be Hermitian positive-definite.
W is a work array, and must be the same size as U. Both U and
W are overwritten. */
void sqmatrix_sqrt(sqmatrix Usqrt, sqmatrix U, sqmatrix W)
{
real *eigenvals;
CHECK(Usqrt.p == U.p && U.p == W.p, "matrices not conformant");
CHK_MALLOC(eigenvals, real, U.p);
sqmatrix_eigensolve(U, eigenvals, W);
{
int i;
/* Compute W = diag(sqrt(eigenvals)) * U; i.e. the rows of W
become the rows of U times sqrt(corresponding eigenvalue) */
for (i = 0; i < U.p; ++i) {
CHECK(eigenvals[i] > 0, "non-positive eigenvalue");
blasglue_copy(U.p, U.data + i*U.p, 1, W.data + i*U.p, 1);
blasglue_rscal(U.p, sqrt(eigenvals[i]), W.data + i*U.p, 1);
}
}
free(eigenvals);
/* compute Usqrt = Ut * W */
sqmatrix_AeBC(Usqrt, U, 1, W, 0);
}
syntax highlighted by Code2HTML, v. 0.9.1