Columbia Physics System: src/util/dense_matrix/dense

00001 #include<config.h>
00002 CPS_START_NAMESPACE
00003 //--------------------------------------------------------------------
00004 //  CVS keywords
00005 //
00006 //      $Author: zs $
00007 //      $Date: 2004/08/18 11:57:47 $
00008 //      $Header: /space/cvs/cps/cps++/src/util/dense_matrix/dense_matrix.C,v 1.5 2004/08/18 11:57:47 zs Exp $
00009 //      $Id: dense_matrix.C,v 1.5 2004/08/18 11:57:47 zs Exp $
00010 //      $Name: v4_9_9 $
00011 //      $Locker:  $
00012 //      $Revision: 1.5 $
00013 //  $Source: /space/cvs/cps/cps++/src/util/dense_matrix/dense_matrix.C,v $
00014 //  $State: Exp $
00015 //
00016 //--------------------------------------------------------------------
00017 
00018 CPS_END_NAMESPACE
00019 #include<config.h>
00020 #include<util/dense_matrix.h>
00021 #include<util/data_types.h>
00022 #include<util/smalloc.h>
00023 CPS_START_NAMESPACE
00024 
00025 CPS_END_NAMESPACE
00026 #include<util/error.h>
00027 CPS_START_NAMESPACE
00028 
00029 
00030 const IFloat MAX_ERROR = 1e-7;
00031 
00032 //----------------------------------------------------------------------
00033 // Local auxiliary functions
00034 //----------------------------------------------------------------------
00035 inline IFloat abs(IFloat x) { return x > 0.0 ? x : -x;
00036 }
00037 
00038 //--------------------------------------------------------------------------
00039 // void mat_hrm_cmpr(IFloat *mat_out, const IFloat *mat_in, int mat_n);
00040 //--------------------------------------------------------------------------
00041 // Purpose:
00042 //   compress a hermitian matrix by storing only the lower triangular
00043 //   matrix elements in the same order.
00044 // Arguments:
00045 //   in:   n x n hermitian matrix allocated as IFloat[n*n*2]
00046 //   out:  compressed matrix allocated as IFloat[n*n]
00047 // Warning:
00048 //   in == out OK but never overlapped or embedded otherwise
00049 //--------------------------------------------------------------------------
00050 void mat_hrm_cmpr(IFloat *mat_out, const IFloat *mat_in, int n)
00051 {
00052   // copy mat_in and mat_out. TARTAN SEEMS SCREWED UP WHEN THE ARGUMENT
00053   // POINTERS ARE MODIFIED !!!
00054   //------------------------------------------------------------------------
00055   IFloat *out = mat_out;         
00056   const IFloat *in = mat_in;
00057   
00058   // copy 2*row+1 IFloating numbers per row.
00059   //------------------------------------------------------------------------
00060   for (int row = 0; row < n; ++row) {
00061     for (int real_number = 0; real_number <= 2*row; ++real_number) {
00062       *out++ = *in++;      
00063     }
00064     in += 2*(n-row) - 1;
00065   }
00066 }
00067 
00068 
00069 //--------------------------------------------------------------------------
00070 // void mat_hrm_decm(IFloat *mat_out, const IFloat *mat_in, int mat_n);
00071 //--------------------------------------------------------------------------
00072 // Purpose:
00073 //   restore a hermitian matrix from its lower triangular half.
00074 // Arguments:
00075 //   out:   n x n hermitian matrix allocated as IFloat[n*n*2]
00076 //   in:    compressed matrix allocated as IFloat[n*n]
00077 // Warning:
00078 //   in == out OK, but never overlapped or embeded otherwise
00079 //--------------------------------------------------------------------------
00080 void mat_hrm_decm(IFloat *mat_out, const IFloat *mat_in, int n)
00081 {
00082   // copy the lower trianglar half in a memory-perserving order.
00083   //------------------------------------------------------------------------
00084   {
00085     IFloat *out = mat_out+n*n*2-1;                   // end of mat_out
00086     const IFloat *in = mat_in+n*n-1;                 // end of mat_in
00087     for (int row = 0; row < n; ++row) {             // row is from bottom up
00088       out -= 2*row;                                 // *out is mat[i][i][im]
00089       *out-- = 0.0;                                 // set mat[i][i][im] = 0
00090       for (int element = 0; element < 2*(n-row)-1; ++element) {
00091         *out-- = *in--;                             // set mat[i][i][re] 
00092       }
00093     }
00094   }
00095   
00096   // set the upper triangular part from the lower part.
00097   //------------------------------------------------------------------------
00098   {
00099     for (int row = 1; row < n; ++row) {            // row is from top down
00100       IFloat *out = mat_out + 2*n*row;              // mat[row][0][re]
00101       for (int col = 0; col < row; ++col) {
00102         int delta = 2*(n-1)*(row-col);             // [i][j][0]-[j][i][0]
00103         *(out-delta) = *out;                       // A[j][i][0]
00104         ++out;
00105         *(out-delta) = - *out;                     // A[j][i][1]
00106         ++out;
00107       }
00108     }
00109   }
00110 
00111 }
00112 
00113 
00114 //--------------------------------------------------------------------------
00115 // void mat_hrm_ldl(IFloat *L, IFloat *D, const IFloat *A, int n);
00116 //--------------------------------------------------------------------------
00117 // Purpose:
00118 //     to decompose a hermitian n by n matrix A into L D L^dag 
00119 // Attention:
00120 //    1. All matrices are stored in compact format for time efficiency.
00121 //    2. A, D and L must be different from each other.
00122 // Arguments:
00123 //    A:  lower triangular half of a hermitian matrix. 
00124 //        Allocated as IFloat[n*n]
00125 //    D:  real diagonal matrix. allocated as Float[n]
00126 //    L:  unit lower-triangular matrix. allocated as Float[n*(n-1)]
00127 //--------------------------------------------------------------------------
00128 // Theorem:
00129 //
00130 // Reference:
00131 //  1. Matrix Computations, chapter 4 by Gene Golub and Charles Van Loan
00132 //  2. Molecular dynamics for full QCD simulations with an improved 
00133 //     action, by Xiang-Qian Luo, hep-lat/9603021
00134 // Algorithm:
00135 //   A = L D L^dag  
00136 //     ==> A_ij = L_ik D_k L_jk*             where  k = 0 .. j,   j <= i
00137 //     ==> A_ii = L_ik D_k L_ik* + D_i       where  k = 0 .. i-1
00138 //         A_ij = L_ik D_k L_jk* + L_ij D_j  where  k = 0 .. j-1, j < i
00139 //     ==> D_i      = A_ii - L_ik D_k L_ik*  where  k = 0 .. i-1
00140 //         L_ij D_j = A_ij - L_ik D_k L_jk*  where  k = 0 .. j-1, j < i
00141 //--------------------------------------------------------------------------
00142 void mat_hrm_ldl(IFloat *L, IFloat *D, const IFloat *A, int n)
00143 {
00144   Float *Lp = (Float *)L;  
00145   Float *Dp = (Float *)D;
00146   const Float *Ap = (const Float *)A;
00147   
00148 
00149   Float *Lij = Lp;                              // *Lij = L[i][j][0] always
00150   const Float *Aij = Ap;                        // *Aij = A[i][j][0] always
00151   for (int i = 0; i < n; ++i) {               
00152     for (int j = 0; j < i; ++j) {               // calculate L[i][j]
00153       Lij[0] = *Aij++;
00154       Lij[1] = *Aij++;
00155       Float *Lik = Lp + i * (i-1);              // *Lik = L[i][k][0]
00156       Float *Ljk = Lp + j * (j-1);              // *Ljk = L[j][k][0]
00157       for (int k = 0; k < j; ++k) {
00158         Lij[0] -= Dp[k] * (Lik[0] * Ljk[0] + Lik[1] * Ljk[1]);
00159         Lij[1] -= Dp[k] * (Lik[1] * Ljk[0] - Lik[0] * Ljk[1]);
00160         Lik += 2;
00161         Ljk += 2;       
00162       }
00163       *Lij++ /= Dp[j];                          // L[i][j][re] /= D[j]
00164       *Lij++ /= Dp[j];                          // L[i][j][im] /= D[j]
00165     }
00166     Dp[i] = *Aij++;                             // calculate D[i][i][0]
00167     Float *Lik = Lp + i * (i-1);                // *Lik = L[i][k][0]
00168     for (int k = 0; k < i; ++k) {
00169       Dp[i] -= Dp[k] * (Lik[0] * Lik[0] + Lik[1] * Lik[1]);
00170       Lik += 2;
00171     }
00172   }
00173 }
00174 
00175 
00176 //--------------------------------------------------------------------------
00177 // void mat_inv(IFloat *out, const IFloat *in, int n, MAT_INV_ALG alg, 
00178 //              IFloat *error_p);  
00179 //--------------------------------------------------------------------------
00180 // Purpose:
00181 //         B = 1/A.    in=out OK
00182 // Arguments:
00183 //     A,B:   n x n complex matrix          (allocated as Float[n][n][2]
00184 //     alg:   algorithm specified to solve the inverse
00185 //     err:   sum { |A * B - I| } 
00186 //            (the absolute value of all the matrix elements)
00187 //--------------------------------------------------------------------------
00188 // Algorithm LDL:
00189 //  A = L D L^dag     ==> A_ij = L_ik D_k L_jk*   where  k = 0..j,   j <= i  
00190 //  AB = I            ==> B = L^dag^inv D^inv L^inv
00191 //  To solve Y = 1/L:
00192 //        LY = I
00193 //        I[i][j] = L[i][k]Y[k][j]                  k = 0..n-1
00194 //                = Y[i][j] + L[i][k]Y[k][j]        k = 0..i-1
00195 //        Y[i][j] = delta_ij - L[i][k] Y[k][j]      k = 0..i-1
00196 //  To solve B = Y^dag D^inv Y:
00197 //        B[i][j] = Y[k][i]*  Y[k][j] / D[k]        bad!
00198 //        L^dag B = 1/D Y                           instead!
00199 //        ==>  L[k][i]*  B[k][j] = 1/D[i] Y[i][j]   k = 0..n-1
00200 //        ==>  B[i][j] = Y[i][j] / D[i] - L[k][i]* B[k][j],  k = i+1..n-1
00201 //--------------------------------------------------------------------------
00202 static void mat_inv_ldl_cmpr(Float *out, const Float *in, int n)
00203 {
00204   // allocate temporary buffer
00205   //------------------------------------------------------------------------
00206   Float *buf = (Float *)smalloc(n*(n+2)*sizeof(*buf));
00207   Float *const Y = buf;           
00208   Float *const D = Y + n*2;
00209   Float *const L = D + n;
00210 
00211   // decompose the hermitian matrix into in = L D L^dag
00212   //------------------------------------------------------------------------
00213   mat_hrm_ldl((IFloat *)L, (IFloat *)D, (IFloat *)in, n);
00214 
00215   // get the inverse matrix.
00216   //------------------------------------------------------------------------
00217   for (int j=0; j<n; ++j) {
00218     int i;
00219     // calculate Y[i,j] = delta_ij - L[i,k] Y[k,j]  where k = 0..i-1
00220     //----------------------------------------------------------------------
00221     for (i=0; i<n; ++i) {
00222       Float *Lik = L + i*(i-1); 
00223       Y[2*i] = (i==j ? 1.0 : 0.0);
00224       Y[2*i+1] = 0.0;
00225       for (int k = 0; k<i; ++k, Lik+=2) {
00226         Y[2*i] -= Lik[0] * Y[2*k] - Lik[1] * Y[2*k+1];
00227         Y[2*i+1] -= Lik[0] * Y[2*k+1] + Lik[1] * Y[2*k];
00228       }
00229     }
00230     // calculate B[i][j] = Y[i][j] / D[i] - L[k][i]* B[k][j],  k = i+1..n-1
00231     //----------------------------------------------------------------------
00232     for (i = n-1; i>=j; --i) {
00233       Float re = Y[2*i] / D[i];
00234       Float im = Y[2*i+1] / D[i];      
00235       for (int k=i+1; k<n; ++k) {
00236         Float *Bkj = out + k*k + 2*j;                
00237         Float *Lki = L + k*(k-1) + 2*i;
00238         re -= Lki[0] * Bkj[0] + Lki[1] * Bkj[1];
00239         im -= Lki[0] * Bkj[1] - Lki[1] * Bkj[0];
00240       }
00241       Float *Bij = out + i*i + 2*j;
00242       Bij[0] = re;
00243       if (i!=j) {
00244         Bij[1] = im;
00245       } 
00246     }
00247   }
00248   sfree(buf); 
00249 }
00250 
00251 //--------------------------------------------------------------------------
00252 //void mat_inv(IFloat *out, const IFloat *in, int n, MAT_INV_ALG alg, IFloat *)
00253 //--------------------------------------------------------------------------
00254 void mat_inv(IFloat *out, const IFloat *in, int n, 
00255              MAT_INV_ALG alg, IFloat *err) 
00256 {
00257   const Float *A = (const Float*)in;
00258   Float *B = (Float *)out;  
00259 
00260   switch (alg) {
00261   case MAT_INV_ALG_LDL_CMPR:     
00262     mat_inv_ldl_cmpr(B, A, n);
00263     break;
00264   case MAT_INV_ALG_LDL:
00265     mat_hrm_cmpr(out, in, n);                
00266     mat_inv_ldl_cmpr(B, B, n);
00267     mat_hrm_decm(out, out, n);
00268     break;
00269   default:
00270     break;
00271   }
00272 
00273   if (err) {   
00274     Float & error = *(Float *)err;    
00275     error = 0.0;
00276     for (int i = 0; i < n; ++i) {
00277       for (int k = 0; k < n; ++k) {
00278         Float re = (i == k ? -1.0 : 0.0);
00279         Float im = 0.0;
00280         for (int j = 0; j < n; ++j) {
00281           const Float *Bjk = B;
00282           const Float *Aij = A;   
00283           switch (alg) {
00284           case MAT_INV_ALG_LDL_CMPR:     
00285             Bjk += j*j + 2*k;
00286             Aij += i*i + 2*j;       
00287             break;
00288           case MAT_INV_ALG_LDL:
00289             Bjk += 2*(n*j+k);
00290             Aij += 2*(n*i+j);       
00291             break;
00292           default:
00293             break;
00294           }
00295           re += Aij[0] * Bjk[0] - Aij[1] * Bjk[1];        
00296           im += Aij[0] * Bjk[1] + Aij[1] * Bjk[0];
00297         }
00298         error += abs(re) + abs(im);       
00299       }
00300     }
00301     if (error > MAX_ERROR)
00302       ERR.General("", "", "Lapack failed to invert.\n");    
00303   }
00304 }
00305 
00306 CPS_END_NAMESPACE