Rocstar  1.0
Rocstar multiphysics simulation application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
inks/MsqHessian.cpp
Go to the documentation of this file.
1 /* *****************************************************************
2  MESQUITE -- The Mesh Quality Improvement Toolkit
3 
4  Copyright 2004 Sandia Corporation and Argonne National
5  Laboratory. Under the terms of Contract DE-AC04-94AL85000
6  with Sandia Corporation, the U.S. Government retains certain
7  rights in this software.
8 
9  This library is free software; you can redistribute it and/or
10  modify it under the terms of the GNU Lesser General Public
11  License as published by the Free Software Foundation; either
12  version 2.1 of the License, or (at your option) any later version.
13 
14  This library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  Lesser General Public License for more details.
18 
19  You should have received a copy of the GNU Lesser General Public License
20  (lgpl.txt) along with this library; if not, write to the Free Software
21  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 
23  diachin2@llnl.gov, djmelan@sandia.gov, mbrewer@sandia.gov,
24  pknupp@sandia.gov, tleurent@mcs.anl.gov, tmunson@mcs.anl.gov
25 
26  ***************************************************************** */
27 // -*- Mode : c++; tab-width: 3; c-tab-always-indent: t; indent-tabs-mode: nil; c-basic-offset: 3 -*-
28 //
29 // AUTHOR: Todd Munson <tmunson@mcs.anl.gov>
30 // ORG: Argonne National Laboratory
31 // E-MAIL: tmunson@mcs.anl.gov
32 //
33 // ORIG-DATE: 2-Jan-03 at 11:02:19 by Thomas Leurent
34 // LAST-MOD: 26-Nov-03 at 15:47:42 by Thomas Leurent
35 //
36 // DESCRIPTION:
37 // ============
44 #include "MsqHessian.hpp"
45 #include "MsqTimer.hpp"
46 
47 
48 #ifdef MSQ_USE_OLD_C_HEADERS
49 # include <math.h>
50 #else
51 # include <cmath>
52 #endif
53 
54 #ifdef MSQ_USE_OLD_IO_HEADERS
55 # include <iostream.h>
56 #else
57 # include <iostream>
58 #endif
59 
60 namespace Mesquite {
61 
63  origin_pd(0), mEntries(0), mRowStart(0), mColIndex(0),
64  mAccumulation(0), mAccumElemStart(0), mSize(0),
65  mPreconditioner(0), precondArraySize(0),
66  mR(0), mZ(0), mP(0), mW(0), cgArraySizes(0), maxCGiter(50)
67 { }
68 
69 
70 MsqHessian::~MsqHessian()
71 {
72  delete[] mEntries;
73  delete[] mRowStart;
74  delete[] mColIndex;
75 
76  delete[] mAccumulation;
77  delete[] mAccumElemStart;
78 
79  delete[] mPreconditioner;
80 
81  delete[] mR;
82  delete[] mZ;
83  delete[] mP;
84  delete[] mW;
85 }
86 
87 
91 void MsqHessian::initialize(PatchData &pd, MsqError &err)
92 {
93  MSQ_FUNCTION_TIMER( "MsqHession::initialize" );
94  delete[] mEntries;
95  delete[] mRowStart;
96  delete[] mColIndex;
97  delete[] mAccumulation;
98  delete[] mAccumElemStart;
99 
100  size_t num_vertices = pd.num_vertices();
101  size_t num_elements = pd.num_elements();
102  size_t const * vtx_list;
103  size_t e, r, rs, re, c, cs, ce, nz, nnz, nve, i, j;
104  patchElemArray = pd.get_element_array(err); MSQ_CHKERR(err);
105 
106  if (num_vertices == 0) {
107  MSQ_SETERR( err )( "No vertices in PatchData", MsqError::INVALID_ARG);
108  return;
109  }
110 
111  mSize = num_vertices;
112 
113  // Calculate the offsets for a CSC representation of the accumulation
114  // pattern.
115 
116  size_t* col_start = new size_t[num_vertices + 1];
117  mAccumElemStart = new size_t[num_elements+1];
118  mAccumElemStart[0] = 0;
119 
120  for (i = 0; i < num_vertices; ++i) {
121  col_start[i] = 0;
122  }
123 
124  for (e = 0; e < num_elements; ++e) {
125  nve = patchElemArray[e].vertex_count();
126  vtx_list = patchElemArray[e].get_vertex_index_array();
127  mAccumElemStart[e+1] = mAccumElemStart[e] + (nve+1)*nve/2;
128 
129  for (i = 0; i < nve; ++i) {
130  r = vtx_list[i];
131 
132  for (j = i; j < nve; ++j) {
133  c = vtx_list[j];
134 
135  if (r <= c) {
136  col_start[c]++;
137  }
138  else {
139  col_start[r]++;
140  }
141  }
142  }
143  }
144 
145  nz = 0;
146  for (i = 0; i < num_vertices; ++i) {
147  j = col_start[i];
148  col_start[i] = nz;
149  nz += j;
150  }
151  col_start[i] = nz;
152 
153  // Finished putting matrix into CSC representation
154 
155  int* row_instr = new int[5*nz];
156  size_t* row_index = new size_t[nz];
157 
158  nz = 0;
159  for (e = 0; e < num_elements; ++e) {
160  nve = patchElemArray[e].vertex_count();
161  vtx_list = patchElemArray[e].get_vertex_index_array();
162 
163  for (i = 0; i < nve; ++i) {
164  r = vtx_list[i];
165 
166  for (j = i; j < nve; ++j) {
167  c = vtx_list[j];
168 
169  if (r <= c) {
170  row_index[col_start[c]] = r;
171  row_instr[col_start[c]] = nz;
172  ++col_start[c];
173  }
174  else {
175  row_index[col_start[r]] = c;
176  row_instr[col_start[r]] = -nz;
177  ++col_start[r];
178  }
179 
180  ++nz;
181  }
182  }
183  }
184 
185  for (i = num_vertices-1; i > 0; --i) {
186  col_start[i+1] = col_start[i];
187  }
188  col_start[1] = col_start[0];
189  col_start[0] = 0;
190 
191  // cout << "col_start: ";
192  // for (int t=0; t<num_vertices+1; ++t)
193  // cout << col_start[t] << " ";
194  // cout << endl;
195  // cout << "row_index: ";
196  // for (int t=0; t<nz; ++t)
197  // cout << row_index[t] << " ";
198  // cout << endl;
199  // cout << "row_instr: ";
200  // for (int t=0; t<nz; ++t)
201  // cout << row_instr[t] << " ";
202  // cout << endl;
203 
204 
205  // Convert CSC to CSR
206  // First calculate the offsets in the row
207 
208  size_t* row_start = new size_t[num_vertices + 1];
209 
210  for (i = 0; i < num_vertices; ++i) {
211  row_start[i] = 0;
212  }
213 
214  for (i = 0; i < nz; ++i) {
215  ++row_start[row_index[i]];
216  }
217 
218  nz = 0;
219  for (i = 0; i < num_vertices; ++i) {
220  j = row_start[i];
221  row_start[i] = nz;
222  nz += j;
223  }
224  row_start[i] = nz;
225 
226  // Now calculate the pattern
227 
228  size_t* col_index = new size_t[nz];
229  int* col_instr = new int[nz];
230 
231  for (i = 0; i < num_vertices; ++i) {
232  cs = col_start[i];
233  ce = col_start[i+1];
234 
235  while(cs < ce) {
236  r = row_index[cs];
237 
238  col_index[row_start[r]] = i;
239  col_instr[row_start[r]] = row_instr[cs];
240 
241  ++row_start[r];
242  ++cs;
243  }
244  }
245 
246  for (i = num_vertices-1; i > 0; --i) {
247  row_start[i+1] = row_start[i];
248  }
249  row_start[1] = row_start[0];
250  row_start[0] = 0;
251 
252  delete[] row_index;
253 
254  // Now that the matrix is CSR
255  // Column indices for each row are sorted
256 
257  // Compaction -- count the number of nonzeros
258  mRowStart = col_start; // don't need to reallocate
259  mAccumulation = row_instr; // don't need to reallocate
260 
261  for (i = 0; i <= num_vertices; ++i) {
262  mRowStart[i] = 0;
263  }
264 
265  nnz = 0;
266  for (i = 0; i < num_vertices; ++i) {
267  rs = row_start[i];
268  re = row_start[i+1];
269 
270  c = num_vertices;
271  while(rs < re) {
272  if (c != col_index[rs]) {
273  // This is an unseen nonzero
274 
275  c = col_index[rs];
276  ++mRowStart[i];
277  ++nnz;
278  }
279 
280  if (col_instr[rs] >= 0) {
281  mAccumulation[col_instr[rs]] = nnz - 1;
282  }
283  else {
284  mAccumulation[-col_instr[rs]] = 1 - nnz;
285  }
286 
287  ++rs;
288  }
289  }
290 
291  nnz = 0;
292  for (i = 0; i < num_vertices; ++i) {
293  j = mRowStart[i];
294  mRowStart[i] = nnz;
295  nnz += j;
296  }
297  mRowStart[i] = nnz;
298 
299  delete [] col_instr;
300 
301  // Fill in the compacted hessian matrix
302 
303  mColIndex = new size_t[nnz];
304 
305  for (i = 0; i < num_vertices; ++i) {
306  rs = row_start[i];
307  re = row_start[i+1];
308 
309  c = num_vertices;
310  while(rs < re) {
311  if (c != col_index[rs]) {
312  // This is an unseen nonzero
313 
314  c = col_index[rs];
315  mColIndex[mRowStart[i]] = c;
316  mRowStart[i]++;
317  }
318  ++rs;
319  }
320  }
321 
322  for (i = num_vertices-1; i > 0; --i) {
323  mRowStart[i+1] = mRowStart[i];
324  }
325  mRowStart[1] = mRowStart[0];
326  mRowStart[0] = 0;
327 
328  delete [] row_start;
329  delete [] col_index;
330 
331  mEntries = new Matrix3D[nnz]; // On Solaris, no initializer allowed for new of an array
332  for (i=0;i<nnz;++i) mEntries[i] = 0.; // so we initialize all entries manually.
333 
334  origin_pd = &pd;
335 
336  return;
337 }
338 
339 
341 void MsqHessian::get_diagonal_blocks(msq_std::vector<Matrix3D> &diag,
342  MsqError &/*err*/)
343 {
344  // make sure we have enough memory, so that no reallocation is needed later.
345  if (diag.size() != size()) {
346  diag.reserve(size());
347  }
348 
349  for (size_t i=0; i<size(); ++i) {
350  diag[i] = mEntries[mRowStart[i]];
351  }
352 }
353 
354 
358 void MsqHessian::compute_preconditioner(MsqError &/*err*/)
359 {
360  // reallocates arrays if size of the Hessian has changed too much.
361  if (mSize > precondArraySize || mSize < precondArraySize/10 ) {
362  delete[] mPreconditioner;
363  mPreconditioner = new Matrix3D[mSize];
364  }
365 
366  Matrix3D* diag_block;
367  double sum, tmp;
368  size_t m;
369  // For each diagonal block, the (inverted) preconditioner is
370  // the inverse of the sum of the diagonal entries.
371  for (m=0; m<mSize; ++m) {
372  diag_block = mEntries + mRowStart[m]; // Gets block at position m,m .
373 
374 #if DIAGONAL_PRECONDITIONER
375  // find sum, and computes inverse, or 0 if sum = 0 .
376  sum = (*diag_block)[0][0] + (*diag_block)[1][1] + (*diag_block)[2][2];
377  double inv_sum;
378  if (sum != 0.)
379  inv_sum = 1 / sum;
380  else
381  inv_sum = 0.;
382 
383  mPreconditioner[m][0][0] = inv_sum;
384  mPreconditioner[m][1][1] = inv_sum;
385  mPreconditioner[m][2][2] = inv_sum;
386 #else
387  // calculate LDL^T factorization of the diagonal block
388  // L = [1 pre[0][1] pre[0][2]]
389  // [0 1 pre[1][2]]
390  // [0 0 1 ]
391  // inv(D) = [pre[0][0] 0 0 ]
392  // [0 pre[1][1] 0 ]
393  // [0 0 pre[2][2]]
394 
395  if ((*diag_block)[0][0] == 0.0) {
396  // Either this is a fixed vertex or the diagonal block is not
397  // invertible. Switch to the diagonal preconditioner in this
398  // case.
399 
400  sum = (*diag_block)[0][0] + (*diag_block)[1][1] + (*diag_block)[2][2];
401  if (sum != 0.0)
402  sum = 1 / sum;
403 
404  mPreconditioner[m][0][0] = sum;
405  mPreconditioner[m][0][1] = 0.0;
406  mPreconditioner[m][0][2] = 0.0;
407  mPreconditioner[m][1][1] = sum;
408  mPreconditioner[m][1][2] = 0.0;
409  mPreconditioner[m][2][2] = sum;
410  }
411  else {
412  mPreconditioner[m][0][0] = 1.0 / (*diag_block)[0][0];
413  mPreconditioner[m][0][1] = (*diag_block)[0][1] * mPreconditioner[m][0][0];
414  mPreconditioner[m][0][2] = (*diag_block)[0][2] * mPreconditioner[m][0][0];
415 
416  mPreconditioner[m][1][1] =
417  1.0 / ((*diag_block)[1][1] -
418  (*diag_block)[0][1] * mPreconditioner[m][0][1]);
419 
420  tmp = (*diag_block)[1][2] -
421  (*diag_block)[0][2] * mPreconditioner[m][0][1];
422 
423  mPreconditioner[m][1][2] = mPreconditioner[m][1][1] * tmp;
424 
425  mPreconditioner[m][2][2] =
426  1.0 / ((*diag_block)[2][2] -
427  (*diag_block)[0][2]*mPreconditioner[m][0][2] -
428  mPreconditioner[m][1][2]*tmp);
429  }
430 #endif
431  }
432 }
433 
434 
440 void MsqHessian::cg_solver(Vector3D x[], Vector3D b[], MsqError &err)
441 {
442  MSQ_FUNCTION_TIMER( "MsqHessian::cg_solver" );
443 
444  // reallocates arrays if size of the Hessian has changed too much.
445  if (mSize > cgArraySizes || mSize < cgArraySizes/10 ) {
446  delete[] mR;
447  delete[] mZ;
448  delete[] mP;
449  delete[] mW;
450  mR = new Vector3D[mSize];
451  mZ = new Vector3D[mSize];
452  mP = new Vector3D[mSize];
453  mW = new Vector3D[mSize];
454  cgArraySizes = mSize;
455  }
456 
457  size_t i;
458  double alpha_, alpha, beta;
459  double cg_tol = 1e-2; // 1e-2 will give a reasonably good solution (~1%).
460  double norm_g = length(b, mSize);
461  double norm_r = norm_g;
462  double rzm1; // r^T_{k-1} z_{k-1}
463  double rzm2; // r^T_{k-2} z_{k-2}
464 
465  this->compute_preconditioner(err); MSQ_CHKERR(err); // get M^{-1} for diagonal blocks
466 
467  for (i=0; i<mSize; ++i) x[i] = 0. ;
468  for (i=0; i<mSize; ++i) mR[i] = -b[i] ; // r = -b because x_0 = 0 and we solve H*x = -b
469  norm_g *= cg_tol;
470 
471  this->apply_preconditioner(mZ, mR, err); // solve Mz = r (computes z = M^-1 r)
472  for (i=0; i<mSize; ++i) mP[i] = mZ[i] ; // p_1 = z_0
473  rzm1 = inner(mZ,mR,mSize); // inner product r_{k-1}^T z_{k-1}
474 
475  size_t cg_iter = 0;
476  while ((norm_r > norm_g) && (cg_iter < maxCGiter)) {
477  ++cg_iter;
478 
479  axpy(mW, mSize, *this, mP, mSize, 0,0,err); // w = A * p_k
480 
481  alpha_ = inner(mP,mW,mSize); // alpha_ = p_k^T A p_k
482  if (alpha_ <= 0.0) {
483  if (1 == cg_iter) {
484  for (i=0; i<mSize; ++i) x[i] += mP[i]; // x_{k+1} = x_k + p_{k+1}
485  }
486  break; // Newton goes on with this direction of negative curvature
487  }
488 
489  alpha = rzm1 / alpha_;
490 
491  for (i=0; i<mSize; ++i) x[i] += alpha*mP[i]; // x_{k+1} = x_k + alpha_{k+1} p_{k+1}
492  for (i=0; i<mSize; ++i) mR[i] -= alpha*mW[i]; // r_{k+1} = r_k - alpha_{k+1} A p_{k+1}
493  norm_r = length(mR, mSize);
494 
495  this->apply_preconditioner(mZ, mR, err); // solve Mz = r (computes z = M^-1 r)
496 
497  rzm2 = rzm1;
498  rzm1 = inner(mZ,mR,mSize); // inner product r_{k-1}^T z_{k-1}
499  beta = rzm1 / rzm2;
500  for (i=0; i<mSize; ++i) mP[i] = mZ[i] + beta*mP[i]; // p_k = z_{k-1} + Beta_k * p_{k-1}
501  }
502 }
503 
504 /* ------------------ I/O ----------------- */
505 
507 msq_stdio::ostream& operator<<(msq_stdio::ostream &s, const MsqHessian &h)
508 {
509  size_t i,j;
510  s << "MsqHessian of size: " << h.mSize <<"x"<< h.mSize << "\n";
511  for (i=0; i<h.mSize; ++i) {
512  s << " ROW " << i << " ------------------------\n";
513  for (j=h.mRowStart[i]; j<h.mRowStart[i+1]; ++j) {
514  s << " column " << h.mColIndex[j] << " ----\n";
515  s << h.mEntries[j];
516  }
517  }
518  return s;
519 }
520 
521 } // namespace Mesquite
522 
subroutine rs(nm, n, a, w, matz, z, fv1, fv2, ierr)
void axpy(Vector3D res[], size_t size_r, const MsqHessian &H, const Vector3D x[], size_t size_x, const Vector3D y[], size_t size_y, MsqError &)
double s
Definition: blastest.C:80
double length(Vector3D *const v, int n)
NVec< 3, double > Vector3D
#define MSQ_CHKERR(err)
Mesquite&#39;s Error Checking macro.
#define MSQ_SETERR(err)
Macro to set error - use err.clear() to clear.
blockLoc i
Definition: read.cpp:79
void int int REAL * x
Definition: read.cpp:74
j indices j
Definition: Indexing.h:6
std::ostream & operator<<(std::ostream &os, const COM_exception &ex)
Print out a given exception.
double inner(const Vector3D lhs[], const Vector3D rhs[], int n)