MoDeNa  1.0
Software framework facilitating sequential multi-scale modelling
dagmg.f90
1 ! This file is part of AGMG software package,
2 ! Release 3.1.1 built on "Nov 23 2011"
3 !
4 ! AGMG is free software: you can redistribute it and/or modify
5 ! it under the terms of the GNU General Public License as published by
6 ! the Free Software Foundation, either version 3 of the License, or
7 ! (at your option) any later version.
8 !
9 ! AGMG is distributed in the hope that it will be useful,
10 ! but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 ! GNU General Public License for more details.
13 !
14 ! You should have received a copy of the GNU General Public License
15 ! along with AGMG. If not, see <http://www.gnu.org/licenses/>.
16 !
17 ! Up-to-date copies of the AGMG package can be obtained
18 ! from the Web pages <http://homepages.ulb.ac.be/~ynotay/AGMG>.
19 !
20 ! You can acknowledge, citing references [1] [2], and [3], the contribution
21 ! of this package in any scientific publication dependent upon it use.
22 !
23 ! [1] Y. Notay, An aggregation-based algebraic multigrid method,
24 ! Electronic Transactions on Numerical Analysis, vol. 37, pp. 123-146, 2010
25 !
26 ! [2] A. Napov and Y. Notay, An algebraic multigrid method with guaranteed
27 ! convergence rate, Report GANMN 10-03, Universite Libre de Bruxelles,
28 ! Brussels, Belgium, 2010.
29 !
30 ! [3] Y. Notay, Aggregation-based algebraic multigrid for convection-diffusion
31 ! equations, Report GANMN 11-01, Universite Libre de Bruxelles, Brussels,
32 ! Belgium, 2011.
33 !
34 ! See the accompanying userguide for more details on how to use the software,
35 ! and the README file for installation instructions.
36 !
37 ! AGMG Copyright (C) 2011 Yvan NOTAY
38 !
39 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
40 !
41 ! This file provides dagmg and dependencies; dagmg is a sequential
42 ! implementation for real matrices in double precision
43 ! of the method presented in [1], where the used algorithms are described
44 ! in detail. From realease 3.x, the coarsening has been modified according
45 ! to the results in [2,3], see the release notes in the README file
46 ! for more details.
47 !
48 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
49 !!!!!!!!! SUBROUTINE dagmg (MAIN DRIVER): see bottom of this file
50 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
51 !-----------------------------------------------------------------------
52 !!!!!!!!!!!!!!!!!!! PARAMETERS DEFINITON - may be tuned by expert users
53 !-----------------------------------------------------------------------
54  MODULE dagmg_mem
55  SAVE
56 !
57 ! INTEGER
58 !
59 ! maxlev is the maximal number of levels
60 ! (should be large enough - much larger than needed is armless).
61 ! real_len is the length of 1 REAL(kind(0.0d0)) in byte
62 ! (used only to display information on memory usage).
63 ! nsmooth is the number of pre- and post-smoothing steps;
64 ! smoothtype indicates which smoother use:
65 ! if smoothtype==1, the smoothing scheme is
66 ! pre-smoothing: Forward Gauss-Seidel, then Backward GS, then Fwd GS,etc
67 ! post-smoothing: Backward Gauss-Seidel, then Forward GS, then Bwd GS,etc
68 ! (nsmooth sweeps for both pre- and post-smoothing)
69 ! nstep is the maximum number of coarsening steps
70 ! nstep<0 means that coarsening is stopped only according to
71 ! the matrix size, see parameter maxcoarsesize.
72 ! nlvcyc is the number of coarse levels (from bottom) on which V-cycle
73 ! formulation is enforced (Rmk: K-cycle always allowed at first
74 ! coarse level).
75 ! npass is the maximal number of pairwise aggregation passes for each
76 ! coarsening step, according to the algorithms in [2,3].
77 ! maxcoarsesize: when the size of the coarse grid matrix is less than or
78 ! equal to maxcoarsesize*N^(1/3), it is factorized exactly
79 ! and coarsening is stopped;
80 ! maxcoarsesizeslow: in case of slow coarsening,
81 ! exact factorization can be performed when the size of
82 ! the coarse grid matrix is less than or equal to
83 ! maxcoarsesizeslow*N^(1/3).
84 ! (N is the number of rows of the input matrix)
85  INTEGER, PARAMETER :: maxlev=40, real_len=8
86  INTEGER, PARAMETER :: nsmooth=1, smoothtype=1, nstep=-1, nlvcyc=0
87  INTEGER, PARAMETER :: npass=2,maxcoarsesize=400,maxcoarsesizeslow=4000
88 !
89 ! REAL
90 !
91 ! resi is the threshold t for the relative residual error in inner FCG & GCR
92 ! iterations, see Algorithm 3.2 in [1]
93 ! trspos is a threshold: if a row has a positive offdiagonal entry larger
94 ! than trspos times the diagonal entry, the corresponding node is
95 ! transferred unaggregated to the coarse grid
96 ! kaptg_ is the threshold used to accept or not a tentative aggregate
97 ! when applying the coarsening algorithms from [2,3];
98 ! kaptg_blocdia is used for control based on bloc diagonal smoother [2];
99 ! kaptg_dampJac is used for control based on Jacobi smoother [3].
100 ! checkdd is the threshold to keep outside aggregation nodes where
101 ! the matrix is strongly diagonally dominant (based on mean of row
102 ! and column);
103 ! In fact, AGMG uses the maximum of |checkdd| and of the default value
104 ! according to kaptg_ as indicated in [2,3]
105 ! (hence |checkdd| < 1 ensures that one uses this default value)
106 ! checkdd <0 : consider |checkdd|, but base the test on minus the
107 ! sum of offdiagonal elements, without taking the absolute value.
108 ! targetcoarsefac is the target coarsening factor (parameter tau in the main
109 ! coarsening algorithm in [2,3]): further pairwise aggregation passes
110 ! are omitted once the number of nonzero entries has been reduced by a
111 ! factor of at least targetcoarsefac.
112 ! fracnegrcsum: if, at some level, more than fracnegrcsum*nl nodes,
113 ! where nl is the total number of nodes at that level, have
114 ! negative mean row and column sum, then the aggregation algorithm
115 ! of [2,3] is modified, exchanging all diagonal entries for the mean
116 ! row and column sum (that is, the algorithm is applied to a
117 ! modified matrix with mean row and colum sum enforced to be zero).
118  REAL(kind(0.0d0)), PARAMETER :: resi=0.2, trspos=0.45
119  REAL(kind(0.0d0)), PARAMETER :: kaptg_blocdia=8, kaptg_dampjac=10
120  REAL(kind(0.0d0)), PARAMETER :: checkdd=0.5
121  REAL(kind(0.0d0)), PARAMETER :: targetcoarsefac=2.0**npass
122  REAL(kind(0.0d0)), PARAMETER :: fracnegrcsum=0.25
123 !!!!!!!!!!!!!!!!!!!! END of PARAMETERS DEFINITION -----------------
124 !!!!!!!!!!!!!!!!!!! Internal variables declaration
125 !
127  REAL(kind(0.0d0)), DIMENSION(:), POINTER :: a
128  INTEGER, DIMENSION(:), POINTER :: ja
129  INTEGER, DIMENSION(:), POINTER :: ia
130  INTEGER, DIMENSION(:), POINTER :: il
131  INTEGER, DIMENSION(:), POINTER :: iu
132  REAL(kind(0.0d0)), DIMENSION(:), POINTER :: p
133  INTEGER, DIMENSION(:), POINTER :: idiag
134  INTEGER, DIMENSION(:), POINTER :: ind
135  INTEGER, DIMENSION(:), POINTER :: iext
136  INTEGER, DIMENSION(:), POINTER :: ilstout
137  INTEGER, DIMENSION(:), POINTER :: lstout
138  INTEGER, DIMENSION(:), POINTER :: ilstin
139  INTEGER, DIMENSION(:), POINTER :: lstin
140  END TYPE innerdata
141 !
142  TYPE(innerdata) :: dt(maxlev)
143  REAL(kind(0.0d0)), ALLOCATABLE :: scald(:)
144  INTEGER :: nn(maxlev),kstat(2,maxlev)=0,innermax(maxlev)
145  INTEGER :: nlev,nwrkcum,iout,nrst
146  INTEGER :: maxcoarset=maxcoarsesize,maxcoarseslowt=maxcoarsesizeslow
147  REAL(kind(0.0d0)) :: memi=0.0,memax=0.0,memr=0.0,mritr,rlenilen
148  REAL(kind(0.0d0)) :: wcplex(4),fracnz(maxlev)
149  LOGICAL :: spd,wfo,wff,allzero,trans,transint,zerors,gcrin
150  REAL(kind(0.0d0)), PARAMETER :: cplxmax=3.0, xsi=0.6d0
151  REAL(kind(0.0d0)), PARAMETER :: repsmach=1.49d-8 !SQRT(EPSILON(1.0d0)) !because of absoft
152  INTEGER :: nlc(2),nlcp(2),nlc1(2),icum,imult
153  REAL(kind(0.0d0)) :: ngl(2),nglp(2),nlctot(2),ngl1(2),ngltot(2)
154  INTEGER, DIMENSION(:), POINTER :: iextl1
155  INTEGER, PARAMETER :: irank=-9999
156  REAL(kind(0.0d0)), PARAMETER :: &
157  checkddj=1.25d0 !MAX(ABS(checkdd),kaptg_dampJac/(kaptg_dampJac-2)) !because of absoft
158  REAL(kind(0.0d0)), PARAMETER :: &
159  checkddb=1.28571428571429d0 !MAX(ABS(checkdd),(kaptg_blocdia+1)/(kaptg_blocdia-1)) !because of absoft
160  END MODULE dagmg_mem
161 !-----------------------------------------------------------------------
162 !!!!!!!!!!!!!!!!!!! END of Internal variables declaration
163 !-----------------------------------------------------------------------
164 !!!!!!!!!!!!!!!!!!! TIMING
165 !-----------------------------------------------------------------------
166  SUBROUTINE dagmg_mestime(id,cputm,eltm)
167  IMPLICIT NONE
168  INTEGER, SAVE :: cpt_init(10)=-1,cpt_fin,cpt_max,freq,cpt
169  REAL, SAVE :: t1(10), t2
170  REAL(kind(0.0d0)) :: cputm,eltm
171  INTEGER :: id
172  IF (id>0) THEN
173  !Next line may be uncommented if FORTRAN 95 function
174  !CPU_TIME is implemented
175  ! CALL CPU_TIME(t2)
176  CALL system_clock(cpt_fin,freq,cpt_max)
177  !
178  cpt = cpt_fin - cpt_init(id)
179  IF (cpt_fin < cpt_init(id)) cpt = cpt + cpt_max
180  eltm = dble(cpt) / freq
181  cputm = dble(t2 - t1(id))
182  !
183  ELSE
184  !
185  CALL system_clock(cpt_init(-id),freq,cpt_max)
186  !Next line may be uncommented if FORTRAN 95 function
187  !CPU_TIME is implemented
188  ! CALL CPU_TIME(t1(-id))
189  !
190  END IF
191  RETURN
192  END SUBROUTINE dagmg_mestime
193 !-----------------------------------------------------------------------
194 !!!!!!!!!!!!!!!!!!! END of TIMING
195 !-----------------------------------------------------------------------
196 MODULE dagmg_allroutines
197 CONTAINS
198 !-----------------------------------------------------------------------
199 !!!!!!!!!!!!!!!!!!! MEMORY RELEASE
200 !-----------------------------------------------------------------------
201  SUBROUTINE dagmg_relmem
202  USE dagmg_mem
203  IMPLICIT NONE
204  INTEGER :: l
205  !
206  DO l=1,nlev-1
207  IF(nn(l) > 0) DEALLOCATE(dt(l)%a,dt(l)%ja,dt(l)%il,dt(l)%iu)
208  IF (nn(l+1) > 0) DEALLOCATE(dt(l)%ind)
209  END DO
210  memi=0
211  memr=0
212  memax=0
213  RETURN
214  END SUBROUTINE dagmg_relmem
215 !-----------------------------------------------------------------------
216 !!!!!!!!!!!!!!!!!!! END of MEMORY RELEASE
217 !-----------------------------------------------------------------------
218 !!!!!!!!!!!!!!!!!!! APPLY MG PRECONDITIONER
219 !-----------------------------------------------------------------------
220  SUBROUTINE dagmg_applyprec( N,f,X,a,ja,ia,flop)
221  USE dagmg_mem
222  IMPLICIT NONE
223  INTEGER :: n
224  REAL(kind(0.0d0)) :: flop
225  REAL(kind(0.0d0)) :: f(n), x(n)
226  INTEGER :: ja(*), ia(n+1)
227  REAL(kind(0.0d0)) :: a(*)
228  REAL(kind(0.0d0)), ALLOCATABLE :: s(:)
229  !
230  mritr=nwrkcum+n
231  ALLOCATE (s(nwrkcum+n))
232  CALL dagmg_prec_matv(1,f,x,s,flop,s(n+1),.false.)
233  kstat(2,1)=kstat(2,1)+1
234  DEALLOCATE (s)
235  !
236  RETURN
237  END SUBROUTINE dagmg_applyprec
238 !-----------------------------------------------------------------------
239 !!!!!!!!!!!!!!!!!!! END of APPLY MG PRECONDITIONER
240 !-----------------------------------------------------------------------
241 !!!!!!!!!!!!!!!!!!! PARTIAL ORDERING OF MATRIX ROWS
242 !-----------------------------------------------------------------------
243  SUBROUTINE dagmg_partroword(n, a, ja, ia, idiag, w, iw, iext)
244 !
245 ! Performs partial ordering of matrix rows:
246 ! first lower part, next diagonal, eventually upper part;
247 ! idiag(i) is set such that it points to diagonal element
248 !
249 ! w,iw: working arrays; length should be at least equal to the maximum
250 ! number of entries in (the uper part of) a row
251 !
252  IMPLICIT NONE
253  INTEGER :: n, ja(*), ia(n+1), idiag(n)
254  INTEGER, OPTIONAL :: iext(*)
255  INTEGER, TARGET :: iw(*)
256  REAL(kind(0.0d0)) :: a(*), p
257  REAL(kind(0.0d0)), TARGET :: w(*)
258  INTEGER :: i, j, k, ipos, nzu, nze
259  DO i = 1, n
260  ! find diag, save upper part, concatenate lower part
261  ipos = ia(i)
262  nzu = 0
263  nze = 0
264  DO k = ia(i), ia(i+1) - 1
265  j = ja(k)
266  IF (j.GT.i) THEN
267  nzu = nzu + 1
268  w(nzu) = a(k)
269  iw(nzu) = j
270  ELSEIF (j.LT.i) THEN
271  a(ipos) = a(k)
272  ja(ipos) = j
273  ipos = ipos + 1
274  ELSE
275  p = a(k)
276  ENDIF
277  ENDDO
278  !
279  ! copy back diagonal entry
280  idiag(i) = ipos
281  a(ipos) = p
282  ja(ipos) = i
283  !
284  ! copy back upper part
285  DO k = 1, nzu
286  ipos = ipos + 1
287  a(ipos) = w(k)
288  ja(ipos) = iw(k)
289  ENDDO
290  !
291  ENDDO
292  RETURN
293  END SUBROUTINE dagmg_partroword
294 !-----------------------------------------------------------------------
295 !!!!!!!!!!!!!!!!!!! END of PARTIAL ORDERING OF MATRIX ROWS
296 !-----------------------------------------------------------------------
297 !!!!!!!!!!!!!!!!!!! CSR TO DLU FORMAT
298 !-----------------------------------------------------------------------
299  SUBROUTINE dagmg_csrdlu(n,a,ja,ia,idiag,ao,jao,il,iu,trans,iext,iexto)
300 !
301 ! Convert matrix in csr format with partial orederings of the rows
302 ! into separate d+l+u format:
303 ! first diagonal elements in ao(1:n),
304 ! next rows of lower part (pointer: il),
305 ! and thereafter the rows of upper part (pointer: iu)
306 !
307 ! If trans==.TRUE. , performs simultaneously the transposition of the matrix
308 !
309 ! If trans==.FASLE.,
310 ! il, iu (iexto) may point to the same memory location as ia, idiag (iext)
311 ! (if idiag (iext) has length at least n+1 in the calling program)
312 !
313 ! Pay attention that joa is defined as jao(n+1:*)
314 ! (entries jao(1:n) not needed with this format)
315 !
316  IMPLICIT NONE
317  INTEGER :: n, ja(*), ia(n+1), idiag(n), jao(n+1:*), il(n+1), iu(n+1)
318  INTEGER, OPTIONAL :: iext(n),iexto(n+1)
319  REAL(kind(0.0d0)) :: a(*),ao(*)
320  INTEGER :: i,j,ili,iui,iei,ipos,ili0,iui0,iei0,nl,nu,k,next
321  LOGICAL :: trans
322  !
323  ! First set il(i), iu(i) and iext(i) equal to the number of elements in
324  ! repsective rows;
325  ! compute total number of elements in lower and upper part
326  !
327  nl=0
328  nu=0
329  IF (.NOT.trans) THEN
330  DO i=1,n
331  ili=idiag(i)-ia(i)
332  iui=ia(i+1)-idiag(i)-1
333  iu(i)=iui
334  il(i)=ili
335  nl=nl+ili
336  nu=nu+iui
337  END DO
338  ELSE
339  il(2:n+1)=0
340  iu(2:n+1)=0
341  DO i=1,n
342  iui=idiag(i)-ia(i)
343  ili=ia(i+1)-idiag(i)-1
344  nl=nl+ili
345  nu=nu+iui
346  DO k=ia(i),idiag(i)-1
347  iu(ja(k)+1)=iu(ja(k)+1)+1
348  END DO
349  DO k=idiag(i)+1,ia(i+1)-1
350  il(ja(k)+1)=il(ja(k)+1)+1
351  END DO
352  END DO
353  END IF
354  !
355  ! Now perform actual copying in new format
356  !
357  ipos=0
358  ili=n+1
359  iui=ili+nl
360  iei=iui+nu
361  IF (.NOT.trans) THEN
362  DO i=1,n
363  ili0=ili
364  iui0=iui
365  DO j=1,il(i)
366  ipos=ipos+1
367  ao(ili)=a(ipos)
368  jao(ili)=ja(ipos)
369  ili=ili+1
370  END DO
371  ipos=ipos+1
372  ao(i)=a(ipos)
373  DO j=1,iu(i)
374  ipos=ipos+1
375  ao(iui)=a(ipos)
376  jao(iui)=ja(ipos)
377  iui=iui+1
378  END DO
379  iu(i)=iui0
380  il(i)=ili0
381  END DO
382  iu(n+1)=iui
383  il(n+1)=ili
384  ELSE
385  ! compute pointers from lengths
386  il(1)=ili
387  iu(1)=iui
388  DO i=1,n
389  il(i+1) = il(i) + il(i+1)
390  iu(i+1) = iu(i) + iu(i+1)
391  END DO
392  ! now do the actual copying
393  DO i=1,n
394  DO k=ia(i),idiag(i)-1
395  j = ja(k)
396  next = iu(j)
397  ao(next) = a(k)
398  jao(next) = i
399  iu(j) = next+1
400  END DO
401  ao(i)=a(idiag(i))
402  DO k=idiag(i)+1,ia(i+1)-1
403  j = ja(k)
404  next = il(j)
405  ao(next) = a(k)
406  jao(next) = i
407  il(j) = next+1
408  END DO
409  END DO
410  ! reshift il, iu and leave
411  DO i=n,1,-1
412  il(i+1) = il(i)
413  iu(i+1) = iu(i)
414  END DO
415  il(1)=ili
416  iu(1)=iui
417  END IF
418  RETURN
419  END SUBROUTINE dagmg_csrdlu
420 !-----------------------------------------------------------------------
421 !!!!!!!!!!!!!!!!!!! END of CSR TO DLU FORMAT
422 !-----------------------------------------------------------------------
423 !!!!!!!!!!!!!!!!!!! MATRIX VECTOR MULTIPLICATION
424 !-----------------------------------------------------------------------
425  SUBROUTINE dagmg_csrmv(n, a, ja, ifja, ia, x, ifx, y, iad, flop, lstin)
426 !
427 ! Perform multiplication of vector x by matrix A in (n,a,ja,ia)
428 ! in csr format; result in y
429 !
430 ! iad==0,1 : y = A*x
431 ! iad==-1 : y = -A*x
432 ! iad > 1 : y := y+A*x
433 ! iad <-1 : y := y-A*x
434 !
435 ! ja declared as ja(ifja:*) for compatibility with d+l+u format
436 ! (can serve for multiplication by lower or upper part setting ifja=n)
437 ! standard usage: ifja=1
438 !
439 ! x declared as x(ifx:*) for the need of the parallel version;
440 ! standard usage: ifx=1
441 !
442 ! standard usage: lstin <0
443 ! otherwise, lstin decalred as lstin(0:*), and
444 ! consider only rows listed in lstin(1:lstin(0))
445 !
446 ! flop : floating point ops counter
447 !
448  !
449  IMPLICIT NONE
450  INTEGER :: n, ifja, ja(ifja:*), ia(n+1), iad, ifx, i, k, j
451  INTEGER, OPTIONAL :: lstin(0:*)
452  REAL(kind(0.0d0)) :: a(*), x(ifx:*), y(n), t
453  REAL(kind(0.0d0)) :: flop
454  !
455  IF (iad .LT. -1) THEN
456  DO i=1,n
457  t=y(i)
458  DO k=ia(i),ia(i+1)-1
459  t = t - a(k)*x(ja(k))
460  END DO
461  y(i)=t
462  END DO
463  ELSE IF (iad .GT. 1) THEN
464  DO i=1,n
465  t=y(i)
466  DO k=ia(i),ia(i+1)-1
467  t = t + a(k)*x(ja(k))
468  END DO
469  y(i)=t
470  END DO
471  ELSE IF (iad .EQ. -1) THEN
472  DO i=1,n
473  t=0.0d0
474  DO k=ia(i),ia(i+1)-1
475  t = t - a(k)*x(ja(k))
476  END DO
477  y(i)=t
478  END DO
479  ELSE
480  DO i=1,n
481  t=0.0d0
482  DO k=ia(i),ia(i+1)-1
483  t = t + a(k)*x(ja(k))
484  END DO
485  y(i)=t
486  END DO
487  END IF
488  !
489  flop=flop+dble(2*(ia(n+1)-ia(1)))
490  RETURN
491  END SUBROUTINE dagmg_csrmv
492 !-----------------------------------------------------------------------
493 !!!!!!!!!!!!!!!!!!! END of MATRIX VECTOR MULTIPLICATION
494 !-----------------------------------------------------------------------
495 !!!!!!!!!!!!!!!!!!! TRIANGULAR SOLVES
496 !-----------------------------------------------------------------------
497  SUBROUTINE dagmg_csrlsolve(n, a, ja, ifja, ia, p, x, y, iunit, flop)
498 !
499 ! Perform triangular solve with lower triangular matrix L;
500 ! srtrict lower part in (n,a,ja,ia) in csr format and
501 ! INVERSE of the diagonal in p(1:n)
502 ! result in x
503 !
504 ! x and y may point to the same memory location
505 !
506 ! iunit >0 : x = inv(L) * y
507 ! iunit <0 : x = inv( inv(diag(L))*L ) * y = inv(P*L) * y
508 ! iunit==0 : any of the above assuming unit diagonal (p not accessed)
509 !
510 ! ja declared as ja(ifja:*) for compatibility with d+l+u format
511 !
512 ! flop : floating point ops counter
513 !
514  IMPLICIT NONE
515  INTEGER :: n, ifja, ja(ifja:*), ia(n+1), iunit, i, k
516  REAL(kind(0.0d0)) :: a(*), p(n), x(n), y(n), t
517  REAL(kind(0.0d0)) :: flop
518 !
519  IF (iunit .LT. 0) THEN
520  x(1) = y(1)
521  DO i=2,n
522  t = 0.0d0
523  DO k=ia(i),ia(i+1)-1
524  t = t - a(k)*x(ja(k))
525  END DO
526  x(i) = y(i) + p(i)*t
527  END DO
528  flop=flop+dble(n+2*(ia(n+1)-ia(1)))
529  ELSE IF (iunit .GT. 0) THEN
530  x(1) = p(1)*y(1)
531  DO i=2,n
532  t = y(i)
533  DO k=ia(i),ia(i+1)-1
534  t = t - a(k)*x(ja(k))
535  END DO
536  x(i) = p(i)*t
537  END DO
538  flop=flop+dble(n+2*(ia(n+1)-ia(1)))
539  ELSE
540  x(1) = y(1)
541  DO i=2,n
542  t = y(i)
543  DO k=ia(i),ia(i+1)-1
544  t = t - a(k)*x(ja(k))
545  END DO
546  x(i) = t
547  END DO
548  flop=flop+dble(2*(ia(n+1)-ia(1)))
549  END IF
550  RETURN
551  END SUBROUTINE dagmg_csrlsolve
552 !-----------------------------------------------------------------------
553  SUBROUTINE dagmg_csrusolve(n, a, ja, ifja, ia, p, x, y, iunit,flop)
554 !
555 ! Perform triangular solve with upper triangular matrix U;
556 ! srtrict upper part in (n,a,ja,ia) in csr format and
557 ! INVERSE of the diagonal in p(1:n)
558 ! result in x
559 !
560 ! x and y may point to the same memory location
561 !
562 ! iunit >0 : x = inv(U) * y
563 ! iunit <0 : x = inv( inv(diag(U))*U ) * y = inv(P*U) * y
564 ! iunit==0 : any of the above assuming unit diagonal (p not accessed)
565 !
566 ! ja declared as ja(ifja:*) for compatibility with d+l+u format
567 !
568 ! flop : floating point ops counter
569 !
570  IMPLICIT NONE
571  INTEGER :: n, ifja, ja(ifja:*), ia(n+1), iunit, i, k
572  REAL(kind(0.0d0)) :: a(*), p(n), x(n), y(n), t
573  REAL(kind(0.0d0)) :: flop
574 !
575  IF (iunit .LT. 0) THEN
576  x(n) = y(n)
577  DO i=n-1,1,-1
578  t = 0.0d0
579  DO k=ia(i),ia(i+1)-1
580  t = t - a(k)*x(ja(k))
581  END DO
582  x(i) = y(i) + p(i)*t
583  END DO
584  flop=flop+dble(n+2*(ia(n+1)-ia(1)))
585  ELSE IF (iunit .GT. 0) THEN
586  x(n) = p(n)*y(n)
587  DO i=n-1,1,-1
588  t = y(i)
589  DO k=ia(i),ia(i+1)-1
590  t = t - a(k)*x(ja(k))
591  END DO
592  x(i) = p(i)*t
593  END DO
594  flop=flop+dble(n+2*(ia(n+1)-ia(1)))
595  ELSE
596  x(n) = y(n)
597  DO i=n-1,1,-1
598  t = y(i)
599  DO k=ia(i),ia(i+1)-1
600  t = t - a(k)*x(ja(k))
601  END DO
602  x(i) = t
603  END DO
604  flop=flop+dble(2*(ia(n+1)-ia(1)))
605  END IF
606  RETURN
607  END SUBROUTINE dagmg_csrusolve
608 !-----------------------------------------------------------------------
609 !!!!!!!!!!!!!!!!!!! END of TRIANGULAR SOLVES
610 !-----------------------------------------------------------------------
611 !!!!!!!!!!!!!!!!!!! ROUTINES FOR ITERATIVE SOLUTION
612 !-----------------------------------------------------------------------
613  SUBROUTINE dagmg_flexcg(N,f,X,ITER,RESID,a,ja,ia,init,flop)
614  !
615  ! Implements flexible conjugate gradient iterative solver
616  ! with MG preconditioner
617  !
618  USE dagmg_mem
619  IMPLICIT NONE
620  INTEGER :: n, iter, init
621  REAL(kind(0.0d0)) :: flop
622  REAL(kind(0.0d0)) :: f(n), x(n)
623  INTEGER :: ja(*), ia(n+1)
624  REAL(kind(0.0d0)) :: a(*)
625  INTEGER :: maxit, ierr, kk
626  REAL(kind(0.0d0)) :: tol, bnorm, resid, dum0
627  REAL(kind(0.0d0)) :: alpha, bet0, rho, dum(6), td
628  REAL(kind(0.0d0)) :: bet1
629  REAL(kind(0.0d0)), ALLOCATABLE :: sd(:)
630  REAL(kind(0.0d0)), ALLOCATABLE :: s(:),fsc(:)
631  REAL(kind(0.0d0)), external :: ddot
632  REAL(kind(0.0d0)), external :: dnrm2
633  INTEGER , parameter :: ione=1
634  !
635  mritr=nwrkcum+4*n
636  ALLOCATE (s(2*n+1:nwrkcum+5*n),sd(n))
637  !
638  flop=0.0d0
639  kstat=0
640  IF (wfo) THEN
641  WRITE(iout,940) irank
642  END IF
643  IF (wff) THEN
644  IF ( trans) THEN
645  WRITE(iout,941)
646  END IF
647  IF (smoothtype == 1) THEN
648  WRITE(iout,945) nsmooth
649  ELSE
650  WRITE(iout,946) nsmooth
651  END IF
652  WRITE(iout,947)
653  END IF
654  !
655  tol = resid
656  maxit = iter
657  resid = 1.0d0
658  iter = 0
659  dum(3) = dnrm2(n, f, ione)**2
660  flop=flop+dble(2*n)
661  !
662  ! compute initial residual
663  !
664  IF (init==1) THEN
665  CALL dagmg_matv(n,x,sd,a,ja,ia,flop,trans )
666  f(1:n)=f(1:n)-sd(1:n)
667  dum(2) = dnrm2(n, f, ione)**2
668  bnorm=sqrt(dum(3))
669  resid=sqrt(dum(2))
670  IF (bnorm.EQ.0.0d0) THEN
671  !
672  ! R.h.s. is trivial: exit with solution equal to the zero vector
673  !
674  IF(wff) THEN
675  WRITE(iout,998)
676  WRITE(iout,999)
677  END IF
678  x(1:n)=0.0d0
679  RETURN
680  END IF
681  resid=resid/bnorm
682  IF(wff.AND. (maxit <= 0 .OR. resid <= tol)) THEN
683  WRITE(iout,900) 0, resid*bnorm, resid
684  END IF
685  END IF
686  !
687  !
688  DO WHILE ( iter < maxit .AND. resid > tol )
689  iter = iter + 1
690  !
691  ! Preconditioner Solve.
692  !
693  CALL dagmg_prec_matv(1,f,s(1+3*n),s(1+4*n),flop,s(1+5*n),.false.)
694  !
695  ! Compute direction vector.
696  !
697  IF ( iter > 1 ) THEN
698  dum(1) = - ddot(n, sd, ione, s(1+3*n), ione)
699  bet0=dum(1)
700  bet1=bet0/rho
701  CALL daxpy(n, bet1, s(1+2*n), ione, s(1+3*n), ione)
702  flop=flop+dble(4*n)
703  ENDIF
704  CALL dcopy(n, s(1+3*n), ione, s(1+2*n), ione)
705  CALL dagmg_matv(n,s(1+2*n),sd,a,ja,ia,flop,trans )
706  !
707  dum(1) = ddot(n, s(1+2*n), ione, sd, ione)
708  dum(2) = ddot(n,s(1+2*n),ione,f,ione)
709  flop=flop+dble(4*n)
710  !
711  IF (iter==1) THEN
712  IF (init == 0) THEN
713  bnorm=sqrt(dum(3))
714  IF (bnorm.EQ.0.0d0) THEN
715  !
716  ! R.h.s. is trivial: exit with solution equal to the zero vector
717  !
718  IF(wff) THEN
719  WRITE(iout,998)
720  WRITE(iout,999)
721  END IF
722  x(1:n)=0.0d0
723  RETURN
724  END IF
725  END IF
726  IF(wff) THEN
727  WRITE(iout,900) 0, resid*bnorm, resid
728  END IF
729  ELSE
730  END IF
731  !
732  rho=dum(1)
733  alpha=dum(2)/rho
734  !
735  IF (iter == 1 .AND. init == 0) THEN
736  CALL dcopy(n,s(1+2*n),ione,x,ione)
737  CALL dscal(n,alpha,x,ione)
738  flop=flop+dble(n)
739  ELSE
740  CALL daxpy(n, alpha, s(1+2*n), ione, x, ione)
741  flop=flop+dble(2*n)
742  END IF
743  CALL daxpy(n, -alpha, sd, ione, f, ione)
744  !
745  dum0 = dnrm2(n,f,ione)**2
746  resid=dum0
747  resid=sqrt(resid)
748  resid=resid/bnorm
749  IF (wff) THEN
750  WRITE(iout,900) iter, resid*bnorm, resid
751  END IF
752  flop=flop+dble(4*n)
753  END DO
754  !
755  IF( resid > tol ) THEN
756  IF (irank <= 0) THEN
757  WRITE(iout,'()')
758  WRITE(iout,950) iter
759  WRITE(iout,951)
760  WRITE(iout,'()')
761  END IF
762  iter=-iter
763  ELSE
764  IF (wff) THEN
765  WRITE(iout,952) iter
766  WRITE(iout,'()')
767  END IF
768  END IF
769  !
770  kstat(2,1)=abs(iter)
771  !
772  DEALLOCATE(s,sd)
773  IF(ALLOCATED(fsc)) DEALLOCATE(fsc)
774  RETURN
775 900 FORMAT('**** Iter=',i5,' Resid=',e9.2, &
776  ' Relat. res.=',e9.2)
777 940 FORMAT(i3, &
778  '*SOLUTION: flexible conjugate gradient iterations (FCG(1))')
779 941 FORMAT('**** Rmk: solve system with the transpose of the input matrix')
780 945 FORMAT( '**** AMG preconditioner with',i2, &
781  ' Gauss-Seidel pre-and post-smoothing step(s)')
782 946 FORMAT( '**** AMG preconditioner with',i2, &
783  ' ILU(0) pre-and post-smoothing step(s)')
784 947 FORMAT( '**** at each level')
785 950 FORMAT('**** !!! WARNING!!!',i5,' ITERATIONS WERE')
786 951 FORMAT('**** INSUFFICIENT TO ACHIEVE THE DESIRED ACCURACY')
787 952 FORMAT('**** - Convergence reached in',i5,' iterations -')
788 998 FORMAT('**** The norm of the right hand side is zero:')
789 999 FORMAT('**** set x equal to the zero vector and exit')
790  !
791  END SUBROUTINE dagmg_flexcg
792 !-----------------------------------------------------------------------
793  SUBROUTINE dagmg_gcr(N,f,X,ITER,RESID,a,ja,ia,init,flop)
794  USE dagmg_mem
795  IMPLICIT NONE
796  INTEGER :: n, iter, init
797  REAL(kind(0.0d0)) :: flop
798  REAL(kind(0.0d0)) :: f(n), x(n)
799  INTEGER :: ja(*), ia(n+1)
800  REAL(kind(0.0d0)) :: a(*)
801  INTEGER :: maxit,i,itm,irst,ierr,j,k
802  REAL(kind(0.0d0)) :: alpha, bet0
803  REAL(kind(0.0d0)) :: resid, resid2, bnorm2, rho, trs
804  REAL(kind(0.0d0)) :: tol, tol2bnorm2, tolt, dum0
805  REAL(kind(0.0d0)) :: dum(6),xd,t
806  REAL(kind(0.0d0)), ALLOCATABLE :: sc(:),siv(:),sir(:)
807  REAL(kind(0.0d0)), ALLOCATABLE :: su(:),r(:),fsc(:),sw(:)
808  REAL(kind(0.0d0)), external :: ddot
809  REAL(kind(0.0d0)), external :: dnrm2
810  INTEGER , parameter :: ione=1
811  INTEGER :: itm1, m, info
812  !
813  mritr=nwrkcum+n*2*nrst+((nrst+1)*nrst)/2+nrst
814  ALLOCATE (su(n*nrst),sc(n*nrst) &
815  ,sir(((nrst+1)*nrst)/2),siv(nrst) &
816  ,r(nwrkcum))
817  !
818  flop=0.0d0
819  kstat=0
820  IF (wfo) THEN
821  WRITE(iout,938) irank,nrst
822  END IF
823  IF (wff) THEN
824  IF ( trans) THEN
825  WRITE(iout,941)
826  END IF
827  IF (smoothtype == 1) THEN
828  WRITE(iout,945) nsmooth
829  ELSE
830  WRITE(iout,946) nsmooth
831  END IF
832  WRITE(iout,947)
833  END IF
834  !
835  m=min(nrst,iter)
836  trs=epsilon(1.0d0)
837  trs=sqrt(sqrt(trs))
838  tol = resid
839  tol2bnorm2 = tol
840  maxit = iter
841  resid2= 1.0d0
842  iter = 0
843  dum(3) = dnrm2(n, f, ione)**2
844  flop=flop+dble(2*n)
845  !
846  ! compute initial residual
847  !
848  IF (init==1) THEN
849  CALL dagmg_matv(n,x,sc,a,ja,ia,flop,trans )
850  f(1:n)=f(1:n)-sc(1:n)
851  dum(2) = dnrm2(n, f, ione)**2
852  bnorm2=dum(3)
853  resid2=dum(2)
854  IF (bnorm2.EQ.0.0d0) THEN
855  !
856  ! R.h.s. is trivial: exit with solution equal to the zero vector
857  !
858  IF(wff) THEN
859  WRITE(iout,998)
860  WRITE(iout,999)
861  END IF
862  x(1:n)=0.0d0
863  RETURN
864  END IF
865  tol2bnorm2 = tol*tol*bnorm2
866  IF (wff.AND. (maxit <= 0 .OR. resid2 <= tol2bnorm2)) THEN
867  WRITE(iout,900) 0, 0,sqrt(resid2),sqrt(resid2/bnorm2)
868  END IF
869  END IF
870  !
871  itm = -1
872  irst = 0
873  DO WHILE ( iter < maxit .AND. resid2 > tol2bnorm2 )
874  itm = itm + 1
875  iter = iter + 1
876  !
877  ! Restarting
878  IF (itm == m) THEN
879  CALL dtptrs('U','N','U',m,ione,sir,siv,m,info)
880  IF (irst == 0 .AND. init == 0) THEN
881  CALL dgemv('N',n,m,1.0d0,su, &
882  n,siv,ione,0.0d0,x,ione)
883  flop=flop+dble(2*m*n+m*(m+1))
884  ELSE
885  CALL dgemv('N',n,m,1.0d0,su, &
886  n,siv,ione,1.0d0,x,ione)
887  flop=flop+dble((2*m+1)*n+m*(m+1))
888  END IF
889  itm=0
890  irst=irst+1
891  END IF
892  !
893  ! Preconditioner Solve & MATVEC
894  !
895  CALL dagmg_prec_matv(1,f,su(1+itm*n),sc(1+itm*n),flop,r,.false.)
896  CALL dagmg_matv(n, su(1+itm*n), sc(1+itm*n) &
897  , a, ja, ia, flop,trans )
898  !
899  ! Gram-Schmidt
900  !
901  IF (itm > 0) THEN
902  DO i=0,itm-1
903  dum(1)=ddot(n,sc(1+i*n),ione,sc(1+itm*n),ione)
904  bet0=dum(1)
905  bet0=bet0/sir(1+i+(i*(i+1))/2)
906  sir(1+i+(itm*(itm+1))/2)=bet0
907  CALL daxpy(n,-bet0,sc(1+i*n),ione,sc(1+itm*n),ione)
908  flop=flop+dble(4*n)
909  END DO
910  END IF
911  !
912  ! no normalisation: record norm instead
913  !
914  dum(1)=dnrm2(n,sc(1+itm*n),ione)**2
915  dum(2)=ddot(n, sc(1+itm*n), ione, f, ione)
916  IF (iter == 1) THEN
917  IF (init == 0) THEN
918  bnorm2=dum(3)
919  resid2=bnorm2
920  IF (bnorm2.EQ.0.0d0) THEN
921  !
922  ! R.h.s. is trivial: exit with solution equal to the zero vector
923  !
924  IF(wff) THEN
925  WRITE(iout,998)
926  WRITE(iout,999)
927  END IF
928  x(1:n)=0.0d0
929  RETURN
930  END IF
931  tol2bnorm2=tol*tol*bnorm2
932  END IF
933  IF (wff) THEN
934  WRITE(iout,900) 0, 0,sqrt(resid2),sqrt(resid2/bnorm2)
935  END IF
936  tolt = max(tol2bnorm2,trs*resid2)
937  ELSE
938  END IF
939  rho=dum(1)
940  alpha=dum(2)
941  sir(1+itm+(itm*(itm+1))/2)=rho
942  bet0=alpha/rho
943  siv(1+itm)=bet0
944  !
945  CALL daxpy(n, -bet0, sc(1+itm*n), ione, f, ione)
946  flop=flop+dble(6*n)
947  !
948  resid2 = resid2 - alpha*alpha/rho
949  IF (resid2 <= tolt) THEN
950  resid2 = dnrm2(n,f,ione)**2
951  flop=flop+dble(2*n)
952  tolt = max(tol2bnorm2,trs*resid2)
953  END IF
954  IF (wff)THEN
955  WRITE(iout,900) iter,irst,sqrt(abs(resid2)),sqrt(abs(resid2/bnorm2))
956  END IF
957  !
958  END DO
959  !
960  IF (itm >= 0) THEN
961  itm1=itm+1
962  CALL dtptrs('U','N','U',itm1, ione,sir,siv,m,info)
963  IF (irst == 0 .AND. init == 0) THEN
964  CALL dgemv('N',n,itm1,1.0d0,su, &
965  n,siv,ione,0.0d0,x,ione)
966  flop=flop+dble(2*(itm+1)*n+(itm+1)*(itm+2))
967  ELSE
968  CALL dgemv('N',n,itm1,1.0d0,su, &
969  n,siv,ione,1.0d0,x,ione)
970  flop=flop+dble((2*(itm+1)+1)*n+(itm+1)*(itm+2))
971  END IF
972  END IF
973  !
974  resid=sqrt(resid2/bnorm2)
975  IF( resid > tol ) THEN
976  IF (irank <= 0) THEN
977  WRITE(iout,'()')
978  WRITE(iout,950) iter
979  WRITE(iout,951)
980  WRITE(iout,'()')
981  END IF
982  iter=-iter
983  ELSE
984  IF (wff) THEN
985  WRITE(iout,952) iter
986  WRITE(iout,'()')
987  END IF
988  END IF
989  !
990  DEALLOCATE (su,sc,r,siv,sir)
991  IF(ALLOCATED(fsc)) DEALLOCATE(fsc)
992  IF(ALLOCATED(sw)) DEALLOCATE(sw)
993  !
994  kstat(2,1)=abs(iter)
995  !
996  RETURN
997 900 FORMAT('**** Iter=',i5,' (',i2,' rest.) Resid=',e9.2, &
998  ' Relat. res.=',e9.2)
999 938 FORMAT(i3,'*SOLUTION: GCR iterations (GCR(',i2,'))')
1000 941 FORMAT('**** Rmk: solve system with the transpose of the input matrix')
1001 945 FORMAT( '**** AMG preconditioner with',i2, &
1002  ' Gauss-Seidel pre-and post-smoothing step(s)')
1003 946 FORMAT( '**** AMG preconditioner with',i2, &
1004  ' ILU(0) pre-and post-smoothing step(s)')
1005 947 FORMAT( '**** at each level')
1006 950 FORMAT('**** !!! WARNING!!!',i5,' ITERATIONS WERE')
1007 951 FORMAT('**** INSUFFICIENT TO ACHIEVE THE DESIRED ACCURACY')
1008 952 FORMAT('**** - Convergence reached in',i5,' iterations -')
1009 998 FORMAT('**** The norm of the right hand side is zero:')
1010 999 FORMAT('**** set x equal to the zero vector and exit')
1011  !
1012  END SUBROUTINE dagmg_gcr
1013 !--------------------------------------------------------------------
1014  SUBROUTINE dagmg_matv(n, x, y, a, ja, ia, flop, transpose, &
1015  iext, lstout, ilstout, lstin, ilstin)
1016 !
1017 ! Performs matrix vector product by x, result in y
1018 ! If trans==.TRUE., multiply by the transpose
1019 !
1020  IMPLICIT NONE
1021  INTEGER :: n, ja(*), ia(n+1), i, kk, k1, k2, ier, idum(1)
1022  INTEGER, OPTIONAL :: iext(n), lstout(*), ilstout(*), lstin(0:*), ilstin(*)
1023  REAL(kind(0.0d0)) :: y(n), a(*), t, xx
1024  REAL(kind(0.0d0)) :: x(n)
1025  REAL(kind(0.0d0)) :: flop
1026  LOGICAL :: transpose
1027  !
1028  !
1029  IF (.NOT.transpose) THEN
1030  DO i = 1, n
1031  k1 = ia(i)
1032  xx = x(ja(k1))
1033  t = a(k1) * xx
1034  k2 = ia(i+1)-1
1035  DO kk = k1+1, k2
1036  xx = x(ja(kk))
1037  t = t + a(kk)*xx
1038  ENDDO
1039  y(i) = t
1040  ENDDO
1041  ELSE
1042  y(1:n)=0.0d0
1043  DO i = 1, n
1044  xx = x(i)
1045  DO kk = ia(i), ia(i+1)-1
1046  y(ja(kk)) = y(ja(kk)) + a(kk)*xx
1047  ENDDO
1048  ENDDO
1049  END IF
1050  !
1051  flop = flop + dble(2 *(ia(n+1)-1)-n)
1052  !
1053  RETURN
1054  END SUBROUTINE dagmg_matv
1055 !-----------------------------------------------------------------------
1056  RECURSIVE SUBROUTINE dagmg_prec_matv(l,B,X,AX,flop,R,matv)
1057 !
1058 ! Applies AMG preconditioner at level l to vector B ans store the result in X
1059 ! If matv==.TRUE. : computes in addition AX = A*X
1060 ! R : workspace
1061 ! length needed at level l: lr(l)
1062 ! nsmooth*smoothtype == 1:
1063 ! | nn(l+1) if l+1==nlev
1064 ! lr(l) = | 3*nn(l+1)+lr(l+1) otherwise, if innermax(l+1)<=1
1065 ! | 5*nn(l+1)+lr(l+1) otherwise
1066 !
1067 ! otherwise:
1068 ! | max( 2*n , nn(l+1) ) if l+1==nlev
1069 ! lr(l) = | max( 2*n , 3*nn(l+1)+lr(l+1) ) otherwise,
1070 ! | if innermax(l+1)<=1
1071 ! | max( 2*n , 5*nn(l+1)+lr(l+1) ) otherwise
1072 !
1073 ! flop : floating point ops counter
1074 !
1075 ! AX(1:nn(l)) needed as workspace
1076 ! (i.e., to be allocated by calling program even if matv==.FALSE.)
1077 !
1078  USE dagmg_mem
1079  IMPLICIT NONE
1080  INTEGER :: l
1081  REAL(kind(0.0d0)), OPTIONAL :: flop
1082  REAL(kind(0.0d0)), OPTIONAL :: b(*), x(*), ax(*), r(*)
1083  REAL(kind(0.0d0)) :: dum(1)
1084  LOGICAL, OPTIONAL :: matv
1085  LOGICAL :: update
1086  INTEGER :: is,n,nnext,xn,bn,axn,rn,idum(1)
1087 !
1088 ! With ILU(0) (smoothtype==2), i.e. smoothing steps implies
1089 ! 1 forward + 1 backward solve; i.e., two times more than with
1090 ! Gauss-Seidel (smoothtype==1)
1091 !
1092  INTEGER, PARAMETER :: nsmotot=nsmooth*smoothtype
1093  n=nn(l)
1094  nnext=nn(l+1)
1095  !
1096  IF (l == nlev) THEN
1097  !
1098  ! Special case: this level is the last one
1099  ! (Normally, one enters this section iff nlev==1)
1100  !
1101  x(1:n)=b(1:n)
1102  CALL dagmg_mumpsseq(n,dt(l)%a,dt(l)%ja,dt(l)%ia,x,2,flop)
1103  !
1104  RETURN
1105  !
1106  END IF
1107  !
1108  !
1109  ! Presmoothing
1110  !
1111  IF (nsmotot == 1) THEN
1112  !
1113  ! Perform forward GS sweep to solve A*X= B; new residual in AX
1114  ! (X is new: update=.FALSE. ; R will not be used)
1115  CALL dagmg_fwgs(l,b,x,ax,flop,.false.,r)
1116  !
1117  ELSE
1118  !
1119  ! Perform nsmotot/2 forward+backward sweeps to solve A*X=B
1120  ! with initial approx. in X and residual in AX when update=.TRUE.
1121  ! (i.e., symmetric GS or ILU(0), depending on the pivots);
1122  ! result in X, new residual in AX
1123  ! (update=.FALSE. at first call, then .TRUE.)
1124  ! R(1:2*n) as workspace
1125  update=.false.
1126  DO is=2,nsmotot,2
1127  CALL dagmg_fwbwsolve1(l,dt(l)%a,b,x,ax,flop,update,r)
1128  update=.true.
1129  END DO
1130  IF (mod(nsmotot,2) .EQ. 1) THEN
1131  !
1132  ! Perform final forward sweep to solve A*X=B
1133  ! with initial approx. in X and residual in AX
1134  ! (X has to be updated: update=.TRUE.)
1135  ! R(1:n) as workspace
1136  CALL dagmg_fwgs(l,b,x,ax,flop,.true.,r)
1137  !
1138  END IF
1139  END IF
1140  !
1141  ! Coarse grid correction
1142  !
1143  IF (nnext > 0) THEN
1144  !
1145  ! Standard case: coarse grid is nonmpty
1146  ! (otherwise: nothing to be done for coarse grid correction)
1147  ! Restricted residual will be in R(nnext+1:2*nnext) except at last level
1148  ! and solution of coarse system in R(1:nnext)
1149  !
1150  xn=1
1151  bn=xn+nnext
1152  IF (l+1 == nlev) bn=xn
1153  ! Restric the residual in AX
1154  CALL dagmg_restag(n,nnext,ax,r(bn),dt(l)%ind,flop)
1155  !
1156  IF (l+1 == nlev) THEN
1157  !
1158  ! Direct solution at last level
1159  CALL dagmg_mumpsseq(nnext,dt(l+1)%a,dt(l+1)%ja,dt(l+1)%ia,r,2,flop)
1160  !
1161  ELSE IF ( innermax(l+1) <= 1 ) THEN
1162  !
1163  ! V cycle enforced: apply MG preconditiner
1164  ! size of workspace in R at level l+1: length(R)-3*nnext
1165  axn=bn+nnext
1166  rn=axn+nnext
1167  CALL dagmg_prec_matv(l+1,r(bn),r(xn),r(axn),flop,r(rn),.false.)
1168  !
1169  kstat(1,l+1)=max(kstat(1,l+1),1)
1170  kstat(2,l+1)=kstat(2,l+1)+1
1171  !
1172  ELSE
1173  !
1174  ! K-cycle with Galerkin projection
1175  ! will call prec_matv at level l+1 with workspace in R of size
1176  ! at least length(R(BN:*))-4*nnext, i.e. length(R)-5*nnext
1177  CALL dagmg_inner_iter(nnext,r(xn),r(bn),l+1,flop)
1178  !
1179  END IF
1180  !
1181  ! Prolongate just computed coarse solution, and add it to X
1182  CALL dagmg_prolag(n,nnext,x,r(xn),dt(l)%ind,flop)
1183  !
1184  END IF
1185  !
1186  ! Post smoothing
1187  !
1188  IF (nsmotot == 1) THEN
1189  !
1190  ! Perform backward GS sweep to solve B-A*X with initial approx. in X
1191  ! AX to be computed if matv==.TRUE.
1192  CALL dagmg_bwgs(l,b,x,ax,flop,matv)
1193  !
1194  ELSE
1195  IF (mod(nsmotot,2) .EQ. 1) THEN
1196  !
1197  ! Perform initial backward sweep to solve B-A*X
1198  ! with initial approx. in X; do not compute AX
1199  ! (but use it as worskspace)
1200  CALL dagmg_bwgs(l,b,x,ax,flop,.false.)
1201  !
1202  END IF
1203  !
1204  ! Apply nsmotot/2 forward+backward sweeps to solve A*X=B
1205  ! with initial approx. in X
1206  ! (i.e., symmetric GS or ILU(0), depending on the pivots)
1207  ! compute AX at last call if required
1208  ! R(1:2*n) as workspace
1209  update=.false.
1210  DO is=2,nsmotot,2
1211  IF (is .GE. nsmotot-1) update=matv
1212  CALL dagmg_fwbwsolve2(l,dt(l)%a,b,x,ax,flop,update,r)
1213  END DO
1214  END IF
1215  RETURN
1216  END SUBROUTINE dagmg_prec_matv
1217 !-----------------------------------------------------------------------
1218  SUBROUTINE dagmg_fwgs(l,B,X,AX,flop,update,R)
1219 !
1220 ! Performs pre-smoothing step at level l with smoother:
1221 !
1222 ! M = (D+E)
1223 !
1224 ! E strictly lower triangular,
1225 ! D diagonal
1226 !
1227 ! Elements of E stored in (dt(l)%a,dt(l)%ja,dt(l)%il)
1228 ! Inverse of the elements of D strored in dt(l)%a(1:nn(l))
1229 !
1230 ! If update==.FALSE., computes X = inv(M)*B and AX = B - A*X,
1231 ! based on the assumption that either A=D+E+F with elements of F
1232 ! stored in (dt(l)%a,dt(l)%ja,dt(l)%iu) (strict upper part)
1233 !
1234 ! If update==.TRUE., computes X := X + inv(M)*AX and AX := AX - A-inv(M)*AX,
1235 ! based on the assumption that either A=D+E+F with elements of F
1236 ! stored in (dt(l)%a,dt(l)%ja,dt(l)%iu) (strict upper part)
1237  USE dagmg_mem
1238  IMPLICIT NONE
1239  INTEGER :: l, n, idum(1), ier
1240  REAL(kind(0.0d0)) :: flop
1241  REAL(kind(0.0d0)) :: b(*), x(*), ax(*), r(*)
1242  LOGICAL :: update
1243  n=nn(l)
1244  !
1245  IF (update) THEN
1246  !
1247  ! Solve (D+E) R = AX
1248  !
1249  CALL dagmg_csrlsolve(n,dt(l)%a,dt(l)%ja,n+1,dt(l)%il,dt(l)%a &
1250  ,r,ax,1,flop)
1251  x(1:n)=x(1:n)+r(1:n)
1252  flop=flop+dble(n)
1253  !
1254  ! AX_old - A*R = AX_old - F * R - (D+E+) * inv(D+E) * AX_old = - F * R
1255  !
1256  ! Hence, compute AX = - F * R
1257  !
1258  CALL dagmg_csrmv(n,dt(l)%a,dt(l)%ja,n+1,dt(l)%iu,r,1,ax,-1,flop)
1259  !
1260  ELSE
1261  !
1262  ! Solve (D+E) X = B
1263  !
1264  CALL dagmg_csrlsolve(n,dt(l)%a,dt(l)%ja,n+1,dt(l)%il,dt(l)%a,x,b,1,flop)
1265  !
1266  ! B - A*X = B - F * X - (D+E) * inv(D+E) * B = - F * X
1267  ! Hence, compute AX = - F * R
1268  !
1269  CALL dagmg_csrmv(n,dt(l)%a,dt(l)%ja,n+1,dt(l)%iu,x,1,ax,-1,flop)
1270  !
1271  END IF
1272  RETURN
1273  END SUBROUTINE dagmg_fwgs
1274 !-----------------------------------------------------------------------
1275  SUBROUTINE dagmg_bwgs(l,B,X,AX,flop,matv)
1276 !
1277 ! Performs post-smoothing step at level l with smoother:
1278 !
1279 ! M = (D+F)
1280 !
1281 ! F strictly upper triangular,
1282 ! D diagonal
1283 !
1284 ! Elements of E stored in (dt(l)%a,dt(l)%ja,dt(l)%iu)
1285 ! Inverse of the elements of D strored in dt(l)%a(1:nn(l))
1286 !
1287 ! Computes X := X + inv(M)*(B-A*X),
1288 ! based on the assumption that either A=D+E+F with elements of E
1289 ! stored in (dt(l)%a,dt(l)%ja,dt(l)%il) (strict lower part)
1290 !
1291 ! If matv==.TRUE., computes in addition AX=A*X,
1292 ! based on the same assumptions
1293 !
1294  USE dagmg_mem
1295  IMPLICIT NONE
1296  INTEGER :: l, n, idum(1), ier
1297  REAL(kind(0.0d0)) :: flop
1298  REAL(kind(0.0d0)) :: b(*), x(*), ax(*)
1299  LOGICAL :: matv
1300  n=nn(l)
1301  !
1302  ! Compute AX = B - E * X
1303  !
1304  ax(1:n)=b(1:n)
1305  CALL dagmg_csrmv(n,dt(l)%a,dt(l)%ja,n+1,dt(l)%il,x,1,ax,-2,flop)
1306  !
1307  ! Solve (D+F) X = AX
1308  ! Then: X := inv((D+F)) * ( B - E * X ) = X + inv(D+F) * ( B - A * X )
1309  !
1310  CALL dagmg_csrusolve(n,dt(l)%a,dt(l)%ja,n+1,dt(l)%iu,dt(l)%a,x,ax,1,flop)
1311  !
1312  IF (.NOT.matv) RETURN
1313  !
1314  ! Computation of AX := A*X is needed
1315  ! Use A*X = (D+F) * X + E * X = AX + E * X
1316  !
1317  CALL dagmg_csrmv(n,dt(l)%a,dt(l)%ja,n+1,dt(l)%il,x,1,ax,2,flop)
1318  RETURN
1319  END SUBROUTINE dagmg_bwgs
1320 !-----------------------------------------------------------------------
1321  SUBROUTINE dagmg_fwbwsolve1(l,p,B,X,AX,flop,update,R)
1322 !
1323 ! Performs pre-smoothing step at level l with smoother:
1324 !
1325 ! M = (P+E) inv(P) (P+F)
1326 !
1327 ! E strictly lower triangular,
1328 ! F strictly upper triangular,
1329 ! P diagonal
1330 !
1331 ! Elements of E stored in (dt(l)%a,dt(l)%ja,dt(l)%il)
1332 ! Elements of F stored in (dt(l)%a,dt(l)%ja,dt(l)%iu)
1333 ! Inverse of the elements of P strored in p(1:nn(l))
1334 !
1335 ! If update==.FALSE., computes X = inv(M)*B and AX = B - A*X,
1336 ! based on the assumption that either (smoothtype==1) A=P+E+F
1337 ! or (smoothtype==2) A=P+E+F+Q with Q diagonal stored in
1338 ! dt(l)%a(1:n)
1339 !
1340 ! If update==.TRUE., computes X := X + inv(M)*AX and AX := AX - A-inv(M)*AX,
1341 ! based on the assumption that either (smoothtype==1) A=P+E+F
1342 ! or (smoothtype==2) A=P+E+F+Q with Q diagonal stored in
1343 ! dt(l)%a(1:n)
1344 !
1345  USE dagmg_mem
1346  IMPLICIT NONE
1347  INTEGER :: l, n, idum(1), ier
1348  REAL(kind(0.0d0)) :: p(*), b(*), x(*), ax(*), r(*)
1349  REAL(kind(0.0d0)) :: flop
1350  LOGICAL :: update
1351  !
1352  n=nn(l)
1353  IF (.NOT.update) THEN
1354  !
1355  ! Solve (I+E*inv(P)) R(1:n) = B
1356  !
1357  CALL dagmg_csrlsolve(n,dt(l)%a,dt(l)%ja,n+1,dt(l)%il,p &
1358  ,r,b,-1,flop)
1359  !
1360  ! Solve (P+F) X = R(1:n)
1361  !
1362  CALL dagmg_csrusolve(n,dt(l)%a,dt(l)%ja,n+1,dt(l)%iu,p &
1363  ,x,r,1,flop)
1364  !
1365  ! B - A*X = B - (P+E+F+Q) * inv(P+F) * R(1:n)
1366  ! = B - R(1:n) - (E+Q)*X
1367  !
1368  !
1369  ! First compute AX = B - R(1:n)
1370  !
1371  ax(1:n)=b(1:n)-r(1:n)
1372  flop=flop+dble(n)
1373  !
1374  ! Then AX := AX - E*X
1375  !
1376  CALL dagmg_csrmv(n,dt(l)%a,dt(l)%ja,n+1,dt(l)%il,x,1,ax,-2,flop)
1377  !
1378  ! Eventually, AX := AX - Q*X if needed
1379  IF (smoothtype == 2) ax(1:n)=ax(1:n)-dt(l)%a(1:n)*x(1:n)
1380  !
1381  ELSE
1382  !
1383  ! Solve (I+E*inv(P)) R = AX
1384  !
1385  CALL dagmg_csrlsolve(n,dt(l)%a,dt(l)%ja,n+1,dt(l)%il,p &
1386  ,r,ax,-1,flop)
1387  !
1388  ! Solve (P+F) R(n+1:2*n) = R(1:n)
1389  !
1390  CALL dagmg_csrusolve(n,dt(l)%a,dt(l)%ja,n+1,dt(l)%iu,p &
1391  ,r(n+1),r,1,flop)
1392  !
1393  ! X = X + inv(M) AX = X + R(n+1:2*n)
1394  !
1395  x(1:n)=x(1:n)+r(n+1:2*n)
1396  !
1397  ! AX - A*inv(M)*AX = AX - (P+E+F+Q) * inv(P+F) * R(1:n)
1398  ! = AX - R(1:n) - (E+Q)*R(n+1:2*n)
1399  !
1400  !
1401  ! First compute AX = AX - R(1:n)
1402  !
1403  ax(1:n)=ax(1:n)-r(1:n)
1404  flop=flop+dble(2*n)
1405  !
1406  ! Then AX := AX - E*R(n+1:2*n)
1407  !
1408  CALL dagmg_csrmv(n,dt(l)%a,dt(l)%ja,n+1,dt(l)%il,r(n+1),1,ax &
1409  ,-2,flop)
1410  !
1411  ! Eventually, AX := AX - Q*R(n+1:2*n) if needed
1412  IF (smoothtype == 2) ax(1:n)=ax(1:n)-dt(l)%a(1:n)*r(n+1:2*n)
1413  IF (smoothtype == 2) flop=flop+dble(2*n)
1414  !
1415  END IF
1416  !
1417  RETURN
1418  END SUBROUTINE dagmg_fwbwsolve1
1419 !-----------------------------------------------------------------------
1420  SUBROUTINE dagmg_fwbwsolve2(l,p,B,X,AX,flop,matv,R)
1421 !
1422 ! Performs post-smoothing step at level l with smoother:
1423 !
1424 ! M = (P+E) inv(P) (P+F)
1425 !
1426 ! E strictly lower triangular,
1427 ! F strictly upper triangular,
1428 ! P diagonal
1429 !
1430 ! Elements of E stored in (dt(l)%a,dt(l)%ja,dt(l)%il)
1431 ! Elements of F stored in (dt(l)%a,dt(l)%ja,dt(l)%iu)
1432 ! Inverse of the elements of P strored in p(1:nn(l))
1433 !
1434 ! Computes X := X + inv(M)*(B-A*X),
1435 ! based on the assumption that either (smoothtype==1) A=P+E+F
1436 ! or (smoothtype==2) A=P+E+F+Q with Q diagonal stored in
1437 ! dt(l)%a(1:n)
1438 !
1439 ! If matv==.TRUE., computes in addition AX=A*X,
1440 ! based on the same assumptions
1441 !
1442  USE dagmg_mem
1443  IMPLICIT NONE
1444  INTEGER :: l, n, idum(1), ier
1445  REAL(kind(0.0d0)) :: p(*), b(*), x(*), ax(*), r(*)
1446  REAL(kind(0.0d0)) :: flop
1447  LOGICAL :: matv
1448  !
1449  n=nn(l)
1450  !
1451  ! inv(M) * (B-A*X) = inv(I+inv(P)*F) * inv(P+E) *( B- (P+E+F+Q)*X )
1452  ! = inv(I+inv(P)*F) * ( inv(P+E) * (B-(F+Q)*X) - X )
1453  !
1454  ! First compute AX=F*X
1455  !
1456  CALL dagmg_csrmv(n,dt(l)%a,dt(l)%ja,n+1,dt(l)%iu,x,1,ax,0,flop)
1457  !
1458  ! Then, R(1:n)= B - AX
1459  r(1:n)=b(1:n)-ax(1:n)
1460  ! and R(1:n) := R(1:n) - Q*X if needed
1461  IF (smoothtype == 2) r(1:n)=r(1:n)-dt(l)%a(1:n)*x(1:n)
1462  IF (smoothtype == 2) flop=flop+dble(2*n)
1463  !
1464  !
1465  ! Now solve (P+E) R(n+1:2*n)=R(1:n)
1466  !
1467  CALL dagmg_csrlsolve(n,dt(l)%a,dt(l)%ja,n+1,dt(l)%il,p &
1468  ,r(n+1),r,1,flop)
1469  !
1470  ! and solve (I+inv(P)*F) R(1:n) = R(n+1:2*n) - X
1471  !
1472  r(1:n) = r(n+1:2*n) - x(1:n)
1473  CALL dagmg_csrusolve(n,dt(l)%a,dt(l)%ja,n+1,dt(l)%iu,p &
1474  ,r,r,-1,flop)
1475  !
1476  ! Eventually, X := X + inv(M) * (B-A*X) = X + R(1:n)
1477  !
1478  x(1:n)=x(1:n) + r(1:n)
1479  flop=flop+dble(3*n)
1480  !
1481  IF (.NOT.matv) RETURN
1482  ! If needed, compute
1483  ! AX := A * X
1484  ! = (P+E+F+Q) * ( X_old + R(1:n) )
1485  ! = AX + (P+E+Q)* X_old + (E+Q) * R(1:n)
1486  ! (P+F) * inv((I+inv(P)*F)) * (R(n+1:2*n)-X_old)
1487  ! = AX + (P+E+Q)*X_old + (E+Q) * R(1:n) + P * (R(n+1:2*n)-X_old)
1488  ! = AX + P * R(n+1:2*n) + (E+Q) * X
1489  !
1490  ax(1:n)=ax(1:n)+r(n+1:2*n)/p(1:n)
1491  flop=flop+dble(2*n)
1492  !
1493  ! Now, AX: = AX + E*X
1494  !
1495  CALL dagmg_csrmv(n,dt(l)%a,dt(l)%ja,n+1,dt(l)%il,x,1,ax,2,flop)
1496  ! and AX := AX + Q*X if needed
1497  IF (smoothtype == 2) ax(1:n)=ax(1:n)+dt(l)%a(1:n)*r(n+1:2*n)
1498  IF (smoothtype == 2) flop=flop+dble(2*n)
1499  RETURN
1500  END SUBROUTINE dagmg_fwbwsolve2
1501 !-----------------------------------------------------------------------
1502  RECURSIVE SUBROUTINE dagmg_inner_iter(n,X,R,l,flop)
1503  !
1504  ! Perform inner iterations (K-cycle) for AGMG
1505  ! Based on projection:
1506  ! Gelerkin projection if gcrin==.FALSE.; in the SPD case, this is
1507  ! equivalent to Flexible CG, implemented as in [1];
1508  ! Minimal residual projection if gcrin==.TRUE.; i.e.,
1509  ! GCR implemented as in [1].
1510  ! In addition, peforms automatic switch to Minimal residual projection
1511  ! if the projection of symmetric part of the matrix is
1512  ! checked to be not postive definite.
1513  !
1514  ! Input: N : number of unknowns at that level
1515  ! R(1:N) : r.h.s. of the system (modified on output)
1516  ! R(N+1,*): work space
1517  ! needed length for R: 4*n + workspace for prec_matv
1518  !
1519  ! [ comments below: Ri means R(1:n,i) ]
1520  !
1521  ! l : level index
1522  !
1523  ! Output: X(1:n) : computed solution
1524  !
1525  USE dagmg_mem
1526  IMPLICIT NONE
1527  INTEGER :: n, iter, l, ierr, k
1528  REAL(kind(0.0d0)) :: resid, flop, bnorm, det
1529  REAL(kind(0.0d0)) :: x(n), r(n,*)
1530  REAL(kind(0.0d0)) :: alpha1,alpha2,bet0,bet1,bet2,rho1,rho2,gamm0,gamm1,zeta
1531  REAL(kind(0.0d0)) :: dum(10),y1,y2
1532  REAL(kind(0.0d0)), external :: ddot
1533  REAL(kind(0.0d0)), external :: dnrm2
1534  INTEGER , parameter :: ione=1
1535  !
1536  ! AT MOST 2 ITERATIONS
1537  !
1538  dum(3)=dnrm2(n, r, ione)**2
1539  !
1540  ! Early exit if r.h.s. is trivial
1541  !
1542  IF (dum(3) .EQ. 0.0d0) THEN
1543  x(1:n)=0.0d0
1544  flop=flop+dble(2*n)
1545  RETURN
1546  END IF
1547  iter = 1
1548  !
1549  ! Apply preconditioner to r.h.s. (result in X) and compute
1550  ! matrix vector pruduct (result in R2: R2=A*X)
1551  ! workspace tranferred to prec_matv: length(R)-2*n
1552  !
1553  CALL dagmg_prec_matv(l,r,x,r(1,2),flop,r(1,3),.true.)
1554  !
1555 !!$ IF (gcrin) GOTO 100
1556  !
1557  dum(1) = ddot(n,x,ione,r(1,2),ione)
1558  dum(2) = ddot(n,x,ione,r,ione)
1559  IF (abs(dum(1)) .LE. repsmach*abs(dum(2))) THEN
1560  ! Projected matrix near singular: switch to GCR projection
1561 !!$ gcrin=.TRUE.
1562  flop=flop+dble(2*n)
1563  GOTO 100
1564  END IF
1565  bnorm = dum(3)
1566  rho1=dum(1)
1567  alpha1=dum(2)
1568  !
1569  bet0=alpha1/rho1
1570  !
1571  !
1572  ! Compute residual corresponding to solution bet0 * X
1573  CALL daxpy(n, -bet0, r(1,2), ione, r, ione)
1574  ! ... and the corresponding square norm
1575  resid = dnrm2(n,r,ione)**2
1576  IF (resid <= resi*resi*bnorm) THEN
1577  ! criterion satified, exit with solution bet0 * X
1578  ! Galerkin projection: the solution computed in this way
1579  ! is such that the corresponding residual
1580  ! R - bet0*A*X = R - bet0*R2
1581  ! is orthogonal to X
1582  ! Indeed: bet0= ( X , R ) / ( X , R2 )
1583  CALL dscal( n, bet0, x, ione )
1584  flop=flop+dble(11*n)
1585  kstat(1,l)=max(kstat(1,l),iter)
1586  kstat(2,l)=kstat(2,l)+iter
1587  RETURN
1588  END IF
1589  !
1590  iter = 2
1591  !
1592  ! Apply preconditioner to this new residual (result in R3) and compute
1593  ! matrix vector pruduct (result in R4: R4=A*R3)
1594  ! R3 is thus the second direction vector
1595  ! workspace tranferred to prec_matv: length(R)-4*n
1596  !
1597  CALL dagmg_prec_matv(l,r,r(1,3),r(1,4),flop,r(1,5),.true.)
1598  !
1599  !
1600 !!$ ! Check if gcrin has been switched while performing iter at lower levels
1601 !!$ IF (gcrin) GOTO 200
1602  !
1603  dum(1) = ddot(n,r(1,3),ione,r(1,2),ione)
1604  dum(2) = ddot(n,r(1,3),ione,r,ione)
1605  dum(3) = ddot(n,r(1,3),ione,r(1,4),ione)
1606  IF (.NOT.spd) dum(4) = ddot(n,x,ione,r(1,4),ione)
1607  gamm0 =dum(1)
1608  alpha2=dum(2)
1609  rho2 =dum(3)
1610  IF (spd) THEN
1611  ! in the SPD case:
1612  ! gamm1 := ( X , R4 ) = ( X, A*R3 ) = ( A*X , R3 ) = ( R2 , R3)
1613  gamm1 = gamm0
1614  ELSE
1615  gamm1 = dum(4)
1616  END IF
1617  !
1618  bet1=rho2-gamm0*gamm1/rho1
1619  bet2=(alpha1-alpha2*gamm1/bet1)/rho1
1620  zeta=alpha2/bet1
1621  !
1622  IF (abs(bet1).LE.repsmach*abs(alpha2) .OR. &
1623  abs(bet2)*repsmach.GE.1.0d0) THEN
1624  ! Projected matrix near singular: switch to GCR projection
1625 !!$ gcrin=.TRUE.
1626  flop=flop+dble(6*n)
1627  IF (.NOT.spd) flop=flop+dble(2*n)
1628  GOTO 200
1629  END IF
1630  CALL dscal(n, bet2, x, ione)
1631  CALL daxpy(n, zeta, r(1,3), ione, x, ione)
1632  !
1633  ! Galerkin projection: the solution computed in this way
1634  ! is such that the corresponding residual
1635  ! R - bet2*A*X - zeta*A*R3 = R - bet2*R2 - zeta*R4
1636  ! is orthogonal to X and R3
1637  ! Indeed:
1638  ! ( X , R ) - bet2* ( X , R2 ) - zeta* ( X , R4 )
1639  ! = alpha1 - bet2*rho1 - zeta*gamm1
1640  ! = alpha1 - (alpha1-alpha2*gamm1/bet1) - (alpha2/bet1)*gamm1
1641  ! = 0
1642  ! ( R3 , R ) - bet2* ( R3 , R2 ) - zeta* ( R3 , R4 )
1643  ! [!: alpha2 is the dot product with update residual ]
1644  ! = ( R3 , R-bet0*R2) + bet0* ( R3 , R2)
1645  ! - bet2* ( R3 , R2 ) - zeta* ( R3 , R4 )
1646  ! = alpha2 + bet0*gamm0 - bet2*gamm0 - zeta*rho2
1647  ! = alpha2 + alpha1*gamm0/rho1
1648  ! - (alpha1-alpha2*gamm1/bet1)*(gamm0/rho1)
1649  ! - (alpha2/bet1)*(bet1+gamm0*gamm1/rho1)
1650  ! = 0
1651  !
1652  kstat(1,l)=max(kstat(1,l),iter)
1653  kstat(2,l)=kstat(2,l)+iter
1654  !
1655  flop=flop+dble(19*n)
1656  IF (.NOT.spd) flop=flop+dble(2*n)
1657  !
1658  RETURN
1659  !
1660 100 CONTINUE
1661  dum(1)=dnrm2(n,r(1,2),ione)**2
1662  dum(2)=ddot(n, r(1,2), ione, r, ione )
1663  bnorm = dum(3)
1664 110 CONTINUE
1665  rho1=dum(1)
1666  alpha1=dum(2)
1667  !
1668  bet0=alpha1/rho1
1669  !
1670  ! RESID below is the square of the residual norm for solution bet0 * X
1671  resid=bnorm-alpha1*bet0
1672  IF (resid <= resi*resi*bnorm) THEN
1673  ! criterion satified, exit with solution bet0 * X
1674  ! GCR projection: the solution computed in this way
1675  ! is such that the corresponding residual
1676  ! R - bet0*A*X = R - bet0*R2
1677  ! is orthogonal to R2
1678  ! Indeed: bet0= ( R2 , R ) / ( R2 , R2 )
1679  CALL dscal( n, bet0, x, ione )
1680  flop=flop+dble(7*n)
1681  kstat(1,l)=max(kstat(1,l),iter)
1682  kstat(2,l)=kstat(2,l)+iter
1683  RETURN
1684  END IF
1685  !
1686  ! Compute residual corresponding to solution bet0 * X
1687  CALL daxpy(n, -bet0, r(1,2), ione, r, ione)
1688  !
1689  iter = 2
1690  !
1691  ! Apply preconditioner to this new residual (result in R3) and compute
1692  ! matrix vector pruduct (result in R4: R4=A*R3)
1693  ! R3 is thus the second direction vector
1694  ! workspace tranferred to prec_matv: length(R)-4*n
1695  !
1696  CALL dagmg_prec_matv(l,r,r(1,3),r(1,4),flop,r(1,5),.true.)
1697  !
1698  !
1699  dum(1) = ddot(n,r(1,4),ione,r(1,2),ione)
1700  dum(2) = ddot(n,r(1,4),ione,r,ione)
1701  dum(3) = dnrm2(n,r(1,4),ione)**2
1702  gamm0 =dum(1)
1703  alpha2=dum(2)
1704  rho2 =dum(3)
1705  ! GCR case:
1706  ! gamm1 := ( R2 , R4 ) = ( A*X, A*R3 ) = ( A*R3 , A*X ) = ( R4 , R2)
1707  gamm1 = gamm0
1708  !
1709  bet1=rho2-gamm0*gamm1/rho1
1710  bet2=(alpha1-alpha2*gamm1/bet1)/rho1
1711  zeta=alpha2/bet1
1712  !
1713  CALL dscal(n, bet2, x, ione)
1714  CALL daxpy(n, zeta, r(1,3), ione, x, ione)
1715  !
1716  ! GCR projection: the solution computed in this way
1717  ! is such that the corresponding residual
1718  ! R - bet2*A*X - zeta*A*R3 = R - bet2*R2 - zeta*R4
1719  ! is orthogonal to R2 and R4
1720  ! Indeed:
1721  ! ( R2 , R ) - bet2* ( R2 , R2 ) - zeta* ( R2 , R4 )
1722  ! = alpha1 - bet2*rho1 - zeta*gamm1
1723  ! = alpha1 - (alpha1-alpha2*gamm1/bet1) - (alpha2/bet1)*gamm1
1724  ! = 0
1725  ! ( R4 , R ) - bet2* ( R4 , R2 ) - zeta* ( R4 , R4 )
1726  ! [!: alpha2 is the dot product with update residual ]
1727  ! = ( R4 , R-bet0*R2) + bet0* ( R4 , R2)
1728  ! - bet2* ( R4 , R2 ) - zeta* ( R4 , R4 )
1729  ! = alpha2 + bet0*gamm0 - bet2*gamm0 - zeta*rho2
1730  ! = alpha2 + alpha1*gamm0/rho1
1731  ! - (alpha1-alpha2*gamm1/bet1)*(gamm0/rho1)
1732  ! - (alpha2/bet1)*(bet1+gamm0*gamm1/rho1)
1733  ! = 0
1734  !
1735  kstat(1,l)=max(kstat(1,l),iter)
1736  kstat(2,l)=kstat(2,l)+iter
1737  !
1738  flop=flop+dble(19*n)
1739  !
1740  RETURN
1741  !
1742 200 CONTINUE
1743  !
1744  ! Switching after 1 iter: compute explicitly projected system
1745  !
1746  ! (A*X A*R2)^T * A * (X R2) * y = (A*X A*R2)^T * R
1747  !
1748  ! i.e.:
1749  !
1750  ! (R2 R4)^T * (R2 R4) * y = (R2 R4)^T * [ (R-bet0*R2) + bet0*R2 ]
1751  !
1752  ! where R-bet0*R2 is the update residual.
1753  !
1754  ! Hence
1755  !
1756  ! | dum1 dum2 | | y1 | | dum4 + bet0*dum1 |
1757  ! | | | | = | |
1758  ! | dum2 dum3 | | y2 | | dum5 + bet0*dum2 |
1759  !
1760  ! where:
1761  !
1762  dum(1) = dnrm2(n,r(1,2),ione)**2
1763  dum(2) = ddot(n,r(1,4),ione,r(1,2),ione)
1764  dum(3) = dnrm2(n,r(1,4),ione)**2
1765  dum(4) = ddot(n,r(1,2),ione,r,ione)
1766  dum(5) = ddot(n,r(1,4),ione,r,ione)
1767  !
1768  dum(4) = dum(4)+bet0*dum(1)
1769  dum(5) = dum(5)+bet0*dum(2)
1770  det = dum(1)*dum(3)-dum(2)*dum(2)
1771  y1 = (dum(3)*dum(4)-dum(2)*dum(5))/det
1772  y2 = (-dum(2)*dum(4)+dum(1)*dum(5))/det
1773  CALL dscal( n, y1, x, ione )
1774  CALL daxpy( n, y2, r(1,3), ione, x, ione )
1775  kstat(1,l)=max(kstat(1,l),iter)
1776  kstat(2,l)=kstat(2,l)+iter
1777  !
1778  flop=flop+dble(23*n)
1779  !
1780  RETURN
1781  !
1782  END SUBROUTINE dagmg_inner_iter
1783 !-----------------------------------------------------------------------
1784  RECURSIVE SUBROUTINE dagmg_galerkin_inner(n,X,R,l,flop)
1785  !
1786  ! Perform inner iterations (K-cycle) for AGMG
1787  ! Based on Galerkin projection; in the SPD case, this is
1788  ! equivalent to Flexible CG, implemented as in [1]
1789  !
1790  ! Input: N : number of unknowns at that level
1791  ! R(1:N) : r.h.s. of the system (modified on output)
1792  ! R(N+1,*): work space
1793  ! needed length for R: 4*n + workspace for prec_matv
1794  !
1795  ! [ comments below: Ri means R(1:n,i) ]
1796  !
1797  ! l : level index
1798  !
1799  ! Output: X(1:n) : computed solution
1800  !
1801  USE dagmg_mem
1802  IMPLICIT NONE
1803  INTEGER :: n, iter, l, ierr, k
1804  REAL(kind(0.0d0)) :: resid, flop, bnorm
1805  REAL(kind(0.0d0)) :: x(n), r(n,*)
1806  REAL(kind(0.0d0)) :: alpha1,alpha2,bet0,bet1,bet2,rho1,rho2,gamm0,gamm1,zeta
1807  REAL(kind(0.0d0)) :: dum(10)
1808  REAL(kind(0.0d0)), external :: ddot
1809  REAL(kind(0.0d0)), external :: dnrm2
1810  INTEGER , parameter :: ione=1
1811  !
1812  ! AT MOST 2 ITERATIONS
1813  !
1814  dum(3)=dnrm2(n, r, ione)**2
1815  !
1816  ! Early exit if r.h.s. is trivial
1817  !
1818  IF (dum(3) .EQ. 0.0d0) THEN
1819  x(1:n)=0.0d0
1820  flop=flop+dble(2*n)
1821  RETURN
1822  END IF
1823  iter = 1
1824  !
1825  ! Apply preconditioner to r.h.s. (result in X) and compute
1826  ! matrix vector pruduct (result in R2: R2=A*X)
1827  ! workspace tranferred to prec_matv: length(R)-2*n
1828  !
1829  CALL dagmg_prec_matv(l,r,x,r(1,2),flop,r(1,3),.true.)
1830  !
1831  dum(1) = ddot(n,x,ione,r(1,2),ione)
1832  dum(2) = ddot(n,x,ione,r,ione)
1833  bnorm = dum(3)
1834  rho1=dum(1)
1835  alpha1=dum(2)
1836  !
1837  bet0=alpha1/rho1
1838  !
1839  !
1840  ! Compute residual corresponding to solution bet0 * X
1841  CALL daxpy(n, -bet0, r(1,2), ione, r, ione)
1842  ! ... and the corresponding square norm
1843  resid = dnrm2(n,r,ione)**2
1844  IF (resid <= resi*resi*bnorm) THEN
1845  ! criterion satified, exit with solution bet0 * X
1846  ! Galerkin projection: the solution computed in this way
1847  ! is such that the corresponding residual
1848  ! R - bet0*A*X = R - bet0*R2
1849  ! is orthogonal to X
1850  ! Indeed: bet0= ( X , R ) / ( X , R2 )
1851  CALL dscal( n, bet0, x, ione )
1852  flop=flop+dble(11*n)
1853  kstat(1,l)=max(kstat(1,l),iter)
1854  kstat(2,l)=kstat(2,l)+iter
1855  RETURN
1856  END IF
1857  !
1858  iter = 2
1859  !
1860  ! Apply preconditioner to this new residual (result in R3) and compute
1861  ! matrix vector pruduct (result in R4: R4=A*R3)
1862  ! R3 is thus the second direction vector
1863  ! workspace tranferred to prec_matv: length(R)-4*n
1864  !
1865  CALL dagmg_prec_matv(l,r,r(1,3),r(1,4),flop,r(1,5),.true.)
1866  !
1867  !
1868  dum(1) = ddot(n,r(1,3),ione,r(1,2),ione)
1869  dum(2) = ddot(n,r(1,3),ione,r,ione)
1870  dum(3) = ddot(n,r(1,3),ione,r(1,4),ione)
1871  IF (.NOT.spd) dum(4) = ddot(n,x,ione,r(1,4),ione)
1872  gamm0 =dum(1)
1873  alpha2=dum(2)
1874  rho2 =dum(3)
1875  IF (spd) THEN
1876  ! in the SPD case:
1877  ! gamm1 := ( X , R4 ) = ( X, A*R3 ) = ( A*X , R3 ) = ( R2 , R3)
1878  gamm1 = gamm0
1879  ELSE
1880  gamm1 = dum(4)
1881  END IF
1882  !
1883  bet1=rho2-gamm0*gamm1/rho1
1884  bet2=(alpha1-alpha2*gamm1/bet1)/rho1
1885  zeta=alpha2/bet1
1886  !
1887  CALL dscal(n, bet2, x, ione)
1888  CALL daxpy(n, zeta, r(1,3), ione, x, ione)
1889  ! Galerkin projection: the solution computed in this way
1890  ! is such that the corresponding residual
1891  ! R - bet2*A*X - zeta*A*R3 = R - bet2*R2 - zeta*R4
1892  ! is orthogonal to X and R3
1893  ! Indeed:
1894  ! ( X , R ) - bet2* ( X , R2 ) - zeta* ( X , R4 )
1895  ! = alpha1 - bet2*rho1 - zeta*gamm1
1896  ! = alpha1 - (alpha1-alpha2*gamm1/bet1) - (alpha2/bet1)*gamm1
1897  ! = 0
1898  ! ( R3 , R ) - bet2* ( R3 , R2 ) - zeta* ( R3 , R4 )
1899  ! [!: alpha2 is the dot product with update residual ]
1900  ! = ( R3 , R-bet0*R2) + bet0* ( R3 , R2)
1901  ! - bet2* ( R3 , R2 ) - zeta* ( R3 , R4 )
1902  ! = alpha2 + bet0*gamm0 - bet2*gamm0 - zeta*rho2
1903  ! = alpha2 + alpha1*gamm0/rho1
1904  ! - (alpha1-alpha2*gamm1/bet1)*(gamm0/rho1)
1905  ! - (alpha2/bet1)*(bet1+gamm0*gamm1/rho1)
1906  ! = 0
1907  !
1908  !
1909  kstat(1,l)=max(kstat(1,l),iter)
1910  kstat(2,l)=kstat(2,l)+iter
1911  !
1912  flop=flop+dble(19*n)
1913  IF (.NOT.spd) flop=flop+dble(2*n)
1914  !
1915  RETURN
1916  !
1917  END SUBROUTINE dagmg_galerkin_inner
1918 !-----------------------------------------------------------------------
1919  SUBROUTINE dagmg_prolag(n, nc, V, B, ind, flop)
1920 !
1921 ! Prolongate vetor B based on ind:
1922 ! P(i,j)=1 iff ind(i)=j (i=1,n ; j=1,nc)
1923 ! Add the result to V
1924 !
1925 ! flop : floating point ops counter
1926 !
1927  IMPLICIT NONE
1928  INTEGER :: n, nc, ind(n), k, i
1929  REAL(kind(0.0d0)) :: v(n), b(nc)
1930  REAL(kind(0.0d0)) :: flop
1931  !
1932  DO i = 1, n
1933  k = ind(i)
1934  IF (k.GT.0) THEN
1935  v(i) = v(i)+b(k)
1936  ENDIF
1937  ENDDO
1938  flop = flop + dble(n)
1939  RETURN
1940  END SUBROUTINE dagmg_prolag
1941 !-----------------------------------------------------------------------
1942  SUBROUTINE dagmg_restag(n, nc, V, B, ind, flop)
1943 !
1944 ! Restrict vetor V based on ind:
1945 ! P(i,j)=1 iff ind(i)=j (i=1,n ; j=1,nc)
1946 ! Result in B
1947 !
1948 ! flop : floating point ops counter
1949 !
1950  IMPLICIT NONE
1951  INTEGER :: n, nc, ind(n), k, i
1952  REAL(kind(0.0d0)) :: v(n), b(nc)
1953  REAL(kind(0.0d0)) :: flop
1954  !
1955  b(1:nc)=0.0d0
1956  !
1957  DO i = 1, n
1958  k = ind(i)
1959  IF (k.GT.0) b(k) = b(k) + v(i)
1960  ENDDO
1961  flop = flop + dble(n)
1962  RETURN
1963  END SUBROUTINE dagmg_restag
1964 !-----------------------------------------------------------------------
1965 !!!!!!!!!!!!!!!!!!! END of ROUTINES FOR ITERATIVE SOLUTION
1966 !-----------------------------------------------------------------------
1967 !!!!!!!!!!!!!!!!!!! ROUTINES FOR SETUP
1968 !-----------------------------------------------------------------------
1969  RECURSIVE SUBROUTINE dagmg_setupl1(n,a,ja,ia,listrank,ifl)
1970 !
1971 ! Performs setup (corase grid definition) at level 1
1972 ! Input: matrix (n,a,ja,ia,dt(1)%idiag,dt(1)%iext)
1973 ! in csr format with partial ordering of the rows
1974 ! Output: coarse grid matrix
1975 ! (nc,dt(2)%a,dt(2)%ja,dt(2)%ia,dt(2)%idiag)
1976 ! in csr format with partial ordering of the rows
1977 ! and aggregation map in dt(1)%ind
1978 ! - except if the level is detected to be the last one:
1979 ! then, call to coarsest level set up -
1980 ! The original matrix is also transformed in d+l+u format
1981 ! (see subroutine csrdlu), stored in
1982 ! (n,dt(1)%a,dt(1)%ja,dt(1)%il,dt(1)%iu,dt(1)%iext)
1983 !
1984  USE dagmg_mem
1985  IMPLICIT NONE
1986  INTEGER :: n
1987  INTEGER :: ja(*),ia(n+1)
1988  REAL(kind(0.0d0)) :: a(*)
1989  INTEGER :: ifl,listrank(ifl:*)
1990  INTEGER :: nc,ierr,i,j,k,nz
1991  LOGICAL :: slcoarse
1992  INTEGER, POINTER, DIMENSION(:) :: jap
1993  REAL(kind(0.0d0)), POINTER, DIMENSION(:) :: ap
1994  REAL(kind(0.0d0)) :: ops,eta,dum(2)
1995  CHARACTER(len=13) :: prtint
1996  REAL (kind(0.0d0)) :: fff(1)
1997  !
1998  nn(1)=n
1999  nlc(1)=n
2000  nlc(2)=ia(n+1)-ia(1)
2001  !
2002  ngl=nlc
2003  !
2004  wcplex=1.0d0
2005  nlctot=nlc
2006  ngltot=ngl
2007  ngl1=ngl
2008  nlc1=nlc
2009  icum=1
2010  fracnz(1)=1.0d0
2011  allzero=.false.
2012  nglp=0.0d0
2013  maxcoarset=maxcoarsesize
2014  maxcoarseslowt=maxcoarsesizeslow
2015  maxcoarset=floor(maxcoarset*(ngl(1)**(1.0d0/3)))
2016  maxcoarseslowt=floor(maxcoarseslowt*(ngl(1)**(1.0d0/3)))
2017  !
2018  IF (wfo) THEN
2019  IF (n > 9.9e10) THEN
2020  WRITE(prtint(1:12),'(1pe12.5)') dble(n)
2021  ELSE
2022  WRITE(prtint(1:12),'(i12)') n
2023  END IF
2024  WRITE(iout,'()')
2025  WRITE(iout,918) prtint(1:12)
2026  IF (nlc(2) > 9.9e10) THEN
2027  WRITE(prtint(1:12),'(1pe12.5)') dble(nlc(2))
2028  ELSE
2029  WRITE(prtint(1:12),'(i12)') nlc(2)
2030  END IF
2031  WRITE(iout,919) prtint(1:12),dble(nlc(2))/dble(n)
2032  WRITE(iout,'()')
2033  END IF
2034  IF ( 0 == nstep .OR. 1 == maxlev .OR. ngl(1) <= maxcoarset ) nlev=1
2035  nlcp=nlc
2036  nglp=ngl
2037  !
2038  IF (1 /= nlev) THEN
2039  !
2040  ! Coarsening by aggregation
2041  !
2042  CALL dagmg_aggregation(1,n,a,ja,ia,nc)
2043  !
2044  ! Perform setup at next level
2045  !
2046  CALL dagmg_setup(2,nc)
2047  !
2048  ELSE
2049  !
2050  ! Coarsest level set up
2051  !
2052  DEALLOCATE(dt(1)%idiag)
2053  memi=memi-(n+1)
2054  CALL dagmg_mumpsseq(n,a,ja,ia,fff,1,ops)
2055  IF (wfo) THEN
2056  WRITE(iout,911) irank,ops/dble(11*nn(1)+2*nlc1(2))
2057  END IF
2058  wcplex(4)=0.0d0
2059  IF (ngl(1) > maxcoarset) wcplex(4)=-1.0d0
2060  IF (ngl(1) > maxcoarseslowt) wcplex(4)=ngl(1)/(1000*ngl1(1)**(1.0d0/3))
2061  !
2062  ! Work done: return
2063  !
2064  RETURN
2065  END IF
2066  !
2067  ! Instruction here are executed aftern return from recursive call;
2068  ! i.e., bottom to top
2069  !
2070  ! Change the internal structure from csr with partial ordering of the
2071  ! rows to separate d+l+u format (see subroutine csrdlu for details)
2072  !
2073  nz=ia(n+1)-ia(1)
2074  IF (transint) THEN
2075  ALLOCATE(ap(nz),jap(nz-n),dt(1)%il(n+1),dt(1)%iu(n+1))
2076  memi=memi+nz+n+2
2077  memr=memr+nz
2078  ELSE
2079  ALLOCATE(ap(nz),jap(nz-n),dt(1)%iu(n+1))
2080  dt(1)%il => dt(1)%idiag
2081  memi=memi+nz+1
2082  memr=memr+nz
2083  END IF
2084  memax=max(memax,memr+memi*rlenilen)
2085  !
2086  CALL dagmg_csrdlu(n,a,ja,ia,dt(1)%idiag &
2087  ,ap,jap,dt(1)%il,dt(1)%iu,transint )
2088  IF (transint) THEN
2089  DEALLOCATE(dt(1)%idiag)
2090  memi=memi-(n+1)
2091  ELSE
2092  NULLIFY(dt(1)%idiag) ! il points to same memory
2093  END IF
2094  dt(1)%a => ap
2095  dt(1)%ja => jap
2096  NULLIFY(ap,jap)
2097  !
2098  ! Computes some internal parameter
2099  ! (check if K-cycle is allowed and memory needed for iterative solve)
2100  !
2101  innermax(nlev)=0
2102  innermax(1)=1
2103  eta=xsi/((1-xsi)*(cplxmax-1))
2104  icum=1
2105  DO i=2,nlev-1
2106  innermax(i)=min(2,floor(xsi**(i-1)/(eta*fracnz(i)*icum)))
2107  IF (nlev-i.LE.nlvcyc .AND. i.GT.2) innermax(i)=1
2108  icum=icum*innermax(i)
2109  wcplex(2)=wcplex(2)+icum*fracnz(i)
2110  wcplex(3)=wcplex(3)+(2**(i-1))*fracnz(i)
2111  END DO
2112  wcplex(2)=wcplex(2)+(2**(nlev-1))*fracnz(nlev)
2113  wcplex(3)=wcplex(3)+(2**(nlev-1))*fracnz(nlev)
2114  IF (nsmooth*smoothtype > 1) THEN
2115  nwrkcum=2*nn(nlev-1)
2116  ELSE
2117  nwrkcum=nn(nlev)
2118  END IF
2119  nwrkcum=max(nwrkcum,1)
2120  DO i=nlev-2,1,-1
2121  nwrkcum=nwrkcum+3*nn(i+1)
2122  IF (innermax(i+1) > 1) nwrkcum=nwrkcum+2*nn(i+1)
2123  IF (nsmooth*smoothtype > 1) nwrkcum=max(2*nn(i),nwrkcum)
2124  END DO
2125  IF (wfo) THEN
2126  WRITE(iout,'()')
2127  WRITE(iout,954) nlctot(1)/dble(nlc1(1))
2128  WRITE(iout,955) nlctot(2)/dble(nlc1(2))
2129  END IF
2130  IF (wff) THEN
2131  WRITE(iout,956) wcplex(3)
2132  WRITE(iout,957) wcplex(2)
2133  WRITE(iout,'()')
2134  END IF
2135  RETURN
2136 911 FORMAT(i3,'*',' Exact factorization:',f12.3, &
2137  ' equiv. CG iterations')
2138 918 FORMAT('****',' Number of unknowns:', a12)
2139 919 FORMAT('****',' Nonzeros :', a12, &
2140  ' (per row:',f7.2,')')
2141 920 FORMAT('****',' Number of variables:',a12, &
2142  ' (reduction ratio:',f5.2,')')
2143 921 FORMAT('****',' Nonzeros:',a12, &
2144  ' (per row:',f4.1, &
2145  '; red. ratio:',f5.2,')')
2146 954 FORMAT('****',' Grid complexity:',f9.2)
2147 955 FORMAT('****',' Operator complexity:',f9.2)
2148 956 FORMAT('****',' Theoretical Weighted complexity:',f9.2, &
2149  ' (K-cycle at each level)' )
2150 957 FORMAT('****',' Effective Weighted complexity:',f9.2, &
2151  ' (V-cycle enforced where needed)' )
2152  END SUBROUTINE dagmg_setupl1
2153 !-----------------------------------------------------------------------
2154  RECURSIVE SUBROUTINE dagmg_setup(l,n,listrank)
2155 !
2156 ! Performs setup (corase grid definition) at level l > 1
2157 ! Input: matrix (n,dt(l)%a,dt(l)%ja,dt(l)%ia,dt(l)%idiag,dt(l)%iext)
2158 ! in csr format with partial ordering of the rows
2159 ! Output: coarse grid matrix
2160 ! (nc,dt(l+1)%a,dt(l+1)%ja,dt(l+1)%ia,dt(l+1)%idiag,dt(l+1)%iext)
2161 ! in csr format with partial ordering of the rows
2162 ! and aggregation map in dt(l)%ind
2163 ! - except if the level is detected to be the last one:
2164 ! then, call to coarsest level set up -
2165 ! The original matrix is also transformed in d+l+u format
2166 ! (see subroutine csrdlu), stored in
2167 ! (n,dt(l)%a,dt(l)%ja,dt(l)%il,dt(l)%iu,dt(l)%iext)
2168 !
2169  USE dagmg_mem
2170  IMPLICIT NONE
2171  INTEGER :: l,n
2172  INTEGER, OPTIONAL :: listrank(n+1:*)
2173  INTEGER :: nc,ierr,i,j,k,nz
2174  LOGICAL :: slcoarse
2175  INTEGER, POINTER, DIMENSION(:) :: jap
2176  REAL(kind(0.0d0)), POINTER, DIMENSION(:) :: ap
2177  LOGICAL, SAVE :: slowcoarse
2178  REAL(kind(0.0d0)) :: ops,fw,eta,dum(2)
2179  CHARACTER(len=13) :: prtint
2180  REAL (kind(0.0d0)) :: fff(1)
2181  !
2182  nn(l)=n
2183  nlc(1)=n
2184  IF (n > 0) THEN
2185  nlc(2)=dt(l)%ia(n+1)-dt(l)%ia(1)
2186  ELSE
2187  nlc(2)=0
2188  END IF
2189  !
2190  ngl=nlc
2191  !
2192  nlctot=nlctot+nlc
2193  ngltot=ngltot+ngl
2194  IF (wfo) THEN
2195  WRITE(iout,'()')
2196  WRITE(iout,914) irank,l
2197  IF (n > 9.9e10) THEN
2198  WRITE(prtint(1:12),'(1pe12.5)') dble(n)
2199  ELSE
2200  WRITE(prtint(1:12),'(i12)') n
2201  END IF
2202  WRITE(iout,920) prtint(1:12),dble(nlcp(1))/dble(n)
2203  IF (nlc(2) > 9.9e10) THEN
2204  WRITE(prtint(1:12),'(1pe12.5)') dble(nlc(2))
2205  ELSE
2206  WRITE(prtint(1:12),'(i12)') nlc(2)
2207  END IF
2208  WRITE(iout,921) prtint(1:12),dble(nlc(2))/dble(n), &
2209  dble(nlcp(2))/dble(nlc(2))
2210  END IF
2211  fracnz(l)=ngl(2)/ngl1(2)
2212  IF (l==2) THEN
2213  wcplex(1)=ngl1(2)/ngl(2)
2214  slowcoarse=.false.
2215  END IF
2216  !
2217  slcoarse = nglp(1) < 2*ngl(1) .AND. nglp(2) < 2*ngl(2)
2218  IF( l == nstep+1 .OR. l == maxlev &
2219  .OR. ( ngl(1) <= maxcoarset) &
2220  .OR. ( slcoarse .AND. ngl(1) <= maxcoarseslowt ) &
2221 ! .OR. ( zerors .AND. ngl(1) <= maxcoarseslowt ) &
2222  .OR. ( slowcoarse .AND. slcoarse ) &
2223  .OR. nglp(1) ==ngl(1) ) THEN
2224  nlev=l
2225  END IF
2226  slowcoarse=slcoarse
2227  nlcp=nlc
2228  nglp=ngl
2229  IF (n == 0) THEN
2230  nc=0
2231  allzero=.true.
2232  !
2233  ! Work done: return
2234  !
2235  RETURN
2236  END IF
2237  IF (l /= nlev) THEN
2238  !
2239  ! Coarsening by aggregation
2240  !
2241  CALL dagmg_aggregation(l,n,dt(l)%a,dt(l)%ja,dt(l)%ia,nc)
2242  !
2243  ! Perform setup at next level
2244  !
2245  CALL dagmg_setup(l+1,nc)
2246  !
2247  ELSE
2248  !
2249  ! Coarsest level set up
2250  !
2251  DEALLOCATE(dt(l)%idiag)
2252  memi=memi-(n+1)
2253  CALL dagmg_mumpsseq(n,dt(l)%a,dt(l)%ja,dt(l)%ia,fff,1,ops)
2254  !
2255  ! Coarse solver uses its own internal memory: a,ja,ia
2256  ! more needed
2257  memi=memi-size(dt(l)%ja)-n-1
2258  memr=memr-size(dt(l)%a)
2259  DEALLOCATE(dt(l)%a,dt(l)%ja,dt(l)%ia)
2260  !
2261  IF (wfo) THEN
2262  WRITE(iout,911) irank,ops/dble(11*nn(1)+2*nlc1(2))
2263  END IF
2264  wcplex(4)=0.0d0
2265  IF (ngl(1) > maxcoarset) wcplex(4)=-1.0d0
2266  IF (ngl(1) > maxcoarseslowt) wcplex(4)=ngl(1)/(1000*ngl1(1)**(1.0d0/3))
2267  !
2268  ! Work done: return
2269  !
2270  RETURN
2271  END IF
2272  !
2273  ! Instruction here are executed aftern return from recursive call;
2274  ! i.e., bottom to top
2275  !
2276  ! Change the internal structure from csr with partial ordering of the
2277  ! rows to separate d+l+u format (see subroutine csrdlu for details)
2278  !
2279  nz=dt(l)%ia(n+1)-dt(l)%ia(1)
2280  IF (transint) THEN
2281  ALLOCATE(ap(nz),jap(nz-n),dt(l)%il(n+1),dt(l)%iu(n+1))
2282  memi=memi+nz+n+2
2283  memr=memr+nz
2284  ELSE
2285  ALLOCATE(ap(nz),jap(nz-n))
2286  dt(l)%iu => dt(l)%ia
2287  dt(l)%il => dt(l)%idiag
2288  memi=memi+nz-n
2289  memr=memr+nz
2290  END IF
2291  memax=max(memax,memr+memi*rlenilen)
2292  !
2293  CALL dagmg_csrdlu(n,dt(l)%a,dt(l)%ja,dt(l)%ia,dt(l)%idiag &
2294  ,ap,jap,dt(l)%il,dt(l)%iu,transint )
2295  memi=memi-size(dt(l)%ja)
2296  memr=memr-size(dt(l)%a)
2297  DEALLOCATE(dt(l)%a,dt(l)%ja)
2298  IF (transint) THEN
2299  DEALLOCATE(dt(l)%idiag,dt(l)%ia)
2300  memi=memi-2*(n+1)
2301  ELSE
2302  NULLIFY(dt(l)%idiag,dt(l)%ia) ! il,iu point to same memory
2303  END IF
2304  dt(l)%a => ap
2305  dt(l)%ja => jap
2306  NULLIFY(ap,jap)
2307  !
2308  RETURN
2309 911 FORMAT(i3,'*',' Exact factorization:',f12.3, &
2310  ' equiv. CG iterations')
2311 914 FORMAT(i3,'*',' Level:',i12)
2312 918 FORMAT('****',' Number of unknowns:', a12)
2313 919 FORMAT('****',' Nonzeros :', a12, &
2314  ' (per row:',f7.2,')')
2315 920 FORMAT('****',' Number of variables:',a12, &
2316  ' (reduction ratio:',f5.2,')')
2317 921 FORMAT('****',' Nonzeros:',a12, &
2318  ' (per row:',f4.1, &
2319  '; red. ratio:',f5.2,')')
2320 954 FORMAT('****',' relative complexity (grid):',f9.2)
2321 955 FORMAT('****',' relative complexity (nnzs):',f9.2)
2322  END SUBROUTINE dagmg_setup
2323 !-----------------------------------------------------------------------
2324  SUBROUTINE dagmg_smoothsetup
2325  USE dagmg_mem
2326  IMPLICIT NONE
2327  INTEGER :: l,i
2328  !
2329  ! Smoother set up
2330  !
2331  IF (smoothtype == 1) THEN
2332  DO l=1,nlev-1
2333  !
2334  ! Gauss-Seidel smoother: store inverse of diagonal elements
2335  ! in dt(l)%a(1:n)
2336  DO i=1,nn(l)
2337  dt(l)%a(i)=1.0d0/dt(l)%a(i)
2338  END DO
2339  END DO
2340  END IF
2341  RETURN
2342  END SUBROUTINE dagmg_smoothsetup
2343 !------------------------------------------------------------------
2344  SUBROUTINE dagmg_aggregation(l,n,a,ja,ia,nc,listrank)
2345 !
2346 ! Perform aggregation of level l matrix with partial
2347 ! ordering of the rows specified in (n,a,ja,ia,idiag,iext)
2348 ! (iext significant only with the parallel version)
2349 !
2350 ! Output:
2351 ! nc : number of aggregates (= number of coarse unknowns)
2352 ! dt(l)%ind(1:n): ind(i) is the index of the aggregates to which
2353 ! i belongs; ind(i)=0 iff i has been kept outside
2354 ! aggregation (see checkdd).
2355 ! P: associated prologation matrix;
2356 ! P(i,j)=1 iff ind(i)=j and P(i,j)=0 otherwise (i=1,n ; j=1,nc).
2357 !
2358 ! Corresponding coarse grid matrix stored with partially ordered rows
2359 ! in dt(l+1)%a, dt(l+1)%ja, dt(l+1)%ia, dt(l+1)%idiag, dt(l+1)%iext
2360 ! (both these latter with length nc+1).
2361 !
2362 ! note: storage not optimal: dt(l+1)%a, dt(l+1)%ja greater than needed
2363 !
2364  USE dagmg_mem
2365  IMPLICIT NONE
2366  INTEGER :: l,n,nc
2367  INTEGER :: ja(*),ia(n+1)
2368  INTEGER, OPTIONAL :: listrank(*)
2369  REAL (kind(0.0d0)) :: a(*), dum
2370  INTEGER :: ier,i,j,k,maxdg,np,kpass,nzc,m1,ndd,nzp,isize,nddp,npass1,nz
2371  LOGICAL :: skipass
2372  !
2373  INTEGER, POINTER, DIMENSION(:) :: jan,ian,idiagn,iextn,ind2,lcg,lcgn
2374  REAL(kind(0.0d0)), POINTER, DIMENSION(:) :: an
2375  REAL(kind(0.0d0)), POINTER, DIMENSION(:) :: sinn
2376  !
2377  INTEGER, POINTER, DIMENSION(:) :: jap,iap,idiagp,iextp,lcg1
2378  REAL(kind(0.0d0)), POINTER, DIMENSION(:) :: ap
2379  REAL(kind(0.0d0)), POINTER, DIMENSION(:) :: sip
2380  !
2381  INTEGER, ALLOCATABLE, DIMENSION(:) :: ldd,iw,iperm,riperm
2382  REAL(kind(0.0d0)), ALLOCATABLE, DIMENSION(:) :: si1,w
2383  REAL(kind(0.0d0)), ALLOCATABLE, DIMENSION(:) :: wc
2384  !
2385  !
2386  IF (l .EQ. 1) THEN
2387  IF (wfo) THEN
2388  WRITE (iout,901) irank
2389  END IF
2390  IF (wff) THEN
2391  IF (spd) THEN
2392  WRITE (iout,906)
2393  ELSE IF ( transint) THEN
2394  WRITE (iout,908)
2395  END IF
2396  IF (.not.spd) then
2397  WRITE (iout,902) 'Jacobi',kaptg_dampjac,checkddj
2398  ELSE
2399  WRITE (iout,902) 'BlockD',kaptg_blocdia,checkddb
2400  END IF
2401  IF (checkdd < 0) THEN
2402  WRITE (iout,904)
2403  END IF
2404  WRITE(iout,903) npass,targetcoarsefac
2405  WRITE (iout,905) trspos
2406  END IF
2407  END IF
2408  !
2409  IF (l .EQ. 1) THEN
2410  ALLOCATE(si1(n),ind2(n),iperm(n),riperm(n))
2411  memi=memi+4*n+1
2412  memr=memr+n
2413  CALL dagmg_setcmk(n,ja,ia,dt(l)%idiag,riperm,iperm)
2414  ELSE
2415  ALLOCATE(si1(n),ind2(n),iperm(n))
2416  memi=memi+2*n
2417  memr=memr+n
2418  iperm(1:n)=1
2419  END IF
2420  memax=max(memax,memr+memi*rlenilen)
2421  !
2422  call dagmg_prepareagg(n,a,ja,ia,dt(l)%idiag,ind2,iperm,si1,ndd,l )
2423  !
2424  IF (ndd .EQ. n) THEN
2425  nc=0
2426  nzc=0
2427  DEALLOCATE(si1,iperm,ind2)
2428  memi=memi-2*n
2429  memr=memr-n
2430  IF (l .EQ. 1) THEN
2431  DEALLOCATE(riperm)
2432  memi=memi-n
2433  END IF
2434  GOTO 999
2435  END IF
2436  !
2437  ALLOCATE(ldd(ndd),lcg(2*(n-ndd)))
2438  memi=memi+2*n-ndd
2439  memax=max(memax,memr+memi*rlenilen)
2440  !
2441  IF (dble(n) .GT. targetcoarsefac*(n-ndd)) THEN
2442  ! skip first pairwise pass if the coarsening seems sufficiently
2443  ! fast based on ndd only
2444  skipass=.true.
2445  ! but add one more pass to compensate in case of need
2446  npass1=npass+1
2447  ELSE
2448  skipass=.false.
2449  npass1=npass
2450  END IF
2451  !
2452  ! perform initial pairwise aggregation; output in ind2 and lcg
2453  !
2454  IF (l > 1) THEN
2455  IF (spd) THEN
2456  CALL dagmg_findpairs_si(n,a,ja,ia,dt(l)%idiag,si1 &
2457  ,ind2,lcg,nc,ndd,ldd,skipass,iperm )
2458  ELSE
2459  CALL dagmg_findpairs_gi(n,a,ja,ia,dt(l)%idiag,si1 &
2460  ,ind2,lcg,nc,ndd,ldd,skipass,iperm )
2461  END IF
2462  DEALLOCATE(iperm)
2463  memi=memi-n
2464  ELSE
2465  IF (spd) THEN
2466  CALL dagmg_findpairs_si1(n,a,ja,ia,dt(l)%idiag,si1 &
2467  ,ind2,lcg,nc,ndd,ldd,skipass,riperm,iperm )
2468  ELSE
2469  CALL dagmg_findpairs_gi1(n,a,ja,ia,dt(l)%idiag,si1 &
2470  ,ind2,lcg,nc,ndd,ldd,skipass,riperm,iperm )
2471  END IF
2472  DEALLOCATE(iperm,riperm)
2473  memi=memi-2*n
2474  END IF
2475  nz=ia(n+1)-1
2476  !
2477  ! Form aggregated matrix in (an,jan,ian)
2478  ! the new matrix has at most the following number of nonzero:
2479  ! nz (the previous number) - ndd - 2*(n-ndd-nc)
2480  ! indeed: from rows listed in ldd, at least ndd
2481  ! diagonal entries are removed;
2482  ! (n-ndd-nc) is the number of pairs formed,
2483  ! and each of them condensate at least 3 entries in a
2484  ! single diagonal entry
2485  IF (npass1.GT.1) THEN
2486  isize=nc
2487  ELSE
2488  isize=1
2489  END IF
2490  ALLOCATE(an(nz-2*(n-nc)+ndd),jan(nz-2*(n-nc)+ndd) &
2491  ,ian(nc+1),idiagn(nc+1),sinn(isize),wc(nc),iw(2*nc))
2492  memi=memi+nz-2*(n-nc)+ndd+2*(nc+1)+2*nc+isize
2493  memr=memr+nz-2*(n-nc)+ndd+nc
2494  memax=max(memax,memr+memi*rlenilen)
2495  CALL dagmg_setcg(n,a,ja,ia,dt(l)%idiag,si1,ind2,lcg,nc,an,jan,ian &
2496  ,idiagn,sinn,npass1.GT.1,maxdg,iw,wc )
2497  DEALLOCATE(wc,iw)
2498  memi=memi-2*nc
2499  memr=memr-nc
2500  nzc=ian(nc+1)-1
2501  IF (dble(nz).GT.targetcoarsefac*nzc .OR. npass1.LE.1) THEN
2502  DEALLOCATE(si1,sinn,lcg)
2503  memi=memi-2*(n-ndd)
2504  memr=memr-n-isize
2505  dt(l)%ind => ind2
2506  dt(l+1)%a => an
2507  dt(l+1)%ja => jan
2508  dt(l+1)%ia => ian
2509  dt(l+1)%idiag=> idiagn
2510  NULLIFY(ind2,an,jan,ian,idiagn)
2511  GOTO 999
2512  END IF
2513  !
2514  DEALLOCATE(ind2)
2515  memi=memi-n
2516  !
2517  lcg1 => lcg
2518  NULLIFY(lcg)
2519  m1=1
2520  !
2521  !
2522  ! Allocated pointers at this stage: an, jan, ian, idiagn, sinn, lcg1
2523  ! (lcg accessed via lcg1)
2524  ! Allocated arrays: si1, ldd
2525  !
2526  DO kpass=2,npass1
2527  m1=2*m1
2528  np = nc
2529  nzp = nzc
2530  ap => an
2531  jap => jan
2532  iap => ian
2533  idiagp => idiagn
2534  sip => sinn
2535  NULLIFY(an,jan,ian,idiagn,sinn)
2536  !
2537  ALLOCATE(lcg(2*np),ind2(np),w(maxdg),iw(maxdg))
2538  memi=memi+maxdg+3*np
2539  memr=memr+maxdg
2540  memax=max(memax,memr+memi*rlenilen)
2541  !
2542  ! perform further pairwise aggregation; output in ind2 and lcg
2543  !
2544  ind2(1:np)=-1
2545  IF (spd) THEN
2546  CALL dagmg_findpairs_sf(np,ap,jap,iap,idiagp,sip &
2547  ,ind2,lcg,nc &
2548  ,m1,lcg1,a,ja,ia,dt(l)%idiag,si1,w,iw )
2549  ELSE
2550  CALL dagmg_findpairs_gf(np,ap,jap,iap,idiagp,sip &
2551  ,ind2,lcg,nc &
2552  ,m1,lcg1,a,ja,ia,dt(l)%idiag,si1,w,iw )
2553  END IF
2554  DEALLOCATE(w,iw)
2555  memi=memi-maxdg
2556  memr=memr-maxdg
2557  IF (kpass.NE.npass1) THEN
2558  isize=nc
2559  ELSE
2560  isize=1
2561  DEALLOCATE(si1)
2562  memr=memr-n
2563  END IF
2564  !
2565  ! Form aggregated matrix in (an,jan,ian,idiagn,iextn)
2566  ! the new matrix has at most the following number of nonzero:
2567  ! nzp (the previous number) - 2*(np-nc)
2568  ! indeed: np-nc is the number of pairs formed,
2569  ! and each of them condensate at least 3 entries in a
2570  ! single diagonal entry
2571  !
2572  ! below we reallocate an,ja,ian,idiagn; the memory they were
2573  ! pointing to can be accessed via ap,jap,iap,idiagp
2574  ALLOCATE(an(nzp-2*(np-nc)),jan(nzp-2*(np-nc)) &
2575  ,ian(nc+1),idiagn(nc+1),sinn(isize),wc(nc),iw(2*nc))
2576  memi=memi+nzp-2*(np-nc)+2*(nc+1)+2*nc+isize
2577  memr=memr+nzp-2*(np-nc)+nc
2578  memax=max(memax,memr+memi*rlenilen)
2579  !
2580  ! Allocated pointers at this stage: an,jan,ian,idiagn,sinn,(iextn)
2581  ! ap,jap,iap,idiagp,sip,(iextp)
2582  ! lcg1,lcg,ind2
2583  ! Allocated arrays: si1,ldd,wc,iw
2584  !
2585  CALL dagmg_setcg(np,ap,jap,iap,idiagp,sip,ind2,lcg,nc,an &
2586  ,jan,ian,idiagn,sinn,kpass.NE.npass1,maxdg,iw,wc )
2587  memi=memi-SIZE(jap)-2*(np+1)-np-2*nc
2588  memr=memr-SIZE(ap)-SIZE(sip)-nc
2589  DEALLOCATE(ap,jap,iap,idiagp,sip,ind2,wc,iw)
2590  !
2591  ! Allocated pointers at this stage: an,jan,ian,idiagn,(iextn)
2592  ! sinn,lcg1,lcg
2593  ! Allocated arrays: si1,ldd
2594  !
2595  ! Form new list of aggregates from previou one and the pairwise step
2596  ! just performed
2597  ALLOCATE(lcgn(2*m1*nc))
2598  memi=memi+2*m1*nc
2599  memax=max(memax,memr+memi*rlenilen)
2600  CALL dagmg_lcgmix(nc,m1,lcg1,lcg,lcgn)
2601  memi=memi-SIZE(lcg)-SIZE(lcg1)
2602  DEALLOCATE(lcg,lcg1)
2603  lcg1 => lcgn
2604  NULLIFY(lcgn)
2605  !
2606  ! Allocated pointers at this stage: an,jan,ian,idiagn,sinn,lcg1,(iextn)
2607  ! Allocated arrays: si1,ldd
2608  ! -- same as before entering the do loop --
2609  nzc=ian(nc+1)-1
2610  IF ( kpass.NE.npass1 .AND. dble(nz).GT.targetcoarsefac*nzc ) THEN
2611  DEALLOCATE(si1)
2612  memr=memr-n
2613  EXIT
2614  END IF
2615  END DO
2616  !
2617  memr=memr-SIZE(sinn)
2618  DEALLOCATE(sinn)
2619  !
2620  ALLOCATE(dt(l)%ind(n))
2621  memi=memi+n
2622  memax=max(memax,memr+memi*rlenilen)
2623  CALL dagmg_setind(nc,ndd,ldd,lcg1,2*m1,dt(l)%ind)
2624  memi=memi-ndd-SIZE(lcg1)
2625  DEALLOCATE(lcg1,ldd)
2626  !
2627  ! Allocated pointers at this stage: an,jan,ian,idiagn,(iextn)
2628  ! Allocated arrays: -
2629  !
2630  dt(l+1)%a => an
2631  dt(l+1)%ja => jan
2632  dt(l+1)%ia => ian
2633  dt(l+1)%idiag=> idiagn
2634  NULLIFY(an,jan,ian,idiagn)
2635  !
2636 999 CONTINUE
2637  !
2638  RETURN
2639 901 FORMAT(i3,'*SETUP: Coarsening by multiple pairwise aggregation')
2640 902 FORMAT('**** Quality threshold (',a6,'):',f6.2, &
2641  ' ; Strong diag. dom. trs:',f5.2)
2642 903 FORMAT('**** Maximal number of passes:',i3, &
2643  ' ; Target coarsening factor:',f5.2)
2644 904 FORMAT('**** Diag. dom. checked w.r.t. sum of offdiag', &
2645  ' (no absolute vaues)')
2646 905 FORMAT('****',22x,'Threshold for rows with large pos. offdiag.:',f5.2)
2647 906 FORMAT('**** Rmk: Setup performed assuming the matrix symmetric')
2648 908 FORMAT('**** Rmk: Setup performed for the transpose of the input matrix')
2649  END SUBROUTINE dagmg_aggregation
2650 !-----------------------------------------------------------------------
2651  SUBROUTINE dagmg_setcmk(n,ja,ia,idiag,riperm,iperm,iext)
2652  !
2653  ! compute CMK permutation
2654  !
2655  USE dagmg_mem
2656  IMPLICIT NONE
2657  INTEGER :: n,ja(*),ia(n+1),idiag(n),riperm(*),iperm(n)
2658  INTEGER, OPTIONAL :: iext(*)
2659  LOGICAL :: exc
2660  INTEGER :: i,j,jj,jk,jd,k,kk,j1,j2,i1,i2,ijs,ijs1,ijs2,dg,mindg,kdim
2661  !
2662  !
2663  ! First find starting node of minimal degree
2664  ! while already numbering nodes whith degree one
2665  ! (i.e., only the diagonal element is present in the row);
2666  ! store the opposite of the degree in iperm
2667  mindg=n+1
2668  jj=1
2669  i2=1
2670  DO i = 1,n
2671  dg=ia(i+1)-ia(i)
2672  IF (dg .GT. 1) THEN
2673  iperm(i)=-dg
2674  IF (dg.LT.mindg) THEN
2675  mindg=dg
2676  jj=i
2677  END IF
2678  ELSE
2679  riperm(i2)=i
2680  iperm(i)=i2
2681  ! one more node is numbered: increase i2
2682  i2=i2+1
2683  END IF
2684  ENDDO
2685  !
2686  ijs=0
2687  i1=i2
2688 15 CONTINUE
2689  !
2690  ! Start with next node of minimum degree: jj
2691  riperm(i2)=jj
2692  iperm(jj)=i2
2693  !
2694  ! in what follows: i2 is the number of nodes already numbered
2695  ! and i1-1 the number of nodes whose neighbors have been processed
2696  DO WHILE (i1.LE.i2 .AND. i2.LT.n)
2697  !
2698  ! Number neighbors of riperm(i1)
2699  !
2700  i=riperm(i1)
2701  ijs1=i2+1
2702  !
2703  j1 =ia(i)
2704  jd=idiag(i)
2705  j2 = ia(i+1)-1
2706  DO kk = j1,jd-1
2707  j=ja(kk)
2708  IF (iperm(j) .LT. 0) THEN
2709  ! one more node is numbered: increase i2
2710  i2=i2+1
2711  riperm(i2)=j
2712  END IF
2713  ENDDO
2714  DO kk = jd+1,j2
2715  j=ja(kk)
2716  IF (iperm(j) .LT. 0) THEN
2717  ! one more node is numbered: increase i2
2718  i2=i2+1
2719  riperm(i2)=j
2720  END IF
2721  ENDDO
2722  !
2723  ijs2=i2
2724  ! Sort just found nodes such that value stored in iperm
2725  ! will be in descending order; since this is the oposite of
2726  ! the degree, degrees will be in ascending order
2727  exc=.true. .AND. ijs2.GT.ijs1
2728  DO WHILE(exc)
2729  exc=.false.
2730  DO kk=ijs1+1,ijs2
2731  IF( iperm(riperm(kk)) .GT. iperm(riperm(kk-1)) )THEN
2732  j=riperm(kk)
2733  riperm(kk)=riperm(kk-1)
2734  riperm(kk-1)=j
2735  exc=.true.
2736  END IF
2737  END DO
2738  END DO
2739  DO kk=ijs1,ijs2
2740  iperm(riperm(kk))=kk
2741  END DO
2742  !
2743  ! Done with riperm(i1): proceed with next node
2744  i1=i1+1
2745  END DO
2746  IF (i2 .LT. n) THEN
2747  !
2748  ! Not all nodes numbered during first pass
2749  ! (may happen if the matrix is reducible)
2750  ! Find a new starting node with minimal degree and proceed
2751  jj=0
2752  DO WHILE (jj .EQ. 0)
2753  ijs=ijs+1
2754  IF (ijs .GT. n) THEN
2755  mindg=mindg+1
2756  ijs=1
2757  END IF
2758  ijs1=ijs
2759  IF (iperm(ijs1).LT.0 .AND. ia(ijs1+1)-ia(ijs1).EQ.mindg) &
2760  jj=ijs1
2761  END DO
2762  i2=i2+1
2763  ! Repeat previous step
2764  GOTO 15
2765  END IF
2766  !
2767  RETURN
2768  END SUBROUTINE dagmg_setcmk
2769 !-----------------------------------------------------------------------
2770  SUBROUTINE dagmg_prepareagg(n,a,ja,ia,idiag,ind2,iperm,si,ndd,l,iext)
2771  !
2772  ! Performs some setup before aggregation:
2773  ! detects nodes to be kept outside because of strong
2774  ! diagonal dominance, as well as nodes to be transferred
2775  ! as is to the coarse grid because of strong positive
2776  ! coupling in the row or column
2777  !
2778  USE dagmg_mem
2779  IMPLICIT NONE
2780  INTEGER :: n,ja(*),ia(n+1),idiag(n),ind2(n),iperm(n)
2781  INTEGER, OPTIONAL :: iext(*)
2782  INTEGER :: ndd, l
2783  REAL(kind(0.0d0)) :: a(*)
2784  REAL(kind(0.0d0)) , TARGET :: si(n)
2785  REAL(kind(0.0d0)) :: checkddl,oda,odm,ods,vald
2786  INTEGER :: i,j,jj,jk,jd,k,kk,j1,j2,i1,i2,ijs,ijs1,ijs2,dg,kdim,nnegrcs
2787  REAL(kind(0.0d0)), POINTER, DIMENSION(:) :: odmax,odabs,osi
2788  !
2789  IF (.NOT.spd) THEN
2790  checkddl=checkddj
2791  ELSE
2792  checkddl=checkddb
2793  END IF
2794  !
2795  IF (.NOT.spd) THEN
2796  !
2797  ! Matrix is not symmetric: gather for each node
2798  ! information about elements in column:
2799  ! sum, maximum and, if needed, sum of abs value
2800  !
2801  IF (checkdd > 0) THEN
2802  ALLOCATE(odmax(n),odabs(n))
2803  memi=memi+2*n
2804  odabs(1:n)=0.0d0
2805  ELSE
2806  ALLOCATE(odmax(n))
2807  memi=memi+n
2808  END IF
2809  osi => si(1:n)
2810  si(1:n)=0.0d0
2811  odmax(1:n)=0.0d0
2812  memax=max(memax,memr+memi*rlenilen)
2813  DO i=n,1,-1
2814  j =ia(i)
2815  jd=idiag(i)
2816  jj=ia(i+1)-1
2817  DO k = j,jd-1
2818  jk=ja(k)
2819  osi(jk)=osi(jk)+a(k)
2820  odmax(jk)=max(odmax(jk),a(k))
2821  IF (checkdd > 0) odabs(jk)=odabs(jk)+abs(a(k))
2822  ENDDO
2823  DO k = jd+1,jj
2824  jk=ja(k)
2825  osi(jk)=osi(jk)+a(k)
2826  odmax(jk)=max(odmax(jk),a(k))
2827  IF (checkdd > 0) odabs(jk)=odabs(jk)+abs(a(k))
2828  ENDDO
2829  ENDDO
2830  END IF
2831  !
2832  ! Compute the mean row and column sum,
2833  ! and check if the node will participate to aggregation
2834  !
2835  ndd=0
2836  nnegrcs=0
2837  !
2838  DO i=1,n
2839  j1 =ia(i)
2840  jd=idiag(i)
2841  j2 = ia(i+1)-1
2842  vald = a(jd)
2843  odm=0.0d0
2844  oda=0.0d0
2845  ods=0.0d0
2846  DO kk = j1,jd-1
2847  ods=ods+a(kk)
2848  odm=max(odm,a(kk))
2849  IF (checkdd > 0) oda=oda+abs(a(kk))
2850  ENDDO
2851  DO kk = jd+1,j2
2852  ods=ods+a(kk)
2853  odm=max(odm,a(kk))
2854  IF (checkdd > 0) oda=oda+abs(a(kk))
2855  ENDDO
2856  !
2857  ! Update with info from columns if needed
2858  IF (.NOT.spd) THEN
2859  ods=(osi(i)+ods)/2
2860  odm=max(odm,odmax(i))
2861  IF (checkdd > 0) oda=(oda+odabs(i))/2
2862  END IF
2863  !
2864  ! Count the number of nodes with negative mean row and column sum
2865  IF ((vald+ods) .LT. -repsmach*abs(vald)) nnegrcs=nnegrcs+1
2866  !
2867  ! Now check the category of the node and fill si(i)
2868  !
2869  si(i)=-ods
2870  IF ( (checkdd.GT.0 .AND. vald.GT.checkddl*oda) &
2871  .OR. (checkdd.LT.0 .AND. vald.GT.checkddl*abs(ods)) ) THEN
2872  !
2873  ! Node to be kept outside the aggregation because of strong diagonal
2874  ! dominance: set ind2(i) to 0
2875  ind2(i)=0
2876  ndd=ndd+1
2877  ELSE
2878  ! Normal node: set ind2(i) to negative
2879  ind2(i)=-1
2880  !
2881  ! iperm(i) set to zero for nodes that are to be transeferred as is
2882  ! to the coarse grid
2883  IF (odm .GT. trspos*vald) iperm(i)=0
2884  ENDIF
2885  END DO
2886  !
2887  ! set zerors according to nnegrcs and fracnegrcsum
2888  !
2889  zerors=.false.
2890  IF (nnegrcs .GT. fracnegrcsum*n) THEN
2891  zerors=.true.
2892  ndd=0
2893  ind2(1:n)=-1
2894  END IF
2895  !
2896  ! Release local memory if needed
2897  IF (.NOT.spd) THEN
2898  IF (checkdd > 0) THEN
2899  DEALLOCATE(odmax,odabs)
2900  memi=memi-2*n
2901  ELSE
2902  DEALLOCATE(odmax)
2903  memi=memi-n
2904  END IF
2905  END IF
2906  RETURN
2907  END SUBROUTINE dagmg_prepareagg
2908 !-----------------------------------------------------------------------
2909  SUBROUTINE dagmg_findpairs_gf(n,a,ja,ia,idiag,si,ind,lcg,nc &
2910  ,m1,lcg1,a1,ja1,ia1,idiag1,si1,rtent,jtent )
2911 ! Performs pairwise aggregation according to Algorithms 4.2 and 4.3 in [2,3],
2912 ! with some heuristic enhancements to cover non diagonally dominant matrices.
2913 !
2914 ! Version: further pairwise aggregation for general matrices [3].
2915 !
2916  USE dagmg_mem
2917  IMPLICIT NONE
2918  INTEGER :: n,ja(*),ia(n+1),idiag(n),ind(n),lcg(2,*),nc
2919  REAL(kind(0.0d0)) :: a(*)
2920  REAL(kind(0.0d0)) :: si(n)
2921  INTEGER :: m1,ja1(*),ia1(*),idiag1(*),jtent(*),lcg1(m1,n)
2922  REAL(kind(0.0d0)) :: a1(*)
2923  REAL(kind(0.0d0)) :: si1(*),rtent(*)
2924 !
2925 ! INPUT: n,ja(*),ia(n+1),idiag(n): aggregated matrix from previous pass
2926 ! (CSR format with partial ordering of each row: lower
2927 ! triangular part, next diag., and eventually upper triagular part;
2928 ! idiag point to the diagonal element)
2929 !
2930 ! si(n) : vector s or \tile{s} in Algorithm 4.2 or 4.3 of [2,3]
2931 !
2932 ! ind(n): on input, entries in ind should be negative
2933 !
2934 ! ja1(*),ia1(*),idiag1(*): unaggregated matrix, in which the
2935 ! threshold condition has to be checked
2936 ! si1(*): si in that matrix
2937 !
2938 ! m1 : maximal number of nodes in aggregates from previous passes
2939 !
2940 ! lcg1(m1,n): list of aggregates from previous passes
2941 ! lcg1(2,i) = 0 means that node i has to be transferred unaggregated
2942 ! to the coarse grid, and lcg(2,j) set to 0
2943 ! for the corresponding corase grid node j
2944 !
2945 ! zerors: if true, diagonal entries are not taken into account;
2946 ! instead, the matrix is treated as it would have mean
2947 ! column and row sum equal to zero
2948 !
2949 ! OUTPUT: nc : number of pairs formed
2950 !
2951 ! lcg(2,nc): list of aggregates;
2952 ! lcg(2,i)=0 : lcg(1,i) is a singleton that was
2953 ! forced to be transferred unaggregated
2954 ! lcg(2,i)=-1: lcg(1,i) is a singleton because no valid
2955 ! pair has been found
2956 ! lcg(2,i)>0 : (lcg(1,i),lcg(2,i)) form the ith pair
2957 !
2958 ! ind(n): ind(i)=j means that i belongs to aggregate j
2959 !
2960 ! rtent, jtent are working arrays of size >= the maximal degree of a node in
2961 ! input matrix (a,ja,ia)
2962 !
2963 !----------------
2964 ! Local variables
2965 !
2966  REAL(kind(0.0d0)) :: val,vals,valp,tent,rsi,rsj,epsr
2967  REAL(kind(0.0d0)) :: del1,del2,eta1,eta2,del12,sig1,sig2,rnd,vald
2968  INTEGER :: mindg,i,j,jj,k,kk,jk,isel,dg,ipair,nmark,idd
2969  INTEGER :: i1,i2,i3,ijs,ntentleft,npc,itrs,ijtent,j2
2970  LOGICAL :: acc,ifirst
2971  REAL(kind(0.0d0)) :: kaptg,bndmum1m1,dbndmum1,umdbndmum1
2972 !
2973  ifirst=.true.
2974  1 CONTINUE
2975 !
2976  kaptg=imult*kaptg_dampjac
2977  bndmum1m1=1.0d0/(kaptg-1.0d0)
2978  dbndmum1=2*1.0d0/kaptg
2979  umdbndmum1=1.0d0-dbndmum1
2980  idd=0
2981  nmark=0
2982  nc=0
2983  ijs=1
2984  npc=0
2985  DO WHILE (nmark.LT.n)
2986  isel=ijs
2987  ijs=ijs+1
2988  !
2989  ! Node isel has been selected
2990  !
2991  ! Check if isel has already been processed
2992  IF (ind(isel) .GE. 0) cycle
2993  !
2994  ! A new aggregate is formed that contains isel
2995  nc = nc + 1
2996  lcg(1,nc) = isel
2997  nmark = nmark+1
2998  ind(isel) = nc
2999  ipair = 0
3000  !
3001  ! Check if isel has to be transferred unaggregated
3002  IF (lcg1(2,isel) .EQ. 0) THEN
3003  lcg(2,nc) = 0
3004  npc=npc+1
3005  cycle
3006  END IF
3007  ntentleft=0
3008  !
3009  ! Try to form a pair with isel: follow list of neighbors
3010  !
3011  i2=ia(isel+1)-1
3012  DO i = ia(isel),i2
3013  ! CYCLE means: reject this neighbor, proceed with next one
3014  IF (i .EQ. idiag(isel)) cycle
3015  j = ja(i)
3016  ! check if j is available to form a pair
3017  IF(lcg1(2,j).EQ.0 .OR. ind(j).GE.0) cycle
3018  ! search for the corresponding entry in transposed matrix
3019  kk=0
3020  IF (i .LT. idiag(isel)) THEN
3021  j2=ia(j+1)-1
3022  DO jk=idiag(j)+1,j2
3023  IF (ja(jk) .EQ. isel) THEN
3024  kk=jk
3025  EXIT
3026  END IF
3027  END DO
3028  ELSE
3029  DO jk=ia(j),idiag(j)-1
3030  IF (ja(jk) .EQ. isel) THEN
3031  kk=jk
3032  EXIT
3033  END IF
3034  END DO
3035  ENDIF
3036  vals=-a(i)/2
3037  IF(kk .NE. 0) vals=vals-a(kk)/2
3038  IF (zerors) THEN
3039  rsi=0.0d0
3040  rsj=0.0d0
3041  eta1=2*si(isel)
3042  eta2=2*si(j)
3043  ELSE
3044  rsi=-si(isel)+a(idiag(isel))
3045  rsj=-si(j)+a(idiag(j))
3046  eta1=2*a(idiag(isel))
3047  eta2=2*a(idiag(j))
3048  END IF
3049  sig1=si(isel)-vals
3050  sig2=si(j)-vals
3051  !
3052  ! CYCLE instructions below: pair rejected because A_G is not
3053  ! nonnegative definite
3054  !
3055  ! Heuristic if sigj is negative: set Sigma_G(j)=abs(sigj) (j=1,2)
3056  IF (sig1 > 0.0d0) THEN
3057  del1=rsi
3058  ELSE
3059  del1=rsi+2*sig1
3060  END IF
3061  IF (sig2 > 0.0d0) THEN
3062  del2=rsj
3063  ELSE
3064  del2=rsj+2*sig2
3065  END IF
3066  IF (vals > 0.0d0) THEN
3067  epsr=repsmach*vals
3068  IF (abs(del1) < epsr .AND. abs(del2) < epsr) THEN
3069  valp=(eta1*eta2)/(vals*(eta1+eta2))
3070  ELSE IF (abs(del1) < epsr) THEN
3071  IF (del2 < -epsr) cycle
3072  valp=(eta1*eta2)/(vals*(eta1+eta2))
3073  ELSE IF (abs(del2) < epsr) THEN
3074  IF (del1 < -epsr) cycle
3075  valp=(eta1*eta2)/(vals*(eta1+eta2))
3076  ELSE
3077  del12=del1+del2
3078  IF (del12 < -epsr) cycle
3079  valp=vals+del1*del2/del12
3080  IF (valp < 0.0d0) cycle
3081  valp=((eta1*eta2)/(eta1+eta2))/valp
3082  END IF
3083  ELSE
3084  IF (del1 .LE. 0.0d0 .OR. del2 .LE. 0.0d0) cycle
3085  valp=vals+del1*del2/(del1+del2)
3086  IF (valp < 0.0d0) cycle
3087  vals=(eta1*eta2)/(eta1+eta2)
3088  valp=vals/valp
3089  END IF
3090  ! main threshold test
3091  IF (valp > kaptg) cycle
3092  !
3093  ! A_G is nonneagtive definite and passed the corresponding
3094  ! "quality" threshold test: (isel,j) is possibly
3095  ! an acceptable pair; check if it is the best one
3096  ! and update the list of possible second choices in rtent, jtent
3097  !
3098  tent=valp
3099  IF (ipair.EQ.0) GOTO 10
3100  ntentleft=ntentleft+1
3101  IF (16*(tent-val).LT.-1) GOTO 9
3102  IF (16*(tent-val).LT.1 .AND. j.LT.ipair) GOTO 9
3103  rtent(ntentleft)=tent
3104  jtent(ntentleft)=j
3105  cycle
3106 9 CONTINUE
3107  rtent(ntentleft)=val
3108  jtent(ntentleft)=ipair
3109 10 CONTINUE
3110  ipair = j
3111  val = tent
3112  ENDDO
3113  IF (ipair .EQ. 0) GOTO 25
3114 20 CONTINUE
3115  ! Perform check in unagregated matrix, considering possible pairs
3116  ! in right order
3117  CALL dagmg_checktentagg_gf
3118  IF (.NOT.acc) THEN
3119  ! Tentative pair has been rejected, check for the next one, if any
3120  ipair = 0
3121  IF (ntentleft .GT.0) THEN
3122  i=1
3123  j=1
3124  DO WHILE (i .LE. ntentleft)
3125  IF (jtent(j).GT.0) THEN
3126  tent=rtent(j)
3127  IF (ipair.EQ.0) GOTO 22
3128  IF (16*(tent-val).LT.-1) GOTO 22
3129  IF (16*(tent-val).LT.1 .AND. j.LT.ipair) GOTO 22
3130  GOTO 23
3131 22 CONTINUE
3132  val=tent
3133  ipair=jtent(j)
3134  ijtent=j
3135 23 CONTINUE
3136  i=i+1
3137  END IF
3138  j=j+1
3139  END DO
3140  ntentleft=ntentleft-1
3141  jtent(ijtent)=0
3142  GOTO 20
3143  END IF
3144  END IF
3145  !
3146 25 CONTINUE
3147  !
3148  IF (ipair .EQ. 0) THEN
3149  ! no valid pair found: isel left alone in aggregate nc
3150  lcg(2,nc) = -1
3151  ELSE
3152  ! pair nc is formed with isel and ipair
3153  lcg(2,nc) = ipair
3154  ind(ipair) = nc
3155  nmark = nmark+1
3156  END IF
3157  ENDDO
3158  RETURN
3159  CONTAINS
3160 !-----------------------------------------------------------------------
3161  SUBROUTINE dagmg_checktentagg_gf
3162 !
3163 ! Check the quality in the original matrix of an aggregate formed
3164 ! by grouping two aggregates from previous pass(es)
3165 !
3166 ! INPUT: see dagmg_findpairs_GF
3167 !
3168 ! OUTPUT: acc(logical): acc=.true. if the new aggregate can be accepted and
3169 ! acc=.false. otherwise
3170 !
3171 !
3172  INTEGER, PARAMETER :: mm=max(2**(npass+1),8)
3173  REAL(kind(0.0d0)) :: w(mm,mm), sig(mm), age(mm), v(mm)
3174  REAL(kind(0.0d0)) :: alpha, alp, tmp, beta, f1, f2
3175  INTEGER :: j,jj,k,l,m,info, setdim1, setdim, l2, k2
3176  INTEGER :: set(mm), l1, wdtht
3177  REAL(kind(0.0d0)) :: t
3178  LOGICAL :: exc
3179 !
3180 ! Find indices of submatrix to be extracted and store them in set(1:setdim)
3181  IF (m1.eq.2) THEN
3182  IF (lcg1(2,isel) .LT. 0) THEN
3183  IF (lcg1(2,ipair) .LT. 0) THEN
3184  set(1)=lcg1(1,isel)
3185  set(2)=lcg1(1,ipair)
3186  setdim=2
3187  ELSE
3188  set(1)=lcg1(1,isel)
3189  set(2)=lcg1(1,ipair)
3190  set(3)=lcg1(2,ipair)
3191  setdim=3
3192  END IF
3193  l1=1
3194  ELSE
3195  IF (lcg1(2,ipair) .LT. 0) THEN
3196  set(1)=lcg1(1,isel)
3197  set(2)=lcg1(2,isel)
3198  set(3)=lcg1(1,ipair)
3199  setdim=3
3200  ELSE
3201  set(1)=lcg1(1,isel)
3202  set(2)=lcg1(2,isel)
3203  set(3)=lcg1(1,ipair)
3204  set(4)=lcg1(2,ipair)
3205  setdim=4
3206  END IF
3207  l1=2
3208  END IF
3209  ELSE
3210  l1=m1
3211  IF (lcg1(m1,isel).LT.0) l1=-lcg1(m1,isel)
3212  set(1:l1)=lcg1(1:l1,isel)
3213  l2=m1
3214  IF (lcg1(m1,ipair).LT.0) l2=-lcg1(m1,ipair)
3215  set(l1+1:l1+l2)=lcg1(1:l2,ipair)
3216  setdim=l1+l2
3217  END IF
3218 !
3219 ! Sort indices in set(1:setdim) in increasing order
3220 ! (makes susequent processing faster)
3221  exc=.true.
3222  DO WHILE(exc)
3223  exc=.false.
3224  DO l=2,setdim
3225  IF( set(l)<set(l-1) )THEN
3226  jj=set(l)
3227  set(l)=set(l-1)
3228  set(l-1)=jj
3229  exc=.true.
3230  END IF
3231  END DO
3232  END DO
3233 !
3234 ! Extract submatrix of (the symmetric part of) input matrix and store it in W
3235 ! Store in AGe the mean row and column sum of input matrix for related indices
3236 ! (this is also the row sum of A_G)
3237  DO j=1,setdim
3238  jj=set(j)
3239  sig(j)=si1(jj)
3240  IF (zerors) THEN
3241  w(j,j)=sig(j)
3242  age(j)=0.0d0
3243  ELSE
3244  w(j,j)=a1(idiag1(jj))
3245  age(j)=w(j,j)-sig(j)
3246  END IF
3247  l2=j+1
3248  DO l=l2,setdim
3249  w(j,l)=0.0d0
3250  w(l,j)=0.0d0
3251  END DO
3252  k2=ia1(jj+1)-1
3253  DO k=idiag1(jj)+1,k2
3254  DO l=l2,setdim
3255  m=set(l)
3256  IF(ja1(k)==m)THEN
3257  alpha=a1(k)/2
3258  w(j,l)=alpha
3259  w(l,j)=alpha
3260  EXIT
3261  END IF
3262  END DO
3263  END DO
3264  DO k=ia1(jj),idiag1(jj)-1
3265  DO l=1,j-1
3266  m=set(l)
3267  IF(ja1(k)==m)THEN
3268  alpha=a1(k)/2
3269  w(j,l)=w(j,l)+alpha
3270  w(l,j)=w(j,l)
3271  EXIT
3272  END IF
3273  END DO
3274  END DO
3275  END DO
3276 !
3277  DO j=1,setdim
3278  ! Set sig(j) equal to minus the sum of connections that are
3279  ! external to the tentative aggregate; that is, add the internal
3280  ! offdiagonal entries to current sig(j)
3281  DO k=1,setdim
3282  IF (j.ne.k) THEN
3283  sig(j)=sig(j)+w(j,k)
3284  END IF
3285  ENDDO
3286  ! Heuristic if sig(j) is negative: set Sigma_G(j)=abs(sig(j))
3287  ! Hence need to correct the row sum of A_G stored in AGe
3288  IF (sig(j) < 0.0d0) age(j)=age(j)+2*sig(j)
3289  !
3290  ! Store diagonal element in v
3291  v(j)=w(j,j)
3292  ! 2
3293  ! Now set W = A_G - --- D_G
3294  ! kap
3295  !
3296  ! Then W contains the matrix that need to be nonnegative definite to
3297  ! accept the aggregate according to the theory in [3],
3298  ! up to the rank one correction that is added below
3299  !
3300  ! Offdiagonal entrie of W alraedy OK;
3301  ! for the diagonal ones, note that A_G(j,j)=W(j,j)-abs(sig(j))
3302  ! and D_G(j,j)=W(j,j), hence the rule below with umdbndmum1=1-2/kap
3303  !
3304  w(j,j)=umdbndmum1*w(j,j)-abs(sig(j))
3305  ! beta <- quadratic form (1^T * D_G * 1) ; [ 1 = vector of all ones ]
3306  ! alp is positive if and only if AGe(j) is zero for all j;
3307  ! that is, if and only if A_G has 1 in its null space
3308  IF (j .eq. 1) THEN
3309  beta=v(j)
3310  alp=abs(age(j))
3311  ELSE
3312  beta=beta+v(j)
3313  alp=max(alp,abs(age(j)))
3314  END IF
3315  END DO
3316 !
3317 ! Eventually, add to W the rank one correction
3318 ! 2
3319 ! ------------------- D_G * 1 * 1^T * D_G
3320 ! kap*(1^T * D_G * 1)
3321 !
3322  beta=dbndmum1/beta
3323  DO j=1,setdim
3324  DO k=1,setdim
3325  w(j,k)=w(j,k)+beta*v(j)*v(k)
3326  END DO
3327  END DO
3328 !
3329 ! Now, the rule is to accept the tentative aggregate if and only if
3330 ! W(setdim,setdim) is nonnegative definite
3331 ! Two cases:
3332 ! alp == 0 , then W is singular, hence we check the positive
3333 ! definitenes of W(setdim-1,setdim-1)
3334 ! alp > 0 , then W is normally not singular and we require thus that
3335 ! it is positive definite
3336 !
3337  IF (alp.LT.repsmach*beta) THEN
3338  setdim1=setdim-1
3339  ELSE
3340  setdim1=setdim
3341  END IF
3342 !
3343 ! Perform a Choleky factorization of W(setdim1,setdim1) and check
3344 ! for negative pivots; code expended for small dimensions (usual case)
3345 !
3346  acc=.false.
3347 ! acc has just be defined to FALSE; hence RETURN statement means rejection
3348 !
3349  SELECT CASE (setdim1)
3350  CASE (1)
3351  GOTO 11
3352  CASE (2)
3353  GOTO 12
3354  CASE (3)
3355  GOTO 13
3356  CASE (4)
3357  GOTO 14
3358  CASE (5)
3359  GOTO 15
3360  CASE (6)
3361  GOTO 16
3362  CASE (7)
3363  GOTO 17
3364  CASE (8)
3365  GOTO 18
3366  CASE DEFAULT
3367  CALL dpotrf('U',setdim1,w,mm,info)
3368  IF (info .NE. 0) RETURN
3369  GOTO 10
3370  END SELECT
3371 18 CONTINUE
3372  IF (w(8,8) .LE. 0.0d0) RETURN
3373  w(7,7) = w(7,7) - (w(7,8)/w(8,8)) * w(7,8)
3374  t = w(6,8)/w(8,8)
3375  w(6,7) = w(6,7) - t * w(7,8)
3376  w(6,6) = w(6,6) - t * w(6,8)
3377  t = w(5,8)/w(8,8)
3378  w(5,7) = w(5,7) - t * w(7,8)
3379  w(5,6) = w(5,6) - t * w(6,8)
3380  w(5,5) = w(5,5) - t * w(5,8)
3381  t = w(4,8)/w(8,8)
3382  w(4,7) = w(4,7) - t * w(7,8)
3383  w(4,6) = w(4,6) - t * w(6,8)
3384  w(4,5) = w(4,5) - t * w(5,8)
3385  w(4,4) = w(4,4) - t * w(4,8)
3386  t = w(3,8)/w(8,8)
3387  w(3,7) = w(3,7) - t * w(7,8)
3388  w(3,6) = w(3,6) - t * w(6,8)
3389  w(3,5) = w(3,5) - t * w(5,8)
3390  w(3,4) = w(3,4) - t * w(4,8)
3391  w(3,3) = w(3,3) - t * w(3,8)
3392  t = w(2,8)/w(8,8)
3393  w(2,7) = w(2,7) - t * w(7,8)
3394  w(2,6) = w(2,6) - t * w(6,8)
3395  w(2,5) = w(2,5) - t * w(5,8)
3396  w(2,4) = w(2,4) - t * w(4,8)
3397  w(2,3) = w(2,3) - t * w(3,8)
3398  w(2,2) = w(2,2) - t * w(2,8)
3399  t = w(1,8)/w(8,8)
3400  w(1,7) = w(1,7) - t * w(7,8)
3401  w(1,6) = w(1,6) - t * w(6,8)
3402  w(1,5) = w(1,5) - t * w(5,8)
3403  w(1,4) = w(1,4) - t * w(4,8)
3404  w(1,3) = w(1,3) - t * w(3,8)
3405  w(1,2) = w(1,2) - t * w(2,8)
3406  w(1,1) = w(1,1) - t * w(1,8)
3407 17 CONTINUE
3408  IF (w(7,7) .LE. 0.0d0) RETURN
3409  w(6,6) = w(6,6) - (w(6,7)/w(7,7)) * w(6,7)
3410  t = w(5,7)/w(7,7)
3411  w(5,6) = w(5,6) - t * w(6,7)
3412  w(5,5) = w(5,5) - t * w(5,7)
3413  t = w(4,7)/w(7,7)
3414  w(4,6) = w(4,6) - t * w(6,7)
3415  w(4,5) = w(4,5) - t * w(5,7)
3416  w(4,4) = w(4,4) - t * w(4,7)
3417  t = w(3,7)/w(7,7)
3418  w(3,6) = w(3,6) - t * w(6,7)
3419  w(3,5) = w(3,5) - t * w(5,7)
3420  w(3,4) = w(3,4) - t * w(4,7)
3421  w(3,3) = w(3,3) - t * w(3,7)
3422  t = w(2,7)/w(7,7)
3423  w(2,6) = w(2,6) - t * w(6,7)
3424  w(2,5) = w(2,5) - t * w(5,7)
3425  w(2,4) = w(2,4) - t * w(4,7)
3426  w(2,3) = w(2,3) - t * w(3,7)
3427  w(2,2) = w(2,2) - t * w(2,7)
3428  t = w(1,7)/w(7,7)
3429  w(1,6) = w(1,6) - t * w(6,7)
3430  w(1,5) = w(1,5) - t * w(5,7)
3431  w(1,4) = w(1,4) - t * w(4,7)
3432  w(1,3) = w(1,3) - t * w(3,7)
3433  w(1,2) = w(1,2) - t * w(2,7)
3434  w(1,1) = w(1,1) - t * w(1,7)
3435 16 CONTINUE
3436  IF (w(6,6) .LE. 0.0d0) RETURN
3437  w(5,5) = w(5,5) - (w(5,6)/w(6,6)) * w(5,6)
3438  t = w(4,6)/w(6,6)
3439  w(4,5) = w(4,5) - t * w(5,6)
3440  w(4,4) = w(4,4) - t * w(4,6)
3441  t = w(3,6)/w(6,6)
3442  w(3,5) = w(3,5) - t * w(5,6)
3443  w(3,4) = w(3,4) - t * w(4,6)
3444  w(3,3) = w(3,3) - t * w(3,6)
3445  t = w(2,6)/w(6,6)
3446  w(2,5) = w(2,5) - t * w(5,6)
3447  w(2,4) = w(2,4) - t * w(4,6)
3448  w(2,3) = w(2,3) - t * w(3,6)
3449  w(2,2) = w(2,2) - t * w(2,6)
3450  t = w(1,6)/w(6,6)
3451  w(1,5) = w(1,5) - t * w(5,6)
3452  w(1,4) = w(1,4) - t * w(4,6)
3453  w(1,3) = w(1,3) - t * w(3,6)
3454  w(1,2) = w(1,2) - t * w(2,6)
3455  w(1,1) = w(1,1) - t * w(1,6)
3456 15 CONTINUE
3457  IF (w(5,5) .LE. 0.0d0) RETURN
3458  w(4,4) = w(4,4) - (w(4,5)/w(5,5)) * w(4,5)
3459  t = w(3,5)/w(5,5)
3460  w(3,4) = w(3,4) - t * w(4,5)
3461  w(3,3) = w(3,3) - t * w(3,5)
3462  t = w(2,5)/w(5,5)
3463  w(2,4) = w(2,4) - t * w(4,5)
3464  w(2,3) = w(2,3) - t * w(3,5)
3465  w(2,2) = w(2,2) - t * w(2,5)
3466  t = w(1,5)/w(5,5)
3467  w(1,4) = w(1,4) - t * w(4,5)
3468  w(1,3) = w(1,3) - t * w(3,5)
3469  w(1,2) = w(1,2) - t * w(2,5)
3470  w(1,1) = w(1,1) - t * w(1,5)
3471 14 CONTINUE
3472  IF (w(4,4) .LE. 0.0d0) RETURN
3473  w(3,3) = w(3,3) - (w(3,4)/w(4,4)) * w(3,4)
3474  t = w(2,4)/w(4,4)
3475  w(2,3) = w(2,3) - t * w(3,4)
3476  w(2,2) = w(2,2) - t * w(2,4)
3477  t = w(1,4)/w(4,4)
3478  w(1,3) = w(1,3) - t * w(3,4)
3479  w(1,2) = w(1,2) - t * w(2,4)
3480  w(1,1) = w(1,1) - t * w(1,4)
3481 13 CONTINUE
3482  IF (w(3,3) .LE. 0.0d0) RETURN
3483  w(2,2) = w(2,2) - (w(2,3)/w(3,3)) * w(2,3)
3484  t = w(1,3)/w(3,3)
3485  w(1,2) = w(1,2) - t * w(2,3)
3486  w(1,1) = w(1,1) - t * w(1,3)
3487 12 CONTINUE
3488  IF (w(2,2) .LE. 0.0d0) RETURN
3489  w(1,1) = w(1,1) - (w(1,2)/w(2,2)) * w(1,2)
3490 11 CONTINUE
3491  IF (w(1,1) .LE. 0.0d0) RETURN
3492 10 CONTINUE
3493 !
3494 ! all test passed: accept the aggregate
3495  acc=.true.
3496 !
3497  RETURN
3498  END SUBROUTINE dagmg_checktentagg_gf
3499  END SUBROUTINE dagmg_findpairs_gf
3500 !-----------------------------------------------------------------------
3501  SUBROUTINE dagmg_findpairs_sf(n,a,ja,ia,idiag,si,ind,lcg,nc &
3502  ,m1,lcg1,a1,ja1,ia1,idiag1,si1,rtent,jtent )
3503 ! Performs pairwise aggregation according to Algorithms 4.2 and 4.3 in [2,3],
3504 ! with some heuristic enhancements to cover non diagonally dominant matrices.
3505 !
3506 ! Version: further pairwise aggregation for symm. matrices [2].
3507 !
3508  USE dagmg_mem
3509  IMPLICIT NONE
3510  INTEGER :: n,ja(*),ia(n+1),idiag(n),ind(n),lcg(2,*),nc
3511  REAL(kind(0.0d0)) :: a(*)
3512  REAL(kind(0.0d0)) :: si(n)
3513  INTEGER :: m1,ja1(*),ia1(*),idiag1(*),jtent(*),lcg1(m1,n)
3514  REAL(kind(0.0d0)) :: a1(*)
3515  REAL(kind(0.0d0)) :: si1(*),rtent(*)
3516 !
3517 ! INPUT: n,ja(*),ia(n+1),idiag(n): aggregated matrix from previous pass
3518 ! (CSR format with partial ordering of each row: lower
3519 ! triangular part, next diag., and eventually upper triagular part;
3520 ! idiag point to the diagonal element)
3521 !
3522 ! si(n) : vector s or \tile{s} in Algorithm 4.2 or 4.3 of [2,3]
3523 !
3524 ! ind(n): on input, entries in ind should be negative
3525 !
3526 ! ja1(*),ia1(*),idiag1(*): unaggregated matrix, in which the
3527 ! threshold condition has to be checked
3528 ! si1(*): si in that matrix
3529 !
3530 ! m1 : maximal number of nodes in aggregates from previous passes
3531 !
3532 ! lcg1(m1,n): list of aggregates from previous passes
3533 ! lcg1(2,i) = 0 means that node i has to be transferred unaggregated
3534 ! to the coarse grid, and lcg(2,j) set to 0
3535 ! for the corresponding corase grid node j
3536 !
3537 ! zerors: if true, diagonal entries are not taken into account;
3538 ! instead, the matrix is treated as it would have mean
3539 ! column and row sum equal to zero
3540 !
3541 ! OUTPUT: nc : number of pairs formed
3542 !
3543 ! lcg(2,nc): list of aggregates;
3544 ! lcg(2,i)=0 : lcg(1,i) is a singleton that was
3545 ! forced to be transferred unaggregated
3546 ! lcg(2,i)=-1: lcg(1,i) is a singleton because no valid
3547 ! pair has been found
3548 ! lcg(2,i)>0 : (lcg(1,i),lcg(2,i)) form the ith pair
3549 !
3550 ! ind(n): ind(i)=j means that i belongs to aggregate j
3551 !
3552 ! rtent, jtent are working arrays of size >= the maximal degree of a node in
3553 ! input matrix (a,ja,ia)
3554 !
3555 !----------------
3556 ! Local variables
3557 !
3558  REAL(kind(0.0d0)) :: val,vals,valp,tent,rsi,rsj,epsr
3559  REAL(kind(0.0d0)) :: del1,del2,eta1,eta2,del12,sig1,sig2,rnd,vald
3560  INTEGER :: mindg,i,j,jj,k,kk,jk,isel,dg,ipair,nmark,idd
3561  INTEGER :: i1,i2,i3,ijs,ntentleft,npc,itrs,ijtent,j2
3562  LOGICAL :: acc,ifirst
3563  REAL(kind(0.0d0)) :: kaptg,bndmum1m1,dbndmum1,umdbndmum1
3564 !
3565  ifirst=.true.
3566  1 CONTINUE
3567 !
3568  kaptg=imult*kaptg_blocdia
3569  bndmum1m1=1.0d0/(kaptg-1.0d0)
3570  dbndmum1=2*1.0d0/kaptg
3571  umdbndmum1=1.0d0-dbndmum1
3572  idd=0
3573  nmark=0
3574  nc=0
3575  ijs=1
3576  npc=0
3577  DO WHILE (nmark.LT.n)
3578  isel=ijs
3579  ijs=ijs+1
3580  !
3581  ! Node isel has been selected
3582  !
3583  ! Check if isel has already been processed
3584  IF (ind(isel) .GE. 0) cycle
3585  !
3586  ! A new aggregate is formed that contains isel
3587  nc = nc + 1
3588  lcg(1,nc) = isel
3589  nmark = nmark+1
3590  ind(isel) = nc
3591  ipair = 0
3592  !
3593  ! Check if isel has to be transferred unaggregated
3594  IF (lcg1(2,isel) .EQ. 0) THEN
3595  lcg(2,nc) = 0
3596  npc=npc+1
3597  cycle
3598  END IF
3599  ntentleft=0
3600  !
3601  ! Try to form a pair with isel: follow list of neighbors
3602  !
3603  i2=ia(isel+1)-1
3604  DO i = ia(isel),i2
3605  ! CYCLE means: reject this neighbor, proceed with next one
3606  IF (i .EQ. idiag(isel)) cycle
3607  j = ja(i)
3608  ! check if j is available to form a pair
3609  IF(lcg1(2,j).EQ.0 .OR. ind(j).GE.0) cycle
3610  vals=-a(i)
3611  IF (zerors) THEN
3612  rsi=0.0d0
3613  rsj=0.0d0
3614  ELSE
3615  rsi=-si(isel)+a(idiag(isel))
3616  rsj=-si(j)+a(idiag(j))
3617  END IF
3618  sig1=si(isel)-vals
3619  sig2=si(j)-vals
3620  !
3621  ! CYCLE instructions below: pair rejected because A_G is not
3622  ! nonnegative definite
3623  !
3624  ! Heuristic if sigj is negative: set Sigma_G(j)=abs(sigj) (j=1,2)
3625  IF (sig1 > 0.0d0) THEN
3626  del1=rsi
3627  eta1=rsi+2*sig1
3628  ELSE
3629  del1=rsi+2*sig1
3630  eta1=rsi
3631  END IF
3632  IF (eta1 < 0.0d0) cycle
3633  IF (sig2 > 0.0d0) THEN
3634  del2=rsj
3635  eta2=rsj+2*sig2
3636  ELSE
3637  del2=rsj+2*sig2
3638  eta2=rsj
3639  END IF
3640  IF (eta2 < 0.0d0) cycle
3641  IF (vals > 0.0d0) THEN
3642  epsr=repsmach*vals
3643  IF (abs(del1) < epsr .AND. abs(del2) < epsr) THEN
3644  valp=1.0d0+(eta1*eta2)/(vals*(eta1+eta2))
3645  ELSE IF (abs(del1) < epsr) THEN
3646  IF (del2 < -epsr) cycle
3647  valp=1.0d0+(eta1*eta2)/(vals*(eta1+eta2))
3648  ELSE IF (abs(del2) < epsr) THEN
3649  IF (del1 < -epsr) cycle
3650  valp=1.0d0+(eta1*eta2)/(vals*(eta1+eta2))
3651  ELSE
3652  del12=del1+del2
3653  IF (del12 < -epsr) cycle
3654  valp=vals+del1*del2/del12
3655  IF (valp < 0.0d0) cycle
3656  valp=(vals+(eta1*eta2)/(eta1+eta2))/valp
3657  END IF
3658  ELSE
3659  IF (del1 .LE. 0.0d0 .OR. del2 .LE. 0.0d0) cycle
3660  valp=vals+del1*del2/(del1+del2)
3661  IF (valp < 0.0d0) cycle
3662  vals=vals+(eta1*eta2)/(eta1+eta2)
3663  IF (vals < 0.0d0) cycle
3664  valp=vals/valp
3665  END IF
3666  ! main threshold test
3667  IF (valp > kaptg) cycle
3668  !
3669  ! A_G is nonneagtive definite and passed the corresponding
3670  ! "quality" threshold test: (isel,j) is possibly
3671  ! an acceptable pair; check if it is the best one
3672  ! and update the list of possible second choices in rtent, jtent
3673  !
3674  tent=valp
3675  IF (ipair.EQ.0) GOTO 10
3676  ntentleft=ntentleft+1
3677  IF (16*(tent-val).LT.-1) GOTO 9
3678  IF (16*(tent-val).LT.1 .AND. j.LT.ipair) GOTO 9
3679  rtent(ntentleft)=tent
3680  jtent(ntentleft)=j
3681  cycle
3682 9 CONTINUE
3683  rtent(ntentleft)=val
3684  jtent(ntentleft)=ipair
3685 10 CONTINUE
3686  ipair = j
3687  val = tent
3688  ENDDO
3689  IF (ipair .EQ. 0) GOTO 25
3690 20 CONTINUE
3691  ! Perform check in unagregated matrix, considering possible pairs
3692  ! in right order
3693  CALL dagmg_checktentagg_sf
3694  IF (.NOT.acc) THEN
3695  ! Tentative pair has been rejected, check for the next one, if any
3696  ipair = 0
3697  IF (ntentleft .GT.0) THEN
3698  i=1
3699  j=1
3700  DO WHILE (i .LE. ntentleft)
3701  IF (jtent(j).GT.0) THEN
3702  tent=rtent(j)
3703  IF (ipair.EQ.0) GOTO 22
3704  IF (16*(tent-val).LT.-1) GOTO 22
3705  IF (16*(tent-val).LT.1 .AND. j.LT.ipair) GOTO 22
3706  GOTO 23
3707 22 CONTINUE
3708  val=tent
3709  ipair=jtent(j)
3710  ijtent=j
3711 23 CONTINUE
3712  i=i+1
3713  END IF
3714  j=j+1
3715  END DO
3716  ntentleft=ntentleft-1
3717  jtent(ijtent)=0
3718  GOTO 20
3719  END IF
3720  END IF
3721  !
3722 25 CONTINUE
3723  !
3724  IF (ipair .EQ. 0) THEN
3725  ! no valid pair found: isel left alone in aggregate nc
3726  lcg(2,nc) = -1
3727  ELSE
3728  ! pair nc is formed with isel and ipair
3729  lcg(2,nc) = ipair
3730  ind(ipair) = nc
3731  nmark = nmark+1
3732  END IF
3733  ENDDO
3734  RETURN
3735  CONTAINS
3736 !-----------------------------------------------------------------------
3737  SUBROUTINE dagmg_checktentagg_sf
3738 !
3739 ! Check the quality in the original matrix of an aggregate formed
3740 ! by grouping two aggregates from previous pass(es)
3741 !
3742 ! INPUT: see dagmg_findpairs_SF
3743 !
3744 ! OUTPUT: acc(logical): acc=.true. if the new aggregate can be accepted and
3745 ! acc=.false. otherwise
3746 !
3747 !
3748  INTEGER, PARAMETER :: mm=max(2**(npass+1),8)
3749  REAL(kind(0.0d0)) :: w(mm,mm), sig(mm), age(mm), v(mm)
3750  REAL(kind(0.0d0)) :: alpha, alp, tmp, beta, f1, f2
3751  INTEGER :: j,jj,k,l,m,info, setdim1, setdim, l2, k2
3752  INTEGER :: set(mm), l1, wdtht
3753  REAL(kind(0.0d0)) :: t
3754  LOGICAL :: exc
3755 !
3756 ! Find indices of submatrix to be extracted and store them in set(1:setdim)
3757  IF (m1.eq.2) THEN
3758  IF (lcg1(2,isel) .LT. 0) THEN
3759  IF (lcg1(2,ipair) .LT. 0) THEN
3760  set(1)=lcg1(1,isel)
3761  set(2)=lcg1(1,ipair)
3762  setdim=2
3763  ELSE
3764  set(1)=lcg1(1,isel)
3765  set(2)=lcg1(1,ipair)
3766  set(3)=lcg1(2,ipair)
3767  setdim=3
3768  END IF
3769  l1=1
3770  ELSE
3771  IF (lcg1(2,ipair) .LT. 0) THEN
3772  set(1)=lcg1(1,isel)
3773  set(2)=lcg1(2,isel)
3774  set(3)=lcg1(1,ipair)
3775  setdim=3
3776  ELSE
3777  set(1)=lcg1(1,isel)
3778  set(2)=lcg1(2,isel)
3779  set(3)=lcg1(1,ipair)
3780  set(4)=lcg1(2,ipair)
3781  setdim=4
3782  END IF
3783  l1=2
3784  END IF
3785  ELSE
3786  l1=m1
3787  IF (lcg1(m1,isel).LT.0) l1=-lcg1(m1,isel)
3788  set(1:l1)=lcg1(1:l1,isel)
3789  l2=m1
3790  IF (lcg1(m1,ipair).LT.0) l2=-lcg1(m1,ipair)
3791  set(l1+1:l1+l2)=lcg1(1:l2,ipair)
3792  setdim=l1+l2
3793  END IF
3794 !
3795 ! Sort indices in set(1:setdim) in increasing order
3796 ! (makes susequent processing faster)
3797  exc=.true.
3798  DO WHILE(exc)
3799  exc=.false.
3800  DO l=2,setdim
3801  IF( set(l)<set(l-1) )THEN
3802  jj=set(l)
3803  set(l)=set(l-1)
3804  set(l-1)=jj
3805  exc=.true.
3806  END IF
3807  END DO
3808  END DO
3809 !
3810 ! Extract submatrix of (the symmetric part of) input matrix and store it in W
3811 ! Store in AGe the mean row and column sum of input matrix for related indices
3812 ! (this is also the row sum of A_G)
3813  DO j=1,setdim
3814  jj=set(j)
3815  sig(j)=si1(jj)
3816  IF (zerors) THEN
3817  w(j,j)=sig(j)
3818  age(j)=0.0d0
3819  ELSE
3820  w(j,j)=a1(idiag1(jj))
3821  age(j)=w(j,j)-sig(j)
3822  END IF
3823  l2=j+1
3824  DO l=l2,setdim
3825  w(j,l)=0.0d0
3826  w(l,j)=0.0d0
3827  END DO
3828  k2=ia1(jj+1)-1
3829  DO k=idiag1(jj)+1,k2
3830  DO l=l2,setdim
3831  m=set(l)
3832  IF(ja1(k)==m)THEN
3833  alpha=a1(k)
3834  w(j,l)=alpha
3835  w(l,j)=alpha
3836  EXIT
3837  END IF
3838  END DO
3839  END DO
3840  END DO
3841 !
3842  DO j=1,setdim
3843  ! Set sig(j) equal to minus the sum of connections that are
3844  ! external to the tentative aggregate; that is, add the internal
3845  ! offdiagonal entries to current sig(j)
3846  DO k=1,setdim
3847  IF (j.ne.k) THEN
3848  sig(j)=sig(j)+w(j,k)
3849  END IF
3850  ENDDO
3851  ! Heuristic if sig(j) is negative: set Sigma_G(j)=abs(sig(j))
3852  ! Hence need to correct the row sum of A_G stored in AGe
3853  IF (sig(j) < 0.0d0) age(j)=age(j)+2*sig(j)
3854  !
3855  ! Set W = A_G ; offidagonal entries OK, need only to correct
3856  ! the diagonal
3857  !
3858  w(j,j)=w(j,j)-abs(sig(j))
3859  !
3860  ! kap 1
3861  ! Now set W = ------ A_G - ----- M_G
3862  ! kap-1 kap-1
3863  !
3864  ! Then W contains the matrix that need to be nonnegative definite to
3865  ! accept the aggregate according to the theory in [2],
3866  ! up to the rank one correction that is added below
3867  !
3868  ! Offdiagonal entrie of W alraedy OK [rule: offdiag(A_G)=ofdiag(MG)];
3869  ! for the diagonal ones, note that M_G(j,j)=A_G(j,j)+2*abs(sig(j)),
3870  ! hence the rule below with bndmum1m1=1/(kap-1)
3871  !
3872  tmp=2*abs(sig(j))
3873  w(j,j)=w(j,j)-bndmum1m1*tmp
3874  !
3875  ! Store M_G*1 = rowsum(A_G)+2*sig in v ; [ 1 = vector of all ones ];
3876  ! beta <- quadratic form (1^T * D_G * 1)
3877  ! alp is positive if and only if AGe(j) is zero for all j;
3878  ! that is, if and only if A_G has 1 in its null space
3879  v(j)=tmp+age(j)
3880  IF (j .eq. 1) THEN
3881  beta=v(j)
3882  alp=abs(age(j))
3883  ELSE
3884  beta=beta+v(j)
3885  alp=max(alp,abs(age(j)))
3886  END IF
3887  END DO
3888 !
3889 ! Eventually, add to W the rank one correction
3890 ! 1
3891 ! ------------------------- M_G * 1 * 1^T * M_G
3892 ! (kap-1)*(1^T * M_G * 1)
3893 !
3894  beta=bndmum1m1/beta
3895  DO j=1,setdim
3896  DO k=1,setdim
3897  w(j,k)=w(j,k)+beta*v(j)*v(k)
3898  END DO
3899  END DO
3900 !
3901 ! Now, the rule is to accept the tentative aggregate if and only if
3902 ! W(setdim,setdim) is nonnegative definite
3903 ! Two cases:
3904 ! alp == 0 , then W is singular, hence we check the positive
3905 ! definitenes of W(setdim-1,setdim-1)
3906 ! alp > 0 , then W is normally not singular and we require thus that
3907 ! it is positive definite
3908 !
3909  IF (alp.LT.repsmach*beta) THEN
3910  setdim1=setdim-1
3911  ELSE
3912  setdim1=setdim
3913  END IF
3914 !
3915 ! Perform a Choleky factorization of W(setdim1,setdim1) and check
3916 ! for negative pivots; code expended for small dimensions (usual case)
3917 !
3918  acc=.false.
3919 ! acc has just be defined to FALSE; hence RETURN statement means rejection
3920 !
3921  SELECT CASE (setdim1)
3922  CASE (1)
3923  GOTO 11
3924  CASE (2)
3925  GOTO 12
3926  CASE (3)
3927  GOTO 13
3928  CASE (4)
3929  GOTO 14
3930  CASE (5)
3931  GOTO 15
3932  CASE (6)
3933  GOTO 16
3934  CASE (7)
3935  GOTO 17
3936  CASE (8)
3937  GOTO 18
3938  CASE DEFAULT
3939  CALL dpotrf('U',setdim1,w,mm,info)
3940  IF (info .NE. 0) RETURN
3941  GOTO 10
3942  END SELECT
3943 18 CONTINUE
3944  IF (w(8,8) .LE. 0.0d0) RETURN
3945  w(7,7) = w(7,7) - (w(7,8)/w(8,8)) * w(7,8)
3946  t = w(6,8)/w(8,8)
3947  w(6,7) = w(6,7) - t * w(7,8)
3948  w(6,6) = w(6,6) - t * w(6,8)
3949  t = w(5,8)/w(8,8)
3950  w(5,7) = w(5,7) - t * w(7,8)
3951  w(5,6) = w(5,6) - t * w(6,8)
3952  w(5,5) = w(5,5) - t * w(5,8)
3953  t = w(4,8)/w(8,8)
3954  w(4,7) = w(4,7) - t * w(7,8)
3955  w(4,6) = w(4,6) - t * w(6,8)
3956  w(4,5) = w(4,5) - t * w(5,8)
3957  w(4,4) = w(4,4) - t * w(4,8)
3958  t = w(3,8)/w(8,8)
3959  w(3,7) = w(3,7) - t * w(7,8)
3960  w(3,6) = w(3,6) - t * w(6,8)
3961  w(3,5) = w(3,5) - t * w(5,8)
3962  w(3,4) = w(3,4) - t * w(4,8)
3963  w(3,3) = w(3,3) - t * w(3,8)
3964  t = w(2,8)/w(8,8)
3965  w(2,7) = w(2,7) - t * w(7,8)
3966  w(2,6) = w(2,6) - t * w(6,8)
3967  w(2,5) = w(2,5) - t * w(5,8)
3968  w(2,4) = w(2,4) - t * w(4,8)
3969  w(2,3) = w(2,3) - t * w(3,8)
3970  w(2,2) = w(2,2) - t * w(2,8)
3971  t = w(1,8)/w(8,8)
3972  w(1,7) = w(1,7) - t * w(7,8)
3973  w(1,6) = w(1,6) - t * w(6,8)
3974  w(1,5) = w(1,5) - t * w(5,8)
3975  w(1,4) = w(1,4) - t * w(4,8)
3976  w(1,3) = w(1,3) - t * w(3,8)
3977  w(1,2) = w(1,2) - t * w(2,8)
3978  w(1,1) = w(1,1) - t * w(1,8)
3979 17 CONTINUE
3980  IF (w(7,7) .LE. 0.0d0) RETURN
3981  w(6,6) = w(6,6) - (w(6,7)/w(7,7)) * w(6,7)
3982  t = w(5,7)/w(7,7)
3983  w(5,6) = w(5,6) - t * w(6,7)
3984  w(5,5) = w(5,5) - t * w(5,7)
3985  t = w(4,7)/w(7,7)
3986  w(4,6) = w(4,6) - t * w(6,7)
3987  w(4,5) = w(4,5) - t * w(5,7)
3988  w(4,4) = w(4,4) - t * w(4,7)
3989  t = w(3,7)/w(7,7)
3990  w(3,6) = w(3,6) - t * w(6,7)
3991  w(3,5) = w(3,5) - t * w(5,7)
3992  w(3,4) = w(3,4) - t * w(4,7)
3993  w(3,3) = w(3,3) - t * w(3,7)
3994  t = w(2,7)/w(7,7)
3995  w(2,6) = w(2,6) - t * w(6,7)
3996  w(2,5) = w(2,5) - t * w(5,7)
3997  w(2,4) = w(2,4) - t * w(4,7)
3998  w(2,3) = w(2,3) - t * w(3,7)
3999  w(2,2) = w(2,2) - t * w(2,7)
4000  t = w(1,7)/w(7,7)
4001  w(1,6) = w(1,6) - t * w(6,7)
4002  w(1,5) = w(1,5) - t * w(5,7)
4003  w(1,4) = w(1,4) - t * w(4,7)
4004  w(1,3) = w(1,3) - t * w(3,7)
4005  w(1,2) = w(1,2) - t * w(2,7)
4006  w(1,1) = w(1,1) - t * w(1,7)
4007 16 CONTINUE
4008  IF (w(6,6) .LE. 0.0d0) RETURN
4009  w(5,5) = w(5,5) - (w(5,6)/w(6,6)) * w(5,6)
4010  t = w(4,6)/w(6,6)
4011  w(4,5) = w(4,5) - t * w(5,6)
4012  w(4,4) = w(4,4) - t * w(4,6)
4013  t = w(3,6)/w(6,6)
4014  w(3,5) = w(3,5) - t * w(5,6)
4015  w(3,4) = w(3,4) - t * w(4,6)
4016  w(3,3) = w(3,3) - t * w(3,6)
4017  t = w(2,6)/w(6,6)
4018  w(2,5) = w(2,5) - t * w(5,6)
4019  w(2,4) = w(2,4) - t * w(4,6)
4020  w(2,3) = w(2,3) - t * w(3,6)
4021  w(2,2) = w(2,2) - t * w(2,6)
4022  t = w(1,6)/w(6,6)
4023  w(1,5) = w(1,5) - t * w(5,6)
4024  w(1,4) = w(1,4) - t * w(4,6)
4025  w(1,3) = w(1,3) - t * w(3,6)
4026  w(1,2) = w(1,2) - t * w(2,6)
4027  w(1,1) = w(1,1) - t * w(1,6)
4028 15 CONTINUE
4029  IF (w(5,5) .LE. 0.0d0) RETURN
4030  w(4,4) = w(4,4) - (w(4,5)/w(5,5)) * w(4,5)
4031  t = w(3,5)/w(5,5)
4032  w(3,4) = w(3,4) - t * w(4,5)
4033  w(3,3) = w(3,3) - t * w(3,5)
4034  t = w(2,5)/w(5,5)
4035  w(2,4) = w(2,4) - t * w(4,5)
4036  w(2,3) = w(2,3) - t * w(3,5)
4037  w(2,2) = w(2,2) - t * w(2,5)
4038  t = w(1,5)/w(5,5)
4039  w(1,4) = w(1,4) - t * w(4,5)
4040  w(1,3) = w(1,3) - t * w(3,5)
4041  w(1,2) = w(1,2) - t * w(2,5)
4042  w(1,1) = w(1,1) - t * w(1,5)
4043 14 CONTINUE
4044  IF (w(4,4) .LE. 0.0d0) RETURN
4045  w(3,3) = w(3,3) - (w(3,4)/w(4,4)) * w(3,4)
4046  t = w(2,4)/w(4,4)
4047  w(2,3) = w(2,3) - t * w(3,4)
4048  w(2,2) = w(2,2) - t * w(2,4)
4049  t = w(1,4)/w(4,4)
4050  w(1,3) = w(1,3) - t * w(3,4)
4051  w(1,2) = w(1,2) - t * w(2,4)
4052  w(1,1) = w(1,1) - t * w(1,4)
4053 13 CONTINUE
4054  IF (w(3,3) .LE. 0.0d0) RETURN
4055  w(2,2) = w(2,2) - (w(2,3)/w(3,3)) * w(2,3)
4056  t = w(1,3)/w(3,3)
4057  w(1,2) = w(1,2) - t * w(2,3)
4058  w(1,1) = w(1,1) - t * w(1,3)
4059 12 CONTINUE
4060  IF (w(2,2) .LE. 0.0d0) RETURN
4061  w(1,1) = w(1,1) - (w(1,2)/w(2,2)) * w(1,2)
4062 11 CONTINUE
4063  IF (w(1,1) .LE. 0.0d0) RETURN
4064 10 CONTINUE
4065 !
4066 ! all test passed: accept the aggregate
4067  acc=.true.
4068 !
4069  RETURN
4070  END SUBROUTINE dagmg_checktentagg_sf
4071  END SUBROUTINE dagmg_findpairs_sf
4072 !-----------------------------------------------------------------------
4073  SUBROUTINE dagmg_findpairs_gi(n,a,ja,ia,idiag,si,ind,lcg,nc &
4074  ,nddl,ldd,skipass &
4075  ,ipc )
4076 ! Performs pairwise aggregation according to Algorithms 4.2 and 4.3 in [2,3],
4077 ! with some heuristic enhancements to cover non diagonally dominant matrices.
4078 !
4079 ! Version: initial pairwise aggregation (next levels) for general matrices [3].
4080 !
4081  USE dagmg_mem
4082  IMPLICIT NONE
4083  INTEGER :: n,ja(*),ia(n+1),idiag(n),ind(n),lcg(2,*),nc
4084  REAL(kind(0.0d0)) :: a(*)
4085  REAL(kind(0.0d0)) :: si(n)
4086  INTEGER :: ldd(nddl),nddl
4087  LOGICAL :: skipass
4088  INTEGER :: ipc(n)
4089 !
4090 ! INPUT: n,ja(*),ia(n+1),idiag(n): aggregated matrix from previous pass
4091 ! (CSR format with partial ordering of each row: lower
4092 ! triangular part, next diag., and eventually upper triagular part;
4093 ! idiag point to the diagonal element)
4094 !
4095 ! si(n) : vector s or \tile{s} in Algorithm 4.2 or 4.3 of [2,3]
4096 !
4097 ! ind(n): on input, entries in ind should be nonnegative, and equal to
4098 ! zero only at nodes that are to be kept outside the aggregation
4099 !
4100 ! nddl : number of nodes i such that ind(i)=0
4101 !
4102 ! skipass: if true, pairwise aggregation is skipped; i.e.,
4103 ! the coarsening only removes nodes i such that ind(i)==0
4104 !
4105 ! ipc: ipc(i) = 0 means that node i has to be transferred unaggregated
4106 ! to the coarse grid, and lcg(2,j) set to 0
4107 ! for the corresponding coarse grid node j
4108 !
4109 ! zerors: if true, diagonal entries are not taken into account;
4110 ! instead, the matrix is treated as it would have mean
4111 ! column and row sum equal to zero
4112 !
4113 ! OUTPUT: nc : number of pairs formed
4114 !
4115 ! lcg(2,nc): list of aggregates;
4116 ! lcg(2,i)=0 : lcg(1,i) is a singleton that was
4117 ! forced to be transferred unaggregated
4118 ! lcg(2,i)=-1: lcg(1,i) is a singleton because no valid
4119 ! pair has been found
4120 ! lcg(2,i)>0 : (lcg(1,i),lcg(2,i)) form the ith pair
4121 !
4122 ! ind(n): ind(i)=j means that i belongs to aggregate j
4123 ! ind(i)=0 means that i does not belong to any pair
4124 ! and is listed to ldd
4125 ! (detected from ind(i)=0 on input)
4126 ! ldd(nddl): list of nodes such that ind(i)=0 on input & output
4127 !
4128 !----------------
4129 ! Local variables
4130 !
4131  REAL(kind(0.0d0)) :: val,vals,valp,tent,rsi,rsj,epsr
4132  REAL(kind(0.0d0)) :: del1,del2,eta1,eta2,del12,sig1,sig2,rnd,vald
4133  INTEGER :: mindg,i,j,jj,k,kk,jk,isel,dg,ipair,nmark,idd
4134  INTEGER :: i1,i2,i3,ijs,ntentleft,npc,itrs,ijtent,j2
4135  LOGICAL :: acc,ifirst
4136  REAL(kind(0.0d0)) :: kaptg,bndmum1m1,dbndmum1,umdbndmum1
4137 !
4138  ifirst=.true.
4139  1 CONTINUE
4140 !
4141  kaptg=imult*kaptg_dampjac
4142  bndmum1m1=1.0d0/(kaptg-1.0d0)
4143  dbndmum1=2*1.0d0/kaptg
4144  umdbndmum1=1.0d0-dbndmum1
4145  idd=0
4146  nmark=0
4147  nc=0
4148  ijs=1
4149  npc=0
4150  DO WHILE (nmark.LT.n)
4151  isel=ijs
4152  ijs=ijs+1
4153  !
4154  ! Node isel has been selected
4155  !
4156  ! First check if isel has to be kept outside aggregation
4157  IF (ind(isel) .EQ. 0) THEN
4158  idd=idd+1
4159  nmark=nmark+1
4160  ldd(idd)=isel
4161  cycle
4162  END IF
4163  !
4164  ! Check if isel has already been processed
4165  IF (ind(isel) .GE. 0) cycle
4166  !
4167  ! A new aggregate is formed that contains isel
4168  nc = nc + 1
4169  lcg(1,nc) = isel
4170  nmark = nmark+1
4171  ind(isel) = nc
4172  ipair = 0
4173  !
4174  ! Check if isel has to be transferred unaggregated
4175  IF (ipc(isel) .EQ. 0) THEN
4176  lcg(2,nc) = 0
4177  npc=npc+1
4178  cycle
4179  END IF
4180  ! Skip pairwise aggregation is skipass==true
4181  IF (skipass) THEN
4182  lcg(2,nc) = -1
4183  cycle
4184  END IF
4185  !
4186  ! Try to form a pair with isel: follow list of neighbors
4187  !
4188  i2=ia(isel+1)-1
4189  DO i = ia(isel),i2
4190  ! CYCLE means: reject this neighbor, proceed with next one
4191  IF (i .EQ. idiag(isel)) cycle
4192  j = ja(i)
4193  ! check if j is available to form a pair
4194  IF(ipc(j).EQ.0 .OR. ind(j).GE.0) cycle
4195  ! search for the corresponding entry in transposed matrix
4196  kk=0
4197  IF (i .LT. idiag(isel)) THEN
4198  j2=ia(j+1)-1
4199  DO jk=idiag(j)+1,j2
4200  IF (ja(jk) .EQ. isel) THEN
4201  kk=jk
4202  EXIT
4203  END IF
4204  END DO
4205  ELSE
4206  DO jk=ia(j),idiag(j)-1
4207  IF (ja(jk) .EQ. isel) THEN
4208  kk=jk
4209  EXIT
4210  END IF
4211  END DO
4212  ENDIF
4213  vals=-a(i)/2
4214  IF(kk .NE. 0) vals=vals-a(kk)/2
4215  IF (zerors) THEN
4216  rsi=0.0d0
4217  rsj=0.0d0
4218  eta1=2*si(isel)
4219  eta2=2*si(j)
4220  ELSE
4221  rsi=-si(isel)+a(idiag(isel))
4222  rsj=-si(j)+a(idiag(j))
4223  eta1=2*a(idiag(isel))
4224  eta2=2*a(idiag(j))
4225  END IF
4226  sig1=si(isel)-vals
4227  sig2=si(j)-vals
4228  !
4229  ! CYCLE instructions below: pair rejected because A_G is not
4230  ! nonnegative definite
4231  !
4232  ! Heuristic if sigj is negative: set Sigma_G(j)=abs(sigj) (j=1,2)
4233  IF (sig1 > 0.0d0) THEN
4234  del1=rsi
4235  ELSE
4236  del1=rsi+2*sig1
4237  END IF
4238  IF (sig2 > 0.0d0) THEN
4239  del2=rsj
4240  ELSE
4241  del2=rsj+2*sig2
4242  END IF
4243  IF (vals > 0.0d0) THEN
4244  epsr=repsmach*vals
4245  IF (abs(del1) < epsr .AND. abs(del2) < epsr) THEN
4246  valp=(eta1*eta2)/(vals*(eta1+eta2))
4247  ELSE IF (abs(del1) < epsr) THEN
4248  IF (del2 < -epsr) cycle
4249  valp=(eta1*eta2)/(vals*(eta1+eta2))
4250  ELSE IF (abs(del2) < epsr) THEN
4251  IF (del1 < -epsr) cycle
4252  valp=(eta1*eta2)/(vals*(eta1+eta2))
4253  ELSE
4254  del12=del1+del2
4255  IF (del12 < -epsr) cycle
4256  valp=vals+del1*del2/del12
4257  IF (valp < 0.0d0) cycle
4258  valp=((eta1*eta2)/(eta1+eta2))/valp
4259  END IF
4260  ELSE
4261  IF (del1 .LE. 0.0d0 .OR. del2 .LE. 0.0d0) cycle
4262  valp=vals+del1*del2/(del1+del2)
4263  IF (valp < 0.0d0) cycle
4264  vals=(eta1*eta2)/(eta1+eta2)
4265  valp=vals/valp
4266  END IF
4267  ! main threshold test
4268  IF (valp > kaptg) cycle
4269  !
4270  ! A_G is nonneagtive definite and passed the corresponding
4271  ! "quality" threshold test: (isel,j) is
4272  ! an acceptable pair; check if it is the best one
4273  !
4274  tent=valp
4275  IF (ipair.EQ.0) GOTO 10
4276  IF (16*(tent-val).LT.-1) GOTO 9
4277  IF (16*(tent-val).LT.1 .AND. j.LT.ipair) GOTO 9
4278  cycle
4279 9 CONTINUE
4280 10 CONTINUE
4281  ipair = j
4282  val = tent
4283  ENDDO
4284  !
4285  IF (ipair .EQ. 0) THEN
4286  ! no valid pair found: isel left alone in aggregate nc
4287  lcg(2,nc) = -1
4288  ELSE
4289  ! pair nc is formed with isel and ipair
4290  lcg(2,nc) = ipair
4291  ind(ipair) = nc
4292  nmark = nmark+1
4293  END IF
4294  ENDDO
4295 !
4296 ! Restart aggregation with kaptg twice as large in case of
4297 ! slow coarsening; allowed only once per level
4298 ! (new parameters apply at further levels)
4299 !
4300  IF (ifirst .AND. 3*nc.GE.2*n .AND. ngl(1).GT.maxcoarseslowt) THEN
4301  imult=2*imult
4302  IF (wfo) THEN
4303  WRITE (iout,901) irank,imult*kaptg_dampjac
4304  END IF
4305 901 FORMAT(i3, &
4306  '* Coarsening too slow: quality threshold increased to',f6.2)
4307  ind(1:n)=-1
4308  DO i=1,nddl
4309  ind(ldd(i))=0
4310  END DO
4311  ifirst=.false.
4312  GOTO 1
4313  ENDIF
4314  RETURN
4315  END SUBROUTINE dagmg_findpairs_gi
4316 !-----------------------------------------------------------------------
4317  SUBROUTINE dagmg_findpairs_si(n,a,ja,ia,idiag,si,ind,lcg,nc &
4318  ,nddl,ldd,skipass &
4319  ,ipc )
4320 ! Performs pairwise aggregation according to Algorithms 4.2 and 4.3 in [2,3],
4321 ! with some heuristic enhancements to cover non diagonally dominant matrices.
4322 !
4323 ! Version: initial pairwise aggregation (next levels) for symm. matrices [2].
4324 !
4325  USE dagmg_mem
4326  IMPLICIT NONE
4327  INTEGER :: n,ja(*),ia(n+1),idiag(n),ind(n),lcg(2,*),nc
4328  REAL(kind(0.0d0)) :: a(*)
4329  REAL(kind(0.0d0)) :: si(n)
4330  INTEGER :: ldd(nddl),nddl
4331  LOGICAL :: skipass
4332  INTEGER :: ipc(n)
4333 !
4334 ! INPUT: n,ja(*),ia(n+1),idiag(n): aggregated matrix from previous pass
4335 ! (CSR format with partial ordering of each row: lower
4336 ! triangular part, next diag., and eventually upper triagular part;
4337 ! idiag point to the diagonal element)
4338 !
4339 ! si(n) : vector s or \tile{s} in Algorithm 4.2 or 4.3 of [2,3]
4340 !
4341 ! ind(n): on input, entries in ind should be nonnegative, and equal to
4342 ! zero only at nodes that are to be kept outside the aggregation
4343 !
4344 ! nddl : number of nodes i such that ind(i)=0
4345 !
4346 ! skipass: if true, pairwise aggregation is skipped; i.e.,
4347 ! the coarsening only removes nodes i such that ind(i)==0
4348 !
4349 ! ipc: ipc(i) = 0 means that node i has to be transferred unaggregated
4350 ! to the coarse grid, and lcg(2,j) set to 0
4351 ! for the corresponding coarse grid node j
4352 !
4353 ! zerors: if true, diagonal entries are not taken into account;
4354 ! instead, the matrix is treated as it would have mean
4355 ! column and row sum equal to zero
4356 !
4357 ! OUTPUT: nc : number of pairs formed
4358 !
4359 ! lcg(2,nc): list of aggregates;
4360 ! lcg(2,i)=0 : lcg(1,i) is a singleton that was
4361 ! forced to be transferred unaggregated
4362 ! lcg(2,i)=-1: lcg(1,i) is a singleton because no valid
4363 ! pair has been found
4364 ! lcg(2,i)>0 : (lcg(1,i),lcg(2,i)) form the ith pair
4365 !
4366 ! ind(n): ind(i)=j means that i belongs to aggregate j
4367 ! ind(i)=0 means that i does not belong to any pair
4368 ! and is listed to ldd
4369 ! (detected from ind(i)=0 on input)
4370 ! ldd(nddl): list of nodes such that ind(i)=0 on input & output
4371 !
4372 !----------------
4373 ! Local variables
4374 !
4375  REAL(kind(0.0d0)) :: val,vals,valp,tent,rsi,rsj,epsr
4376  REAL(kind(0.0d0)) :: del1,del2,eta1,eta2,del12,sig1,sig2,rnd,vald
4377  INTEGER :: mindg,i,j,jj,k,kk,jk,isel,dg,ipair,nmark,idd
4378  INTEGER :: i1,i2,i3,ijs,ntentleft,npc,itrs,ijtent,j2
4379  LOGICAL :: acc,ifirst
4380  REAL(kind(0.0d0)) :: kaptg,bndmum1m1,dbndmum1,umdbndmum1
4381 !
4382  ifirst=.true.
4383  1 CONTINUE
4384 !
4385  kaptg=imult*kaptg_blocdia
4386  bndmum1m1=1.0d0/(kaptg-1.0d0)
4387  dbndmum1=2*1.0d0/kaptg
4388  umdbndmum1=1.0d0-dbndmum1
4389  idd=0
4390  nmark=0
4391  nc=0
4392  ijs=1
4393  npc=0
4394  DO WHILE (nmark.LT.n)
4395  isel=ijs
4396  ijs=ijs+1
4397  !
4398  ! Node isel has been selected
4399  !
4400  ! First check if isel has to be kept outside aggregation
4401  IF (ind(isel) .EQ. 0) THEN
4402  idd=idd+1
4403  nmark=nmark+1
4404  ldd(idd)=isel
4405  cycle
4406  END IF
4407  !
4408  ! Check if isel has already been processed
4409  IF (ind(isel) .GE. 0) cycle
4410  !
4411  ! A new aggregate is formed that contains isel
4412  nc = nc + 1
4413  lcg(1,nc) = isel
4414  nmark = nmark+1
4415  ind(isel) = nc
4416  ipair = 0
4417  !
4418  ! Check if isel has to be transferred unaggregated
4419  IF (ipc(isel) .EQ. 0) THEN
4420  lcg(2,nc) = 0
4421  npc=npc+1
4422  cycle
4423  END IF
4424  ! Skip pairwise aggregation is skipass==true
4425  IF (skipass) THEN
4426  lcg(2,nc) = -1
4427  cycle
4428  END IF
4429  !
4430  ! Try to form a pair with isel: follow list of neighbors
4431  !
4432  i2=ia(isel+1)-1
4433  DO i = ia(isel),i2
4434  ! CYCLE means: reject this neighbor, proceed with next one
4435  IF (i .EQ. idiag(isel)) cycle
4436  j = ja(i)
4437  ! check if j is available to form a pair
4438  IF(ipc(j).EQ.0 .OR. ind(j).GE.0) cycle
4439  vals=-a(i)
4440  IF (zerors) THEN
4441  rsi=0.0d0
4442  rsj=0.0d0
4443  ELSE
4444  rsi=-si(isel)+a(idiag(isel))
4445  rsj=-si(j)+a(idiag(j))
4446  END IF
4447  sig1=si(isel)-vals
4448  sig2=si(j)-vals
4449  !
4450  ! CYCLE instructions below: pair rejected because A_G is not
4451  ! nonnegative definite
4452  !
4453  ! Heuristic if sigj is negative: set Sigma_G(j)=abs(sigj) (j=1,2)
4454  IF (sig1 > 0.0d0) THEN
4455  del1=rsi
4456  eta1=rsi+2*sig1
4457  ELSE
4458  del1=rsi+2*sig1
4459  eta1=rsi
4460  END IF
4461  IF (eta1 < 0.0d0) cycle
4462  IF (sig2 > 0.0d0) THEN
4463  del2=rsj
4464  eta2=rsj+2*sig2
4465  ELSE
4466  del2=rsj+2*sig2
4467  eta2=rsj
4468  END IF
4469  IF (eta2 < 0.0d0) cycle
4470  IF (vals > 0.0d0) THEN
4471  epsr=repsmach*vals
4472  IF (abs(del1) < epsr .AND. abs(del2) < epsr) THEN
4473  valp=1.0d0+(eta1*eta2)/(vals*(eta1+eta2))
4474  ELSE IF (abs(del1) < epsr) THEN
4475  IF (del2 < -epsr) cycle
4476  valp=1.0d0+(eta1*eta2)/(vals*(eta1+eta2))
4477  ELSE IF (abs(del2) < epsr) THEN
4478  IF (del1 < -epsr) cycle
4479  valp=1.0d0+(eta1*eta2)/(vals*(eta1+eta2))
4480  ELSE
4481  del12=del1+del2
4482  IF (del12 < -epsr) cycle
4483  valp=vals+del1*del2/del12
4484  IF (valp < 0.0d0) cycle
4485  valp=(vals+(eta1*eta2)/(eta1+eta2))/valp
4486  END IF
4487  ELSE
4488  IF (del1 .LE. 0.0d0 .OR. del2 .LE. 0.0d0) cycle
4489  valp=vals+del1*del2/(del1+del2)
4490  IF (valp < 0.0d0) cycle
4491  vals=vals+(eta1*eta2)/(eta1+eta2)
4492  IF (vals < 0.0d0) cycle
4493  valp=vals/valp
4494  END IF
4495  ! main threshold test
4496  IF (valp > kaptg) cycle
4497  !
4498  ! A_G is nonneagtive definite and passed the corresponding
4499  ! "quality" threshold test: (isel,j) is
4500  ! an acceptable pair; check if it is the best one
4501  !
4502  tent=valp
4503  IF (ipair.EQ.0) GOTO 10
4504  IF (16*(tent-val).LT.-1) GOTO 9
4505  IF (16*(tent-val).LT.1 .AND. j.LT.ipair) GOTO 9
4506  cycle
4507 9 CONTINUE
4508 10 CONTINUE
4509  ipair = j
4510  val = tent
4511  ENDDO
4512  !
4513  IF (ipair .EQ. 0) THEN
4514  ! no valid pair found: isel left alone in aggregate nc
4515  lcg(2,nc) = -1
4516  ELSE
4517  ! pair nc is formed with isel and ipair
4518  lcg(2,nc) = ipair
4519  ind(ipair) = nc
4520  nmark = nmark+1
4521  END IF
4522  ENDDO
4523 !
4524 ! Restart aggregation with kaptg twice as large in case of
4525 ! slow coarsening; allowed only once per level
4526 ! (new parameters apply at further levels)
4527 !
4528  IF (ifirst .AND. 3*nc.GE.2*n .AND. ngl(1).GT.maxcoarseslowt) THEN
4529  imult=2*imult
4530  IF (wfo) THEN
4531  WRITE (iout,901) irank,imult*kaptg_blocdia
4532  END IF
4533 901 FORMAT(i3, &
4534  '* Coarsening too slow: quality threshold increased to',f6.2)
4535  ind(1:n)=-1
4536  DO i=1,nddl
4537  ind(ldd(i))=0
4538  END DO
4539  ifirst=.false.
4540  GOTO 1
4541  ENDIF
4542  RETURN
4543  END SUBROUTINE dagmg_findpairs_si
4544 !-----------------------------------------------------------------------
4545  SUBROUTINE dagmg_findpairs_gi1(n,a,ja,ia,idiag,si,ind,lcg,nc &
4546  ,nddl,ldd,skipass &
4547  ,riperm,iperm )
4548 ! Performs pairwise aggregation according to Algorithms 4.2 and 4.3 in [2,3],
4549 ! with some heuristic enhancements to cover non diagonally dominant matrices.
4550 !
4551 ! Version: initial pairwise aggregation (top level) for general matrices [3].
4552 !
4553  USE dagmg_mem
4554  IMPLICIT NONE
4555  INTEGER :: n,ja(*),ia(n+1),idiag(n),ind(n),lcg(2,*),nc
4556  REAL(kind(0.0d0)) :: a(*)
4557  REAL(kind(0.0d0)) :: si(n)
4558  INTEGER :: ldd(nddl),nddl
4559  LOGICAL :: skipass
4560  INTEGER :: iperm(n),riperm(n)
4561 !
4562 ! INPUT: n,ja(*),ia(n+1),idiag(n): aggregated matrix from previous pass
4563 ! (CSR format with partial ordering of each row: lower
4564 ! triangular part, next diag., and eventually upper triagular part;
4565 ! idiag point to the diagonal element)
4566 !
4567 ! si(n) : vector s or \tile{s} in Algorithm 4.2 or 4.3 of [2,3]
4568 !
4569 ! ind(n): on input, entries in ind should be nonnegative, and equal to
4570 ! zero only at nodes that are to be kept outside the aggregation
4571 !
4572 ! nddl : number of nodes i such that ind(i)=0
4573 !
4574 ! skipass: if true, pairwise aggregation is skipped; i.e.,
4575 ! the coarsening only removes nodes i such that ind(i)==0
4576 !
4577 ! iperm(n),riperm(n): CMK and reverse CMK permutation, respectively
4578 ! iperm(i) = 0 means that node i has to be transferred unaggregated
4579 ! to the coarse grid, and lcg(2,j) set to 0
4580 ! for the corresponding coarse grid node j
4581 !
4582 ! zerors: if true, diagonal entries are not taken into account;
4583 ! instead, the matrix is treated as it would have mean
4584 ! column and row sum equal to zero
4585 !
4586 ! OUTPUT: nc : number of pairs formed
4587 !
4588 ! lcg(2,nc): list of aggregates;
4589 ! lcg(2,i)=0 : lcg(1,i) is a singleton that was
4590 ! forced to be transferred unaggregated
4591 ! lcg(2,i)=-1: lcg(1,i) is a singleton because no valid
4592 ! pair has been found
4593 ! lcg(2,i)>0 : (lcg(1,i),lcg(2,i)) form the ith pair
4594 !
4595 ! ind(n): ind(i)=j means that i belongs to aggregate j
4596 ! ind(i)=0 means that i does not belong to any pair
4597 ! and is listed to ldd
4598 ! (detected from ind(i)=0 on input)
4599 ! ldd(nddl): list of nodes such that ind(i)=0 on input & output
4600 !
4601 !----------------
4602 ! Local variables
4603 !
4604  REAL(kind(0.0d0)) :: val,vals,valp,tent,rsi,rsj,epsr
4605  REAL(kind(0.0d0)) :: del1,del2,eta1,eta2,del12,sig1,sig2,rnd,vald
4606  INTEGER :: mindg,i,j,jj,k,kk,jk,isel,dg,ipair,nmark,idd
4607  INTEGER :: i1,i2,i3,ijs,ntentleft,npc,itrs,ijtent,j2
4608  LOGICAL :: acc,ifirst
4609  REAL(kind(0.0d0)) :: kaptg,bndmum1m1,dbndmum1,umdbndmum1
4610 !
4611  ifirst=.true.
4612  1 CONTINUE
4613 !
4614  kaptg=imult*kaptg_dampjac
4615  bndmum1m1=1.0d0/(kaptg-1.0d0)
4616  dbndmum1=2*1.0d0/kaptg
4617  umdbndmum1=1.0d0-dbndmum1
4618  idd=0
4619  nmark=0
4620  nc=0
4621  ijs=1
4622  npc=0
4623  DO WHILE (nmark.LT.n)
4624  isel=ijs
4625  isel=riperm(ijs)
4626  ijs=ijs+1
4627  !
4628  ! Node isel has been selected
4629  !
4630  ! First check if isel has to be kept outside aggregation
4631  IF (ind(isel) .EQ. 0) THEN
4632  idd=idd+1
4633  nmark=nmark+1
4634  ldd(idd)=isel
4635  cycle
4636  END IF
4637  !
4638  ! Check if isel has already been processed
4639  IF (ind(isel) .GE. 0) cycle
4640  !
4641  ! A new aggregate is formed that contains isel
4642  nc = nc + 1
4643  lcg(1,nc) = isel
4644  nmark = nmark+1
4645  ind(isel) = nc
4646  ipair = 0
4647  !
4648  ! Check if isel has to be transferred unaggregated
4649  IF (iperm(isel) .EQ. 0) THEN
4650  lcg(2,nc) = 0
4651  npc=npc+1
4652  cycle
4653  END IF
4654  ! Skip pairwise aggregation is skipass==true
4655  IF (skipass) THEN
4656  lcg(2,nc) = -1
4657  cycle
4658  END IF
4659  !
4660  ! Try to form a pair with isel: follow list of neighbors
4661  !
4662  i2=ia(isel+1)-1
4663  DO i = ia(isel),i2
4664  ! CYCLE means: reject this neighbor, proceed with next one
4665  IF (i .EQ. idiag(isel)) cycle
4666  j = ja(i)
4667  ! check if j is available to form a pair
4668  IF(iperm(j).EQ.0 .OR. ind(j).GE.0) cycle
4669  ! search for the corresponding entry in transposed matrix
4670  kk=0
4671  IF (i .LT. idiag(isel)) THEN
4672  j2=ia(j+1)-1
4673  DO jk=idiag(j)+1,j2
4674  IF (ja(jk) .EQ. isel) THEN
4675  kk=jk
4676  EXIT
4677  END IF
4678  END DO
4679  ELSE
4680  DO jk=ia(j),idiag(j)-1
4681  IF (ja(jk) .EQ. isel) THEN
4682  kk=jk
4683  EXIT
4684  END IF
4685  END DO
4686  ENDIF
4687  vals=-a(i)/2
4688  IF(kk .NE. 0) vals=vals-a(kk)/2
4689  IF (zerors) THEN
4690  rsi=0.0d0
4691  rsj=0.0d0
4692  eta1=2*si(isel)
4693  eta2=2*si(j)
4694  ELSE
4695  rsi=-si(isel)+a(idiag(isel))
4696  rsj=-si(j)+a(idiag(j))
4697  eta1=2*a(idiag(isel))
4698  eta2=2*a(idiag(j))
4699  END IF
4700  sig1=si(isel)-vals
4701  sig2=si(j)-vals
4702  !
4703  ! CYCLE instructions below: pair rejected because A_G is not
4704  ! nonnegative definite
4705  !
4706  ! Heuristic if sigj is negative: set Sigma_G(j)=abs(sigj) (j=1,2)
4707  IF (sig1 > 0.0d0) THEN
4708  del1=rsi
4709  ELSE
4710  del1=rsi+2*sig1
4711  END IF
4712  IF (sig2 > 0.0d0) THEN
4713  del2=rsj
4714  ELSE
4715  del2=rsj+2*sig2
4716  END IF
4717  IF (vals > 0.0d0) THEN
4718  epsr=repsmach*vals
4719  IF (abs(del1) < epsr .AND. abs(del2) < epsr) THEN
4720  valp=(eta1*eta2)/(vals*(eta1+eta2))
4721  ELSE IF (abs(del1) < epsr) THEN
4722  IF (del2 < -epsr) cycle
4723  valp=(eta1*eta2)/(vals*(eta1+eta2))
4724  ELSE IF (abs(del2) < epsr) THEN
4725  IF (del1 < -epsr) cycle
4726  valp=(eta1*eta2)/(vals*(eta1+eta2))
4727  ELSE
4728  del12=del1+del2
4729  IF (del12 < -epsr) cycle
4730  valp=vals+del1*del2/del12
4731  IF (valp < 0.0d0) cycle
4732  valp=((eta1*eta2)/(eta1+eta2))/valp
4733  END IF
4734  ELSE
4735  IF (del1 .LE. 0.0d0 .OR. del2 .LE. 0.0d0) cycle
4736  valp=vals+del1*del2/(del1+del2)
4737  IF (valp < 0.0d0) cycle
4738  vals=(eta1*eta2)/(eta1+eta2)
4739  valp=vals/valp
4740  END IF
4741  ! main threshold test
4742  IF (valp > kaptg) cycle
4743  !
4744  ! A_G is nonneagtive definite and passed the corresponding
4745  ! "quality" threshold test: (isel,j) is
4746  ! an acceptable pair; check if it is the best one
4747  !
4748  tent=valp
4749  IF (ipair.EQ.0) GOTO 10
4750  IF (16*(tent-val).LT.-1) GOTO 9
4751  IF (16*(tent-val).LT.1 .AND. iperm(j).LT.iperm(ipair)) GOTO 9
4752  cycle
4753 9 CONTINUE
4754 10 CONTINUE
4755  ipair = j
4756  val = tent
4757  ENDDO
4758  !
4759  IF (ipair .EQ. 0) THEN
4760  ! no valid pair found: isel left alone in aggregate nc
4761  lcg(2,nc) = -1
4762  ELSE
4763  ! pair nc is formed with isel and ipair
4764  lcg(2,nc) = ipair
4765  ind(ipair) = nc
4766  nmark = nmark+1
4767  END IF
4768  ENDDO
4769 !
4770 ! Restart aggregation with kaptg twice as large in case of
4771 ! slow coarsening; allowed only once per level
4772 ! (new parameters apply at further levels)
4773 !
4774  IF (ifirst .AND. 3*nc.GE.2*n .AND. ngl(1).GT.maxcoarseslowt) THEN
4775  imult=2*imult
4776  IF (wfo) THEN
4777  WRITE (iout,901) irank,imult*kaptg_dampjac
4778  END IF
4779 901 FORMAT(i3, &
4780  '* Coarsening too slow: quality threshold increased to',f6.2)
4781  ind(1:n)=-1
4782  DO i=1,nddl
4783  ind(ldd(i))=0
4784  END DO
4785  ifirst=.false.
4786  GOTO 1
4787  ENDIF
4788  RETURN
4789  END SUBROUTINE dagmg_findpairs_gi1
4790 !-----------------------------------------------------------------------
4791  SUBROUTINE dagmg_findpairs_si1(n,a,ja,ia,idiag,si,ind,lcg,nc &
4792  ,nddl,ldd,skipass &
4793  ,riperm,iperm )
4794 ! Performs pairwise aggregation according to Algorithms 4.2 and 4.3 in [2,3],
4795 ! with some heuristic enhancements to cover non diagonally dominant matrices.
4796 !
4797 ! Version: initial pairwise aggregation (top level) for symm. matrices [2].
4798 !
4799  USE dagmg_mem
4800  IMPLICIT NONE
4801  INTEGER :: n,ja(*),ia(n+1),idiag(n),ind(n),lcg(2,*),nc
4802  REAL(kind(0.0d0)) :: a(*)
4803  REAL(kind(0.0d0)) :: si(n)
4804  INTEGER :: ldd(nddl),nddl
4805  LOGICAL :: skipass
4806  INTEGER :: iperm(n),riperm(n)
4807 !
4808 ! INPUT: n,ja(*),ia(n+1),idiag(n): aggregated matrix from previous pass
4809 ! (CSR format with partial ordering of each row: lower
4810 ! triangular part, next diag., and eventually upper triagular part;
4811 ! idiag point to the diagonal element)
4812 !
4813 ! si(n) : vector s or \tile{s} in Algorithm 4.2 or 4.3 of [2,3]
4814 !
4815 ! ind(n): on input, entries in ind should be nonnegative, and equal to
4816 ! zero only at nodes that are to be kept outside the aggregation
4817 !
4818 ! nddl : number of nodes i such that ind(i)=0
4819 !
4820 ! skipass: if true, pairwise aggregation is skipped; i.e.,
4821 ! the coarsening only removes nodes i such that ind(i)==0
4822 !
4823 ! iperm(n),riperm(n): CMK and reverse CMK permutation, respectively
4824 ! iperm(i) = 0 means that node i has to be transferred unaggregated
4825 ! to the coarse grid, and lcg(2,j) set to 0
4826 ! for the corresponding coarse grid node j
4827 !
4828 ! zerors: if true, diagonal entries are not taken into account;
4829 ! instead, the matrix is treated as it would have mean
4830 ! column and row sum equal to zero
4831 !
4832 ! OUTPUT: nc : number of pairs formed
4833 !
4834 ! lcg(2,nc): list of aggregates;
4835 ! lcg(2,i)=0 : lcg(1,i) is a singleton that was
4836 ! forced to be transferred unaggregated
4837 ! lcg(2,i)=-1: lcg(1,i) is a singleton because no valid
4838 ! pair has been found
4839 ! lcg(2,i)>0 : (lcg(1,i),lcg(2,i)) form the ith pair
4840 !
4841 ! ind(n): ind(i)=j means that i belongs to aggregate j
4842 ! ind(i)=0 means that i does not belong to any pair
4843 ! and is listed to ldd
4844 ! (detected from ind(i)=0 on input)
4845 ! ldd(nddl): list of nodes such that ind(i)=0 on input & output
4846 !
4847 !----------------
4848 ! Local variables
4849 !
4850  REAL(kind(0.0d0)) :: val,vals,valp,tent,rsi,rsj,epsr
4851  REAL(kind(0.0d0)) :: del1,del2,eta1,eta2,del12,sig1,sig2,rnd,vald
4852  INTEGER :: mindg,i,j,jj,k,kk,jk,isel,dg,ipair,nmark,idd
4853  INTEGER :: i1,i2,i3,ijs,ntentleft,npc,itrs,ijtent,j2
4854  LOGICAL :: acc,ifirst
4855  REAL(kind(0.0d0)) :: kaptg,bndmum1m1,dbndmum1,umdbndmum1
4856 !
4857  ifirst=.true.
4858  1 CONTINUE
4859 !
4860  kaptg=imult*kaptg_blocdia
4861  bndmum1m1=1.0d0/(kaptg-1.0d0)
4862  dbndmum1=2*1.0d0/kaptg
4863  umdbndmum1=1.0d0-dbndmum1
4864  idd=0
4865  nmark=0
4866  nc=0
4867  ijs=1
4868  npc=0
4869  DO WHILE (nmark.LT.n)
4870  isel=ijs
4871  isel=riperm(ijs)
4872  ijs=ijs+1
4873  !
4874  ! Node isel has been selected
4875  !
4876  ! First check if isel has to be kept outside aggregation
4877  IF (ind(isel) .EQ. 0) THEN
4878  idd=idd+1
4879  nmark=nmark+1
4880  ldd(idd)=isel
4881  cycle
4882  END IF
4883  !
4884  ! Check if isel has already been processed
4885  IF (ind(isel) .GE. 0) cycle
4886  !
4887  ! A new aggregate is formed that contains isel
4888  nc = nc + 1
4889  lcg(1,nc) = isel
4890  nmark = nmark+1
4891  ind(isel) = nc
4892  ipair = 0
4893  !
4894  ! Check if isel has to be transferred unaggregated
4895  IF (iperm(isel) .EQ. 0) THEN
4896  lcg(2,nc) = 0
4897  npc=npc+1
4898  cycle
4899  END IF
4900  ! Skip pairwise aggregation is skipass==true
4901  IF (skipass) THEN
4902  lcg(2,nc) = -1
4903  cycle
4904  END IF
4905  !
4906  ! Try to form a pair with isel: follow list of neighbors
4907  !
4908  i2=ia(isel+1)-1
4909  DO i = ia(isel),i2
4910  ! CYCLE means: reject this neighbor, proceed with next one
4911  IF (i .EQ. idiag(isel)) cycle
4912  j = ja(i)
4913  ! check if j is available to form a pair
4914  IF(iperm(j).EQ.0 .OR. ind(j).GE.0) cycle
4915  vals=-a(i)
4916  IF (zerors) THEN
4917  rsi=0.0d0
4918  rsj=0.0d0
4919  ELSE
4920  rsi=-si(isel)+a(idiag(isel))
4921  rsj=-si(j)+a(idiag(j))
4922  END IF
4923  sig1=si(isel)-vals
4924  sig2=si(j)-vals
4925  !
4926  ! CYCLE instructions below: pair rejected because A_G is not
4927  ! nonnegative definite
4928  !
4929  ! Heuristic if sigj is negative: set Sigma_G(j)=abs(sigj) (j=1,2)
4930  IF (sig1 > 0.0d0) THEN
4931  del1=rsi
4932  eta1=rsi+2*sig1
4933  ELSE
4934  del1=rsi+2*sig1
4935  eta1=rsi
4936  END IF
4937  IF (eta1 < 0.0d0) cycle
4938  IF (sig2 > 0.0d0) THEN
4939  del2=rsj
4940  eta2=rsj+2*sig2
4941  ELSE
4942  del2=rsj+2*sig2
4943  eta2=rsj
4944  END IF
4945  IF (eta2 < 0.0d0) cycle
4946  IF (vals > 0.0d0) THEN
4947  epsr=repsmach*vals
4948  IF (abs(del1) < epsr .AND. abs(del2) < epsr) THEN
4949  valp=1.0d0+(eta1*eta2)/(vals*(eta1+eta2))
4950  ELSE IF (abs(del1) < epsr) THEN
4951  IF (del2 < -epsr) cycle
4952  valp=1.0d0+(eta1*eta2)/(vals*(eta1+eta2))
4953  ELSE IF (abs(del2) < epsr) THEN
4954  IF (del1 < -epsr) cycle
4955  valp=1.0d0+(eta1*eta2)/(vals*(eta1+eta2))
4956  ELSE
4957  del12=del1+del2
4958  IF (del12 < -epsr) cycle
4959  valp=vals+del1*del2/del12
4960  IF (valp < 0.0d0) cycle
4961  valp=(vals+(eta1*eta2)/(eta1+eta2))/valp
4962  END IF
4963  ELSE
4964  IF (del1 .LE. 0.0d0 .OR. del2 .LE. 0.0d0) cycle
4965  valp=vals+del1*del2/(del1+del2)
4966  IF (valp < 0.0d0) cycle
4967  vals=vals+(eta1*eta2)/(eta1+eta2)
4968  IF (vals < 0.0d0) cycle
4969  valp=vals/valp
4970  END IF
4971  ! main threshold test
4972  IF (valp > kaptg) cycle
4973  !
4974  ! A_G is nonneagtive definite and passed the corresponding
4975  ! "quality" threshold test: (isel,j) is
4976  ! an acceptable pair; check if it is the best one
4977  !
4978  tent=valp
4979  IF (ipair.EQ.0) GOTO 10
4980  IF (16*(tent-val).LT.-1) GOTO 9
4981  IF (16*(tent-val).LT.1 .AND. iperm(j).LT.iperm(ipair)) GOTO 9
4982  cycle
4983 9 CONTINUE
4984 10 CONTINUE
4985  ipair = j
4986  val = tent
4987  ENDDO
4988  !
4989  IF (ipair .EQ. 0) THEN
4990  ! no valid pair found: isel left alone in aggregate nc
4991  lcg(2,nc) = -1
4992  ELSE
4993  ! pair nc is formed with isel and ipair
4994  lcg(2,nc) = ipair
4995  ind(ipair) = nc
4996  nmark = nmark+1
4997  END IF
4998  ENDDO
4999 !
5000 ! Restart aggregation with kaptg twice as large in case of
5001 ! slow coarsening; allowed only once per level
5002 ! (new parameters apply at further levels)
5003 !
5004  IF (ifirst .AND. 3*nc.GE.2*n .AND. ngl(1).GT.maxcoarseslowt) THEN
5005  imult=2*imult
5006  IF (wfo) THEN
5007  WRITE (iout,901) irank,imult*kaptg_blocdia
5008  END IF
5009 901 FORMAT(i3, &
5010  '* Coarsening too slow: quality threshold increased to',f6.2)
5011  ind(1:n)=-1
5012  DO i=1,nddl
5013  ind(ldd(i))=0
5014  END DO
5015  ifirst=.false.
5016  GOTO 1
5017  ENDIF
5018  RETURN
5019  END SUBROUTINE dagmg_findpairs_si1
5020 !------------------------------------------------------------------
5021  SUBROUTINE dagmg_lcgmix(nc,m,lcg1,lcg,lcgn)
5022  INTEGER :: nc,m,lcg1(m,*),lcg(2,*),lcgn(2*m,*),i,l,l1,l2
5023  IF (m.eq.2) THEN
5024  DO i=1,nc
5025  IF(lcg(2,i) .EQ. 0) THEN
5026  lcgn(1,i)=lcg1(1,lcg(1,i))
5027  lcgn(2,i)=0
5028  lcgn(4,i)=-1
5029  ELSE IF(lcg(2,i) .LT. 0) THEN
5030  IF (lcg1(2,lcg(1,i)) .LT. 0) THEN
5031  lcgn(1,i)=lcg1(1,lcg(1,i))
5032  lcgn(2,i)=-1
5033  lcgn(4,i)=-1
5034  ELSE
5035  lcgn(1,i)=lcg1(1,lcg(1,i))
5036  lcgn(2,i)=lcg1(2,lcg(1,i))
5037  lcgn(4,i)=-2
5038  END IF
5039  ELSE
5040  IF (lcg1(2,lcg(1,i)) .LT. 0) THEN
5041  IF (lcg1(2,lcg(2,i)) .LT. 0) THEN
5042  lcgn(1,i)=lcg1(1,lcg(1,i))
5043  lcgn(2,i)=lcg1(1,lcg(2,i))
5044  lcgn(4,i)=-2
5045  ELSE
5046  lcgn(1,i)=lcg1(1,lcg(1,i))
5047  lcgn(2,i)=lcg1(1,lcg(2,i))
5048  lcgn(3,i)=lcg1(2,lcg(2,i))
5049  lcgn(4,i)=-3
5050  END IF
5051  ELSE
5052  IF (lcg1(2,lcg(2,i)) .LT. 0) THEN
5053  lcgn(1,i)=lcg1(1,lcg(1,i))
5054  lcgn(2,i)=lcg1(2,lcg(1,i))
5055  lcgn(3,i)=lcg1(1,lcg(2,i))
5056  lcgn(4,i)=-3
5057  ELSE
5058  lcgn(1,i)=lcg1(1,lcg(1,i))
5059  lcgn(2,i)=lcg1(2,lcg(1,i))
5060  lcgn(3,i)=lcg1(1,lcg(2,i))
5061  lcgn(4,i)=lcg1(2,lcg(2,i))
5062  END IF
5063  END IF
5064  END IF
5065  END DO
5066  ELSE
5067  DO i=1,nc
5068  IF(lcg(2,i) .EQ. 0) THEN
5069  lcgn(1,i)=lcg1(1,lcg(1,i))
5070  lcgn(2,i)=0
5071  lcgn(2*m,i)=-1
5072  ELSE
5073  lcgn(2,i)=-1
5074  l1=m
5075  IF (lcg1(m,lcg(1,i)).LT.0) l1=-lcg1(m,lcg(1,i))
5076  lcgn(1:l1,i)=lcg1(1:l1,lcg(1,i))
5077  IF(lcg(2,i) .LT. 0) THEN
5078  l=l1
5079  ELSE
5080  l2=m
5081  IF (lcg1(m,lcg(2,i)).LT.0) l2=-lcg1(m,lcg(2,i))
5082  lcgn(l1+1:l1+l2,i)=lcg1(1:l2,lcg(2,i))
5083  l=l1+l2
5084  END IF
5085  IF(l .LT. 2*m) lcgn(2*m,i)=-l
5086  END IF
5087  END DO
5088  END IF
5089  RETURN
5090  END SUBROUTINE dagmg_lcgmix
5091 !-----------------------------------------------------------------------
5092  SUBROUTINE dagmg_setind(nc,ndd,ldd,lcg,m,ind)
5093  INTEGER :: nc,m,lcg(m,*),nll,ldd(ndd),ind(*),i,k,l
5094  DO i=1,ndd
5095  ind(ldd(i))=0
5096  END DO
5097  DO i=1,nc
5098  l=m
5099  IF (lcg(m,i) .LT. 0) l=-lcg(m,i)
5100  DO k=1,l
5101  ind(lcg(k,i))=i
5102  END DO
5103  END DO
5104  RETURN
5105  END SUBROUTINE dagmg_setind
5106 !-----------------------------------------------------------------------
5107  SUBROUTINE dagmg_setcg(n,a,ja,ia,idiag,si,ind,lcg &
5108  ,nc,a2,ja2,ia2,idiag2,si2,ysi,maxdg,iw,w,iext,iext2)
5109  USE dagmg_mem
5110  IMPLICIT NONE
5111  INTEGER :: n,ja(*),ia(n+1),idiag(n),ind(n),nc,lcg(2,nc)
5112  INTEGER :: ja2(*),ia2(n+1),idiag2(n),maxdg
5113  INTEGER, TARGET :: iw(2*nc)
5114  INTEGER, OPTIONAL :: iext(*),iext2(*)
5115  REAL(kind(0.0d0)) :: a(*),a2(*),w(nc),vald
5116  REAL(kind(0.0d0)) :: si(n),si2(*),sii
5117  LOGICAL :: ysi
5118  INTEGER :: nz,nzu,i,jj,jc,jcol,ki,kb,kf,jpos
5119  INTEGER, POINTER, DIMENSION(:) :: iw1, iw2
5120  !
5121  iw1 => iw(1:nc)
5122  iw2 => iw(nc+1:2*nc)
5123  !
5124  nz = 0
5125  iw1(1:nc)=0
5126  maxdg=0
5127  ia2(1)=1
5128  DO i = 1,nc
5129  sii=0.0d0
5130  vald=0.0d0
5131  nzu=0
5132  DO ki= 1,2
5133  jj = lcg(ki,i)
5134  IF (ki.EQ.1 .OR. jj.GT.0) THEN
5135  IF (ysi) sii=sii+si(jj)
5136  kf = ia(jj+1) - 1
5137  DO kb = ia(jj),kf
5138  jc = ja(kb)
5139  jcol = ind(jc)
5140  IF (jcol .GT. 0) THEN
5141  IF (jcol .LT. i) THEN
5142  jpos = iw1(jcol)
5143  IF (jpos.EQ.0) THEN
5144  nz = nz+1
5145  ja2(nz) = jcol
5146  iw1(jcol) = nz
5147  a2(nz) = a(kb)
5148  ELSE
5149  a2(jpos) = a2(jpos) + a(kb)
5150  ENDIF
5151  ELSE IF (jcol .GT. i) THEN
5152  jpos = iw1(jcol)
5153  IF (jpos.EQ.0) THEN
5154  nzu = nzu+1
5155  iw2(nzu) = jcol
5156  iw1(jcol) = nzu
5157  w(nzu) = a(kb)
5158  ELSE
5159  w(jpos) = w(jpos) + a(kb)
5160  ENDIF
5161  ELSE
5162  vald=vald+a(kb)
5163  IF (ysi .AND. jc.NE.jj) sii=sii+a(kb)
5164  ENDIF
5165  ENDIF
5166  ENDDO
5167  ENDIF
5168  ENDDO
5169  nz=nz+1
5170  a2(nz)=vald
5171  idiag2(i)=nz
5172  ja2(nz)=i
5173  a2(nz+1:nz+nzu)=w(1:nzu)
5174  ja2(nz+1:nz+nzu)=iw2(1:nzu)
5175  nz=nz+nzu
5176  maxdg=max(maxdg,nz-ia2(i))
5177  DO kb = ia2(i), nz
5178  iw1(ja2(kb))=0
5179  ENDDO
5180  IF (ysi) si2(i)=sii
5181  ia2(i+1)=nz+1
5182  ENDDO
5183  RETURN
5184  END SUBROUTINE dagmg_setcg
5185 !-----------------------------------------------------------------------
5186 !!!!!!!!!!!!!!!!!!! END of ROUTINES FOR SETUP
5187 !-----------------------------------------------------------------------
5188 !!!!!!!!!!!!!!!!!!! ROUTINES FOR DIRECT SOLUTION
5189 !-----------------------------------------------------------------------
5190  SUBROUTINE dagmg_lapack(N,a,ja,ia,f,ijb,flop)
5191  USE dagmg_mem
5192  IMPLICIT NONE
5193  INTEGER :: n,ia(n+1),ja(*),ijb
5194  REAL(kind(0.0d0)) :: a(*),f(n)
5195  REAL(kind(0.0d0)) :: flop
5196  !
5197  REAL(kind(0.0d0)), ALLOCATABLE, SAVE :: ac(:,:)
5198  INTEGER , ALLOCATABLE, SAVE :: ipiv(:)
5199  INTEGER, SAVE :: iflop
5200  INTEGER :: i,kk,iext
5201  INTEGER :: ierr
5202  INTEGER , parameter :: ione=1
5203  !
5204  ierr=0
5205  IF (ijb == -2) THEN
5206  !
5207  DEALLOCATE (ac,ipiv)
5208  !
5209  ELSE IF (ijb == 1) THEN
5210  !
5211  ALLOCATE (ac(n,n),ipiv(n))
5212  memi=memi+n
5213  memr=memr+n*n
5214  memax=max(memax,memr+memi*rlenilen)
5215  ac=0.0d0
5216  DO i=1,n
5217  DO kk=ia(i),ia(i+1)-1
5218  ac(i,ja(kk))=a(kk)
5219  END DO
5220  END DO
5221  CALL dgetrf(n,n,ac,n,ipiv,ierr)
5222  IF (ierr /= 0) THEN
5223  WRITE(iout, *) ' FATAL ERROR in GETRF: ierror=',ierr
5224  stop
5225  END IF
5226  iflop=(2*n*n-n)
5227  flop=(2*1.0d0)/(3*1.0d0)*(dble(n)**3)
5228  !
5229  ELSE IF (ijb == 2) THEN
5230  !
5231  CALL dgetrs('N',n,ione,ac,n,ipiv,f,n,ierr)
5232  IF (ierr /= 0) THEN
5233  WRITE(iout, *) ' FATAL ERROR in GETRS: ierror=',ierr
5234  stop
5235  END IF
5236  flop=flop+dble(iflop)
5237  !
5238  END IF
5239  !
5240  RETURN
5241  END SUBROUTINE dagmg_lapack
5242 !-----------------------------------------------------------------------
5243  SUBROUTINE dagmg_mumpsseq(n,a,ja,ia,f,ijob,flop)
5244  USE dagmg_mem
5245  IMPLICIT NONE
5246 !!!!!!!! BEGIN Include DAGMG_MUMPS definitions !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
5247 !
5248 !
5249  TYPE dmumps_root_struc
5250  sequence
5251  INTEGER mblock, nblock, nprow, npcol
5252  INTEGER myrow, mycol
5253  INTEGER root_size, tot_root_size
5254  INTEGER :: cntxt_blacs, truc
5255  INTEGER, DIMENSION(:), POINTER :: rg2l_row
5256  INTEGER, DIMENSION(:), POINTER :: rg2l_col
5257  INTEGER , DIMENSION(:), POINTER :: ipiv
5258  INTEGER, DIMENSION( 9 ) :: descriptor, descb
5259  LOGICAL yes, gridinit_done
5260  INTEGER lpiv, brol
5261 ! Used to access Schur easily from root structure
5262  DOUBLE PRECISION, DIMENSION(:), POINTER :: schur_pointer
5263  INTEGER schur_mloc, schur_nloc, schur_lld, machin
5264 !
5265 ! Data for nullspace/QR
5266 !
5267  DOUBLE PRECISION, DIMENSION(:), POINTER :: qr_tau
5268  DOUBLE PRECISION qr_rcond
5269 !
5270 ! Givens rotations
5271 !
5272  INTEGER maxg, gind
5273  DOUBLE PRECISION, DIMENSION(:),POINTER::grow, gcos, gsin
5274 !
5275 ! RRRLU data
5276 !
5277  INTEGER elg_max,null_max
5278  INTEGER elind,euind,nlupdate,nuupdate
5279  INTEGER,DIMENSION(:),POINTER::perm_row,perm_col
5280  INTEGER,DIMENSION(:),POINTER::elrow, eurow, ptrel, ptreu
5281  DOUBLE PRECISION, DIMENSION(:), POINTER :: elelg, euelg, dl
5282 !
5283  END TYPE dmumps_root_struc
5284  TYPE dmumps_struc
5285  sequence
5286 !
5287 ! This structure contains all parameters
5288 ! for the interface to the user, plus internal
5289 ! information
5290 !
5291 ! *****************
5292 ! INPUT PARAMETERS
5293 ! *****************
5294 ! -----------------
5295 ! MPI Communicator
5296 ! -----------------
5297  INTEGER comm
5298 ! ------------------
5299 ! Problem definition
5300 ! ------------------
5301 ! Solver (SYM=0 unsymmetric,SYM=1 symmetric Positive Definite,
5302 ! SYM=2 general symmetric)
5303 ! Type of parallelism (PAR=1 host working, PAR=0 host not working)
5304  INTEGER sym, par
5305  INTEGER job
5306 ! --------------------
5307 ! Order of Input matrix
5308 ! --------------------
5309  INTEGER n
5310 !
5311 ! ----------------------------------------
5312 ! Assembled input matrix : User interface
5313 ! ----------------------------------------
5314  INTEGER nz
5315  DOUBLE PRECISION, DIMENSION(:), POINTER :: a
5316  INTEGER, DIMENSION(:), POINTER :: irn, jcn
5317  DOUBLE PRECISION, DIMENSION(:), POINTER :: colsca, rowsca, pad0
5318 !
5319 ! ------------------------------------
5320 ! Case of distributed assembled matrix
5321 ! matrix on entry:
5322 ! ------------------------------------
5323  INTEGER nz_loc, pad1
5324  INTEGER, DIMENSION(:), POINTER :: irn_loc, jcn_loc
5325  DOUBLE PRECISION, DIMENSION(:), POINTER :: a_loc, pad2
5326 !
5327 ! ----------------------------------------
5328 ! Unassembled input matrix: User interface
5329 ! ----------------------------------------
5330  INTEGER nelt, pad3
5331  INTEGER, DIMENSION(:), POINTER :: eltptr
5332  INTEGER, DIMENSION(:), POINTER :: eltvar
5333  DOUBLE PRECISION, DIMENSION(:), POINTER :: a_elt, pad4
5334 !
5335 ! ---------------------------------------------
5336 ! Symmetric permutation :
5337 ! PERM_IN if given by user (optional)
5338 ! ---------------------------------------------
5339  INTEGER, DIMENSION(:), POINTER :: perm_in
5340 !
5341 !
5342 ! ******************
5343 ! INPUT/OUTPUT data
5344 ! ******************
5345 ! --------------------------------------------------------
5346 ! RHS / SOL_LOC
5347 ! -------------
5348 ! right-hand side and solution
5349 ! -------------------------------------------------------
5350  DOUBLE PRECISION, DIMENSION(:), POINTER :: rhs, redrhs
5351  DOUBLE PRECISION, DIMENSION(:), POINTER :: rhs_sparse
5352  DOUBLE PRECISION, DIMENSION(:), POINTER :: sol_loc
5353  INTEGER, DIMENSION(:), POINTER :: irhs_sparse
5354  INTEGER, DIMENSION(:), POINTER :: irhs_ptr
5355  INTEGER, DIMENSION(:), POINTER :: isol_loc
5356  INTEGER lrhs, nrhs, nz_rhs, lsol_loc, lredrhs
5357  INTEGER pad5
5358 ! ----------------------------
5359 ! Control parameters,
5360 ! statistics and output data
5361 ! ---------------------------
5362  INTEGER icntl(40)
5363  INTEGER info(40)
5364  INTEGER infog(40)
5365  DOUBLE PRECISION cost_subtrees
5366  DOUBLE PRECISION cntl(15)
5367  DOUBLE PRECISION rinfo(20)
5368  DOUBLE PRECISION rinfog(20)
5369 ! ---------------------------------------------------------
5370 ! Permutations computed during analysis:
5371 ! SYM_PERM: Symmetric permutation
5372 ! UNS_PERM: Column permutations (optionnal)
5373 ! ---------------------------------------------------------
5374  INTEGER, DIMENSION(:), POINTER :: sym_perm, uns_perm
5375 !
5376 ! -------------------------------------
5377 ! Case of distributed matrix on entry:
5378 ! DMUMPS potentially provides mapping
5379 ! -------------------------------------
5380  INTEGER, DIMENSION(:), POINTER :: mapping
5381 !
5382 ! -------------------------------
5383 ! Deficiency and null space basis
5384 ! -------------------------------
5385  DOUBLE PRECISION, DIMENSION(:,:), POINTER :: null_space
5386  INTEGER deficiency, pad6
5387 ! -----
5388 ! Schur
5389 ! -----
5390  INTEGER nprow, npcol, mblock, nblock
5391  INTEGER schur_mloc, schur_nloc, schur_lld
5392  INTEGER size_schur
5393  INTEGER, DIMENSION(:), POINTER :: listvar_schur
5394  DOUBLE PRECISION, DIMENSION(:), POINTER :: schur
5395  DOUBLE PRECISION, DIMENSION(:), POINTER :: schur_cinterface
5396 ! --------------
5397 ! Version number
5398 ! --------------
5399  CHARACTER(LEN=16) version_number
5400 ! -----------
5401 ! Out-of-core
5402 ! -----------
5403  CHARACTER(LEN=256) :: ooc_tmpdir
5404  CHARACTER(LEN=64) :: ooc_prefix
5405 ! ------------------------------------------
5406 ! To save the matrix in matrix market format
5407 ! ------------------------------------------
5408  CHARACTER(LEN=256) write_problem
5409 !
5410 !
5411 ! **********************
5412 ! INTERNAL Working data
5413 ! *********************
5414  INTEGER inst_number
5415 ! For MPI
5416  INTEGER comm_nodes, myid_nodes, comm_load
5417  INTEGER myid, nprocs, nslaves
5418  INTEGER ass_irecv
5419  INTEGER, DIMENSION(:), POINTER :: poids
5420  INTEGER lbufr
5421  INTEGER lbufr_bytes
5422  INTEGER, DIMENSION(:), POINTER :: bufr
5423 ! For analysis/facto/solve phases
5424  INTEGER maxis1, pad7
5425  INTEGER keep(500)
5426  INTEGER*8 keep8(150)
5427 ! IS is used for the factors + workspace for contrib. blocks
5428  INTEGER, DIMENSION(:), POINTER :: is
5429 ! is1 (maxis1) contains working arrays computed
5430 ! and used only during analysis
5431  INTEGER, DIMENSION(:), POINTER :: is1
5432 ! The following data/arrays are computed during the analysis
5433 ! phase and used during the factorization and solve phases.
5434  INTEGER lna
5435  INTEGER nbsa
5436  INTEGER,POINTER,DIMENSION(:)::step, ne_steps, nd_steps
5437 ! Info for pruning tree
5438  INTEGER,POINTER,DIMENSION(:)::step2node
5439 ! ---------------------
5440  INTEGER,POINTER,DIMENSION(:)::frere_steps, dad_steps
5441  INTEGER,POINTER,DIMENSION(:)::fils, ptrar, frtptr, frtelt
5442  INTEGER,POINTER,DIMENSION(:)::na, procnode_steps
5443 ! The two pointer arrays computed in facto and used by the solve
5444 ! (except the factors) are PTLUST_S and PTRFAC.
5445  INTEGER, DIMENSION(:), POINTER :: ptlust_s
5446  INTEGER(8), DIMENSION(:), POINTER :: ptrfac
5447 ! main real working arrays for factorization/solve phases
5448  DOUBLE PRECISION, DIMENSION(:), POINTER :: s
5449 ! Information on mapping
5450  INTEGER, DIMENSION(:), POINTER :: procnode
5451 ! Input matrix ready for numerical assembly
5452 ! -arrowhead format in case of assembled matrix
5453 ! -element format otherwise
5454  INTEGER, DIMENSION(:), POINTER :: intarr
5455  DOUBLE PRECISION, DIMENSION(:), POINTER :: dblarr
5456 ! Element entry: internal data
5457  INTEGER nelt_loc, leltvar, na_elt, pad8
5458  INTEGER, DIMENSION(:), POINTER :: eltproc
5459 ! Candidates and node partitionning
5460  INTEGER, DIMENSION(:,:), POINTER :: candidates
5461  INTEGER, DIMENSION(:), POINTER :: istep_to_iniv2
5462  INTEGER, DIMENSION(:), POINTER :: future_niv2
5463  INTEGER, DIMENSION(:,:), POINTER :: tab_pos_in_pere
5464  LOGICAL, DIMENSION(:), POINTER :: i_am_cand
5465 ! For heterogeneous architecture
5466  INTEGER, DIMENSION(:), POINTER :: mem_dist
5467 ! Compressed RHS
5468  INTEGER, DIMENSION(:), POINTER :: posinrhscomp
5469  DOUBLE PRECISION, DIMENSION(:), POINTER :: rhscomp
5470 ! For C interface
5471 ! Info on the subtrees to be used during factorization
5472  DOUBLE PRECISION, DIMENSION(:), POINTER :: mem_subtree
5473  INTEGER, DIMENSION(:), POINTER :: my_root_sbtr
5474  INTEGER, DIMENSION(:), POINTER :: my_first_leaf
5475  INTEGER, DIMENSION(:), POINTER :: my_nb_leaf
5476  INTEGER, DIMENSION(:), POINTER :: depth_first
5477  DOUBLE PRECISION, DIMENSION(:), POINTER :: cost_trav
5478  INTEGER nbsa_local, zwave
5479  INTEGER(8) :: max_surf_master
5480  INTEGER :: lwk_user, zozo
5481  DOUBLE PRECISION, DIMENSION(:), POINTER :: wk_user
5482 ! For simulating parallel out-of-core stack.
5483  DOUBLE PRECISION, DIMENSION(:),POINTER ::cb_son_size
5484 ! Instance number used/managed by the C/F77 interface
5485  INTEGER instance_number
5486 ! OOC management data that must persist from factorization to solve.
5487  INTEGER ooc_max_nb_nodes_for_zone
5488  INTEGER, DIMENSION(:,:), POINTER :: ooc_inode_sequence
5489  INTEGER(8),DIMENSION(:,:), POINTER :: ooc_size_of_block
5490  INTEGER*8, DIMENSION(:,:), POINTER :: ooc_vaddr
5491  INTEGER,DIMENSION(:), POINTER :: ooc_total_nb_nodes
5492  INTEGER,DIMENSION(:), POINTER :: ooc_nb_files
5493  CHARACTER,DIMENSION(:,:), POINTER :: ooc_file_names
5494  INTEGER,DIMENSION(:), POINTER :: ooc_file_name_length
5495 ! Indices of nul pivots
5496  INTEGER,DIMENSION(:), POINTER :: pivnul_list
5497 ! Internal control array
5498  DOUBLE PRECISION dkeep(30)
5499 ! Array needed to manage additionnal candidate processor
5500  INTEGER, DIMENSION(:,:), POINTER :: sup_proc
5501 ! ------------------------
5502 ! Root structure(internal)
5503 ! ------------------------
5504  type(dmumps_root_struc) :: root
5505  END TYPE dmumps_struc
5506 !!!!!!!! --END Include DAGMG_MUMPS definitions !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
5507  INTEGER :: n,ia(n+1),ijob
5508  INTEGER, TARGET :: ja(*)
5509  REAL(kind(0.0d0)), TARGET :: a(*),f(n)
5510  REAL(kind(0.0d0)) :: flop
5511  !
5512  INTEGER, SAVE :: iflop
5513  TYPE(dmumps_struc), SAVE :: mumps_par
5514  INTEGER :: ierr, i, j, k
5515  !
5516  IF (ijob == -2) THEN
5517  !
5518  ! Destroy the instance (deallocate internal data structures)
5519  mumps_par%JOB = -2
5520  CALL dagmg_mumps(mumps_par)
5521  !
5522  ELSE IF (ijob == 1) THEN
5523  !
5524  ! Define a communicator for the package.
5525  mumps_par%COMM = 0
5526  ! Initialize an instance of the package
5527  ! for L U factorization (sym = 0, with working host)
5528  mumps_par%JOB = -1
5529  mumps_par%SYM = 0
5530  mumps_par%PAR = 1
5531  CALL dagmg_mumps(mumps_par)
5532  mumps_par%ICNTL(2)=-1
5533  mumps_par%ICNTL(3)=-1
5534  mumps_par%ICNTL(4)=0
5535  mumps_par%ICNTL(14)=80
5536  mumps_par%ICNTL(18)=3
5537  mumps_par%ICNTL(24)=1
5538  mumps_par%NZ_loc=ia(n+1)-1
5539  IF (transint) THEN
5540  ALLOCATE( mumps_par%JCN_loc(mumps_par%NZ_loc) )
5541  do i=1,n
5542  do j=ia(i),ia(i+1)-1
5543  mumps_par%JCN_loc(j)=i
5544  end do
5545  end do
5546  mumps_par%IRN_loc => ja(1:mumps_par%NZ_loc)
5547  ELSE
5548  ALLOCATE( mumps_par%IRN_loc(mumps_par%NZ_loc) )
5549  do i=1,n
5550  do j=ia(i),ia(i+1)-1
5551  mumps_par%IRN_loc(j)=i
5552  end do
5553  end do
5554  mumps_par%JCN_loc => ja(1:mumps_par%NZ_loc)
5555  END IF
5556  memi=memi+mumps_par%NZ_loc
5557  memax=max(memax,memr+memi*rlenilen)
5558  mumps_par%A_loc => a(1:mumps_par%NZ_loc)
5559  !
5560  mumps_par%N=n
5561  ! Call package for analysis+factorization
5562  mumps_par%JOB = 4
5563  CALL dagmg_mumps(mumps_par)
5564  flop=mumps_par%RINFO(3)
5565  !iflop=2*mumps_par%INFO(9)-n
5566  IF (transint) THEN
5567  DEALLOCATE(mumps_par%JCN_loc)
5568  NULLIFY(mumps_par%IRN_loc,mumps_par%A_loc)
5569  ELSE
5570  DEALLOCATE(mumps_par%IRN_loc)
5571  NULLIFY(mumps_par%JCN_loc,mumps_par%A_loc)
5572  END IF
5573  memi=memi-mumps_par%NZ_loc
5574  !
5575  ELSE IF (ijob == 2) THEN
5576  !
5577  ! Call package for solution
5578  mumps_par%JOB = 3
5579  mumps_par%RHS => f(1:n)
5580  CALL dagmg_mumps(mumps_par)
5581  END IF
5582  !
5583  RETURN
5584  END SUBROUTINE dagmg_mumpsseq
5585 !-----------------------------------------------------------------------
5586 !!!!!!!!!!!!!!!!!!! END of ROUTINES FOR DIRECT SOLUTION
5587 END MODULE dagmg_allroutines
5588 !-----------------------------------------------------------------------
5589 !!!!!!!!!!!!!!!!!!! MAIN DRIVER
5590 !-----------------------------------------------------------------------
5591  SUBROUTINE dagmg( n,a,ja,ia,f,x,ijob,iprint,nrest,iter,tol )
5592  USE dagmg_mem
5593  USE dagmg_allroutines
5594  IMPLICIT NONE
5595  INTEGER :: n,ia(n+1),ja(*),ijob,iprint,nrest,iter
5596  REAL (kind(0.0d0)) :: a(*),f(n),x(n)
5597  REAL (kind(0.0d0)) :: tol
5598 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
5599 !
5600 ! Arguments
5601 ! =========
5602 !
5603 ! N (input) INTEGER.
5604 ! The dimension of the matrix.
5605 !
5606 ! A (input/output) REAL (kind(0.0d0)). Numerical values of the matrix.
5607 ! IA (input/output) INTEGER. Pointers for every row.
5608 ! JA (input/output) INTEGER. Column indices.
5609 !
5610 ! AGMG ASSUMES THAT ALL DIAGONAL ENTRIES ARE POSITIVE
5611 !
5612 ! Detailed description of the matrix format
5613 !
5614 ! On input, IA(I), I=1,...,N, refers to the physical start
5615 ! of row I. That is, the entries of row I are located
5616 ! in A(K), where K=IA(I),...,IA(I+1)-1. JA(K) carries the
5617 ! associated column indices. IA(N+1) must be defined in such
5618 ! a way that the above rule also works for I=N (that is,
5619 ! the last valid entry in arrays A,JA should correspond to
5620 ! index K=IA(N+1)-1). According what is written
5621 ! above, AGMG assumes that some of these JA(K) (for
5622 ! IA(I)<= K < IA(I+1) ) is equal to I with corresponding
5623 ! A(K) carrying the value of the diagonal element,
5624 ! which must be positive.
5625 !
5626 ! A,IA,JA are "output" parameters because on exit the
5627 ! entries of each row may occur in a different order (The
5628 ! matrix is mathematically the same, but stored in
5629 ! different way).
5630 !
5631 ! F (input/output) REAL (kind(0.0d0)).
5632 ! On input, the right hand side vector f.
5633 ! Overwritten on output.
5634 ! Significant only if IJOB==0, 2, 3, 10, 12, 100, 102, 110, 112
5635 !
5636 ! X (input/output) REAL (kind(0.0d0)).
5637 ! On input and if IJOB== 10, 12, 110, 112: initial guess
5638 ! (for other values of IJOB, the default is used: the zero vector).
5639 ! On output, the computed solution.
5640 !
5641 ! IJOB (input) INTEGER. Tells AGMG what has to be done.
5642 ! 0: performs setup + solve + memory release, no initial guess
5643 ! 10: performs setup + solve + memory release, initial guess in x(1:n)
5644 ! 1: performs setup only
5645 ! (preprocessing: prepares all parameters for subsequent solves)
5646 ! 2: solves only (based on previous setup), no initial guess
5647 ! 12: solves only (based on previous setup), initial guess in x(1:n)
5648 ! 3: the vector returned in x(1:n) is not the solution of the linear
5649 ! system, but the result of the action of the multigrid
5650 ! preconditioner on the right hand side in f(1:n)
5651 ! -1: erases the setup and releases internal memory
5652 !
5653 ! IJOB == 100,110,101,102,112: same as, respectively, IJOB==0,10,1,2,12
5654 ! but, use the TRANSPOSE of the input matrix in A, JA, IA.
5655 !
5656 ! !!! IJOB==2,3,12,102,112 require that one has previously called AGMG
5657 ! with IJOB==1 or IJOB==101
5658 !
5659 ! !!! (change with respect to versions 2.x) !!!
5660 ! The preconditioner defined when calling AGMG
5661 ! with IJOB==1 or IJOB==101 is entirely kept in internal memory.
5662 ! Hence the arrays A, JA and IA are not accessed upon subsequent calls
5663 ! with IJOB==3.
5664 ! Upon subsequent calls with IJOB==2,12,102,112, a matrix needs to
5665 ! be supplied in arrays A, JA, IA, but it will be used to
5666 ! perform matrix vector product within the main iterative
5667 ! solution process (and only for this).
5668 ! Hence the system is solved with this matrix which
5669 ! may differ from the matrix in A, JA, IA that was supplied
5670 ! upon the previous call with IJOB==1 or IJOB==101;
5671 ! then AGMG attempts to solve a linear system with the "new"
5672 ! matrix (supplied when IJOB==2,12,102 or 112) using the
5673 ! preconditioner set up for the "old" one (supplied when
5674 ! IJOB==1 or 101).
5675 ! The same remarks apply to IJOB >= 100 or not: the value IJOB==1
5676 ! or 101 determines whether the preconditioner set up and stored
5677 ! in internal memory is based on the matrix or its transpose;
5678 ! the value IJOB==2,12 or 102,112 is used to determine whether
5679 ! the linear system to be solved is with the matrix or its
5680 ! transpose, independently of the set up.
5681 ! Hence one may set up a preconditioner for a matrix and use it
5682 ! for its transpose.
5683 ! These functionalities (set up a preconditioner and use it for another
5684 ! matrix) are provided for the sake of generality but should be
5685 ! used with care; in general, set up is fast with AGMG and hence
5686 ! it is recommended to rerun it even if the matrix changes only
5687 ! slightly.
5688 !
5689 ! IPRINT (input) INTEGER.
5690 ! Indicates the unit number where information is to be printed
5691 ! (N.B.: 5 is converted to 6). If nonpositive, only error
5692 ! messages are printed on standard output.
5693 !
5694 ! NREST (input) INTEGER.
5695 ! Restart parameter for GCR (an implementation of GMRES)
5696 ! Nonpositive values are converted to NREST=10 (default)
5697 !
5698 ! !! If NREST==1, Flexible CG is used instead of GCR (when IJOB==0,10,2,
5699 ! 12,100,110,102,112) and also (IJOB==0,1,100,101) performs some
5700 ! simplification during the set up based on the assumption
5701 ! that the matrix supplied in A, JA, IA is symmetric (there is
5702 ! then no more difference between IJOB==1 and IJOB==101).
5703 !
5704 ! !!! NREST=1 Should be used if and only if the matrix is really SYMMETRIC
5705 ! !!! (and positive definite).
5706 !
5707 ! ITER (input/output) INTEGER.
5708 ! On input, the maximum number of iterations. Should be positive.
5709 ! On output, actual number of iterations.
5710 ! If this number of iteration was insufficient to meet convergence
5711 ! criterion, ITER will be returned negative and equal to the
5712 ! opposite of the number of iterations performed.
5713 ! Significant only if IJOB==0, 2, 10, 12, 100, 102, 110, 112
5714 !
5715 ! TOL (input) REAL (kind(0.0d0)).
5716 ! The tolerance on residual norm. Iterations are stopped whenever
5717 ! || A*x-f || <= TOL* || f ||
5718 ! Should be positive and less than 1.0
5719 ! Significant only if IJOB==0, 2, 10, 12, 100, 102, 110, 112
5720 !
5721 !!!!! Remark !!!! Except insufficient number of iterations to achieve
5722 ! convergence (characterized by a negative value returned
5723 ! in ITER), all other detected errors are fatal and lead
5724 ! to a STOP statement.
5725 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
5726 !
5727 ! Local variables
5728 !
5729  INTEGER, SAVE :: nza
5730  REAL(kind(0.0d0)), SAVE :: cputm=0.0d0,eltm=0.0d0,flop=0.0d0
5731  LOGICAL, SAVE :: preprocessed=.false.,solve=.false.
5732  INTEGER :: i,init,ijb,k
5733  REAL(kind(0.0d0)) :: cputmp,eltmp,memh
5734  REAL(kind(0.0d0)) :: resid
5735  INTEGER, POINTER, DIMENSION(:) :: iw
5736  REAL(kind(0.0d0)), POINTER, DIMENSION(:) :: ascal
5737  REAL(kind(0.0d0)), POINTER, DIMENSION(:) :: w
5738  REAL(kind(0.0d0)) :: adum(1),fdum(1)
5739  INTEGER :: listrank(1),ifirstlistrank
5740 !
5741  wfo=.true.
5742  iout=iprint
5743  IF (iprint <= 0) THEN
5744  iout=6
5745  wfo=.false.
5746  ELSE IF (iprint == 5) THEN
5747  iout=6
5748  END IF
5749  ijb=mod(ijob,100)
5750 ! transpose on input if ijob >= 100
5751  trans=ijob.GE.100
5752 ! matrix is assume symmetric positive definite if and only if nrst==1
5753  spd=nrest.EQ.1
5754 ! transposition disabled at inner levels if matrix supposed spd
5755  transint=trans.AND.(.NOT.spd)
5756 !
5757  wff=wfo.AND.(irank<=0)
5758 !
5759  IF (mod(ijb,10) >= 2 .AND. .NOT.preprocessed) THEN
5760  WRITE (iout,1001) irank,ijob
5761  stop
5762  END IF
5763 !
5764  IF (ijb < 0 .AND. solve) GOTO 450
5765  IF (ijb < 0) GOTO 500
5766  IF (mod(ijb,10) >= 2) GOTO 300
5767  IF (preprocessed) THEN
5768  CALL dagmg_relmem
5769  IF (.NOT.allzero) &
5770  CALL dagmg_mumpsseq(nn(nlev),adum,ja,ia,fdum,-2,flop)
5771  preprocessed=.false.
5772  solve=.false.
5773  eltm=0.0d0
5774  cputm=0.0d0
5775  flop=0.0d0
5776  kstat=0
5777  END IF
5778  CALL dagmg_mestime(-1,0.0d0,0.0d0)
5779 !
5780 ! Initial settings
5781 !
5782  nza=ia(n+1)-ia(1)
5783  IF (huge(n) > 1.0e10) THEN
5784  rlenilen=dble(8)/dble(real_len)
5785  ELSE
5786  rlenilen=dble(4)/dble(real_len)
5787  END IF
5788 !
5789 !
5790 !----- PREPROCESSING
5791 !
5792  nlev=0
5793  imult=1
5794  gcrin=.false.
5795  IF (wfo) THEN
5796  WRITE(iout,900) irank
5797  END IF
5798 !
5799 ! Partial ordering of the rows
5800 !
5801  ALLOCATE(dt(1)%idiag(n+1),w(n),iw(n))
5802  CALL dagmg_partroword(n,a,ja,ia,dt(1)%idiag,w,iw)
5803  DEALLOCATE(w,iw)
5804  !
5805  ! Setup: define hierarchy of grids and matrices
5806  !
5807  CALL dagmg_setupl1(n,a,ja,ia,listrank,ifirstlistrank)
5808  !
5809  ! Matrix is defined at each level: now perform smoother set up
5810  !
5811  CALL dagmg_smoothsetup
5812  preprocessed=.true.
5813  memh=memr+memi*rlenilen
5814 !
5815  CALL dagmg_mestime(1,cputmp,eltmp)
5816  IF(wfo)THEN
5817  IF (nlev > 1) THEN
5818  WRITE(iout,960) irank, memax/nza, real_len &
5819  , memax*real_len/(2**20)
5820  IF(mod(ijb,10) == 1) THEN
5821  WRITE(iout,961) irank, memh/nza, real_len &
5822  , memh*real_len/(2**20)
5823  END IF
5824  END IF
5825  !CPU_TIME: next line may be uncommented if implemented
5826  ! (see subroutine mestime)
5827  ! WRITE(iout,996) IRANK,cputmp
5828  WRITE(iout,997) irank,eltmp
5829  WRITE(iout,'()')
5830  END IF
5831 !
5832  IF (mod(ijb,10) == 1) RETURN
5833 !
5834 !----- SOLUTION PROCESS
5835 !
5836 300 CONTINUE
5837  CALL dagmg_mestime(-2,0.0d0,0.0d0)
5838  resid=tol
5839  nrst=nrest
5840  init=max(ijb/10,0)
5841  IF (nrst <= 0) nrst=10 ! Default restart paramater for GMRESR
5842  !
5843  !- SINGLE APPLICATION OF MG PRECONDITIONER
5844  IF (mod(ijb,10) >= 3) THEN
5845  IF (wfo) THEN
5846  WRITE(iout,901) irank
5847  END IF
5848  CALL dagmg_applyprec(n,f,x,a,ja,ia,flop)
5849  CALL dagmg_mestime(2,cputmp,eltmp)
5850  cputm=cputm+cputmp
5851  eltm=eltm+eltmp
5852  solve=.true.
5853  RETURN
5854  !
5855  !- END OF MG PREC
5856  !
5857  !- GCR SOLUTION
5858  ELSE IF (nrst > 1) THEN
5859  !
5860  CALL dagmg_gcr(n,f,x,iter,resid,a,ja,ia,init,flop)
5861  !
5862  !- END OF GCR SOLVE
5863  !
5864  !- FLEXIBLE CG SOLUTION
5865  ELSE
5866  !
5867  CALL dagmg_flexcg(n,f,x,iter,resid,a,ja,ia,init,flop)
5868  !
5869  !- END OF FCG SOLVE
5870  !
5871  END IF
5872 !
5873  CALL dagmg_mestime(2,cputm,eltm)
5874  solve=.false.
5875  GOTO 455
5876 450 CONTINUE
5877  IF (wfo) THEN
5878  WRITE(iout,'()')
5879  WRITE(iout,990) irank
5880  WRITE(iout,'()')
5881  END IF
5882 455 CONTINUE
5883  IF (wfo) THEN
5884  IF (wff .AND. iter.NE.0) THEN
5885  DO i=2,nlev-1
5886  WRITE(iout,955) i,kstat(2,i-1),kstat(2,i), &
5887  dble(kstat(2,i))/dble(kstat(2,i-1)),kstat(1,i)
5888  END DO
5889  END IF
5890  WRITE(iout,'()')
5891  WRITE(iout,954) irank,flop/dble(11*n+2*nza)
5892  WRITE(iout,962) irank,(memh+mritr)/nza, real_len &
5893  , (memh+mritr)*real_len/(2**20)
5894  !CPU_TIME: next line may be uncommented if implemented
5895  ! (see subroutine mestime)
5896  ! WRITE(iout,998) IRANK,cputm
5897  WRITE(iout,999) irank,eltm
5898  WRITE(iout,'()')
5899  END IF
5900  solve=.false.
5901  eltm=0.0d0
5902  cputm=0.0d0
5903  flop=0.0d0
5904  kstat=0
5905 !
5906  IF (mod(ijb,10) > 0) RETURN
5907 !
5908 !---- RELEASE MEMORY
5909 !
5910 500 CONTINUE
5911  CALL dagmg_relmem
5912  IF (.NOT.allzero) &
5913  CALL dagmg_mumpsseq(nn(nlev),adum,ja,ia,fdum,-2,flop)
5914  preprocessed=.false.
5915  solve=.false.
5916  eltm=0.0d0
5917  cputm=0.0d0
5918  flop=0.0d0
5919  kstat=0
5920  IF (wfo) THEN
5921  WRITE (iout,902) irank
5922  END IF
5923 !
5924  CONTINUE
5925 !
5926  RETURN
5927 900 FORMAT(i3,'*ENTERING AGMG **********************************',&
5928  '***************************')
5929 901 FORMAT(i3,'*ONE APPLICATION OF AGMG PRECONDITIONER')
5930 902 FORMAT(i3,'*LEAVING AGMG * (MEMORY RELEASED) ***************',&
5931  '***************************')
5932 903 FORMAT(i3,'*SYSTEM TOO SMALL: use direct solver')
5933 954 FORMAT(i3,'*',' Equiv. number of CG iter.:',f9.2)
5934 955 FORMAT('**** level',i2,' #call=',i6,' #cycle=',i6, &
5935  ' mean=',f7.2,' max=',i3)
5936 960 FORMAT( i3,'*',' memory used (peak):',f9.2, &
5937  ' real(',i1,') words per nnz (',f8.2,' Mb)')
5938 961 FORMAT( i3,'*',' memory still allocated:',f9.2, &
5939  ' real(',i1,') words per nnz (',f8.2,' Mb)')
5940 962 FORMAT( i3,'*',' memory used for solution:',f9.2, &
5941  ' real(',i1,') words per nnz (',f8.2,' Mb)')
5942 990 FORMAT(i3,'*GLOBAL STATISTICS for preconditioner application:')
5943 996 FORMAT(i3,'*',' Setup time (CPU): ',1pe10.2, &
5944  ' seconds')
5945 997 FORMAT(i3,'*',' Setup time (Elapsed): ',1pe10.2, &
5946  ' seconds')
5947 998 FORMAT(i3,'*',' Solution time (CPU): ',1pe10.2, &
5948  ' seconds')
5949 999 FORMAT(i3,'*',' Solution time (Elapsed): ',1pe10.2, &
5950  ' seconds')
5951 1001 FORMAT(i3,'*',' FATAL ERROR: setup not done: ijob=',i3, &
5952  ' is not allowed')
5953 1002 FORMAT(i3,'*',' FATAL ERROR: ijob=',i3, &
5954  ' (i.e. >= 100: work with transpose) is not allowed in the || case')
5955  END SUBROUTINE dagmg
5956 !-----------------------------------------------------------------------
5957 !!!!!!!!!!!!!!!!!!! END of MAIN DRIVER
5958 !-----------------------------------------------------------------------
5959 !------------------ END of source file ---------------------------------
double dt
time step, s
double sig
correlated to variance of initial distribution