Actual source code: math2opusutils.cu

  1: #include <petsc/private/matimpl.h>
  2: #include <petsc/private/vecimpl.h>
  3: #include <petscsf.h>
  4: #if defined(PETSC_HAVE_CUDA)
  5: #include <thrust/for_each.h>
  6: #include <thrust/device_vector.h>
  7: #include <thrust/execution_policy.h>
  8: #endif

 10: PETSC_INTERN PetscErrorCode PetscSFGetVectorSF(PetscSF sf, PetscInt nv, PetscInt ldr, PetscInt ldl, PetscSF *vsf)
 11: {
 12:   PetscSF           rankssf;
 13:   const PetscSFNode *iremote;
 14:   PetscSFNode       *viremote,*rremotes;
 15:   const PetscInt    *ilocal;
 16:   PetscInt          *vilocal = NULL,*ldrs;
 17:   const PetscMPIInt *ranks;
 18:   PetscMPIInt       *sranks;
 19:   PetscInt          nranks,nr,nl,vnr,vnl,i,v,j,maxl;
 20:   MPI_Comm          comm;
 21:   PetscErrorCode    ierr;

 27:   if (nv == 1) {
 28:     PetscObjectReference((PetscObject)sf);
 29:     *vsf = sf;
 30:     return(0);
 31:   }
 32:   PetscObjectGetComm((PetscObject)sf,&comm);
 33:   PetscSFGetGraph(sf,&nr,&nl,&ilocal,&iremote);
 34:   PetscSFGetLeafRange(sf,NULL,&maxl);
 35:   maxl += 1;
 36:   if (ldl == PETSC_DECIDE) ldl = maxl;
 37:   if (ldr == PETSC_DECIDE) ldr = nr;
 38:   if (ldr < nr) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid leading dimension %D < %D",ldr,nr);
 39:   if (ldl < maxl) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid leading dimension %D < %D",ldl,maxl);
 40:   vnr  = nr*nv;
 41:   vnl  = nl*nv;
 42:   PetscMalloc1(vnl,&viremote);
 43:   if (ilocal) {
 44:     PetscMalloc1(vnl,&vilocal);
 45:   }

 47:   /* TODO: Should this special SF be available, e.g.
 48:      PetscSFGetRanksSF or similar? */
 49:   PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
 50:   PetscMalloc1(nranks,&sranks);
 51:   PetscArraycpy(sranks,ranks,nranks);
 52:   PetscSortMPIInt(nranks,sranks);
 53:   PetscMalloc1(nranks,&rremotes);
 54:   for (i=0;i<nranks;i++) {
 55:     rremotes[i].rank  = sranks[i];
 56:     rremotes[i].index = 0;
 57:   }
 58:   PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&rankssf);
 59:   PetscSFSetGraph(rankssf,1,nranks,NULL,PETSC_OWN_POINTER,rremotes,PETSC_OWN_POINTER);
 60:   PetscMalloc1(nranks,&ldrs);
 61:   PetscSFBcastBegin(rankssf,MPIU_INT,&ldr,ldrs,MPI_REPLACE);
 62:   PetscSFBcastEnd(rankssf,MPIU_INT,&ldr,ldrs,MPI_REPLACE);
 63:   PetscSFDestroy(&rankssf);

 65:   j = -1;
 66:   for (i=0;i<nl;i++) {
 67:     const PetscInt r  = iremote[i].rank;
 68:     const PetscInt ii = iremote[i].index;

 70:     if (j < 0 || sranks[j] != r) {
 71:       PetscFindMPIInt(r,nranks,sranks,&j);
 72:     }
 73:     if (j < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unable to locate neighbor rank %D",r);
 74:     for (v=0;v<nv;v++) {
 75:       viremote[v*nl + i].rank  = r;
 76:       viremote[v*nl + i].index = v*ldrs[j] + ii;
 77:       if (ilocal) vilocal[v*nl + i] = v*ldl + ilocal[i];
 78:     }
 79:   }
 80:   PetscFree(sranks);
 81:   PetscFree(ldrs);
 82:   PetscSFCreate(comm,vsf);
 83:   PetscSFSetGraph(*vsf,vnr,vnl,vilocal,PETSC_OWN_POINTER,viremote,PETSC_OWN_POINTER);
 84:   return(0);
 85: }

 87: #if defined(PETSC_HAVE_CUDA)
 88: struct SignVector_Functor
 89: {
 90:     const PetscScalar *v;
 91:     PetscScalar *s;
 92:     SignVector_Functor(const PetscScalar *_v, PetscScalar *_s) : v(_v), s(_s) {}

 94:     __host__ __device__ void operator()(PetscInt i)
 95:     {
 96:         s[i] = (v[i] < 0 ? -1 : 1);
 97:     }
 98: };
 99: #endif

101: PETSC_INTERN PetscErrorCode VecSign(Vec v, Vec s)
102: {
103:   const PetscScalar *av;
104:   PetscScalar       *as;
105:   PetscInt          i,n;
106:   PetscErrorCode    ierr;
107: #if defined(PETSC_HAVE_CUDA)
108:   PetscBool         viscuda,siscuda;
109: #endif

114:   VecGetLocalSize(s,&n);
115:   VecGetLocalSize(v,&i);
116:   if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Invalid local sizes %D != %D",i,n);
117: #if defined(PETSC_HAVE_CUDA)
118:   PetscObjectTypeCompareAny((PetscObject)v,&viscuda,VECSEQCUDA,VECMPICUDA,"");
119:   PetscObjectTypeCompareAny((PetscObject)s,&siscuda,VECSEQCUDA,VECMPICUDA,"");
120:   viscuda = (PetscBool)(viscuda && !v->boundtocpu);
121:   siscuda = (PetscBool)(siscuda && !s->boundtocpu);
122:   if (viscuda && siscuda) {
123:     VecCUDAGetArrayRead(v,&av);
124:     VecCUDAGetArrayWrite(s,&as);
125:     SignVector_Functor sign_vector(av, as);
126:     thrust::for_each(thrust::device,thrust::counting_iterator<PetscInt>(0),
127:                      thrust::counting_iterator<PetscInt>(n), sign_vector);
128:     VecCUDARestoreArrayWrite(s,&as);
129:     VecCUDARestoreArrayRead(v,&av);
130:   } else
131: #endif
132:   {
133:     VecGetArrayRead(v,&av);
134:     VecGetArrayWrite(s,&as);
135:     for (i=0;i<n;i++) as[i] = PetscAbsScalar(av[i]) < 0 ? -1. : 1.;
136:     VecRestoreArrayWrite(s,&as);
137:     VecRestoreArrayRead(v,&av);
138:   }
139:   return(0);
140: }

142: #if defined(PETSC_HAVE_CUDA)
143: struct StandardBasis_Functor
144: {
145:     PetscScalar *v;
146:     PetscInt j;

148:     StandardBasis_Functor(PetscScalar *_v, PetscInt _j) : v(_v), j(_j) {}
149:     __host__ __device__ void operator()(PetscInt i)
150:     {
151:         v[i] = (i == j ? 1 : 0);
152:     }
153: };
154: #endif

156: PETSC_INTERN PetscErrorCode VecSetDelta(Vec x, PetscInt i)
157: {
158: #if defined(PETSC_HAVE_CUDA)
159:   PetscBool      iscuda;
160: #endif
161:   PetscInt       st,en;

165:   VecGetOwnershipRange(x,&st,&en);
166: #if defined(PETSC_HAVE_CUDA)
167:   PetscObjectTypeCompareAny((PetscObject)x,&iscuda,VECSEQCUDA,VECMPICUDA,"");
168:   iscuda = (PetscBool)(iscuda && !x->boundtocpu);
169:   if (iscuda) {
170:     PetscScalar *ax;
171:     VecCUDAGetArrayWrite(x,&ax);
172:     StandardBasis_Functor delta(ax, i-st);
173:     thrust::for_each(thrust::device,thrust::counting_iterator<PetscInt>(0),
174:                      thrust::counting_iterator<PetscInt>(en-st), delta);
175:     VecCUDARestoreArrayWrite(x,&ax);
176:   } else
177: #endif
178:   {
179:     VecSet(x,0.);
180:     if (st <= i && i < en) {
181:       VecSetValue(x,i,1.0,INSERT_VALUES);
182:     }
183:     VecAssemblyBegin(x);
184:     VecAssemblyEnd(x);
185:   }
186:   return(0);
187: }

189: /* these are approximate norms */
190: /* NORM_2: Estimating the matrix p-norm Nicholas J. Higham
191:    NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */
192: PETSC_INTERN PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal* n)
193: {
194:   Vec            x,y,w,z;
195:   PetscReal      normz,adot;
196:   PetscScalar    dot;
197:   PetscInt       i,j,N,jold = -1;
199:   PetscBool      boundtocpu = PETSC_TRUE;

202: #if defined(PETSC_HAVE_DEVICE)
203:   boundtocpu = A->boundtocpu;
204: #endif
205:   switch (normtype) {
206:   case NORM_INFINITY:
207:   case NORM_1:
208:     if (normsamples < 0) normsamples = 10; /* pure guess */
209:     if (normtype == NORM_INFINITY) {
210:       Mat B;
211:       MatCreateTranspose(A,&B);
212:       A = B;
213:     } else {
214:       PetscObjectReference((PetscObject)A);
215:     }
216:     MatCreateVecs(A,&x,&y);
217:     MatCreateVecs(A,&z,&w);
218:     VecBindToCPU(x,boundtocpu);
219:     VecBindToCPU(y,boundtocpu);
220:     VecBindToCPU(z,boundtocpu);
221:     VecBindToCPU(w,boundtocpu);
222:     VecGetSize(x,&N);
223:     VecSet(x,1./N);
224:     *n   = 0.0;
225:     for (i = 0; i < normsamples; i++) {
226:       MatMult(A,x,y);
227:       VecSign(y,w);
228:       MatMultTranspose(A,w,z);
229:       VecNorm(z,NORM_INFINITY,&normz);
230:       VecDot(x,z,&dot);
231:       adot = PetscAbsScalar(dot);
232:       PetscInfo4(A,"%s norm it %D -> (%g %g)\n",NormTypes[normtype],i,(double)normz,(double)adot);
233:       if (normz <= adot && i > 0) {
234:         VecNorm(y,NORM_1,n);
235:         break;
236:       }
237:       VecMax(z,&j,&normz);
238:       if (j == jold) {
239:         VecNorm(y,NORM_1,n);
240:         PetscInfo2(A,"%s norm it %D -> breakdown (j==jold)\n",NormTypes[normtype],i);
241:         break;
242:       }
243:       jold = j;
244:       VecSetDelta(x,j);
245:     }
246:     MatDestroy(&A);
247:     VecDestroy(&x);
248:     VecDestroy(&w);
249:     VecDestroy(&y);
250:     VecDestroy(&z);
251:     break;
252:   case NORM_2:
253:     if (normsamples < 0) normsamples = 20; /* pure guess */
254:     MatCreateVecs(A,&x,&y);
255:     MatCreateVecs(A,&z,NULL);
256:     VecBindToCPU(x,boundtocpu);
257:     VecBindToCPU(y,boundtocpu);
258:     VecBindToCPU(z,boundtocpu);
259:     VecSetRandom(x,NULL);
260:     VecNormalize(x,NULL);
261:     *n   = 0.0;
262:     for (i = 0; i < normsamples; i++) {
263:       MatMult(A,x,y);
264:       VecNormalize(y,n);
265:       MatMultTranspose(A,y,z);
266:       VecNorm(z,NORM_2,&normz);
267:       VecDot(x,z,&dot);
268:       adot = PetscAbsScalar(dot);
269:       PetscInfo5(A,"%s norm it %D -> %g (%g %g)\n",NormTypes[normtype],i,(double)*n,(double)normz,(double)adot);
270:       if (normz <= adot) break;
271:       if (i < normsamples - 1) {
272:         Vec t;

274:         VecNormalize(z,NULL);
275:         t = x;
276:         x = z;
277:         z = t;
278:       }
279:     }
280:     VecDestroy(&x);
281:     VecDestroy(&y);
282:     VecDestroy(&z);
283:     break;
284:   default:
285:     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"%s norm not supported",NormTypes[normtype]);
286:   }
287:   PetscInfo3(A,"%s norm %g computed in %D iterations\n",NormTypes[normtype],(double)*n,i);
288:   return(0);
289: }