[974] | 1 | #include <math.h> |
---|
| 2 | |
---|
| 3 | enum { batchsize=8 }; |
---|
| 4 | static double Ax[batchsize], Ay[batchsize], Az[batchsize]; |
---|
| 5 | static double Bx[batchsize], By[batchsize], Bz[batchsize]; |
---|
| 6 | static double Cx[batchsize], Cy[batchsize], Cz[batchsize]; |
---|
| 7 | static double Dx[batchsize], Dy[batchsize], Dz[batchsize]; |
---|
| 8 | |
---|
| 9 | /* |
---|
| 10 | |
---|
| 11 | def norm2(a): return a[0]*a[0]+a[1]*a[1]+a[2]*a[2] |
---|
| 12 | def norm(a): return np.sqrt(a[0]*a[0]+a[1]*a[1]+a[2]*a[2]) |
---|
| 13 | |
---|
| 14 | def cross_jit(a,b, c): |
---|
| 15 | c[0]=a[1]*b[2]-b[1]*a[2] |
---|
| 16 | c[1]=a[2]*b[0]-b[2]*a[0] |
---|
| 17 | c[2]=a[0]*b[1]-b[0]*a[1] |
---|
| 18 | |
---|
| 19 | def circum(A,B,C): |
---|
| 20 | # see Dubos et al., GMD 2015, appendix |
---|
| 21 | # the computation is stable, and valid whether a,b,c is clockwise or not |
---|
| 22 | b,c = B-A, C-A |
---|
| 23 | p1,p2 = cross(b,c), norm2(c)*b-norm2(b)*c |
---|
| 24 | p = A+.5*cross(p1,p2)/norm2(p1) |
---|
| 25 | return p/norm(p) |
---|
| 26 | */ |
---|
| 27 | |
---|
| 28 | static inline void circum_main(int N) |
---|
| 29 | { |
---|
| 30 | // Any decent compiler should vectorize this |
---|
| 31 | for(int i=0 ; i<N ; i++) |
---|
| 32 | { |
---|
| 33 | // b,c = B-A, C-A |
---|
| 34 | double bx,by,bz,cx,cy,cz; |
---|
| 35 | bx=Bx[i]-Ax[i]; |
---|
| 36 | by=By[i]-Ay[i]; |
---|
| 37 | bz=Bz[i]-Az[i]; |
---|
| 38 | cx=Cx[i]-Ax[i]; |
---|
| 39 | cy=Cy[i]-Ay[i]; |
---|
| 40 | cz=Cz[i]-Az[i]; |
---|
| 41 | // p = cross(b,c) |
---|
| 42 | double px,py,pz; |
---|
| 43 | px = by*cz-bz*cy; |
---|
| 44 | py = bz*cx-bx*cz; |
---|
| 45 | pz = bx*cy-by*cx; |
---|
| 46 | // q = norm2(c)*b-norm2(b)*c |
---|
| 47 | double b2, c2, qx,qy,qz ; |
---|
| 48 | b2 = bx*bx+by*by+bz*bz; |
---|
| 49 | c2 = cx*cx+cy*cy+cz*cz; |
---|
| 50 | qx = c2*bx-b2*cx; |
---|
| 51 | qy = c2*by-b2*cy; |
---|
| 52 | qz = c2*bz-b2*cz; |
---|
| 53 | // r = A+.5*cross(p,q)/norm2(p) |
---|
| 54 | double p2,rx,ry,rz; |
---|
| 55 | p2 = .5/(px*px+py*py+pz*pz); |
---|
| 56 | rx = Ax[i]+p2*(py*qz-pz*qy); |
---|
| 57 | ry = Ay[i]+p2*(pz*qx-px*qz); |
---|
| 58 | rz = Az[i]+p2*(px*qy-py*qx); |
---|
| 59 | // circ[i]=r/norm(r) |
---|
| 60 | double r=1./sqrt(rx*rx+ry*ry+rz*rz); |
---|
| 61 | Dx[i] = r*rx; |
---|
| 62 | Dy[i] = r*ry; |
---|
| 63 | Dz[i] = r*rz; |
---|
| 64 | } |
---|
| 65 | } |
---|
| 66 | |
---|
| 67 | static inline void circum_copyin(int N, long *ind, double *points) |
---|
| 68 | { |
---|
| 69 | for(int n=0 ; n<N ; n++) |
---|
| 70 | { |
---|
| 71 | int i; const double *ptr; |
---|
| 72 | i=*ind++; ptr=points+3*i; |
---|
| 73 | Ax[n]=*ptr++; |
---|
| 74 | Ay[n]=*ptr++; |
---|
| 75 | Az[n]=*ptr++; |
---|
| 76 | i=*ind++; ptr=points+3*i; |
---|
| 77 | Bx[n]=*ptr++; |
---|
| 78 | By[n]=*ptr++; |
---|
| 79 | Bz[n]=*ptr++; |
---|
| 80 | i=*ind++; ptr=points+3*i; |
---|
| 81 | Cx[n]=*ptr++; |
---|
| 82 | Cy[n]=*ptr++; |
---|
| 83 | Cz[n]=*ptr++; |
---|
| 84 | } |
---|
| 85 | } |
---|
| 86 | |
---|
| 87 | static inline void circum_copyout(int N, double *circ) |
---|
| 88 | { |
---|
| 89 | for(int i=0 ; i<N ; i++) |
---|
| 90 | { |
---|
| 91 | *circ++ = Dx[i]; |
---|
| 92 | *circ++ = Dy[i]; |
---|
| 93 | *circ++ = Dz[i]; |
---|
| 94 | } |
---|
| 95 | } |
---|
| 96 | |
---|
| 97 | static inline void circum_batch(double *points, double *circ) |
---|
| 98 | { |
---|
| 99 | for(int i=0 ; i<batchsize ; i++) |
---|
| 100 | { |
---|
| 101 | Ax[i]=*points++; |
---|
| 102 | Ay[i]=*points++; |
---|
| 103 | Az[i]=*points++; |
---|
| 104 | Bx[i]=*points++; |
---|
| 105 | By[i]=*points++; |
---|
| 106 | Bz[i]=*points++; |
---|
| 107 | Cx[i]=*points++; |
---|
| 108 | Cy[i]=*points++; |
---|
| 109 | Cz[i]=*points++; |
---|
| 110 | } |
---|
| 111 | circum_main(batchsize); |
---|
| 112 | circum_copyout(batchsize, circ); |
---|
| 113 | } |
---|
| 114 | |
---|
| 115 | static inline void circum_fast_(int N, long *ind, double *points, double *circ) |
---|
| 116 | { |
---|
| 117 | // work by batches of size batchsize |
---|
| 118 | while(N>batchsize) |
---|
| 119 | { |
---|
| 120 | circum_copyin(batchsize, ind, points); |
---|
| 121 | circum_main(batchsize); |
---|
| 122 | circum_copyout(batchsize, circ); |
---|
| 123 | ind += 3*batchsize; |
---|
| 124 | circ += 3*batchsize; |
---|
| 125 | N-=batchsize; |
---|
| 126 | } |
---|
| 127 | // finish the job |
---|
| 128 | circum_copyin(N, ind, points); |
---|
| 129 | circum_main(N); |
---|
| 130 | circum_copyout(N, circ); |
---|
| 131 | } |
---|
| 132 | |
---|
| 133 | static inline void fast_scale_vec(double scale, double * vec) |
---|
| 134 | { |
---|
| 135 | vec[0]*=scale; |
---|
| 136 | vec[1]*=scale; |
---|
| 137 | vec[2]*=scale; |
---|
| 138 | } |
---|
| 139 | |
---|
| 140 | /* |
---|
| 141 | |
---|
| 142 | def arc(a,b): return 2.*np.arcsin(.5*norm(a-b)) |
---|
| 143 | |
---|
| 144 | def area(a,b,c): |
---|
| 145 | # http://mathworld.wolfram.com/LHuiliersTheorem.html |
---|
| 146 | a,b,c = .25*arc(b,c), .25*arc(c,a), .25*arc(a,b) |
---|
| 147 | return 4.*arctan(sqrt(tan(a+b+c)*tan(a+b-c)*tan(b+c-a)*tan(a+c-b))) |
---|
| 148 | |
---|
| 149 | def barycenters_fast(regions_size, vert, points, vertices, weight): |
---|
| 150 | N = points.shape[0] |
---|
| 151 | bary = np.zeros((N,3)) |
---|
| 152 | i=0 # running index in vert1, vert2 |
---|
| 153 | for n in range(N): |
---|
| 154 | point = points[n,:] |
---|
| 155 | b = 0. |
---|
| 156 | for j in range(regions_size[n]): |
---|
| 157 | v1 = vertices[vert[i],:] |
---|
| 158 | i = i+1 |
---|
| 159 | v2 = vertices[vert[i],:] |
---|
| 160 | i = i+1 |
---|
| 161 | dA=area(point, v1, v2) |
---|
| 162 | bb = unit_vector(point+v1+v2) |
---|
| 163 | b = b + dA*bb*weight(bb) |
---|
| 164 | bary[n,:] = unit_vector(b) |
---|
| 165 | return bary |
---|
| 166 | |
---|
| 167 | */ |
---|
| 168 | |
---|
| 169 | static inline double arc4(double dx, double dy, double dz) // 1/4 geodesic length, as used in L'Huillier's formula |
---|
| 170 | { |
---|
| 171 | double dist; |
---|
| 172 | // printf("dx dy dz = %f %f %f\n", dx,dy,dz); |
---|
| 173 | dist=dx*dx+dy*dy+dz*dz; |
---|
| 174 | // printf("dist = %f\n", dist); |
---|
| 175 | dist=sqrt(dist); |
---|
| 176 | // printf("dist = %f\n", dist); |
---|
| 177 | dist = .5*asin(.5*dist); |
---|
| 178 | // printf("dist = %f\n", dist); |
---|
| 179 | return dist; |
---|
| 180 | } |
---|
| 181 | |
---|
| 182 | |
---|
| 183 | static inline double barycenters_fast_(int N, const long *degree, const long *ind, |
---|
| 184 | const double *point, const double *w_i, |
---|
| 185 | const double *vertex, const double *w_v, |
---|
| 186 | double *bary) |
---|
| 187 | { |
---|
| 188 | double A=0; // total area |
---|
| 189 | // loop over voronoi cells |
---|
| 190 | for(int cell=0; cell<N ; cell++) |
---|
| 191 | { |
---|
| 192 | double aw,ax,ay,az; // weight and xyz coordinates of generator of voronoi cell |
---|
| 193 | int deg; |
---|
| 194 | double gx=0., gy=0., gz=0.; // barycenter of current voronoi cell |
---|
| 195 | aw=*w_i++; |
---|
| 196 | ax=*point++; |
---|
| 197 | ay=*point++; |
---|
| 198 | az=*point++; |
---|
| 199 | deg=*degree++; |
---|
| 200 | // if(cell<10) printf("deg[%d]=%d\n", cell, deg); |
---|
| 201 | |
---|
| 202 | for(int edge=0; edge<deg ; edge++) // loop over edges of voronoi cell |
---|
| 203 | { |
---|
| 204 | int i,j; const double *ptr; |
---|
| 205 | double bw,bx,by,bz, cw,cx,cy,cz; |
---|
| 206 | i=*ind++; ptr=vertex+3*i; // first vertex |
---|
| 207 | bw=w_v[i]; |
---|
| 208 | bx=*ptr++; |
---|
| 209 | by=*ptr++; |
---|
| 210 | bz=*ptr++; |
---|
| 211 | j=*ind++; ptr=vertex+3*j; // second vertex |
---|
| 212 | cw=w_v[j]; |
---|
| 213 | cx=*ptr++; |
---|
| 214 | cy=*ptr++; |
---|
| 215 | cz=*ptr++; |
---|
| 216 | // printf("cell, vertex1, vertex2 : %d %d %d\n", cell, i, j); |
---|
| 217 | // printf("cell x y z : %f %f %f\n", ax,ay,az); |
---|
| 218 | // printf("vertex1 x y z : %f %f %f\n", bx,by,bz); |
---|
| 219 | // printf("vertex2 x y z : %f %f %f\n", cx,cy,cz); |
---|
| 220 | |
---|
| 221 | double a,b,c,dA; // 1/4 geodesic distances, as needed in L'Huillier's formula |
---|
| 222 | // http://mathworld.wolfram.com/LHuiliersTheorem.html |
---|
| 223 | a=arc4(bx-cx,by-cy,bz-cz); |
---|
| 224 | b=arc4(ax-cx,ay-cy,az-cz); |
---|
| 225 | c=arc4(ax-bx,ay-by,az-bz); |
---|
| 226 | dA=4.*atan(sqrt(tan(a+b+c)*tan(a+b-c)*tan(b+c-a)*tan(a+c-b))); |
---|
| 227 | A+=dA; |
---|
| 228 | gx+=dA*(aw*ax+bw*bx+cw*cx); |
---|
| 229 | gy+=dA*(aw*ay+bw*by+cw*cy); |
---|
| 230 | gz+=dA*(aw*az+bw*bz+cw*cz); |
---|
| 231 | // printf("Triangle sides : %f %f %f\n", a,b,c); |
---|
| 232 | // printf("Local area : dA=%f\n", dA); |
---|
| 233 | } |
---|
| 234 | double g=1./sqrt(gx*gx+gy*gy+gz*gz); |
---|
| 235 | *bary++ = gx*g; |
---|
| 236 | *bary++ = gy*g; |
---|
| 237 | *bary++ = gz*g; |
---|
| 238 | } |
---|
| 239 | // printf("Total area : A=%f\n", A); |
---|
| 240 | return A; // total area should be 4*pi |
---|
| 241 | } |
---|