[974] | 1 | #include <math.h> |
---|
| 2 | |
---|
| 3 | enum { batchsize=8 }; |
---|
| 4 | static double Ax[batchsize], Ay[batchsize], Az[batchsize]; |
---|
| 5 | static double Bx[batchsize], By[batchsize], Bz[batchsize]; |
---|
| 6 | static double Cx[batchsize], Cy[batchsize], Cz[batchsize]; |
---|
| 7 | static double Dx[batchsize], Dy[batchsize], Dz[batchsize]; |
---|
| 8 | |
---|
| 9 | /* |
---|
| 10 | |
---|
| 11 | def norm2(a): return a[0]*a[0]+a[1]*a[1]+a[2]*a[2] |
---|
| 12 | def norm(a): return np.sqrt(a[0]*a[0]+a[1]*a[1]+a[2]*a[2]) |
---|
| 13 | |
---|
| 14 | def cross_jit(a,b, c): |
---|
| 15 | c[0]=a[1]*b[2]-b[1]*a[2] |
---|
| 16 | c[1]=a[2]*b[0]-b[2]*a[0] |
---|
| 17 | c[2]=a[0]*b[1]-b[0]*a[1] |
---|
| 18 | |
---|
| 19 | def circum(A,B,C): |
---|
| 20 | # see Dubos et al., GMD 2015, appendix |
---|
| 21 | # the computation is stable, and valid whether a,b,c is clockwise or not |
---|
| 22 | b,c = B-A, C-A |
---|
| 23 | p1,p2 = cross(b,c), norm2(c)*b-norm2(b)*c |
---|
| 24 | p = A+.5*cross(p1,p2)/norm2(p1) |
---|
| 25 | return p/norm(p) |
---|
| 26 | */ |
---|
| 27 | |
---|
| 28 | static inline void circum_main(int N) |
---|
| 29 | { |
---|
[975] | 30 | int i; |
---|
[974] | 31 | // Any decent compiler should vectorize this |
---|
[975] | 32 | for(i=0 ; i<N ; i++) |
---|
[974] | 33 | { |
---|
| 34 | // b,c = B-A, C-A |
---|
| 35 | double bx,by,bz,cx,cy,cz; |
---|
| 36 | bx=Bx[i]-Ax[i]; |
---|
| 37 | by=By[i]-Ay[i]; |
---|
| 38 | bz=Bz[i]-Az[i]; |
---|
| 39 | cx=Cx[i]-Ax[i]; |
---|
| 40 | cy=Cy[i]-Ay[i]; |
---|
| 41 | cz=Cz[i]-Az[i]; |
---|
| 42 | // p = cross(b,c) |
---|
| 43 | double px,py,pz; |
---|
| 44 | px = by*cz-bz*cy; |
---|
| 45 | py = bz*cx-bx*cz; |
---|
| 46 | pz = bx*cy-by*cx; |
---|
| 47 | // q = norm2(c)*b-norm2(b)*c |
---|
| 48 | double b2, c2, qx,qy,qz ; |
---|
| 49 | b2 = bx*bx+by*by+bz*bz; |
---|
| 50 | c2 = cx*cx+cy*cy+cz*cz; |
---|
| 51 | qx = c2*bx-b2*cx; |
---|
| 52 | qy = c2*by-b2*cy; |
---|
| 53 | qz = c2*bz-b2*cz; |
---|
| 54 | // r = A+.5*cross(p,q)/norm2(p) |
---|
| 55 | double p2,rx,ry,rz; |
---|
| 56 | p2 = .5/(px*px+py*py+pz*pz); |
---|
| 57 | rx = Ax[i]+p2*(py*qz-pz*qy); |
---|
| 58 | ry = Ay[i]+p2*(pz*qx-px*qz); |
---|
| 59 | rz = Az[i]+p2*(px*qy-py*qx); |
---|
| 60 | // circ[i]=r/norm(r) |
---|
| 61 | double r=1./sqrt(rx*rx+ry*ry+rz*rz); |
---|
| 62 | Dx[i] = r*rx; |
---|
| 63 | Dy[i] = r*ry; |
---|
| 64 | Dz[i] = r*rz; |
---|
| 65 | } |
---|
| 66 | } |
---|
| 67 | |
---|
| 68 | static inline void circum_copyin(int N, long *ind, double *points) |
---|
| 69 | { |
---|
[975] | 70 | int n; |
---|
| 71 | for(n=0 ; n<N ; n++) |
---|
[974] | 72 | { |
---|
| 73 | int i; const double *ptr; |
---|
| 74 | i=*ind++; ptr=points+3*i; |
---|
| 75 | Ax[n]=*ptr++; |
---|
| 76 | Ay[n]=*ptr++; |
---|
| 77 | Az[n]=*ptr++; |
---|
| 78 | i=*ind++; ptr=points+3*i; |
---|
| 79 | Bx[n]=*ptr++; |
---|
| 80 | By[n]=*ptr++; |
---|
| 81 | Bz[n]=*ptr++; |
---|
| 82 | i=*ind++; ptr=points+3*i; |
---|
| 83 | Cx[n]=*ptr++; |
---|
| 84 | Cy[n]=*ptr++; |
---|
| 85 | Cz[n]=*ptr++; |
---|
| 86 | } |
---|
| 87 | } |
---|
| 88 | |
---|
| 89 | static inline void circum_copyout(int N, double *circ) |
---|
| 90 | { |
---|
[975] | 91 | int i; |
---|
| 92 | for(i=0 ; i<N ; i++) |
---|
[974] | 93 | { |
---|
| 94 | *circ++ = Dx[i]; |
---|
| 95 | *circ++ = Dy[i]; |
---|
| 96 | *circ++ = Dz[i]; |
---|
| 97 | } |
---|
| 98 | } |
---|
| 99 | |
---|
| 100 | static inline void circum_batch(double *points, double *circ) |
---|
| 101 | { |
---|
[975] | 102 | int i; |
---|
| 103 | for(i=0 ; i<batchsize ; i++) |
---|
[974] | 104 | { |
---|
| 105 | Ax[i]=*points++; |
---|
| 106 | Ay[i]=*points++; |
---|
| 107 | Az[i]=*points++; |
---|
| 108 | Bx[i]=*points++; |
---|
| 109 | By[i]=*points++; |
---|
| 110 | Bz[i]=*points++; |
---|
| 111 | Cx[i]=*points++; |
---|
| 112 | Cy[i]=*points++; |
---|
| 113 | Cz[i]=*points++; |
---|
| 114 | } |
---|
| 115 | circum_main(batchsize); |
---|
| 116 | circum_copyout(batchsize, circ); |
---|
| 117 | } |
---|
| 118 | |
---|
| 119 | static inline void circum_fast_(int N, long *ind, double *points, double *circ) |
---|
| 120 | { |
---|
| 121 | // work by batches of size batchsize |
---|
| 122 | while(N>batchsize) |
---|
| 123 | { |
---|
| 124 | circum_copyin(batchsize, ind, points); |
---|
| 125 | circum_main(batchsize); |
---|
| 126 | circum_copyout(batchsize, circ); |
---|
| 127 | ind += 3*batchsize; |
---|
| 128 | circ += 3*batchsize; |
---|
| 129 | N-=batchsize; |
---|
| 130 | } |
---|
| 131 | // finish the job |
---|
| 132 | circum_copyin(N, ind, points); |
---|
| 133 | circum_main(N); |
---|
| 134 | circum_copyout(N, circ); |
---|
| 135 | } |
---|
| 136 | |
---|
| 137 | static inline void fast_scale_vec(double scale, double * vec) |
---|
| 138 | { |
---|
| 139 | vec[0]*=scale; |
---|
| 140 | vec[1]*=scale; |
---|
| 141 | vec[2]*=scale; |
---|
| 142 | } |
---|
| 143 | |
---|
| 144 | /* |
---|
| 145 | |
---|
| 146 | def arc(a,b): return 2.*np.arcsin(.5*norm(a-b)) |
---|
| 147 | |
---|
| 148 | def area(a,b,c): |
---|
| 149 | # http://mathworld.wolfram.com/LHuiliersTheorem.html |
---|
| 150 | a,b,c = .25*arc(b,c), .25*arc(c,a), .25*arc(a,b) |
---|
| 151 | return 4.*arctan(sqrt(tan(a+b+c)*tan(a+b-c)*tan(b+c-a)*tan(a+c-b))) |
---|
| 152 | |
---|
| 153 | def barycenters_fast(regions_size, vert, points, vertices, weight): |
---|
| 154 | N = points.shape[0] |
---|
| 155 | bary = np.zeros((N,3)) |
---|
| 156 | i=0 # running index in vert1, vert2 |
---|
| 157 | for n in range(N): |
---|
| 158 | point = points[n,:] |
---|
| 159 | b = 0. |
---|
| 160 | for j in range(regions_size[n]): |
---|
| 161 | v1 = vertices[vert[i],:] |
---|
| 162 | i = i+1 |
---|
| 163 | v2 = vertices[vert[i],:] |
---|
| 164 | i = i+1 |
---|
| 165 | dA=area(point, v1, v2) |
---|
| 166 | bb = unit_vector(point+v1+v2) |
---|
| 167 | b = b + dA*bb*weight(bb) |
---|
| 168 | bary[n,:] = unit_vector(b) |
---|
| 169 | return bary |
---|
| 170 | |
---|
| 171 | */ |
---|
| 172 | |
---|
| 173 | static inline double arc4(double dx, double dy, double dz) // 1/4 geodesic length, as used in L'Huillier's formula |
---|
| 174 | { |
---|
| 175 | double dist; |
---|
| 176 | // printf("dx dy dz = %f %f %f\n", dx,dy,dz); |
---|
| 177 | dist=dx*dx+dy*dy+dz*dz; |
---|
| 178 | // printf("dist = %f\n", dist); |
---|
| 179 | dist=sqrt(dist); |
---|
| 180 | // printf("dist = %f\n", dist); |
---|
| 181 | dist = .5*asin(.5*dist); |
---|
| 182 | // printf("dist = %f\n", dist); |
---|
| 183 | return dist; |
---|
| 184 | } |
---|
| 185 | |
---|
| 186 | |
---|
| 187 | static inline double barycenters_fast_(int N, const long *degree, const long *ind, |
---|
| 188 | const double *point, const double *w_i, |
---|
| 189 | const double *vertex, const double *w_v, |
---|
| 190 | double *bary) |
---|
| 191 | { |
---|
[975] | 192 | int cell; |
---|
[974] | 193 | double A=0; // total area |
---|
| 194 | // loop over voronoi cells |
---|
[975] | 195 | for(cell=0; cell<N ; cell++) |
---|
[974] | 196 | { |
---|
| 197 | double aw,ax,ay,az; // weight and xyz coordinates of generator of voronoi cell |
---|
[975] | 198 | int deg, edge; |
---|
[974] | 199 | double gx=0., gy=0., gz=0.; // barycenter of current voronoi cell |
---|
| 200 | aw=*w_i++; |
---|
| 201 | ax=*point++; |
---|
| 202 | ay=*point++; |
---|
| 203 | az=*point++; |
---|
| 204 | deg=*degree++; |
---|
| 205 | // if(cell<10) printf("deg[%d]=%d\n", cell, deg); |
---|
| 206 | |
---|
[975] | 207 | for(edge=0; edge<deg ; edge++) // loop over edges of voronoi cell |
---|
[974] | 208 | { |
---|
| 209 | int i,j; const double *ptr; |
---|
| 210 | double bw,bx,by,bz, cw,cx,cy,cz; |
---|
| 211 | i=*ind++; ptr=vertex+3*i; // first vertex |
---|
| 212 | bw=w_v[i]; |
---|
| 213 | bx=*ptr++; |
---|
| 214 | by=*ptr++; |
---|
| 215 | bz=*ptr++; |
---|
| 216 | j=*ind++; ptr=vertex+3*j; // second vertex |
---|
| 217 | cw=w_v[j]; |
---|
| 218 | cx=*ptr++; |
---|
| 219 | cy=*ptr++; |
---|
| 220 | cz=*ptr++; |
---|
| 221 | // printf("cell, vertex1, vertex2 : %d %d %d\n", cell, i, j); |
---|
| 222 | // printf("cell x y z : %f %f %f\n", ax,ay,az); |
---|
| 223 | // printf("vertex1 x y z : %f %f %f\n", bx,by,bz); |
---|
| 224 | // printf("vertex2 x y z : %f %f %f\n", cx,cy,cz); |
---|
| 225 | |
---|
| 226 | double a,b,c,dA; // 1/4 geodesic distances, as needed in L'Huillier's formula |
---|
| 227 | // http://mathworld.wolfram.com/LHuiliersTheorem.html |
---|
| 228 | a=arc4(bx-cx,by-cy,bz-cz); |
---|
| 229 | b=arc4(ax-cx,ay-cy,az-cz); |
---|
| 230 | c=arc4(ax-bx,ay-by,az-bz); |
---|
| 231 | dA=4.*atan(sqrt(tan(a+b+c)*tan(a+b-c)*tan(b+c-a)*tan(a+c-b))); |
---|
| 232 | A+=dA; |
---|
| 233 | gx+=dA*(aw*ax+bw*bx+cw*cx); |
---|
| 234 | gy+=dA*(aw*ay+bw*by+cw*cy); |
---|
| 235 | gz+=dA*(aw*az+bw*bz+cw*cz); |
---|
| 236 | // printf("Triangle sides : %f %f %f\n", a,b,c); |
---|
| 237 | // printf("Local area : dA=%f\n", dA); |
---|
| 238 | } |
---|
| 239 | double g=1./sqrt(gx*gx+gy*gy+gz*gz); |
---|
| 240 | *bary++ = gx*g; |
---|
| 241 | *bary++ = gy*g; |
---|
| 242 | *bary++ = gz*g; |
---|
| 243 | } |
---|
| 244 | // printf("Total area : A=%f\n", A); |
---|
| 245 | return A; // total area should be 4*pi |
---|
| 246 | } |
---|