lamino_bp_generic.cl 2.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. // All kernels must process the volume voxel-wise (one work-item per voxel).
  2. // Be careful when choosing the local and global work sizes:
  3. // the kernels need a 3D NDRange (not 2D).
  4. // Tests show the maximum volume size is 1024 x 1024 x 256 or 2048 x 2048 x 64.
  5. // Theory: 223 MB (CL_DEVICE_MAX_MEM_ALLOC_SIZE) x 5 cards / 4 bytes (float) = 1024 x 1024 x 278 voxels
  6. #include <lamino-filter-def.h>
  7. __kernel void lamino_bp_generic ( __global float *proj,
  8. __global float *volume,
  9. __global CLParameters *param)
  10. {
  11. const int vX = get_global_id(0);
  12. const int vY = get_global_id(1);
  13. const int vZ = get_global_id(2);
  14. const int vSX = get_global_size(0);
  15. const int vSY = get_global_size(1);
  16. const int vSZ = get_global_size(2);
  17. const long int idx = (vZ * vSY * vSX) + (vY * vSX) + vX;
  18. //const int idx = (vY * vSX) + vX;
  19. float newz = (float)vZ - param->vol_oz;
  20. float newz_matr02 = newz * param->mat_2 + param->proj_ox;
  21. float newz_matr12 = newz * param->mat_5 + param->proj_oy;
  22. /// prepare y info
  23. float newy = (float)vY - param->vol_oy;
  24. float newy_matr01 = newy * param->mat_1 + newz_matr02;
  25. float newy_matr11 = newy * param->mat_4 + newz_matr12;
  26. /// prepare x info
  27. float newx = (float)vX - param->vol_ox;
  28. float oldy = newx * param->mat_3 + newy_matr11;
  29. float yo = floor(oldy);
  30. float oldx = newx*param->mat_0 + newy_matr01;
  31. float xo = floor(oldx);
  32. // bilinear interpolation
  33. float yf_1 = oldy - yo;
  34. float yf_0 = 1.0f - yf_1;
  35. float xf_1 = oldx - xo;
  36. float xf_0 = 1.0f - xf_1;
  37. int base = (int)xo + (int)yo * param->proj_sx;
  38. float result;
  39. // TODO: check that out of proj plain
  40. result = proj[base ] * xf_0 * yf_0;
  41. result += proj[base + 1] * xf_1 * yf_0;
  42. result += proj[base + param->proj_sx ] * xf_0 * yf_1;
  43. result += proj[base + param->proj_sx + 1] * xf_1 * yf_1;
  44. volume[idx] += result;
  45. }
  46. __kernel void lamino_clean_vol(__global float *volume)
  47. {
  48. const int vX = get_global_id(0);
  49. const int vY = get_global_id(1);
  50. const int vZ = get_global_id(2);
  51. const int vSX = get_global_size(0);
  52. const int vSY = get_global_size(1);
  53. const int vSZ = get_global_size(2);
  54. const int idx = (vZ * vSY * vSX) + (vY * vSX) + vX;
  55. //const int idx = (vY * vSX) + vX;
  56. volume[idx] = 0;
  57. }
  58. __kernel void lamino_norm_vol(__global float *volume,
  59. const float factor)
  60. {
  61. const int vX = get_global_id(0);
  62. const int vY = get_global_id(1);
  63. const int vZ = get_global_id(2);
  64. const int vSX = get_global_size(0);
  65. const int vSY = get_global_size(1);
  66. const int vSZ = get_global_size(2);
  67. const int idx = (vZ * vSY * vSX) + (vY * vSX) + vX;
  68. //const int idx = (vY * vSX) + vX;
  69. float val = volume[idx] * factor;
  70. volume[idx] = val;
  71. }