lamino_bp_generic.cl 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. // all kernels must process the volume voxelwise
  2. // please be careful with local and global workers
  3. // we need a 3D processing (not 2D)
  4. // tests show the maximum volume size: 1024 x 1024 x 256 or 2048 x 2048 x 64
  5. // theory: 223 MB (CL_DEVICE_MAX_MEM_ALLOC_SIZE) x 5 cards / 4 Bytes (Float) = 1024 x 1024 x 278 voxels
  6. #include <lamino-filter-def.h>
  /*
   * Back-project one projection image into the volume, one work-item per voxel.
   *
   * Must be launched as a 3D NDRange: the global sizes of dimensions 0, 1 and 2
   * are used as the volume extent in x, y and z.  Each work-item
   *   1. shifts its voxel index by the volume origin (vol_ox, vol_oy, vol_oz),
   *   2. maps it into the projection with the 2x3 matrix
   *        | mat_0 mat_1 mat_2 |
   *        | mat_3 mat_4 mat_5 |
   *      and adds the projection origin (proj_ox, proj_oy),
   *   3. samples the projection with bilinear interpolation,
   *   4. accumulates the sample into its voxel.
   *
   * proj   : projection image, rows stored contiguously with row length
   *          param->proj_sx
   * volume : accumulation buffer, x varies fastest, then y, then z
   * param  : geometry parameters (CLParameters from lamino-filter-def.h)
   */
  kernel void
  lamino_bp_generic (global float *proj,
                     global float *volume,
                     global CLParameters *param)
  {
      /* Keep the ids in size_t: narrowing them to ushort wraps past 65535. */
      const size_t vX = get_global_id(0);
      const size_t vY = get_global_id(1);
      const size_t vZ = get_global_id(2);

      const size_t vSX = get_global_size(0);
      const size_t vSY = get_global_size(1);

      /* Linear voxel index, x fastest. */
      const size_t idx = (vZ * vSY * vSX) + (vY * vSX) + vX;

      /* Voxel position relative to the volume origin. */
      const float newx = (float)vX - param->vol_ox;
      const float newy = (float)vY - param->vol_oy;
      const float newz = (float)vZ - param->vol_oz;

      /* Position of the voxel in the projection image. */
      const float oldx = newx * param->mat_0 + newy * param->mat_1 + newz * param->mat_2 + param->proj_ox;
      const float oldy = newx * param->mat_3 + newy * param->mat_4 + newz * param->mat_5 + param->proj_oy;

      /* Integer pixel (top-left of the 2x2 footprint), computed once. */
      const float xo = floor(oldx);
      const float yo = floor(oldy);

      /*
       * Skip voxels whose footprint leaves the projection: a negative
       * coordinate would read in front of the buffer, and x + 1 past the row
       * end would wrap into the next row.  The test is done in float and
       * written in the negated form so that NaN coordinates are rejected too.
       * TODO: also reject yo + 1 >= projection height; the height is not
       * available from the parameter fields used in this kernel.
       */
      if (!(xo >= 0.0f && yo >= 0.0f && xo + 1.0f < (float)param->proj_sx)) {
          return;
      }

      /* Bilinear weights: *_1 is the fractional part, *_0 its complement. */
      const float xf_1 = oldx - xo;
      const float xf_0 = 1.0f - xf_1;
      const float yf_1 = oldy - yo;
      const float yf_0 = 1.0f - yf_1;

      /* Index of the top-left pixel of the footprint. */
      const int base = ((int) xo) + ((int) yo) * param->proj_sx;

      float result;
      result  = proj[base                     ] * xf_0 * yf_0;
      result += proj[base + 1                 ] * xf_1 * yf_0;
      result += proj[base + param->proj_sx    ] * xf_0 * yf_1;
      result += proj[base + param->proj_sx + 1] * xf_1 * yf_1;

      volume[idx] += result;
  }
  /*
   * Reset the volume to zero, one work-item per voxel.
   *
   * Must be launched as a 3D NDRange; the global sizes of dimensions 0 and 1
   * are used as the x and y strides of the buffer (x varies fastest).
   *
   * volume : buffer to clear
   */
  __kernel void lamino_clean_vol(__global float *volume)
  {
      /*
       * Ids, sizes and the linear index stay in size_t: ushort would wrap
       * past 65535 and an int product can overflow for large volumes.
       */
      const size_t vX = get_global_id(0);
      const size_t vY = get_global_id(1);
      const size_t vZ = get_global_id(2);

      const size_t vSX = get_global_size(0);
      const size_t vSY = get_global_size(1);

      const size_t idx = (vZ * vSY * vSX) + (vY * vSX) + vX;

      volume[idx] = 0.0f;
  }
  /*
   * Scale every voxel of the volume by a constant, one work-item per voxel.
   *
   * Must be launched as a 3D NDRange; the global sizes of dimensions 0 and 1
   * are used as the x and y strides of the buffer (x varies fastest).
   *
   * volume : buffer scaled in place
   * factor : multiplier applied to each voxel
   */
  __kernel void lamino_norm_vol(__global float *volume,
                                const float factor)
  {
      /*
       * Ids, sizes and the linear index stay in size_t: ushort would wrap
       * past 65535 and an int product can overflow for large volumes.
       */
      const size_t vX = get_global_id(0);
      const size_t vY = get_global_id(1);
      const size_t vZ = get_global_id(2);

      const size_t vSX = get_global_size(0);
      const size_t vSY = get_global_size(1);

      const size_t idx = (vZ * vSY * vSX) + (vY * vSX) + vX;

      volume[idx] *= factor;
  }