diff --git a/include/cvortex/libcvtx.h b/include/cvortex/libcvtx.h index 72aa139..bf85b52 100644 --- a/include/cvortex/libcvtx.h +++ b/include/cvortex/libcvtx.h @@ -48,15 +48,38 @@ typedef struct { float strength; /* Vort per unit length */ } cvtx_F3D; +/* A Vortex particle/filament in 2D */ typedef struct { - float(*g_fn)(float rho); - float(*zeta_fn)(float rho); - void(*combined_fn)(float rho, float* g, float* zeta); - float(*eta_fn)(float rho); + bsv_V2f coord; + float vorticity; + float area; +} cvtx_P2D; + +/* Vortex particle regularisation functions + Naming is following that of Winckelmans + - g(rho): normally used in induced vel + (NULL for unsupported) + - zeta(rho): used with g(rho) in induced dvort + (NULL for unsupported) + - combined(rho): combines g and zeta for perf. + (NULL for unsupported) + - eta(rho): used in particle strength exchange + (NULL for unsupported) + - cl_kernel_name_ext: identifies opencl kernel variant to run. + (fall back to OpenMP) + - 2D and 3D variants +*/ +typedef struct { + float(*g_3D)(float rho); + float(*g_2D)(float rho); + float(*zeta_3D)(float rho); + void(*combined_3D)(float rho, float* g, float* zeta); + float(*eta_3D)(float rho); + float(*eta_2D)(float rho); char cl_kernel_name_ext[32]; } cvtx_VortFunc; -/* cvtx_libary controls */ +/* cvtx libary accelerator controls */ CVTX_EXPORT void cvtx_initialise(); CVTX_EXPORT void cvtx_finalise(); CVTX_EXPORT int cvtx_num_accelerators(); @@ -66,7 +89,7 @@ CVTX_EXPORT int cvtx_accelerator_enabled(int accelerator_id); CVTX_EXPORT void cvtx_accelerator_enable(int accelerator_id); CVTX_EXPORT void cvtx_accelerator_disable(int accelerator_id); -/* cvtx_P3D functions */ +/* cvtx_P3D 3D vortex particle functions */ CVTX_EXPORT bsv_V3f cvtx_P3D_S2S_vel( const cvtx_P3D *self, const bsv_V3f mes_point, @@ -142,8 +165,7 @@ CVTX_EXPORT const cvtx_VortFunc cvtx_VortFunc_winckelmans(void); CVTX_EXPORT const cvtx_VortFunc cvtx_VortFunc_planetary(void); CVTX_EXPORT const cvtx_VortFunc 
cvtx_VortFunc_gaussian(void); -/* cvtx_straight vortex filament functions */ - +/* cvtx_F3D straight vortex filament functions */ CVTX_EXPORT bsv_V3f cvtx_F3D_S2S_vel( const cvtx_F3D *self, const bsv_V3f mes_point); @@ -184,4 +206,27 @@ CVTX_EXPORT void cvtx_F3D_inf_mtrx( const int num_mes, float *result_matrix); +/* cvtx_P2D vortex particle 2D functions */ +CVTX_EXPORT bsv_V2f cvtx_P2D_S2S_vel( + const cvtx_P2D *self, + const bsv_V2f mes_point, + const cvtx_VortFunc *kernel, + float regularisation_radius); + +CVTX_EXPORT bsv_V2f cvtx_P2D_M2S_vel( + const cvtx_P2D **array_start, + const int num_particles, + const bsv_V2f mes_point, + const cvtx_VortFunc *kernel, + float regularisation_radius); + +CVTX_EXPORT void cvtx_P2D_M2M_vel( + const cvtx_P2D **array_start, + const int num_particles, + const bsv_V2f *mes_start, + const int num_mes, + bsv_V2f *result_array, + const cvtx_VortFunc *kernel, + float regularisation_radius); + #endif /* CVTX_LIBCVTX_H */ diff --git a/src/P2D.c b/src/P2D.c new file mode 100644 index 0000000..d7116bc --- /dev/null +++ b/src/P2D.c @@ -0,0 +1,138 @@ +#include "libcvtx.h" +/*============================================================================ +P2D.c + +Vortex particle in 2D with CPU based code. + +Copyright(c) 2019 HJA Bird + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files(the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions : + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +============================================================================*/ + +#include +#include +#include + +#ifdef CVTX_USING_OPENCL +# include "ocl_P2D.h" +#endif + +/* The induced velocity for a 2D particle excluding the constant +coefficient -1 / 2pi, which the exported callers apply afterwards. */ +static inline bsv_V2f P2D_vel_inner( + const cvtx_P2D * self, + const bsv_V2f mes_point, + const cvtx_VortFunc * kernel, + float recip_reg_rad) +{ + bsv_V2f rad, ret; + float radd, rho, g; + if (bsv_V2f_isequal(self->coord, mes_point)) { + ret = bsv_V2f_zero(); + } + else { + rad = bsv_V2f_minus(mes_point, self->coord); + radd = bsv_V2f_abs(rad); + rho = radd * recip_reg_rad; + g = -kernel->g_2D(rho); + ret.x[0] = rad.x[1] * self->vorticity * g / (radd * radd); + ret.x[1] = -rad.x[0] * self->vorticity * g / (radd * radd); + } + return ret; +} + +CVTX_EXPORT bsv_V2f cvtx_P2D_S2S_vel( + const cvtx_P2D * self, + const bsv_V2f mes_point, + const cvtx_VortFunc * kernel, + float regularisation_radius) +{ + bsv_V2f ret; + ret = P2D_vel_inner(self, mes_point, kernel, + 1.f / fabsf(regularisation_radius)); + return bsv_V2f_mult(ret, -1.f / (2.f * acosf(-1.f))); +} + +CVTX_EXPORT bsv_V2f cvtx_P2D_M2S_vel( + const cvtx_P2D **array_start, + const int num_particles, + const bsv_V2f mes_point, + const cvtx_VortFunc *kernel, + float regularisation_radius) +{ + double rx = 0, ry = 0; + long i; + float recip_reg_rad = 1.f / fabsf(regularisation_radius); + assert(num_particles >= 0); +#pragma omp parallel for reduction(+:rx, ry) + for (i = 0; i < num_particles; ++i) { + 
bsv_V2f vel = P2D_vel_inner(array_start[i], + mes_point, kernel, recip_reg_rad); + rx += vel.x[0]; + ry += vel.x[1]; + } + bsv_V2f ret = { (float)rx, (float)ry }; + return bsv_V2f_mult(ret, -1.f / (2.f * acosf(-1.f))); +} + + +static void cpu_brute_force_P2D_M2M_vel( + const cvtx_P2D **array_start, + const int num_particles, + const bsv_V2f *mes_start, + const int num_mes, + bsv_V2f *result_array, + const cvtx_VortFunc *kernel, + float regularisation_radius) +{ + long i; +#pragma omp parallel for schedule(static) + for (i = 0; i < num_mes; ++i) { + result_array[i] = cvtx_P2D_M2S_vel( + array_start, num_particles, mes_start[i], + kernel, regularisation_radius); + } + return; +} + +CVTX_EXPORT void cvtx_P2D_M2M_vel( + const cvtx_P2D **array_start, + const int num_particles, + const bsv_V2f *mes_start, + const int num_mes, + bsv_V2f *result_array, + const cvtx_VortFunc *kernel, + float regularisation_radius) +{ +#ifdef CVTX_USING_OPENCL + if (num_particles < 256 + || num_mes < 256 + || kernel->cl_kernel_name_ext[0] == '\0' /* empty name: no OpenCL kernel variant */ + || opencl_brute_force_P2D_M2M_vel( + array_start, num_particles, mes_start, + num_mes, result_array, kernel, regularisation_radius) != 0) +#endif + { + cpu_brute_force_P2D_M2M_vel( + array_start, num_particles, mes_start, + num_mes, result_array, kernel, regularisation_radius); + } + return; +} + diff --git a/src/P3D.c b/src/P3D.c index e0aaa39..abf87de 100644 --- a/src/P3D.c +++ b/src/P3D.c @@ -1,10 +1,10 @@ #include "libcvtx.h" /*============================================================================ -Particle.c +P3D.c -Basic representation of a vortex particle. +Vortex particle in 3D with CPU based code. -Copyright(c) 2018 HJA Bird +Copyright(c) 2019 HJA Bird Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files(the "Software"), to deal @@ -35,7 +35,7 @@ SOFTWARE. 
/* The induced velocity for a particle excluding the constant coefficient 1 / 4pi */ -inline bsv_V3f particle_ind_vel_inner( +inline bsv_V3f P3D_vel_inner( const cvtx_P3D * self, const bsv_V3f mes_point, const cvtx_VortFunc * kernel, @@ -50,7 +50,7 @@ inline bsv_V3f particle_ind_vel_inner( rad = bsv_V3f_minus(mes_point, self->coord); radd = bsv_V3f_abs(rad); rho = radd * recip_reg_rad; /* Assume positive. */ - cor = -kernel->g_fn(rho); + cor = -kernel->g_3D(rho); den = powf(radd, -3); num = bsv_V3f_cross(rad, self->vorticity); ret = bsv_V3f_mult(num, cor * den); @@ -65,7 +65,7 @@ CVTX_EXPORT bsv_V3f cvtx_P3D_S2S_vel( float regularisation_radius) { bsv_V3f ret; - ret = particle_ind_vel_inner(self, mes_point, kernel, + ret = P3D_vel_inner(self, mes_point, kernel, 1.f/fabsf(regularisation_radius)); return bsv_V3f_mult(ret, 1.f / (4.f * acosf(-1.f))); } @@ -84,7 +84,7 @@ CVTX_EXPORT bsv_V3f cvtx_P3D_S2S_dvort( rad = bsv_V3f_minus(induced_particle->coord, self->coord); radd = bsv_V3f_abs(rad); rho = fabsf(radd / regularisation_radius); - kernel->combined_fn(rho, &g, &f); + kernel->combined_3D(rho, &g, &f); cross_om = bsv_V3f_cross(induced_particle->vorticity, self->vorticity); t1 = (float)1. / ((float)4. 
* (float)acos(-1) * powf(regularisation_radius, 3)); t21n = bsv_V3f_mult(cross_om, g); @@ -100,10 +100,6 @@ CVTX_EXPORT bsv_V3f cvtx_P3D_S2S_dvort( return ret; } -float sphere_volume(float radius){ - return 4 * (float)acos(-1) * radius * radius * radius / (float) 3.; -} - CVTX_EXPORT bsv_V3f cvtx_P3D_S2S_visc_dvort( const cvtx_P3D * self, const cvtx_P3D * induced_particle, @@ -113,7 +109,7 @@ CVTX_EXPORT bsv_V3f cvtx_P3D_S2S_visc_dvort( { bsv_V3f ret, rad, t211, t212, t21, t2; float radd, rho, t1, t22; - assert(kernel->eta_fn != NULL && "Used vortex regularisation" + assert(kernel->eta_3D != NULL && "Used vortex regularisation" "that did have a defined eta function"); if(bsv_V3f_isequal(self->coord, induced_particle->coord)){ ret = bsv_V3f_zero(); @@ -128,7 +124,7 @@ CVTX_EXPORT bsv_V3f cvtx_P3D_S2S_visc_dvort( t212 = bsv_V3f_mult(induced_particle->vorticity, -1 * self->volume); t21 = bsv_V3f_plus(t211, t212); - t22 = kernel->eta_fn(rho); + t22 = kernel->eta_3D(rho); t2 = bsv_V3f_mult(t21, t22); ret = bsv_V3f_mult(t2, t1); } @@ -148,7 +144,7 @@ CVTX_EXPORT bsv_V3f cvtx_P3D_M2S_vel( assert(num_particles >= 0); #pragma omp parallel for reduction(+:rx, ry, rz) for (i = 0; i < num_particles; ++i) { - bsv_V3f vel = particle_ind_vel_inner(array_start[i], + bsv_V3f vel = P3D_vel_inner(array_start[i], mes_point, kernel, recip_reg_rad); rx += vel.x[0]; ry += vel.x[1]; @@ -203,7 +199,7 @@ CVTX_EXPORT bsv_V3f cvtx_P3D_M2S_visc_dvort( return ret; } -static void cpu_brute_force_ParticleArr_Arr_ind_vel( +static void cpu_brute_force_P3D_M2M_vel( const cvtx_P3D **array_start, const int num_particles, const bsv_V3f *mes_start, @@ -240,14 +236,14 @@ CVTX_EXPORT void cvtx_P3D_M2M_vel( num_mes, result_array, kernel, regularisation_radius) != 0) #endif { - cpu_brute_force_ParticleArr_Arr_ind_vel( + cpu_brute_force_P3D_M2M_vel( array_start, num_particles, mes_start, num_mes, result_array, kernel, regularisation_radius); } return; } -void cpu_brute_force_ParticleArr_Arr_ind_dvort( +void 
cpu_brute_force_P3D_M2M_dvort( const cvtx_P3D **array_start, const int num_particles, const cvtx_P3D **induced_start, @@ -284,14 +280,14 @@ CVTX_EXPORT void cvtx_P3D_M2M_dvort( num_induced, result_array, kernel, regularisation_radius) != 0) #endif { - cpu_brute_force_ParticleArr_Arr_ind_dvort( + cpu_brute_force_P3D_M2M_dvort( array_start, num_particles, induced_start, num_induced, result_array, kernel, regularisation_radius); } return; } -void cpu_brute_force_ParticleArr_Arr_visc_ind_dvort( +void cpu_brute_force_P3D_M2M_visc_dvort( const cvtx_P3D **array_start, const int num_particles, const cvtx_P3D **induced_start, @@ -330,7 +326,7 @@ CVTX_EXPORT void cvtx_P3D_M2M_visc_dvort( num_induced, result_array, kernel, regularisation_radius, kinematic_visc) != 0) #endif { - cpu_brute_force_ParticleArr_Arr_visc_ind_dvort( + cpu_brute_force_P3D_M2M_visc_dvort( array_start, num_particles, induced_start, num_induced, result_array, kernel, regularisation_radius, kinematic_visc); } diff --git a/src/VortFunc.c b/src/VortFunc.c index 60d52d4..dc604f6 100644 --- a/src/VortFunc.c +++ b/src/VortFunc.c @@ -42,73 +42,99 @@ static float warn_bad_eta_fn(float rho){ return 0; } -static float g_singular(float rho) { +static float g_singular_3D(float rho) { return 1; } -static float zeta_singular(float rho) { +static float zeta_singular_3D(float rho) { return 0; } -static void combined_singular(float rho, float* g, float* zeta) { +static void combined_singular_3D(float rho, float* g, float* zeta) { *g = 1; *zeta = 0; return; } -static float g_winckel(float rho) { +static float g_singular_2D(float rho) { + return 1; +} + +static float g_winckel_3D(float rho) { float a, b, c, d; assert(rho >= 0 && "Rho should not be -ve"); - a = rho * rho + (float)2.5; + a = rho * rho + 2.5f; b = a * rho * rho * rho; c = rho * rho + 1; - d = (b / powf(c, 2.5)); + d = (b / powf(c, 2.5f)); return d; } -static float zeta_winckel(float rho) { +static float zeta_winckel_3D(float rho) { float a, b, c; assert(rho 
>= 0 && "Rho should not be -ve"); a = rho * rho + 1; - b = powf(a, 3.5); + b = powf(a, 3.5f); c = (float)7.5 / b; return c; } -static float eta_winckel(float rho) { +static float eta_winckel_3D(float rho) { float a, b, c; assert(rho >= 0 && "Rho should not be -ve"); - a = (float) 52.5; + a = 52.5f; b = rho * rho + 1; - c = powf(b, -4.5); + c = powf(b, -4.5f); return a * c; } -static void combined_winckel(float rho, float* g, float* zeta) { +static void combined_winckel_3D(float rho, float* g, float* zeta) { assert(rho >= 0 && "Rho should not be -ve"); - *g = g_winckel(rho); - *zeta = zeta_winckel(rho); + *g = g_winckel_3D(rho); + *zeta = zeta_winckel_3D(rho); return; } -static float g_planetary(float rho) { +static float g_winckel_2D(float rho) { + float num, denom; + num = rho * rho * (rho * rho + 2.f); + denom = rho * rho + 1.f; + denom = denom * denom; + return num / denom; +} + +static float eta_winckel_2D(float rho) { + assert(rho >= 0 && "Rho should not be -ve"); + float a, b, c; + a = rho * rho + 1.f; + b = powf(a, 4); + c = 24.f * expf(4.f / powf(a, 3)); + return c / b; +} + +static float g_planetary_3D(float rho) { assert(rho >= 0 && "Rho should not be -ve"); return rho < (float)1. ? rho * rho * rho : (float)1.; } -static float zeta_planetary(float rho){ +static float zeta_planetary_3D(float rho){ assert(rho >= 0 && "Rho should not be -ve"); - return rho < (float)1. ? (float)3 : (float)0; + return rho < (float)1. ? (float)3 : (float)0; /* same value as the OpenCL planetary dvort kernel */ } -static void combined_planetary(float rho, float* g, float* zeta) { +static void combined_planetary_3D(float rho, float* g, float* zeta) { assert(rho >= 0 && "Rho should not be -ve"); - *g = g_planetary(rho); - *zeta = zeta_planetary(rho); + *g = g_planetary_3D(rho); + *zeta = zeta_planetary_3D(rho); return; } -static float g_gaussian(float rho){ +static float g_planetary_2D(float rho) { + assert(rho >= 0 && "Rho should not be -ve"); + return rho < 1.f ? 
rho * rho : 1.f; +} + +static float g_gaussian_3D(float rho){ /* = 1 to 8sf for rho ~>6. Taylor expansion otherwise */ assert(rho >= 0 && "Rho should not be -ve"); if(rho > (float)6.){ @@ -125,26 +151,38 @@ static float g_gaussian(float rho){ } } -static float zeta_gaussian(float rho){ +static float zeta_gaussian_3D(float rho){ assert(rho >= 0 && "Rho should not be -ve"); const float pi = 3.14159265359f; return sqrtf(2 / pi) * expf(-rho * rho / 2); } -static void combined_gaussian(float rho, float* g, float* zeta) { +static void combined_gaussian_3D(float rho, float* g, float* zeta) { assert(rho >= 0 && "Rho should not be -ve"); - *g = g_gaussian(rho); - *zeta = zeta_gaussian(rho); + *g = g_gaussian_3D(rho); + *zeta = zeta_gaussian_3D(rho); return; } +static float g_gaussian_2D(float rho) { + assert(rho >= 0 && "Rho should not be -ve"); + return 1 - expf(-rho * rho / 2); +} + +static float zeta_gaussian_2D(float rho) { + assert(rho >= 0 && "Rho should not be -ve"); + return expf(-rho * rho / 2.f); +} + CVTX_EXPORT const cvtx_VortFunc cvtx_VortFunc_singular(void) { cvtx_VortFunc ret; - ret.g_fn = &g_singular; - ret.zeta_fn = &zeta_singular; - ret.eta_fn = &warn_bad_eta_fn; /* Not possible for singular vortex */ - ret.combined_fn = &combined_singular; + ret.g_3D = &g_singular_3D; + ret.g_2D = &g_singular_2D; + ret.zeta_3D = &zeta_singular_3D; + ret.eta_3D = &warn_bad_eta_fn; /* Not possible for singular vortex */ + ret.eta_2D = &warn_bad_eta_fn; + ret.combined_3D = &combined_singular_3D; strcpy(ret.cl_kernel_name_ext, "singular"); return ret; } @@ -152,10 +190,12 @@ CVTX_EXPORT const cvtx_VortFunc cvtx_VortFunc_singular(void) CVTX_EXPORT const cvtx_VortFunc cvtx_VortFunc_winckelmans(void) { cvtx_VortFunc ret; - ret.g_fn = &g_winckel; - ret.zeta_fn = &zeta_winckel; - ret.eta_fn = eta_winckel; - ret.combined_fn = &combined_winckel; + ret.g_3D = &g_winckel_3D; + ret.g_2D = &g_winckel_2D; + ret.zeta_3D = &zeta_winckel_3D; + ret.eta_3D = &eta_winckel_3D; + ret.eta_2D = 
&eta_winckel_2D; + ret.combined_3D = &combined_winckel_3D; strcpy(ret.cl_kernel_name_ext, "winckelmans"); return ret; } @@ -163,21 +203,25 @@ CVTX_EXPORT const cvtx_VortFunc cvtx_VortFunc_winckelmans(void) CVTX_EXPORT const cvtx_VortFunc cvtx_VortFunc_planetary(void) { cvtx_VortFunc ret; - ret.g_fn = &g_planetary; - ret.zeta_fn = &zeta_planetary; - ret.eta_fn = &warn_bad_eta_fn; /* Not possible for planetary vortex */ - ret.combined_fn = &combined_winckel; + ret.g_3D = &g_planetary_3D; + ret.g_2D = &g_planetary_2D; + ret.zeta_3D = &zeta_planetary_3D; + ret.eta_3D = &warn_bad_eta_fn; /* Not possible for planetary vortex */ + ret.eta_2D = &warn_bad_eta_fn; + ret.combined_3D = &combined_planetary_3D; strcpy(ret.cl_kernel_name_ext, "planetary"); return ret; } CVTX_EXPORT const cvtx_VortFunc cvtx_VortFunc_gaussian(void){ cvtx_VortFunc ret; - ret.g_fn = &g_gaussian; - ret.zeta_fn = &zeta_gaussian; - ret.eta_fn = &zeta_gaussian; + ret.g_3D = &g_gaussian_3D; + ret.g_2D = &g_gaussian_2D; + ret.zeta_3D = &zeta_gaussian_3D; + ret.eta_3D = &zeta_gaussian_3D; + ret.eta_2D = &zeta_gaussian_2D; /* See Winckelmans et al., C. R. Physique 6 (2005), around eq (28) */ - ret.combined_fn = &combined_winckel; + ret.combined_3D = &combined_gaussian_3D; strcpy(ret.cl_kernel_name_ext, "gaussian"); return ret; } diff --git a/src/nbody.cl b/src/nbody.cl index 5e540da..235234d 100644 --- a/src/nbody.cl +++ b/src/nbody.cl @@ -28,10 +28,10 @@ SOFTWARE. 
/* CVTX_CL_LOG2_WORKGROUP_SIZE controlled with build options from host */ /*############################################################################ -Definitions for the repeated body of kernels\ +Definitions for the repeated body of kernels ############################################################################*/ -"#define CVTX_P_INDVEL_START \\\n" +"#define CVTX_P3D_VEL_START \\\n" "( \\\n" " __global float3* particle_locs, \\\n" " __global float3* particle_vorts, \\\n" @@ -51,9 +51,9 @@ Definitions for the repeated body of kernels\ " radd = length(rad); \\\n" " rho = radd * recip_reg_rad; \n" -" /* Fill in g calc here */\n" +/* Fill in g calc here */ -"#define CVTX_P_INDVEL_END \\\n" +"#define CVTX_P3D_VEL_END \\\n" " cor = - g; /*1/4pi term is done by host. */ \\\n" " den = pown(radd, 3); \\\n" " num = cross(rad, particle_vorts[pidx]); \\\n" @@ -68,7 +68,7 @@ Definitions for the repeated body of kernels\ " return; \\\n" "} \n" -"#define CVTX_P_IND_DVORT_START \\\n" +"#define CVTX_P3D_DVORT_START \\\n" "( \\\n" " __global float3* particle_locs, \\\n" " __global float3* particle_vorts, \\\n" @@ -89,9 +89,9 @@ Definitions for the repeated body of kernels\ " radd = length(rad); \\\n" " rho = radd * recip_reg_rad; \n" -" /* FILL in f & g calc here! */\n" +/* FILL in f & g calc here! */ -"#define CVTX_P_IND_DVORT_END \\\n" +"#define CVTX_P3D_DVORT_END \\\n" " cross_om = cross(induced_vorts[indidx], \\\n" " particle_vorts[sidx]); \\\n" " t21n = cross_om * g; \\\n" @@ -115,7 +115,7 @@ Definitions for the repeated body of kernels\ " return 4 * acos((float)-1) * radius * radius * radius / 3.f; \n" "} \n" -"#define CVTX_P_VISC_IND_DVORT_START \\\n" +"#define CVTX_P3D_VISC_DVORT_START \\\n" "( \\\n" " __global float3* particle_locs, \\\n" " __global float3* particle_vorts, \\\n" @@ -149,9 +149,9 @@ Definitions for the repeated body of kernels\ " t212 = induced_vorts[indidx] * -1 * particle_vols[sidx]; \\\n" " t21 = t211 + t212; \n" -" /* ETA FUNCTION function! 
here */ " +/* ETA FUNCTION function! here */ -"#define CVTX_P_VISC_IND_DVORT_END \\\n" +"#define CVTX_P3D_VISC_DVORT_END \\\n" " t2 = t21 * eta; \\\n" " ret = t2 * t1; \\\n" " } \\\n" @@ -181,33 +181,87 @@ Definitions for the repeated body of kernels\ " return; \n" "} \n" +/* 2D Vortex particle induced velocity */ +"#define CVTX_P2D_VEL_START \\\n" +"( \\\n" +" __global float2* particle_locs, \\\n" +" __global float* particle_vorts, \\\n" +" float recip_reg_rad, \\\n" +" __global float2* mes_locs, \\\n" +" __global float2* results) \\\n" +"{ \\\n" +" float2 rad, ret; \\\n" +" float cor, den, rho, g, radd; \\\n" +" __local float2 reduction_workspace[CVTX_CL_WORKGROUP_SIZE]; \\\n" +" /* Particle idx, mes_pnt idx and local work item idx */ \\\n" +" uint pidx, midx, widx, loop_idx; \\\n" +" midx = get_global_id(1); \\\n" +" widx = get_local_id(0); \\\n" +" pidx = widx; \\\n" +" rad = mes_locs[midx] - particle_locs[pidx]; \\\n" +" radd = length(rad); \\\n" +" rho = radd * recip_reg_rad; \n" + +/* Fill in g calc here */ + +"#define CVTX_P2D_VEL_END \\\n" +" cor = - g; /*1/2pi term is done by host. */ \\\n" +" den = pown(radd, 2); \\\n" +" ret.x = rad.y * (cor * particle_vorts[pidx] / den); \\\n" +" ret.y = -rad.x * (cor * particle_vorts[pidx] / den); \\\n" +" ret = isnormal(ret) ? 
ret : (float2)(0.f, 0.f); \\\n" +" reduction_workspace[widx] = ret; \\\n" +" local_workspace_float2_reduce(reduction_workspace); \\\n" +" barrier(CLK_LOCAL_MEM_FENCE); \\\n" +" if( widx == 0 ){ \\\n" +" results[midx] = reduction_workspace[0] + results[midx]; \\\n" +" } \\\n" +" return; \\\n" +"} \n" + +"inline void local_workspace_float2_reduce( \n" +" __local float2* reduction_workspace) \n" +"{ \n" +" uint loop_idx = 2; \n" +" uint widx = get_local_id(0); \n" +" for(; loop_idx <= CVTX_CL_WORKGROUP_SIZE; \n" +" loop_idx *= 2) \n" +" { \n" +" barrier(CLK_LOCAL_MEM_FENCE); \n" +" if( widx < CVTX_CL_WORKGROUP_SIZE/loop_idx ){ \n" +" reduction_workspace[widx] = reduction_workspace[widx] \n" +" + reduction_workspace[widx + CVTX_CL_WORKGROUP_SIZE/loop_idx];\n" +" } \n" +" } \n" +" return; \n" +"} \n" -/* ########################################################### */ -/* Velocity calculation kernels here: */ -/* name cvtx_nb_Particle_ind_vel_XXXXX */ -/* ########################################################### */ +/* ########################################################### + 3D Velocity calculation kernels here: + name cvtx_nb_P3D_vel_XXXXX + ########################################################### */ -"__kernel void cvtx_nb_Particle_ind_vel_singular\n" -" CVTX_P_INDVEL_START \n" +"__kernel void cvtx_nb_P3D_vel_singular\n" +" CVTX_P3D_VEL_START \n" " g = 1.f; \n" -" CVTX_P_INDVEL_END \n" +" CVTX_P3D_VEL_END \n" -"__kernel void cvtx_nb_Particle_ind_vel_winckelmans \n" -" CVTX_P_INDVEL_START \n" +"__kernel void cvtx_nb_P3D_vel_winckelmans \n" +" CVTX_P3D_VEL_START \n" " g = (rho * rho + 2.5f) * rho * rho * rho * rsqrt(pown(rho * rho + 1, 5));\n" -" CVTX_P_INDVEL_END \n" +" CVTX_P3D_VEL_END \n" -"__kernel void cvtx_nb_Particle_ind_vel_planetary \n" -" CVTX_P_INDVEL_START \n" +"__kernel void cvtx_nb_P3D_vel_planetary \n" +" CVTX_P3D_VEL_START \n" " g = rho < 1.f ? 
rho * rho * rho : 1.f; \n" -" CVTX_P_INDVEL_END \n" +" CVTX_P3D_VEL_END \n" -"__kernel void cvtx_nb_Particle_ind_vel_gaussian \n" -" CVTX_P_INDVEL_START \n" +"__kernel void cvtx_nb_P3D_vel_gaussian \n" +" CVTX_P3D_VEL_START \n" " if(rho > 6.f){ \n" " g = 1.f; \n" " } else { \n" @@ -219,38 +273,38 @@ Definitions for the repeated body of kernels\ " float term2 = rho * sqrt((float)2 / pi) * exp(-rho_sr2 * rho_sr2); \n" " g = erf - term2; \n" " } \n" -" CVTX_P_INDVEL_END \n" +" CVTX_P3D_VEL_END \n" /* ########################################################### - Ind Dvort calculation kernels here: - name cvtx_nb_Particle_ind_dvort_XXXXX + 3D Ind Dvort calculation kernels here: + name cvtx_nb_P3D_dvort_XXXXX ########################################################### */ -"__kernel void cvtx_nb_Particle_ind_dvort_singular\n" -" CVTX_P_IND_DVORT_START\n" +"__kernel void cvtx_nb_P3D_dvort_singular\n" +" CVTX_P3D_DVORT_START\n" " g = 1.f;\n" " f = 0.f;\n" -" CVTX_P_IND_DVORT_END\n" +" CVTX_P3D_DVORT_END\n" -"__kernel void cvtx_nb_Particle_ind_dvort_planetary\n" -" CVTX_P_IND_DVORT_START\n" +"__kernel void cvtx_nb_P3D_dvort_planetary\n" +" CVTX_P3D_DVORT_START\n" " g = rho < 1.f ? rho * rho * rho : (float)1.;\n" " f = rho < 1.f ? 
(float)3 : (float)0;\n" -" CVTX_P_IND_DVORT_END\n" +" CVTX_P3D_DVORT_END\n" -"__kernel void cvtx_nb_Particle_ind_dvort_winckelmans\n" -" CVTX_P_IND_DVORT_START\n" +"__kernel void cvtx_nb_P3D_dvort_winckelmans\n" +" CVTX_P3D_DVORT_START\n" " g = (rho * rho + 2.5f) * rho * rho * rho * rsqrt(pown(rho * rho + 1, 5));\n" " f = (float)7.5 * rsqrt(pown(rho * rho + 1, 7)); \n" -" CVTX_P_IND_DVORT_END\n" +" CVTX_P3D_DVORT_END\n" -"__kernel void cvtx_nb_Particle_ind_dvort_gaussian\n" -" CVTX_P_IND_DVORT_START \n" +"__kernel void cvtx_nb_P3D_dvort_gaussian\n" +" CVTX_P3D_DVORT_START \n" " const float pi = 3.14159265359f; \n" " if(rho > (float)6.){ \n" " g = 1.f; \n" @@ -263,28 +317,56 @@ Definitions for the repeated body of kernels\ " g = erf - term2; \n" " } \n" " f = sqrt((float) 2 / pi) * exp(-rho * rho / 2); \n" -" CVTX_P_IND_DVORT_END \n" +" CVTX_P3D_DVORT_END \n" /* ########################################################### - viscous ind Dvort calculation kernels here: - name cvtx_nb_Particle_visc_ind_dvort_XXXXX + 3D viscous ind Dvort calculation kernels here: + name cvtx_nb_P3D_visc_dvort_XXXXX ########################################################### */ " /* Viscocity doesn't work for singular & planetary */ \n" -"__kernel void cvtx_nb_Particle_visc_ind_dvort_winckelmans \n" -" CVTX_P_VISC_IND_DVORT_START \n" +"__kernel void cvtx_nb_P3D_visc_dvort_winckelmans \n" +" CVTX_P3D_VISC_DVORT_START \n" " eta = (float)52.5 * pow(rho * rho + 1, (float)-4.5); \n" -" CVTX_P_VISC_IND_DVORT_END \n" +" CVTX_P3D_VISC_DVORT_END \n" -"__kernel void cvtx_nb_Particle_visc_ind_dvort_gaussian \n" -" CVTX_P_VISC_IND_DVORT_START \n" +"__kernel void cvtx_nb_P3D_visc_dvort_gaussian \n" +" CVTX_P3D_VISC_DVORT_START \n" " const float pi = 3.14159265359f; \n" " eta = sqrt((float) 2.f / pi) * exp(-rho * rho / 2.f); \n" -" CVTX_P_VISC_IND_DVORT_END \n" +" CVTX_P3D_VISC_DVORT_END \n" + + +/* ########################################################### + 2DVelocity calculation kernels here: + 
name cvtx_nb_P2D_vel_XXXXX + ########################################################### */ + +"__kernel void cvtx_nb_P2D_vel_singular\n" +" CVTX_P2D_VEL_START \n" +" g = 1.f; \n" +" CVTX_P2D_VEL_END \n" + + +"__kernel void cvtx_nb_P2D_vel_winckelmans \n" +" CVTX_P2D_VEL_START \n" +" g = (rho * rho + 2.0f) * rho * rho * pown(rho * rho + 1.f, -2); \n" +" CVTX_P2D_VEL_END \n" + + +"__kernel void cvtx_nb_P2D_vel_planetary \n" +" CVTX_P2D_VEL_START \n" +" g = rho < 1.f ? rho * rho : 1.f; \n" +" CVTX_P2D_VEL_END \n" + +"__kernel void cvtx_nb_P2D_vel_gaussian \n" +" CVTX_P2D_VEL_START \n" +" g = 1.f - exp(-rho * rho * 0.5f); \n" +" CVTX_P2D_VEL_END \n" /* ########################################################### vortex_filament code: diff --git a/src/ocl_P2D.c b/src/ocl_P2D.c new file mode 100644 index 0000000..af5d913 --- /dev/null +++ b/src/ocl_P2D.c @@ -0,0 +1,230 @@ +#include "libcvtx.h" +/*============================================================================ +ocl_P2D.c + +Handles the OpenCL accelerated 2D vortex particle methods. + +Copyright(c) 2018 HJA Bird + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files(the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions : + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +============================================================================*/ + +#ifdef CVTX_USING_OPENCL +#include +#include +#include +#include +#include +#include "opencl_acc.h" +#include "ocl_P2D.h" + +int opencl_brute_force_P2D_M2M_vel( + const cvtx_P2D **array_start, + const int num_particles, + const bsv_V2f *mes_start, + const int num_mes, + bsv_V2f *result_array, + const cvtx_VortFunc *kernel, + float regularisation_radius) +{ + /* Right now we just use the first active device. */ + assert(opencl_is_init()); + cl_program prog; + cl_context cont; + cl_command_queue queue; + + if (opencl_num_active_devices() > 0 && + opencl_get_device_state(0, &prog, &cont, &queue) == 0) { + return opencl_brute_force_P2D_M2M_vel_impl( + array_start, num_particles, mes_start, + num_mes, result_array, kernel, regularisation_radius, + prog, queue, cont); + } + else + { + return -1; + } +} + +int opencl_brute_force_P2D_M2M_vel_impl( + const cvtx_P2D **array_start, + const int num_particles, + const bsv_V2f *mes_start, + const int num_mes, + bsv_V2f *result_array, + const cvtx_VortFunc *kernel, + float regularisation_radius, + cl_program program, + cl_command_queue queue, + cl_context context) +{ + char kernel_name[128] = "cvtx_nb_P2D_vel_"; + int i, n_particle_groups, n_zeroed_particles, n_modelled_particles; + float constant_multiplyer = 1.f / (2.f * acosf(-1)); + size_t global_work_size[2], workgroup_size[2]; + cl_float2 *mes_pos_buff_data, *part_pos_buff_data, *res_buff_data; + cl_float *part_vort_buff_data; + 
cl_mem mes_pos_buff, res_buff, *part_pos_buff, *part_vort_buff; + cl_int status; + cl_kernel cl_kernel; + cl_event *event_chain; + + if (opencl_init() == 1) + { + strncat(kernel_name, kernel->cl_kernel_name_ext, 32); + cl_kernel = clCreateKernel(program, kernel_name, &status); + if (status != CL_SUCCESS) { + clReleaseKernel(cl_kernel); + return -1; + } + /* This has to match the opencl kernels, so be careful with fiddling */ + workgroup_size[0] = CVTX_WORKGROUP_SIZE; /* Particles per group */ + workgroup_size[1] = 1; /* Only 1 measure pos per workgroup. */ + global_work_size[0] = CVTX_WORKGROUP_SIZE; /* We use multiple particle buffers */ + global_work_size[1] = num_mes; + + /* Generate an buffer for the measurement position data */ + mes_pos_buff_data = malloc(num_mes * sizeof(cl_float2)); + for (i = 0; i < num_mes; ++i) { + mes_pos_buff_data[i].x = mes_start[i].x[0]; + mes_pos_buff_data[i].y = mes_start[i].x[1]; + } + mes_pos_buff = clCreateBuffer(context, + CL_MEM_READ_ONLY, num_mes * sizeof(cl_float2), NULL, &status); + status = clEnqueueWriteBuffer( + queue, mes_pos_buff, CL_FALSE, + 0, num_mes * sizeof(cl_float2), mes_pos_buff_data, 0, NULL, NULL); + assert(status == CL_SUCCESS); + status = clSetKernelArg(cl_kernel, 3, sizeof(cl_mem), &mes_pos_buff); + if (status != CL_SUCCESS) { + free(mes_pos_buff_data); + clReleaseMemObject(mes_pos_buff); + clReleaseKernel(cl_kernel); + return -1; + } + + cl_float cl_recip_regularisation_radius = 1.f / regularisation_radius; + status = clSetKernelArg(cl_kernel, 2, sizeof(cl_float), &cl_recip_regularisation_radius); + assert(status == CL_SUCCESS); + + /* Generate a results buffer */ + res_buff_data = malloc(num_mes * sizeof(cl_float2)); + res_buff = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(cl_float2) * num_mes, NULL, &status); + for (i = 0; i < num_mes; ++i) { + res_buff_data[i].x = 0.f; + res_buff_data[i].y = 0.f; + } + status = clEnqueueWriteBuffer( + queue, res_buff, CL_FALSE, + 0, num_mes * sizeof(cl_float2), 
res_buff_data, 0, NULL, NULL); + if (status != CL_SUCCESS) { + assert(false); + } + status = clSetKernelArg(cl_kernel, 4, sizeof(cl_mem), &res_buff); + assert(status == CL_SUCCESS); + + /* Now create & dispatch particle buffers and kernel. */ + n_particle_groups = num_particles / CVTX_WORKGROUP_SIZE; + if (num_particles % CVTX_WORKGROUP_SIZE) { + n_zeroed_particles = CVTX_WORKGROUP_SIZE + - num_particles % CVTX_WORKGROUP_SIZE; + n_particle_groups += 1; + } + n_modelled_particles = CVTX_WORKGROUP_SIZE * n_particle_groups; + part_pos_buff_data = malloc(n_modelled_particles * sizeof(cl_float2)); + part_vort_buff_data = malloc(n_modelled_particles * sizeof(cl_float)); + for (i = 0; i < num_particles; ++i) { + part_pos_buff_data[i].x = array_start[i]->coord.x[0]; + part_pos_buff_data[i].y = array_start[i]->coord.x[1]; + part_vort_buff_data[i] = array_start[i]->vorticity; + } + /* We need this so that we always have the minimum workgroup size. */ + for (i = num_particles; i < n_modelled_particles; ++i) { + part_pos_buff_data[i].x = 0.f; + part_pos_buff_data[i].y = 0.f; + part_vort_buff_data[i] = 0.f; + } + part_pos_buff = malloc(n_particle_groups * sizeof(cl_mem)); + part_vort_buff = malloc(n_particle_groups * sizeof(cl_mem)); + event_chain = malloc(sizeof(cl_event) * n_particle_groups * 3); + for (i = 0; i < n_particle_groups; ++i) { + part_pos_buff[i] = clCreateBuffer(context, + CL_MEM_READ_ONLY, CVTX_WORKGROUP_SIZE * sizeof(cl_float2), NULL, &status); + assert(status == CL_SUCCESS); + status = clEnqueueWriteBuffer( + queue, part_pos_buff[i], CL_FALSE, + 0, CVTX_WORKGROUP_SIZE * sizeof(cl_float2), + part_pos_buff_data + i * CVTX_WORKGROUP_SIZE, 0, NULL, event_chain + 3 * i); + assert(status == CL_SUCCESS); + part_vort_buff[i] = clCreateBuffer(context, + CL_MEM_READ_ONLY, CVTX_WORKGROUP_SIZE * sizeof(cl_float), NULL, &status); + assert(status == CL_SUCCESS); + status = clEnqueueWriteBuffer( + queue, part_vort_buff[i], CL_FALSE, + 0, CVTX_WORKGROUP_SIZE * 
sizeof(cl_float), + part_vort_buff_data + i * CVTX_WORKGROUP_SIZE, 0, NULL, event_chain + 3 * i + 1); + assert(status == CL_SUCCESS); + status = clSetKernelArg(cl_kernel, 0, sizeof(cl_mem), part_pos_buff + i); + assert(status == CL_SUCCESS); + status = clSetKernelArg(cl_kernel, 1, sizeof(cl_mem), part_vort_buff + i); + assert(status == CL_SUCCESS); + if (i == 0) { + status = clEnqueueNDRangeKernel(queue, cl_kernel, 2, + NULL, global_work_size, workgroup_size, 2, event_chain, event_chain + 3 * i + 2); + } + else { + status = clEnqueueNDRangeKernel(queue, cl_kernel, 2, + NULL, global_work_size, workgroup_size, 3, event_chain + 3 * i - 1, event_chain + 3 * i + 2); + } + assert(status == CL_SUCCESS); + clReleaseMemObject(part_pos_buff[i]); + clReleaseMemObject(part_vort_buff[i]); + } + + /* Read back our results! */ + clEnqueueReadBuffer(queue, res_buff, CL_TRUE, 0, + sizeof(cl_float2) * num_mes, res_buff_data, 1, + event_chain + 3 * n_particle_groups - 1, NULL); + for (i = 0; i < n_particle_groups * 3; ++i) { clReleaseEvent(event_chain[i]); } + free(event_chain); /* Its tempting to do this earlier, but remember, this is asynchonous! */ + for (i = 0; i < num_mes; ++i) { + /* Constant multiplyer is constant the 1/2pi term. 
*/ + result_array[i].x[0] = res_buff_data[i].x * constant_multiplyer; + result_array[i].x[1] = res_buff_data[i].y * constant_multiplyer; + } + free(res_buff_data); + + free(part_pos_buff); + free(part_vort_buff); + free(part_pos_buff_data); + free(part_vort_buff_data); + free(mes_pos_buff_data); + clReleaseMemObject(res_buff); + clReleaseMemObject(mes_pos_buff); + clReleaseKernel(cl_kernel); + return 0; + } + else + { + return -1; + } +} + +#endif /* CVTX_USING_OPENCL */ diff --git a/src/ocl_P2D.h b/src/ocl_P2D.h new file mode 100644 index 0000000..900149b --- /dev/null +++ b/src/ocl_P2D.h @@ -0,0 +1,52 @@ +#include "libcvtx.h" +/*============================================================================ +ocl_P2D.h + +Handles the opencl accelerated 2D vortex particle methods. + +Copyright(c) 2019 HJA Bird + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files(the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions : + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+============================================================================*/ +#ifdef CVTX_USING_OPENCL +#include +#include "opencl_acc.h" +/* OpenCL brute-force P2D M2M velocity entry point: same arguments as the _impl variant below minus the explicit cl_program/cl_command_queue/cl_context. NOTE(review): the definition is not visible here - presumably it picks the OpenCL objects and forwards to _impl; confirm the return convention in ocl_P2D.c. */ +int opencl_brute_force_P2D_M2M_vel( + const cvtx_P2D **array_start, + const int num_particles, + const bsv_V2f *mes_start, + const int num_mes, + bsv_V2f *result_array, + const cvtx_VortFunc *kernel, + float regularisation_radius); +/* Same calculation run on a caller-supplied OpenCL program, queue and context. NOTE(review): the ocl_P2D.c body in this patch that uses program/queue/context looks like this function - it appends kernel->cl_kernel_name_ext to the kernel name, fills result_array[0..num_mes-1], and returns 0 on success or -1 when opencl_init() != 1, kernel creation fails, or setting the measurement-position buffer argument fails; confirm against the definition. */ +int opencl_brute_force_P2D_M2M_vel_impl( + const cvtx_P2D **array_start, + const int num_particles, + const bsv_V2f *mes_start, + const int num_mes, + bsv_V2f *result_array, + const cvtx_VortFunc *kernel, + float regularisation_radius, + cl_program program, + cl_command_queue queue, + cl_context context); + +#endif /* CVTX_USING_OPENCL */ diff --git a/src/ocl_P3D.c b/src/ocl_P3D.c index 1a3b14a..62b986f 100644 --- a/src/ocl_P3D.c +++ b/src/ocl_P3D.c @@ -131,7 +131,7 @@ int opencl_brute_force_ParticleArr_Arr_ind_vel_impl( cl_command_queue queue, cl_context context) { - char kernel_name[128] = "cvtx_nb_Particle_ind_vel_"; + char kernel_name[128] = "cvtx_nb_P3D_vel_"; int i, n_particle_groups, n_zeroed_particles, n_modelled_particles; float constant_multiplyer = 1.f / (4.f * acosf(-1)); size_t global_work_size[2], workgroup_size[2]; @@ -304,7 +304,7 @@ int opencl_brute_force_ParticleArr_Arr_ind_dvort_impl( cl_command_queue queue, cl_context context) { - char kernel_name[128] = "cvtx_nb_Particle_ind_dvort_"; + char kernel_name[128] = "cvtx_nb_P3D_dvort_"; int i, n_particle_groups, n_zeroed_particles, n_modelled_particles; float constant_multiplyer = 1.f / (4.f * acosf(-1) * powf(regularisation_radius, 3)); size_t global_work_size[2], workgroup_size[2]; @@ -490,7 +490,7 @@ int opencl_brute_force_ParticleArr_Arr_visc_ind_dvort_impl( cl_command_queue queue, cl_context context) { - char kernel_name[128] = "cvtx_nb_Particle_visc_ind_dvort_"; + char kernel_name[128] = "cvtx_nb_P3D_visc_dvort_"; int i, n_particle_groups, n_zeroed_particles, n_modelled_particles; size_t global_work_size[2], 
workgroup_size[2]; cl_float3 *part1_pos_buff_data, *part1_vort_buff_data, *part2_pos_buff_data, *part2_vort_buff_data, *res_buff_data; diff --git a/src/ocl_P3D.h b/src/ocl_P3D.h index 8991253..cb43dae 100644 --- a/src/ocl_P3D.h +++ b/src/ocl_P3D.h @@ -1,10 +1,10 @@ #include "libcvtx.h" /*============================================================================ -ocl_particle.h +ocl_P3D.h -Handles the opencl accelerated vortex particle methods. +Handles the opencl accelerated 3D vortex particle methods. -Copyright(c) 2018 HJA Bird +Copyright(c) 2018-2019 HJA Bird Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files(the "Software"), to deal