Browse Source

Made transformation kernels 2D

Iver 2 months ago
parent
commit
60adef140f
2 changed files with 15 additions and 8 deletions
  1. 2 2
      src/kernels/kernels.cl
  2. 13 6
      src/main/main.c

+ 2 - 2
src/kernels/kernels.cl

@@ -265,8 +265,8 @@ void global_quaternion_rotate(__global RI_vector_3 *position, RI_vector_4 rotati
     *position = (RI_vector_3){rotation.x, rotation.y, rotation.z};
 }
 
-__kernel void transformer(__global RI_face *faces, __global RI_vector_3 *vertecies, __global RI_vector_3 *normals, __global RI_vector_2 *uvs, __global RI_renderable_face *renderable_faces, double actor_x, double actor_y, double actor_z, double actor_r_w, double actor_r_x, double actor_r_y, double actor_r_z, double actor_s_x, double actor_s_y, double actor_s_z, int has_normals, int has_uvs, int face_array_offset_index, int face_count, int width, int height, double horizontal_fov_factor, double vertical_fov_factor, float min_clip, float max_clip, double camera_x, double camera_y, double camera_z, double camera_r_w, double camera_r_x, double camera_r_y, double camera_r_z, int renderable_face_offset){
-    int face_index = get_global_id(0); if (face_index >= face_count) return;
+__kernel void transformer(__global RI_face *faces, __global RI_vector_3 *vertecies, __global RI_vector_3 *normals, __global RI_vector_2 *uvs, __global RI_renderable_face *renderable_faces, double actor_x, double actor_y, double actor_z, double actor_r_w, double actor_r_x, double actor_r_y, double actor_r_z, double actor_s_x, double actor_s_y, double actor_s_z, int has_normals, int has_uvs, int face_array_offset_index, int face_count, int width, int height, double horizontal_fov_factor, double vertical_fov_factor, float min_clip, float max_clip, double camera_x, double camera_y, double camera_z, double camera_r_w, double camera_r_x, double camera_r_y, double camera_r_z, int renderable_face_offset, int face_sqrt){
+    int face_index = get_global_id(1) * face_sqrt + get_global_id(0); if (face_index >= face_count) return;
 
     RI_vector_3 current_actor_position = (RI_vector_3){actor_x, actor_y, actor_z};
     RI_vector_4 current_actor_rotation = (RI_vector_4){actor_r_w, actor_r_x, actor_r_y, actor_r_z};

+ 13 - 6
src/main/main.c

@@ -391,11 +391,13 @@ void RI_render(RI_texture *target_texture, RI_scene *scene){
 
         if (scene->actors[actor_index]->face_count <= 0) continue;
         
-        const size_t t_global_work_size[1] = {(int)ceil(scene->actors[actor_index]->face_count / (float)local_group_size_y) * scene->actors[actor_index]->face_count};
-        const size_t t_local_work_size[1] = {(int)fmin(scene->actors[actor_index]->face_count, local_group_size_x)};
+        int face_sqrt = ceil(sqrt(scene->actors[actor_index]->face_count));
 
-        debug("transformer global work size: {%d}", scene->actors[actor_index]->face_count);    
-        debug("transformer local work size: {%d}", (int)fmin(scene->actors[actor_index]->face_count, local_group_size_y));
+        const size_t t_global_work_size[2] = {local_group_size_x * ceil(face_sqrt / (float)local_group_size_x), local_group_size_x * ceil(face_sqrt / (float)local_group_size_y)};
+        const size_t t_local_work_size[2] = {(int)fmin(face_sqrt, local_group_size_x), (int)fmin(face_sqrt, local_group_size_y)};
+
+        debug("transformer global work size: {%d, %d}", t_global_work_size[0], t_global_work_size[1]);    
+        debug("transformer local work size: {%d, %d}", t_local_work_size[0], t_local_work_size[1]);
 
         // 5, double actor_x
         clSetKernelArg(context.opencl.transformation_kernel, 5, sizeof(double), &actor->position.x);
@@ -431,10 +433,13 @@ void RI_render(RI_texture *target_texture, RI_scene *scene){
 
         // 32, int renderable_face_offset
         clSetKernelArg(context.opencl.transformation_kernel, 32, sizeof(int), &renderable_face_index);
+        
+        // 33, int face_sqrt
+        clSetKernelArg(context.opencl.transformation_kernel, 33, sizeof(int), &face_sqrt);
 
         debug("running kernel...");
 
-        clEnqueueNDRangeKernel(context.opencl.queue, context.opencl.transformation_kernel, 1, NULL, t_global_work_size, t_local_work_size, 0, NULL, &event);
+        clEnqueueNDRangeKernel(context.opencl.queue, context.opencl.transformation_kernel, 2, NULL, t_global_work_size, t_local_work_size, 0, NULL, &event);
         clFinish(context.opencl.queue);
 
         clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(start), &start, NULL);
@@ -796,9 +801,11 @@ int RI_init(){
     // clSetKernelArg(context.opencl.transformation_kernel, 30, sizeof(double), &camera_r_y);
     // // 31, double camera_r_z
     // clSetKernelArg(context.opencl.transformation_kernel, 31, sizeof(double), &camera_r_z);
+    
     // // 32, int renderable_face_offset
     // clSetKernelArg(context.opencl.transformation_kernel, 32, sizeof(int), &renderable_face_offset);
-
+    // // 33, int face_sqrt
+    // clSetKernelArg(context.opencl.transformation_kernel, 33, sizeof(int), &face_sqrt);
 
     RI_load_mesh("objects/cube.obj", context.defaults.default_actor);