// rasteriver.c #include #include #include "../headers/rasteriver.h" #include "../headers/memory.h" #include "../headers/pitmap.h" #include cl_int clerror; cl_event clevent; RI_context context; #define RI_realloc(__ptr, __size) written_RI_realloc(__ptr, __size, __func__, __LINE__, context) #define RI_malloc(__size) written_RI_malloc(__size, __func__, __LINE__, context) #define RI_calloc(__nmemb, __size) written_RI_calloc(__nmemb, __size, __func__, __LINE__, context) #define RI_free(__ptr) written_RI_free(__ptr, __func__, __LINE__, context) #define PI 3.14159265359 #define PI2 1.57079632679 void debug(char *string, int debug_flag, ...){ if (!(context.debug_flags & debug_flag)) return; va_list args; va_start(args, debug_flag); char message[500]; strcpy(message, context.debug_prefix); strcat(message, string); vprintf(message, args); printf("\n"); va_end(args); } RI_texture* RI_load_image(char* filename){ PM_image* image = PM_load_image(filename); RI_texture* texture = RI_malloc(sizeof(RI_texture)); int previous_length_of_textures_array = context.opencl.length_of_textures_array; texture->width = image->width; texture->height = image->height; texture->index = previous_length_of_textures_array; context.opencl.length_of_textures_array += image->width * image->height; context.opencl.textures = RI_realloc(context.opencl.textures, context.opencl.length_of_textures_array * sizeof(uint32_t)); printf("%d\n", previous_length_of_textures_array); memcpy(context.opencl.textures + previous_length_of_textures_array, image->frame_buffer, sizeof(uint32_t) * image->width * image->height); if (context.opencl.textures_mem_buffer) clReleaseMemObject(context.opencl.textures_mem_buffer); context.opencl.textures_mem_buffer = clCreateBuffer( context.opencl.context, CL_MEM_READ_WRITE, sizeof(uint32_t) * context.opencl.length_of_textures_array, NULL, NULL ); clEnqueueWriteBuffer( context.opencl.queue, context.opencl.textures_mem_buffer, CL_TRUE, 0, sizeof(uint32_t) * context.opencl.length_of_textures_array, context.opencl.textures, 0, NULL, NULL ); clFinish(context.opencl.queue); clSetKernelArg( context.opencl.rasterization_kernel, 1, sizeof(cl_mem), &context.opencl.textures_mem_buffer ); free(image->frame_buffer); free(image); return texture; } RI_material *RI_new_material(){ RI_material *new_material = RI_malloc(sizeof(RI_material)); new_material->albedo = 0xFFFF00FF; return new_material; } RI_actor *RI_new_actor(){ RI_actor *new_actor = RI_malloc(sizeof(RI_actor)); if (context.defaults.default_actor){ *new_actor = *context.defaults.default_actor; } else { new_actor->position = (RI_vector_3){0, 0, 0}; new_actor->scale = (RI_vector_3){1, 1, 1}; new_actor->rotation = (RI_vector_4){1, 0, 0, 0}; new_actor->active = 1; } return new_actor; } RI_scene *RI_new_scene(){ RI_scene *new_scene = RI_malloc(sizeof(RI_scene)); new_scene->camera.FOV = PI2; new_scene->camera.max_clip = 100000; new_scene->camera.min_clip = 0.01; new_scene->camera.position = (RI_vector_3){0, 0, 0}; new_scene->camera.rotation = (RI_vector_4){1, 0, 0, 0}; return new_scene; } RI_mesh *RI_load_mesh(char *filename){ clock_t start_time, end_time; start_time = clock(); RI_mesh *new_mesh = RI_malloc(sizeof(RI_mesh)); int previous_face_count = context.opencl.face_count; int previous_vertecies_count = context.opencl.vertex_count; int previous_normals_count = context.opencl.normal_count; int previous_uvs_count = context.opencl.uv_count; FILE *file = fopen(filename, "r"); if (!file){ debug("[Mesh Loader] Error! File \"%s\" not found", RI_DEBUG_MESH_LOADER_ERROR, filename); return NULL; } char line[512]; int face_count = 0; int object_face_count = 0; int object_vertecies_count = 0; int object_normals_count = 0; int object_uvs_count = 0; while (fgets(line, sizeof(line), file)) { if (line[0] == 'f' && line[1] == ' ') { // face ++face_count; ++context.opencl.face_count; ++object_face_count; } else if (line[0] == 'v'){ if (line[1] == ' ') { // vertex ++context.opencl.vertex_count; ++object_vertecies_count; } else if (line[1] == 'n') { // normal ++context.opencl.normal_count; ++object_normals_count; } else if (line[1] == 't') { // UV ++context.opencl.uv_count; ++object_uvs_count; } } } rewind(file); debug( "%d faces %d vertecies %d normals %d uvs", RI_DEBUG_MESH_LOADER_FACE_VERT_NORM_UV_COUNT, object_face_count, object_vertecies_count, object_normals_count, object_uvs_count ); context.opencl.faces = RI_realloc(context.opencl.faces, sizeof(RI_face) * context.opencl.face_count); context.opencl.temp_faces = RI_malloc(sizeof(RI_face) * object_face_count); context.opencl.temp_vertecies = RI_malloc(sizeof(RI_vector_3) * object_vertecies_count); if (object_normals_count > 0) context.opencl.temp_normals = RI_malloc( sizeof(RI_vector_3) * object_normals_count ); if (object_uvs_count > 0) context.opencl.temp_uvs = RI_malloc(sizeof(RI_vector_2) * object_uvs_count); int current_face_index = 0; int current_vertex_index = 0; int current_normal_index = 0; int current_uv_index = 0; int has_normals, has_uvs; has_normals = has_uvs = 0; while (fgets(line, sizeof(line), file)) { if (line[0] == 'f' && line[1] == ' ') { int vertex_0_index, vertex_1_index, vertex_2_index, normal_0_index, normal_1_index, normal_2_index, uv_0_index, uv_1_index, uv_2_index ; int matches = sscanf(line, "f %d/%d/%d %d/%d/%d %d/%d/%d/", &vertex_0_index, &uv_0_index, &normal_0_index, &vertex_1_index, &uv_1_index, &normal_1_index, &vertex_2_index, &uv_2_index, &normal_2_index); if (matches != 9){ vertex_0_index = -1; vertex_1_index = -1; vertex_2_index = -1; normal_0_index = -1; normal_1_index = -1; normal_2_index = -1; uv_0_index = -1; uv_1_index = -1; uv_2_index = -1; if (strchr(line, '/')){ sscanf(line, "f %d//%d %d//%d %d//%d", &vertex_0_index, &normal_0_index, &vertex_1_index, &normal_1_index, &vertex_2_index, &normal_2_index); has_normals = 1; } else { sscanf(line, "f %d %d %d", &vertex_0_index, &vertex_1_index, &vertex_2_index); } } else { has_normals = has_uvs = 1; } context.opencl.temp_faces[current_face_index].position_0_index = vertex_0_index - 1; context.opencl.temp_faces[current_face_index].position_1_index = vertex_1_index - 1; context.opencl.temp_faces[current_face_index].position_2_index = vertex_2_index - 1; context.opencl.temp_faces[current_face_index].normal_0_index = normal_0_index - 1; context.opencl.temp_faces[current_face_index].normal_1_index = normal_1_index - 1; context.opencl.temp_faces[current_face_index].normal_2_index = normal_2_index - 1; context.opencl.temp_faces[current_face_index].uv_0_index = uv_0_index - 1; context.opencl.temp_faces[current_face_index].uv_1_index = uv_1_index - 1; context.opencl.temp_faces[current_face_index].uv_2_index = uv_2_index - 1; context.opencl.faces[current_face_index].should_render = 1; ++current_face_index; } else if (line[0] == 'v' && line[1] == ' ') { double x, y, z; sscanf(line, "v %lf %lf %lf", &x, &y, &z); context.opencl.temp_vertecies[current_vertex_index].x = x; context.opencl.temp_vertecies[current_vertex_index].y = y; context.opencl.temp_vertecies[current_vertex_index].z = z; ++current_vertex_index; } else if (line[0] == 'v' && line[1] == 'n') { double x, y, z; sscanf(line, "vn %lf %lf %lf", &x, &y, &z); context.opencl.temp_normals[current_normal_index].x = x; context.opencl.temp_normals[current_normal_index].y = y; context.opencl.temp_normals[current_normal_index].z = z; ++current_normal_index; } else if (line[0] == 'v' && line[1] == 't') { double x, y, z; sscanf(line, "vt %lf %lf %lf", &x, &y, &z); context.opencl.temp_uvs[current_uv_index].x = x; context.opencl.temp_uvs[current_uv_index].y = y; // UVS are almost always 2D so we don't need Z (the type itself is a vector 2f, not 3f) ++current_uv_index; } } for (int i = 0; i < object_face_count; ++i){ context.opencl.faces[i + previous_face_count].position_0 = context.opencl.temp_vertecies[context.opencl.temp_faces[i].position_0_index]; context.opencl.faces[i + previous_face_count].position_1 = context.opencl.temp_vertecies[context.opencl.temp_faces[i].position_1_index]; context.opencl.faces[i + previous_face_count].position_2 = context.opencl.temp_vertecies[context.opencl.temp_faces[i].position_2_index]; context.opencl.faces[i + previous_face_count].normal_0 = context.opencl.temp_normals[context.opencl.temp_faces[i].normal_0_index]; context.opencl.faces[i + previous_face_count].normal_1 = context.opencl.temp_normals[context.opencl.temp_faces[i].normal_1_index]; context.opencl.faces[i + previous_face_count].normal_2 = context.opencl.temp_normals[context.opencl.temp_faces[i].normal_2_index]; context.opencl.faces[i + previous_face_count].uv_0 = context.opencl.temp_uvs[context.opencl.temp_faces[i].uv_0_index]; context.opencl.faces[i + previous_face_count].uv_1 = context.opencl.temp_uvs[context.opencl.temp_faces[i].uv_1_index]; context.opencl.faces[i + previous_face_count].uv_2 = context.opencl.temp_uvs[context.opencl.temp_faces[i].uv_2_index]; } free(context.opencl.temp_faces); free(context.opencl.temp_vertecies); if (object_normals_count > 0) free(context.opencl.temp_normals); if (object_uvs_count > 0) free(context.opencl.temp_uvs); char* loading_mesh_notice_string; if (has_normals && !has_uvs) loading_mesh_notice_string = "normals"; else if (!has_normals && has_uvs) loading_mesh_notice_string = "UVs"; else if (!has_normals && !has_uvs) loading_mesh_notice_string = "normals and UVs"; if (!has_normals || !has_uvs) debug( "[Mesh Loader] Notice! Mesh \"%s\" is missing %s", RI_DEBUG_MESH_LOADER_ERROR, filename, loading_mesh_notice_string ); new_mesh->has_normals = has_normals; new_mesh->has_uvs = has_uvs; new_mesh->face_count = object_face_count; new_mesh->face_index = previous_face_count; debug( "[Mesh Loader] Loaded mesh \"%s\"! %d faces, %d verticies, %d normals, %d uvs", RI_DEBUG_MESH_LOADER_LOADED_MESH, filename, current_face_index, current_vertex_index, current_normal_index, current_uv_index ); clFinish(context.opencl.queue); if (previous_face_count != context.opencl.face_count) { if (context.opencl.faces_mem_buffer) clReleaseMemObject(context.opencl.faces_mem_buffer); context.opencl.faces_mem_buffer = clCreateBuffer( context.opencl.context, CL_MEM_READ_WRITE, sizeof(RI_face) * context.opencl.face_count, NULL, NULL ); clEnqueueWriteBuffer( context.opencl.queue, context.opencl.faces_mem_buffer, CL_TRUE, 0, sizeof(RI_face) * context.opencl.face_count, context.opencl.faces, 0, NULL, NULL ); clFinish(context.opencl.queue); clSetKernelArg( context.opencl.transformation_kernel, 0, sizeof(cl_mem), &context.opencl.faces_mem_buffer ); if (context.opencl.face_count * 2 > context.opencl.length_of_renderable_faces_array){ debug( "old renderable faces count (%d) less than current (%d). Reallocating...", RI_DEBUG_MESH_LOADER_REALLOCATION, context.opencl.length_of_renderable_faces_array, context.opencl.face_count * 2 ); context.opencl.length_of_renderable_faces_array = context.opencl.face_count * 2; debug( "reallocating %f mb (%d renderable faces)", RI_DEBUG_MESH_LOADER_REALLOCATION, sizeof(RI_renderable_face) * context.opencl.length_of_renderable_faces_array / 1048576.0, context.opencl.length_of_renderable_faces_array ); context.opencl.faces_to_render = RI_malloc( sizeof(RI_renderable_face) * context.opencl.length_of_renderable_faces_array ); clerror = clReleaseMemObject(context.opencl.renderable_faces_mem_buffer); if (clerror != CL_SUCCESS){ debug("couldn't free renderable faces memory buffer (error %d)", RI_DEBUG_MESH_LOADER_ERROR, clerror); exit(1); } context.opencl.renderable_faces_mem_buffer = clCreateBuffer( context.opencl.context, CL_MEM_READ_WRITE, sizeof(RI_renderable_face) * context.opencl.length_of_renderable_faces_array, NULL, &clerror ); if (clerror != CL_SUCCESS){ debug("couldn't reallocate renderable faces memory buffer (error %d)", RI_DEBUG_MESH_LOADER_ERROR, clerror); exit(1); } } } fclose(file); end_time = clock(); debug("loading mesh took %lf seconds", RI_DEBUG_MESH_LOADER_TIME, (double)(end_time - start_time) / CLOCKS_PER_SEC); return new_mesh; } void RI_render(RI_scene *scene){ clock_t start_time, end_time; start_time = clock(); debug("---FRAME START-------------------------------------------\n", RI_DEBUG_FRAME_START_END_MARKERS ); // transformer double horizontal_fov_factor = (double)context.window.width / tanf(0.5 * scene->camera.FOV); double vertical_fov_factor = (double)context.window.height / tanf(0.5 * scene->camera.FOV); if (context.window.aspect_mode == RI_ASPECT_MODE_LETTERBOX) horizontal_fov_factor /= horizontal_fov_factor / vertical_fov_factor; // kernel args // 18, double horizontal_fov_factor clSetKernelArg(context.opencl.transformation_kernel, 18, sizeof(double), &horizontal_fov_factor); // 19, double vertical_fov_factor clSetKernelArg(context.opencl.transformation_kernel, 19, sizeof(double), &vertical_fov_factor); // 20, double min_clip clSetKernelArg(context.opencl.transformation_kernel, 20, sizeof(float), &scene->camera.min_clip); // 21, double max_clip clSetKernelArg(context.opencl.transformation_kernel, 21, sizeof(float), &scene->camera.max_clip); // 22, double camera_x clSetKernelArg(context.opencl.transformation_kernel, 22, sizeof(double), &scene->camera.position.x); // 23, double camera_y clSetKernelArg(context.opencl.transformation_kernel, 23, sizeof(double), &scene->camera.position.y); // 24, double camera_z clSetKernelArg(context.opencl.transformation_kernel, 24, sizeof(double), &scene->camera.position.z); // 25, double camera_r_w clSetKernelArg(context.opencl.transformation_kernel, 25, sizeof(double), &scene->camera.rotation.w); // 26, double camera_r_x clSetKernelArg(context.opencl.transformation_kernel, 26, sizeof(double), &scene->camera.rotation.x); // 27, double camera_r_y clSetKernelArg(context.opencl.transformation_kernel, 27, sizeof(double), &scene->camera.rotation.y); // 28, double camera_r_z clSetKernelArg(context.opencl.transformation_kernel, 28, sizeof(double), &scene->camera.rotation.z); int local_group_size_x = 16; int local_group_size_y = 16; // count faces scene->face_count = 0; for (int actor_index = 0; actor_index < scene->length_of_actors_array; ++actor_index){ scene->face_count += scene->actors[actor_index]->mesh->face_count; } // allocate faces_to_render if face count increases if (scene->face_count * 2 > context.opencl.length_of_renderable_faces_array){ context.opencl.faces_to_render = RI_realloc( context.opencl.faces_to_render, sizeof(RI_renderable_face) * scene->face_count * 2 ); // x2 because faces can be split context.opencl.length_of_renderable_faces_array = scene->face_count * 2; debug( "old renderable faces count (%d) less than current (%d). Reallocating %f mb", RI_DEBUG_RENDER_REALLOCATION, context.opencl.length_of_renderable_faces_array, scene->face_count * 2, sizeof(RI_renderable_face) * context.opencl.length_of_renderable_faces_array / 1048576.0 ); context.opencl.faces_to_render = RI_malloc( sizeof(RI_renderable_face) * context.opencl.length_of_renderable_faces_array ); clerror = clReleaseMemObject(context.opencl.renderable_faces_mem_buffer); if (clerror != CL_SUCCESS){ debug("couldn't free renderable faces memory buffer (error %d)", RI_DEBUG_RENDER_ERROR, clerror); exit(1); } context.opencl.renderable_faces_mem_buffer = clCreateBuffer( context.opencl.context, CL_MEM_READ_WRITE, sizeof(RI_renderable_face) * context.opencl.length_of_renderable_faces_array, NULL, &clerror ); if (clerror != CL_SUCCESS){ debug("couldn't reallocate renderable faces memory buffer (error %d)", RI_DEBUG_RENDER_ERROR, clerror); exit(1); } } // set faces_to_render to zero memset(context.opencl.faces_to_render, 0, sizeof(RI_renderable_face) * scene->face_count * 2); clSetKernelArg( context.opencl.transformation_kernel, 1, sizeof(cl_mem), &context.opencl.renderable_faces_mem_buffer ); context.current_renderable_face_index = 0; context.current_split_renderable_face_index = 0; debug("transforming polygons...", RI_DEBUG_TRANSFORMER_MESSAGE); int renderable_face_index = 0; cl_ulong start, end; // transform polygons for (int actor_index = 0; actor_index < scene->length_of_actors_array; ++actor_index){ RI_actor *actor = scene->actors[actor_index]; debug("actor index: %d face count: %d", RI_DEBUG_TRANSFORMER_CURRENT_ACTOR, actor_index, actor->mesh->face_count ); if (scene->actors[actor_index]->mesh->face_count <= 0) continue; int face_sqrt = ceil(sqrt(scene->actors[actor_index]->mesh->face_count)); int local_t_size = (int)fmin(face_sqrt, local_group_size_x); const size_t t_global_work_size[2] = { local_t_size * ceil(face_sqrt / (float)local_t_size), local_t_size * ceil(face_sqrt / (float)local_t_size) }; const size_t t_local_work_size[2] = { (int)fmin(face_sqrt, local_group_size_x), (int)fmin(face_sqrt, local_group_size_y) }; debug("transformer global work size: {%d, %d}", RI_DEBUG_TRANSFORMER_GLOBAL_SIZE, t_global_work_size[0], t_global_work_size[1] ); debug("transformer local work size: {%d, %d}", RI_DEBUG_TRANSFORMER_LOCAL_SIZE, t_local_work_size[0], t_local_work_size[1] ); debug( "(%d extra work items; %d items (%dx%d) - %d faces)", RI_DEBUG_TRANSFORMER_EXTRA_WORK_ITEMS, t_global_work_size[0] * t_global_work_size[1] - scene->actors[actor_index]->mesh->face_count, t_global_work_size[0] * t_global_work_size[1], t_global_work_size[0], t_global_work_size[1], scene->actors[actor_index]->mesh->face_count ); // 2, double actor_x clSetKernelArg(context.opencl.transformation_kernel, 2, sizeof(double), &actor->position.x); // 3, double actor_y clSetKernelArg(context.opencl.transformation_kernel, 3, sizeof(double), &actor->position.y); // 4, double actor_z clSetKernelArg(context.opencl.transformation_kernel, 4, sizeof(double), &actor->position.z); // 5, double actor_r_w clSetKernelArg(context.opencl.transformation_kernel, 5, sizeof(double), &actor->rotation.w); // 6, double actor_r_x clSetKernelArg(context.opencl.transformation_kernel, 6, sizeof(double), &actor->rotation.x); // 7, double actor_r_y clSetKernelArg(context.opencl.transformation_kernel, 7, sizeof(double), &actor->rotation.y); // 8, double actor_r_z clSetKernelArg(context.opencl.transformation_kernel, 8, sizeof(double), &actor->rotation.z); // 9, double actor_s_x clSetKernelArg(context.opencl.transformation_kernel, 9, sizeof(double), &actor->scale.x); // 10, double actor_s_y clSetKernelArg(context.opencl.transformation_kernel, 10, sizeof(double), &actor->scale.y); // 11, double actor_s_z clSetKernelArg(context.opencl.transformation_kernel, 11, sizeof(double), &actor->scale.z); // 12, int has_normals clSetKernelArg(context.opencl.transformation_kernel, 12, sizeof(int), &actor->mesh->has_normals); // 13, int has_uvs clSetKernelArg(context.opencl.transformation_kernel, 13, sizeof(int), &actor->mesh->has_uvs); // 14, int face_array_offset_index clSetKernelArg(context.opencl.transformation_kernel, 14, sizeof(int), &actor->mesh->face_index); // 15, int face_count clSetKernelArg(context.opencl.transformation_kernel, 15, sizeof(int), &actor->mesh->face_count); // 29, int renderable_face_offset clSetKernelArg(context.opencl.transformation_kernel, 29, sizeof(int), &renderable_face_index); // 30, int face_sqrt clSetKernelArg(context.opencl.transformation_kernel, 30, sizeof(int), &face_sqrt); debug("texture width: %d texture height: %d texture index %d", RI_DEBUG_TRANSFORMER_TEXTURE, actor->texture->width, actor->texture->height, actor->texture->index); // 31: uint16_t texture_width clSetKernelArg(context.opencl.transformation_kernel, 31, sizeof(uint16_t), &actor->texture->width); // 32: uint16_t texture_height clSetKernelArg(context.opencl.transformation_kernel, 32, sizeof(uint16_t), &actor->texture->height); // 33: uint32_t texture_index clSetKernelArg(context.opencl.transformation_kernel, 33, sizeof(uint32_t), &actor->texture->index); debug("running actor #%d's transformation kernel...", RI_DEBUG_TRANSFORMER_MESSAGE, actor_index ); clerror = clEnqueueNDRangeKernel( context.opencl.queue, context.opencl.transformation_kernel, 2, NULL, t_global_work_size, t_local_work_size, 0, NULL, &clevent ); if (clerror != CL_SUCCESS) debug("error enqueing kernel (%d)", RI_DEBUG_TRANSFORMER_ERROR, clerror ); clFinish(context.opencl.queue); clGetEventProfilingInfo(clevent, CL_PROFILING_COMMAND_START, sizeof(start), &start, NULL); clGetEventProfilingInfo(clevent, CL_PROFILING_COMMAND_END, sizeof(end), &end, NULL); double ns = (double)(end - start); debug("actor #%d's transformation kernel took %f seconds\n", RI_DEBUG_TRANSFORMER_TIME, actor_index, ns / 1e9 ); renderable_face_index += actor->mesh->face_count * 2; } // rasterize // set width, height and kernel width, height int x = context.window.width; int y = context.window.height; int x_div_32 = ceil(context.window.width / (float)local_group_size_x); int y_div_32 = ceil(context.window.height / (float)local_group_size_y); if (context.window.width % local_group_size_x != 0) x = local_group_size_x * x_div_32; if (context.window.height % local_group_size_y != 0) y = local_group_size_y * y_div_32; const size_t r_global_work_size[2] = {x, y}; const size_t r_local_work_size[2] = {local_group_size_x, local_group_size_y}; debug("rasterizer global work size: {%d, %d}", RI_DEBUG_RASTERIZER_GLOBAL_SIZE, x, y ); debug("rasterizer local work size: {%d, %d}", RI_DEBUG_RASTERIZER_GLOBAL_SIZE, local_group_size_x, local_group_size_y ); // kernel args clSetKernelArg( context.opencl.rasterization_kernel, 0, sizeof(cl_mem), &context.opencl.renderable_faces_mem_buffer ); clSetKernelArg(context.opencl.rasterization_kernel, 7, sizeof(int), &scene->face_count); clSetKernelArg( context.opencl.rasterization_kernel, 8, sizeof(int), &context.current_split_renderable_face_index ); debug("rasterizing...", RI_DEBUG_RASTERIZER_MESSAGE); // run raster kernel clEnqueueNDRangeKernel( context.opencl.queue, context.opencl.rasterization_kernel, 2, NULL, r_global_work_size, r_local_work_size, 0, NULL, &clevent ); clFinish(context.opencl.queue); clGetEventProfilingInfo(clevent, CL_PROFILING_COMMAND_START, sizeof(start), &start, NULL); clGetEventProfilingInfo(clevent, CL_PROFILING_COMMAND_END, sizeof(end), &end, NULL); debug("rasterization kernel took %f seconds\n", RI_DEBUG_RASTERIZER_TIME, (double)(end - start) / 1e9 ); // put GPU frame buffer into CPU clEnqueueReadBuffer( context.opencl.queue, context.opencl.frame_buffer_mem_buffer, CL_TRUE, 0, context.window.width * context.window.height * sizeof(uint32_t), context.sdl.frame_buffer, 0, NULL, &clevent ); clFinish(context.opencl.queue); clGetEventProfilingInfo(clevent, CL_PROFILING_COMMAND_START, sizeof(start), &start, NULL); clGetEventProfilingInfo(clevent, CL_PROFILING_COMMAND_END, sizeof(end), &end, NULL); debug("reading GPU frame buffer took %f seconds", RI_DEBUG_RENDER_FRAME_BUFFER_READ_TIME, (double)(end - start) / 1e9 ); context.opencl.number_of_faces_just_rendered += scene->face_count; end_time = clock(); debug("rendering took %lf seconds", RI_DEBUG_RENDER_TIME, (double)(end_time - start_time) / CLOCKS_PER_SEC ); } void RI_tick(){ clock_t start_time, end_time; start_time = clock(); SDL_Event event; while (SDL_PollEvent(&event)){ switch (event.type){ case SDL_QUIT: { context.is_running = ri_false; break; } default: { break; } } } SDL_LockTexture( context.sdl.frame_buffer_texture, NULL, (void*)&context.sdl.frame_buffer, &context.sdl.pitch ); SDL_UnlockTexture(context.sdl.frame_buffer_texture); SDL_RenderCopy(context.sdl.renderer, context.sdl.frame_buffer_texture, NULL, NULL); SDL_RenderPresent(context.sdl.renderer); ++context.current_frame; end_time = clock(); debug("Done! ticking took %lf seconds", RI_DEBUG_TICK_TIME, (double)(end_time - start_time) / CLOCKS_PER_SEC ); debug("---FRAME END---(frame #%d, %d polygons)----------------\n", RI_DEBUG_FRAME_START_END_MARKERS, context.current_frame, context.opencl.number_of_faces_just_rendered ); context.opencl.number_of_faces_just_rendered = 0; return; } RI_context *RI_get_context(){ context.sdl = (RI_SDL){NULL, NULL, NULL, NULL, -1}; context.opencl = (RI_CL){NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, 1, 0, 0}; context.window = (RI_window){800, 800, 400, 400, "RasterIver Window", RI_ASPECT_MODE_LETTERBOX}; context.debug_flags = RI_DEBUG_ERRORS; context.current_frame = 0; context.is_running = ri_true; context.debug_prefix = "[RasterIver] "; return &context; } // Convert a CL file to a string char *load_kernel_source(const char *filename) { FILE *f = fopen(filename, "rb"); if (f == NULL){ debug("couldn't open kernel file \"%s\"", RI_DEBUG_KERNEL_LOADER_ERROR, filename ); } fseek(f, 0, SEEK_END); size_t size = ftell(f); rewind(f); char *source = malloc(size + 1); fread(source, 1, size, f); source[size] = '\0'; fclose(f); return source; } int RI_init(){ context.window.half_width = context.window.width / 2; context.window.half_height = context.window.height / 2; // init SDL context.sdl.window = SDL_CreateWindow( context.window.title, SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, context.window.width, context.window.height, 0 ); context.sdl.renderer = SDL_CreateRenderer(context.sdl.window, -1, SDL_RENDERER_ACCELERATED); context.sdl.frame_buffer_texture = SDL_CreateTexture( context.sdl.renderer, SDL_PIXELFORMAT_BGRA8888, SDL_TEXTUREACCESS_STREAMING, context.window.width, context.window.height ); context.sdl.frame_buffer = malloc( sizeof(uint32_t) * context.window.width * context.window.height ); if (!context.debug_flags) context.debug_flags = RI_DEBUG_ERRORS; context.defaults.default_actor = RI_new_actor(); // init OpenCL context.opencl.faces_to_render = RI_malloc( sizeof(RI_renderable_face) * context.opencl.length_of_renderable_faces_array ); context.opencl.faces = RI_malloc(sizeof(RI_face) * context.opencl.face_count); cl_uint num_platforms = 0; clerror = clGetPlatformIDs(0, NULL, &num_platforms); cl_platform_id *platforms = malloc(sizeof(cl_platform_id) * num_platforms); clerror = clGetPlatformIDs(num_platforms, platforms, NULL); cl_device_id *devices; cl_platform_id chosen_platform = NULL; char pname[256]; for (cl_uint i = 0; i < num_platforms; i++) { clerror = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(pname), pname, NULL); debug("get platform info result: %d", RI_DEBUG_INIT_PLATFORMS, clerror); cl_uint num_devices = 0; clerror = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices); debug("num devices result: %d", RI_DEBUG_INIT_PLATFORMS, clerror); devices = malloc(sizeof(cl_device_id) * num_devices); clerror = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL); if (i == 0){ context.opencl.device = devices[0]; } debug("get device ids result: %d", RI_DEBUG_INIT_PLATFORMS, clerror); debug("-platform #%d: NAME: %s | DEVICE COUNT: %d", RI_DEBUG_INIT_PLATFORMS, i, pname, num_devices); for (cl_uint j = 0; j < num_devices; ++j){ debug("-\\ device #%d: ID: %u", RI_DEBUG_INIT_PLATFORMS, j, devices[j]); } free(devices); } context.opencl.platform = platforms[0]; debug("chosen device id: %u", RI_DEBUG_INIT_PLATFORMS, context.opencl.device); context.opencl.context = clCreateContext(NULL, 1, &context.opencl.device, NULL, NULL, NULL); context.opencl.queue = clCreateCommandQueueWithProperties( context.opencl.context, context.opencl.device, (const cl_queue_properties[]){CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}, &clerror ); if (!context.opencl.context){ debug("failed to create OpenCL context", RI_DEBUG_OPENCL_ERROR); exit(1); } // build programs char *program_source = load_kernel_source("src/kernels/kernels.cl"); cl_program rasterization_program = clCreateProgramWithSource( context.opencl.context, 1, (const char**)&program_source, NULL, NULL ); free(program_source); cl_int result = clBuildProgram(rasterization_program, 1, &context.opencl.device, "", NULL, NULL); if (result != CL_SUCCESS){ char log[10001]; clGetProgramBuildInfo( rasterization_program, context.opencl.device, CL_PROGRAM_BUILD_LOG, 10000, log, NULL); debug("rasterization program build failed (%d). Log: \n %s", RI_DEBUG_OPENCL_ERROR, result, log); return 1; } // kernels context.opencl.rasterization_kernel = clCreateKernel(rasterization_program, "rasterizer", &clerror); if (clerror != CL_SUCCESS){ debug("couldn't create rasterizer kernel", RI_DEBUG_OPENCL_ERROR); return 1; } context.opencl.transformation_kernel = clCreateKernel(rasterization_program, "transformer", &clerror); if (clerror != CL_SUCCESS){ debug("couldn't create transformer kernel", RI_DEBUG_OPENCL_ERROR); return 1; } // rasterizer context.opencl.renderable_faces_mem_buffer = clCreateBuffer( context.opencl.context, CL_MEM_READ_WRITE, sizeof(RI_renderable_face) * context.opencl.length_of_renderable_faces_array, NULL, &clerror); if (clerror != CL_SUCCESS){ debug("couldn't create renderable faces memory buffer", RI_DEBUG_OPENCL_ERROR); return 1; } context.opencl.frame_buffer_mem_buffer = clCreateBuffer( context.opencl.context, CL_MEM_READ_WRITE, sizeof(uint32_t) * context.window.width * context.window.height, NULL, &clerror); if (clerror != CL_SUCCESS || !context.opencl.frame_buffer_mem_buffer){ debug("couldn't create frame buffer memory buffer", RI_DEBUG_OPENCL_ERROR); return 1; } context.opencl.textures_mem_buffer = clCreateBuffer( context.opencl.context, CL_MEM_READ_WRITE, 1, NULL, &clerror); if (clerror != CL_SUCCESS || !context.opencl.textures_mem_buffer){ debug("couldn't create textures memory buffer", RI_DEBUG_OPENCL_ERROR); return 1; } // rasterizer(__global RI_renderable_face *renderable_faces, __global uint *frame_buffer, // int width, int height, int half_width, int half_height, int number_of_renderable_faces, // int number_of_split_renderable_faces) clSetKernelArg(context.opencl.rasterization_kernel, 0, sizeof(cl_mem), &context.opencl.renderable_faces_mem_buffer); clSetKernelArg(context.opencl.rasterization_kernel, 1, sizeof(cl_mem), &context.opencl.textures_mem_buffer); clSetKernelArg(context.opencl.rasterization_kernel, 2, sizeof(cl_mem), &context.opencl.frame_buffer_mem_buffer); clSetKernelArg(context.opencl.rasterization_kernel, 3, sizeof(int), &context.window.width); clSetKernelArg(context.opencl.rasterization_kernel, 4, sizeof(int), &context.window.height); clSetKernelArg(context.opencl.rasterization_kernel, 5, sizeof(int), &context.window.half_width); clSetKernelArg(context.opencl.rasterization_kernel, 6, sizeof(int), &context.window.half_height); clSetKernelArg(context.opencl.rasterization_kernel, 7, sizeof(int), &context.current_renderable_face_index); clSetKernelArg(context.opencl.rasterization_kernel, 8, sizeof(int), &context.current_split_renderable_face_index); // transformer // transformer // __global RI_face *faces, __global RI_vector_3 *vertecies, // __global RI_vector_3 *normals, __global RI_vector_2 *uvs, // __global RI_renderable_face *renderable_faces, // double actor_x, double actor_y, double actor_z, // double actor_r_w, double actor_r_x, double actor_r_y, double actor_r_z, // double actor_s_x, double actor_s_y, double actor_s_z, // int has_normals, int has_uvs, int face_array_offset_index, int face_count, // int width, int height, double horizontal_fov_factor, double vertical_fov_factor, // double min_clip, double max_clip, // double camera_x, double camera_y, double camera_z, // double camera_r_w, double camera_r_x, double camera_r_y, double camera_r_z // // 0: __global RI_face *faces // clSetKernelArg(context.opencl.transformation_kernel, 0, sizeof(cl_mem), &context.opencl.faces_mem_buffer); // // 1: __global RI_renderable_face *renderable_faces // clSetKernelArg(context.opencl.transformation_kernel, 1, sizeof(cl_mem), &context.opencl.renderable_faces_mem_buffer); // // 2: double actor_x // clSetKernelArg(context.opencl.transformation_kernel, 2, sizeof(double), &actor_x); // // 3: double actor_y // clSetKernelArg(context.opencl.transformation_kernel, 3, sizeof(double), &actor_y); // // 4: double actor_z // clSetKernelArg(context.opencl.transformation_kernel, 4, sizeof(double), &actor_z); // // 5: double actor_r_w // clSetKernelArg(context.opencl.transformation_kernel, 5, sizeof(double), &actor_r_w); // // 6: double actor_r_x // clSetKernelArg(context.opencl.transformation_kernel, 6, sizeof(double), &actor_r_x); // // 7: double actor_r_y // clSetKernelArg(context.opencl.transformation_kernel, 7, sizeof(double), &actor_r_y); // // 8: double actor_r_z // clSetKernelArg(context.opencl.transformation_kernel, 8, sizeof(double), &actor_r_z); // // 9: double actor_s_x // clSetKernelArg(context.opencl.transformation_kernel, 9, sizeof(double), &actor_s_x); // // 10: double actor_s_y // clSetKernelArg(context.opencl.transformation_kernel, 10, sizeof(double), &actor_s_y); // // 11: double actor_s_z // clSetKernelArg(context.opencl.transformation_kernel, 11, sizeof(double), &actor_s_z); // // 12: int has_normals // clSetKernelArg(context.opencl.transformation_kernel, 12, sizeof(int), &has_normals); // // 13: int has_uvs // clSetKernelArg(context.opencl.transformation_kernel, 13, sizeof(int), &has_uvs); // // 14: int face_array_offset_index // clSetKernelArg(context.opencl.transformation_kernel, 14, sizeof(int), &face_array_offset_index); // // 15: int face_count // clSetKernelArg(context.opencl.transformation_kernel, 15, sizeof(int), &face_count); // // 16: int width clSetKernelArg(context.opencl.transformation_kernel, 16, sizeof(int), &context.window.width); // // 17: int height clSetKernelArg(context.opencl.transformation_kernel, 17, sizeof(int), &context.window.height); // // 18: double horizontal_fov_factor // clSetKernelArg(context.opencl.transformation_kernel, 18, sizeof(double), &horizontal_fov_factor); // // 19: double vertical_fov_factor // clSetKernelArg(context.opencl.transformation_kernel, 19, sizeof(double), &vertical_fov_factor); // // 20: float min_clip // clSetKernelArg(context.opencl.transformation_kernel, 20, sizeof(float), &min_clip_f); // // 21: float max_clip // clSetKernelArg(context.opencl.transformation_kernel, 21, sizeof(float), &max_clip_f); // // 22: double camera_x // clSetKernelArg(context.opencl.transformation_kernel, 22, sizeof(double), &camera_x); // // 23: double camera_y // clSetKernelArg(context.opencl.transformation_kernel, 23, sizeof(double), &camera_y); // // 24: double camera_z // clSetKernelArg(context.opencl.transformation_kernel, 24, sizeof(double), &camera_z); // // 25: double camera_r_w // clSetKernelArg(context.opencl.transformation_kernel, 25, sizeof(double), &camera_r_w); // // 26: double camera_r_x // clSetKernelArg(context.opencl.transformation_kernel, 26, sizeof(double), &camera_r_x); // // 27: double camera_r_y // clSetKernelArg(context.opencl.transformation_kernel, 27, sizeof(double), &camera_r_y); // // 28: double camera_r_z // clSetKernelArg(context.opencl.transformation_kernel, 28, sizeof(double), &camera_r_z); // // 29: int renderable_face_offset // clSetKernelArg(context.opencl.transformation_kernel, 29, sizeof(int), &renderable_face_offset); // // 30: int face_sqrt // clSetKernelArg(context.opencl.transformation_kernel, 30, sizeof(int), &face_sqrt); // // 31: uint16_t texture_width // clSetKernelArg(context.opencl.transformation_kernel, 31, sizeof(uint16_t), &texture_width); // // 32: uint16_t texture_height // clSetKernelArg(context.opencl.transformation_kernel, 32, sizeof(uint16_t), &texture_height); // // 33: uint32_t texture_index // clSetKernelArg(context.opencl.transformation_kernel, 33, sizeof(uint32_t), &texture_index); context.defaults.default_actor->mesh = RI_load_mesh("objects/error_object.obj"); return 0; }