Browse Source

First Commit!

Iver 8 months ago
commit
a6a80f3bf8
18 changed files with 1572 additions and 0 deletions
  1. 7 0
      .vscode/launch.json
  2. 12 0
      .vscode/settings.json
  3. 1 0
      OUT
  4. 261 0
      apple_hello_world.c
  5. 3 0
      build.sh
  6. 4 0
      compile_all.sh
  7. 3 0
      dcopy.sh
  8. 6 0
      debug.sh
  9. 3 0
      lib_build.sh
  10. BIN
      librasteriver.so
  11. BIN
      main.bin
  12. 18 0
      main.c
  13. 196 0
      main_CPU.c
  14. 492 0
      main_GPU.c
  15. 290 0
      main_GPU_fixed_polygons.c
  16. 253 0
      main_test.c
  17. 23 0
      rasteriver.h
  18. BIN
      rasteriver.so

+ 7 - 0
.vscode/launch.json

@@ -0,0 +1,7 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": []
+}

+ 12 - 0
.vscode/settings.json

@@ -0,0 +1,12 @@
+{
+    "files.associations": {
+        "rasteriver.h": "c",
+        "type_traits": "c",
+        "ratio": "c",
+        "system_error": "c",
+        "array": "c",
+        "functional": "c",
+        "tuple": "c",
+        "utility": "c"
+    }
+}

+ 1 - 0
OUT

@@ -0,0 +1 @@
+bash: strace: command not found

+ 261 - 0
apple_hello_world.c

@@ -0,0 +1,261 @@
+//
+// File:       hello.c
+//
+// Abstract:   A simple "Hello World" compute example showing basic usage of OpenCL which
+//             calculates the mathematical square (X[i] = pow(X[i],2)) for a buffer of
+//             floating point values.
+//             
+//
+// Version:    <1.0>
+//
+// Disclaimer: IMPORTANT:  This Apple software is supplied to you by Apple Inc. ("Apple")
+//             in consideration of your agreement to the following terms, and your use,
+//             installation, modification or redistribution of this Apple software
+//             constitutes acceptance of these terms.  If you do not agree with these
+//             terms, please do not use, install, modify or redistribute this Apple
+//             software.
+//
+//             In consideration of your agreement to abide by the following terms, and
+//             subject to these terms, Apple grants you a personal, non - exclusive
+//             license, under Apple's copyrights in this original Apple software ( the
+//             "Apple Software" ), to use, reproduce, modify and redistribute the Apple
+//             Software, with or without modifications, in source and / or binary forms;
+//             provided that if you redistribute the Apple Software in its entirety and
+//             without modifications, you must retain this notice and the following text
+//             and disclaimers in all such redistributions of the Apple Software. Neither
+//             the name, trademarks, service marks or logos of Apple Inc. may be used to
+//             endorse or promote products derived from the Apple Software without specific
+//             prior written permission from Apple.  Except as expressly stated in this
+//             notice, no other rights or licenses, express or implied, are granted by
+//             Apple herein, including but not limited to any patent rights that may be
+//             infringed by your derivative works or by other works in which the Apple
+//             Software may be incorporated.
+//
+//             The Apple Software is provided by Apple on an "AS IS" basis.  APPLE MAKES NO
+//             WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
+//             WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A
+//             PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION
+//             ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
+//
+//             IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
+//             CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+//             SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+//             INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION
+//             AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER
+//             UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR
+//             OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright ( C ) 2008 Apple Inc. All Rights Reserved.
+//
+
+////////////////////////////////////////////////////////////////////////////////
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <CL/cl.h>
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Use a static data size for simplicity
+//
+#define DATA_SIZE (1024)
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Simple compute kernel which computes the square of an input array 
+//
+const char *KernelSource = "\n" \
+"__kernel void square(                                                       \n" \
+"   __global float* input,                                              \n" \
+"   __global float* output,                                             \n" \
+"   const unsigned int count)                                           \n" \
+"{                                                                      \n" \
+"   int i = get_global_id(0);                                           \n" \
+"   if(i < count)                                                       \n" \
+"       output[i] = input[i] * input[i];                                \n" \
+"}                                                                      \n" \
+"\n";
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Squares DATA_SIZE random floats on an OpenCL device and counts how many
+// results match the host computation.
+//
+// Returns EXIT_SUCCESS when the whole pipeline ran, EXIT_FAILURE otherwise.
+// Every OpenCL object created along the way is released on all exit paths.
+int main(int argc, char** argv)
+{
+    (void)argc;                         // unused, silences -Wextra
+    (void)argv;
+
+    int status = EXIT_FAILURE;          // process exit code; flipped on success
+    cl_int err;                         // error code returned from api calls
+      
+    float data[DATA_SIZE];              // original data set given to device
+    float results[DATA_SIZE];           // results returned from device
+    unsigned int correct = 0;           // number of correct results returned
+    unsigned int count = DATA_SIZE;     // number of elements to process
+    unsigned int i;
+    int gpu = 1;                        // 1: ask for a GPU device, 0: CPU device
+
+    size_t global;                      // global domain size for our calculation
+    size_t local = 0;                   // local domain size for our calculation
+
+    // All handles start out NULL so the cleanup code can tell what was created.
+    cl_platform_id platform = NULL;     // compute platform
+    cl_device_id device_id = NULL;      // compute device id 
+    cl_context context = NULL;          // compute context
+    cl_command_queue commands = NULL;   // compute command queue
+    cl_program program = NULL;          // compute program
+    cl_kernel kernel = NULL;            // compute kernel
+    
+    cl_mem input = NULL;                // device memory used for the input array
+    cl_mem output = NULL;               // device memory used for the output array
+    
+    // Fill our data set with random float values
+    //
+    for (i = 0; i < count; i++)
+    {
+        data[i] = rand() / (float)RAND_MAX;
+    }
+    
+    // Connect to a compute device. A NULL platform is implementation-defined
+    // outside Apple's framework, so ask for the first platform explicitly.
+    //
+    err = clGetPlatformIDs(1, &platform, NULL);
+    if (err != CL_SUCCESS)
+    {
+        printf("Error: Failed to find an OpenCL platform!\n");
+        goto cleanup;
+    }
+
+    err = clGetDeviceIDs(platform, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
+    if (err != CL_SUCCESS)
+    {
+        printf("Error: Failed to create a device group!\n");
+        goto cleanup;
+    }
+  
+    // Create a compute context 
+    //
+    context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
+    if (!context)
+    {
+        printf("Error: Failed to create a compute context!\n");
+        goto cleanup;
+    }
+
+    // Create a command commands
+    //
+    commands = clCreateCommandQueue(context, device_id, 0, &err);
+    if (!commands)
+    {
+        printf("Error: Failed to create a command commands!\n");
+        goto cleanup;
+    }
+
+    // Create the compute program from the source buffer
+    //
+    program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err);
+    if (!program)
+    {
+        printf("Error: Failed to create compute program!\n");
+        goto cleanup;
+    }
+
+    // Build the program executable
+    //
+    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
+    if (err != CL_SUCCESS)
+    {
+        size_t len = 0;
+        char buffer[2048];
+
+        buffer[0] = '\0';               // stay printable even if the log query fails
+        printf("Error: Failed to build program executable!\n");
+        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
+        buffer[sizeof(buffer) - 1] = '\0';
+        printf("%s\n", buffer);
+        goto cleanup;
+    }
+
+    // Create the compute kernel in the program we wish to run
+    //
+    kernel = clCreateKernel(program, "square", &err);
+    if (!kernel || err != CL_SUCCESS)
+    {
+        printf("Error: Failed to create compute kernel!\n");
+        goto cleanup;
+    }
+
+    // Create the input and output arrays in device memory for our calculation
+    //
+    input = clCreateBuffer(context,  CL_MEM_READ_ONLY,  sizeof(float) * count, NULL, NULL);
+    output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count, NULL, NULL);
+    if (!input || !output)
+    {
+        printf("Error: Failed to allocate device memory!\n");
+        goto cleanup;
+    }    
+    
+    // Write our data set into the input array in device memory 
+    //
+    err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(float) * count, data, 0, NULL, NULL);
+    if (err != CL_SUCCESS)
+    {
+        printf("Error: Failed to write to source array!\n");
+        goto cleanup;
+    }
+
+    // Set the arguments to our compute kernel
+    //
+    err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
+    err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
+    err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
+    if (err != CL_SUCCESS)
+    {
+        printf("Error: Failed to set kernel arguments! %d\n", err);
+        goto cleanup;
+    }
+
+    // Get the maximum work group size for executing the kernel on the device
+    //
+    err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
+    if (err != CL_SUCCESS)
+    {
+        printf("Error: Failed to retrieve kernel work group info! %d\n", err);
+        goto cleanup;
+    }
+
+    // Execute the kernel over the entire range of our 1d input data set
+    // using the maximum number of work group items for this device.
+    // The local size must not exceed the global size and must divide it
+    // evenly; when it cannot, let the runtime pick a work group size.
+    //
+    global = count;
+    if (local > global)
+    {
+        local = global;
+    }
+    err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global,
+                                 (local != 0 && global % local == 0) ? &local : NULL,
+                                 0, NULL, NULL);
+    if (err)
+    {
+        printf("Error: Failed to execute kernel!\n");
+        goto cleanup;
+    }
+
+    // Wait for the command commands to get serviced before reading back results
+    //
+    clFinish(commands);
+
+    // Read back the results from the device to verify the output
+    //
+    err = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(float) * count, results, 0, NULL, NULL );  
+    if (err != CL_SUCCESS)
+    {
+        printf("Error: Failed to read output array! %d\n", err);
+        goto cleanup;
+    }
+    
+    // Validate our results
+    //
+    for (i = 0; i < count; i++)
+    {
+        if (results[i] == data[i] * data[i])
+        {
+            correct++;
+        }
+    }
+    
+    // Print a brief summary detailing the results
+    //
+    printf("Computed '%u/%u' correct values!\n", correct, count);
+    status = EXIT_SUCCESS;
+
+    // Shutdown and cleanup: release only what was actually created
+    //
+cleanup:
+    if (input)    clReleaseMemObject(input);
+    if (output)   clReleaseMemObject(output);
+    if (kernel)   clReleaseKernel(kernel);
+    if (program)  clReleaseProgram(program);
+    if (commands) clReleaseCommandQueue(commands);
+    if (context)  clReleaseContext(context);
+
+    return status;
+}

+ 3 - 0
build.sh

@@ -0,0 +1,3 @@
+echo "[MAIN]     Building..."
+gcc -g -Wall -Wextra -D CL_TARGET_OPENCL_VERSION=120 $1 -o main.bin -L. -lrasteriver -Wl,-rpath=.
+echo "[MAIN]     Built"

+ 4 - 0
compile_all.sh

@@ -0,0 +1,4 @@
+echo "[COMPILER] Building Rasteriver"
+./lib_build.sh $1
+echo "[COMPILER] Building Main"
+./build.sh $2

+ 3 - 0
dcopy.sh

@@ -0,0 +1,3 @@
+echo "[MAIN]     Building..."
+gcc  -Wall -Wextra -D CL_TARGET_OPENCL_VERSION=120 $1 -o main.bin -lSDL2 -lm -lOpenCL
+echo "[MAIN]     Built"

+ 6 - 0
debug.sh

@@ -0,0 +1,6 @@
+clear
+echo building...
+gcc -g -Wall -Wextra -D CL_TARGET_OPENCL_VERSION=120 $1 -o main.bin -lSDL2 -lm -lOpenCL
+echo built...
+echo debugging...
+gdb ./main.bin

+ 3 - 0
lib_build.sh

@@ -0,0 +1,3 @@
+echo "[LIB]      Building..."
+gcc  -Wall -Wextra -D CL_TARGET_OPENCL_VERSION=120 -fPIC -shared -o librasteriver.so $1 -lc -lSDL2 -lm -lOpenCL
+echo "[LIB]      Built"

BIN
librasteriver.so


BIN
main.bin


+ 18 - 0
main.c

@@ -0,0 +1,18 @@
+#include <stdio.h>
+#include "rasteriver.h"
+
+// Demo driver for the Rasteriver library: opens an 800x800 window, requests
+// 100 polygons and ticks the renderer until it stops running or a tick fails.
+// Returns 0 on a clean shutdown and 1 when setup or a tick reported RI_ERROR.
+int main(void){
+    int exit_code = 0;
+
+    RI_SetDebugFlag(1);
+    
+    if (RI_Init(800, 800, "Rasteriver Test") == RI_ERROR){
+        return 1;
+    }
+
+    if (RI_RequestPolygons(100) == RI_ERROR){
+        RI_Stop();
+        return 1;
+    }
+
+    while (RI_IsRunning() == RI_RUNNING){
+        // A failing tick would otherwise be retried forever.
+        if (RI_Tick() == RI_ERROR){
+            exit_code = 1;
+            break;
+        }
+    }
+
+    RI_Stop();
+
+    return exit_code;
+}

+ 196 - 0
main_CPU.c

@@ -0,0 +1,196 @@
+#include "stdlib.h"
+#include "SDL2/SDL.h"
+#include "math.h"
+#include "time.h"
+#include <CL/cl.h>
+
+const int WIDTH = 800;
+const int HEIGHT = 800;
+const int POLYGON_SIZE = sizeof(float) * 3 * 3;
+const int POLYGONS = 10;
+
+// Tests whether segment (a,b)-(c,d) crosses segment (p,q)-(r,s).
+// Returns 1 when both intersection parameters lie strictly between 0 and 1,
+// and also 1 when the determinant is zero (parallel segments); 0 otherwise.
+int is_intersecting(int a, int b, int c, int d, int p, int q, int r, int s) {
+    // The products are evaluated in int arithmetic, then stored as float.
+    const float det = (c - a) * (s - q) - (r - p) * (d - b);
+
+    if (det == 0) {
+        return 1;
+    }
+
+    const float lambda = ((s - q) * (r - a) + (p - r) * (s - b)) / det;
+    if (!(0 < lambda && lambda < 1)) {
+        return 0;
+    }
+
+    const float gamma = ((b - d) * (r - a) + (c - a) * (s - b)) / det;
+    return 0 < gamma && gamma < 1;
+}
+
+// Writes a scaled to unit length into dest (both are 2-component vectors).
+// A zero-length input divides by zero; no guard is applied.
+void norm(float dest[2], float a[2]){
+    const double sum_of_squares = pow(a[0], 2) + pow(a[1], 2);
+    const float magnitude = sqrt(sum_of_squares);
+
+    for (int axis = 0; axis < 2; axis++){
+        dest[axis] = a[axis] / magnitude;
+    }
+}
+
+// Component-wise difference of two 2-component vectors: dest = a - b.
+void sub(float dest[2], float a[2], float b[2]){
+    for (int axis = 0; axis < 2; axis++){
+        dest[axis] = a[axis] - b[axis];
+    }
+}
+
+// Component-wise sum of two 2-component vectors: dest = a + b.
+void add(float dest[2], float a[2], float b[2]){
+    for (int axis = 0; axis < 2; axis++){
+        dest[axis] = a[axis] + b[axis];
+    }
+}
+
+// CPU reference rasterizer. Each frame it generates POLYGONS random triangles,
+// rasterizes them into a WIDTH x HEIGHT ARGB frame buffer using a per-pixel
+// depth buffer (the largest interpolated z wins), presents the frame through
+// SDL and prints the frame rate. Runs until the window is closed.
+// Returns 0 on a normal exit and 1 when initialization failed.
+int main(){
+    srand(time(NULL));
+
+    float polygons[POLYGONS][3][3];
+    int status = 0;
+
+    if (SDL_Init(SDL_INIT_VIDEO) < 0){
+        printf("SDL_Init failed: %s\n", SDL_GetError());
+        return 1;
+    }
+
+    SDL_Window* window = SDL_CreateWindow("Rasterizer", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, WIDTH, HEIGHT, SDL_WINDOW_OPENGL);
+    SDL_Renderer* renderer = window ? SDL_CreateRenderer(window, -1, SDL_RENDERER_ACCELERATED) : NULL;
+    SDL_Texture* texture = renderer ? SDL_CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, WIDTH, HEIGHT) : NULL;
+
+    // The two buffers together take about 5 MB, too much to keep on the stack.
+    const size_t pixel_count = (size_t)WIDTH * HEIGHT;
+    Uint32 *frame_buffer = malloc(pixel_count * sizeof *frame_buffer);
+    float *z_buffer = malloc(pixel_count * sizeof *z_buffer);
+
+    int running = 1;
+
+    if (!texture || !frame_buffer || !z_buffer){
+        printf("Initialization failed (SDL: %s)\n", SDL_GetError());
+        running = 0;
+        status = 1;
+    }
+
+    while (running) {
+        Uint32 start_time = SDL_GetTicks();
+
+        // New random triangles every frame; x, y and z all fall in 1..WIDTH.
+        for (int p = 0; p < POLYGONS; p++){
+            for (int point = 0; point < 3; point++){
+                for (int i = 0; i < 3; i++){
+                    polygons[p][point][i] = rand() % WIDTH + 1;
+                }
+            }
+        }
+
+        for (size_t i = 0; i < pixel_count; ++i) {
+            frame_buffer[i] = 0x22222222;
+        }
+
+        memset(z_buffer, 0, pixel_count * sizeof *z_buffer);
+
+        SDL_Event event;
+        while (SDL_PollEvent(&event)){
+            if (event.type == SDL_QUIT){
+                running = 0;
+            }
+        }
+        
+        for (int polygon = 0; polygon < POLYGONS; polygon++){
+            float x0 = polygons[polygon][0][0];
+            float y0 = polygons[polygon][0][1];
+            float z0 = polygons[polygon][0][2];
+            float x1 = polygons[polygon][1][0];
+            float y1 = polygons[polygon][1][1];
+            float z1 = polygons[polygon][1][2];
+            float x2 = polygons[polygon][2][0];
+            float y2 = polygons[polygon][2][1];
+            float z2 = polygons[polygon][2][2];
+
+            // Twice the signed triangle area. Zero means the three points are
+            // collinear: nothing to fill, and the weights below would divide by zero.
+            float denominator = (y1 - y2) * (x0 - x2) + (x2 - x1) * (y0 - y2);
+            if (denominator == 0){
+                continue;
+            }
+
+            // Bounding box of the triangle, clamped to the screen.
+            float smallest_x = fmaxf(fminf(x0, fminf(x1, x2)), 0.0f);
+            float largest_x = fminf(fmaxf(x0, fmaxf(x1, x2)), (float)WIDTH);
+            float smallest_y = fmaxf(fminf(y0, fminf(y1, y2)), 0.0f);
+            float largest_y = fminf(fmaxf(y0, fmaxf(y1, y2)), (float)HEIGHT);
+
+            // test every pixel in a rect around the triangle. If it's inside, color it.
+            for (int x = (int)smallest_x; x < largest_x; x++){
+                for (int y = (int)smallest_y; y < largest_y; y++){
+                    // Cast a ray from the pixel to a far-away point and count edge
+                    // crossings: an odd count means the pixel is inside.
+                    int intersections = 0;
+                    
+                    for (int i = 0; i < 3; i++){
+                        int next = (i + 1) % 3;
+                        intersections += is_intersecting(x, y, 10000, 100000, polygons[polygon][i][0], polygons[polygon][i][1], polygons[polygon][next][0], polygons[polygon][next][1]);
+                    }
+
+                    if (intersections % 2 == 0){
+                        continue;
+                    }
+
+                    // Barycentric weights of the pixel, used to interpolate depth.
+                    float w0 = ((y1 - y2) * (x - x2) + (x2 - x1) * (y - y2)) / denominator;
+                    float w1 = ((y2 - y0) * (x - x2) + (x0 - x2) * (y - y2)) / denominator;
+                    float w2 = 1.0f - w0 - w1;
+
+                    float z = fabsf(w0 * z0 + w1 * z1 + w2 * z2);
+
+                    // Keep the pixel only if it beats the stored depth.
+                    if (!(z > z_buffer[y * WIDTH + x])){
+                        continue;
+                    }
+
+                    z_buffer[y * WIDTH + x] = z;
+                    frame_buffer[y * WIDTH + x] = 0xFFFFFFFF / POLYGONS * (polygon + 1);
+                }
+            }
+        }
+        
+        SDL_UpdateTexture(texture, NULL, frame_buffer, WIDTH * sizeof(Uint32));
+        
+        SDL_RenderClear(renderer);
+        SDL_RenderCopy(renderer, texture, NULL, NULL);
+        SDL_RenderPresent(renderer);
+
+        Uint32 frame_time = SDL_GetTicks() - start_time;
+        float fps = (frame_time > 0) ? 1000.0f / frame_time : 0.0f;
+        printf("%f fps\n", fps);
+        printf("%d polygons\n", POLYGONS);
+    }
+
+    free(z_buffer);
+    free(frame_buffer);
+
+    if (texture){
+        SDL_DestroyTexture(texture);
+    }
+    if (renderer){
+        SDL_DestroyRenderer(renderer);
+    }
+    if (window){
+        SDL_DestroyWindow(window);
+    }
+    SDL_Quit();
+
+    return status;
+}

+ 492 - 0
main_GPU.c

@@ -0,0 +1,492 @@
+#include "stdlib.h"
+#include "SDL2/SDL.h"
+#include "math.h"
+#include "time.h"
+#include <CL/cl.h>
+#include "rasteriver.h"
+#include <stdarg.h>
+
+// OpenCL C source of the rasterizer. raster_kernel runs once per pixel
+// (global id 0 = x, global id 1 = y): it walks all polygon_count triangles
+// (9 floats each: x, y, z for three vertices), keeps the colour of the
+// triangle with the largest interpolated z covering the pixel, and writes
+// it to frame_buffer. With show_z_buffer set, the pixel is instead written
+// as a grey level derived from that z (clamped to 0..800).
+// The source is one logical line (backslash continuations), so it must not
+// contain // comments.
+const char* kernel_source = " \
+int is_intersecting(float a, float b, float c, float d, float p, float q, float r, float s) { \
+    float det, gamma, lambda; \
+    \
+    det = (c - a) * (s - q) - (r - p) * (d - b); \
+    \
+    if (det == 0) { \
+        return 1; \
+    }  \
+    else { \
+        lambda = ((s - q) * (r - a) + (p - r) * (s - b)) / det; \
+        gamma = ((b - d) * (r - a) + (c - a) * (s - b)) / det; \
+        return (0 < lambda && lambda < 1) && (0 < gamma && gamma < 1); \
+    } \
+} \
+\
+void norm(float dest[2], float a[2]){ \
+    float magnitude = sqrt(a[0] * a[0] + a[1] * a[1]); \
+    \
+    dest[0] = a[0] / magnitude; \
+    dest[1] = a[1] / magnitude; \
+    } \
+    \
+    void sub(float dest[2], float a[2], float b[2]){ \
+    dest[0] = a[0] - b[0]; \
+    dest[1] = a[1] - b[1]; \
+    } \
+    \
+    void add(float dest[2], float a[2], float b[2]){ \
+    dest[0] = a[0] + b[0]; \
+    dest[1] = a[1] + b[1]; \
+} \
+\
+__kernel void raster_kernel(__global float* polygons, __global uint* frame_buffer, int polygon_count, int width, int height, int show_z_buffer){ \
+    int id_x = get_global_id(0); \
+    int id_y = get_global_id(1); \
+    \
+    float z_pixel = 0; \
+    uint frame_pixel = 0x22222222; \
+    \
+    for (int polygon = 0; polygon < polygon_count; polygon++){ \
+        int base = polygon * 9; \
+        float x0 = polygons[base]; \
+        float y0 = polygons[base + 1]; \
+        float z0 = polygons[base + 2]; \
+        float x1 = polygons[base + 3]; \
+        float y1 = polygons[base + 4]; \
+        float z1 = polygons[base + 5]; \
+        float x2 = polygons[base + 6]; \
+        float y2 = polygons[base + 7]; \
+        float z2 = polygons[base + 8]; \
+        \
+        float smallest_x = x0; \
+        float largest_x = x0; \
+        float smallest_y = y0; \
+        float largest_y = y0; \
+        \
+        for (int point = 0; point < 3; point++){ \
+            float x = polygons[base + point * 3]; \
+            float y = polygons[base + point * 3 + 1]; \
+            \
+            if (x > largest_x){ \
+                largest_x = x; \
+            } \
+            \
+            if (x < smallest_x){ \
+                smallest_x = x; \
+            } \
+            \
+            if (y > largest_y){ \
+                largest_y = y; \
+            } \
+            \
+            if (y < smallest_y){\
+                smallest_y = y;\
+            } \
+        } \
+        \
+        /* clamp the bounding box to the screen */ \
+        smallest_x = fmax(smallest_x, 0.0f); \
+        largest_x = fmin(largest_x, (float)width); \
+        smallest_y = fmax(smallest_y, 0.0f); \
+        largest_y = fmin(largest_y, (float)height); \
+        \
+        if (id_x >= smallest_x && id_x <= largest_x && id_y >= smallest_y && id_y <= largest_y){ \
+            int intersections = 0; \
+            \
+            intersections += is_intersecting(id_x, id_y, 10000, 100000, x0, y0, x1, y1); \
+            intersections += is_intersecting(id_x, id_y, 10000, 100000, x1, y1, x2, y2); \
+            intersections += is_intersecting(id_x, id_y, 10000, 100000, x2, y2, x0, y0); \
+            \
+            if (intersections % 2 == 0){ \
+                continue; \
+            } \
+            \
+            float denominator = (y1 - y2) * (x0 - x2) + (x2 - x1) * (y0 - y2); \
+            float w0 = ((y1 - y2) * (id_x - x2) + (x2 - x1) * (id_y - y2)) / denominator; \
+            float w1 = ((y2 - y0) * (id_x - x2) + (x0 - x2) * (id_y - y2)) / denominator; \
+            float w2 = 1.0 - w0 - w1; \
+            \
+            if (denominator < 0) { \
+                w0 = -w0; \
+                w1 = -w1; \
+                w2 = -w2; \
+                denominator = -denominator; \
+            } \
+            \
+            float z = w0 * z0 + w1 * z1 + w2 * z2; \
+            \
+            if (z < 0){ \
+                z *= -1; \
+            } \
+            \
+            if (z > z_pixel){ \
+                z_pixel = z; \
+            } \
+            else { \
+                continue; \
+            } \
+            \
+            frame_pixel = 0xFFFFFFFF / polygon_count * (polygon + 1); \
+        } \
+    } \
+    \
+    frame_buffer[id_y * width + id_x] = frame_pixel; \
+    \
+    if (!show_z_buffer){return;}\
+    \
+    float z = clamp(z_pixel, 0.0f, 800.0f);\
+    \
+    float norm_z = z / 800.0f;\
+    \
+    uchar intensity = (uchar)(norm_z * 255.0f);\
+    \
+    frame_buffer[id_y * width + id_x] = 0xFF000000 | (intensity << 16) | (intensity << 8) | intensity;\
+}\n";
+
+typedef int RI_result;
+typedef cl_uint RI_uint;
+
+const RI_result RI_ERROR = -1;
+const RI_result RI_SUCCESS = 0;
+const RI_result RI_NOT_RUNNING = -2;
+const RI_result RI_RUNNING = 1;
+
+RI_result erchk_func(cl_int error, int line, char *file){
+    if (error != CL_SUCCESS){
+        printf("OpenCL Error: %d at line %d at file %s\n", error, line, file);
+        return RI_ERROR;
+    }
+    
+    return RI_SUCCESS;
+}
+
+#define erchk(error) erchk_func(error, __LINE__, __FILE__)
+
+// ----- Rasteriver Vars
+int width;
+int height;
+
+int show_z_buffer = 0;
+
+int polygon_count = 20000;
+float *polygons = NULL;
+
+int running = 1;
+int frame = 0;
+
+int show_debug = 0;
+// -----
+
+// ----- Rendering Vars
+SDL_Window* window;
+SDL_Renderer* renderer;
+SDL_Texture* texture;
+
+RI_uint *frame_buffer;
+float *z_buffer;
+// -----
+
+// ----- OpenCL Vars
+cl_platform_id platform;
+cl_device_id device;
+RI_uint number_of_platforms, number_of_devices;
+
+cl_int error;
+
+cl_context context;
+cl_command_queue queue;
+
+cl_mem input_memory_buffer;
+cl_mem output_memory_buffer;
+
+cl_program kernel_program;
+cl_kernel compiled_kernel;
+
+size_t size_2d[2];
+
+RI_uint pattern;
+// -----
+
+// Prints a printf-style debug message prefixed with "[RasterIver] " when
+// debugging is enabled through RI_SetDebugFlag.
+//
+// string: printf format string; the variadic arguments are its values.
+// A newline is appended when the format does not already end in one, so
+// each message lands on its own line.
+// Returns RI_ERROR when debug output is disabled, RI_SUCCESS otherwise.
+RI_result debug(char *string, ...){
+    if (!show_debug){
+        return RI_ERROR;
+    }
+
+    va_list args;
+    va_start(args, string);
+    
+    // The prefix is printed on its own: appending to a string literal with
+    // strcat() writes into read-only memory.
+    fputs("[RasterIver] ", stdout);
+    vprintf(string, args);
+
+    va_end(args);
+
+    size_t length = strlen(string);
+    if (length == 0 || string[length - 1] != '\n'){
+        putchar('\n');
+    }
+
+    return RI_SUCCESS;
+}
+
+// Turns debug() output on (non-zero) or off (zero). Always returns RI_SUCCESS.
+RI_result RI_SetDebugFlag(int RI_ShowDebugFlag){
+    const int enabled = RI_ShowDebugFlag;
+
+    show_debug = enabled;
+    return RI_SUCCESS;
+}
+
+// Allocates the host frame and z buffers and creates the SDL window,
+// renderer and streaming texture, all sized from the global width/height.
+//
+// title: window title passed to SDL_CreateWindow.
+// Returns RI_SUCCESS, or RI_ERROR when the size is invalid, an allocation
+// fails or any SDL call fails. Whatever was allocated before a failure is
+// left in the globals for RI_Stop to release.
+RI_result Rendering_init(char *title) {
+    debug("Initializing Rendering...");
+
+    // Validate before the size is used to compute allocation sizes.
+    if (width <= 0 || height <= 0) {
+        debug("Invalid width or height");
+        return RI_ERROR;
+    }
+
+    const size_t pixel_count = (size_t)width * (size_t)height;
+
+    frame_buffer = malloc(sizeof(RI_uint) * pixel_count);
+    z_buffer = malloc(sizeof(float) * pixel_count);
+
+    if (frame_buffer == NULL || z_buffer == NULL) {
+        debug("Malloc Error");
+        return RI_ERROR;
+    }
+
+    if (SDL_Init(SDL_INIT_VIDEO) < 0) {
+        debug("SDL_Init failed");
+        return RI_ERROR;
+    }
+
+    window = SDL_CreateWindow(title, SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, width, height, SDL_WINDOW_OPENGL);
+    if (!window) {
+        debug("SDL_CreateWindow failed");
+        return RI_ERROR;
+    }
+
+    renderer = SDL_CreateRenderer(window, -1, SDL_RENDERER_ACCELERATED);
+    if (!renderer) {
+        debug("SDL_CreateRenderer failed");
+        return RI_ERROR;
+    }
+
+    texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, width, height);
+    if (!texture) {
+        debug("SDL_CreateTexture failed");
+        return RI_ERROR;
+    }
+
+    debug("Initialized Rendering");
+
+    return RI_SUCCESS;
+}
+
+// Selects whether the kernel outputs polygon colours (0) or a greyscale
+// view of the depth values (non-zero).
+//
+// clSetKernelArg copies the argument value when it is called, so once the
+// kernel exists the new flag has to be bound again to take effect.
+// Returns RI_SUCCESS, or RI_ERROR when rebinding the argument fails.
+RI_result RI_ShowZBuffer(int RI_ShowZBufferFlag){
+    show_z_buffer = RI_ShowZBufferFlag;
+
+    if (compiled_kernel != NULL){
+        return erchk(clSetKernelArg(compiled_kernel, 5, sizeof(int), &show_z_buffer));
+    }
+
+    return RI_SUCCESS;
+}
+
+// Stores the colour used as the fill pattern for the output buffer at the
+// start of each tick. Always returns RI_SUCCESS.
+RI_result RI_SetBackground(RI_uint RI_BackgroundColor){
+    const RI_uint colour = RI_BackgroundColor;
+
+    pattern = colour;
+    return RI_SUCCESS;
+}    
+
+// Sets up OpenCL for rendering: picks the first platform and its first GPU,
+// creates the context, command queue and output buffer, builds kernel_source
+// and binds the initial kernel arguments. Uses the global width/height.
+//
+// Returns RI_SUCCESS, or RI_ERROR as soon as any step fails (the failing
+// OpenCL call is reported through erchk; a build failure also prints the
+// compiler log).
+RI_result OpenCL_init(){
+    debug("Initializing OpenCL...");
+
+    if (clGetPlatformIDs(1, &platform, &number_of_platforms) != CL_SUCCESS || number_of_platforms == 0){
+        printf("No OpenCL Platforms\n");
+        return RI_ERROR;
+    }
+    
+    if (clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, &number_of_devices) != CL_SUCCESS || number_of_devices == 0){
+        printf("No Valid GPU's Found\n");
+        return RI_ERROR;
+    }
+    
+    context = clCreateContext(NULL, 1, &device, NULL, NULL, &error);
+    if (erchk(error) == RI_ERROR){
+        return RI_ERROR;
+    }
+
+    queue = clCreateCommandQueue(context, device, 0, &error);
+    if (erchk(error) == RI_ERROR){
+        return RI_ERROR;
+    }
+
+    output_memory_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(RI_uint) * width * height, NULL, &error);
+    if (erchk(error) == RI_ERROR){
+        return RI_ERROR;
+    }
+
+    kernel_program = clCreateProgramWithSource(context, 1, &kernel_source, NULL, &error);
+    if (erchk(error) == RI_ERROR){
+        return RI_ERROR;
+    }
+
+    error = clBuildProgram(kernel_program, 1, &device, NULL, NULL, NULL);
+    if (error != CL_SUCCESS){
+        // Show the compiler output, otherwise a kernel typo is undiagnosable.
+        size_t log_size = 0;
+        clGetProgramBuildInfo(kernel_program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
+
+        char *build_log = malloc(log_size + 1);
+        if (build_log != NULL){
+            if (clGetProgramBuildInfo(kernel_program, device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL) == CL_SUCCESS){
+                build_log[log_size] = '\0';
+                printf("OpenCL Build Log:\n%s\n", build_log);
+            }
+            free(build_log);
+        }
+
+        return erchk(error);
+    }
+
+    compiled_kernel = clCreateKernel(kernel_program, "raster_kernel", &error);
+    if (erchk(error) == RI_ERROR){
+        return RI_ERROR;
+    }
+
+    // Argument values are copied at the time of the call. input_memory_buffer
+    // may still be NULL here; it is created by RI_RequestPolygons.
+    if (erchk(clSetKernelArg(compiled_kernel, 0, sizeof(cl_mem), &input_memory_buffer)) == RI_ERROR
+        || erchk(clSetKernelArg(compiled_kernel, 1, sizeof(cl_mem), &output_memory_buffer)) == RI_ERROR
+        || erchk(clSetKernelArg(compiled_kernel, 2, sizeof(int), &polygon_count)) == RI_ERROR
+        || erchk(clSetKernelArg(compiled_kernel, 3, sizeof(int), &width)) == RI_ERROR
+        || erchk(clSetKernelArg(compiled_kernel, 4, sizeof(int), &height)) == RI_ERROR
+        || erchk(clSetKernelArg(compiled_kernel, 5, sizeof(int), &show_z_buffer)) == RI_ERROR){
+        return RI_ERROR;
+    }
+    
+    // One work item per pixel.
+    size_2d[0] = width;
+    size_2d[1] = height;
+    
+    pattern = 0x22222222;
+
+    debug("Initialized OpenCL");
+    
+    return RI_SUCCESS;
+}
+
+// Shuts the renderer down: clears the running flag, releases every OpenCL
+// object and SDL resource that was created, and frees the host buffers.
+// Each handle is reset to NULL after release, so calling RI_Stop after a
+// partial initialization, or calling it twice, is safe.
+// Always returns RI_SUCCESS.
+RI_result RI_Stop(){
+    debug("Stopping...");
+
+    running = 0;
+
+    if (input_memory_buffer != NULL){
+        clReleaseMemObject(input_memory_buffer);
+        input_memory_buffer = NULL;
+    }
+    if (output_memory_buffer != NULL){
+        clReleaseMemObject(output_memory_buffer);
+        output_memory_buffer = NULL;
+    }
+    if (compiled_kernel != NULL){
+        clReleaseKernel(compiled_kernel);
+        compiled_kernel = NULL;
+    }
+    if (kernel_program != NULL){
+        clReleaseProgram(kernel_program);
+        kernel_program = NULL;
+    }
+    if (queue != NULL){
+        clReleaseCommandQueue(queue);
+        queue = NULL;
+    }
+    if (context != NULL){
+        clReleaseContext(context);
+        context = NULL;
+    }
+
+    if (texture != NULL){
+        SDL_DestroyTexture(texture);
+        texture = NULL;
+    }
+    if (renderer != NULL){
+        SDL_DestroyRenderer(renderer);
+        renderer = NULL;
+    }
+    if (window != NULL){
+        SDL_DestroyWindow(window);
+        window = NULL;
+    }
+    SDL_Quit();
+
+    if (polygons != NULL){
+        free(polygons);
+        polygons = NULL;
+    }
+    else {
+        debug("Polygons Was Unset on Stop");
+    }
+
+    if (frame_buffer != NULL){
+        free(frame_buffer);
+        frame_buffer = NULL;
+    }
+    else {
+        debug("Frame-Buffer Was Unset on Stop");
+    }
+    
+    if (z_buffer != NULL){
+        free(z_buffer);
+        z_buffer = NULL;
+    }
+    else {
+        debug("Z-Buffer Was Unset on Stop");
+    }
+
+    debug("Stopped");
+
+    return RI_SUCCESS;
+}
+
+// (Re)allocates storage for RI_PolygonsToRequest triangles (9 floats each),
+// fills it with random values, uploads it to a fresh OpenCL buffer and binds
+// that buffer and the new count to the kernel.
+//
+// Returns RI_SUCCESS, or RI_ERROR when the count is not positive, the host
+// allocation fails, or an OpenCL call fails. A previously requested polygon
+// set is released first.
+RI_result RI_RequestPolygons(int RI_PolygonsToRequest){
+    debug("Requesting %d Polygons...\n", RI_PolygonsToRequest);
+
+    if (RI_PolygonsToRequest <= 0){
+        debug("Invalid polygon count");
+        return RI_ERROR;
+    }
+
+    polygon_count = RI_PolygonsToRequest;
+
+    const size_t float_count = (size_t)polygon_count * 3 * 3;
+
+    free(polygons);
+    polygons = malloc(sizeof(float) * float_count);
+    
+    if (polygons == NULL){
+        debug("Malloc Error");
+        return RI_ERROR;
+    }
+
+    // Initialize every coordinate of every vertex, not just the first three floats.
+    for (size_t i = 0; i < float_count; i++){
+        polygons[i] = rand();
+    }
+
+    // Drop the buffer of an earlier request so it does not leak.
+    if (input_memory_buffer != NULL){
+        clReleaseMemObject(input_memory_buffer);
+        input_memory_buffer = NULL;
+    }
+
+    input_memory_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * float_count, polygons, &error);
+    
+    if (input_memory_buffer == NULL || error != CL_SUCCESS) {
+        debug("OpenCL buffer creation failed for polygons.\n");
+        erchk(error);
+        return RI_ERROR;
+    }
+
+    // clSetKernelArg copies the value it is given, so the kernel has to be
+    // pointed at the new buffer and told the new count.
+    if (compiled_kernel != NULL){
+        if (erchk(clSetKernelArg(compiled_kernel, 0, sizeof(cl_mem), &input_memory_buffer)) == RI_ERROR
+            || erchk(clSetKernelArg(compiled_kernel, 2, sizeof(int), &polygon_count)) == RI_ERROR){
+            return RI_ERROR;
+        }
+    }
+
+    debug("Request for %d Polygons Granted\n", polygon_count);
+    
+    return RI_SUCCESS;
+}
+
+// Renders one frame: handles pending SDL events, regenerates random polygon
+// coordinates, uploads them, runs the raster kernel over every pixel, reads
+// the result back into frame_buffer and presents it.
+//
+// Returns RI_SUCCESS on a rendered frame. Returns RI_ERROR when the renderer
+// is not running, a required buffer is unset, or any OpenCL call fails.
+RI_result RI_Tick(){
+    debug("Ticking...");
+
+    if (!running) {
+        return RI_ERROR;
+    }
+
+    if (polygons == NULL){
+        debug("Polygons is Unset");
+        return RI_ERROR;
+    }
+
+    if (frame_buffer == NULL){
+        debug("Frame-Buffer is Unset");
+        return RI_ERROR;
+    }
+    
+    if (z_buffer == NULL){
+        debug("Z-Buffer is Unset");
+        return RI_ERROR;
+    }
+
+    // Events are handled first so the window can still be closed when the
+    // GPU work below keeps failing.
+    SDL_Event event;
+    while (SDL_PollEvent(&event)){
+        if (event.type == SDL_QUIT){
+            running = 0;
+        }
+    }
+
+    // Fresh random coordinates for every float of every polygon.
+    const size_t float_count = (size_t)polygon_count * 3 * 3;
+    for (size_t i = 0; i < float_count; i++){
+        polygons[i] = rand() % width + 1;
+    }
+
+    // Kernel arguments are copied when set; rebind the ones that can change
+    // after initialization (polygon buffer, polygon count, z-buffer view).
+    if (erchk(clSetKernelArg(compiled_kernel, 0, sizeof(cl_mem), &input_memory_buffer)) == RI_ERROR
+        || erchk(clSetKernelArg(compiled_kernel, 2, sizeof(int), &polygon_count)) == RI_ERROR
+        || erchk(clSetKernelArg(compiled_kernel, 5, sizeof(int), &show_z_buffer)) == RI_ERROR){
+        return RI_ERROR;
+    }
+
+    if (erchk(clEnqueueWriteBuffer(queue, input_memory_buffer, CL_TRUE, 0, sizeof(float) * float_count, polygons, 0, NULL, NULL)) == RI_ERROR){
+        return RI_ERROR;
+    }
+
+    if (erchk(clEnqueueFillBuffer(queue, output_memory_buffer, &pattern, sizeof(RI_uint), 0, sizeof(RI_uint) * width * height, 0, NULL, NULL)) == RI_ERROR){
+        return RI_ERROR;
+    }
+
+    if (erchk(clEnqueueNDRangeKernel(queue, compiled_kernel, 2, NULL, size_2d, NULL, 0, NULL, NULL)) == RI_ERROR){
+        return RI_ERROR;
+    }
+
+    if (erchk(clFinish(queue)) == RI_ERROR){
+        return RI_ERROR;
+    }
+
+    // Read into the pixel storage itself; passing &frame_buffer would
+    // overwrite the pointer variable and the memory after it.
+    if (erchk(clEnqueueReadBuffer(queue, output_memory_buffer, CL_TRUE, 0, sizeof(RI_uint) * width * height, frame_buffer, 0, NULL, NULL)) == RI_ERROR){
+        return RI_ERROR;
+    }
+    
+    SDL_UpdateTexture(texture, NULL, frame_buffer, width * sizeof(RI_uint));
+    
+    SDL_RenderClear(renderer);
+    SDL_RenderCopy(renderer, texture, NULL, NULL);
+    SDL_RenderPresent(renderer);
+    
+    frame++;
+    
+    debug("Ticked");
+
+    return RI_SUCCESS;
+}
+
+// Reports the renderer state: RI_RUNNING while the running flag is set,
+// RI_NOT_RUNNING otherwise.
+RI_result RI_IsRunning(){
+    return running ? RI_RUNNING : RI_NOT_RUNNING;
+}
+
+// Initializes the library: seeds rand(), records the window size, then sets
+// up OpenCL followed by SDL rendering.
+//
+// RI_WindowWidth / RI_WindowHeight: window and frame size in pixels, both
+// must be positive. RI_WindowTitle: title given to the SDL window.
+// Returns RI_SUCCESS, or RI_ERROR when the size is invalid or either
+// subsystem fails to initialize.
+RI_result RI_Init(int RI_WindowWidth, int RI_WindowHeight, char *RI_WindowTitle){
+    srand(time(NULL));
+
+    // The size is used to dimension OpenCL buffers, so reject bad values
+    // before anything is created.
+    if (RI_WindowWidth <= 0 || RI_WindowHeight <= 0){
+        debug("Invalid width or height");
+        return RI_ERROR;
+    }
+    
+    width = RI_WindowWidth;
+    height = RI_WindowHeight;
+        
+    if (OpenCL_init() == RI_ERROR){
+        return RI_ERROR;
+    }
+
+    if (Rendering_init(RI_WindowTitle) == RI_ERROR){
+        return RI_ERROR;
+    }
+
+    return RI_SUCCESS;
+}

+ 290 - 0
main_GPU_fixed_polygons.c

@@ -0,0 +1,290 @@
+#include "stdlib.h"
+#include "SDL2/SDL.h"
+#include "math.h"
+#include "time.h"
+#include <CL/cl.h>
+
+// OpenCL C source of the rasteriser, compiled at runtime.
+//
+// raster_kernel runs one work-item per pixel (global ids 0/1 are x/y). For each
+// of the POLYGONS triangles (9 consecutive floats: x,y,z for three vertices) it:
+//   1. computes the triangle's bounding box, clamped to the screen, and skips
+//      the triangle when the pixel lies outside it;
+//   2. casts a segment from the pixel to (10000, 100000) and counts crossings
+//      with the three edges; an even count means "outside";
+//   3. interpolates z with barycentric weights and keeps the triangle only if
+//      |z| is larger than the largest |z| seen so far for this pixel.
+// The pixel is written as 0xFFFFFFFF / POLYGONS * (index + 1), or 0x22222222
+// when no triangle covered it. With SHOW_Z_BUFFER set, the pixel is instead
+// overwritten with a grey level proportional to z clamped to [0, 800].
+//
+// The whole source is a single logical line (backslash continuations), so it
+// must not contain `//` comments.
+const char* kernel_source = " \
+int is_intersecting(float a, float b, float c, float d, float p, float q, float r, float s) { \
+    float det, gamma, lambda; \
+    \
+    det = (c - a) * (s - q) - (r - p) * (d - b); \
+    \
+    if (det == 0) { \
+        return 1; \
+    }  \
+    else { \
+        lambda = ((s - q) * (r - a) + (p - r) * (s - b)) / det; \
+        gamma = ((b - d) * (r - a) + (c - a) * (s - b)) / det; \
+        return (0 < lambda && lambda < 1) && (0 < gamma && gamma < 1); \
+    } \
+} \
+\
+void norm(float dest[2], float a[2]){ \
+    float magnitude = sqrt(a[0] * a[0] + a[1] * a[1]); \
+    \
+    dest[0] = a[0] / magnitude; \
+    dest[1] = a[1] / magnitude; \
+    } \
+    \
+    void sub(float dest[2], float a[2], float b[2]){ \
+    dest[0] = a[0] - b[0]; \
+    dest[1] = a[1] - b[1]; \
+    } \
+    \
+    void add(float dest[2], float a[2], float b[2]){ \
+    dest[0] = a[0] + b[0]; \
+    dest[1] = a[1] + b[1]; \
+} \
+\
+__kernel void raster_kernel(__global float* polygons, __global uint* frame_buffer, int POLYGONS, int WIDTH, int HEIGHT, int SHOW_Z_BUFFER){ \
+    int id_x = get_global_id(0); \
+    int id_y = get_global_id(1); \
+    \
+    float z_pixel = 0; \
+    uint frame_pixel = 0x22222222; \
+    \
+    for (int polygon = 0; polygon < POLYGONS; polygon++){ \
+        int base = polygon * 9; \
+        float x0 = polygons[base]; \
+        float y0 = polygons[base + 1]; \
+        float z0 = polygons[base + 2]; \
+        float x1 = polygons[base + 3]; \
+        float y1 = polygons[base + 4]; \
+        float z1 = polygons[base + 5]; \
+        float x2 = polygons[base + 6]; \
+        float y2 = polygons[base + 7]; \
+        float z2 = polygons[base + 8]; \
+        \
+        float smallest_x = x0; \
+        float largest_x = x0; \
+        float smallest_y = y0; \
+        float largest_y = y0; \
+        \
+        for (int point = 0; point < 3; point++){ \
+            float x = polygons[base + point * 3]; \
+            float y = polygons[base + point * 3 + 1]; \
+            \
+            if (x > largest_x){ \
+                largest_x = x; \
+            } \
+            \
+            if (x < smallest_x){ \
+                smallest_x = x; \
+            } \
+            \
+            if (y > largest_y){ \
+                largest_y = y; \
+            } \
+            \
+            if (y < smallest_y){\
+                smallest_y = y;\
+            } \
+        } \
+        \
+        smallest_x = fmax(smallest_x, 0.0f); \
+        largest_x = fmin(largest_x, (float)WIDTH); \
+        smallest_y = fmax(smallest_y, 0.0f); \
+        largest_y = fmin(largest_y, (float)HEIGHT); \
+        \
+        if (id_x >= smallest_x && id_x <= largest_x && id_y >= smallest_y && id_y <= largest_y){ \
+            int intersections = 0; \
+            \
+            intersections += is_intersecting(id_x, id_y, 10000, 100000, x0, y0, x1, y1); \
+            intersections += is_intersecting(id_x, id_y, 10000, 100000, x1, y1, x2, y2); \
+            intersections += is_intersecting(id_x, id_y, 10000, 100000, x2, y2, x0, y0); \
+            \
+            if (intersections % 2 == 0){ \
+                continue; \
+            } \
+            \
+            float denominator = (y1 - y2) * (x0 - x2) + (x2 - x1) * (y0 - y2); \
+            float w0 = ((y1 - y2) * (id_x - x2) + (x2 - x1) * (id_y - y2)) / denominator; \
+            float w1 = ((y2 - y0) * (id_x - x0) + (x0 - x2) * (id_y - y2)) / denominator; \
+            float w2 = 1.0 - w0 - w1; \
+            \
+            if (denominator < 0) { \
+                w0 = -w0; \
+                w1 = -w1; \
+                w2 = -w2; \
+                denominator = -denominator; \
+            } \
+            \
+            float z = w0 * z0 + w1 * z1 + w2 * z2; \
+            \
+            if (z < 0){ \
+                z *= -1; \
+            } \
+            \
+            if (z > z_pixel){ \
+                z_pixel = z; \
+            } \
+            else { \
+                continue; \
+            } \
+            \
+            frame_pixel = 0xFFFFFFFF / POLYGONS * (polygon + 1); \
+        } \
+    } \
+    \
+    frame_buffer[id_y * WIDTH + id_x] = frame_pixel; \
+    \
+    if (!SHOW_Z_BUFFER){return;}\
+    \
+    float z = clamp(z_pixel, 0.0f, 800.0f);\
+    \
+    float norm_z = z / 800.0f;\
+    \
+    uchar intensity = (uchar)(norm_z * 255.0f);\
+    \
+    frame_buffer[id_y * WIDTH + id_x] = 0xFF000000 | (intensity << 16) | (intensity << 8) | intensity;\
+}\n";
+
+// Terminates the program when an OpenCL call reports failure.
+//
+// error: status code returned by (or through the errcode_ret of) an OpenCL call.
+// line, file: call-site location, supplied by the erchk() macro.
+//
+// Returns normally only when error == CL_SUCCESS; otherwise prints the code and
+// location to stderr and exits with EXIT_FAILURE.
+void erchk_func(cl_int error, int line, const char *file){
+    if (error == CL_SUCCESS){
+        return;
+    }
+
+    fprintf(stderr, "ERROR :O   %d, line %d at file %s\n", error, line, file);
+    exit(EXIT_FAILURE);
+}
+
+// Checks an OpenCL status code and records the line/file of the call site.
+#define erchk(error) erchk_func(error, __LINE__, __FILE__)
+
+// Window, texture and frame-buffer size in pixels; also the 2-D global work size of the kernel launch.
+const int WIDTH = 800;
+const int HEIGHT = 800;
+// Number of random triangles generated and rasterised every frame.
+const int POLYGONS = 20000;
+// Forwarded to the kernel as SHOW_Z_BUFFER: non-zero makes it write grey-scale depth instead of colour.
+const int SHOW_Z_BUFFER = 0;
+
+// Stand-alone GPU rasteriser demo.
+//
+// Picks the first OpenCL platform and GPU, builds raster_kernel from
+// kernel_source, then loops until the SDL window is closed: fill the polygon
+// array with random coordinates, upload it, run the kernel over a
+// WIDTH x HEIGHT grid, read the frame buffer back and present it through an
+// SDL streaming texture. Frame rate and polygon count are printed every frame.
+//
+// Returns 0 on a normal quit, 1 when no platform/GPU is available or when host
+// memory or the SDL window/renderer/texture could not be created. OpenCL
+// failures terminate the process through erchk().
+int main(){
+    srand(time(NULL));
+
+    // ----- Check for Valid Platforms & GPUs
+    cl_platform_id platform;
+    cl_device_id device;
+    cl_uint number_of_platforms = 0, number_of_devices = 0;
+
+    // Nothing below can work without a platform and a device, so stop here
+    // instead of carrying on with uninitialised handles.
+    if (clGetPlatformIDs(1, &platform, &number_of_platforms) != CL_SUCCESS || number_of_platforms == 0){
+        printf("No OpenCL Platforms");
+        return 1;
+    }
+
+    if (clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, &number_of_devices) != CL_SUCCESS || number_of_devices == 0){
+        printf("No GPU's Found");
+        return 1;
+    }
+    // -----
+
+    // ----- Host buffers
+    // WIDTH/HEIGHT/POLYGONS are not constant expressions in C, so arrays sized
+    // by them would be multi-megabyte VLAs on the stack; keep them on the heap.
+    const size_t polygon_bytes = sizeof(float) * 3 * 3 * POLYGONS;
+    const size_t frame_bytes = sizeof(cl_uint) * WIDTH * HEIGHT;
+
+    float (*polygons)[3][3] = calloc(POLYGONS, sizeof *polygons);
+    cl_uint *frame_buffer = calloc((size_t)WIDTH * HEIGHT, sizeof *frame_buffer);
+
+    if (polygons == NULL || frame_buffer == NULL){
+        printf("Out of memory\n");
+        free(polygons);
+        free(frame_buffer);
+        return 1;
+    }
+    // -----
+
+    // ----- Setup OpenCL
+    cl_int error;
+
+    cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &error);
+    erchk(error);
+
+    cl_command_queue queue = clCreateCommandQueue(context, device, 0, &error);
+    erchk(error);
+
+    cl_mem input_memory_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, polygon_bytes, polygons, &error);
+    erchk(error);
+
+    cl_mem output_memory_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, frame_bytes, NULL, &error);
+    erchk(error);
+
+    printf("%zu bytes\n", polygon_bytes + frame_bytes);
+
+    cl_program kernel_program = clCreateProgramWithSource(context, 1, &kernel_source, NULL, &error);
+    erchk(error);
+
+    error = clBuildProgram(kernel_program, 1, &device, NULL, NULL, NULL);
+    erchk(error);
+
+    cl_kernel compiled_kernel = clCreateKernel(kernel_program, "raster_kernel", &error);
+    erchk(error);
+
+    erchk(clSetKernelArg(compiled_kernel, 0, sizeof(cl_mem), &input_memory_buffer));
+    erchk(clSetKernelArg(compiled_kernel, 1, sizeof(cl_mem), &output_memory_buffer));
+    erchk(clSetKernelArg(compiled_kernel, 2, sizeof(int), &POLYGONS));
+    erchk(clSetKernelArg(compiled_kernel, 3, sizeof(int), &WIDTH));
+    erchk(clSetKernelArg(compiled_kernel, 4, sizeof(int), &HEIGHT));
+    erchk(clSetKernelArg(compiled_kernel, 5, sizeof(int), &SHOW_Z_BUFFER));
+
+    size_t size_2d[2] = {WIDTH, HEIGHT};
+
+    // Background colour written to the output buffer before each kernel launch.
+    cl_uint pattern = 0x22222222;
+    // -----
+
+    SDL_Init(SDL_INIT_VIDEO);
+    SDL_Window* window = SDL_CreateWindow("Rasterizer", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, WIDTH, HEIGHT, SDL_WINDOW_OPENGL);
+    SDL_Renderer* renderer = SDL_CreateRenderer(window, -1, SDL_RENDERER_ACCELERATED);
+    SDL_Texture* texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, WIDTH, HEIGHT);
+
+    int exit_code = 0;
+    int running = 1;
+
+    // A failure anywhere in the SDL chain above ends in a NULL texture, so one
+    // check covers all of it; skip the loop and fall through to cleanup.
+    if (texture == NULL){
+        printf("SDL setup failed: %s\n", SDL_GetError());
+        exit_code = 1;
+        running = 0;
+    }
+
+    while (running) {
+        Uint64 start_time = SDL_GetPerformanceCounter();
+
+        // Fresh random triangles every frame; every component is in [1, WIDTH].
+        for (int p = 0; p < POLYGONS; p++){
+            for (int point = 0; point < 3; point++){
+                for (int i = 0; i < 3; i++){
+                    polygons[p][point][i] = rand() % WIDTH + 1;
+                }
+            }
+        }
+
+        erchk(clEnqueueWriteBuffer(queue, input_memory_buffer, CL_TRUE, 0, polygon_bytes, polygons, 0, NULL, NULL));
+
+        erchk(clEnqueueFillBuffer(queue, output_memory_buffer, &pattern, sizeof(cl_uint), 0, frame_bytes, 0, NULL, NULL));
+
+        erchk(clEnqueueNDRangeKernel(queue, compiled_kernel, 2, NULL, size_2d, NULL, 0, NULL, NULL));
+
+        erchk(clFinish(queue));
+
+        erchk(clEnqueueReadBuffer(queue, output_memory_buffer, CL_TRUE, 0, frame_bytes, frame_buffer, 0, NULL, NULL));
+
+        SDL_Event event;
+        while (SDL_PollEvent(&event)){
+            switch (event.type){
+                case SDL_QUIT:
+                    running = 0;
+                    break;
+                default:
+                    break;
+            }
+        }
+
+        SDL_UpdateTexture(texture, NULL, frame_buffer, WIDTH * sizeof(cl_uint));
+
+        SDL_RenderClear(renderer);
+        SDL_RenderCopy(renderer, texture, NULL, NULL);
+        SDL_RenderPresent(renderer);
+
+        double delta_time = (double)(SDL_GetPerformanceCounter() - start_time) / (double)SDL_GetPerformanceFrequency();
+        double fps = 1.0 / delta_time;
+
+        printf("%lf fps\n", fps);
+        printf("%d polygons\n", POLYGONS);
+    }
+
+    clReleaseMemObject(input_memory_buffer);
+    clReleaseMemObject(output_memory_buffer);
+    clReleaseKernel(compiled_kernel);
+    clReleaseProgram(kernel_program);
+    clReleaseCommandQueue(queue);
+    clReleaseContext(context);
+
+    SDL_DestroyTexture(texture);
+    SDL_DestroyRenderer(renderer);
+    SDL_DestroyWindow(window);
+    SDL_Quit();
+
+    free(frame_buffer);
+    free(polygons);
+
+    return exit_code;
+}

+ 253 - 0
main_test.c

@@ -0,0 +1,253 @@
+#include "stdlib.h"
+#include "SDL2/SDL.h"
+#include "math.h"
+#include "time.h"
+#include <CL/cl.h>
+
+// Window and texture size in pixels.
+const int WIDTH = 800;
+const int HEIGHT = 800;
+// Bytes in one polygon: 3 vertices x 3 float components.
+// NOTE(review): not referenced anywhere else in this file.
+const int POLYGON_SIZE = sizeof(float) * 3 * 3;
+// Number of polygons; also the kernel's global work size and the number of output elements read back.
+const int POLYGONS = 5;
+
+// Tests whether segment (a,b)-(c,d) crosses segment (p,q)-(r,s).
+//
+// Returns 1 when both line parameters fall strictly between 0 and 1 (touching
+// at an endpoint does not count), 0 otherwise. A zero determinant (parallel
+// or degenerate segments) is reported as 1.
+int is_intersecting(int a, int b, int c, int d, int p, int q, int r, int s) {
+    const float det = (c - a) * (s - q) - (r - p) * (d - b);
+
+    if (det == 0) {
+        return 1;
+    }
+
+    // Numerators are evaluated in integer arithmetic; only the division is floating point.
+    const float lam = ((s - q) * (r - a) + (p - r) * (s - b)) / det;
+    const float gam = ((b - d) * (r - a) + (c - a) * (s - b)) / det;
+
+    const int lam_inside = (0 < lam && lam < 1);
+    const int gam_inside = (0 < gam && gam < 1);
+
+    return lam_inside && gam_inside;
+}
+
+// Writes a / |a| into dest. The length is computed before any store, so dest
+// may be the same array as a. A zero-length input divides by zero.
+void norm(float dest[2], float a[2]){
+    const double x_squared = pow(a[0], 2);
+    const double y_squared = pow(a[1], 2);
+    const float magnitude = sqrt((x_squared + y_squared));
+
+    for (int axis = 0; axis < 2; axis++){
+        dest[axis] = a[axis] / magnitude;
+    }
+}
+
+// Component-wise 2-D subtraction: dest = a - b.
+void sub(float dest[2], float a[2], float b[2]){
+    for (int axis = 0; axis < 2; axis++){
+        dest[axis] = a[axis] - b[axis];
+    }
+}
+
+// Component-wise 2-D addition: dest = a + b.
+void add(float dest[2], float a[2], float b[2]){
+    for (int axis = 0; axis < 2; axis++){
+        dest[axis] = a[axis] + b[axis];
+    }
+}
+
+// OpenCL C source of a minimal smoke-test kernel: every work-item writes the
+// marker value 80085 into the output element selected by its global id. The
+// polygons argument is accepted but not read.
+const char* kernel_source = 
+"__kernel void raster_kernel(__global float* polygons, __global uint* frame_buffer){\n"
+"int id = get_global_id(0); \n"
+" \n"
+" \n"
+"frame_buffer[id] = 80085; \n"
+" \n"
+"}\n";
+
+// Prints the failing step and terminates when an OpenCL call did not return CL_SUCCESS.
+static void cl_check(cl_int status, const char *what){
+    if (status != CL_SUCCESS){
+        printf("OpenCL error %d while %s\n", status, what);
+        exit(1);
+    }
+}
+
+// OpenCL smoke test.
+//
+// Picks the first platform and GPU, builds raster_kernel and, once per frame,
+// refills the host polygon array with random coordinates, pre-fills the
+// POLYGONS-element output buffer with a marker pattern, runs the kernel with
+// one work-item per polygon, reads the buffer back and prints every element.
+// The frame buffer (zero everywhere except those first POLYGONS entries) is
+// also shown in an SDL window; the loop ends when the window is closed.
+//
+// Returns 0 on a normal quit and 1 when no platform/GPU is found or when host
+// memory or the SDL window/renderer/texture could not be created. Any failing
+// OpenCL call terminates the process via cl_check().
+int main(){
+    srand(time(NULL));
+
+    // ----- Check for Valid Platforms & GPUs
+    cl_platform_id platform;
+    cl_device_id device;
+    cl_uint number_of_platforms = 0, number_of_devices = 0;
+
+    if (clGetPlatformIDs(1, &platform, &number_of_platforms) != CL_SUCCESS || number_of_platforms == 0){
+        printf("No OpenCL Platforms");
+        return 1;
+    }
+
+    if (clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, &number_of_devices) != CL_SUCCESS || number_of_devices == 0){
+        printf("No GPU's Found");
+        return 1;
+    }
+    // -----
+
+    // ----- Host buffers
+    // Zero-initialised heap storage: the full frame buffer is uploaded to the
+    // texture although only its first POLYGONS entries are ever written, and
+    // the polygon array is copied into the device buffer at creation time.
+    const size_t polygon_bytes = sizeof(float) * 3 * 3 * POLYGONS;
+    const size_t output_bytes = sizeof(cl_uint) * POLYGONS;
+
+    float (*polygons)[3][3] = calloc(POLYGONS, sizeof *polygons);
+    cl_uint *frame_buffer = calloc((size_t)WIDTH * HEIGHT, sizeof *frame_buffer);
+
+    if (polygons == NULL || frame_buffer == NULL){
+        printf("Out of memory\n");
+        free(polygons);
+        free(frame_buffer);
+        return 1;
+    }
+    // -----
+
+    // ----- Setup OpenCL
+    cl_int status;
+
+    cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &status);
+    cl_check(status, "creating the context");
+
+    cl_command_queue queue = clCreateCommandQueue(context, device, 0, &status);
+    cl_check(status, "creating the command queue");
+
+    // A non-NULL host pointer is only valid together with COPY/USE_HOST_PTR.
+    cl_mem input_memory_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, polygon_bytes, polygons, &status);
+    cl_check(status, "creating the input buffer");
+
+    cl_mem output_memory_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, output_bytes, NULL, &status);
+    cl_check(status, "creating the output buffer");
+
+    cl_program kernel_program = clCreateProgramWithSource(context, 1, &kernel_source, NULL, &status);
+    cl_check(status, "creating the program");
+
+    cl_check(clBuildProgram(kernel_program, 1, &device, NULL, NULL, NULL), "building the program");
+
+    cl_kernel compiled_kernel = clCreateKernel(kernel_program, "raster_kernel", &status);
+    cl_check(status, "creating the kernel");
+
+    cl_check(clSetKernelArg(compiled_kernel, 0, sizeof(cl_mem), &input_memory_buffer), "setting kernel argument 0");
+    cl_check(clSetKernelArg(compiled_kernel, 1, sizeof(cl_mem), &output_memory_buffer), "setting kernel argument 1");
+
+    size_t size = POLYGONS;
+
+    // Marker written to the output buffer before each launch.
+    cl_uint pattern = 121212;
+    // -----
+
+    SDL_Init(SDL_INIT_VIDEO);
+    SDL_Window* window = SDL_CreateWindow("Rasterizer", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, WIDTH, HEIGHT, SDL_WINDOW_OPENGL);
+    SDL_Renderer* renderer = SDL_CreateRenderer(window, -1, SDL_RENDERER_ACCELERATED);
+    SDL_Texture* texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, WIDTH, HEIGHT);
+
+    int exit_code = 0;
+    int running = 1;
+
+    // A failure anywhere in the SDL chain above ends in a NULL texture.
+    if (texture == NULL){
+        printf("SDL setup failed: %s\n", SDL_GetError());
+        exit_code = 1;
+        running = 0;
+    }
+
+    while (running) {
+        // Random coordinates in [1, WIDTH]; they are not uploaded, because the
+        // kernel does not read its polygons argument.
+        for (int p = 0; p < POLYGONS; p++){
+            for (int point = 0; point < 3; point++){
+                for (int i = 0; i < 3; i++){
+                    polygons[p][point][i] = rand() % WIDTH + 1;
+                }
+            }
+        }
+
+        cl_check(clEnqueueFillBuffer(queue, output_memory_buffer, &pattern, sizeof(cl_uint), 0, output_bytes, 0, NULL, NULL), "filling the output buffer");
+
+        cl_check(clEnqueueNDRangeKernel(queue, compiled_kernel, 1, NULL, &size, NULL, 0, NULL, NULL), "launching the kernel");
+
+        cl_check(clFinish(queue), "waiting for the queue");
+
+        cl_check(clEnqueueReadBuffer(queue, output_memory_buffer, CL_TRUE, 0, output_bytes, frame_buffer, 0, NULL, NULL), "reading the output buffer");
+
+        for (int i = 0; i < POLYGONS; i++){
+            printf("%u\n", frame_buffer[i]);
+        }
+
+        SDL_Event event;
+        while (SDL_PollEvent(&event)){
+            switch (event.type){
+                case SDL_QUIT:
+                    running = 0;
+                    break;
+                default:
+                    break;
+            }
+        }
+
+        SDL_UpdateTexture(texture, NULL, frame_buffer, WIDTH * sizeof(cl_uint));
+
+        SDL_RenderClear(renderer);
+        SDL_RenderCopy(renderer, texture, NULL, NULL);
+        SDL_RenderPresent(renderer);
+    }
+
+    clReleaseMemObject(input_memory_buffer);
+    clReleaseMemObject(output_memory_buffer);
+    clReleaseKernel(compiled_kernel);
+    clReleaseProgram(kernel_program);
+    clReleaseCommandQueue(queue);
+    clReleaseContext(context);
+
+    SDL_DestroyTexture(texture);
+    SDL_DestroyRenderer(renderer);
+    SDL_DestroyWindow(window);
+    SDL_Quit();
+
+    free(frame_buffer);
+    free(polygons);
+
+    return exit_code;
+}

+ 23 - 0
rasteriver.h

@@ -0,0 +1,23 @@
+#ifndef RASTERIVER_H
+#define RASTERIVER_H
+
+#include <stdint.h>
+
+/** Status code returned by every RI_* function; compare it against the RI_* constants below. */
+typedef int RI_result;
+/** 32-bit unsigned value, used for colours such as the background colour. */
+typedef uint32_t RI_uint;
+
+/* Result codes; the values are defined in the library implementation. */
+extern const RI_result RI_SUCCESS;
+extern const RI_result RI_ERROR;
+extern const RI_result RI_NOT_RUNNING;
+extern const RI_result RI_RUNNING;
+
+/**
+ * Initialises the library for a window of the given size and title.
+ * Returns RI_ERROR when OpenCL initialisation fails, RI_SUCCESS otherwise.
+ */
+RI_result RI_Init(int RI_WindowWidth, int RI_WindowHeight, char *RI_WindowTitle);
+RI_result RI_Stop();
+/** Returns RI_RUNNING while the renderer is running, RI_NOT_RUNNING otherwise. */
+RI_result RI_IsRunning(void);
+RI_result RI_RequestPolygons(int RI_PolygonsToRequest);
+RI_result RI_Tick();
+RI_result RI_SetBackground(RI_uint RI_BackgroundColor);
+RI_result RI_ShowZBuffer(int RI_ShowZBufferFlag);
+RI_result RI_SetDebugFlag(int RI_ShowDebugFlag);
+
+#endif // RASTERIVER_H

BIN
rasteriver.so