I am trying to make a port of wolfenstein 3d for the prizm that doesn't require overclocking the cpu. The current version of the raycast 3d engine I am using is too slow for that, and I am having trouble trying to optimize it as I am pretty new to C, and 3d graphics.

This is the engine code:

Code:

#include "color.h"
#include "keyboard.hpp"
#include "keyboard_syscalls.h"
#include "display.h"
#include "display_syscalls.h"
#include "rtc.h"
#include "RTC_syscalls.h"
#include "string.h"
#include "stdlib.h"
#include "math.h"
//#include "raycastTextures.h"
//#include "half.h"

void plot(int x0, int y0, int color);
void drawLine(int x, int y1, int y2, int color);
void fillArea(int x, int y, int width, int height, int color);

void keyupdate(void);
int keydownlast(int basic_keycode);
int keydownhold(int basic_keycode);
void keymenu(void);

static const unsigned short* keyboard_register = (unsigned short*)0xA44B0000;
static unsigned short lastkey[8];
static unsigned short holdkey[8];

#define cosf(x) sinf((PIOVERTWO)+(x))

#define true 1
#define false 0

#define mapWidth 24
#define mapHeight 24

#define BORDER_COLOR COLOR_ALICEBLUE
#define CEILING_COLOR COLOR_GRAY
#define FLOOR_COLOR   COLOR_LIGHTGRAY

#define w LCD_WIDTH_PX
#define h LCD_HEIGHT_PX

int worldMap[mapWidth][mapHeight]=
{
  {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
  {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1},
  {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1},
  {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1},
  {1,0,0,0,0,0,2,2,2,2,2,0,0,0,0,3,0,3,0,3,0,0,0,1},
  {1,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1},
  {1,0,0,0,0,0,2,0,0,0,2,0,0,0,0,3,0,0,0,3,0,0,0,1},
  {1,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1},
  {1,0,0,0,0,0,2,2,0,2,2,0,0,0,0,3,0,3,0,3,0,0,0,1},
  {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1},
  {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1},
  {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1},
  {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1},
  {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1},
  {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1},
  {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1},
  {1,4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1},
  {1,4,0,4,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1},
  {1,4,0,0,0,0,5,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1},
  {1,4,0,4,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1},
  {1,4,0,4,4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1},
  {1,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1},
  {1,4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1},
  {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}
};

/////////////////////////////////////////////////////////////////////////////////////

int main(void)
{
   enum { mode_splash, mode_menu, mode_play } mode = mode_play;   //   Game modes
   _Bool running = true;
   
   float posX = 21, posY = 12;        //x and y start position
   float dirX = -1, dirY = 0;       //initial direction vector
   float planeX = 0, planeY = 0.66;    //the 2d raycaster version of camera plane
 
   float time = 0;    //time of current frame
   float oldTime = 0; //time of previous frame

   while (running)
   {
      keyupdate();      //   Get key data
      
      switch(mode)
      {
         case mode_splash:   //   Splash screen
            
            
            
            break;
            
         case mode_menu:      //   Menus
            
            
            
            break;
            
         case mode_play:      //   Main game
         
         /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         while(running)
         {
            int x = 0;
            for(x = 0; x < w; x++)
            {
               //calculate ray position and direction
               float cameraX = 2 * x / (float)w - 1; //x-coordinate in camera space
               float rayPosX = posX;
               float rayPosY = posY;
               float rayDirX = dirX + planeX * cameraX;
               float rayDirY = dirY + planeY * cameraX;
               //which box of the map we're in 
               int mapX = (int)rayPosX;
               int mapY = (int)rayPosY;
                  
               //length of ray from current position to next x or y-side
               float sideDistX;
               float sideDistY;
                  
               //length of ray from one x or y-side to next x or y-side
               float deltaDistX = sqrtf(1 + (rayDirY * rayDirY) / (rayDirX * rayDirX));
               float deltaDistY = sqrtf(1 + (rayDirX * rayDirX) / (rayDirY * rayDirY));
               float perpWallDist;
                  
               //what direction to step in x or y-direction (either +1 or -1)
               int stepX;
               int stepY;
               
               int hit = 0; //was there a wall hit?
               int side; //was a NS or a EW wall hit?
               
               //calculate step and initial sideDist
               if (rayDirX < 0)
               {
                  stepX = -1;
                  sideDistX = (rayPosX - mapX) * deltaDistX;
               }
               
               else
               {
                  stepX = 1;
                  sideDistX = (mapX + 1.0 - rayPosX) * deltaDistX;
               }
               
               if (rayDirY < 0)
               {
                  stepY = -1;
                  sideDistY = (rayPosY - mapY) * deltaDistY;
               }
               
               else
               {
                  stepY = 1;
                  sideDistY = (mapY + 1.0 - rayPosY) * deltaDistY;
               }
               
               //perform DDA
               while (hit == 0)
               {
                  //jump to next map square, OR in x-direction, OR in y-direction
                  if (sideDistX < sideDistY)
                  {
                     sideDistX += deltaDistX;
                     mapX += stepX;
                     side = 0;
                  }
                  
                  else
                  {
                     sideDistY += deltaDistY;
                     mapY += stepY;
                     side = 1;
                  }
                  //Check if ray has hit a wall
                  if (worldMap[mapX][mapY] > 0) hit = 1;
               }
               //Calculate distance projected on camera direction (oblique distance will give fisheye effect!)
               if (side == 0)
               perpWallDist = absf((mapX - rayPosX + (1 - stepX) / 2) / rayDirX);
               
               else
               perpWallDist = absf((mapY - rayPosY + (1 - stepY) / 2) / rayDirY);
                
               //Calculate height of line to draw on screen
               int lineHeight = absf((int)(h / perpWallDist));
                  
               //calculate lowest and highest pixel to fill in current stripe
               int drawStart = -lineHeight / 2 + h / 2;
               
               if(drawStart < 0)
               {
                  drawStart = 0;
               }
               
               int drawEnd = lineHeight / 2 + h / 2;
               
               if(drawEnd >= h)
               {
                  drawEnd = h - 1;
               }
               
               //choose wall color
               color_t color;
               switch(worldMap[mapX][mapY])
               {
                  case 1:  color = COLOR_RED;     break; //red
                  case 2:  color = COLOR_GREEN;     break; //green
                  case 3:  color = COLOR_BLUE;      break; //blue
                  case 4:  color = COLOR_WHITE;     break; //white
                  default: color = COLOR_YELLOW;     break; //yellow
               }
                  
               //give x and y sides different brightness
               //if (side == 1) {color = color / 2;}

               //draw the pixels of the stripe as a vertical line
               drawLine(x, drawStart, drawEnd, color);
            }
            //timing for input and FPS counter
            oldTime = time;
            time = RTC_GetTicks();
            float frameTime = (time - oldTime) / 1000.0; //frameTime is the time this frame has taken, in seconds
            /*
            print(1.0 / frameTime); //FPS counter
            */

            Bdisp_PutDisp_DD();//   Draw Screen
            
            //speed modifiers
            float moveSpeed = frameTime * 5.0; //the constant value is in squares/second
            float rotSpeed = frameTime * 3.0; //the constant value is in radians/second
            
            keyupdate(); //   Update key data
            
            //move forward if no wall in front of you
            if (keydownlast(KEY_PRGM_UP))
            {
               if(worldMap[(int)(posX + dirX * moveSpeed)][(int)posY] == false) posX += dirX * moveSpeed;
               if(worldMap[(int)posX][(int)(posY + dirY * moveSpeed)] == false) posY += dirY * moveSpeed;
            }
            //move backwards if no wall behind you
            if (keydownlast(KEY_PRGM_DOWN))
            {
               if(worldMap[(int)(posX - dirX * moveSpeed)][(int)posY] == false) posX -= dirX * moveSpeed;
               if(worldMap[(int)posX][(int)(posY - dirY * moveSpeed)] == false) posY -= dirY * moveSpeed;
            }
            //rotate to the right
            if (keydownlast(KEY_PRGM_RIGHT))
            {
               //both camera direction and camera plane must be rotated
               float oldDirX = dirX;
               dirX = dirX * cosf(-rotSpeed) - dirY * sinf(-rotSpeed);
               dirY = oldDirX * sinf(-rotSpeed) + dirY * cosf(-rotSpeed);
               float oldPlaneX = planeX;
               planeX = planeX * cosf(-rotSpeed) - planeY * sinf(-rotSpeed);
               planeY = oldPlaneX * sinf(-rotSpeed) + planeY * cosf(-rotSpeed);
            }
            //rotate to the left
            if (keydownlast(KEY_PRGM_LEFT))
            {
               //both camera direction and camera plane must be rotated
               float oldDirX = dirX;
               dirX = dirX * cosf(rotSpeed) - dirY * sinf(rotSpeed);
               dirY = oldDirX * sinf(rotSpeed) + dirY * cosf(rotSpeed);
               float oldPlaneX = planeX;
               planeX = planeX * cosf(rotSpeed) - planeY * sinf(rotSpeed);
               planeY = oldPlaneX * sinf(rotSpeed) + planeY * cosf(rotSpeed);
            }
            
            if(keydownlast(KEY_PRGM_MENU))
            {
               running = false;
            }
            
            fillArea(0,0,LCD_WIDTH_PX,(LCD_HEIGHT_PX / 2), CEILING_COLOR);                  //   Clear screen
            fillArea(0,(LCD_HEIGHT_PX / 2),LCD_WIDTH_PX,(LCD_HEIGHT_PX / 2), FLOOR_COLOR);      //
         }
         /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
            
            break;
         
         default:
            break;
      }
      
      if(keydownlast(KEY_PRGM_MENU))
      {
         running = false;
         keymenu();
      }
      
      Bdisp_PutDisp_DD();
   }
   
   return 0;
}

/////////////////////////////////////////////////////////////////////////////////////

void plot(int x0, int y0, int color) {
   char* VRAM = (char*)0xA8000000;
   VRAM += 2*(y0*LCD_WIDTH_PX + x0);
   *(VRAM++) = (color&0x0000FF00)>>8;
   *(VRAM++) = (color&0x000000FF);
   return;
}

void drawLine(int x, int y1, int y2, int color) {
    int i;
   for(i = y1; i<y2; i++)
   {
      plot (x,i,color);
   }
   
}

void fillArea(int x, int y, int width, int height, int color) {
   //only use lower two bytes of color
   int j,i;
   char* VRAM = (char*)0xA8000000;
   VRAM += 2*(LCD_WIDTH_PX*y + x);
   for(j=y; j<y+height; j++) {
      for(i=x; i<x+width;  i++) {
         *(VRAM++) = (color&0x0000FF00)>>8;
         *(VRAM++) = (color&0x000000FF);
      }
      VRAM += 2*(LCD_WIDTH_PX-width);
   }
}


and here is the header I put together for math routines:

Code:

#ifndef _MATH_H
#define _MATH_H

#include "stdlib.h"

//#define cosf(x) sinf((PIOVERTWO)+(x))
#define PIOVERTWO 1.570796327f
#define PI 3.141592654f
#define PISQUARED 9.869604401f
#define EXTRA_PRECISION

float absf(float);
float sqrtf(float);

float sinf(float x)
{
   while (x > PI) x-=(2.f*PI);
   while (x < -PI) x+=(2.f*PI);
    const float B = 4/PI;
    const float C = -4/(PISQUARED);

    float y = B * x + C * x * absf(x);

    #ifdef EXTRA_PRECISION
    //  const float Q = 0.7775;
        const float P = 0.224008178776;

        y = P * (y * absf(y) - y) + y;   // Q * y + P * y * abs(y)
    #endif
   
   return y;
}

float tanf(float x) {
   float y = sinf(x);
   return y/((PIOVERTWO)-y);
}

float sqrtf( float x) 
{
  union
  {
    int i;
    float x;
  } u;
  u.x = x;
  u.i = (1<<29) + (u.i >> 1) - (1<<22);
 
  // Two Babylonian Steps (simplified from:)
  // u.x = 0.5f * (u.x + x/u.x);
  // u.x = 0.5f * (u.x + x/u.x);
  u.x =       u.x + x/u.x;
  u.x = 0.25f*u.x + x/u.x;

  return u.x;
}

float absf(float x)
{
   return sqrtf(x*x);
}

#endif


Most of this is just code from the examples here that I made compatible with the prizm.
The casio prizm's cpu does not appear to have an FPU try replacing floating point math with fixed point math. That should cause a speed increase.
While the Prizm doesn't have a FPU, GCC compiles floating point code perfectly. Of course, it's slow, so as it's been said you should use fixed point math instead.

For those people that absolutely need to use doubles, I had some trouble finding floating-point sin and cos implementations that would work. In Utilities I ended up using an implementation of sin that isn't very fast (see here: https://github.com/gbl08ma/utilities/blob/master/src/timeGUI.cpp#L171 ), but does the job. It's probably not appropriate for this task, though.
I found a fixed point math library here that I have tried to integrate into the existing raycast engine. Unfortunately that hasn't gone over very well, and what it draws to the screen now is unrecognizable. I replaced the Sine and Cosine functions in the library with my own, and I'm not sure if they are the root of the problems.


Code:

#define FIXEDPT_PI        fixedpt_rconst(3.14159265358979323846)
#define FIXEDPT_RAD_DEG   fixedpt_rconst(57.29577951)
#define FIXEDPT_HALF_PI   fixedpt_rconst(3.14159265358979323846 / 2)
#define fixedpt_round(A) fixedpt_toint((A)+ fixedpt_rconst(0.5))

const fixedpt FIXEDPT_SINE[360] =
{
    0, 1144, 2287, 3430, 4572, 5712,
    6850, 7987, 9121, 10252, 11380, 12505,
    13626, 14742, 15855, 16962, 18064, 19161,
    20252, 21336, 22415, 23486, 24550, 25607,
    26656, 27697, 28729, 29753, 30767, 31772,
    32768, 33754, 34729, 35693, 36647, 37590,
    38521, 39441, 40348, 41243, 42126, 42995,
    43852, 44695, 45525, 46341, 47143, 47930,
    48703, 49461, 50203, 50931, 51643, 52339,
    53020, 53684, 54332, 54963, 55578, 56175,
    56756, 57319, 57865, 58393, 58903, 59396,
    59870, 60326, 60764, 61183, 61584, 61966,
    62328, 62672, 62997, 63303, 63589, 63856,
    64104, 64332, 64540, 64729, 64898, 65048,
    65177, 65287, 65376, 65446, 65496, 65526,
    65536, 65526, 65496, 65446, 65376, 65287,
    65177, 65048, 64898, 64729, 64540, 64332,
    64104, 63856, 63589, 63303, 62997, 62672,
    62328, 61966, 61584, 61183, 60764, 60326,
    59870, 59396, 58903, 58393, 57865, 57319,
    56756, 56175, 55578, 54963, 54332, 53684,
    53020, 52339, 51643, 50931, 50203, 49461,
    48703, 47930, 47143, 46341, 45525, 44695,
    43852, 42995, 42126, 41243, 40348, 39441,
    38521, 37590, 36647, 35693, 34729, 33754,
    32768, 31772, 30767, 29753, 28729, 27697,
    26656, 25607, 24550, 23486, 22415, 21336,
    20252, 19161, 18064, 16962, 15855, 14742,
    13626, 12505, 11380, 10252, 9121, 7987,
    6850, 5712, 4572, 3430, 2287, 1144,
    0, -1144, -2287, -3430, -4572, -5712,
    -6850, -7987, -9121, -10252, -11380, -12505,
    -13626, -14742, -15855, -16962, -18064, -19161,
    -20252, -21336, -22415, -23486, -24550, -25607,
    -26656, -27697, -28729, -29753, -30767, -31772,
    -32768, -33754, -34729, -35693, -36647, -37590,
    -38521, -39441, -40348, -41243, -42126, -42995,
    -43852, -44695, -45525, -46341, -47143, -47930,
    -48703, -49461, -50203, -50931, -51643, -52339,
    -53020, -53684, -54332, -54963, -55578, -56175,
    -56756, -57319, -57865, -58393, -58903, -59396,
    -59870, -60326, -60764, -61183, -61584, -61966,
    -62328, -62672, -62997, -63303, -63589, -63856,
    -64104, -64332, -64540, -64729, -64898, -65048,
    -65177, -65287, -65376, -65446, -65496, -65526,
    -65536, -65526, -65496, -65446, -65376, -65287,
    -65177, -65048, -64898, -64729, -64540, -64332,
    -64104, -63856, -63589, -63303, -62997, -62672,
    -62328, -61966, -61584, -61183, -60764, -60326,
    -59870, -59396, -58903, -58393, -57865, -57319,
    -56756, -56175, -55578, -54963, -54332, -53684,
    -53020, -52339, -51643, -50931, -50203, -49461,
    -48703, -47930, -47143, -46341, -45525, -44695,
    -43852, -42995, -42126, -41243, -40348, -39441,
    -38521, -37590, -36647, -35693, -34729, -33754,
    -32768, -31772, -30767, -29753, -28729, -27697,
    -26656, -25607, -24550, -23486, -22415, -21336,
    -20252, -19161, -18064, -16962, -15855, -14742,
    -13626, -12505, -11380, -10252, -9121, -7987,
    -6850, -5712, -4572, -3430, -2287, -1144
};

/* Returns the sine of the given fixedpt number.
   Edited from the original code to use a table*/
static inline fixedpt fixedpt_sin(fixedpt rad)
{
    rad = fixedpt_toint(rad) < 0 ? (-(rad) + FIXEDPT_PI) : rad;
    return FIXEDPT_SINE[fixedpt_round(fixedpt_mul(rad, FIXEDPT_RAD_DEG))% 360];
}

/* Returns the cosine of the given fixedpt number
   Edited from the original code to use a table*/
static inline fixedpt fixedpt_cos(fixedpt rad)
{
    return fixedpt_sin(rad + FIXEDPT_HALF_PI);
}


I'm using 16.16 precision instead of the 24.8 the library automatically has, but it seems to be setup to support variable lengths for the fractional part. Am I doing anything wrong with the calculations here?
*bump*

I've managed to get the program mostly working again (the problem was some of the order of operations weren't preserved when I switched to fixed point), however, now when attempting to turn the camera angle the calculator crashes. This leads me to believe that there is indeed something wrong with these Sine and Cosine functions.

[Edit]

It doesn't always crash. I think it is only as specific angles that cause it to crash. Possibly every 90 degrees or so, but its hard to tell.
Ok here is what is most likely your problem if you have 16.16 fixed point math and you multiply it by another 16.16 then you get 32.32. I have found that to get a 64 bit variable you need to cast it as (long long) then it will work. Or you could always make your numbers less precise.
I don't think that that is the problem, since the multiplication function appears to handle the extra bits:


Code:

#define FIXEDPT_FBITS 16

/* Multiplies two fixedpt numbers, returns the result. */
static inline fixedpt fixedpt_mul(fixedpt A, fixedpt B)
{
   return (((fixedptd)A * (fixedptd)B) >> FIXEDPT_FBITS);
}


I found out that the crashing problem wasn't because of the rotation; I was because of problems with the texture scaling. I've fixed that, and now the only visible problem are holes in the walls. Every 90 degrees there is a small vertical hole in the wall. I remade the sine table so that there weren't any zeroes in it, but the problem is still persisting.
*bump*
I still haven't managed to figure out this problem, does anyone have any ideas as to what could be causing it?
Hmm, I feel like you may have problems using %360 on negative values, then you'll be using a negative array index. I feel like it may be much better overall to use prescaled angles where a full rotation is a power of 2, so that angles wrap around naturally and you can use simple bitshifting and masking to get your array indices.
I think my code is already handling negative values with this line:


Code:

 rad = fixedpt_toint(rad) < 0 ? (-(rad) + FIXEDPT_PI) : rad;


However, the solution you described seems more efficient. Can you give me an example of what this pre-scaled angle system would be like? how would I have to adjust the angle table I already have?
*bump*
Well, pretty much the number of sine values in your table should be a power of 2, like 256, 512, 1024, etc, depending on how much memory you want to use. As for how you want to handle angle values themselves, it's really up to you. You could say that 1.0 in fixed point is a full rotation instead of 2*pi, and in that case you would use (angle & 0xFFFF) >> (16 - bitsInTableIndex) for the table index.
I need the radian values of the angles for my calculations, and I don't understand how (or if) they are being preserved by this method. Could anyone point me to some code examples of this?
*bump*
*double bump*
  
Register to Join the Conversation
Have your own thoughts to add to this or any other topic? Want to ask a question, offer a suggestion, share your own programs and projects, upload a file to the file archives, get help with calculator and computer programming, or simply chat with like-minded coders and tech and calculator enthusiasts via the site-wide AJAX SAX widget? Registration for a free Cemetech account only takes a minute.

» Go to Registration page
Page 1 of 1
» All times are UTC - 5 Hours
 
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum

 

Advertisement