This utility will split open an MPD so that its subfiles can be saved separately. http://pastebin.com/eQC7fSPx


Code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <regex.h>


/* Marker kinds. main() uses each value both as a lineref_t type tag and as
   the index of the marker's text in searchText. */
#define NOFILE 0
#define INFILE 1

/* Marker text that main() searches for at the start of a line, indexed by
   NOFILE / INFILE. */
const char* searchText[2] = {"0 NOFILE", "0 FILE"};
/* Number of characters main() skips at an INFILE marker before it looks for
   the file name; 6 is the length of "0 FILE". */
#define INFILE_CT 6


/* One sub-file found in the input. main() fills both members with pointers
   into the loaded text buffer, so nothing here is separately allocated. */
typedef struct subfile {
   char *filename; /* NUL-terminated name read from the marker line */
   char *data;     /* NUL-terminated text following the marker line */
} subfile_t;

/* Records where a marker line was found in the text being scanned. */
typedef struct lineref {
   char *begin; /* address of the first character of the marker */
   int type;    /* which marker matched: NOFILE or INFILE */
} lineref_t;


int lineref_comp(const void * elem1, const void * elem2)
{
   return ((lineref_t*)elem1)->begin - ((lineref_t*)elem2)->begin;
}


/*
 * Appends newLR to the growable array lrArray.
 *
 * lrArray  current array, or NULL when nothing has been allocated yet
 * newLR    record to append (copied by value)
 * count    in/out: number of records stored; incremented by one
 * max      in/out: allocated capacity in records; raised when the array grows
 *
 * Returns the address of the array, which may have moved; the caller must
 * use the returned pointer in place of the one it passed in.
 *
 * If the array cannot be grown the function reports the failure on stderr
 * and terminates the program with exit status 1, instead of writing through
 * a NULL pointer.
 */
lineref_t* lineref_insert(lineref_t* lrArray, lineref_t newLR, int *count, int *max)
{
   if(1+(*count) >= (*max))
   {
      // increase size more or less geometrically by ~sqrt(2)
      int newMax = (int)(1+1.4*(*max));
      lineref_t *grown = realloc(lrArray, (size_t)newMax*sizeof(lineref_t));

      if(grown == NULL)
      {
         // realloc leaves the old array untouched on failure; release it before quitting
         free(lrArray);
         fprintf(stderr, "Out of memory\n");
         exit(1);
      }

      lrArray = grown;
      (*max) = newMax;
   }
   lrArray[(*count)++] = newLR;

   return lrArray;
}


int main(int argc, char* argv[])
{
   if(argc <= 1) exit(1);
   
   FILE *fp;
   char* filename = argv[1];
   char* data = NULL;
   int state;
   lineref_t* lrArray = NULL;
   lineref_t curLR;
   subfile_t* subFiles = NULL;
   subfile_t curSubFile;
   int count, max, i, j;
   count = max = 0;
   
   printf("reading %s\n",filename);
   
   
   
   if((fp = fopen(filename,"r")) == NULL){
      printf("Cannot open file.\n");
      exit(1);
   }
   
   if (fseek(fp, 0L, SEEK_END) == 0) {
      long bufsize = ftell(fp);
      if (bufsize == -1) { printf("Error\n"); exit(1); }
      
      data = (char*)malloc(sizeof(char) * (bufsize + 1));
      
      if (fseek(fp, 0L, SEEK_SET) != 0) {  printf("Error\n"); exit(1);}
      
      size_t newLen = fread(data, sizeof(char), bufsize, fp);
      if (newLen == 0) {
         fputs("Error reading file", stderr);
      } else {
         data[++newLen] = '\0'; /* Just to be safe. */
      }
   }
   
   fclose(fp);
   
   for(state = NOFILE; state <= INFILE; state++){
      char * curPos = data;
      while((curPos = strstr(curPos, searchText[state])) != NULL){
         char prev = (curPos == data)? '\0' : *(curPos-1);
         if((curPos == data) || prev == '\r' || prev == '\n'){
            //At beginning of line!
            curLR.begin = curPos;
            curLR.type = state;
            lrArray = lineref_insert(lrArray, curLR, &count, &max);
         }
         
         curPos++;
      }
   }
   
   qsort(lrArray, count, sizeof(lineref_t), lineref_comp);
   subFiles = (subfile_t*)realloc(subFiles,sizeof(subfile_t)*count);
   for(j = i = 0; i < count; i++){
      if(lrArray[i].type){
         lrArray[i].begin+=INFILE_CT;
         while(lrArray[i].begin[0] <= ' ') lrArray[i].begin++;
         curSubFile.filename = lrArray[i].begin;
         while(lrArray[i].begin[0] != '\0' && lrArray[i].begin[0] != '\n' && lrArray[i].begin[0] != '\r') lrArray[i].begin++;
         while(lrArray[i].begin[0] == '\n' || lrArray[i].begin[0] == '\r') {
            lrArray[i].begin[0] = '\0';
            lrArray[i].begin++;
         }
         curSubFile.data = lrArray[i].begin;
         
         subFiles[j] = curSubFile;
         j++;
      }
      
      if(i+1<count){
         lrArray[i+1].begin[0] = '\0';
      }
   }
   subFiles = (subfile_t*)realloc(subFiles,sizeof(subfile_t)*j);
   
   
   for(i = 0; i < j; i++){
      printf("File: %s\nData:\n%s\n",subFiles[i].filename,subFiles[i].data);
   }
   
   free(data);
   return 0;
}
Aside from a few assorted mutters about the magic numbers '\n' and '\r', which are much less heinous than most magic numbers, very nice code! For the dumb among us, and those who were not in Vermont when you explained it, what does this do for Freebuild as a whole?
KermMartian wrote:
Aside from a few assorted mutters about the magic numbers '\n' and '\r', which are much less heinous than most magic numbers, very nice code!

The escape sequences for standard line endings are hardly magic numbers Razz


KermMartian wrote:
For the dumb among us, and those who were not in Vermont when you explained it, what does this do for Freebuild as a whole?


The file loading code we are using on loan from ldlite (thanks again to the original author for his generosity) lexes, but does not parse, the contents of an MPD file and caches the token stream. This behavior is undesirable for us, so this currently stand-alone utility will form the basis of in-engine code to separate MPD files into their component models, which are properly lexed and parsed. This, and the associated caching updates from the Radix Tree thread, represent the first significant changes to the code from the ldlite parser. Most of the changes previous to this were for the purposes of incorporating it into the engine and platform integration, but didn't require changes to the algorithms used to store and access cached data or multipart documents.
  
Register to Join the Conversation
Have your own thoughts to add to this or any other topic? Want to ask a question, offer a suggestion, share your own programs and projects, upload a file to the file archives, get help with calculator and computer programming, or simply chat with like-minded coders and tech and calculator enthusiasts via the site-wide AJAX SAX widget? Registration for a free Cemetech account only takes a minute.

» Go to Registration page
Page 1 of 1
» All times are UTC - 5 Hours
 
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum

 

Advertisement