I'm trying to remove comments and strings from a C source file. I'll just stick to comments for the examples. I have a sliding window, so I only have character n and the one before it (n-1) at any given time.
Since you only wish to use two characters for the buffer and only one while loop, I would suggest a third char to track your state (whether skipping text or not). I've put together a test program for you with inline comments explaining the logic:
// Program to strip comments and strings from a C file
//
// Build:
// gcc -o strip-comments strip-comments.c
//
// Test:
// ./strip-comments strip-comments.c
// NOTE(review): the header names on the original #include lines were lost.
// <stdio.h> declares every library identifier used by the code in this
// file; confirm whether any of the other headers are still required.
#include <stdio.h>
/* The following is a block of strings and comments for testing
 * the code. The file header suggests running the program on its own
 * source file, so these declarations double as test input: their exact
 * spelling is deliberate and should not be tidied up.
 */
/* test if three comments *//* chained together */// will be removed.
// Next line: a block comment whose terminator is immediately followed by
// the division operator, so the remaining code reads 128 / 2.
static int value = 128 /* test comment within valid code *// 2;
// Next line: a string literal that contains an escaped double quote.
const char * test1 = "This is a test of \" processing"; /* testing inline comment */
// Next line: a string literal that contains a backslash escape sequence.
const char * test2 = "this is a test of \n within strings."; // testing inline comment
// this is the last test
/*
 * Copy C source text from `in` to `out` with comments removed and every
 * string literal replaced by the token NULL.
 *
 * - Line comments are dropped up to, but not including, the newline that
 *   ends them. A backslash-newline does not extend a line comment.
 * - Block comments are dropped entirely; nothing is written in their place.
 * - Character constants are copied through unchanged, so a double quote or
 *   slash inside one is never mistaken for a string or comment start. A
 *   newline also ends a character constant, which limits the damage from a
 *   stray apostrophe.
 * - Backslash escapes inside strings and character constants are honoured,
 *   so a literal ending in an escaped backslash terminates correctly.
 *
 * Parameters:
 *   in   stream to read the source text from
 *   out  stream to write the stripped text to
 *
 * Returns 0 on success. Returns -1, after printing a message to stderr, if
 * the input ends inside a block comment or a string literal, or if a read
 * or write error is detected on either stream.
 */
int strip_c_code(FILE * in, FILE * out)
{
    enum {
        IN_CODE,        /* ordinary source text */
        AFTER_SLASH,    /* saw '/', not yet written: may start a comment */
        IN_LINE,        /* inside a line comment */
        IN_BLOCK,       /* inside a block comment */
        IN_BLOCK_STAR,  /* inside a block comment, previous char was '*' */
        IN_STRING,      /* inside a string literal */
        IN_STRING_ESC,  /* inside a string literal, previous char was '\' */
        IN_CHAR,        /* inside a character constant */
        IN_CHAR_ESC     /* inside a character constant, previous was '\' */
    } state = IN_CODE;
    int c;  /* must be int: a char cannot represent EOF and every byte value */

    while ((c = fgetc(in)) != EOF) {
        switch (state) {
        case IN_CODE:
            if (c == '/') {
                state = AFTER_SLASH;    /* hold it back until we know more */
            } else if (c == '"') {
                state = IN_STRING;      /* the literal itself is not copied */
            } else {
                if (c == '\'')
                    state = IN_CHAR;
                fputc(c, out);
            }
            break;

        case AFTER_SLASH:
            if (c == '/') {
                state = IN_LINE;
            } else if (c == '*') {
                state = IN_BLOCK;
            } else {
                /* Plain division operator: emit the held slash and let the
                 * current character be re-read as ordinary code. */
                fputc('/', out);
                ungetc(c, in);
                state = IN_CODE;
            }
            break;

        case IN_LINE:
            if (c == '\n') {
                fputc(c, out);          /* keep the line break */
                state = IN_CODE;
            }
            break;

        case IN_BLOCK:
            if (c == '*')
                state = IN_BLOCK_STAR;
            break;

        case IN_BLOCK_STAR:
            if (c == '/')
                state = IN_CODE;
            else if (c != '*')          /* a run of '*' keeps the state */
                state = IN_BLOCK;
            break;

        case IN_STRING:
            if (c == '\\') {
                state = IN_STRING_ESC;
            } else if (c == '"') {
                fputs("NULL", out);     /* replace string with NULL */
                state = IN_CODE;
            }
            break;

        case IN_STRING_ESC:
            state = IN_STRING;          /* escaped character is skipped */
            break;

        case IN_CHAR:
            fputc(c, out);
            if (c == '\\')
                state = IN_CHAR_ESC;
            else if (c == '\'' || c == '\n')
                state = IN_CODE;
            break;

        case IN_CHAR_ESC:
            fputc(c, out);
            state = IN_CHAR;
            break;

        default:
            break;
        }
    }

    if (ferror(in)) {
        fprintf(stderr, "Error reading input\n");
        return -1;
    }

    /* A slash held back at end of input was ordinary code after all. */
    if (state == AFTER_SLASH)
        fputc('/', out);

    /* A line comment may legitimately end at end of input; only block
     * comments and strings need an explicit terminator. */
    if (state == IN_BLOCK || state == IN_BLOCK_STAR ||
        state == IN_STRING || state == IN_STRING_ESC) {
        fprintf(stderr, "Unterminated comment or string\n");
        return -1;
    }

    if (ferror(out)) {
        fprintf(stderr, "Error writing output\n");
        return -1;
    }
    return 0;
}
/*
 * Entry point. Expects exactly one argument, the path of the file to
 * process; the stripped text is written to stdout.
 *
 * Returns 0 on success and 1 on a usage error, when the file cannot be
 * opened, or when strip_c_code() reports a failure.
 */
int main(int argc, char * argv[])
{
    FILE * fs;
    int rc;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s <file>\n", argv[0]);
        return 1;
    }

    if ((fs = fopen(argv[1], "r")) == NULL) {
        perror("fopen()");
        return 1;
    }

    /* Propagate a stripping failure to the exit status instead of
     * discarding it, so callers and scripts can detect bad input. */
    rc = strip_c_code(fs, stdout);
    fclose(fs);

    return (rc == 0) ? 0 : 1;
}
/* end of source file */
I've also posted this code on GitHub to make it easier to download and compile:
https://gist.github.com/syzdek/5417109