I'm trying to write a program that can compare two files line by line, word by word, or character by character in C. It has to be able to read in command line options.
Docopt has a C implementation that I thought was quite nice: https://github.com/docopt/docopt.c
From a man-page standardized format describing command line options, docopt infers and creates an argument parser. This got started in python; the python version literally just parses the docstring and returns a dict. To do this in C takes a little more work, but it's clean to use and has no external dependencies.
To my knowledge, the three most popular ways to parse command line arguments in C are:
- Getopt (#include <unistd.h> from the POSIX C Library), which can solve simple argument parsing tasks. If you're a bit familiar with bash, the getopt built-in of bash is based on Getopt from the GNU libc.
- Argp (#include <argp.h> from the GNU C Library), which can solve more complex tasks and takes care of stuff like, for example:
  - -?, --help for help message, including email address
  - -V, --version for version information
  - --usage for usage message
- Doing it yourself, by walking argv by hand (see the third example below).
The GNU C Library documentation has some nice examples for Getopt and Argp.
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
/*
 * Option-parsing skeleton built on POSIX getopt().
 *
 * Recognised flags:
 *   -i  compare case-insensitively
 *   -l  compare line by line
 *   -w  compare word by word
 * Any other option prints a usage line to stderr and exits with EXIT_FAILURE.
 */
int main(int argc, char *argv[])
{
    enum { CHARACTER_MODE, WORD_MODE, LINE_MODE } mode = CHARACTER_MODE;
    bool isCaseInsensitive = false;

    for (;;) {
        const int flag = getopt(argc, argv, "ilw");

        if (flag == -1) {
            break;                      /* no more options */
        }

        if (flag == 'i') {
            isCaseInsensitive = true;
        } else if (flag == 'l') {
            mode = LINE_MODE;
        } else if (flag == 'w') {
            mode = WORD_MODE;
        } else {
            fprintf(stderr, "Usage: %s [-ilw] [file...]\n", argv[0]);
            exit(EXIT_FAILURE);
        }
    }

    // Now optind (declared extern int by <unistd.h>) is the index of the first non-option argument.
    // If it is >= argc, there were no non-option arguments.
    // ...
}
#include <argp.h>
#include <stdbool.h>
/* Picked up by argp to implement --version. */
const char *argp_program_version = "programname programversion";

/* Picked up by argp and shown at the end of the --help output. */
const char *argp_program_bug_address = "<your@email.address>";

/* One-line program description shown in the help text. */
static char doc[] = "Your program description.";

/* Synopsis of the non-option arguments shown in the usage line. */
static char args_doc[] = "[FILENAME]...";

/* Option table; the all-zero entry terminates the list. */
static struct argp_option options[] = {
    {
        .name = "line",
        .key = 'l',
        .doc = "Compare lines instead of characters.",
    },
    {
        .name = "word",
        .key = 'w',
        .doc = "Compare words instead of characters.",
    },
    {
        .name = "nocase",
        .key = 'i',
        .doc = "Compare case insensitive instead of case sensitive.",
    },
    { 0 }
};
/* Settings collected from the command line; filled in by parse_opt(). */
struct arguments {
    enum { CHARACTER_MODE, WORD_MODE, LINE_MODE } mode;
    bool isCaseInsensitive;
};

/*
 * argp parser callback.
 *
 * key   - option key from the options table, or one of the ARGP_KEY_* codes
 * arg   - option argument (unused: none of the options takes one)
 * state - parser state; state->input points at the struct arguments to fill
 *
 * Returns 0 for the three known options and for positional arguments
 * (which are accepted but not recorded), ARGP_ERR_UNKNOWN for anything else.
 */
static error_t parse_opt(int key, char *arg, struct argp_state *state)
{
    struct arguments *settings = state->input;

    (void)arg;

    if (key == 'l') {
        settings->mode = LINE_MODE;
    } else if (key == 'w') {
        settings->mode = WORD_MODE;
    } else if (key == 'i') {
        settings->isCaseInsensitive = true;
    } else if (key != ARGP_KEY_ARG) {
        return ARGP_ERR_UNKNOWN;
    }

    return 0;
}
static struct argp argp = { options, parse_opt, args_doc, doc, 0, 0, 0 };
int main(int argc, char *argv[])
{
struct arguments arguments;
arguments.mode = CHARACTER_MODE;
arguments.isCaseInsensitive = false;
argp_parse(&argp, argc, argv, 0, 0, &arguments);
// ...
}
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Hand-rolled option parser (no getopt/argp dependency).
 *
 * Accepts -i (case-insensitive), -l (line mode) and -w (word mode), either
 * separately or bundled in one argument ("-il"). Parsing stops at the first
 * argument that is not an option, at a lone "-" (left in place as an
 * operand), or after "--" (which is consumed). An unknown option character
 * prints a usage line to stderr and exits with EXIT_FAILURE.
 *
 * After the loop argv is advanced past the options, so *argv is the first
 * operand, or NULL when there is none.
 */
int main(int argc, char *argv[])
{
    bool isCaseInsensitive = false;
    enum { CHARACTER_MODE, WORD_MODE, LINE_MODE } mode = CHARACTER_MODE;
    int optind;     /* int, like argc, to avoid a signed/unsigned comparison */

    for (optind = 1; optind < argc; optind++) {
        const char *arg = argv[optind];

        if (arg[0] != '-' || arg[1] == '\0') {
            break;                      /* operand, or lone "-" */
        }
        if (arg[1] == '-' && arg[2] == '\0') {
            optind++;                   /* "--": explicit end of options */
            break;
        }

        /* Walk every flag character so that bundled options all take effect. */
        for (const char *flag = arg + 1; *flag != '\0'; flag++) {
            switch (*flag) {
            case 'i': isCaseInsensitive = true; break;
            case 'l': mode = LINE_MODE; break;
            case 'w': mode = WORD_MODE; break;
            default:
                fprintf(stderr, "Usage: %s [-ilw] [file...]\n", argv[0]);
                exit(EXIT_FAILURE);
            }
        }
    }

    /* Guard against argc == 0, where the loop index starts beyond argc. */
    if (optind > argc) {
        optind = argc;
    }
    argv += optind;

    // *argv points to the remaining non-option arguments.
    // If *argv is NULL, there were no non-option arguments.
    // ...
}
Disclaimer: I am new to Argp, the example might contain errors.
I'm very surprised nobody brought up James Theiler's "opt" package.
You can find opt at http://public.lanl.gov/jt/Software/
and a flattering post with some examples of how it is so much simpler than other approaches is here:
http://www.decompile.com/not_invented_here/opt/
Use getopt(), or perhaps getopt_long().
/*
 * Fragment for the body of main(): parse -i/-l/-w with getopt(), then run
 * process() on every named file, or on standard input when none is given.
 * Needs <errno.h>, <stdio.h>, <stdlib.h>, <string.h> and <unistd.h>.
 */
int iflag = 0;                                      /* set by -i */
enum { WORD_MODE, LINE_MODE } op_mode = WORD_MODE;  // Default set
int opt;

/*
 * The assignment needs its own pair of parentheses: != binds tighter than =,
 * so without them opt would receive the result of the comparison (0 or 1)
 * instead of the option character.
 */
while ((opt = getopt(argc, argv, "ilw")) != -1)
{
    switch (opt)
    {
    case 'i':
        iflag = 1;
        break;
    case 'l':
        op_mode = LINE_MODE;
        break;
    case 'w':
        op_mode = WORD_MODE;
        break;
    default:
        fprintf(stderr, "Usage: %s [-ilw] [file ...]\n", argv[0]);
        exit(EXIT_FAILURE);
    }
}

/* Process file names or stdin */
if (optind >= argc)
    process(stdin, "(standard input)", op_mode);
else
{
    int i;
    for (i = optind; i < argc; i++)
    {
        FILE *fp = fopen(argv[i], "r");
        if (fp == NULL)
            /* Report the failure and carry on with the remaining files. */
            fprintf(stderr, "%s: failed to open %s (%d %s)\n",
                    argv[0], argv[i], errno, strerror(errno));
        else
        {
            process(fp, argv[i], op_mode);
            fclose(fp);
        }
    }
}
Note that you need to determine which headers to include (I make it 4 that are required), and the way I wrote the op_mode type means you have a problem in the function process() - you can't access the enumeration down there. It's best to move the enumeration outside the function; you might even make op_mode a file-scope variable without external linkage (a fancy way of saying static) to avoid passing it to the function. This code does not handle - as a synonym for standard input, another exercise for the reader. Note that getopt() automatically takes care of -- to mark the end of options for you.
I've not run any version of the typing above past a compiler; there could be mistakes in it.
For extra credit, write a (library) function:
int filter(int argc, char **argv, int idx, int (*function)(FILE *fp, const char *fn));
which encapsulates the logic for processing file name options after the getopt() loop. It should handle - as standard input. Note that using this would indicate that op_mode should be a static file scope variable. The filter() function takes argc, argv, optind and a pointer to the processing function. It should return 0 (EXIT_SUCCESS) if it was able to open all the files and all invocations of the function reported 0, otherwise 1 (or EXIT_FAILURE). Having such a function simplifies writing Unix-style 'filter' programs that read files specified on the command line or standard input.
Tooting my own horn if I may, I'd also like to suggest taking a look at an option parsing library that I've written: dropt.
One feature that it offers that many others don't is the ability to override earlier options. For example, if you have a shell alias:
alias bar="foo --flag1 --flag2 --flag3"
and you want to use bar but with --flag1 disabled, it allows you to do:
bar --flag1=0
/*
Here's a rough one not relying on any libraries.
Example:
-wi | -iw //word case insensitive
-li | -il //line case insensitive
-- file //specify the first filename (you could just get the files
as positional arguments in the else statement instead)
PS: don't mind the #define's, they're just pasting code :D
*/
#ifndef OPT_H
#define OPT_H
// Tiny macro-based option parser. The three macros below are meant to be
// used together inside one function that has `argc` and `argv` in scope:
//
//     opt
//         case 'x': ... break;
//         case 'o': require ... break;
//     done
//     else /* statement for argv elements that do not start with '-' */;
//
// The header does not include anything itself; the including file must
// provide fprintf, stderr and NULL (e.g. via <stdio.h>).

// require: write at the start of a `case` whose option takes an argument.
// Sets optarg to the text that follows the option character in the same
// argv element; if that text is empty, it advances optind and uses the next
// argv element instead, or jumps to opt_err_arg when there is none.
// Finally it points opt_pointer at opt_null_terminator so that the scanning
// loop opened by `opt` reads '\0' next and leaves the current argv element.
#define require \
optarg = opt_pointer + 1; \
if (*optarg == '\0') \
{ \
if (++optind == argc) \
goto opt_err_arg; \
else \
optarg = argv[optind]; \
} \
opt_pointer = opt_null_terminator;
// opt: starts processing argv.
// Declares optind (starting at 1), opt_pointer, optarg and
// opt_null_terminator as locals of the enclosing function. The `if (0)`
// block is never entered normally; it only hosts the goto targets
// opt_err_arg and opt_err_opt, each of which prints a message to stderr and
// executes `return 1` in the enclosing function.
// The macro then opens a loop over argv[1..argc-1]; for each element whose
// first character is '-', it opens an endless loop over the following
// characters with a `switch` on the current one. That switch is left open on
// purpose: the user's `case` labels come next and `done` closes everything.
#define opt \
int optind = 1; \
char *opt_pointer = argv[1]; \
char *optarg = NULL; \
char opt_null_terminator[2] = {'\0','\0'}; \
if (0) \
{ \
opt_err_arg: \
fprintf(stderr,"option %c requires argument.\n",*opt_pointer); \
return 1; \
opt_err_opt: \
fprintf(stderr,"option %c is invalid.\n",*opt_pointer); \
return 1; \
} \
for (; optind < argc; opt_pointer = argv[++optind]) \
if (*opt_pointer++ == '-') \
{ \
for (;;++opt_pointer) \
switch (*opt_pointer) \
{
// done: stops processing argv by closing what `opt` opened.
// Supplies the switch's `default` branch: a character with no matching
// `case` jumps to opt_err_opt, while the terminating '\0' jumps to opt_next,
// which leaves the character loop so the outer loop moves to the next argv
// element. The `if` from `opt` is closed without an `else`, so an `else`
// statement written directly after `done` runs for argv elements that do
// not start with '-'.
#define done \
default: \
if (*opt_pointer != '\0') \
goto opt_err_opt; \
else \
goto opt_next; \
break; \
} \
opt_next:; \
}
#endif //opt.h
#include <stdio.h>
#include "opt.h"
int
main (int argc, char **argv)
{
#define by_character 0
#define by_word 1
#define by_line 2
int cmp = by_character;
int case_insensitive = 0;
opt
case 'h':
puts ("HELP!");
break;
case 'v':
puts ("fileCMP Version 1.0");
break;
case 'i':
case_insensitive = 1;
break;
case 'w':
cmp = by_word;
break;
case 'l':
cmp = by_line;
break;
case '-':required
printf("first filename: %s\n", optarg);
break;
done
else printf ("Positional Argument %s\n", argv[optind]);
return 0;
}