I want to read an XML file into a char *buffer
using C.
What is the best way to do this?
How should I get started?
And if you want to parse the XML rather than just read it into a buffer (reading into a buffer is not XML-specific; see Christoph's and Baget's answers), you can use, for instance, libxml2:
#include <stdio.h>
#include <string.h>
#include <libxml/parser.h>
/*
 * Parse the XML file named on the command line with libxml2 and print the
 * root element followed by each of its direct children (name and node type).
 *
 * Returns 0 on success and 1 on a usage error, when the file cannot be
 * parsed, or when the document has no root element.
 */
int main(int argc, char **argv) {
    xmlDoc *document;
    xmlNode *root, *node;
    const char *filename;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s filename.xml\n", argv[0]);
        return 1;
    }
    filename = argv[1];

    /* xmlReadFile() returns NULL when the file cannot be read or parsed. */
    document = xmlReadFile(filename, NULL, 0);
    if (document == NULL) {
        fprintf(stderr, "Could not parse %s\n", filename);
        return 1;
    }

    /* An empty document has no root element; do not dereference NULL. */
    root = xmlDocGetRootElement(document);
    if (root == NULL) {
        fprintf(stderr, "%s has no root element\n", filename);
        xmlFreeDoc(document);
        xmlCleanupParser();
        return 1;
    }

    fprintf(stdout, "Root is <%s> (%i)\n", root->name, root->type);
    for (node = root->children; node; node = node->next) {
        fprintf(stdout, "\t Child is <%s> (%i)\n", node->name, node->type);
    }
    fprintf(stdout, "...\n");

    /* Release the parsed tree and the parser's global state. */
    xmlFreeDoc(document);
    xmlCleanupParser();
    return 0;
}
On a Unix machine, you typically compile the above with:
% gcc -o read-xml $(xml2-config --cflags) -Wall read-xml.c $(xml2-config --libs)
Is reading the contents of the file into a single, simple buffer really what you want to do? XML files are generally there to be parsed, and you can do this with a library such as libxml2, to give just one example (notably, one that is implemented in C).
Here is a full program that reads in a whole XML file (really, any file), into a buffer. It includes about as much error-checking as would be useful.
N.B. everything is done in main(). Turning it into a callable function is left as an exercise for the reader.
(Tested, compiled with GCC 4.3.3. Switches were -Wall -W --pedantic --ansi.)
Comments on this will be addressed in approximately eight hours.
#include <stdio.h>
#include <stdlib.h>
/*
 * Read the whole file named by argv[1] into a heap buffer that grows in
 * BUFSIZ-byte steps, NUL-terminate it and print it with puts().
 *
 * Extra filename arguments are reported and ignored. The program exits with
 * EXIT_FAILURE when no filename is given, the file cannot be opened, memory
 * runs out, or a read error occurs; otherwise it returns 0.
 */
int main (int argc, char *argv[]) {
    char *buffer;        /* holds the file contents. */
    size_t i;            /* indexing into buffer. */
    size_t buffer_size;  /* size of the buffer. */
    char *temp;          /* for realloc(). */
    int c;               /* int, not char: must hold every byte value AND EOF. */
    FILE *input;         /* our input stream. */

    if (argc < 2) {
        fprintf(stderr, "Needs a filename argument.\n");
        exit(EXIT_FAILURE);
    }
    else if (argc > 2) {
        fprintf(stderr, "Well, you passed in a few filenames, but I'm only using %s\n", argv[1]);
    }

    if ((input = fopen(argv[1], "r")) == NULL) {
        fprintf(stderr, "Error opening input file %s\n", argv[1]);
        exit(EXIT_FAILURE);
    }

    /* Initial allocation of buffer */
    i = 0;
    buffer_size = BUFSIZ;
    if ((buffer = malloc(buffer_size)) == NULL) {
        fprintf(stderr, "Error allocating memory (before reading file).\n");
        fclose(input);
        /* Without this exit the loop below would write through NULL. */
        exit(EXIT_FAILURE);
    }

    while ((c = fgetc(input)) != EOF) {
        /* Enlarge buffer if necessary. */
        if (i == buffer_size) {
            buffer_size += BUFSIZ;
            if ((temp = realloc(buffer, buffer_size)) == NULL) {
                fprintf(stderr, "Ran out of core while reading file.\n");
                fclose(input);
                free(buffer);
                exit(EXIT_FAILURE);
            }
            buffer = temp;
        }

        /* Add input char to the buffer. */
        buffer[i++] = (char)c;
    }

    /* fgetc() returns EOF for both end-of-file and errors; tell them apart. */
    if (ferror(input)) {
        fprintf(stderr, "There was a file input error.\n");
        free(buffer);
        fclose(input);
        exit(EXIT_FAILURE);
    }

    /* Make the buffer a bona-fide string: one more byte for the terminator. */
    if (i == buffer_size) {
        buffer_size += 1;
        if ((temp = realloc(buffer, buffer_size)) == NULL) {
            fprintf(stderr, "Ran out of core (and only needed one more byte too ;_;).\n");
            fclose(input);
            free(buffer);
            exit(EXIT_FAILURE);
        }
        buffer = temp;
    }
    buffer[i] = '\0';

    puts(buffer);

    /* Clean up. */
    free(buffer);
    fclose(input);

    return 0;
}
That's all...
Example XML file:
<?xml version="1.0" encoding="utf-8"?>
<config>
<xmlConfig value1="This is a simple XML parsing program in C"/>
<xmlConfig value2="Thank you : Banamali Mishra"/>
<xmlConfig value3="2000000"/>
<xmlConfig value4="80"/>
<xmlConfig value5="10"/>
<xmlConfig value6="1"/>
</config>
Here is the source code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libxml/xmlreader.h>
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>
/* Global table that ParsingXMLFile() copies attribute values into: 6 rows of
 * 80 bytes each. The first row starts out as " "; the remaining rows are
 * zero-filled by the partial initializer. */
char configReceive[6][80] = { " " };
//xml parsing
/*
 * Parse the XML document `filename` and copy attribute values from the
 * <xmlConfig> children of its root element into the global configReceive
 * table.
 *
 * The n-th <xmlConfig> element is queried for attribute "value<n>" (value1
 * through value6) and the result is stored in configReceive[n-1]. A missing
 * attribute stores an empty string, a value longer than a row is truncated,
 * and <xmlConfig> elements beyond the sixth are ignored.
 *
 * If the file cannot be parsed or has no root element the function returns
 * without touching configReceive.
 */
void ParsingXMLFile(char *filename) {
    static const char *const config[] = {
        "value1", "value2", "value3", "value4", "value5", "value6"
    };
    const size_t key_count = sizeof config / sizeof config[0];
    const size_t row_count = sizeof configReceive / sizeof configReceive[0];
    /* Never index past either the key list or the destination table. */
    const size_t limit = key_count < row_count ? key_count : row_count;
    size_t count = 0;
    xmlDocPtr doc;
    xmlNodePtr cur;

    doc = xmlParseFile(filename);
    if (doc == NULL) {
        return;
    }

    cur = xmlDocGetRootElement(doc);
    if (cur == NULL) {
        xmlFreeDoc(doc);
        return;
    }

    for (cur = cur->xmlChildrenNode; cur != NULL && count < limit; cur = cur->next) {
        xmlChar *uri;

        if (xmlStrcmp(cur->name, (const xmlChar *)"xmlConfig") != 0) {
            continue;
        }

        /* xmlGetProp() returns NULL when the attribute is absent. */
        uri = xmlGetProp(cur, (const xmlChar *)config[count]);
        if (uri != NULL) {
            /* Bounded copy: always NUL-terminated, truncates long values. */
            snprintf(configReceive[count], sizeof configReceive[count],
                     "%s", (const char *)uri);
            xmlFree(uri);
        } else {
            configReceive[count][0] = '\0';
        }
        count++;
    }

    xmlFreeDoc(doc);
}
Memory-mapping the file with mmap() has the potential to cut down on useless copying of the data. The trick is to ask the OS for what you want, instead of doing it yourself. Here's an implementation I made earlier:
#ifndef MMAP_H
#define MMAP_H
#include <stdio.h>     /* printf, used by REG_SHOW */
#include <sys/types.h> /* off_t */

/* A contiguous memory region: start address and length in bytes. */
struct region_t {
    void *head;
    off_t size;
};

/*
 * Non-zero when pointer `p` lies outside [head, head + size) of the region
 * pointed to by `reg`. Pointers are compared as char pointers because
 * arithmetic on void * is not standard C. `reg` and `p` are evaluated more
 * than once, so pass expressions without side effects.
 */
#define OUT_OF_BOUNDS(reg, p) \
    (((const char *)(p) < (const char *)(reg)->head) || \
     ((const char *)(p) >= (const char *)(reg)->head + (reg)->size))

/*
 * Print the head, size and one-past-the-end address of the region pointed to
 * by `reg`. Expands to a single expression; the caller supplies the
 * terminating semicolon. The size is cast to long long so the format
 * specifier matches regardless of how wide off_t is.
 */
#define REG_SHOW(reg) \
    printf("h: %p, s: %lld (e: %p)\n", \
           (void *)(reg)->head, \
           (long long)(reg)->size, \
           (void *)((char *)(reg)->head + (reg)->size))

/* Map the file `fn` into memory; returns NULL on failure. */
struct region_t *do_mmap(const char *fn);
#endif
#include <stdlib.h>
#include <sys/types.h> /* open lseek */
#include <sys/stat.h> /* open */
#include <fcntl.h> /* open */
#include <unistd.h> /* lseek */
#include <sys/mman.h> /* mmap */
#include "mmap.h"
/*
 * Map the file `fn` read-only (private mapping) into memory.
 *
 * Returns a heap-allocated region descriptor whose `head` is the start of
 * the mapping and whose `size` is the file length found by seeking to the
 * end. Returns NULL if allocation, open(), lseek() or mmap() fails; nothing
 * is leaked on those paths. The descriptor comes from calloc() and the
 * mapping from mmap(), so they are released with free() and munmap().
 */
struct region_t *do_mmap(const char *fn)
{
    struct region_t *R;
    int fd;

    R = calloc(1, sizeof *R);
    if (R == NULL) {
        return NULL;
    }

    fd = open(fn, O_RDONLY);
    if (fd == -1) {
        goto fail_free;
    }

    R->size = lseek(fd, 0, SEEK_END);
    if (R->size == (off_t)-1) {
        goto fail_close;
    }

    /* mmap() signals failure with MAP_FAILED, not NULL. */
    R->head = mmap(NULL, (size_t)R->size, PROT_READ, MAP_PRIVATE, fd, 0);
    if (R->head == MAP_FAILED) {
        goto fail_close;
    }

    close(fd); /* the mapping stays valid after the descriptor is closed */
    return R;

fail_close:
    close(fd);
fail_free:
    free(R);
    return NULL;
}
Hopefully bug-free ISO-C code to read the contents of a file and add a '\0' char:
#include <stdlib.h>
#include <stdio.h>
/*
 * Report the size of `file` in bytes by seeking to its end, then move the
 * position back to the start of the stream.
 *
 * Returns the size, or -1 if either seek or the position query fails.
 */
long fsize(FILE * file)
{
    long length;

    if (fseek(file, 0L, SEEK_END) != 0) {
        return -1;
    }

    length = ftell(file);

    /* Only seek back to the start once the end position is known to be valid. */
    if (length < 0 || fseek(file, 0L, SEEK_SET) != 0) {
        return -1;
    }

    return length;
}
/*
 * Read the entire file `name` into a freshly allocated, NUL-terminated
 * buffer.
 *
 * str   - receives the buffer (caller frees it with free()), or NULL when
 *         the file could not be opened, sized or allocated.
 * name  - path of the file, opened in binary mode.
 * error - optional; when non-NULL it is set to 0 only if the whole file was
 *         read, and to 1 on any failure (open, size query, allocation or a
 *         short read). After a short read *str still holds the bytes that
 *         were read.
 *
 * Returns the number of bytes stored in *str, not counting the terminator.
 */
size_t fget_contents(char ** str, const char * name, _Bool * error)
{
    FILE *file;
    char *data;
    char *shrunk;
    long size;
    size_t count = 0;
    _Bool failed = 1;

    *str = NULL;
    if (error) *error = 1;

    file = fopen(name, "rb");
    if (!file) return 0;

    size = fsize(file);
    if (size < 0) goto done;

    data = malloc((size_t)size + 1);
    if (!data) goto done;   /* allocation failure is reported as an error */

    count = fread(data, 1, (size_t)size, file);
    data[count] = 0;

    if (count < (size_t)size) {
        /* Short read: give back the unused tail. Go through a temporary so
         * the data survives if realloc() fails. */
        shrunk = realloc(data, count + 1);
        if (shrunk) data = shrunk;
    }

    *str = data;
    failed = (count != (size_t)size);

done:
    fclose(file);
    if (error) *error = failed;
    return count;
}