How can I read an XML file into a buffer in C?

后端 未结 8 817
逝去的感伤
逝去的感伤 2021-02-01 11:03

I want to read an XML file into a char *buffer using C.

What is the best way to do this?

How should I get started?

相关标签:
8条回答
  • 2021-02-01 11:30

    And if you want to parse the XML rather than just read it into a buffer (reading a file into a buffer is not XML-specific; see Christoph's and Baget's answers), you can use, for instance, libxml2:

    #include <stdio.h>
    #include <string.h>
    #include <libxml/parser.h>
    
    /*
     * Parse the XML file named on the command line with libxml2 and print the
     * root element followed by each of its direct children.
     *
     * Returns 0 on success and 1 on a usage error, when the file cannot be
     * parsed, or when the document has no root element.
     */
    int main(int argc, char **argv) {
       xmlDoc *document;
       xmlNode *root, *node;
       char *filename;
    
       if (argc < 2) {
         fprintf(stderr, "Usage: %s filename.xml\n", argv[0]);
         return 1;
       }
       filename = argv[1];
    
       /* xmlReadFile() yields NULL when the file cannot be read or parsed. */
       document = xmlReadFile(filename, NULL, 0);
       if (document == NULL) {
         fprintf(stderr, "Could not parse %s\n", filename);
         return 1;
       }
    
       /* An empty document has no root element; do not dereference NULL. */
       root = xmlDocGetRootElement(document);
       if (root == NULL) {
         fprintf(stderr, "%s has no root element\n", filename);
         xmlFreeDoc(document);
         return 1;
       }
    
       /* xmlChar is unsigned char and type is an enum: cast both for printf. */
       fprintf(stdout, "Root is <%s> (%i)\n", (const char *)root->name, (int)root->type);
       for (node = root->children; node; node = node->next) {
          fprintf(stdout, "\t Child is <%s> (%i)\n", (const char *)node->name, (int)node->type);
       }
       fprintf(stdout, "...\n");
    
       /* Release the parsed tree before exiting. */
       xmlFreeDoc(document);
       return 0;
    }
    

    On a Unix machine, you typically compile the above with:

    % gcc -o read-xml $(xml2-config --cflags) -Wall $(xml2-config --libs) read-xml.c
    
    0 讨论(0)
  • 2021-02-01 11:30

    Is reading the contents of the file into a single, simple buffer really what you want to do? XML files are generally there to be parsed, and you can do this with a library like libxml2, to give just one example (one that is, notably, implemented in C).

    0 讨论(0)
  • 2021-02-01 11:34

    Here is a full program that reads in a whole XML file (really, any file), into a buffer. It includes about as much error-checking as would be useful.

    N.B. everything is done in main(). Turning it into a callable function is left as an exercise for the reader.

    (Tested, compiled with GCC 4.3.3. Switches were -Wall -W --pedantic --ansi.)

    Comments on this will be addressed in approximately eight hours.

    #include <stdio.h>
    #include <stdlib.h>
    
    
    /*
     * Read the file named by argv[1] into a heap buffer that grows in BUFSIZ
     * steps, NUL-terminate it, and print it with puts().
     *
     * Exits with EXIT_FAILURE when no filename is given, when the file cannot
     * be opened or read, or when memory runs out; returns 0 otherwise.
     * Extra arguments after the first filename are reported and ignored.
     */
    int main (int argc, char *argv[]) {
     char   *buffer;        /* holds the file contents. */
     size_t  i;             /* indexing into buffer. */
     size_t  buffer_size;   /* size of the buffer. */
     char   *temp;          /* for realloc(). */
     int     c;             /* int, not char: fgetc() must be able to return EOF
                               as a value distinct from every byte. */
     FILE   *input;         /* our input stream. */
    
    
     if (argc == 1) {
          fprintf(stderr, "Needs a filename argument.\n");
          exit(EXIT_FAILURE);
     }
     else if (argc > 2) {
          fprintf(stderr, "Well, you passed in a few filenames, but I'm only using %s\n", argv[1]);
     }
    
     if ((input = fopen(argv[1], "r")) == NULL) {
          fprintf(stderr, "Error opening input file %s\n", argv[1]);
          exit(EXIT_FAILURE);
     }
    
     /* Initial allocation of buffer */
     i = 0;
     buffer_size = BUFSIZ;
     if ((buffer = malloc(buffer_size)) == NULL) {
          fprintf(stderr, "Error allocating memory (before reading file).\n");
          fclose(input);
          /* Without this exit the loop below would write through NULL. */
          exit(EXIT_FAILURE);
     }
    
     while ((c = fgetc(input)) != EOF) {
          /* Enlarge buffer if necessary. */
          if (i == buffer_size) {
               buffer_size += BUFSIZ;
               /* Keep the old pointer until realloc() is known to succeed. */
               if ((temp = realloc(buffer, buffer_size)) == NULL) {
                    fprintf(stderr, "Ran out of core while reading file.\n");
                    fclose(input);
                    free(buffer);
                    exit(EXIT_FAILURE);
               }
               buffer = temp;
          }
    
          /* Add input char to the buffer. */
          buffer[i++] = (char)c;
     }
    
     /* Test if loop terminated from error. */
     if (ferror(input)) {
          fprintf(stderr, "There was a file input error.\n");
          free(buffer);
          fclose(input);
          exit(EXIT_FAILURE);
     }
    
     /* Make the buffer a bona-fide string: one more byte for the terminator
        when the data filled the buffer exactly. */
     if (i == buffer_size) {
          buffer_size += 1;
          if ((temp = realloc(buffer, buffer_size)) == NULL) {
               fprintf(stderr, "Ran out of core (and only needed one more byte too ;_;).\n");
               fclose(input);
               free(buffer);
               exit(EXIT_FAILURE);
          }
          buffer = temp;
     }
     buffer[i] = '\0';
    
     puts(buffer);
    
     /* Clean up. */
     free(buffer);
     fclose(input);
    
     return 0;
    }
    
    0 讨论(0)
  • 2021-02-01 11:41
    1. Install libxml2 as a NuGet package in Visual Studio (I am using VS 2015 to test this)
    2. Copy and paste the contents under example XML file in a notepad and save the file as example.xml
    3. Copy and paste the code under //xml parsing into VS
    4. Call the function from main with xml file name as an argument
    5. You will be getting the xml data in configReceive

    That's all...

    example XML file:

    <?xml version="1.0" encoding="utf-8"?>
        <config>
            <xmlConfig value1="This is a simple XML parsing program in C"/>
            <xmlConfig value2="Thank you : Banamali Mishra"/>
            <xmlConfig value3="2000000"/>
            <xmlConfig value4="80"/>
            <xmlConfig value5="10"/>
            <xmlConfig value6="1"/>
        </config>
    

    Here is the source code:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <libxml/xmlreader.h>
    #include <libxml/xmlmemory.h>
    #include <libxml/parser.h>
    
    // Destination for the attribute values copied by ParsingXMLFile(): six
    // slots of 80 bytes each. Slot 0 starts as " "; the other slots start empty.
    char configReceive[6][80] = { " " };
    
    //xml parsing
    // Parse the XML file `filename` and copy attribute values from the root
    // element's <xmlConfig> children into configReceive.
    //
    // The n-th <xmlConfig> element (n = 0..5) is asked for the n-th attribute
    // name in the table below, and the result is stored in configReceive[n].
    // A missing attribute is stored as an empty string, and values longer than
    // a slot are truncated. <xmlConfig> elements beyond the number of slots are
    // ignored. If the file cannot be parsed or has no root element,
    // configReceive is left untouched.
    void ParsingXMLFile(char *filename) {
        static const char *const config[] = {
            "value1", "value2", "value3", "value4", "value5", "value6"
        };
        const size_t name_count = sizeof config / sizeof config[0];
        const size_t slot_count = sizeof configReceive / sizeof configReceive[0];
        // Never index past either the name table or the destination array.
        const size_t limit = name_count < slot_count ? name_count : slot_count;
        size_t       count = 0;
        xmlDocPtr    doc;
        xmlNodePtr   cur;
        xmlChar      *uri;
    
        doc = xmlParseFile(filename);
        if (doc == NULL) {
            return;
        }
    
        cur = xmlDocGetRootElement(doc);
        if (cur != NULL) {
            cur = cur->xmlChildrenNode;
        }
    
        for (; cur != NULL && count < limit; cur = cur->next) {
            if (xmlStrcmp(cur->name, (const xmlChar *)"xmlConfig") != 0) {
                continue;
            }
    
            // xmlGetProp() returns NULL when the attribute is absent.
            uri = xmlGetProp(cur, (const xmlChar *)config[count]);
            // snprintf() bounds the copy to the slot and always terminates it.
            snprintf(configReceive[count], sizeof configReceive[count], "%s",
                     uri != NULL ? (const char *)uri : "");
            if (uri != NULL) {
                xmlFree(uri);
            }
            count++;
        }
    
        xmlFreeDoc(doc);
    }
    
    0 讨论(0)
  • 2021-02-01 11:41

    Suggestion: Use memory mapping

    This has the potential to cut down on useless copying of the data. The trick is to ask the OS for what you want, instead of doing it. Here's an implementation I made earlier:

    mmap.h

    #ifndef MMAP_H
    #define MMAP_H
    
    #include <sys/types.h>
    
    /*
     * A contiguous block of memory: `head` points at its first byte and
     * `size` is its length in bytes.
     */
    struct region_t {
      void *head;
      off_t size;
    };
    
    /*
     * Non-zero when pointer `p` lies outside [head, head + size) of the region
     * pointed to by `reg`. The arithmetic is done on char pointers because
     * arithmetic on void * is a compiler extension, not standard C.
     */
    #define OUT_OF_BOUNDS(reg, p) \
      (((const char *)(p) < (const char *)(reg)->head) || \
       ((const char *)(p) >= (const char *)(reg)->head + (reg)->size))
    
    /*
     * Print the head, size and end address of the region pointed to by `reg`.
     * Needs <stdio.h> at the point of use; this header does not include it.
     * The expansion keeps its historical trailing semicolon so existing call
     * sites written with or without their own `;` still compile; avoid using
     * it as the unbraced body of an if/else.
     */
    #define REG_SHOW(reg) \
      printf("h: %p, s: %ld (e: %p)\n", (void *)(reg)->head, (long)(reg)->size, \
             (void *)((char *)(reg)->head + (reg)->size));
    
    struct region_t *do_mmap(const char *fn);
    #endif
    

    mmap.c

    #include <stdlib.h>
    
    #include <sys/types.h>  /* open lseek             */
    #include <sys/stat.h>   /* open                   */
    #include <fcntl.h>      /* open                   */
    #include <unistd.h>     /*      lseek             */
    #include <sys/mman.h>   /*            mmap        */
    
    #include "mmap.h"
    
    /*
     * Map the whole of file `fn` into memory, read-only and private.
     *
     * Returns a heap-allocated region_t whose `head` is the mapping address
     * and whose `size` is the file length reported by lseek(), or NULL when
     * the allocation, open(), lseek() or mmap() fails. A zero-length file
     * also yields NULL, since mmap() rejects a length of 0.
     * The file descriptor is always closed before returning. On success the
     * caller owns both the mapping and the returned struct.
     */
    struct region_t *do_mmap(const char *fn)
    {
      struct region_t *R = calloc(1, sizeof(struct region_t));
      int fd;
    
      if(R == NULL)
        return NULL;
    
      fd = open(fn, O_RDONLY);
      if(fd == -1)
        goto fail_free;
    
      R->size = lseek(fd, 0, SEEK_END);
      if(R->size == -1)
        goto fail_close;
    
      R->head = mmap(NULL, (size_t)R->size, PROT_READ, MAP_PRIVATE, fd, 0);
      /* mmap() signals failure with MAP_FAILED, not with a null pointer. */
      if(R->head == MAP_FAILED)
        goto fail_close;
    
      close(fd); /* the mapping stays valid without the file descriptor. */
      return R;
    
    fail_close:
      close(fd);
    fail_free:
      free(R);
      return NULL;
    }
    
    0 讨论(0)
  • 2021-02-01 11:55

    Hopefully bug-free ISO-C code to read the contents of a file and add a '\0' char:

    #include <stdlib.h>
    #include <stdio.h>
    
    /*
     * Report the length of `file` as given by ftell() at end-of-stream, then
     * rewind the stream to its beginning.
     *
     * Returns the length, or -1 if seeking to the end, ftell(), or seeking
     * back to the start fails. When ftell() fails the stream is left
     * positioned at the end.
     */
    long fsize(FILE * file)
    {
        long length;
    
        if(fseek(file, 0, SEEK_END) != 0)
            return -1;
    
        length = ftell(file);
    
        /* Short-circuit: the rewind is only attempted when ftell() succeeded. */
        if(length < 0 || fseek(file, 0, SEEK_SET) != 0)
            return -1;
    
        return length;
    }
    
    /*
     * Read the whole file `name` into a freshly malloc()ed, NUL-terminated
     * buffer.
     *
     * str   - receives the buffer, or NULL when the file could not be opened,
     *         sized, or allocated. The caller must free() it.
     * name  - path of the file, opened in binary mode.
     * error - optional (may be NULL). Set to 0 only when the complete file
     *         was read; set to 1 on any failure, including a short read, in
     *         which case *str still holds the bytes that were read.
     *
     * Returns the number of bytes stored in *str, not counting the terminator.
     */
    size_t fget_contents(char ** str, const char * name, _Bool * error)
    {
        FILE * file;
        char * data;
        char * shrunk;
        long size;
        size_t read = 0;
    
        *str = NULL;
        if(error) *error = 1;
    
        file = fopen(name, "rb");
        if(!file)
            return 0;
    
        size = fsize(file);
        if(size < 0)
            goto done;
    
        data = malloc((size_t)size + 1);
        if(!data)
            goto done; /* *error stays 1: allocation failure is an error. */
    
        read = fread(data, 1, (size_t)size, file);
        data[read] = 0;
    
        /* On a short read, give back the unused tail. If realloc() fails the
           original block is still valid, so keep it rather than losing it. */
        if(read < (size_t)size)
        {
            shrunk = realloc(data, read + 1);
            if(shrunk) data = shrunk;
        }
    
        *str = data;
        if(error) *error = (read != (size_t)size);
    
    done:
        fclose(file);
        return read;
    }
    
    0 讨论(0)
提交回复
热议问题