Two examples with expat and libxml2. The second one is, IMHO, much easier to use since it creates a tree in memory, a data structure which is easy to work with. expat, on the other hand, does not build anything (you have to do it yourself), it just allows you to call handlers at specific events during the parsing. But expat may be faster (I didn't measure).

With expat, reading an XML file and displaying the elements indented:

/* 
   A simple test program to parse XML documents with expat
   <http://expat.sourceforge.net/>. It just displays the element
   names.

   On Debian, compile with:

   gcc -Wall -o expat-test expat-test.c -lexpat

   Inspired from <http://www.xml.com/pub/a/1999/09/expat/index.html> 
*/

#include <expat.h>
#include <stdio.h>
#include <string.h>

/* Keep track of the current level in the XML tree */
int             Depth;

#define MAXCHARS 1000000

/*
 * Start-element handler for expat.
 *
 * Prints one line for the element: two spaces of indentation per current
 * nesting level (Depth), the element name `el`, then every attribute as
 * name='value'. `attr` is walked as a NULL-terminated array of alternating
 * names and values. Depth is incremented afterwards so that nested elements
 * are indented one level deeper. `data` is not used.
 */
void
start(void *data, const char *el, const char **attr)
{
    const char    **pair;
    int             level;

    (void) data;

    /* Indentation: one "  " per enclosing element. */
    level = 0;
    while (level < Depth) {
        printf("  ");
        level++;
    }

    printf("%s", el);

    /* Attributes come as consecutive (name, value) entries. */
    pair = attr;
    while (pair[0]) {
        printf(" %s='%s'", pair[0], pair[1]);
        pair += 2;
    }

    printf("\n");
    Depth += 1;
}

/*
 * End-element handler for expat.
 *
 * Leaves the current element by decrementing the nesting level (Depth).
 * Neither `data` nor the element name `el` is used.
 */
void
end(void *data, const char *el)
{
    (void) data;
    (void) el;

    Depth -= 1;
}

/*
 * Parse the XML file named by argv[1] with expat, with start() and end()
 * registered as the element handlers.
 *
 * The file is read and handed to XML_Parse() in chunks of at most MAXCHARS
 * bytes; the last call is flagged as final once end-of-file is reached, so
 * documents larger than MAXCHARS are parsed completely.
 *
 * Returns 0 when the whole document was parsed. Returns 1 on a usage
 * error, when the parser cannot be created, when the file cannot be opened
 * or read, or when expat rejects the document.
 */
int
main(int argc, char **argv)
{
    /* Static: keeps the large chunk buffer off the stack and needs neither
     * allocation nor release. */
    static char     xmltext[MAXCHARS];
    char           *filename;
    FILE           *f;
    size_t          size = 0;   /* total number of bytes given to expat */
    size_t          chunk;      /* number of bytes in the current chunk */
    int             done;
    int             status = 1; /* pessimistic until parsing completes */
    XML_Parser      parser;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s filename\n", argv[0]);
        return (1);
    }
    filename = argv[1];

    parser = XML_ParserCreate(NULL);
    if (parser == NULL) {
        fprintf(stderr, "Parser not created\n");
        return (1);
    }
    /* Tell expat to use functions start() and end() each time it encounters
     * the start or end of an element. */
    XML_SetElementHandler(parser, start, end);

    /* Binary mode: the bytes are passed to the parser untranslated. */
    f = fopen(filename, "rb");
    if (f == NULL) {
        perror(filename);
        goto cleanup;
    }

    do {
        chunk = fread(xmltext, sizeof(char), MAXCHARS, f);
        if (ferror(f)) {
            perror(filename);
            goto cleanup;
        }
        done = (feof(f) != 0);
        /* Pass the number of bytes actually read: the buffer is not
         * NUL-terminated. chunk <= MAXCHARS, so the cast cannot overflow. */
        if (XML_Parse(parser, xmltext, (int) chunk, done) ==
            XML_STATUS_ERROR) {
            fprintf(stderr, "Cannot parse %s, file is not well-formed XML\n",
                filename);
            goto cleanup;
        }
        size += chunk;
    } while (!done);

    fprintf(stdout, "Successfully parsed %zu characters in file %s\n", size,
        filename);
    status = 0;

cleanup:
    /* Single exit path so the file and the parser are released on errors
     * as well as on success. */
    if (f != NULL)
        fclose(f);
    XML_ParserFree(parser);
    return (status);
}

With libxml2, a program which displays the name of the root element and the names of its children:

/*
   Simple test with libxml2 <http://xmlsoft.org>. It displays the name
   of the root element and the names of all its children (not
   descendants, just children).

   On Debian, compile with:
   gcc -Wall -o read-xml2 $(xml2-config --cflags) read-xml2.c \
                    $(xml2-config --libs)

*/

#include <stdio.h>
#include <string.h>
#include <libxml/parser.h>

/*
 * Read the XML file named by argv[1] with libxml2 and print the name and
 * numeric node type of the root element, followed by the name and numeric
 * node type of each of its direct children. Every child node is listed,
 * whatever its type.
 *
 * Returns 0 on success. Returns 1 when no file name is given, when the
 * file cannot be read or parsed, or when the document has no root element.
 */
int
main(int argc, char **argv)
{
    xmlDoc         *document;
    xmlNode        *root, *node;
    char           *filename;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s filename.xml\n", argv[0]);
        return 1;
    }
    filename = argv[1];

    /* xmlReadFile() yields NULL when the file is unreadable or malformed;
     * stop here instead of dereferencing a missing tree below. */
    document = xmlReadFile(filename, NULL, 0);
    if (document == NULL) {
        fprintf(stderr, "Cannot parse %s\n", filename);
        return 1;
    }

    /* An empty document has no root element. */
    root = xmlDocGetRootElement(document);
    if (root == NULL) {
        fprintf(stderr, "%s has no root element\n", filename);
        return 1;
    }

    fprintf(stdout, "Root is <%s> (%i)\n", root->name, (int) root->type);
    for (node = root->children; node; node = node->next) {
        fprintf(stdout, "\t Child is <%s> (%i)\n", node->name,
            (int) node->type);
    }
    fprintf(stdout, "...\n");
    /* NOTE(review): the document tree is not released explicitly; the
     * process exits immediately after this point. */
    return 0;
}
Answer from bortzmeyer on Stack Overflow
Top answer
1 of 10
79

Two examples with expat and libxml2. The second one is, IMHO, much easier to use since it creates a tree in memory, a data structure which is easy to work with. expat, on the other hand, does not build anything (you have to do it yourself), it just allows you to call handlers at specific events during the parsing. But expat may be faster (I didn't measure).

With expat, reading an XML file and displaying the elements indented:

/* 
   A simple test program to parse XML documents with expat
   <http://expat.sourceforge.net/>. It just displays the element
   names.

   On Debian, compile with:

   gcc -Wall -o expat-test expat-test.c -lexpat

   Inspired from <http://www.xml.com/pub/a/1999/09/expat/index.html> 
*/

#include <expat.h>
#include <stdio.h>
#include <string.h>

/* Keep track of the current level in the XML tree */
int             Depth;

#define MAXCHARS 1000000

/*
 * Start-element handler for expat.
 *
 * Prints one line for the element: two spaces of indentation per current
 * nesting level (Depth), the element name `el`, then every attribute as
 * name='value'. `attr` is walked as a NULL-terminated array of alternating
 * names and values. Depth is incremented afterwards so that nested elements
 * are indented one level deeper. `data` is not used.
 */
void
start(void *data, const char *el, const char **attr)
{
    const char    **pair;
    int             level;

    (void) data;

    /* Indentation: one "  " per enclosing element. */
    level = 0;
    while (level < Depth) {
        printf("  ");
        level++;
    }

    printf("%s", el);

    /* Attributes come as consecutive (name, value) entries. */
    pair = attr;
    while (pair[0]) {
        printf(" %s='%s'", pair[0], pair[1]);
        pair += 2;
    }

    printf("\n");
    Depth += 1;
}

/*
 * End-element handler for expat.
 *
 * Leaves the current element by decrementing the nesting level (Depth).
 * Neither `data` nor the element name `el` is used.
 */
void
end(void *data, const char *el)
{
    (void) data;
    (void) el;

    Depth -= 1;
}

/*
 * Parse the XML file named by argv[1] with expat, with start() and end()
 * registered as the element handlers.
 *
 * The file is read and handed to XML_Parse() in chunks of at most MAXCHARS
 * bytes; the last call is flagged as final once end-of-file is reached, so
 * documents larger than MAXCHARS are parsed completely.
 *
 * Returns 0 when the whole document was parsed. Returns 1 on a usage
 * error, when the parser cannot be created, when the file cannot be opened
 * or read, or when expat rejects the document.
 */
int
main(int argc, char **argv)
{
    /* Static: keeps the large chunk buffer off the stack and needs neither
     * allocation nor release. */
    static char     xmltext[MAXCHARS];
    char           *filename;
    FILE           *f;
    size_t          size = 0;   /* total number of bytes given to expat */
    size_t          chunk;      /* number of bytes in the current chunk */
    int             done;
    int             status = 1; /* pessimistic until parsing completes */
    XML_Parser      parser;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s filename\n", argv[0]);
        return (1);
    }
    filename = argv[1];

    parser = XML_ParserCreate(NULL);
    if (parser == NULL) {
        fprintf(stderr, "Parser not created\n");
        return (1);
    }
    /* Tell expat to use functions start() and end() each time it encounters
     * the start or end of an element. */
    XML_SetElementHandler(parser, start, end);

    /* Binary mode: the bytes are passed to the parser untranslated. */
    f = fopen(filename, "rb");
    if (f == NULL) {
        perror(filename);
        goto cleanup;
    }

    do {
        chunk = fread(xmltext, sizeof(char), MAXCHARS, f);
        if (ferror(f)) {
            perror(filename);
            goto cleanup;
        }
        done = (feof(f) != 0);
        /* Pass the number of bytes actually read: the buffer is not
         * NUL-terminated. chunk <= MAXCHARS, so the cast cannot overflow. */
        if (XML_Parse(parser, xmltext, (int) chunk, done) ==
            XML_STATUS_ERROR) {
            fprintf(stderr, "Cannot parse %s, file is not well-formed XML\n",
                filename);
            goto cleanup;
        }
        size += chunk;
    } while (!done);

    fprintf(stdout, "Successfully parsed %zu characters in file %s\n", size,
        filename);
    status = 0;

cleanup:
    /* Single exit path so the file and the parser are released on errors
     * as well as on success. */
    if (f != NULL)
        fclose(f);
    XML_ParserFree(parser);
    return (status);
}

With libxml2, a program which displays the name of the root element and the names of its children:

/*
   Simple test with libxml2 <http://xmlsoft.org>. It displays the name
   of the root element and the names of all its children (not
   descendants, just children).

   On Debian, compile with:
   gcc -Wall -o read-xml2 $(xml2-config --cflags) read-xml2.c \
                    $(xml2-config --libs)

*/

#include <stdio.h>
#include <string.h>
#include <libxml/parser.h>

/*
 * Read the XML file named by argv[1] with libxml2 and print the name and
 * numeric node type of the root element, followed by the name and numeric
 * node type of each of its direct children. Every child node is listed,
 * whatever its type.
 *
 * Returns 0 on success. Returns 1 when no file name is given, when the
 * file cannot be read or parsed, or when the document has no root element.
 */
int
main(int argc, char **argv)
{
    xmlDoc         *document;
    xmlNode        *root, *node;
    char           *filename;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s filename.xml\n", argv[0]);
        return 1;
    }
    filename = argv[1];

    /* xmlReadFile() yields NULL when the file is unreadable or malformed;
     * stop here instead of dereferencing a missing tree below. */
    document = xmlReadFile(filename, NULL, 0);
    if (document == NULL) {
        fprintf(stderr, "Cannot parse %s\n", filename);
        return 1;
    }

    /* An empty document has no root element. */
    root = xmlDocGetRootElement(document);
    if (root == NULL) {
        fprintf(stderr, "%s has no root element\n", filename);
        return 1;
    }

    fprintf(stdout, "Root is <%s> (%i)\n", root->name, (int) root->type);
    for (node = root->children; node; node = node->next) {
        fprintf(stdout, "\t Child is <%s> (%i)\n", node->name,
            (int) node->type);
    }
    fprintf(stdout, "...\n");
    /* NOTE(review): the document tree is not released explicitly; the
     * process exits immediately after this point. */
    return 0;
}
2 of 10
45

How about one written in pure assembler :-) Don't forget to check out the benchmarks.

🌐
GitHub
github.com › ooxi › xml.c
GitHub - ooxi/xml.c: Simple XML subset parser comparable to glib's Markup parser, but without any dependencies in one self contained file. · GitHub
Similar to the GLib Markup parser, which also just parses an xml subset, xml.c is a simple, small and self contained xml parser in one file. Ideal for embedding into other projects without the need for big external dependencies.
Starred by 216 users
Forked by 74 users
Languages   C 78.9% | C++ 15.0% | CMake 5.2% | Shell 0.9%
Discussions

A simple C XML parser - Stack Overflow
A lot of the complexity of an XML parser is because it has to parse any generalized XML into a specific internal model. 2010-05-17T20:29:06.08Z+00:00 ... Don’t. Never ever try and write your own XML parser and never ever try and parse XML with a string splitter. You will have encoding and escaping issues. You will have interoperability issues. You will have security issues. Use one of the proven, tested and established libraries ... More on stackoverflow.com
🌐 stackoverflow.com
GNU C library for parsing of XML files
Hi all, Is there a known GNU C library available for parsing XML files for particular attributes ? More on unix.com
🌐 unix.com
0
0
June 14, 2008
Any C++ XML Parsers that are...C++?
I support libraries professionally, They are all C interfaces because the C ABI is portable. Everything can talk to it, EVEN JAVA. Nothing is ABI compatible with a C++ library except MAYBE another C++ project that was built with the same compiler and settings as the library. MAYBE. The C++ ABI isn't portable - even if you were willing to accept that only other C++ projects could possibly hope to use your library, you will still suffer from type erasure problems and dubious exception handling. That said, why don't library writers provide a C++ wrapper around their C interfaces? Node did it with their N-API, for some reason... Well, why don't they provide a wrapper for every other language while they're at it? Where's my Golang wrapper? You mean I have to use the foreign function interface like a plebeian? If you're going to use a shared C++ library object - it can be done; and too many people do it, but it is my professional recommendation you absolutely don't. The only C++ libraries I recommend are those you compile and statically link into your project. Header-only libraries have their own host of problems, and I don't recommend those, either. With that, check GitHub to see if anyone has written a wrapper library for your XML library of choice, and compile that into your program. More on reddit.com
🌐 r/cpp_questions
11
3
December 2, 2019
Fast Haskell: Competing with C at parsing XML

Tada! We matched Hexml, in pure Haskell, using safe accessor functions. We provided a SAX API which is very fast, and a simple demonstration DOM parser with a familiar API which is also quite fast.

No. You didn't match it. As pointed out in many comments, you compare Hexml's DOM API to your SAX API.

Even if you replied that "It also talks about implementing DOM API later in another section", that is intentionally misleading.

It seems like you intentionally dropped the distinction between SAX and DOM, claiming that your library matched another library. How could it match if it is not even the same DOM/SAX interface?

Then you tack on the DOM API at the end of the article, mumbling that "I could do better if I wanted to, but this is good enough", as if to mislead the reader into thinking that the DOM API is just a bonus, when in fact that's the whole point of the API you are comparing to.

More on reddit.com
🌐 r/programming
109
309
November 15, 2014
🌐
Expat
libexpat.github.io
Welcome to Expat! · Expat XML parser
Welcome to Expat, a stream-oriented XML parser library written in C.
🌐
Apache
xerces.apache.org › xerces-c
Xerces-C++ XML Parser
Xerces-C++ is a validating XML parser written in a portable subset of C++. Xerces-C++ makes it easy to give your application the ability to read and write XML data. A shared library is provided for parsing, generating, manipulating, and validating XML documents using the DOM, SAX, and SAX2 APIs.
🌐
GitHub
github.com › capmar › sxml
GitHub - capmar/sxml: Small XML parser in C
Small XML parser in C. Contribute to capmar/sxml development by creating an account on GitHub.
Starred by 91 users
Forked by 11 users
Languages   C 100.0%
🌐
Free
lars.ruoff.free.fr › xmlcpp
Free C/C++ XML Parser Libraries
February 15, 2012 - XmlLite features a simple "pull" programming model with a stream-oriented XmlReader class. It has support for a large set of common character encodings but only limited support for DTD. As usual for a Microsoft product, there are lots of high quality documentation and articles. ... It is a DOM-style in-situ parser written in modern C++, which tries to be as fast as possible. Claims to be a "seriously fast and small parser, [with] hassle-free integration". Entire library is contained in a single header file, and requires no building or configuration.
Find elsewhere
🌐
SourceForge
sxmlc.sourceforge.net
SXMLC - Simple XML C parser
This is a Simple XML parser written in C. "Simple" means that it does not implement all XML specifications, only the most widely used ones.
🌐
GitHub
github.com › ziord › cxml
GitHub - ziord/cxml: C XML Minimalistic Library (CXML) - An XML library for C with a focus on simplicity and ease of use. · GitHub
C XML Minimalistic Library (CXML) - An XML library for C with a focus on simplicity and ease of use. - ziord/cxml
Starred by 50 users
Forked by 3 users
Languages   C 99.3% | CMake 0.7%
🌐
SourceForge
sourceforge.net › home › open source software › software development › xml parsers
Best Open Source Windows XML Parsers 2026
PLEASE NOTE that we are in the process of moving to GitHub: https://github.com/libexpat/libexpat This is James Clark's Expat XML parser library in C. It is a stream oriented parser that requires setting handlers to deal with the structure that the parser discovers in the document.
🌐
W3C
dev.w3.org › XInclude-Test-Suite › libxml2-2.4.24 › libxml2-2.4.24 › doc › xml.html
The XML C library for Gnome - w3.org - W3C
This document describes libxml, the XML C library developed for the Gnome project. XML is a standard for building tag-based structured documents/data. ... Libxml exports Push (progressive) and Pull (blocking) type parser interfaces for both XML and HTML.
🌐
SourceForge
ezxml.sourceforge.net
ezXML
ezXML is a C library for parsing XML documents inspired by simpleXML for PHP. As the name implies, it's easy to use. It's ideal for parsing XML configuration files or REST web service responses. It's also fast and lightweight (less than 20k compiled).
🌐
SourceForge
xmlparselib.sourceforge.net
Standard XML Parsing Library
XML-Parse library is a lightweight set of re-usable functions for general purpose parsing, checking, and creating xml files. It can support stream-oriented, SAX or DOM parsing styles, and includes an optional xsd schema validator and graphical schema generator.
🌐
The Free Country
thefreecountry.com › sourcecode › xml.shtml
Free XML Parser/Generator Libraries | thefreecountry.com
August 20, 2021 - XMLIO is a high-level C++ library with base classes and functions for reading and writing XML files. It is licensed under the GNU LGPL. (Note that this XMLIO is different from the other project of the same name listed below.) ... Sxmlc, which probably stands for Simple XML parser written in ...
🌐
Microsoft Learn
learn.microsoft.com › en-us › archive › msdn-magazine › 2007 › april › xmllite-a-small-and-fast-xml-parser-for-native-c
XmlLite: A Small And Fast XML Parser For Native C++ | Microsoft Learn
XmlLite provides a powerful XML parser for your native C++ applications. It emphasizes performance, is aware of the system resources it uses, and supports a great deal of flexibility in controlling these characteristics.
🌐
Unix.com
unix.com › applications › programming
GNU C library for parsing of XML files - Programming - Unix Linux Community
June 14, 2008 - Hi all, Is there a known GNU C library available for parsing XML files for particular attributes ?
🌐
ScrapingAnt
scrapingant.com › blog › c-plus-plus-parse-xml
How to Parse XML in C++ | ScrapingAnt
August 5, 2024 - This guide will explore popular XML parsing libraries for C++, including Xerces-C++, RapidXML, PugiXML, TinyXML, and libxml++, and provide insights into different parsing techniques such as top-down and bottom-up parsing.
🌐
Oracle
docs.oracle.com › database › 121 › ADXDK › adx_c_parser.htm
20 Using the XML Parser for C
This chapter explains how to use the Extensible Markup Language (XML) parser for C.
🌐
GitHub
github.com › topics › xml-parser-in-c
xml-parser-in-c · GitHub Topics · GitHub
October 1, 2022 - A simple and tiny validating XML parser library in C.
🌐
HeyCoach Blog
heycoach.in › blog › handling-xml-parsing-with-c-libraries
Handling XML Parsing With C Libraries
December 29, 2024 - Here are some popular C libraries for XML parsing: Let’s roll up our sleeves and get our hands dirty with some code! We’ll use libxml2 because it’s like the Swiss Army knife of XML parsing in C. Here’s how to get started: #include <stdio.h> #include <stdlib.h> #include <libxml/parser.h> #include <libxml/tree.h> void parseXML(const char *filename) { xmlDoc *document = xmlReadFile(filename, NULL, 0); if (document == NULL) { fprintf(stderr, "Could not parse XML file: %s\n", filename); return; } xmlNode *root = xmlDocGetRootElement(document); printf("Root element: %s\n", root->name); xmlFreeDoc(document); } int main() { parseXML("example.xml"); return 0; }