Simple XML parsing for Plan 9 and Linux.
git clone git://r-36.net/xmlpull
commit 53bc6429c1301eec1b676f7f60582a3f094e7f97
Author: Christoph Lohmann <20h@r-36.net>
Date:   Sat,  3 Mar 2018 15:37:07 +0100

Initial commit.

LICENSE | 676+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Makefile | 19+++++++++++++++++++
mkfile | 17+++++++++++++++++
mkfile.plan9port | 18++++++++++++++++++
mkfile.read | 15+++++++++++++++
mkfile.write | 15+++++++++++++++
test.html | 674+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
test.xml | 3+++
test_read.c | 73+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
test_read.xml | 20++++++++++++++++++++
test_write.c | 156+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
test_write.xml | 8++++++++
xmlpull.c | 462+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
xmlpull.h | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
xmlpull.man | 98+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
15 files changed, 2305 insertions(+), 0 deletions(-)

© 2004-2018 Christoph Lohmann <20h at r-36 dot net>

 GNU GENERAL PUBLIC LICENSE
 Version 3, 29 June 2007 diff --git a/test.html b/test.html
@@ -0,0 +1,674 @@ + while((b = nextxmlpull(ret)) != nil){ + switch(b->ev){ + case START_DOCUMENT: + print("Document started.\n"); + break; + case START_TAG: + print("Tag starts: %s\n", b->na); + break; + case START_END_TAG: + print("Startend tag: %s\n", b->na); + break; + case TEXT: + print("Text: %s\n", b->na); + break; + case ATTR: + print("Attr: %s=%s\n", b->na, (b->va == nil) ? "none" : b->va); + break; + case END_TAG: + print("Tag ends: %s\n", b->na); + break; + case END_DOCUMENT: + print("Document ends.\n"); + break; + default: + print("Undocumented: %x\n", b->ev); + break; + } + } + + freexmlpull(ret); + exits(0); +} + diff --git a/test_read.xml b/test_read.xml @@ -0,0 +1,19 @@ +<?xml wdnwodnw="wndownd"?> +<root a="wd wd" b='wdm>'> + <inner_1> + <inner_2> + wdnwodnw + </inner_2> + wdnwodnw + <![CDATA[ ]]> + <inner_3/> + ndoendiebfiebf + <!-- wdnwdnw --> + <inner_4 /> + <inner_5 awdnwdn="qwdnown"/> + </inner_1> +</root> +<root2> + <testing> + </testing> +</root2>+ \ No newline at end of file diff --git a/test_write.c b/test_write.c @@ -0,0 +1,156 @@ +/* + * Copy me if you can. 
+ * by 20h + */ + +#ifndef PLAN9 +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#endif +#ifdef PLAN9 +#include <u.h> +#include <libc.h> +#endif +#include "xmlpull.h" + +int +main(int argc, char *argv[]) +{ + xmlpull *ret; + int fd; + + if(argc < 2){ + print("usage: filename\n"); + exits(0); + } + + fd = open(argv[1], +#ifndef PLAN9 + O_WRONLY); +#endif +#ifdef PLAN9 + OWRITE); +#endif + if(fd < 0){ + perror("open"); + exits(0); + } + + ret = openxmlpull(fd); + + ret->nev = START_DOCUMENT; + if(writexmlpull(ret) == nil) + goto end_main; + + ret->nev = START_TAG; + ret->na = "message"; + ret->ln = 7; + if(writexmlpull(ret) == nil) + goto end_main; + + ret->nev = ATTR; + ret->na = "to"; + ret->ln = 2; + ret->va = "user"; + ret->lv = 4; + if(writexmlpull(ret) == nil) + goto end_main; + + ret->nev = ATTR; + ret->na = "from"; + ret->ln = 4; + ret->va = "programmer"; + ret->lv = 10; + if(writexmlpull(ret) == nil) + goto end_main; + + ret->nev = END_TAG_N; + if(writexmlpull(ret) == nil) + goto end_main; + + if(write(ret->fd, "\n", 1) < 0) + goto end_main; + + ret->nev = START_TAG; + ret->na = "value"; + ret->ln = 5; + if(writexmlpull(ret) == nil) + goto end_main; + + if(write(ret->fd, "\n", 1) < 0) + goto end_main; + + ret->nev = END_TAG_N; + if(writexmlpull(ret) == nil) + goto end_main; + + ret->nev = START_TAG; + ret->na = "!--"; + ret->ln = 3; + if(writexmlpull(ret) == nil) + goto end_main; + + ret->nev = TEXT_C; + ret->na = "Here comes the message."; + ret->ln = 23; + if(writexmlpull(ret) == nil) + goto end_main; + + if(write(ret->fd, "\n", 1) < 0) + goto end_main; + + ret->nev = TEXT; + ret->na = "Thank you for using Libxmlpull.\n"; + ret->ln = 32; + if(writexmlpull(ret) == nil) + goto end_main; + + ret->nev = END_TAG; + ret->na = "value"; + ret->ln = 5; + if(writexmlpull(ret) == nil) + goto end_main; + + if(write(ret->fd, "\n", 1) < 0) + goto end_main; + + ret->nev = START_TAG; + ret->na = "attachments"; + ret->ln = 11; + if(writexmlpull(ret) == nil) + 
goto end_main; + + ret->nev = ATTR; + ret->na = "num"; + ret->ln = 3; + ret->va = "0"; + ret->lv = 1; + if(writexmlpull(ret) == nil) + goto end_main; + + ret->nev = END_TAG_S; + if(writexmlpull(ret) == nil) + goto end_main; + + if(write(ret->fd, "\n", 1) < 0) + goto end_main; + + ret->nev = END_TAG; + ret->na = "message"; + ret->ln = 7; + if(writexmlpull(ret) == nil) + goto end_main; + + if(write(ret->fd, "\n", 1) < 0) + goto end_main; + + ret->nev = END_DOCUMENT; + if(writexmlpull(ret) == nil) + goto end_main; + +end_main: + free(ret); + exits(0); +} + diff --git a/test_write.xml b/test_write.xml @@ -0,0 +1,8 @@ +<?xml version="1.0" encoding="UTF-8"?> +<message to="user" from="programmer" > +<value +><!-- Here comes the message. --> +Thank you for using Libxmlpull. +</value> +<attachments num="0" /> +</message> diff --git a/xmlpull.c b/xmlpull.c @@ -0,0 +1,462 @@ +/* + * Copy me if you can. + * by 20h + */ + +#ifndef PLAN9 +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <string.h> +#endif +#ifdef PLAN9 +#include <u.h> +#include <libc.h> +#endif +#include "xmlpull.h" + +void * +reallocp(void *p, int s, short d) +{ + + p = realloc(p, s); + if(p == nil) { + perror("realloc"); + exits("realloc"); + } + + if(d != 0) + memset(p, 0, s); + + return (void *)p; +} + +void +freexmlpull(xmlpull *x) +{ + if(x != nil){ + if(x->na != nil) + free(x->na); + if(x->va != nil) + free(x->va); + free(x); + } + + return; +} + +xmlpull * +openxmlpull(int fd) +{ + xmlpull *ret; + + ret = reallocp(nil, sizeof(xmlpull), 2); + ret->na = nil; + ret->va = nil; + ret->lm = nil; + ret->ln = 0; + ret->lv = 0; + ret->la = 0; + ret->ev = START_DOCUMENT; + ret->nev = START_DOCUMENT; + ret->fd = fd; + + return ret; +} + +char +getchara(xmlpull *x) +{ + char g; + + if(read(x->fd, &g, 1) <= 0){ + x->ev = END_DOCUMENT; + return (char)0; + } + + return g; +} + +char * +addchara(char *b, int *l, char c) +{ + b = reallocp(b, ++(*l) + 1, 0); + b[(*l) - 1] = c; + b[*l] = '\0'; + + 
return b; +} + +char * +readuntilstr(xmlpull *x, char *str) +{ + char g, *u; + int p; + + u = reallocp(nil, strlen(str) + 1, 2); + p = 0; + + while((g = getchara(x)) != 0) { + u[p++] = g; + if(p < strlen(str)) + continue; + if(!strncmp(u, str, strlen(str))) { + free(u); + return x->na; + } + p--; + + x->na = addchara(x->na, &x->ln, u[0]); + memmove(u, u + 1, strlen(str) - 1); + } + free(u); + + return nil; +} + +char * +readuntil(xmlpull *x, char *b, int *l, char w, char t) +{ + char g; + + while((g = getchara(x)) != 0) { + //print("||%c>%c||", g, w); + if(g == w){ + b = addchara(b, l, '\0'); + return b; + } + + switch(g) { + case '/': + case '>': + if(t != 0) { + addchara(b, l, g); + return nil; + } + case '\t': + case '\r': + case '\n': + case ' ': + if(t != 0) + return b; + b = addchara(b, l, g); + break; + case '\\': + g = getchara(x); + //print("%c", g); + if(g == 0) + return nil; + b = addchara(b, l, g); + break; + default: + b = addchara(b, l, g); + break; + } + } + + return nil; +} + +char * +parseattrib(xmlpull *x) +{ + char g, *b; + + while((g = getchara(x)) != 0) { + //print("%c", g); + switch(g){ + case '\t': + case '\r': + case '\n': + case ' ': + continue; + case '/': + case '>': + x->na = addchara(x->na, &x->ln, g); + return nil; + default: + x->na = addchara(x->na, &x->ln, g); + g = (char)0; + } + if(g == (char)0) + break; + } + + if((b = readuntil(x, x->na, &x->ln, '=', 2)) == nil) + return nil; + x->na = b; + + if((g = getchara(x)) == 0) + return nil; + + //print("magic char: %c\n", g); + switch(g) { + case '"': + case '\'': + if((b = readuntil(x, x->va, &x->lv, g, 0)) == nil) + return nil; + x->va = b; + return x->va; + default: + if((b = readuntil(x, x->va, &x->lv, '>', 2)) == nil) + return nil; + x->va = b; + return x->na; + } + + return x->na; +} + +char * +readname(xmlpull *x) +{ + char g; + + while((g = getchara(x)) != 0){ + //print("%c", g); + switch(g){ + case '\n': + case '\t': + case '\r': + case ' ': + case '>': + case '/': + x->na = 
addchara(x->na, &x->ln, g); + return x->na; + default: + x->na = addchara(x->na, &x->ln, g); + } + } + + return nil; +} + +xmlpull * +nextxmlpull(xmlpull *x) +{ + char g; + + if(x->va != nil) + free(x->va); + + if(x->ev == START_TAG){ + if(x->lm != nil) + free(x->lm); + x->lm = x->na; + x->la = x->ln; + } else + if(x->na != nil) + free(x->na); + + x->na = nil; + x->va = nil; + x->ln = 0; + x->lv = 0; + g = '\0'; + + switch(x->nev){ + case START_DOCUMENT: + if((x->na = readuntil(x, x->na, &x->ln, '<', 0)) == nil) + x->nev = END_DOCUMENT; + else + x->nev = START_TAG; + x->ev = START_DOCUMENT; + break; + case START_TAG: + g = getchara(x); + //print("%c", g); + if(g == '/') + x->ev = END_TAG; + else { + x->na = addchara(x->na, &x->ln, g); + x->ev = START_TAG; + } + + if(readname(x) == nil) + x->nev = END_DOCUMENT; + else { + if(!strncmp(x->na, "![CDATA[", 8)) { + memmove(x->na, x->na + 8, strlen(x->na) - 8); + x->ln -= 8; + x->na = readuntilstr(x, "]]>"); + x->ev = TEXT; + x->nev = TEXT; + return x; + } + if(!strncmp(x->na, "!--", 3)) { + x->na[x->ln - 1] = '\0'; + x->nev = TEXT_C; + return x; + } + if(x->ev == END_TAG){ + x->na[x->ln - 1] = '\0'; + x->nev = TEXT; + } else { + switch(x->na[x->ln - 1]){ + case '/': + getchara(x); + x->ev = START_END_TAG; + x->nev = TEXT; + x->na[x->ln - 1] = '\0'; + break; + case '>': + x->nev = TEXT; + x->na[x->ln - 1] = '\0'; + break; + default: + x->na[x->ln - 1] = '\0'; + x->nev = ATTR; + } + } + } + break; + case TEXT_C: + g = '>'; + case TEXT: + if(g != '>') + g = '<'; + + if((x->na = readuntil(x, x->na, &x->ln, g, 0)) == nil){ + x->ev = END_DOCUMENT; + x->nev = END_DOCUMENT + 1; + } else { + if(x->nev == TEXT_C) + x->nev = TEXT; + else + x->nev = START_TAG; + x->ev = TEXT; + } + break; + case ATTR: + if(parseattrib(x) == nil){ + //print("%c\n", x->na[x->ln - 1]); + switch(x->na[x->ln - 1]){ + case '/': + free(x->na); + x->na = x->lm; + x->ln = x->la; + x->lm = nil; + x->la = 0; + + getchara(x); + x->ev = END_TAG; + x->nev = TEXT; 
+ return x; + case '>': + default: + x->na[x->ln - 1] = '\0'; + } + x->ev = ATTR; + x->nev = TEXT; + return nextxmlpull(x); + } else + x->nev = ATTR; + x->ev = ATTR; + break; + case END_DOCUMENT: + x->ev = END_DOCUMENT; + x->nev = END_DOCUMENT + 1; + break; + default: + return nil; + } + + return x; +} + +xmlpull * +writexmlpull(xmlpull *x) +{ + char *b; + + b = nil; + + switch(x->nev){ + case START_DOCUMENT: + if(write(x->fd, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", 39) < 0) + return nil; + return x; + case START_TAG: + if(x->na == nil) + return nil; + + b = reallocp(b, x->ln + 3, 2); + snprint(b, x->ln + 3, "<%s ", x->na); + if(write(x->fd, b, strlen(b)) < 0){ + free(b); + return nil; + } + free(b); + return x; + case START_END_TAG: + if(x->na == nil) + return nil; + + b = reallocp(b, x->ln + 4, 2); + snprint(b, x->ln + 4, "<%s/>", x->na); + if(write(x->fd, b, strlen(b)) < 0){ + free(b); + return nil; + } + free(b); + return x; + case TEXT: + if(x->na == nil) + return nil; + if(write(x->fd, x->na, x->ln) < 0) + return nil; + return x; + case TEXT_C: + if(x->na == nil) + return nil; + + b = reallocp(b, x->ln + 5, 2); + snprint(b, x->ln + 5, "%s -->", x->na); + if(write(x->fd, b, strlen(b)) < 0){ + free(b); + return nil; + } + free(b); + return x; + case ATTR: + if(x->na == nil) + return nil; + + b = reallocp(b, x->ln + x->lv + 5, 2); + snprint(b, x->ln + x->lv + 5, "%s=\"%s\" ", x->na, (x->va == nil) ? 
"" : x->va); + if(write(x->fd, b, strlen(b)) < 0){ + free(b); + return nil; + } + free(b); + return x; + case END_TAG: + if(x->na == nil) + return nil; + + b = reallocp(b, x->ln + 4, 2); + snprint(b, x->ln + 4, "</%s>", x->na); + if(write(x->fd, b, strlen(b)) < 0){ + free(b); + return nil; + } + free(b); + return x; + case END_TAG_S: + if(write(x->fd, "/>", 2) < 0) + return nil; + return x; + case END_TAG_N: + if(write(x->fd, ">", 1) < 0) + return nil; + return x; + case END_DOCUMENT: + close(x->fd); + return nil; + default: + break; + } + + return nil; +} + diff --git a/xmlpull.h b/xmlpull.h @@ -0,0 +1,51 @@ +/* + * Copy me if you can. + * by 20h + */ + +#ifdef nil +#pragma lib "libxmlpull.a" +#endif + +#ifndef XMLPULL_H +#define XMLPULL_H + +#ifndef nil +#define nil NULL +#define print printf +#define snprint snprintf +#define exits return +#endif + +enum { + START_DOCUMENT = 0x0, + START_TAG, + START_END_TAG, + TEXT, + TEXT_C, + ATTR, + END_TAG, + END_TAG_S, + END_TAG_N, + END_DOCUMENT, +}; + +typedef struct xmlpull xmlpull; +struct xmlpull { + int fd; + char ev; + char nev; + char *lm; + char *na; + char *va; + int la; + int lv; + int ln; +}; + +void freexmlpull(xmlpull *x); +xmlpull *openxmlpull(int fd); +xmlpull *nextxmlpull(xmlpull *x); +xmlpull *writexmlpull(xmlpull *x); + +#endif diff --git a/xmlpull.man b/xmlpull.man @@ -0,0 +1,98 @@ +.TH XMLPULL 2 +.SH NAME +open_xmlpull, +next_xmlpull, +free_xmlpull \- XML-Pull parser functions +.SH SYNOPSIS +.PP +.EX +.ta 4n +4n +4n +4n +#include <u.h> +#include <xmlpull.h> +.sp +enum { + START_DOCUMENT = 0x0, + START_TAG, + START_END_TAG, + TEXT, + TEXT_C, + ATTR, + END_TAG, + END_TAG_S, + END_TAG_N, + END_DOCUMENT, +}; +.sp +.ta \w' 'u +\w'xmlpull 'u +typedef struct xmlpull xmlpull; +struct xmlpull { + int fd; + char ev; + char nev; + char *lm; + char *na; + char *va; + int la; + int lv; + int ln; +}; +.fi +.de XX +.ift .sp 0.5 +.ifn .sp +.. 
+.PP +.nf +.ft L +.ta \w'\fLxmlpull* 'u +4n +4n +4n +4n +void freexmlpull(xmlpull *x); +xmlpull *openxmlpull(int fd); +xmlpull *nextxmlpull(xmlpull *x); +xmlpull *writexmlpull(xmlpull *x); +.EE +.SH DESCRIPTION +.PP +Libxmlpull is a library for parsing and writing XML-files. The parsing is +done in the way described on http://www.xmlpull.org, in a simpler way. +On the other side, writing does XML-pull in the other direction. +.PP +A parsing session is started by calling +.IR openxmlpull +, with a file descriptor from which data is read until it is manually closed. +It returns the structure xmlpull, holding the state of the current +session. +.PP +Next the function +.IR nextxmlpull +is called repeatedly and gives back the +next event found in the read data. +.IR nextxmlpull +returns the changed xmlpull structure with the event +that was just read. +.PP +The current event is saved in xmlpull->ev, the name of the element or +attribute in xmlpull->na and its length in xmlpull->ln. When the event +has the value ATTR, xmlpull->va holds the data of the value of the +attribute and xmlpull->lv its length. The events START_DOCUMENT and +END_DOCUMENT do not have any values. END_TAG_S and END_TAG_N are only +used by +.IR writexmlpull . +.PP +Writing is done by opening a session with +.IR openxmlpull +and then doing a +.IR writexmlpull +in the other direction like +.IR nextxmlpull . +.PP +When the session is finished, all allocated memory can be freed by calling +.IR freexmlpull . +.PP +.SH FILES +.PP +.B test_read.c (mkfile.read) +example for reading an XML-file. +.B test_write.c (mkfile.write) +example for writing an XML-file. +.SH SOURCE +.B http://www.r-36.net/xmlpull.tgz