Compare commits

...

4 Commits
master ... test

Author SHA1 Message Date
df 219f77f965 Add executable for test 2021-01-15 19:50:18 +00:00
df cf86aaebca Ensure ; immediately follows the code in &#<hex/dec>; 2021-01-15 00:58:18 +00:00
df 5aeab9da9d Build without warnings on-box 2021-01-15 00:31:20 +00:00
df 68c33866cd Add more decoding of XML character entities 2021-01-14 23:55:08 +00:00
3 changed files with 19 additions and 11 deletions

View File

@@ -9,7 +9,7 @@ HDRS=
OBJS= $(SRCS:.c=.o)
CC=gcc
#CC=mipsel-linux-gcc
CFLAGS=-g
CFLAGS=-g -std=c99 -D_XOPEN_SOURCE=700
INCS=
LIBS=-lsqlite3
WARN=-pedantic -Wall -W -Wnested-externs -Wpointer-arith -Wno-long-long

BIN
tvdb Executable file

Binary file not shown.

28
tvdb.c
View File

@@ -51,23 +51,31 @@ unescape(char *txt)
char *p = txt;
int l = strlen(txt);
while ((p = strchr(p, '&')))
for (; (p = strchr(p, '&')); p++)
{
HANDLE("&quot;", '"', 5);
HANDLE("&amp;", '&', 4);
HANDLE("&#xD;", '\n', 4);
HANDLE("&#xA;", '\r', 4);
p++;
int ll = 0;
unsigned char icode;
/* sscanf -> 1: the code was read; ll>0: ';' came next */
if ((1 == sscanf( p, "&#%hhu;%n", &icode, &ll) ||
1 == sscanf( p, "&#%*[xX]%hhx;%n", &icode, &ll)) &&
ll > 0) {
/* &#x<hex>;, &#<decimal>; */
HANDLE(p, (char)icode, ll-1);
} else {
HANDLE("&amp;", '&', 4);
HANDLE("&quot;", '"', 5);
HANDLE("&apos;", '\'', 5);
HANDLE("&lt;", '<', 3);
HANDLE("&gt;", '>', 3);
}
}
p = txt;
while ((p = memchr(p, '\xe2', l - (p - txt))))
{
for (p = txt; (p = memchr(p, '\xe2', l - (p - txt))); p++)
{ /* curly apostrophe, en dash, curly quotes */
HANDLE("\xe2\x80\x99", '\'', 2);
HANDLE("\xe2\x80\x93", '-', 2);
HANDLE("\xe2\x80\x9c", '"', 2);
HANDLE("\xe2\x80\x9d", '"', 2);
p++;
}
if ((p = strpbrk(txt, "\n\r")))