Add more decoding of XML character entities

This commit is contained in:
df 2021-01-14 23:55:08 +00:00
parent 5e697dc42c
commit 68c33866cd
1 changed file with 17 additions and 11 deletions

28
tvdb.c
View File

@ -48,26 +48,32 @@ struct episode {
void void
unescape(char *txt) unescape(char *txt)
{ {
char *p = txt;
int l = strlen(txt); int l = strlen(txt);
char *p = txt;
while ((p = strchr(p, '&'))) for (; p = strchr(p, '&'); p++)
{ {
HANDLE("&quot;", '"', 5); size_t ll;
HANDLE("&amp;", '&', 4); unsigned char icode;
HANDLE("&#10;", '\n', 4); if (1 == sscanf( p, "&#%hhu;%n", &icode, &ll) ||
HANDLE("&#13;", '\r', 4); 1 == sscanf( p, "&#%*[xX]%hhx;%n", &icode, &ll)) {
p++; /* &#x<hex>;, &#<decimal>; */
HANDLE(p, (char)icode, ll-1);
} else {
HANDLE("&amp;", '&', 4);
HANDLE("&quot;", '"', 5);
HANDLE("&apos;", '\'', 5);
HANDLE("&lt;", '<', 3);
HANDLE("&gt;", '>', 3);
}
} }
p = txt; for (p = txt; p = memchr(p, '\xe2', l - (p - txt)); p++)
while ((p = memchr(p, '\xe2', l - (p - txt)))) { /* curly apostrophe, en dash, curly quotes */
{
HANDLE("\xe2\x80\x99", '\'', 2); HANDLE("\xe2\x80\x99", '\'', 2);
HANDLE("\xe2\x80\x93", '-', 2); HANDLE("\xe2\x80\x93", '-', 2);
HANDLE("\xe2\x80\x9c", '"', 2); HANDLE("\xe2\x80\x9c", '"', 2);
HANDLE("\xe2\x80\x9d", '"', 2); HANDLE("\xe2\x80\x9d", '"', 2);
p++;
} }
if ((p = strpbrk(txt, "\n\r"))) if ((p = strpbrk(txt, "\n\r")))