Add more decoding of XML character entities

This commit is contained in:
df 2021-01-14 23:55:08 +00:00
parent 5e697dc42c
commit 68c33866cd
1 changed files with 17 additions and 11 deletions

28
tvdb.c
View File

@ -48,26 +48,32 @@ struct episode {
void
unescape(char *txt)
{
char *p = txt;
int l = strlen(txt);
char *p = txt;
while ((p = strchr(p, '&')))
for (; p = strchr(p, '&'); p++)
{
HANDLE("&quot;", '"', 5);
HANDLE("&amp;", '&', 4);
HANDLE("&#10;", '\n', 4);
HANDLE("&#13;", '\r', 4);
p++;
size_t ll;
unsigned char icode;
if (1 == sscanf( p, "&#%hhu;%n", &icode, &ll) ||
1 == sscanf( p, "&#%*[xX]%hhx;%n", &icode, &ll)) {
/* &#x<hex>;, &#<decimal>; */
HANDLE(p, (char)icode, ll-1);
} else {
HANDLE("&amp;", '&', 4);
HANDLE("&quot;", '"', 5);
HANDLE("&apos;", '\'', 5);
HANDLE("&lt;", '<', 3);
HANDLE("&gt;", '>', 3);
}
}
p = txt;
while ((p = memchr(p, '\xe2', l - (p - txt))))
{
for (p = txt; p = memchr(p, '\xe2', l - (p - txt)); p++)
{ /* curly apostrophe, en dash, curly quotes */
HANDLE("\xe2\x80\x99", '\'', 2);
HANDLE("\xe2\x80\x93", '-', 2);
HANDLE("\xe2\x80\x9c", '"', 2);
HANDLE("\xe2\x80\x9d", '"', 2);
p++;
}
if ((p = strpbrk(txt, "\n\r")))