diff --git a/tvdb.c b/tvdb.c index 7e68e56..5210bed 100644 --- a/tvdb.c +++ b/tvdb.c @@ -48,26 +48,32 @@ struct episode { void unescape(char *txt) { - char *p = txt; int l = strlen(txt); + char *p = txt; - while ((p = strchr(p, '&'))) + for (; p = strchr(p, '&'); p++) { - HANDLE(""", '"', 5); - HANDLE("&", '&', 4); - HANDLE(" ", '\n', 4); - HANDLE(" ", '\r', 4); - p++; + size_t ll; + unsigned char icode; + if (1 == sscanf( p, "&#%hhu;%n", &icode, &ll) || + 1 == sscanf( p, "&#%*[xX]%hhx;%n", &icode, &ll)) { + /* &#x;, &#; */ + HANDLE(p, (char)icode, ll-1); + } else { + HANDLE("&", '&', 4); + HANDLE(""", '"', 5); + HANDLE("'", '\'', 5); + HANDLE("<", '<', 3); + HANDLE(">", '>', 3); + } } - p = txt; - while ((p = memchr(p, '\xe2', l - (p - txt)))) - { + for (p = txt; p = memchr(p, '\xe2', l - (p - txt)); p++) + { /* curly apostrophe, en dash, curly quotes */ HANDLE("\xe2\x80\x99", '\'', 2); HANDLE("\xe2\x80\x93", '-', 2); HANDLE("\xe2\x80\x9c", '"', 2); HANDLE("\xe2\x80\x9d", '"', 2); - p++; } if ((p = strpbrk(txt, "\n\r")))