#include #include "charset.h" static int add_unicode(char **d, size_t *len, uint16_t u) { if (u < 0x80) { // 1 byte if (*len < 1) return 0; (*d)[0] = u; (*d)++, (*len)--; return 1; } if (u < 0x800) { // 2 bytes if (*len < 2) return 0; (*d)[0] = 0xc0 | ((u >> 6) & 0x1f); (*d)[1] = 0x80 | (u & 0x3f); *d += 2, *len -= 2; return 1; } #if 0 if (u < 0x10000) #endif { // 3 bytes if (*len < 3) return 0; (*d)[0] = 0xe0 | ((u >> 12) & 0xf); (*d)[1] = 0x80 | ((u >> 6) & 0x3f); (*d)[2] = 0x80 | (u & 0x3f); *d += 3, *len -= 3; return 1; } // 4 byte code point not supported. return 0; } static int is_utf_sequence(char *s) { int utf8_len; int i; if ((*s & 0xe0) == 0xc0) // U+0080+ 110xxxxx utf8_len = 1; else if ((*s & 0xf0) == 0xe0) // U+0800+ 1110xxxx utf8_len = 2; else if ((*s & 0xf8) == 0xf0) // U+10000+ 11110xxx utf8_len = 3; else return 0; for (i = 1; i <= utf8_len; i++) { // UTF-8 continuation character 10xxxxxx if ((s[i] & 0xc0) != 0x80) // Invalid UTF-8 sequence return 0; } return 1; } size_t xconv(char *src, char *dst, size_t dstlen) { size_t len = dstlen - 1; char *s, *d; int changed = 0; int i; for (s = src, d = dst; *s && len > 0; s++) { if (!(*s & 0x80)) { *d++ = *s; len--; continue; } if (!changed && is_utf_sequence(s)) return 0; changed = 1; // Check for combined character. if ((*s & 0xf0) == 0xc0 && s[1]) { int k = *s & 0xf; icc_t *p = iso6937_combined[k]; for (i = 0; p[i].c; i++) { if (p[i].c == (s[1] & 0xff)) { add_unicode(&d, &len, p[i].u); break; } } s++; continue; } add_unicode(&d, &len, iso6937_map[*s & 0xff]); } if (!changed) return 0; *d = '\0'; return dstlen - len - 1; }