libxconv/xconv.c

111 lines
1.7 KiB
C

#include "charset.h"
// Append the UTF-8 encoding of the 16-bit code point `u` at *d.
//
// d    in/out: write cursor; advanced past the bytes written on success.
// len  in/out: bytes still available at *d; reduced by the bytes written.
// u    code point to encode (16 bits, so at most a 3-byte sequence).
//
// Returns 1 on success. Returns 0, leaving *d and *len untouched, when the
// encoded form does not fit in the remaining space.
static int
add_unicode(char **d, size_t *len, uint16_t u)
{
	char *out = *d;
	size_t width;

	// Encoded width depends only on the magnitude of the code point.
	if (u < 0x80)
		width = 1;
	else if (u < 0x800)
		width = 2;
	else
		width = 3;

	if (*len < width)
		return 0;

	switch (width)
	{
	case 1:
		// 0xxxxxxx
		out[0] = (char)u;
		break;
	case 2:
		// 110xxxxx 10xxxxxx
		out[0] = (char)(0xc0 | ((u >> 6) & 0x1f));
		out[1] = (char)(0x80 | (u & 0x3f));
		break;
	default:
		// 1110xxxx 10xxxxxx 10xxxxxx
		out[0] = (char)(0xe0 | ((u >> 12) & 0xf));
		out[1] = (char)(0x80 | ((u >> 6) & 0x3f));
		out[2] = (char)(0x80 | (u & 0x3f));
		break;
	}

	*d = out + width;
	*len -= width;
	return 1;
}
// Report whether `s` starts with a structurally valid multi-byte UTF-8
// sequence: a 2-, 3- or 4-byte lead byte followed by the matching number of
// 10xxxxxx continuation bytes.
//
// Returns 1 if so, 0 otherwise (including when *s is plain ASCII or a stray
// continuation byte). Scanning stops at the first byte that is not a
// continuation byte, so a NUL terminator inside the expected sequence ends
// the check without reading past it.
static int
is_utf_sequence(char *s)
{
	const unsigned char lead = (unsigned char)*s;
	size_t trailing;

	if ((lead & 0xe0) == 0xc0)
		trailing = 1; // U+0080+   110xxxxx
	else if ((lead & 0xf0) == 0xe0)
		trailing = 2; // U+0800+   1110xxxx
	else if ((lead & 0xf8) == 0xf0)
		trailing = 3; // U+10000+  11110xxx
	else
		return 0;

	while (trailing-- > 0)
	{
		++s;
		// Every following byte must look like 10xxxxxx.
		if ((*s & 0xc0) != 0x80)
			return 0;
	}
	return 1;
}
// Convert the NUL-terminated byte string `src` to UTF-8 in `dst`, translating
// bytes with the high bit set through the iso6937_map / iso6937_combined
// tables (presumably ISO 6937, going by the table names).
//
// src     input string.
// dst     output buffer.
// dstlen  size of `dst` in bytes, including room for the terminating NUL.
//
// Returns the number of bytes written to `dst`, not counting the terminating
// NUL. Returns 0 when no conversion was performed: `dstlen` is 0, the input
// contains no high-bit bytes within the space available, or the first
// high-bit byte starts a well-formed UTF-8 sequence (the input is then taken
// to be UTF-8 already). In the 0 case `dst` is NOT NUL-terminated and may
// hold a partial copy; callers should keep using `src`.
//
// Output is truncated at the first character that does not fit in `dst`;
// no later characters are appended after a character has been dropped.
size_t
xconv(char *src, char *dst, size_t dstlen)
{
	size_t i, len;
	char *s, *d;
	int changed = 0;

	// Without this guard `dstlen - 1` wraps to SIZE_MAX and the loop would
	// write without bound.
	if (dstlen == 0)
		return 0;

	// Reserve one byte for the terminating NUL.
	len = dstlen - 1;

	for (s = src, d = dst; *s && len > 0; ++s)
	{
		// Plain ASCII is copied through unchanged.
		if (!(*s & 0x80))
		{
			*(d++) = *s;
			--len;
			continue;
		}

		// Only the first high-bit byte is probed: if it starts a valid
		// UTF-8 sequence, leave the string alone.
		if (!changed && is_utf_sequence(s))
			return 0;
		changed = 1;

		// Bytes 0xC0..0xCF combine with the following byte; the low
		// nibble selects the lookup table for the pair.
		if ((*s & 0xf0) == 0xc0)
		{
			// A lead byte at the very end of the string is dropped.
			if (s[1])
			{
				icc_t *p = iso6937_combined[*s & 0xf];
				int fits = 1;

				for (i = 0; p[i].c; ++i)
					if (p[i].c == (s[1] & 0xff))
					{
						fits = add_unicode(&d, &len, p[i].u);
						break;
					}

				// Out of space: stop rather than skip this
				// character and keep appending later ones.
				if (!fits)
					break;

				// Consume the second byte of the pair (also when
				// no table entry matched and nothing was emitted).
				++s;
			}
		}
		else if (!add_unicode(&d, &len, iso6937_map[*s & 0xff]))
		{
			// Out of space: truncate cleanly here.
			break;
		}
	}

	if (!changed)
		return 0;

	*d = '\0';
	return dstlen - len - 1;
}