libxconv/xconv.c

115 lines
1.8 KiB
C

#include <stdio.h>
#include "charset.h"
// Append the code point `u` to the output buffer, encoded as UTF-8.
//
//   d    in/out: write cursor; advanced past the bytes that were emitted.
//   len  in/out: bytes still available at *d; reduced by the bytes emitted.
//   u    code point to encode (16 bits, so at most 3 UTF-8 bytes are needed).
//
// Returns 1 when the encoding was written, 0 when it did not fit; in the
// latter case neither *d nor *len is modified.
static int
add_unicode(char **d, size_t *len, uint16_t u)
{
    char *out = *d;
    size_t need;

    // Work out how many bytes the encoding takes. A 16-bit value never
    // needs the 4-byte form, so three cases are exhaustive.
    if (u < 0x80)
        need = 1;
    else if (u < 0x800)
        need = 2;
    else
        need = 3;

    if (*len < need)
        return 0;

    switch (need)
    {
    case 1:
        // 0xxxxxxx
        out[0] = u;
        break;
    case 2:
        // 110xxxxx 10xxxxxx
        out[0] = 0xc0 | ((u >> 6) & 0x1f);
        out[1] = 0x80 | (u & 0x3f);
        break;
    default:
        // 1110xxxx 10xxxxxx 10xxxxxx
        out[0] = 0xe0 | ((u >> 12) & 0xf);
        out[1] = 0x80 | ((u >> 6) & 0x3f);
        out[2] = 0x80 | (u & 0x3f);
        break;
    }

    *d = out + need;
    *len -= need;
    return 1;
}
// Report whether `s` begins with a well-formed multi-byte UTF-8 sequence:
// a lead byte announcing 1, 2 or 3 continuation bytes, followed by exactly
// that many bytes of the form 10xxxxxx.
//
// Returns 1 for such a sequence, 0 otherwise (including plain ASCII and a
// sequence cut short by the terminating NUL, since NUL is not a
// continuation byte).
static int
is_utf_sequence(char *s)
{
    int remaining;
    char *p;

    if ((*s & 0xe0) == 0xc0)
        remaining = 1;      // 110xxxxx: U+0080 and up
    else if ((*s & 0xf0) == 0xe0)
        remaining = 2;      // 1110xxxx: U+0800 and up
    else if ((*s & 0xf8) == 0xf0)
        remaining = 3;      // 11110xxx: U+10000 and up
    else
        return 0;

    // Every byte after the lead must be a continuation byte (10xxxxxx).
    for (p = s + 1; remaining > 0; p++, remaining--)
    {
        if ((*p & 0xc0) != 0x80)
            return 0;
    }
    return 1;
}
// Convert the NUL-terminated string `src` to UTF-8 in `dst`, mapping bytes
// with the high bit set through the iso6937_map / iso6937_combined tables
// (presumably ISO/IEC 6937, going by the table names).
//
//   src     input string.
//   dst     output buffer.
//   dstlen  size of `dst` in bytes, including room for the terminating NUL.
//
// Returns the number of bytes written to `dst`, not counting the NUL, when a
// conversion took place. Returns 0 when nothing was converted: the input
// contained no high-bit bytes, the first high-bit byte already starts a
// valid UTF-8 sequence, or `dstlen` is 0. In the 0 case `dst` is NOT
// NUL-terminated and may hold a partial copy; callers should keep using
// `src`.
//
// Output is truncated at the first character that does not fit in `dst`.
size_t
xconv(char *src, char *dst, size_t dstlen)
{
    size_t len;
    char *s, *d;
    int changed = 0;
    int i;

    // With no room even for the NUL nothing can be written; bailing out
    // here also keeps "dstlen - 1" from wrapping around to SIZE_MAX.
    if (dstlen == 0)
        return 0;
    len = dstlen - 1;

    for (s = src, d = dst; *s && len > 0; s++)
    {
        if (!(*s & 0x80))
        {
            // Plain ASCII is copied through unchanged.
            *d++ = *s;
            len--;
            continue;
        }
        // The first high-bit byte decides whether the input is treated as
        // already being UTF-8; if so, report "nothing converted".
        if (!changed && is_utf_sequence(s))
            return 0;
        changed = 1;
        // Check for combined character: a 0xCx prefix byte followed by a
        // base character, looked up in the table row selected by the low
        // nibble of the prefix.
        if ((*s & 0xf0) == 0xc0 && s[1])
        {
            int k = *s & 0xf;
            // NOTE(review): assumes every row of iso6937_combined is a
            // valid, zero-terminated list; confirm against charset.h.
            icc_t *p = iso6937_combined[k];
            int full = 0;
            for (i = 0; p[i].c; i++)
            {
                if (p[i].c == (s[1] & 0xff))
                {
                    if (!add_unicode(&d, &len, p[i].u))
                        full = 1;
                    break;
                }
            }
            // Stop instead of dropping this character and appending later
            // ones behind the gap.
            if (full)
                break;
            // Consume the base character too. A pair with no table entry
            // produces no output.
            s++;
            continue;
        }
        if (!add_unicode(&d, &len, iso6937_map[*s & 0xff]))
            break;
    }
    if (!changed)
        return 0;
    *d = '\0';
    return dstlen - len - 1;
}