libxconv/xconv.c

115 lines
1.8 KiB
C
Raw Normal View History

2017-03-09 14:55:16 +00:00
#include <stdio.h>
#include "charset.h"
/*
 * Append the UTF-8 encoding of code point u to the output cursor.
 *
 * d    - in/out: pointer to the current write position; advanced past the
 *        bytes written on success.
 * len  - in/out: number of bytes still available at *d; reduced by the
 *        number of bytes written on success.
 * u    - code point to encode.  Being a uint16_t it can never exceed
 *        0xFFFF, so at most three bytes are produced and the 4-byte UTF-8
 *        form is never needed.
 *
 * Returns 1 when the character was written, 0 when fewer bytes remain than
 * the encoding requires (in which case *d and *len are left untouched).
 */
static int
add_unicode(char **d, size_t *len, uint16_t u)
{
  char *out = *d;
  size_t need;

  // Number of bytes the encoding occupies.
  if (u < 0x80)
    need = 1;
  else if (u < 0x800)
    need = 2;
  else
    need = 3;

  if (*len < need)
    return 0;

  switch (need)
  {
  case 1:
    // 0xxxxxxx
    out[0] = (char)u;
    break;
  case 2:
    // 110xxxxx 10xxxxxx
    out[0] = (char)(0xc0 | ((u >> 6) & 0x1f));
    out[1] = (char)(0x80 | (u & 0x3f));
    break;
  default:
    // 1110xxxx 10xxxxxx 10xxxxxx
    out[0] = (char)(0xe0 | ((u >> 12) & 0xf));
    out[1] = (char)(0x80 | ((u >> 6) & 0x3f));
    out[2] = (char)(0x80 | (u & 0x3f));
    break;
  }

  *d += need;
  *len -= need;
  return 1;
}
/*
 * Report whether the bytes starting at s look like one multi-byte UTF-8
 * sequence: a lead byte of the form 110xxxxx, 1110xxxx or 11110xxx
 * followed by the matching number of 10xxxxxx continuation bytes.
 *
 * Returns 1 for such a sequence, 0 otherwise (including when s[0] is a
 * plain 7-bit byte or a stray continuation byte).  Scanning stops at the
 * first byte that is not a continuation byte, so a NUL terminator inside
 * the expected span ends the check without reading past it.
 */
static int
is_utf_sequence(char *s)
{
  char *next = s + 1;
  int trailing;

  // The high bits of the lead byte give the count of continuation bytes.
  if ((s[0] & 0xe0) == 0xc0)
    trailing = 1;               // U+0080+   110xxxxx
  else if ((s[0] & 0xf0) == 0xe0)
    trailing = 2;               // U+0800+   1110xxxx
  else if ((s[0] & 0xf8) == 0xf0)
    trailing = 3;               // U+10000+  11110xxx
  else
    return 0;

  while (trailing > 0)
  {
    // Every following byte must be a continuation byte 10xxxxxx.
    if ((*next & 0xc0) != 0x80)
      return 0;
    next++;
    trailing--;
  }

  return 1;
}
2017-03-09 20:44:08 +00:00
size_t
2017-03-09 14:55:16 +00:00
xconv(char *src, char *dst, size_t dstlen)
{
size_t len = dstlen - 1;
2017-03-09 14:55:16 +00:00
char *s, *d;
int changed = 0;
2017-03-09 14:55:16 +00:00
int i;
for (s = src, d = dst; *s && len > 0; s++)
{
if (!(*s & 0x80))
{
*d++ = *s;
len--;
continue;
}
if (!changed && is_utf_sequence(s))
return 0;
changed = 1;
2017-03-09 14:55:16 +00:00
// Check for combined character.
if ((*s & 0xf0) == 0xc0 && s[1])
{
int k = *s & 0xf;
2017-03-09 16:36:49 +00:00
icc_t *p = iso6937_combined[k];
2017-03-09 14:55:16 +00:00
for (i = 0; p[i].c; i++)
{
if (p[i].c == (s[1] & 0xff))
{
2017-03-09 16:36:49 +00:00
add_unicode(&d, &len, p[i].u);
2017-03-09 14:55:16 +00:00
break;
}
}
s++;
continue;
}
add_unicode(&d, &len, iso6937_map[*s & 0xff]);
}
if (!changed)
return 0;
2017-03-09 14:55:16 +00:00
*d = '\0';
return dstlen - len - 1;
2017-03-09 14:55:16 +00:00
}