24
|
1 |
#include <stdlib.h> |
|
2 |
#include <string.h> |
|
3 |
|
|
4 |
#include "utf8.h" |
|
5 |
|
|
6 |
|
|
7 |
/* Desc: convert UTF8 -> single-byte string (ISO-8859-1; ASCII is a subset)
 *
 * In  : NUL-terminated UTF8 string (may be NULL)
 * Out : newly allocated single-byte string, or NULL when src is NULL or
 *       the allocation fails
 *
 * Only well-formed 2-byte sequences whose code point fits in one byte
 * (<= U+00FF) are folded into a single output byte.  Every other byte
 * (plain ASCII, longer sequences, a lead byte without a valid continuation
 * byte, code points above U+00FF) is copied through unchanged.
 *
 * Note: it is up to the caller to free the returned string
 */
char *utf8_decode(const char *src)
{
    if (src == NULL) {
        return NULL;
    }

    /* The output never exceeds the input: each byte written consumes at
     * least one input byte, so strlen(src) + 1 is always enough. */
    char *ret = calloc(1, strlen(src) + 1);
    if (ret == NULL) {
        return NULL;
    }

    const unsigned char *in = (const unsigned char *)src;
    unsigned char *aux = (unsigned char *)ret;

    while (*in) {
        unsigned char lead = *in++;
        /* lead was not NUL, so 'in' points at most at the terminator and
         * peeking at it is safe. */
        unsigned char next = *in;

        if ((lead & 0xe0) == 0xc0 && (next & 0xc0) == 0x80) {
            unsigned int cp = ((unsigned int)(lead & 0x1f) << 6) | (next & 0x3f);

            if (cp <= 0xff) {
                *aux++ = (unsigned char)cp;
                in++; /* consume the continuation byte */
                continue;
            }
            /* Not representable in one byte: fall through and copy the
             * raw bytes instead of truncating the code point. */
        }
        *aux++ = lead;
    }

    return ret;
}
|
32 |
|
|
33 |
|
|
34 |
/* Desc: convert single-byte string (ISO-8859-1; ASCII is a subset) -> UTF8
 *
 * In  : NUL-terminated single-byte string (may be NULL)
 * Out : newly allocated UTF8 string, or NULL when src is NULL or the
 *       allocation fails
 *
 * Bytes below 0x80 are copied as-is; bytes 0x80..0xFF are written as the
 * 2-byte UTF8 sequence for the code point of the same value.
 *
 * Note: it is up to the caller to free the returned string
 */
char *utf8_encode(const char *src)
{
    if (src == NULL) {
        return NULL;
    }

    /* Worst case every input byte expands to two output bytes.  Letting
     * calloc do the multiplication also lets it reject an overflowing
     * size instead of silently wrapping. */
    char *ret = calloc(strlen(src) + 1, 2);
    if (ret == NULL) {
        return NULL;
    }

    const unsigned char *in = (const unsigned char *)src;
    unsigned char *aux = (unsigned char *)ret;

    while (*in) {
        unsigned char ch = *in++;

        if (ch < 0x80U) {
            *aux++ = ch;
        } else {
            /* 110000xx 10xxxxxx: top two bits, then the low six bits */
            *aux++ = (unsigned char)(0xc0 | (ch >> 6));
            *aux++ = (unsigned char)(0x80 | (ch & 0x3f));
        }
    }

    return ret;
}