mcabber/libjabber/xmltok.c
author Mikael Berthe <mikael@lilotux.net>
Mon, 06 Mar 2006 12:27:00 +0100
changeset 731 f30031cdb56b
parent 414 ec86d759ed54
child 883 0aa9015f06df
permissions -rw-r--r--
Outgoing messages (color_highlight) default to cyan
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
25
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
     1
/*
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
     2
The contents of this file are subject to the Mozilla Public License
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
     3
Version 1.1 (the "License"); you may not use this file except in
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
     4
compliance with the License. You may obtain a copy of the License at
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
     5
http://www.mozilla.org/MPL/
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
     6
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
     7
Software distributed under the License is distributed on an "AS IS"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
     8
basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
     9
License for the specific language governing rights and limitations
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    10
under the License.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    11
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    12
The Original Code is expat.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    13
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    14
The Initial Developer of the Original Code is James Clark.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    15
Portions created by James Clark are Copyright (C) 1998, 1999
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    16
James Clark. All Rights Reserved.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    17
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    18
Contributor(s):
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    19
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    20
Alternatively, the contents of this file may be used under the terms
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    21
of the GNU General Public License (the "GPL"), in which case the
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    22
provisions of the GPL are applicable instead of those above.  If you
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    23
wish to allow use of your version of this file only under the terms of
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    24
the GPL and not to allow others to use your version of this file under
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    25
the MPL, indicate your decision by deleting the provisions above and
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    26
replace them with the notice and other provisions required by the
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    27
GPL. If you do not delete the provisions above, a recipient may use
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    28
your version of this file under either the MPL or the GPL.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    29
*/
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    30
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    31
#include "xmldef.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    32
#include "xmltok.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    33
#include "nametab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    34
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    35
#define VTABLE1 \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    36
  { PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok) }, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    37
  { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    38
  PREFIX(sameName), \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    39
  PREFIX(nameMatchesAscii), \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    40
  PREFIX(nameLength), \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    41
  PREFIX(skipS), \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    42
  PREFIX(getAtts), \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    43
  PREFIX(charRefNumber), \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    44
  PREFIX(predefinedEntityName), \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    45
  PREFIX(updatePosition), \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    46
  PREFIX(isPublicId)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    47
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    48
#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    49
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    50
/* Test the naming bit for the UCS-2 character whose high byte is hi and
   low byte is lo.  pages[hi] selects a group of eight bitmap words, the
   top three bits of lo pick the word and the low five bits pick the bit.
   The mask is built from 1u so that selecting bit 31 is well defined
   (shifting a signed 1 into the sign bit is not).  Yields non-zero when
   the bit is set. */
#define UCS2_GET_NAMING(pages, hi, lo) \
   (namingBitmap[((pages)[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    52
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    53
/* A 2 byte UTF-8 representation splits the character's 11 bits
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    54
between the bottom 5 and 6 bits of the bytes.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    55
We need 8 bits to index into pages, 3 bits to add to that index and
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    56
5 bits to generate the mask. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    57
/* Test the naming bit for the character encoded by the two bytes at
   byte[0..1].  Bits 2-4 of byte[0] index pages; the low two bits of
   byte[0] and bit 5 of byte[1] pick the bitmap word within that page;
   the low five bits of byte[1] pick the bit.  The mask uses 1u so that
   selecting bit 31 does not shift into the sign bit of an int. */
#define UTF8_GET_NAMING2(pages, byte) \
    (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
                  + ((((byte)[0]) & 3) << 1) \
                  + ((((byte)[1]) >> 5) & 1)] \
     & (1u << (((byte)[1]) & 0x1F)))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    62
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    63
/* A 3 byte UTF-8 representation splits the character's 16 bits
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    64
between the bottom 4, 6 and 6 bits of the bytes.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    65
We need 8 bits to index into pages, 3 bits to add to that index and
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    66
5 bits to generate the mask. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    67
/* Test the naming bit for the character encoded by the three bytes at
   byte[0..2].  The low four bits of byte[0] and bits 2-5 of byte[1]
   index pages; the low two bits of byte[1] and bit 5 of byte[2] pick
   the bitmap word within that page; the low five bits of byte[2] pick
   the bit.  The mask uses 1u so that selecting bit 31 does not shift
   into the sign bit of an int. */
#define UTF8_GET_NAMING3(pages, byte) \
  (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
                         + ((((byte)[1]) >> 2) & 0xF)] \
                 << 3) \
                + ((((byte)[1]) & 3) << 1) \
                + ((((byte)[2]) >> 5) & 1)] \
   & (1u << (((byte)[2]) & 0x1F)))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    74
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    75
#define UTF8_GET_NAMING(pages, p, n) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    76
  ((n) == 2 \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    77
  ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    78
  : ((n) == 3 \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    79
     ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    80
     : 0))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    81
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    82
/* Non-zero when the three bytes at p must be rejected: a lead byte of
   0xED followed by a byte with bit 5 set (the surrogate range), or the
   sequences EF BF BE / EF BF BF (U+FFFE / U+FFFF).  p is expected to
   point at unsigned char; every use of p is parenthesized so that an
   expression argument such as buf + 1 is dereferenced as a whole. */
#define UTF8_INVALID3(p) \
  ((*(p)) == 0xED \
  ? (((p)[1] & 0x20) != 0) \
  : ((*(p)) == 0xEF \
     ? ((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \
     : 0))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    88
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    89
/* Non-zero when a four-byte sequence led by 0xF4 has bit 4 or 5 set in
   its second byte (i.e. encodes a value above U+10FFFF).  p is fully
   parenthesized so expression arguments are dereferenced as a whole. */
#define UTF8_INVALID4(p) ((*(p)) == 0xF4 && ((p)[1] & 0x30) != 0)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    90
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    91
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    92
int isNever(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    93
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    94
    return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    95
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    96
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    97
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    98
int utf8_isName2(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
    99
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   100
    return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   101
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   102
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   103
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   104
int utf8_isName3(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   105
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   106
    return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   107
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   108
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   109
#define utf8_isName4 isNever
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   110
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   111
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   112
int utf8_isNmstrt2(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   113
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   114
    return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   115
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   116
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   117
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   118
int utf8_isNmstrt3(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   119
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   120
    return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   121
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   122
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   123
#define utf8_isNmstrt4 isNever
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   124
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   125
#define utf8_isInvalid2 isNever
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   126
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   127
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   128
int utf8_isInvalid3(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   129
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   130
    return UTF8_INVALID3((const unsigned char *)p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   131
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   132
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   133
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   134
int utf8_isInvalid4(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   135
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   136
    return UTF8_INVALID4((const unsigned char *)p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   137
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   138
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   139
struct normal_encoding {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   140
    ENCODING enc;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   141
    unsigned char type[256];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   142
#ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   143
    int (*byteType)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   144
    int (*isNameMin)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   145
    int (*isNmstrtMin)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   146
    int (*byteToAscii)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   147
    int (*charMatches)(const ENCODING *, const char *, int);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   148
#endif /* XML_MIN_SIZE */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   149
    int (*isName2)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   150
    int (*isName3)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   151
    int (*isName4)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   152
    int (*isNmstrt2)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   153
    int (*isNmstrt3)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   154
    int (*isNmstrt4)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   155
    int (*isInvalid2)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   156
    int (*isInvalid3)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   157
    int (*isInvalid4)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   158
};
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   159
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   160
#ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   161
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   162
#define STANDARD_VTABLE(E) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   163
 E ## byteType, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   164
 E ## isNameMin, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   165
 E ## isNmstrtMin, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   166
 E ## byteToAscii, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   167
 E ## charMatches,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   168
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   169
#else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   170
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   171
#define STANDARD_VTABLE(E) /* as nothing */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   172
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   173
#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   174
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   175
#define NORMAL_VTABLE(E) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   176
 E ## isName2, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   177
 E ## isName3, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   178
 E ## isName4, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   179
 E ## isNmstrt2, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   180
 E ## isNmstrt3, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   181
 E ## isNmstrt4, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   182
 E ## isInvalid2, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   183
 E ## isInvalid3, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   184
 E ## isInvalid4
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   185
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   186
static int checkCharRefNumber(int);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   187
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   188
#include "xmltok_impl.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   189
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   190
#ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   191
#define sb_isNameMin isNever
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   192
#define sb_isNmstrtMin isNever
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   193
#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   194
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   195
#ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   196
#define MINBPC(enc) ((enc)->minBytesPerChar)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   197
#else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   198
/* minimum bytes per character */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   199
#define MINBPC(enc) 1
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   200
#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   201
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   202
#define SB_BYTE_TYPE(enc, p) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   203
  (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   204
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   205
#ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   206
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   207
int sb_byteType(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   208
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   209
    return SB_BYTE_TYPE(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   210
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   211
#define BYTE_TYPE(enc, p) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   212
 (((const struct normal_encoding *)(enc))->byteType(enc, p))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   213
#else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   214
#define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   215
#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   216
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   217
#ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   218
#define BYTE_TO_ASCII(enc, p) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   219
 (((const struct normal_encoding *)(enc))->byteToAscii(enc, p))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   220
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   221
int sb_byteToAscii(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   222
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   223
    return *p;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   224
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   225
#else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   226
/* Yields the byte at p; enc is ignored in this configuration.  p is
   parenthesized so that an argument like ptr + 1 is dereferenced whole. */
#define BYTE_TO_ASCII(enc, p) (*(p))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   227
#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   228
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   229
#define IS_NAME_CHAR(enc, p, n) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   230
 (((const struct normal_encoding *)(enc))->isName ## n(enc, p))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   231
#define IS_NMSTRT_CHAR(enc, p, n) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   232
 (((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   233
#define IS_INVALID_CHAR(enc, p, n) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   234
 (((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   235
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   236
#ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   237
#define IS_NAME_CHAR_MINBPC(enc, p) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   238
 (((const struct normal_encoding *)(enc))->isNameMin(enc, p))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   239
#define IS_NMSTRT_CHAR_MINBPC(enc, p) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   240
 (((const struct normal_encoding *)(enc))->isNmstrtMin(enc, p))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   241
#else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   242
#define IS_NAME_CHAR_MINBPC(enc, p) (0)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   243
#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   244
#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   245
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   246
#ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   247
#define CHAR_MATCHES(enc, p, c) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   248
 (((const struct normal_encoding *)(enc))->charMatches(enc, p, c))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   249
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   250
int sb_charMatches(const ENCODING *enc, const char *p, int c)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   251
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   252
    return *p == c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   253
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   254
#else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   255
/* c is an ASCII character */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   256
/* Non-zero when the byte at p equals c; enc is ignored in this
   configuration.  Both p and c are parenthesized so that expression
   arguments cannot regroup around the == operator. */
#define CHAR_MATCHES(enc, p, c) (*(p) == (c))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   257
#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   258
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   259
#define PREFIX(ident) normal_ ## ident
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   260
#include "xmltok_impl_c.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   261
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   262
#undef MINBPC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   263
#undef BYTE_TYPE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   264
#undef BYTE_TO_ASCII
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   265
#undef CHAR_MATCHES
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   266
#undef IS_NAME_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   267
#undef IS_NAME_CHAR_MINBPC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   268
#undef IS_NMSTRT_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   269
#undef IS_NMSTRT_CHAR_MINBPC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   270
#undef IS_INVALID_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   271
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   272
enum {  /* UTF8_cvalN is value of masked first byte of N byte sequence */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   273
    UTF8_cval1 = 0x00,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   274
    UTF8_cval2 = 0xc0,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   275
    UTF8_cval3 = 0xe0,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   276
    UTF8_cval4 = 0xf0
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   277
};
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   278
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   279
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   280
void utf8_toUtf8(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   281
		 const char **fromP, const char *fromLim,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   282
		 char **toP, const char *toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   283
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   284
    char *to;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   285
    const char *from;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   286
    if (fromLim - *fromP > toLim - *toP) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   287
	/* Avoid copying partial characters. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   288
	for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   289
	    if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   290
		break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   291
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   292
    for (to = *toP, from = *fromP; from != fromLim; from++, to++)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   293
	*to = *from;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   294
    *fromP = from;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   295
    *toP = to;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   296
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   297
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   298
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   299
void utf8_toUtf16(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   300
		  const char **fromP, const char *fromLim,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   301
		  unsigned short **toP, const unsigned short *toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   302
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   303
    unsigned short *to = *toP;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   304
    const char *from = *fromP;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   305
    while (from != fromLim && to != toLim) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   306
	switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   307
	case BT_LEAD2:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   308
	    *to++ = ((from[0] & 0x1f) << 6) | (from[1] & 0x3f);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   309
	    from += 2;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   310
	    break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   311
	case BT_LEAD3:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   312
	    *to++ = ((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   313
	    from += 3;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   314
	    break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   315
	case BT_LEAD4:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   316
	    {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   317
		unsigned long n;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   318
		if (to + 1 == toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   319
		    break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   320
		n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   321
		n -= 0x10000;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   322
		to[0] = (unsigned short)((n >> 10) | 0xD800);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   323
		to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   324
		to += 2;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   325
		from += 4;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   326
	    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   327
	    break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   328
	default:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   329
	    *to++ = *from++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   330
	    break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   331
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   332
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   333
    *fromP = from;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   334
    *toP = to;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   335
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   336
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   337
#ifdef XML_NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   338
static const struct normal_encoding utf8_encoding_ns = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   339
	{ VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   340
	    {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   341
#include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   342
#include "utf8tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   343
	    },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   344
	    STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   345
	};
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   346
#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   347
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   348
static const struct normal_encoding utf8_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   349
	{ VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   350
	    {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   351
#define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   352
#include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   353
#undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   354
#include "utf8tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   355
	    },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   356
	    STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   357
	};
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   358
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   359
#ifdef XML_NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   360
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   361
static const struct normal_encoding internal_utf8_encoding_ns = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   362
	{ VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   363
	    {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   364
#include "iasciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   365
#include "utf8tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   366
	    },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   367
	    STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   368
	};
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   369
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   370
#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   371
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   372
static const struct normal_encoding internal_utf8_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   373
	{ VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   374
	    {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   375
#define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   376
#include "iasciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   377
#undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   378
#include "utf8tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   379
	    },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   380
	    STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   381
	};
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   382
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   383
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   384
void latin1_toUtf8(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   385
		   const char **fromP, const char *fromLim,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   386
		   char **toP, const char *toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   387
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   388
    for (;;) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   389
	unsigned char c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   390
	if (*fromP == fromLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   391
	    break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   392
	c = (unsigned char)**fromP;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   393
	if (c & 0x80) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   394
	    if (toLim - *toP < 2)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   395
		break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   396
	    *(*toP)++ = ((c >> 6) | UTF8_cval2);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   397
	    *(*toP)++ = ((c & 0x3f) | 0x80);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   398
	    (*fromP)++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   399
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   400
	else {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   401
	    if (*toP == toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   402
		break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   403
	    *(*toP)++ = *(*fromP)++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   404
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   405
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   406
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   407
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   408
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   409
void latin1_toUtf16(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   410
		    const char **fromP, const char *fromLim,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   411
		    unsigned short **toP, const unsigned short *toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   412
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   413
    while (*fromP != fromLim && *toP != toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   414
	*(*toP)++ = (unsigned char)*(*fromP)++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   415
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   416
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   417
#ifdef XML_NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   418
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   419
static const struct normal_encoding latin1_encoding_ns = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   420
	{ VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   421
	    {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   422
#include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   423
#include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   424
	    },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   425
	    STANDARD_VTABLE(sb_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   426
	};
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   427
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   428
#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   429
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   430
static const struct normal_encoding latin1_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   431
	{ VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   432
	    {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   433
#define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   434
#include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   435
#undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   436
#include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   437
	    },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   438
	    STANDARD_VTABLE(sb_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   439
	};
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   440
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   441
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   442
void ascii_toUtf8(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   443
		  const char **fromP, const char *fromLim,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   444
		  char **toP, const char *toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   445
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   446
    while (*fromP != fromLim && *toP != toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   447
	*(*toP)++ = *(*fromP)++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   448
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   449
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   450
#ifdef XML_NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   451
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   452
static const struct normal_encoding ascii_encoding_ns = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   453
	{ VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   454
	    {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   455
#include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   456
		/* BT_NONXML == 0 */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   457
	    },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   458
	    STANDARD_VTABLE(sb_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   459
	};
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   460
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   461
#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   462
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   463
static const struct normal_encoding ascii_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   464
	{ VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   465
	    {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   466
#define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   467
#include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   468
#undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   469
		/* BT_NONXML == 0 */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   470
	    },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   471
	    STANDARD_VTABLE(sb_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   472
	};
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   473
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   474
static int unicode_byte_type(char hi, char lo)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   475
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   476
    switch ((unsigned char)hi) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   477
case 0xD8: case 0xD9: case 0xDA: case 0xDB:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   478
	return BT_LEAD4;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   479
case 0xDC: case 0xDD: case 0xDE: case 0xDF:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   480
	return BT_TRAIL;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   481
    case 0xFF:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   482
	switch ((unsigned char)lo) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   483
	case 0xFF:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   484
	case 0xFE:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   485
	    return BT_NONXML;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   486
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   487
	break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   488
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   489
    return BT_NONASCII;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   490
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   491
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   492
/*
 * DEFINE_UTF16_TO_UTF8(E) defines the static function E##toUtf8, which
 * converts 16-bit code units from *fromP (ending at fromLim) to UTF-8 at
 * *toP (ending at toLim).  GET_LO and GET_HI must be defined at the point
 * of expansion and select the byte order.
 *
 * The generated function stops, leaving *fromP at the first unconverted
 * unit, when the next character does not fit in the output or when a
 * surrogate pair is cut short by the end of the input.  A trailing odd
 * byte is ignored so the loop can never step over fromLim.  The two
 * units of a surrogate pair are combined without checking that the
 * second one really is a low surrogate.
 */
#define DEFINE_UTF16_TO_UTF8(E) \
static \
void E ## toUtf8(const ENCODING *enc, \
	 const char **fromP, const char *fromLim, \
	 char **toP, const char *toLim) \
{ \
  const char *from = *fromP; \
  /* shrink the input to a whole number of 16-bit units */ \
  fromLim = from + (((fromLim - from) >> 1) << 1); \
  for (; from < fromLim; from += 2) { \
    int plane; \
    unsigned char lo2; \
    unsigned char lo = GET_LO(from); \
    unsigned char hi = GET_HI(from); \
    switch (hi) { \
    case 0: \
      if (lo < 0x80) { \
	if (*toP == toLim) { \
	  *fromP = from; \
	  return; \
	} \
	*(*toP)++ = lo; \
	break; \
      } \
      /* fall through */ \
    case 0x1: case 0x2: case 0x3: \
    case 0x4: case 0x5: case 0x6: case 0x7: \
      if (toLim - *toP < 2) { \
	*fromP = from; \
	return; \
      } \
      *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
      *(*toP)++ = ((lo & 0x3f) | 0x80); \
      break; \
    case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
      /* surrogate pair: needs 4 output bytes and a second input unit */ \
      if (toLim - *toP < 4 || fromLim - from < 4) { \
	*fromP = from; \
	return; \
      } \
      plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
      *(*toP)++ = ((plane >> 2) | UTF8_cval4); \
      *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
      from += 2; \
      lo2 = GET_LO(from); \
      *(*toP)++ = (((lo & 0x3) << 4) \
		   | ((GET_HI(from) & 0x3) << 2) \
		   | (lo2 >> 6) \
		   | 0x80); \
      *(*toP)++ = ((lo2 & 0x3f) | 0x80); \
      break; \
    default: \
      if (toLim - *toP < 3) { \
	*fromP = from; \
	return; \
      } \
      /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
      *(*toP)++ = ((hi >> 4) | UTF8_cval3); \
      *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
      *(*toP)++ = ((lo & 0x3f) | 0x80); \
      break; \
    } \
  } \
  *fromP = from; \
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   554
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   555
/*
 * DEFINE_UTF16_TO_UTF16(E) defines the static function E##toUtf16, which
 * copies 16-bit code units from the byte buffer at *fromP (ending at
 * fromLim) into native unsigned shorts at *toP (ending at toLim).
 * GET_LO and GET_HI must be defined at the point of expansion and select
 * the byte order.
 *
 * A trailing odd byte is ignored.  When the input does not fit, the copy
 * is limited to the output capacity, and if the last unit that would fit
 * is the first half of a surrogate pair (0xD800..0xDBFF) it is left
 * unconverted so that a pair is never split.  *fromP and *toP are
 * advanced in place.
 */
#define DEFINE_UTF16_TO_UTF16(E) \
static \
void E ## toUtf16(const ENCODING *enc, \
	  const char **fromP, const char *fromLim, \
	  unsigned short **toP, const unsigned short *toLim) \
{ \
  /* shrink the input to a whole number of 16-bit units */ \
  fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); \
  if (fromLim - *fromP > ((toLim - *toP) << 1)) { \
    /* limit the input to what the output can hold */ \
    fromLim = *fromP + ((toLim - *toP) << 1); \
    /* Avoid copying first half only of surrogate */ \
    if (fromLim != *fromP && (GET_HI(fromLim - 2) & 0xFC) == 0xD8) \
      fromLim -= 2; \
  } \
  for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \
    *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   568
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   569
/*
 * Little-endian access to one 16-bit code unit stored in two bytes:
 * byte 0 holds the low 8 bits and byte 1 the high 8 bits.
 */
#define SET2(ptr, ch) \
  (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))
#define GET_LO(ptr) ((unsigned char)(ptr)[0])
#define GET_HI(ptr) ((unsigned char)(ptr)[1])
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   573
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   574
DEFINE_UTF16_TO_UTF8(little2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   575
DEFINE_UTF16_TO_UTF16(little2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   576
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   577
#undef SET2
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   578
#undef GET_LO
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   579
#undef GET_HI
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   580
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   581
#define SET2(ptr, ch) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   582
  (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF)))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   583
#define GET_LO(ptr) ((unsigned char)(ptr)[1])
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   584
#define GET_HI(ptr) ((unsigned char)(ptr)[0])
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   585
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   586
DEFINE_UTF16_TO_UTF8(big2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   587
DEFINE_UTF16_TO_UTF16(big2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   588
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   589
#undef SET2
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   590
#undef GET_LO
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   591
#undef GET_HI
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   592
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   593
/*
 * Character accessors for UTF-16 little-endian input.  p points at a
 * two-byte code unit: p[0] is the low byte and p[1] the high byte.
 * The enc argument is only used by LITTLE2_BYTE_TYPE.
 *
 * Every macro argument is parenthesized in the expansion so that callers
 * may pass arbitrary expressions (e.g. "ptr + 2" or "a ? b : c").
 */

/* Byte type of the unit at p: when the high byte is zero the low byte is
   looked up in the encoding's type table, otherwise the unit is classified
   by unicode_byte_type(). */
#define LITTLE2_BYTE_TYPE(enc, p) \
 ((p)[1] == 0 \
  ? ((const struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
  : unicode_byte_type((p)[1], (p)[0]))
/* Low byte of the unit when the high byte is zero, otherwise -1. */
#define LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1)
/* Non-zero when the unit at p has a zero high byte and low byte equal to c. */
#define LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == (c))
/* Name / name-start lookups in the UCS-2 naming tables, passing the high
   byte then the low byte of the unit. */
#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[1], (unsigned char)(p)[0])
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[1], (unsigned char)(p)[0])
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   603
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   604
#ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   605
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   606
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   607
int little2_byteType(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   608
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   609
    return LITTLE2_BYTE_TYPE(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   610
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   611
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   612
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   613
int little2_byteToAscii(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   614
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   615
    return LITTLE2_BYTE_TO_ASCII(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   616
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   617
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   618
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   619
int little2_charMatches(const ENCODING *enc, const char *p, int c)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   620
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   621
    return LITTLE2_CHAR_MATCHES(enc, p, c);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   622
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   623
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   624
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   625
int little2_isNameMin(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   626
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   627
    return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   628
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   629
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   630
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   631
int little2_isNmstrtMin(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   632
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   633
    return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   634
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   635
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   636
#undef VTABLE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   637
#define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   638
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   639
#else /* not XML_MIN_SIZE */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   640
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   641
/*
 * Non-XML_MIN_SIZE build: include the shared implementation file with
 * the UTF-16LE accessors plugged in and every generated identifier
 * prefixed with little2_.
 */
#undef PREFIX
#define PREFIX(ident) little2_ ## ident
#define MINBPC(enc) 2

/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
#define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p)
#define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c)

/* The n-byte name tests are constant 0 here; only the MINBPC forms
   consult the naming tables. */
#define IS_NAME_CHAR(enc, p, n) 0
#define IS_NMSTRT_CHAR(enc, p, n) (0)
#define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)
#define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)

#include "xmltok_impl_c.h"

/* Remove the per-encoding parameters so the next instantiation starts clean. */
#undef MINBPC
#undef BYTE_TYPE
#undef BYTE_TO_ASCII
#undef CHAR_MATCHES
#undef IS_NAME_CHAR
#undef IS_NAME_CHAR_MINBPC
#undef IS_NMSTRT_CHAR
#undef IS_NMSTRT_CHAR_MINBPC
#undef IS_INVALID_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   664
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   665
#endif /* not XML_MIN_SIZE */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   666
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   667
#ifdef XML_NS

/*
 * UTF-16LE encoding table for XML_NS builds.  BT_COLON keeps whatever
 * meaning asciitab.h gives it (contrast little2_encoding, which remaps it).
 * The last scalar in the first sub-initializer is 1 only when
 * XML_BYTE_ORDER == 12.
 * NOTE(review): the scalars 2, 0, <flag> presumably map to the
 * bytes-per-unit / isUtf8 / isUtf16 fields of struct encoding -- confirm.
 */
static const struct normal_encoding little2_encoding_ns = {
    { VTABLE, 2, 0,
#if XML_BYTE_ORDER == 12
      1
#else
      0
#endif
    },
    {
#include "asciitab.h"
#include "latin1tab.h"
    },
    STANDARD_VTABLE(little2_)
};

#endif /* XML_NS */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   685
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   686
static const struct normal_encoding little2_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   687
	    { VTABLE, 2, 0,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   688
#if XML_BYTE_ORDER == 12
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   689
		1
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   690
#else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   691
		0
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   692
#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   693
	    },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   694
	    {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   695
#define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   696
#include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   697
#undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   698
#include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   699
	    },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   700
	    STANDARD_VTABLE(little2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   701
	};
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   702
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   703
#if XML_BYTE_ORDER != 21
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   704
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   705
#ifdef XML_NS

/*
 * Internal UTF-16LE encoding table for XML_NS builds.  It differs from
 * little2_encoding_ns in using iasciitab.h for the ASCII half of the type
 * table and in always setting the last scalar of the first sub-initializer
 * to 1.
 */
static const struct normal_encoding internal_little2_encoding_ns = {
    { VTABLE, 2, 0, 1 },
    {
#include "iasciitab.h"
#include "latin1tab.h"
    },
    STANDARD_VTABLE(little2_)
};

#endif /* XML_NS */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   717
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   718
static const struct normal_encoding internal_little2_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   719
	{ VTABLE, 2, 0, 1 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   720
	    {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   721
#define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   722
#include "iasciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   723
#undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   724
#include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   725
	    },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   726
	    STANDARD_VTABLE(little2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   727
	};
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   728
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   729
#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   730
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   731
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   732
/*
 * Character accessors for UTF-16 big-endian input.  p points at a
 * two-byte code unit: p[0] is the high byte and p[1] the low byte.
 * The enc argument is only used by BIG2_BYTE_TYPE.
 *
 * Every macro argument is parenthesized in the expansion so that callers
 * may pass arbitrary expressions (e.g. "ptr + 2" or "a ? b : c").
 */

/* Byte type of the unit at p: when the high byte is zero the low byte is
   looked up in the encoding's type table, otherwise the unit is classified
   by unicode_byte_type(). */
#define BIG2_BYTE_TYPE(enc, p) \
 ((p)[0] == 0 \
  ? ((const struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
  : unicode_byte_type((p)[0], (p)[1]))
/* Low byte of the unit when the high byte is zero, otherwise -1. */
#define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)
/* Non-zero when the unit at p has a zero high byte and low byte equal to c. */
#define BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == (c))
/* Name / name-start lookups in the UCS-2 naming tables, passing the high
   byte then the low byte of the unit. */
#define BIG2_IS_NAME_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[0], (unsigned char)(p)[1])
#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[0], (unsigned char)(p)[1])
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   742
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   743
#ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   744
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   745
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   746
int big2_byteType(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   747
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   748
    return BIG2_BYTE_TYPE(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   749
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   750
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   751
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   752
int big2_byteToAscii(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   753
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   754
    return BIG2_BYTE_TO_ASCII(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   755
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   756
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   757
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   758
int big2_charMatches(const ENCODING *enc, const char *p, int c)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   759
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   760
    return BIG2_CHAR_MATCHES(enc, p, c);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   761
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   762
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   763
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   764
int big2_isNameMin(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   765
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   766
    return BIG2_IS_NAME_CHAR_MINBPC(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   767
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   768
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   769
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   770
int big2_isNmstrtMin(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   771
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   772
    return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   773
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   774
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   775
#undef VTABLE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   776
#define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   777
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   778
#else /* not XML_MIN_SIZE */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   779
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   780
/*
 * Non-XML_MIN_SIZE build: include the shared implementation file with
 * the UTF-16BE accessors plugged in and every generated identifier
 * prefixed with big2_.
 */
#undef PREFIX
#define PREFIX(ident) big2_ ## ident
#define MINBPC(enc) 2

/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
#define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p)
#define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c)

/* The n-byte name tests are constant 0 here; only the MINBPC forms
   consult the naming tables. */
#define IS_NAME_CHAR(enc, p, n) 0
#define IS_NMSTRT_CHAR(enc, p, n) (0)
#define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p)
#define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)

#include "xmltok_impl_c.h"

/* Remove the per-encoding parameters again after the instantiation. */
#undef MINBPC
#undef BYTE_TYPE
#undef BYTE_TO_ASCII
#undef CHAR_MATCHES
#undef IS_NAME_CHAR
#undef IS_NAME_CHAR_MINBPC
#undef IS_NMSTRT_CHAR
#undef IS_NMSTRT_CHAR_MINBPC
#undef IS_INVALID_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   803
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   804
#endif /* not XML_MIN_SIZE */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   805
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   806
#ifdef XML_NS

/*
 * UTF-16BE encoding table for XML_NS builds.  BT_COLON keeps whatever
 * meaning asciitab.h gives it (contrast big2_encoding, which remaps it).
 * The last scalar in the first sub-initializer is 1 only when
 * XML_BYTE_ORDER == 21.
 * NOTE(review): the scalars 2, 0, <flag> presumably map to the
 * bytes-per-unit / isUtf8 / isUtf16 fields of struct encoding -- confirm.
 */
static const struct normal_encoding big2_encoding_ns = {
    { VTABLE, 2, 0,
#if XML_BYTE_ORDER == 21
      1
#else
      0
#endif
    },
    {
#include "asciitab.h"
#include "latin1tab.h"
    },
    STANDARD_VTABLE(big2_)
};

#endif /* XML_NS */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   824
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   825
static const struct normal_encoding big2_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   826
	    { VTABLE, 2, 0,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   827
#if XML_BYTE_ORDER == 21
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   828
		1
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   829
#else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   830
		0
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   831
#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   832
	    },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   833
	    {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   834
#define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   835
#include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   836
#undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   837
#include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   838
	    },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   839
	    STANDARD_VTABLE(big2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   840
	};
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   841
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   842
#if XML_BYTE_ORDER != 12
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   843
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   844
#ifdef XML_NS

/*
 * Internal UTF-16BE encoding table for XML_NS builds.  It differs from
 * big2_encoding_ns in using iasciitab.h for the ASCII half of the type
 * table and in always setting the last scalar of the first sub-initializer
 * to 1.
 */
static const struct normal_encoding internal_big2_encoding_ns = {
    { VTABLE, 2, 0, 1 },
    {
#include "iasciitab.h"
#include "latin1tab.h"
    },
    STANDARD_VTABLE(big2_)
};

#endif /* XML_NS */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   856
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   857
static const struct normal_encoding internal_big2_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   858
	{ VTABLE, 2, 0, 1 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   859
	    {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   860
#define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   861
#include "iasciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   862
#undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   863
#include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   864
	    },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   865
	    STANDARD_VTABLE(big2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   866
	};
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   867
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   868
#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   869
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   870
#undef PREFIX
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   871
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   872
/*
 * Compare two NUL-terminated strings for equality, treating the letters
 * 'a'..'z' as equal to 'A'..'Z'.  All other characters must match exactly.
 *
 * Returns 1 when the strings are equal under that folding, 0 otherwise.
 */
static int streqci(const char *s1, const char *s2)
{
    char left;
    char right;

    do {
        left = *s1++;
        right = *s2++;
        /* Fold lower-case letters to upper case before comparing. */
        if (left >= 'a' && left <= 'z')
            left += 'A' - 'a';
        if (right >= 'a' && right <= 'z')
            right += 'A' - 'a';
        if (left != right)
            return 0;
    } while (left);     /* both terminators reached together => equal */

    return 1;
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   889
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   890
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   891
void initUpdatePosition(const ENCODING *enc, const char *ptr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   892
			const char *end, POSITION *pos)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   893
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   894
    normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   895
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   896
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   897
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   898
int toAscii(const ENCODING *enc, const char *ptr, const char *end)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   899
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   900
    char buf[1];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   901
    char *p = buf;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   902
    XmlUtf8Convert(enc, &ptr, end, &p, p + 1);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   903
    if (p == buf)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   904
	return -1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   905
    else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   906
	return buf[0];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   907
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   908
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   909
/* Returns 1 if c is a space (0x20), carriage return (0xD), line feed
   (0xA) or tab (0x9); returns 0 for any other value. */
static
int isSpace(int c)
{
    return (c == 0x20 || c == 0xD || c == 0xA || c == 0x9) ? 1 : 0;
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   921
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   922
/* Return 1 if there's just optional white space
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   923
or there's an S followed by name=val. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   924
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   925
int parsePseudoAttribute(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   926
			 const char *ptr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   927
			 const char *end,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   928
			 const char **namePtr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   929
			 const char **valPtr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   930
			 const char **nextTokPtr)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   931
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   932
    int c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   933
    char open;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   934
    if (ptr == end) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   935
	*namePtr = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   936
	return 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   937
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   938
    if (!isSpace(toAscii(enc, ptr, end))) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   939
	*nextTokPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   940
	return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   941
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   942
    do {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   943
	ptr += enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   944
    } while (isSpace(toAscii(enc, ptr, end)));
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   945
    if (ptr == end) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   946
	*namePtr = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   947
	return 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   948
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   949
    *namePtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   950
    for (;;) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   951
	c = toAscii(enc, ptr, end);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   952
	if (c == -1) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   953
	    *nextTokPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   954
	    return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   955
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   956
	if (c == '=')
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   957
	    break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   958
	if (isSpace(c)) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   959
	    do {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   960
		ptr += enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   961
	    } while (isSpace(c = toAscii(enc, ptr, end)));
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   962
	    if (c != '=') {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   963
		*nextTokPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   964
		return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   965
	    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   966
	    break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   967
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   968
	ptr += enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   969
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   970
    if (ptr == *namePtr) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   971
	*nextTokPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   972
	return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   973
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   974
    ptr += enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   975
    c = toAscii(enc, ptr, end);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   976
    while (isSpace(c)) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   977
	ptr += enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   978
	c = toAscii(enc, ptr, end);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   979
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   980
    if (c != '"' && c != '\'') {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   981
	*nextTokPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   982
	return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   983
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   984
    open = c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   985
    ptr += enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   986
    *valPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   987
    for (;; ptr += enc->minBytesPerChar) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   988
	c = toAscii(enc, ptr, end);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   989
	if (c == open)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   990
	    break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   991
	if (!('a' <= c && c <= 'z')
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   992
		&& !('A' <= c && c <= 'Z')
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   993
		&& !('0' <= c && c <= '9')
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   994
		&& c != '.'
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   995
		&& c != '-'
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   996
		&& c != '_') {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   997
	    *nextTokPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   998
	    return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
   999
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1000
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1001
    *nextTokPtr = ptr + enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1002
    return 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1003
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1004
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1005
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1006
int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1007
		   const char *,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1008
		   const char *),
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1009
		   int isGeneralTextEntity,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1010
		   const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1011
		   const char *ptr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1012
		   const char *end,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1013
		   const char **badPtr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1014
		   const char **versionPtr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1015
		   const char **encodingName,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1016
		   const ENCODING **encoding,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1017
		   int *standalone)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1018
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1019
    const char *val = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1020
    const char *name = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1021
    ptr += 5 * enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1022
    end -= 2 * enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1023
    if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr) || !name) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1024
	*badPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1025
	return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1026
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1027
    if (!XmlNameMatchesAscii(enc, name, "version")) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1028
	if (!isGeneralTextEntity) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1029
	    *badPtr = name;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1030
	    return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1031
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1032
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1033
    else {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1034
	if (versionPtr)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1035
	    *versionPtr = val;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1036
	if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1037
	    *badPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1038
	    return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1039
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1040
	if (!name) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1041
	    if (isGeneralTextEntity) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1042
		/* a TextDecl must have an EncodingDecl */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1043
		*badPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1044
		return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1045
	    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1046
	    return 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1047
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1048
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1049
    if (XmlNameMatchesAscii(enc, name, "encoding")) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1050
	int c = toAscii(enc, val, end);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1051
	if (!('a' <= c && c <= 'z') && !('A' <= c && c <= 'Z')) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1052
	    *badPtr = val;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1053
	    return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1054
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1055
	if (encodingName)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1056
	    *encodingName = val;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1057
	if (encoding)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1058
	    *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1059
	if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1060
	    *badPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1061
	    return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1062
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1063
	if (!name)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1064
	    return 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1065
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1066
    if (!XmlNameMatchesAscii(enc, name, "standalone") || isGeneralTextEntity) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1067
	*badPtr = name;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1068
	return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1069
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1070
    if (XmlNameMatchesAscii(enc, val, "yes")) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1071
	if (standalone)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1072
	    *standalone = 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1073
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1074
    else if (XmlNameMatchesAscii(enc, val, "no")) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1075
	if (standalone)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1076
	    *standalone = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1077
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1078
    else {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1079
	*badPtr = val;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1080
	return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1081
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1082
    while (isSpace(toAscii(enc, ptr, end)))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1083
	ptr += enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1084
    if (ptr != end) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1085
	*badPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1086
	return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1087
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1088
    return 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1089
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1090
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1091
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1092
int checkCharRefNumber(int result)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1093
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1094
    switch (result >> 8) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1095
case 0xD8: case 0xD9: case 0xDA: case 0xDB:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1096
case 0xDC: case 0xDD: case 0xDE: case 0xDF:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1097
	return -1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1098
    case 0:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1099
	if (latin1_encoding.type[result] == BT_NONXML)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1100
	    return -1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1101
	break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1102
    case 0xFF:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1103
	if (result == 0xFFFE || result == 0xFFFF)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1104
	    return -1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1105
	break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1106
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1107
    return result;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1108
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1109
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1110
int XmlUtf8Encode(int c, char *buf)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1111
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1112
    enum {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1113
	/* minN is minimum legal resulting value for N byte sequence */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1114
	min2 = 0x80,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1115
	min3 = 0x800,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1116
	min4 = 0x10000
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1117
    };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1118
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1119
    if (c < 0)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1120
	return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1121
    if (c < min2) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1122
	buf[0] = (c | UTF8_cval1);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1123
	return 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1124
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1125
    if (c < min3) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1126
	buf[0] = ((c >> 6) | UTF8_cval2);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1127
	buf[1] = ((c & 0x3f) | 0x80);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1128
	return 2;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1129
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1130
    if (c < min4) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1131
	buf[0] = ((c >> 12) | UTF8_cval3);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1132
	buf[1] = (((c >> 6) & 0x3f) | 0x80);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1133
	buf[2] = ((c & 0x3f) | 0x80);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1134
	return 3;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1135
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1136
    if (c < 0x110000) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1137
	buf[0] = ((c >> 18) | UTF8_cval4);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1138
	buf[1] = (((c >> 12) & 0x3f) | 0x80);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1139
	buf[2] = (((c >> 6) & 0x3f) | 0x80);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1140
	buf[3] = ((c & 0x3f) | 0x80);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1141
	return 4;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1142
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1143
    return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1144
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1145
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1146
/* Encode the character number charNum as UTF-16 code units into buf.
   Values below 0x10000 are stored as a single unit; values from 0x10000
   up to 0x10FFFF are stored as a high/low surrogate pair.
   Returns the number of units written (1 or 2), or 0 without writing
   anything if charNum is negative or not below 0x110000. */
int XmlUtf16Encode(int charNum, unsigned short *buf)
{
    int offset;

    if (charNum < 0 || charNum >= 0x110000)
        return 0;

    if (charNum < 0x10000) {
        buf[0] = (unsigned short)charNum;
        return 1;
    }

    /* Split the 20-bit offset into two 10-bit halves. */
    offset = charNum - 0x10000;
    buf[0] = (unsigned short)(0xD800 + (offset >> 10));
    buf[1] = (unsigned short)(0xDC00 + (offset & 0x3FF));
    return 2;
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1162
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1163
struct unknown_encoding {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1164
    struct normal_encoding normal;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1165
    int (*convert)(void *userData, const char *p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1166
    void *userData;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1167
    unsigned short utf16[256];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1168
    char utf8[256][4];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1169
};
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1170
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1171
int XmlSizeOfUnknownEncoding()
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1172
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1173
    return sizeof(struct unknown_encoding);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1174
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1175
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1176
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1177
int unknown_isName(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1178
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1179
    int c = ((const struct unknown_encoding *)enc)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1180
	    ->convert(((const struct unknown_encoding *)enc)->userData, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1181
    if (c & ~0xFFFF)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1182
	return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1183
    return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1184
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1185
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1186
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1187
int unknown_isNmstrt(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1188
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1189
    int c = ((const struct unknown_encoding *)enc)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1190
	    ->convert(((const struct unknown_encoding *)enc)->userData, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1191
    if (c & ~0xFFFF)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1192
	return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1193
    return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1194
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1195
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1196
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1197
int unknown_isInvalid(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1198
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1199
    int c = ((const struct unknown_encoding *)enc)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1200
	    ->convert(((const struct unknown_encoding *)enc)->userData, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1201
    return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1202
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1203
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1204
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1205
void unknown_toUtf8(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1206
		    const char **fromP, const char *fromLim,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1207
		    char **toP, const char *toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1208
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1209
    char buf[XML_UTF8_ENCODE_MAX];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1210
    for (;;) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1211
	const char *utf8;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1212
	int n;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1213
	if (*fromP == fromLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1214
	    break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1215
	utf8 = ((const struct unknown_encoding *)enc)->utf8[(unsigned char)**fromP];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1216
	n = *utf8++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1217
	if (n == 0) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1218
	    int c = ((const struct unknown_encoding *)enc)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1219
		    ->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1220
	    n = XmlUtf8Encode(c, buf);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1221
	    if (n > toLim - *toP)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1222
		break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1223
	    utf8 = buf;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1224
	    *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP]
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1225
		      - (BT_LEAD2 - 2);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1226
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1227
	else {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1228
	    if (n > toLim - *toP)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1229
		break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1230
	    (*fromP)++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1231
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1232
	do {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1233
	    *(*toP)++ = *utf8++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1234
	} while (--n != 0);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1235
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1236
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1237
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1238
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1239
void unknown_toUtf16(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1240
		     const char **fromP, const char *fromLim,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1241
		     unsigned short **toP, const unsigned short *toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1242
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1243
    while (*fromP != fromLim && *toP != toLim) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1244
	unsigned short c
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1245
	= ((const struct unknown_encoding *)enc)->utf16[(unsigned char)**fromP];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1246
	if (c == 0) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1247
	    c = (unsigned short)((const struct unknown_encoding *)enc)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1248
		->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1249
	    *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP]
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1250
		      - (BT_LEAD2 - 2);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1251
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1252
	else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1253
	    (*fromP)++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1254
	*(*toP)++ = c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1255
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1256
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1257
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1258
ENCODING *
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1259
XmlInitUnknownEncoding(void *mem,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1260
		       int *table,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1261
		       int (*convert)(void *userData, const char *p),
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1262
		       void *userData)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1263
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1264
    int i;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1265
    struct unknown_encoding *e = mem;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1266
    for (i = 0; i < sizeof(struct normal_encoding); i++)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1267
	((char *)mem)[i] = ((char *)&latin1_encoding)[i];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1268
    for (i = 0; i < 128; i++)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1269
	if (latin1_encoding.type[i] != BT_OTHER
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1270
		&& latin1_encoding.type[i] != BT_NONXML
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1271
		&& table[i] != i)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1272
	    return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1273
    for (i = 0; i < 256; i++) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1274
	int c = table[i];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1275
	if (c == -1) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1276
	    e->normal.type[i] = BT_MALFORM;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1277
	    /* This shouldn't really get used. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1278
	    e->utf16[i] = 0xFFFF;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1279
	    e->utf8[i][0] = 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1280
	    e->utf8[i][1] = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1281
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1282
	else if (c < 0) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1283
	    if (c < -4)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1284
		return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1285
	    e->normal.type[i] = BT_LEAD2 - (c + 2);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1286
	    e->utf8[i][0] = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1287
	    e->utf16[i] = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1288
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1289
	else if (c < 0x80) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1290
	    if (latin1_encoding.type[c] != BT_OTHER
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1291
		    && latin1_encoding.type[c] != BT_NONXML
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1292
		    && c != i)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1293
		return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1294
	    e->normal.type[i] = latin1_encoding.type[c];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1295
	    e->utf8[i][0] = 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1296
	    e->utf8[i][1] = (char)c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1297
	    e->utf16[i] = c == 0 ? 0xFFFF : c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1298
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1299
	else if (checkCharRefNumber(c) < 0) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1300
	    e->normal.type[i] = BT_NONXML;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1301
	    /* This shouldn't really get used. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1302
	    e->utf16[i] = 0xFFFF;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1303
	    e->utf8[i][0] = 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1304
	    e->utf8[i][1] = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1305
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1306
	else {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1307
	    if (c > 0xFFFF)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1308
		return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1309
	    if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1310
		e->normal.type[i] = BT_NMSTRT;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1311
	    else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1312
		e->normal.type[i] = BT_NAME;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1313
	    else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1314
		e->normal.type[i] = BT_OTHER;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1315
	    e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1316
	    e->utf16[i] = c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1317
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1318
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1319
    e->userData = userData;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1320
    e->convert = convert;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1321
    if (convert) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1322
	e->normal.isName2 = unknown_isName;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1323
	e->normal.isName3 = unknown_isName;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1324
	e->normal.isName4 = unknown_isName;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1325
	e->normal.isNmstrt2 = unknown_isNmstrt;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1326
	e->normal.isNmstrt3 = unknown_isNmstrt;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1327
	e->normal.isNmstrt4 = unknown_isNmstrt;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1328
	e->normal.isInvalid2 = unknown_isInvalid;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1329
	e->normal.isInvalid3 = unknown_isInvalid;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1330
	e->normal.isInvalid4 = unknown_isInvalid;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1331
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1332
    e->normal.enc.utf8Convert = unknown_toUtf8;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1333
    e->normal.enc.utf16Convert = unknown_toUtf16;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1334
    return &(e->normal.enc);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1335
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1336
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1337
/* Indices of the built-in encodings.  Keep getEncodingIndex and the
   encodings table in step with any change made here. */
enum {
    UNKNOWN_ENC = -1,
    ISO_8859_1_ENC = 0,
    US_ASCII_ENC = 1,
    UTF_8_ENC = 2,
    UTF_16_ENC = 3,
    UTF_16BE_ENC = 4,
    UTF_16LE_ENC = 5,
    /* must match encodingNames up to here */
    NO_ENC = 6
};
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1350
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1351
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1352
int getEncodingIndex(const char *name)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1353
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1354
    static const char *encodingNames[] = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1355
	"ISO-8859-1",
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1356
	"US-ASCII",
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1357
	"UTF-8",
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1358
	"UTF-16",
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1359
	"UTF-16BE"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1360
	"UTF-16LE",
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1361
    };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1362
    int i;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1363
    if (name == 0)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1364
	return NO_ENC;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1365
    for (i = 0; i < sizeof(encodingNames)/sizeof(encodingNames[0]); i++)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1366
	if (streqci(name, encodingNames[i]))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1367
	    return i;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1368
    return UNKNOWN_ENC;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1369
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1370
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1371
/* For binary compatibility, we store the index of the encoding specified
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1372
at initialization in the isUtf16 member. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1373
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1374
/* Expands to the isUtf16 member of enc->initEnc; enc is a pointer and is
   evaluated once.  initScan reads this value to pick an entry of its
   encodingTable. */
#define INIT_ENC_INDEX(enc) ((enc)->initEnc.isUtf16)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1375
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1376
/* This is what detects the encoding.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1377
encodingTable maps from encoding indices to encodings;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1378
INIT_ENC_INDEX(enc) is the index of the external (protocol) specified encoding;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1379
state is XML_CONTENT_STATE if we're parsing an external text entity,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1380
and XML_PROLOG_STATE otherwise.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1381
*/
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1382
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1383
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1384
static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1385
int initScan(const ENCODING **encodingTable,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1386
	     const INIT_ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1387
	     int state,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1388
	     const char *ptr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1389
	     const char *end,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1390
	     const char **nextTokPtr)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1391
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1392
    const ENCODING **encPtr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1393
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1394
    if (ptr == end)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1395
	return XML_TOK_NONE;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1396
    encPtr = enc->encPtr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1397
    if (ptr + 1 == end) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1398
	/* only a single byte available for auto-detection */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1399
	/* a well-formed document entity must have more than one byte */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1400
	if (state != XML_CONTENT_STATE)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1401
	    return XML_TOK_PARTIAL;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1402
	/* so we're parsing an external text entity... */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1403
	/* if UTF-16 was externally specified, then we need at least 2 bytes */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1404
	switch (INIT_ENC_INDEX(enc)) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1405
	case UTF_16_ENC:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1406
	case UTF_16LE_ENC:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1407
	case UTF_16BE_ENC:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1408
	    return XML_TOK_PARTIAL;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1409
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1410
	switch ((unsigned char)*ptr) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1411
	case 0xFE:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1412
	case 0xFF:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1413
	case 0xEF: /* possibly first byte of UTF-8 BOM */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1414
	    if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1415
		    && state == XML_CONTENT_STATE)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1416
		break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1417
	    /* fall through */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1418
	case 0x00:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1419
	case 0x3C:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1420
	    return XML_TOK_PARTIAL;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1421
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1422
    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1423
    else {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1424
	switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1425
	case 0xFEFF:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1426
	    if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1427
		    && state == XML_CONTENT_STATE)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1428
		break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1429
	    *nextTokPtr = ptr + 2;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1430
	    *encPtr = encodingTable[UTF_16BE_ENC];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1431
	    return XML_TOK_BOM;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1432
	    /* 00 3C is handled in the default case */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1433
	case 0x3C00:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1434
	    if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1435
		    || INIT_ENC_INDEX(enc) == UTF_16_ENC)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1436
		    && state == XML_CONTENT_STATE)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1437
		break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1438
	    *encPtr = encodingTable[UTF_16LE_ENC];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1439
	    return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1440
	case 0xFFFE:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1441
	    if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1442
		    && state == XML_CONTENT_STATE)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1443
		break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1444
	    *nextTokPtr = ptr + 2;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1445
	    *encPtr = encodingTable[UTF_16LE_ENC];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1446
	    return XML_TOK_BOM;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1447
	case 0xEFBB:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1448
	    /* Maybe a UTF-8 BOM (EF BB BF) */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1449
	    /* If there's an explicitly specified (external) encoding
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1450
	       of ISO-8859-1 or some flavour of UTF-16
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1451
	       and this is an external text entity,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1452
	    don't look for the BOM,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1453
	       because it might be a legal data. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1454
	    if (state == XML_CONTENT_STATE) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1455
		int e = INIT_ENC_INDEX(enc);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1456
		if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC || e == UTF_16_ENC)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1457
		    break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1458
	    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1459
	    if (ptr + 2 == end)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1460
		return XML_TOK_PARTIAL;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1461
	    if ((unsigned char)ptr[2] == 0xBF) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1462
		*encPtr = encodingTable[UTF_8_ENC];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1463
		return XML_TOK_BOM;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1464
	    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1465
	    break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1466
	default:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1467
	    if (ptr[0] == '\0') {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1468
		/* 0 isn't a legal data character. Furthermore a document entity can only
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1469
		   start with ASCII characters.  So the only way this can fail to be big-endian
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1470
		   UTF-16 if it it's an external parsed general entity that's labelled as
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1471
		   UTF-16LE. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1472
		if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1473
		    break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1474
		*encPtr = encodingTable[UTF_16BE_ENC];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1475
		return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1476
	    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1477
	    else if (ptr[1] == '\0') {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1478
		/* We could recover here in the case:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1479
		    - parsing an external entity
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1480
		    - second byte is 0
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1481
		    - no externally specified encoding
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1482
		    - no encoding declaration
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1483
		   by assuming UTF-16LE.  But we don't, because this would mean when
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1484
		   presented just with a single byte, we couldn't reliably determine
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1485
		   whether we needed further bytes. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1486
		if (state == XML_CONTENT_STATE)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1487
		    break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1488
		*encPtr = encodingTable[UTF_16LE_ENC];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1489
		return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1490
	    }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1491
	    break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1492
	}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1493
    }
237
c8df64f43625 [/trunk] Changeset 250 by mikael
mikael
parents: 25
diff changeset
  1494
    *encPtr = encodingTable[(int)INIT_ENC_INDEX(enc)];
25
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1495
    return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1496
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1497
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1498
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1499
/* First inclusion of xmltok_ns_c.h: NS(x) and ns(x) both expand to x
   unchanged.  Both macros are #undef'd again right after the include. */
#define NS(x) x
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1500
#define ns(x) x
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1501
#include "xmltok_ns_c.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1502
#undef NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1503
#undef ns
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1504
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1505
#ifdef XML_NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1506
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1507
/* Second inclusion of xmltok_ns_c.h, compiled only when XML_NS is defined:
   NS(x) pastes the token NS onto x and ns(x) pastes _ns onto x.
   NOTE(review): presumably the header uses these macros to build the
   identifiers it defines, giving this pass distinct names -- confirm
   against xmltok_ns_c.h. */
#define NS(x) x ## NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1508
#define ns(x) x ## _ns
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1509
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1510
#include "xmltok_ns_c.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1511
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1512
#undef NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1513
#undef ns
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1514
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1515
ENCODING *
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1516
XmlInitUnknownEncodingNS(void *mem,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1517
			 int *table,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1518
			 int (*convert)(void *userData, const char *p),
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1519
			 void *userData)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1520
{
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1521
    ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1522
    if (enc)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1523
	((struct normal_encoding *)enc)->type[':'] = BT_COLON;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1524
    return enc;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1525
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1526
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
  1527
#endif /* XML_NS */