tools/erlparse.lua
author Matthew Wild <mwild1@gmail.com>
Sun, 17 Mar 2024 10:10:24 +0000
changeset 13464 a688947fab1e
parent 7822 ad709ee7d3d8
permissions -rw-r--r--
mod_bosh: Set base_type on session This fixes a traceback with mod_saslauth. Ideally we move this to util.session at some point, though.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
1523
841d61be198f Remove version number from copyright headers
Matthew Wild <mwild1@gmail.com>
parents: 896
diff changeset
     1
-- Prosody IM
2923
b7049746bd29 Update copyright headers for 2010
Matthew Wild <mwild1@gmail.com>
parents: 1783
diff changeset
     2
-- Copyright (C) 2008-2010 Matthew Wild
b7049746bd29 Update copyright headers for 2010
Matthew Wild <mwild1@gmail.com>
parents: 1783
diff changeset
     3
-- Copyright (C) 2008-2010 Waqas Hussain
5776
bd0ff8ae98a8 Remove all trailing whitespace
Florian Zeitz <florob@babelmonkeys.de>
parents: 5080
diff changeset
     4
--
758
b1885732e979 GPL->MIT!
Matthew Wild <mwild1@gmail.com>
parents: 615
diff changeset
     5
-- This project is MIT/X11 licensed. Please see the
b1885732e979 GPL->MIT!
Matthew Wild <mwild1@gmail.com>
parents: 615
diff changeset
     6
-- COPYING file in the source package for more information.
519
cccd610a0ef9 Insert copyright/license headers
Matthew Wild <mwild1@gmail.com>
parents: 489
diff changeset
     7
--
cccd610a0ef9 Insert copyright/license headers
Matthew Wild <mwild1@gmail.com>
parents: 489
diff changeset
     8
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
     9
local string_byte, string_char = string.byte, string.char;
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
    10
local t_concat, t_insert = table.concat, table.insert;
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
    11
local type, tonumber, tostring = type, tonumber, tostring;
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
    12
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
    13
-- Parser state shared by read(), peek() and pushback().
local file; -- input handle that read() pulls single characters from; not assigned in this block
local last; -- one character of buffered lookahead, or nil when nothing is buffered
local line = 1; -- bumped for every "\n" fetched from the file; reported in error messages

-- Return the next input character: the buffered lookahead when there is one,
-- otherwise a single character fetched with file:read(1).
-- When `expected` is given and the character differs from it, raise an error
-- naming both characters and the current line.
local function read(expected)
	local ch = last;
	if ch then
		-- Consume the buffered lookahead.
		last = nil;
	else
		ch = file:read(1);
		if ch == "\n" then
			line = line + 1;
		end
	end
	if expected and ch ~= expected then
		local got = ch or "nil";
		error("expected: "..expected.."; got: "..got.." on line "..line);
	end
	return ch;
end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
    27
-- Put `ch` back so that the next read()/peek() returns it.
-- Only one character of lookahead is supported: raises an error if a
-- character is already buffered.
local function pushback(ch)
	if last then
		-- Raise a real message instead of a nil error value.
		error("pushback: a lookahead character is already buffered (line "..line..")");
	end
	last = ch;
end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
    31
-- Return the next input character without consuming it.
-- The character is fetched with read() on first use and kept in `last`
-- until a later read() takes it.
local function peek()
	last = last or read();
	return last;
end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
    35
2945
475dee08b400 tools/erlparse: Optimisations aplenty for faster processing of large files
Matthew Wild <mwild1@gmail.com>
parents: 2923
diff changeset
    36
-- Byte values of the characters used by the classification helpers.
-- The literal is ordered to match the variable list one-to-one:
-- "A" "a" "Z" "z" "0" "9" "_" "@" " " "-", so that `__` really is the
-- underscore and `_at` really is the at-sign.
local _A, _a, _Z, _z, _0, _9, __, _at, _space, _minus = string_byte("AaZz09_@ -", 1, 10);
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
    37
-- True when `ch` starts with a lower-case ASCII letter (a-z).
-- nil and the empty string yield false instead of raising.
local function isLowerAlpha(ch)
	local byte = ch and string_byte(ch) or 0;
	return (byte >= _a and byte <= _z);
end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
    41
-- True when `ch` starts with an ASCII digit (0-9) or a minus sign.
-- nil (what peek() yields at end of input) and the empty string yield false
-- instead of raising inside string.byte.
local function isNumeric(ch)
	local byte = ch and string_byte(ch) or 0;
	return (byte >= _0 and byte <= _9) or byte == _minus;
end
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
    45
-- True when `ch` starts with a character allowed inside an unquoted atom:
-- an ASCII letter, a digit, "_" or "@".
-- nil (what peek() yields at end of input) and the empty string yield false
-- instead of raising inside string.byte.
local function isAtom(ch)
	local byte = ch and string_byte(ch) or 0;
	return (byte >= _A and byte <= _Z)
		or (byte >= _a and byte <= _z)
		or (byte >= _0 and byte <= _9)
		or byte == __ or byte == _at;
end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
    49
-- True when `ch` starts with a byte at or below the space character, i.e.
-- a space or any ASCII control character (tab, newline, ...).
-- nil and the empty string yield false; the previous string fallback made
-- the comparison raise "attempt to compare string with number".
local function isSpace(ch)
	local byte = ch and string_byte(ch);
	if byte == nil then
		return false;
	end
	return byte <= _space;
end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
    53
5079
2ab99e239d45 tools/erlparse: Fix erlang string escape sequences.
Waqas Hussain <waqas20@gmail.com>
parents: 2947
diff changeset
    54
-- Two-character escape sequences recognised inside double-quoted strings,
-- mapped to the character they stand for.
local escapes = {
	["\\b"]="\b", ["\\d"]="\127", ["\\e"]="\27", ["\\f"]="\f",
	["\\n"]="\n", ["\\r"]="\r", ["\\s"]=" ", ["\\t"]="\t", ["\\v"]="\v",
	["\\\""]="\"", ["\\'"]="'", ["\\\\"]="\\"
};

-- Read a double-quoted string and return its contents with the escape
-- sequences from `escapes` decoded.
-- Raises on an unknown escape sequence, and on end of input before the
-- closing quote (previously the loop never terminated in that case).
local function readString()
	read("\""); -- skip quote
	local slash = nil; -- holds "\\" while the character after a backslash is pending
	local str = {};
	while true do
		local ch = read();
		if ch == nil then
			error("unterminated string literal on line "..line);
		end
		if slash then
			slash = slash..ch;
			if not escapes[slash] then error("Unknown escape sequence: "..slash); end
			str[#str+1] = escapes[slash];
			slash = nil;
		elseif ch == "\"" then
			break;
		elseif ch == "\\" then
			slash = ch;
		else
			str[#str+1] = ch;
		end
	end
	return t_concat(str);
end
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
    76
-- Read an unquoted atom: take the first character unconditionally, then
-- keep consuming characters for as long as isAtom() accepts the lookahead.
-- Returns the atom text as a string.
local function readAtom1()
	local chars = {};
	chars[1] = read();
	local nxt = peek();
	while isAtom(nxt) do
		chars[#chars+1] = read();
		nxt = peek();
	end
	return t_concat(chars);
end
1572
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
    83
-- Read a single-quoted atom and return its raw text, including both
-- surrounding quotes and any backslashes.
-- A quote preceded by a backslash does not end the atom (the escape flag
-- was previously declared but never set).
-- Raises on end of input before the closing quote (previously the loop
-- never terminated in that case).
local function readAtom2()
	local str = { read("'") };
	local escaped = false; -- true while the character after a backslash is pending
	while true do
		local ch = read();
		if ch == nil then
			error("unterminated quoted atom on line "..line);
		end
		str[#str+1] = ch;
		if escaped then
			escaped = false;
		elseif ch == "\\" then
			escaped = true;
		elseif ch == "'" then
			break;
		end
	end
	return t_concat(str);
end
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
    93
-- Append to `num` every following character accepted by isNumeric().
-- Stops at end of input (peek() returning nil).
local function readNumberRun(num)
	local nxt = peek();
	while nxt and isNumeric(nxt) do
		num[#num+1] = read();
		nxt = peek();
	end
end

-- Read a number and return it as a Lua number.
-- Accepts an integer part, an optional fraction, and after a fraction an
-- optional exponent ("e"/"E", optional sign, digits) as written in Erlang
-- float literals such as 1.0e-10.
-- Raises when the collected text is not a valid number; previously nil was
-- returned, which callers treat as "no more items".
local function readNumber()
	local num = { read() };
	readNumberRun(num);
	if peek() == "." then
		num[#num+1] = read();
		readNumberRun(num);
		local marker = peek();
		if marker == "e" or marker == "E" then
			num[#num+1] = read();
			-- isNumeric() already accepts "-", so only "+" needs special care.
			if peek() == "+" then
				num[#num+1] = read();
			end
			readNumberRun(num);
		end
	end
	local text = t_concat(num);
	local value = tonumber(text);
	if value == nil then
		error("malformed number '"..text.."' on line "..line);
	end
	return value;
end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
   106
-- Forward declaration: readTuple and readItem call each other.
local readItem = nil;

-- Read a bracketed sequence of items. The opening and closing delimiter
-- characters are consumed without being checked.
-- Returns a string when the sequence is non-empty and every item is an
-- integer in the byte range 0..255 (an Erlang string written as a list of
-- character codes); otherwise returns a table of the items.
local function readTuple()
	local t = {};
	local s = {}; -- string representation; set to nil once an item cannot be a byte
	read(); -- read {, or [, or <
	while true do
		local item = readItem();
		if not item then break; end
		if s then
			-- string_char raises on negative, fractional or >255 values, so all
			-- three must disqualify the string form, not just the upper bound.
			if type(item) == "number" and item >= 0 and item <= 255 and item % 1 == 0 then
				s[#s+1] = string_char(item);
			else
				s = nil;
			end
		end
		t[#t+1] = item;
	end
	read(); -- read }, or ], or >
	if s and #s > 0 then
		return t_concat(s);
	else
		return t;
	end
end
1b87dfb76caa ejabberd2prosody, erlparse: Add support for parsing non-ASCII strings and binaries, and atoms enclosed in single quotes
Sergei Golovan
parents: 1567
diff changeset
   128
-- Read an item that starts with "<".
-- "<" followed by a digit or "-" is treated as a PID (e.g. <0.123.0>): it is
-- skipped up to the closing ">" and an empty table is returned.
-- Otherwise the item is read as a binary (<<...>>): the inner sequence is
-- parsed by readTuple and the result is returned as a string - the byte
-- string itself, the first element of the item list, or "" when empty.
-- Raises on end of input inside a PID (previously the loop never ended).
local function readBinary()
	read("<"); -- read <
	-- Discard PIDs
	local first = peek();
	if first and isNumeric(first) then
		while true do
			local nxt = peek();
			if nxt == nil then
				error("unterminated PID on line "..line);
			end
			if nxt == ">" then break; end
			read();
		end
		read(">");
		return {};
	end
	local t = readTuple(); -- consumes the inner "<" ... ">"
	read(">") -- read the outer >
	local kind = type(t);
	if kind == "string" then
		-- binary is a list of integers
		return t;
	elseif kind == "table" then
		if t[1] then
			-- binary contains string
			return t[1];
		else
			-- binary is empty
			return "";
		end
	else
		error("unexpected binary contents of type "..kind.." on line "..line);
	end
end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
   154
-- Read the next item, dispatching on the lookahead character.
-- Whitespace, "," and "|" are consumed and skipped.
-- Returns nil at end of input or when the lookahead does not start any
-- known item (that character is left unconsumed).
readItem = function()
	while true do
		local ch = peek();
		if ch == nil then
			return nil;
		end
		if ch == "{" or ch == "[" then
			return readTuple();
		end
		if isLowerAlpha(ch) then
			return readAtom1();
		end
		if ch == "'" then
			return readAtom2();
		end
		if isNumeric(ch) then
			return readNumber();
		end
		if ch == "\"" then
			return readString();
		end
		if ch == "<" then
			return readBinary();
		end
		if not (isSpace(ch) or ch == "," or ch == "|") then
			--print("Unknown char: "..ch);
			return nil;
		end
		-- Separator: drop it and look at the next character.
		read();
	end
end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
   177
-- Reads one item via readItem() and, when that item is truthy, consumes
-- the "." that follows it via read("."). Whatever readItem() produced is
-- returned unchanged; a falsy result is handed back without consuming
-- any further input.
local function readChunk()
	local item = readItem();
	if not item then
		return item;
	end
	read(".");
	return item;
end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
   182
-- Opens `filename` and returns an iterator function over its contents.
--
-- The handle is stored in `file`, which is not declared inside this
-- function (presumably a file-level variable shared with read()/peek();
-- confirm against the top of the file before making it local here).
--
-- Raises "File not found: <filename>" when io.open() fails.
--
-- Each call to the returned function yields the result of readChunk().
-- If that result is falsy while peek() still reports a character, the
-- iterator raises "Invalid char: <char>" instead of returning.
local function readFile(filename)
	file = io.open(filename);
	-- error() never returns, so no further handling is needed after it.
	if not file then error("File not found: "..filename); end
	return function()
		local x = readChunk();
		if not x and peek() then error("Invalid char: "..peek()); end
		return x;
	end;
end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
   191
7822
ad709ee7d3d8 tools/erlparse: Remove use of deprecated module() function
Kim Alvefur <zash@zash.se>
parents: 5776
diff changeset
   192
-- Table holding the functions this module exports; returned at the end of the file.
local _M = {};
1567
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
   193
7822
ad709ee7d3d8 tools/erlparse: Remove use of deprecated module() function
Kim Alvefur <zash@zash.se>
parents: 5776
diff changeset
   194
-- Public entry point: passes `file` (a filename) to readFile() and
-- returns the iterator function that readFile() produces.
_M.parseFile = function(file)
	local nextChunk = readFile(file);
	return nextChunk;
end
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
   197
e052a3bdb8b5 ejabberd2prosody, erlparse.lua: Convert from Windows line endings (thanks teo)
Matthew Wild <mwild1@gmail.com>
parents: 1523
diff changeset
   198
-- Return the export table to whoever loads this file.
return _M;