1
|
import { FSM, makeTransition } from "@webassemblyjs/helper-fsm";
|
2
|
import { codeFrameFromSource } from "@webassemblyjs/helper-code-frame";
|
3
|
|
4
|
/**
 * Renders the code frame for a single source position and surrounds it with
 * line breaks so it can be prepended to an error message.
 *
 * @param {string} source Full source text handed to `codeFrameFromSource`.
 * @param {number} line Line of the position to point at.
 * @param {number} column Column of the position to point at.
 * @returns {string} The rendered frame wrapped in "\n" on both sides.
 */
// eslint-disable-next-line
function getCodeFrame(source, line, column) {
  var position = {
    line: line,
    column: column
  };
  // Only a start position is supplied; no end position is passed along.
  var frame = codeFrameFromSource(source, {
    start: position
  });
  return "\n" + frame + "\n";
}
|
14
|
|
15
|
// Single-character classes consulted by the tokenizer's main loop.
var WHITESPACE = /\s/;
var PARENS = /\(|\)/;
// Characters that may appear in keywords, names and value types.
var LETTERS = /[a-z0-9_/]/i;
// Characters accepted after the leading "$" of an identifier.
var idchar = /[a-z0-9!#$%&*+./:<=>?@\\[\]^_`|~-]/i;
var valtypes = ["i32", "i64", "f32", "f64"];
// Characters that may start a number literal. Inside a character class "|" is
// a literal pipe, not alternation, so it must not be listed here.
var NUMBERS = /[0-9._]/;
// tokenize() tests this against a three-character, lower-cased lookahead.
var NUMBER_KEYWORDS = /nan|inf/;
|
22
|
|
23
|
/**
 * Reports whether a string starts with a line feed (code 10) or a carriage
 * return (code 13).
 *
 * @param {string} char String whose first character is inspected.
 * @returns {boolean} True for "\n" and "\r", false for anything else.
 */
function isNewLine(char) {
  switch (char.charCodeAt(0)) {
    case 10:
    case 13:
      return true;

    default:
      return false;
  }
}
|
26
|
|
27
|
/**
 * Builds a token record.
 *
 * An optional fifth argument carries extra options; it is attached to the
 * token as `opts` only when it has at least one own enumerable key.
 *
 * @param {string} type Token type.
 * @param {*} value Token value.
 * @param {*} start Start position, stored as `loc.start`.
 * @param {*} end End position, stored as `loc.end`.
 * @returns {Object} `{ type, value, loc: { start, end } }`, plus `opts` when non-empty.
 */
function Token(type, value, start, end) {
  // A missing fifth argument reads as undefined, which means "no options".
  var opts = arguments[4] === undefined ? {} : arguments[4];
  var location = {
    start: start,
    end: end
  };
  var token = {
    type: type,
    value: value,
    loc: location
  };

  if (Object.keys(opts).length === 0) {
    return token;
  }

  // $FlowIgnore
  token.opts = opts;
  return token;
}
|
45
|
|
46
|
// Table of every token type the tokenizer emits; each key maps to its own name.
var tokenTypes = ["openParen", "closeParen", "number", "string", "name", "identifier", "valtype", "dot", "comment", "equal", "keyword"].reduce(function (table, kind) {
  table[kind] = kind;
  return table;
}, {});
|
59
|
// Words that tokenize() emits as `keyword` tokens; each key maps to itself.
export var keywords = ["module", "func", "param", "result", "export", "loop", "block", "if", "then", "else", "call", "call_indirect", "import", "memory", "table", "global", "anyfunc", "mut", "data", "type", "elem", "start", "offset"].reduce(function (table, word) {
  table[word] = word;
  return table;
}, {});
|
84
|
var NUMERIC_SEPARATOR = "_";

/**
 * Finite state machine that recognises number literals: optionally signed
 * decimal and hexadecimal numbers with fraction and exponent parts, plus the
 * `nan`, `inf` and `nan:0x…` forms. It starts in "START" and terminates in
 * "STOP".
 *
 * NOTE(review): `n` is presumably the number of characters a transition
 * inspects and consumes, and `allowedSeparator` the digit separator tolerated
 * inside a run of digits; confirm against @webassemblyjs/helper-fsm.
 *
 * The patterns deliberately avoid two regex pitfalls:
 *  - "|" inside a character class is a literal pipe, so digit classes are
 *    written as plain ranges ([0-9A-Fa-f], [0-9+-]);
 *  - a trailing empty alternative (/p|P|/) matches every input, which would
 *    let any character after a hex fraction count as an exponent marker.
 */
var numberLiteralFSM = new FSM({
  START: [
    makeTransition(/-|\+/, "AFTER_SIGN"),
    makeTransition(/nan:0x/, "NAN_HEX", {
      n: 6
    }),
    makeTransition(/nan|inf/, "STOP", {
      n: 3
    }),
    makeTransition(/0x/, "HEX", {
      n: 2
    }),
    makeTransition(/[0-9]/, "DEC"),
    makeTransition(/\./, "DEC_FRAC")
  ],
  AFTER_SIGN: [
    makeTransition(/nan:0x/, "NAN_HEX", {
      n: 6
    }),
    makeTransition(/nan|inf/, "STOP", {
      n: 3
    }),
    makeTransition(/0x/, "HEX", {
      n: 2
    }),
    makeTransition(/[0-9]/, "DEC"),
    makeTransition(/\./, "DEC_FRAC")
  ],
  DEC_FRAC: [
    makeTransition(/[0-9]/, "DEC_FRAC", {
      allowedSeparator: NUMERIC_SEPARATOR
    }),
    makeTransition(/e|E/, "DEC_SIGNED_EXP")
  ],
  DEC: [
    makeTransition(/[0-9]/, "DEC", {
      allowedSeparator: NUMERIC_SEPARATOR
    }),
    makeTransition(/\./, "DEC_FRAC"),
    makeTransition(/e|E/, "DEC_SIGNED_EXP")
  ],
  DEC_SIGNED_EXP: [
    makeTransition(/\+|-/, "DEC_EXP"),
    makeTransition(/[0-9]/, "DEC_EXP")
  ],
  DEC_EXP: [
    makeTransition(/[0-9]/, "DEC_EXP", {
      allowedSeparator: NUMERIC_SEPARATOR
    })
  ],
  HEX: [
    makeTransition(/[0-9A-Fa-f]/, "HEX", {
      allowedSeparator: NUMERIC_SEPARATOR
    }),
    makeTransition(/\./, "HEX_FRAC"),
    makeTransition(/p|P/, "HEX_SIGNED_EXP")
  ],
  HEX_FRAC: [
    makeTransition(/[0-9A-Fa-f]/, "HEX_FRAC", {
      allowedSeparator: NUMERIC_SEPARATOR
    }),
    // Only "p"/"P" introduces the exponent; anything else ends the literal.
    makeTransition(/p|P/, "HEX_SIGNED_EXP")
  ],
  HEX_SIGNED_EXP: [
    makeTransition(/[0-9+-]/, "HEX_EXP")
  ],
  HEX_EXP: [
    makeTransition(/[0-9]/, "HEX_EXP", {
      allowedSeparator: NUMERIC_SEPARATOR
    })
  ],
  NAN_HEX: [
    makeTransition(/[0-9A-Fa-f]/, "NAN_HEX", {
      allowedSeparator: NUMERIC_SEPARATOR
    })
  ],
  STOP: []
}, "START", "STOP");
|
129
|
/**
 * Splits WebAssembly text-format source into a flat array of tokens.
 *
 * Tokens are built with `Token(type, value, start, end, opts)`. Line comments
 * (`;;`) and block comments (`(; … ;)`) become `comment` tokens whose
 * `opts.type` is "leading" or "block" respectively.
 *
 * Running off the end of the input is handled explicitly everywhere: once the
 * cursor passes the last character `char` is `undefined`, and
 * `RegExp.prototype.test(undefined)` would otherwise test the string
 * "undefined" and keep scanning loops alive forever.
 *
 * @param {string} input Source text to tokenize.
 * @returns {Array<Object>} Tokens in source order.
 * @throws {Error} On a character that cannot start or continue a token, and
 *   when the input ends inside a string literal or a block comment.
 */
export function tokenize(input) {
  var current = 0;
  var char = input[current];

  // Used by SourceLocation
  var column = 1;
  var line = 1;
  var tokens = [];

  /**
   * Builds the error raised for the character under the cursor.
   */
  function unexpectedCharacter() {
    return new Error(getCodeFrame(input, line, column) + "Unexpected character " + JSON.stringify(char));
  }

  /**
   * Builds the error raised when the input ends inside `construct`.
   */
  function unexpectedEndOfInput(construct) {
    return new Error(getCodeFrame(input, line, column) + "Unexpected end of input in " + construct);
  }

  /**
   * Creates a pushToken function for a given type
   */
  function pushToken(type) {
    return function (v) {
      var opts = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
      var length = String(v).length;

      // Column 0 is a legitimate position (the column counter is reset to 0
      // after every line break), so only fall back when no column was given.
      var startColumn = opts.startColumn !== undefined ? opts.startColumn : column - length;
      delete opts.startColumn;
      var endColumn = opts.endColumn !== undefined ? opts.endColumn : startColumn + length - 1;
      delete opts.endColumn;

      var start = {
        line: line,
        column: startColumn
      };
      var end = {
        line: line,
        column: endColumn
      };
      tokens.push(Token(type, v, start, end, opts));
    };
  }

  /**
   * Functions to save newly encountered tokens
   */
  var pushCloseParenToken = pushToken(tokenTypes.closeParen);
  var pushOpenParenToken = pushToken(tokenTypes.openParen);
  var pushNumberToken = pushToken(tokenTypes.number);
  var pushValtypeToken = pushToken(tokenTypes.valtype);
  var pushNameToken = pushToken(tokenTypes.name);
  var pushIdentifierToken = pushToken(tokenTypes.identifier);
  var pushKeywordToken = pushToken(tokenTypes.keyword);
  var pushDotToken = pushToken(tokenTypes.dot);
  var pushStringToken = pushToken(tokenTypes.string);
  var pushCommentToken = pushToken(tokenTypes.comment);
  var pushEqualToken = pushToken(tokenTypes.equal);

  /**
   * Can be used to look at the next character(s).
   *
   * The default behavior `lookahead()` simply returns the next character without consuming it.
   * Letters are always returned in lowercase.
   *
   * @param {number} length How many characters to query. Default = 1
   * @param {number} offset How many characters to skip forward from current one. Default = 1
   */
  function lookahead() {
    var length = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 1;
    var offset = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 1;
    return input.substring(current + offset, current + offset + length).toLowerCase();
  }

  /**
   * Advances the cursor in the input by a certain amount
   *
   * @param {number} amount How many characters to consume. Default = 1
   */
  function eatCharacter() {
    var amount = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 1;
    column += amount;
    current += amount;
    char = input[current];
  }

  while (current < input.length) {
    // ;; line comment
    if (char === ";" && lookahead() === ";") {
      var lineCommentStart = column;
      eatCharacter(2);
      var lineCommentText = "";

      // The comment runs up to the next line break or the end of the input.
      while (char !== undefined && !isNewLine(char)) {
        lineCommentText += char;
        eatCharacter();
      }

      pushCommentToken(lineCommentText, {
        type: "leading",
        startColumn: lineCommentStart,
        endColumn: column
      });
      continue;
    }

    // (; block comment ;)
    if (char === "(" && lookahead() === ";") {
      var blockCommentStart = column;
      eatCharacter(2);
      var blockCommentText = "";

      while (true) {
        char = input[current];

        if (char === undefined) {
          throw unexpectedEndOfInput("block comment");
        }

        if (char === ";" && lookahead() === ")") {
          eatCharacter(2);
          break;
        }

        blockCommentText += char;
        eatCharacter();

        // Line bookkeeping looks at the character that is now under the cursor.
        if (char !== undefined && isNewLine(char)) {
          line++;
          column = 0;
        }
      }

      pushCommentToken(blockCommentText, {
        type: "block",
        startColumn: blockCommentStart,
        endColumn: column
      });
      continue;
    }

    if (char === "(") {
      pushOpenParenToken(char);
      eatCharacter();
      continue;
    }

    if (char === "=") {
      pushEqualToken(char);
      eatCharacter();
      continue;
    }

    if (char === ")") {
      pushCloseParenToken(char);
      eatCharacter();
      continue;
    }

    if (isNewLine(char)) {
      line++;
      eatCharacter();
      column = 0;
      continue;
    }

    if (WHITESPACE.test(char)) {
      eatCharacter();
      continue;
    }

    // $identifier
    if (char === "$") {
      var identifierStart = column;
      eatCharacter();
      var identifier = "";

      while (char !== undefined && idchar.test(char)) {
        identifier += char;
        eatCharacter();
      }

      pushIdentifierToken(identifier, {
        startColumn: identifierStart,
        endColumn: column
      });
      continue;
    }

    // Number literals, including the nan/inf keywords and signed forms.
    if (NUMBERS.test(char) || NUMBER_KEYWORDS.test(lookahead(3, 0)) || char === "-" || char === "+") {
      var numberStart = column;
      var literal = numberLiteralFSM.run(input.slice(current));

      if (literal === "") {
        throw unexpectedCharacter();
      }

      pushNumberToken(literal, {
        startColumn: numberStart
      });
      eatCharacter(literal.length);

      // A number must be followed by a parenthesis, whitespace or the end of input.
      if (char && !PARENS.test(char) && !WHITESPACE.test(char)) {
        throw unexpectedCharacter();
      }

      continue;
    }

    // "string"
    if (char === '"') {
      var stringStart = column;
      var stringValue = "";
      eatCharacter(); // "

      while (char !== '"') {
        if (char === undefined) {
          throw unexpectedEndOfInput("string literal");
        }

        if (isNewLine(char)) {
          throw unexpectedCharacter();
        }

        stringValue += char;
        eatCharacter(); // char
      }

      eatCharacter(); // "

      pushStringToken(stringValue, {
        startColumn: stringStart,
        endColumn: column
      });
      continue;
    }

    if (LETTERS.test(char)) {
      var word = "";
      var wordStart = column;

      while (char && LETTERS.test(char)) {
        word += char;
        eatCharacter();
      }

      /*
       * Handle MemberAccess
       */
      if (char === ".") {
        var dotStartColumn = column;

        if (valtypes.indexOf(word) !== -1) {
          pushValtypeToken(word, {
            startColumn: wordStart
          });
        } else {
          pushNameToken(word);
        }

        eatCharacter();
        word = "";
        var nameStartColumn = column;

        while (char && LETTERS.test(char)) {
          word += char;
          eatCharacter();
        }

        pushDotToken(".", {
          startColumn: dotStartColumn
        });
        pushNameToken(word, {
          startColumn: nameStartColumn
        });
        continue;
      }

      /*
       * Handle keywords
       */
      // $FlowIgnore
      if (typeof keywords[word] === "string") {
        pushKeywordToken(word, {
          startColumn: wordStart
        });
        continue;
      }

      /*
       * Handle types
       */
      if (valtypes.indexOf(word) !== -1) {
        pushValtypeToken(word, {
          startColumn: wordStart
        });
        continue;
      }

      /*
       * Handle literals
       */
      pushNameToken(word, {
        startColumn: wordStart
      });
      continue;
    }

    throw unexpectedCharacter();
  }

  return tokens;
}
|
434
|
// Public alias: exposes the token type table under the exported name `tokens`.
export var tokens = tokenTypes;
|