1
|
var util = require('./util');
|
2
|
var types = require('./types');
|
3
|
var sets = require('./sets');
|
4
|
var positions = require('./positions');
|
5
|
|
6
|
|
7
|
module.exports = function(regexpStr) {
|
8
|
var i = 0, l, c,
|
9
|
start = { type: types.ROOT, stack: []},
|
10
|
|
11
|
// Keep track of last clause/group and stack.
|
12
|
lastGroup = start,
|
13
|
last = start.stack,
|
14
|
groupStack = [];
|
15
|
|
16
|
|
17
|
var repeatErr = function(i) {
|
18
|
util.error(regexpStr, 'Nothing to repeat at column ' + (i - 1));
|
19
|
};
|
20
|
|
21
|
// Decode a few escaped characters.
|
22
|
var str = util.strToChars(regexpStr);
|
23
|
l = str.length;
|
24
|
|
25
|
// Iterate through each character in string.
|
26
|
while (i < l) {
|
27
|
c = str[i++];
|
28
|
|
29
|
switch (c) {
|
30
|
// Handle escaped characters, inclues a few sets.
|
31
|
case '\\':
|
32
|
c = str[i++];
|
33
|
|
34
|
switch (c) {
|
35
|
case 'b':
|
36
|
last.push(positions.wordBoundary());
|
37
|
break;
|
38
|
|
39
|
case 'B':
|
40
|
last.push(positions.nonWordBoundary());
|
41
|
break;
|
42
|
|
43
|
case 'w':
|
44
|
last.push(sets.words());
|
45
|
break;
|
46
|
|
47
|
case 'W':
|
48
|
last.push(sets.notWords());
|
49
|
break;
|
50
|
|
51
|
case 'd':
|
52
|
last.push(sets.ints());
|
53
|
break;
|
54
|
|
55
|
case 'D':
|
56
|
last.push(sets.notInts());
|
57
|
break;
|
58
|
|
59
|
case 's':
|
60
|
last.push(sets.whitespace());
|
61
|
break;
|
62
|
|
63
|
case 'S':
|
64
|
last.push(sets.notWhitespace());
|
65
|
break;
|
66
|
|
67
|
default:
|
68
|
// Check if c is integer.
|
69
|
// In which case it's a reference.
|
70
|
if (/\d/.test(c)) {
|
71
|
last.push({ type: types.REFERENCE, value: parseInt(c, 10) });
|
72
|
|
73
|
// Escaped character.
|
74
|
} else {
|
75
|
last.push({ type: types.CHAR, value: c.charCodeAt(0) });
|
76
|
}
|
77
|
}
|
78
|
|
79
|
break;
|
80
|
|
81
|
|
82
|
// Positionals.
|
83
|
case '^':
|
84
|
last.push(positions.begin());
|
85
|
break;
|
86
|
|
87
|
case '$':
|
88
|
last.push(positions.end());
|
89
|
break;
|
90
|
|
91
|
|
92
|
// Handle custom sets.
|
93
|
case '[':
|
94
|
// Check if this class is 'anti' i.e. [^abc].
|
95
|
var not;
|
96
|
if (str[i] === '^') {
|
97
|
not = true;
|
98
|
i++;
|
99
|
} else {
|
100
|
not = false;
|
101
|
}
|
102
|
|
103
|
// Get all the characters in class.
|
104
|
var classTokens = util.tokenizeClass(str.slice(i), regexpStr);
|
105
|
|
106
|
// Increase index by length of class.
|
107
|
i += classTokens[1];
|
108
|
last.push({
|
109
|
type: types.SET,
|
110
|
set: classTokens[0],
|
111
|
not: not,
|
112
|
});
|
113
|
|
114
|
break;
|
115
|
|
116
|
|
117
|
// Class of any character except \n.
|
118
|
case '.':
|
119
|
last.push(sets.anyChar());
|
120
|
break;
|
121
|
|
122
|
|
123
|
// Push group onto stack.
|
124
|
case '(':
|
125
|
// Create group.
|
126
|
var group = {
|
127
|
type: types.GROUP,
|
128
|
stack: [],
|
129
|
remember: true,
|
130
|
};
|
131
|
|
132
|
c = str[i];
|
133
|
|
134
|
// If if this is a special kind of group.
|
135
|
if (c === '?') {
|
136
|
c = str[i + 1];
|
137
|
i += 2;
|
138
|
|
139
|
// Match if followed by.
|
140
|
if (c === '=') {
|
141
|
group.followedBy = true;
|
142
|
|
143
|
// Match if not followed by.
|
144
|
} else if (c === '!') {
|
145
|
group.notFollowedBy = true;
|
146
|
|
147
|
} else if (c !== ':') {
|
148
|
util.error(regexpStr,
|
149
|
'Invalid group, character \'' + c +
|
150
|
'\' after \'?\' at column ' + (i - 1));
|
151
|
}
|
152
|
|
153
|
group.remember = false;
|
154
|
}
|
155
|
|
156
|
// Insert subgroup into current group stack.
|
157
|
last.push(group);
|
158
|
|
159
|
// Remember the current group for when the group closes.
|
160
|
groupStack.push(lastGroup);
|
161
|
|
162
|
// Make this new group the current group.
|
163
|
lastGroup = group;
|
164
|
last = group.stack;
|
165
|
break;
|
166
|
|
167
|
|
168
|
// Pop group out of stack.
|
169
|
case ')':
|
170
|
if (groupStack.length === 0) {
|
171
|
util.error(regexpStr, 'Unmatched ) at column ' + (i - 1));
|
172
|
}
|
173
|
lastGroup = groupStack.pop();
|
174
|
|
175
|
// Check if this group has a PIPE.
|
176
|
// To get back the correct last stack.
|
177
|
last = lastGroup.options ?
|
178
|
lastGroup.options[lastGroup.options.length - 1] : lastGroup.stack;
|
179
|
break;
|
180
|
|
181
|
|
182
|
// Use pipe character to give more choices.
|
183
|
case '|':
|
184
|
// Create array where options are if this is the first PIPE
|
185
|
// in this clause.
|
186
|
if (!lastGroup.options) {
|
187
|
lastGroup.options = [lastGroup.stack];
|
188
|
delete lastGroup.stack;
|
189
|
}
|
190
|
|
191
|
// Create a new stack and add to options for rest of clause.
|
192
|
var stack = [];
|
193
|
lastGroup.options.push(stack);
|
194
|
last = stack;
|
195
|
break;
|
196
|
|
197
|
|
198
|
// Repetition.
|
199
|
// For every repetition, remove last element from last stack
|
200
|
// then insert back a RANGE object.
|
201
|
// This design is chosen because there could be more than
|
202
|
// one repetition symbols in a regex i.e. `a?+{2,3}`.
|
203
|
case '{':
|
204
|
var rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max;
|
205
|
if (rs !== null) {
|
206
|
if (last.length === 0) {
|
207
|
repeatErr(i);
|
208
|
}
|
209
|
min = parseInt(rs[1], 10);
|
210
|
max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min;
|
211
|
i += rs[0].length;
|
212
|
|
213
|
last.push({
|
214
|
type: types.REPETITION,
|
215
|
min: min,
|
216
|
max: max,
|
217
|
value: last.pop(),
|
218
|
});
|
219
|
} else {
|
220
|
last.push({
|
221
|
type: types.CHAR,
|
222
|
value: 123,
|
223
|
});
|
224
|
}
|
225
|
break;
|
226
|
|
227
|
case '?':
|
228
|
if (last.length === 0) {
|
229
|
repeatErr(i);
|
230
|
}
|
231
|
last.push({
|
232
|
type: types.REPETITION,
|
233
|
min: 0,
|
234
|
max: 1,
|
235
|
value: last.pop(),
|
236
|
});
|
237
|
break;
|
238
|
|
239
|
case '+':
|
240
|
if (last.length === 0) {
|
241
|
repeatErr(i);
|
242
|
}
|
243
|
last.push({
|
244
|
type: types.REPETITION,
|
245
|
min: 1,
|
246
|
max: Infinity,
|
247
|
value: last.pop(),
|
248
|
});
|
249
|
break;
|
250
|
|
251
|
case '*':
|
252
|
if (last.length === 0) {
|
253
|
repeatErr(i);
|
254
|
}
|
255
|
last.push({
|
256
|
type: types.REPETITION,
|
257
|
min: 0,
|
258
|
max: Infinity,
|
259
|
value: last.pop(),
|
260
|
});
|
261
|
break;
|
262
|
|
263
|
|
264
|
// Default is a character that is not `\[](){}?+*^$`.
|
265
|
default:
|
266
|
last.push({
|
267
|
type: types.CHAR,
|
268
|
value: c.charCodeAt(0),
|
269
|
});
|
270
|
}
|
271
|
|
272
|
}
|
273
|
|
274
|
// Check if any groups have not been closed.
|
275
|
if (groupStack.length !== 0) {
|
276
|
util.error(regexpStr, 'Unterminated group');
|
277
|
}
|
278
|
|
279
|
return start;
|
280
|
};
|
281
|
|
282
|
// Expose the token type constants as a property of the exported tokenizer.
module.exports.types = types;
|