Projekt

Obecné

Profil

Stáhnout (6.4 KB) Statistiky
| Větev: | Revize:
1
var util      = require('./util');
2
var types     = require('./types');
3
var sets      = require('./sets');
4
var positions = require('./positions');
5

    
6

    
7
module.exports = function(regexpStr) {
8
  var i = 0, l, c,
9
      start = { type: types.ROOT, stack: []},
10

    
11
      // Keep track of last clause/group and stack.
12
      lastGroup = start,
13
      last = start.stack,
14
      groupStack = [];
15

    
16

    
17
  var repeatErr = function(i) {
18
    util.error(regexpStr, 'Nothing to repeat at column ' + (i - 1));
19
  };
20

    
21
  // Decode a few escaped characters.
22
  var str = util.strToChars(regexpStr);
23
  l = str.length;
24

    
25
  // Iterate through each character in string.
26
  while (i < l) {
27
    c = str[i++];
28

    
29
    switch (c) {
30
      // Handle escaped characters, inclues a few sets.
31
      case '\\':
32
        c = str[i++];
33

    
34
        switch (c) {
35
          case 'b':
36
            last.push(positions.wordBoundary());
37
            break;
38

    
39
          case 'B':
40
            last.push(positions.nonWordBoundary());
41
            break;
42

    
43
          case 'w':
44
            last.push(sets.words());
45
            break;
46

    
47
          case 'W':
48
            last.push(sets.notWords());
49
            break;
50

    
51
          case 'd':
52
            last.push(sets.ints());
53
            break;
54

    
55
          case 'D':
56
            last.push(sets.notInts());
57
            break;
58

    
59
          case 's':
60
            last.push(sets.whitespace());
61
            break;
62

    
63
          case 'S':
64
            last.push(sets.notWhitespace());
65
            break;
66

    
67
          default:
68
            // Check if c is integer.
69
            // In which case it's a reference.
70
            if (/\d/.test(c)) {
71
              last.push({ type: types.REFERENCE, value: parseInt(c, 10) });
72

    
73
            // Escaped character.
74
            } else {
75
              last.push({ type: types.CHAR, value: c.charCodeAt(0) });
76
            }
77
        }
78

    
79
        break;
80

    
81

    
82
      // Positionals.
83
      case '^':
84
          last.push(positions.begin());
85
        break;
86

    
87
      case '$':
88
          last.push(positions.end());
89
        break;
90

    
91

    
92
      // Handle custom sets.
93
      case '[':
94
        // Check if this class is 'anti' i.e. [^abc].
95
        var not;
96
        if (str[i] === '^') {
97
          not = true;
98
          i++;
99
        } else {
100
          not = false;
101
        }
102

    
103
        // Get all the characters in class.
104
        var classTokens = util.tokenizeClass(str.slice(i), regexpStr);
105

    
106
        // Increase index by length of class.
107
        i += classTokens[1];
108
        last.push({
109
          type: types.SET,
110
          set: classTokens[0],
111
          not: not,
112
        });
113

    
114
        break;
115

    
116

    
117
      // Class of any character except \n.
118
      case '.':
119
        last.push(sets.anyChar());
120
        break;
121

    
122

    
123
      // Push group onto stack.
124
      case '(':
125
        // Create group.
126
        var group = {
127
          type: types.GROUP,
128
          stack: [],
129
          remember: true,
130
        };
131

    
132
        c = str[i];
133

    
134
        // If if this is a special kind of group.
135
        if (c === '?') {
136
          c = str[i + 1];
137
          i += 2;
138

    
139
          // Match if followed by.
140
          if (c === '=') {
141
            group.followedBy = true;
142

    
143
          // Match if not followed by.
144
          } else if (c === '!') {
145
            group.notFollowedBy = true;
146

    
147
          } else if (c !== ':') {
148
            util.error(regexpStr,
149
              'Invalid group, character \'' + c +
150
              '\' after \'?\' at column ' + (i - 1));
151
          }
152

    
153
          group.remember = false;
154
        }
155

    
156
        // Insert subgroup into current group stack.
157
        last.push(group);
158

    
159
        // Remember the current group for when the group closes.
160
        groupStack.push(lastGroup);
161

    
162
        // Make this new group the current group.
163
        lastGroup = group;
164
        last = group.stack;
165
        break;
166

    
167

    
168
      // Pop group out of stack.
169
      case ')':
170
        if (groupStack.length === 0) {
171
          util.error(regexpStr, 'Unmatched ) at column ' + (i - 1));
172
        }
173
        lastGroup = groupStack.pop();
174

    
175
        // Check if this group has a PIPE.
176
        // To get back the correct last stack.
177
        last = lastGroup.options ?
178
          lastGroup.options[lastGroup.options.length - 1] : lastGroup.stack;
179
        break;
180

    
181

    
182
      // Use pipe character to give more choices.
183
      case '|':
184
        // Create array where options are if this is the first PIPE
185
        // in this clause.
186
        if (!lastGroup.options) {
187
          lastGroup.options = [lastGroup.stack];
188
          delete lastGroup.stack;
189
        }
190

    
191
        // Create a new stack and add to options for rest of clause.
192
        var stack = [];
193
        lastGroup.options.push(stack);
194
        last = stack;
195
        break;
196

    
197

    
198
      // Repetition.
199
      // For every repetition, remove last element from last stack
200
      // then insert back a RANGE object.
201
      // This design is chosen because there could be more than
202
      // one repetition symbols in a regex i.e. `a?+{2,3}`.
203
      case '{':
204
        var rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max;
205
        if (rs !== null) {
206
          if (last.length === 0) {
207
            repeatErr(i);
208
          }
209
          min = parseInt(rs[1], 10);
210
          max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min;
211
          i += rs[0].length;
212

    
213
          last.push({
214
            type: types.REPETITION,
215
            min: min,
216
            max: max,
217
            value: last.pop(),
218
          });
219
        } else {
220
          last.push({
221
            type: types.CHAR,
222
            value: 123,
223
          });
224
        }
225
        break;
226

    
227
      case '?':
228
        if (last.length === 0) {
229
          repeatErr(i);
230
        }
231
        last.push({
232
          type: types.REPETITION,
233
          min: 0,
234
          max: 1,
235
          value: last.pop(),
236
        });
237
        break;
238

    
239
      case '+':
240
        if (last.length === 0) {
241
          repeatErr(i);
242
        }
243
        last.push({
244
          type: types.REPETITION,
245
          min: 1,
246
          max: Infinity,
247
          value: last.pop(),
248
        });
249
        break;
250

    
251
      case '*':
252
        if (last.length === 0) {
253
          repeatErr(i);
254
        }
255
        last.push({
256
          type: types.REPETITION,
257
          min: 0,
258
          max: Infinity,
259
          value: last.pop(),
260
        });
261
        break;
262

    
263

    
264
      // Default is a character that is not `\[](){}?+*^$`.
265
      default:
266
        last.push({
267
          type: types.CHAR,
268
          value: c.charCodeAt(0),
269
        });
270
    }
271

    
272
  }
273

    
274
  // Check if any groups have not been closed.
275
  if (groupStack.length !== 0) {
276
    util.error(regexpStr, 'Unterminated group');
277
  }
278

    
279
  return start;
280
};
281

    
282
// Expose the `types` module (the source of ROOT, GROUP, CHAR, ... used above)
// as a property of the exported tokenizer function.
module.exports.types = types;
(1-1/5)