1 |
9bb1e829
|
cagy
|
var types = require('./types');
|
2 |
|
|
var sets = require('./sets');
|
3 |
|
|
|
4 |
|
|
|
5 |
|
|
// All of these are private and only used by randexp.
// It's assumed that they will always be called with the correct input.
|
7 |
|
|
|
8 |
|
|
// Characters accepted after `\c`. strToChars uses a character's index in this
// string as the resulting char code (e.g. 'A' -> 1, 'Z' -> 26).
// NOTE(review): ' ' sits at index 31 and '?' at index 32, whereas caret
// notation conventionally uses '_' for 31 and '?' for 127 -- confirm intended.
var CTRL = '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^ ?';

// Single-character escapes (`\0`, `\t`, `\n`, `\v`, `\f`, `\r`) mapped to the
// char code strToChars substitutes for each of them.
var SLSH = { '0': 0, 't': 9, 'n': 10, 'v': 11, 'f': 12, 'r': 13 };
|
10 |
|
|
|
11 |
|
|
/**
|
12 |
|
|
* Finds character representations in str and convert all to
|
13 |
|
|
* their respective characters
|
14 |
|
|
*
|
15 |
|
|
* @param {String} str
|
16 |
|
|
* @return {String}
|
17 |
|
|
*/
|
18 |
|
|
exports.strToChars = function(str) {
|
19 |
|
|
/* jshint maxlen: false */
|
20 |
|
|
var chars_regex = /(\[\\b\])|(\\)?\\(?:u([A-F0-9]{4})|x([A-F0-9]{2})|(0?[0-7]{2})|c([@A-Z\[\\\]\^?])|([0tnvfr]))/g;
|
21 |
|
|
str = str.replace(chars_regex, function(s, b, lbs, a16, b16, c8, dctrl, eslsh) {
|
22 |
|
|
if (lbs) {
|
23 |
|
|
return s;
|
24 |
|
|
}
|
25 |
|
|
|
26 |
|
|
var code = b ? 8 :
|
27 |
|
|
a16 ? parseInt(a16, 16) :
|
28 |
|
|
b16 ? parseInt(b16, 16) :
|
29 |
|
|
c8 ? parseInt(c8, 8) :
|
30 |
|
|
dctrl ? CTRL.indexOf(dctrl) :
|
31 |
|
|
SLSH[eslsh];
|
32 |
|
|
|
33 |
|
|
var c = String.fromCharCode(code);
|
34 |
|
|
|
35 |
|
|
// Escape special regex characters.
|
36 |
|
|
if (/[\[\]{}\^$.|?*+()]/.test(c)) {
|
37 |
|
|
c = '\\' + c;
|
38 |
|
|
}
|
39 |
|
|
|
40 |
|
|
return c;
|
41 |
|
|
});
|
42 |
|
|
|
43 |
|
|
return str;
|
44 |
|
|
};
|
45 |
|
|
|
46 |
|
|
|
47 |
|
|
/**
|
48 |
|
|
* turns class into tokens
|
49 |
|
|
* reads str until it encounters a ] not preceeded by a \
|
50 |
|
|
*
|
51 |
|
|
* @param {String} str
|
52 |
|
|
* @param {String} regexpStr
|
53 |
|
|
* @return {Array.<Array.<Object>, Number>}
|
54 |
|
|
*/
|
55 |
|
|
exports.tokenizeClass = function(str, regexpStr) {
|
56 |
|
|
/* jshint maxlen: false */
|
57 |
|
|
var tokens = [];
|
58 |
|
|
var regexp = /\\(?:(w)|(d)|(s)|(W)|(D)|(S))|((?:(?:\\)(.)|([^\]\\]))-(?:\\)?([^\]]))|(\])|(?:\\)?(.)/g;
|
59 |
|
|
var rs, c;
|
60 |
|
|
|
61 |
|
|
|
62 |
|
|
while ((rs = regexp.exec(str)) != null) {
|
63 |
|
|
if (rs[1]) {
|
64 |
|
|
tokens.push(sets.words());
|
65 |
|
|
|
66 |
|
|
} else if (rs[2]) {
|
67 |
|
|
tokens.push(sets.ints());
|
68 |
|
|
|
69 |
|
|
} else if (rs[3]) {
|
70 |
|
|
tokens.push(sets.whitespace());
|
71 |
|
|
|
72 |
|
|
} else if (rs[4]) {
|
73 |
|
|
tokens.push(sets.notWords());
|
74 |
|
|
|
75 |
|
|
} else if (rs[5]) {
|
76 |
|
|
tokens.push(sets.notInts());
|
77 |
|
|
|
78 |
|
|
} else if (rs[6]) {
|
79 |
|
|
tokens.push(sets.notWhitespace());
|
80 |
|
|
|
81 |
|
|
} else if (rs[7]) {
|
82 |
|
|
tokens.push({
|
83 |
|
|
type: types.RANGE,
|
84 |
|
|
from: (rs[8] || rs[9]).charCodeAt(0),
|
85 |
|
|
to: rs[10].charCodeAt(0),
|
86 |
|
|
});
|
87 |
|
|
|
88 |
|
|
} else if (c = rs[12]) {
|
89 |
|
|
tokens.push({
|
90 |
|
|
type: types.CHAR,
|
91 |
|
|
value: c.charCodeAt(0),
|
92 |
|
|
});
|
93 |
|
|
|
94 |
|
|
} else {
|
95 |
|
|
return [tokens, regexp.lastIndex];
|
96 |
|
|
}
|
97 |
|
|
}
|
98 |
|
|
|
99 |
|
|
exports.error(regexpStr, 'Unterminated character class');
|
100 |
|
|
};
|
101 |
|
|
|
102 |
|
|
|
103 |
|
|
/**
|
104 |
|
|
* Shortcut to throw errors.
|
105 |
|
|
*
|
106 |
|
|
* @param {String} regexp
|
107 |
|
|
* @param {String} msg
|
108 |
|
|
*/
|
109 |
|
|
exports.error = function(regexp, msg) {
|
110 |
|
|
throw new SyntaxError('Invalid regular expression: /' + regexp + '/: ' + msg);
|
111 |
|
|
};
|