regjsgen.js 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. /*!
  2. * regjsgen 0.5.2
  3. * Copyright 2014-2020 Benjamin Tan <https://ofcr.se/>
  4. * Available under the MIT license <https://github.com/bnjmnt4n/regjsgen/blob/master/LICENSE-MIT.txt>
  5. */
  6. ;(function() {
  7. 'use strict';
  // `typeof` results that denote a value of the language type `Object`.
  var objectTypes = { 'function': true, 'object': true };

  // Global object reference: `window` when it exists as an object/function,
  // otherwise whatever `this` is bound to for the enclosing function.
  var root = (objectTypes[typeof window] && window) || this;

  // Free variable `exports`; rejected when it carries a `nodeType` property.
  var freeExports = objectTypes[typeof exports] && exports && !exports.nodeType && exports;

  // Truthy when a free variable `module` exists and has no `nodeType` property.
  var hasFreeModule = objectTypes[typeof module] && module && !module.nodeType;

  // Free variable `global` (Node.js or Browserified code). It only replaces
  // `root` when it refers back to itself through `global`, `window` or `self`.
  var freeGlobal = freeExports && hasFreeModule && typeof global == 'object' && global;
  if (freeGlobal) {
    if (
      freeGlobal.global === freeGlobal ||
      freeGlobal.window === freeGlobal ||
      freeGlobal.self === freeGlobal
    ) {
      root = freeGlobal;
    }
  }

  // Cached so own-property checks do not depend on the checked object.
  var hasOwnProperty = Object.prototype.hasOwnProperty;
  26. /*--------------------------------------------------------------------------*/
  // Builds the string for a single code point, taken from the first argument.
  // Based on https://mths.be/fromcodepoint by @mathias.
  //
  // The argument is coerced with `Number`. A `RangeError` is thrown unless the
  // result is a finite integer in the range 0..0x10FFFF.
  function fromCodePoint() {
    var value = Number(arguments[0]);
    var isValid =
      isFinite(value) &&              // rejects `NaN`, `+Infinity`, `-Infinity`
      value >= 0 &&                   // lower bound of the Unicode range
      value <= 0x10FFFF &&            // upper bound of the Unicode range
      Math.floor(value) == value;     // must be an integer
    if (!isValid) {
      throw RangeError('Invalid code point: ' + value);
    }
    if (value > 0xFFFF) {
      // Astral code point: emit a surrogate pair.
      // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
      var offset = value - 0x10000;
      return String.fromCharCode(
        (offset >> 10) + 0xD800,
        (offset % 0x400) + 0xDC00
      );
    }
    // BMP code point: a single UTF-16 code unit.
    return String.fromCharCode(value);
  }
  51. /*--------------------------------------------------------------------------*/
  // Cache of compiled matchers, keyed by the `|`-separated list of types.
  var assertTypeRegexMap = {};

  // Verifies that `type` is allowed by `expected` and throws an `Error` if not.
  // `expected` is either a single type name or several names joined by `|`.
  // Returns `undefined` when the check passes.
  function assertType(type, expected) {
    var isUnion = expected.indexOf('|') != -1;
    if (!isUnion) {
      if (type != expected) {
        throw Error('Invalid node type: ' + type + '; expected type: ' + expected);
      }
      return;
    }

    // Several alternatives: compile an anchored regular expression once per
    // distinct list and reuse it on later calls.
    var matcher;
    if (hasOwnProperty.call(assertTypeRegexMap, expected)) {
      matcher = assertTypeRegexMap[expected];
    } else {
      matcher = RegExp('^(?:' + expected + ')$');
      assertTypeRegexMap[expected] = matcher;
    }

    if (!matcher.test(type)) {
      // The message embeds the compiled expression, not the raw type list.
      throw Error('Invalid node type: ' + type + '; expected types: ' + matcher);
    }
  }
  69. /*--------------------------------------------------------------------------*/
  // Turns an AST node into its regular expression source text by dispatching
  // on `node.type` to the matching entry of `generators`.
  // Throws an `Error` when `generators` has no own entry for that type.
  function generate(node) {
    var type = node.type;
    if (!hasOwnProperty.call(generators, type)) {
      throw Error('Invalid node type: ' + type);
    }
    return generators[type](node);
  }
  // Builds a string by concatenating `generator(term)` for every entry of
  // `terms`. When `separator` is truthy it is inserted between entries.
  function generateSequence(generator, terms, /* optional */ separator) {
    var count = terms.length;
    var output = '';

    for (var index = 0; index < count; index++) {
      var current = terms[index];

      if (separator && index > 0) {
        output += separator;
      }

      // A `\0` null escape directly followed by a digit symbol ("0".."9")
      // must not be treated as a backreference, so it is written out as
      // `\000` and the generator is skipped for this term.
      var hasNext = index + 1 < count;
      if (
        hasNext &&
        current.type == 'value' &&
        current.kind == 'null' &&
        terms[index + 1].type == 'value' &&
        terms[index + 1].kind == 'symbol' &&
        terms[index + 1].codePoint >= 48 &&
        terms[index + 1].codePoint <= 57
      ) {
        output += '\\000';
      } else {
        output += generator(current);
      }
    }

    return output;
  }
  105. /*--------------------------------------------------------------------------*/
  // Generates the source for an `alternative` node: each entry of `node.body`
  // is rendered with `generateTerm` and the results are concatenated.
  function generateAlternative(node) {
    assertType(node.type, 'alternative');
    var terms = node.body;
    return generateSequence(generateTerm, terms);
  }
  // Generates the source for an `anchor` node based on `node.kind`:
  // 'start' -> ^, 'end' -> $, 'boundary' -> \b, 'not-boundary' -> \B.
  // Any other kind throws an `Error`.
  function generateAnchor(node) {
    assertType(node.type, 'anchor');
    var kind = node.kind;
    if (kind === 'start') {
      return '^';
    }
    if (kind === 'end') {
      return '$';
    }
    if (kind === 'boundary') {
      return '\\b';
    }
    if (kind === 'not-boundary') {
      return '\\B';
    }
    throw Error('Invalid assertion');
  }
  // `|`-separated list of the node types accepted as an atom.
  var atomType = [
    'anchor',
    'characterClass',
    'characterClassEscape',
    'dot',
    'group',
    'reference',
    'unicodePropertyEscape',
    'value'
  ].join('|');

  // Generates the source for an atom: checks that the node type is one of
  // `atomType`, then delegates to `generate`.
  function generateAtom(node) {
    var type = node.type;
    assertType(type, atomType);
    return generate(node);
  }
  // Generates the source for a `characterClass` node: `[` ... `]`, with a
  // leading `^` when `node.negative` is truthy. Members are rendered with
  // `generateClassAtom` and joined with `&&` for kind 'intersection', `--` for
  // kind 'subtraction', and nothing for any other kind.
  function generateCharacterClass(node) {
    assertType(node.type, 'characterClass');

    var kind = node.kind;
    var separator = '';
    if (kind === 'intersection') {
      separator = '&&';
    } else if (kind === 'subtraction') {
      separator = '--';
    }

    var opening = node.negative ? '[^' : '[';
    return opening + generateSequence(generateClassAtom, node.body, separator) + ']';
  }
  // Generates the source for a `characterClassEscape` node: a backslash
  // followed by `node.value`.
  function generateCharacterClassEscape(node) {
    assertType(node.type, 'characterClassEscape');
    var escaped = node.value;
    return '\\' + escaped;
  }
  // Generates the source for a `characterClassRange` node as `min-max`, with
  // both ends rendered by `generateClassAtom`.
  // Throws an `Error` when either end is itself a `characterClassRange`.
  function generateCharacterClassRange(node) {
    assertType(node.type, 'characterClassRange');

    var lower = node.min;
    var upper = node.max;
    var hasNestedRange =
      lower.type == 'characterClassRange' || upper.type == 'characterClassRange';
    if (hasNestedRange) {
      throw Error('Invalid character class range');
    }

    return generateClassAtom(lower) + '-' + generateClassAtom(upper);
  }
  // Generates the source for a member of a character class: checks the node
  // type against the list of types accepted inside a class, then delegates to
  // `generate`.
  function generateClassAtom(node) {
    var allowedTypes = 'anchor|characterClass|characterClassEscape|characterClassRange|dot|value|unicodePropertyEscape|classStrings';
    assertType(node.type, allowedTypes);
    return generate(node);
  }
  // Generates the source for a `classStrings` node: `\q{...}` where each entry
  // of `node.strings` is rendered with `generateClassString` and the entries
  // are separated by `|`.
  function generateClassStrings(node) {
    assertType(node.type, 'classStrings');
    var alternatives = generateSequence(generateClassString, node.strings, '|');
    return '\\q{' + alternatives + '}';
  }
  // Generates the source for a `classString` node by rendering every entry of
  // `node.characters` with `generate` and concatenating the results.
  function generateClassString(node) {
    assertType(node.type, 'classString');
    var characters = node.characters;
    return generateSequence(generate, characters);
  }
  // Generates the source for a `disjunction` node: every entry of `node.body`
  // is rendered with `generate` and the results are joined with `|`.
  function generateDisjunction(node) {
    assertType(node.type, 'disjunction');
    var alternatives = node.body;
    return generateSequence(generate, alternatives, '|');
  }
  // Generates the source for a `dot` node, which is always a single `.`.
  function generateDot(node) {
    var expectedType = 'dot';
    assertType(node.type, expectedType);
    return '.';
  }
  // Generates the source for a `group` node: `(` + prefix + body + `)`.
  //
  // The prefix depends on `node.behavior`:
  //   'normal'             -> `?<name>` when `node.name` is set, else nothing
  //   'ignore'             -> `?:`
  //   'lookahead'          -> `?=`
  //   'negativeLookahead'  -> `?!`
  //   'lookbehind'         -> `?<=`
  //   'negativeLookbehind' -> `?<!`
  //
  // The body is every entry of `node.body` rendered with `generate`.
  // Throws an `Error` naming the offending value for any other behavior.
  function generateGroup(node) {
    assertType(node.type, 'group');

    var result = '';

    switch (node.behavior) {
      case 'normal':
        if (node.name) {
          result += '?<' + generateIdentifier(node.name) + '>';
        }
        break;
      case 'ignore':
        result += '?:';
        break;
      case 'lookahead':
        result += '?=';
        break;
      case 'negativeLookahead':
        result += '?!';
        break;
      case 'lookbehind':
        result += '?<=';
        break;
      case 'negativeLookbehind':
        result += '?<!';
        break;
      default:
        // Report the property that was actually inspected (`behavior`); the
        // British spelling does not exist on the node and always printed
        // `undefined`.
        throw Error('Invalid behaviour: ' + node.behavior);
    }

    result += generateSequence(generate, node.body);

    return '(' + result + ')';
  }
  // Generates the source for an `identifier` node, which is `node.value`
  // returned unchanged.
  function generateIdentifier(node) {
    assertType(node.type, 'identifier');
    var name = node.value;
    return name;
  }
  // Generates the source for a `quantifier` node: the quantified atom
  // (`node.body[0]`, rendered with `generateAtom`) followed by the quantifier.
  //
  //   no max:  min 0 -> `*`, min 1 -> `+`, otherwise `{min,}`
  //   min equal to max -> `{min}`
  //   min 0 and max 1  -> `?`
  //   anything else    -> `{min,max}`
  //
  // A trailing `?` is appended when `node.greedy` is falsy.
  function generateQuantifier(node) {
    assertType(node.type, 'quantifier');

    var min = node.min;
    var max = node.max;
    var symbol;

    if (max == null) {
      // Open-ended quantifier.
      symbol = min == 0 ? '*' : (min == 1 ? '+' : '{' + min + ',}');
    } else if (min == max) {
      symbol = '{' + min + '}';
    } else if (min == 0 && max == 1) {
      symbol = '?';
    } else {
      symbol = '{' + min + ',' + max + '}';
    }

    var lazyMarker = node.greedy ? '' : '?';

    return generateAtom(node.body[0]) + (symbol + lazyMarker);
  }
  // Generates the source for a `reference` node.
  // A truthy `node.matchIndex` yields a numbered backreference (`\N`);
  // otherwise a truthy `node.name` yields a named one (`\k<name>`).
  // Throws an `Error` when neither is present.
  function generateReference(node) {
    assertType(node.type, 'reference');

    var index = node.matchIndex;
    if (index) {
      return '\\' + index;
    }

    var name = node.name;
    if (!name) {
      throw new Error('Unknown reference type');
    }
    return '\\k<' + generateIdentifier(name) + '>';
  }
  // Generates the source for a term: any atom type, plus 'empty' and
  // 'quantifier'. Delegates to `generate` after the type check.
  // NOTE(review): 'empty' passes this check, but the `generators` table in this
  // file has no 'empty' entry, so `generate` would reject such a node —
  // confirm whether that is intended.
  function generateTerm(node) {
    var allowedTypes = atomType + '|empty|quantifier';
    assertType(node.type, allowedTypes);
    return generate(node);
  }
  // Generates the source for a `unicodePropertyEscape` node: `\p{value}`, or
  // `\P{value}` when `node.negative` is truthy.
  function generateUnicodePropertyEscape(node) {
    assertType(node.type, 'unicodePropertyEscape');
    var letter = node.negative ? 'P' : 'p';
    return '\\' + letter + '{' + node.value + '}';
  }
  // Generates the source for a `value` node from `node.kind` and
  // `node.codePoint`.
  //
  //   'symbol'                 -> the character itself
  //   'identifier'             -> backslash + the character
  //   'controlLetter'          -> `\c` + the character for codePoint + 64
  //   'null'                   -> backslash + the code point in decimal
  //   'octal'                  -> backslash + 3 octal digits (zero-padded)
  //   'hexadecimalEscape'      -> `\x` + 2 uppercase hex digits
  //   'unicodeEscape'          -> `\u` + 4 uppercase hex digits
  //   'unicodeCodePointEscape' -> `\u{` + uppercase hex digits + `}`
  //   'singleEscape'           -> \b \t \n \v \f \r \- for code points
  //                               8, 9, 10, 11, 12, 13 and 45
  //
  // Throws an `Error` when `codePoint` is not a number, when the kind is not
  // listed above, or when a 'singleEscape' has any other code point.
  function generateValue(node) {
    assertType(node.type, 'value');

    var kind = node.kind;
    var codePoint = node.codePoint;

    if (typeof codePoint != 'number') {
      throw new Error('Invalid code point: ' + codePoint);
    }

    if (kind === 'symbol') {
      return fromCodePoint(codePoint);
    }
    if (kind === 'identifier') {
      return '\\' + fromCodePoint(codePoint);
    }
    if (kind === 'controlLetter') {
      return '\\c' + fromCodePoint(codePoint + 64);
    }
    if (kind === 'null') {
      return '\\' + codePoint;
    }
    if (kind === 'octal') {
      return '\\' + ('000' + codePoint.toString(8)).slice(-3);
    }

    if (
      kind === 'hexadecimalEscape' ||
      kind === 'unicodeEscape' ||
      kind === 'unicodeCodePointEscape'
    ) {
      var hex = codePoint.toString(16).toUpperCase();
      if (kind === 'hexadecimalEscape') {
        return '\\x' + ('00' + hex).slice(-2);
      }
      if (kind === 'unicodeEscape') {
        return '\\u' + ('0000' + hex).slice(-4);
      }
      return '\\u{' + hex + '}';
    }

    if (kind === 'singleEscape') {
      // Code point -> escape sequence for the supported single escapes.
      var singleEscapes = {
        8: '\\b',
        9: '\\t',
        10: '\\n',
        11: '\\v',
        12: '\\f',
        13: '\\r',
        45: '\\-'
      };
      if (Object.prototype.hasOwnProperty.call(singleEscapes, codePoint)) {
        return singleEscapes[codePoint];
      }
      throw Error('Invalid code point: ' + codePoint);
    }

    throw Error('Unsupported node kind: ' + kind);
  }
  296. /*--------------------------------------------------------------------------*/
  // Dispatch table used by `generate`: node type -> function that renders a
  // node of that type.
  // NOTE(review): there is no 'empty' entry even though `generateTerm` accepts
  // that type — confirm whether such nodes can reach `generate`.
  var generators = {
    alternative: generateAlternative,
    anchor: generateAnchor,
    characterClass: generateCharacterClass,
    characterClassEscape: generateCharacterClassEscape,
    characterClassRange: generateCharacterClassRange,
    classStrings: generateClassStrings,
    disjunction: generateDisjunction,
    dot: generateDot,
    group: generateGroup,
    quantifier: generateQuantifier,
    reference: generateReference,
    unicodePropertyEscape: generateUnicodePropertyEscape,
    value: generateValue
  };
  313. /*--------------------------------------------------------------------------*/
  // Public API object: exposes `generate` only.
  var regjsgen = {};
  regjsgen.generate = generate;

  // The export target is picked in this order: AMD, CommonJS, global object.
  // Some AMD build optimizers, like r.js, check for condition patterns like
  // the following, so the test is kept in exactly this form.
  if (typeof define == 'function' && typeof define.amd == 'object' && define.amd) {
    // Anonymous module, so it can be aliased through path mapping.
    define(function() {
      return regjsgen;
    });
    // The AMD branch also publishes the API on the global object.
    root.regjsgen = regjsgen;
  }
  // `exports` is checked after `define` in case a build optimizer adds an
  // `exports` object.
  else if (freeExports && hasFreeModule) {
    // CommonJS: attach `generate` directly to `exports`.
    freeExports.generate = generate;
  }
  else {
    // No module system detected: publish on the global object.
    root.regjsgen = regjsgen;
  }
  335. }.call(this));