/* tokenizer.js */
  1. 'use strict';
  2. module.exports = factory;
  3. var MERGEABLE_NODES = {
  4. text: mergeText,
  5. blockquote: mergeBlockquote
  6. };
  7. /* Check whether a node is mergeable with adjacent nodes. */
  8. function mergeable(node) {
  9. var start;
  10. var end;
  11. if (node.type !== 'text' || !node.position) {
  12. return true;
  13. }
  14. start = node.position.start;
  15. end = node.position.end;
  16. /* Only merge nodes which occupy the same size as their
  17. * `value`. */
  18. return start.line !== end.line ||
  19. end.column - start.column === node.value.length;
  20. }
  21. /* Merge two text nodes: `node` into `prev`. */
  22. function mergeText(prev, node) {
  23. prev.value += node.value;
  24. return prev;
  25. }
  26. /* Merge two blockquotes: `node` into `prev`, unless in
  27. * CommonMark mode. */
  28. function mergeBlockquote(prev, node) {
  29. if (this.options.commonmark) {
  30. return node;
  31. }
  32. prev.children = prev.children.concat(node.children);
  33. return prev;
  34. }
  35. /* Construct a tokenizer. This creates both
  36. * `tokenizeInline` and `tokenizeBlock`. */
  37. function factory(type) {
  38. return tokenize;
  39. /* Tokenizer for a bound `type`. */
  40. function tokenize(value, location) {
  41. var self = this;
  42. var offset = self.offset;
  43. var tokens = [];
  44. var methods = self[type + 'Methods'];
  45. var tokenizers = self[type + 'Tokenizers'];
  46. var line = location.line;
  47. var column = location.column;
  48. var index;
  49. var length;
  50. var method;
  51. var name;
  52. var matched;
  53. var valueLength;
  54. /* Trim white space only lines. */
  55. if (!value) {
  56. return tokens;
  57. }
  58. /* Expose on `eat`. */
  59. eat.now = now;
  60. eat.file = self.file;
  61. /* Sync initial offset. */
  62. updatePosition('');
  63. /* Iterate over `value`, and iterate over all
  64. * tokenizers. When one eats something, re-iterate
  65. * with the remaining value. If no tokenizer eats,
  66. * something failed (should not happen) and an
  67. * exception is thrown. */
  68. while (value) {
  69. index = -1;
  70. length = methods.length;
  71. matched = false;
  72. while (++index < length) {
  73. name = methods[index];
  74. method = tokenizers[name];
  75. if (
  76. method &&
  77. /* istanbul ignore next */ (!method.onlyAtStart || self.atStart) &&
  78. (!method.notInList || !self.inList) &&
  79. (!method.notInBlock || !self.inBlock) &&
  80. (!method.notInLink || !self.inLink)
  81. ) {
  82. valueLength = value.length;
  83. method.apply(self, [eat, value]);
  84. matched = valueLength !== value.length;
  85. if (matched) {
  86. break;
  87. }
  88. }
  89. }
  90. /* istanbul ignore if */
  91. if (!matched) {
  92. self.file.fail(new Error('Infinite loop'), eat.now());
  93. }
  94. }
  95. self.eof = now();
  96. return tokens;
  97. /* Update line, column, and offset based on
  98. * `value`. */
  99. function updatePosition(subvalue) {
  100. var lastIndex = -1;
  101. var index = subvalue.indexOf('\n');
  102. while (index !== -1) {
  103. line++;
  104. lastIndex = index;
  105. index = subvalue.indexOf('\n', index + 1);
  106. }
  107. if (lastIndex === -1) {
  108. column += subvalue.length;
  109. } else {
  110. column = subvalue.length - lastIndex;
  111. }
  112. if (line in offset) {
  113. if (lastIndex !== -1) {
  114. column += offset[line];
  115. } else if (column <= offset[line]) {
  116. column = offset[line] + 1;
  117. }
  118. }
  119. }
  120. /* Get offset. Called before the first character is
  121. * eaten to retrieve the range's offsets. */
  122. function getOffset() {
  123. var indentation = [];
  124. var pos = line + 1;
  125. /* Done. Called when the last character is
  126. * eaten to retrieve the range’s offsets. */
  127. return function () {
  128. var last = line + 1;
  129. while (pos < last) {
  130. indentation.push((offset[pos] || 0) + 1);
  131. pos++;
  132. }
  133. return indentation;
  134. };
  135. }
  136. /* Get the current position. */
  137. function now() {
  138. var pos = {line: line, column: column};
  139. pos.offset = self.toOffset(pos);
  140. return pos;
  141. }
  142. /* Store position information for a node. */
  143. function Position(start) {
  144. this.start = start;
  145. this.end = now();
  146. }
  147. /* Throw when a value is incorrectly eaten.
  148. * This shouldn’t happen but will throw on new,
  149. * incorrect rules. */
  150. function validateEat(subvalue) {
  151. /* istanbul ignore if */
  152. if (value.substring(0, subvalue.length) !== subvalue) {
  153. /* Capture stack-trace. */
  154. self.file.fail(
  155. new Error(
  156. 'Incorrectly eaten value: please report this ' +
  157. 'warning on http://git.io/vg5Ft'
  158. ),
  159. now()
  160. );
  161. }
  162. }
  163. /* Mark position and patch `node.position`. */
  164. function position() {
  165. var before = now();
  166. return update;
  167. /* Add the position to a node. */
  168. function update(node, indent) {
  169. var prev = node.position;
  170. var start = prev ? prev.start : before;
  171. var combined = [];
  172. var n = prev && prev.end.line;
  173. var l = before.line;
  174. node.position = new Position(start);
  175. /* If there was already a `position`, this
  176. * node was merged. Fixing `start` wasn’t
  177. * hard, but the indent is different.
  178. * Especially because some information, the
  179. * indent between `n` and `l` wasn’t
  180. * tracked. Luckily, that space is
  181. * (should be?) empty, so we can safely
  182. * check for it now. */
  183. if (prev && indent && prev.indent) {
  184. combined = prev.indent;
  185. if (n < l) {
  186. while (++n < l) {
  187. combined.push((offset[n] || 0) + 1);
  188. }
  189. combined.push(before.column);
  190. }
  191. indent = combined.concat(indent);
  192. }
  193. node.position.indent = indent || [];
  194. return node;
  195. }
  196. }
  197. /* Add `node` to `parent`s children or to `tokens`.
  198. * Performs merges where possible. */
  199. function add(node, parent) {
  200. var children = parent ? parent.children : tokens;
  201. var prev = children[children.length - 1];
  202. if (
  203. prev &&
  204. node.type === prev.type &&
  205. node.type in MERGEABLE_NODES &&
  206. mergeable(prev) &&
  207. mergeable(node)
  208. ) {
  209. node = MERGEABLE_NODES[node.type].call(self, prev, node);
  210. }
  211. if (node !== prev) {
  212. children.push(node);
  213. }
  214. if (self.atStart && tokens.length !== 0) {
  215. self.exitStart();
  216. }
  217. return node;
  218. }
  219. /* Remove `subvalue` from `value`.
  220. * `subvalue` must be at the start of `value`. */
  221. function eat(subvalue) {
  222. var indent = getOffset();
  223. var pos = position();
  224. var current = now();
  225. validateEat(subvalue);
  226. apply.reset = reset;
  227. reset.test = test;
  228. apply.test = test;
  229. value = value.substring(subvalue.length);
  230. updatePosition(subvalue);
  231. indent = indent();
  232. return apply;
  233. /* Add the given arguments, add `position` to
  234. * the returned node, and return the node. */
  235. function apply(node, parent) {
  236. return pos(add(pos(node), parent), indent);
  237. }
  238. /* Functions just like apply, but resets the
  239. * content: the line and column are reversed,
  240. * and the eaten value is re-added.
  241. * This is useful for nodes with a single
  242. * type of content, such as lists and tables.
  243. * See `apply` above for what parameters are
  244. * expected. */
  245. function reset() {
  246. var node = apply.apply(null, arguments);
  247. line = current.line;
  248. column = current.column;
  249. value = subvalue + value;
  250. return node;
  251. }
  252. /* Test the position, after eating, and reverse
  253. * to a not-eaten state. */
  254. function test() {
  255. var result = pos({});
  256. line = current.line;
  257. column = current.column;
  258. value = subvalue + value;
  259. return result.position;
  260. }
  261. }
  262. }
  263. }