escape.js 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. 'use strict';
  2. var decimal = require('is-decimal');
  3. var alphanumeric = require('is-alphanumeric');
  4. var whitespace = require('is-whitespace-character');
  5. var escapes = require('markdown-escapes');
  6. var prefix = require('./util/entity-prefix-length');
  7. module.exports = factory;
  /* Character put in front of a punctuation character to escape it. */
  8. var BACKSLASH = '\\';
  /* Characters escaped by `escape` when they are the first
   * non-whitespace character after a line break. */
  9. var BULLETS = ['*', '-', '+'];
  /* Characters a line may consist of for `alignment` to report it as an
   * alignment row.  NOTE(review): the name is misspelt ("ALLIGNMENT");
   * `alignment` refers to it by this exact name. */
  10. var ALLIGNMENT = [':', '-', ' ', '|'];
  /* Entity replacements used by `escape` for characters that are not in
   * the list returned by `markdown-escapes`. */
  11. var entities = {'<': '&lt;', ':': '&#x3A;', '&': '&amp;', '|': '&#x7C;', '~': '&#x7E;'};
  /* Factory to escape characters.
   *
   * `options` is read for `gfm`, `commonmark` and `pedantic`, and is also
   * handed as-is to `markdown-escapes` to obtain the characters that can be
   * backslash-escaped.  Returns `escape`, which reads `inLink` and `inTable`
   * from its `this`. */
  function factory(options) {
    return escape;

    /* Escape punctuation characters in a node's value.
     *
     * `value` is the string to escape, `node` the node it belongs to, and
     * `parent` (optional) that node's parent.  Siblings are looked up through
     * `parent.children` so that constructs spanning adjacent nodes are
     * escaped as well.  Returns the escaped string. */
    function escape(value, node, parent) {
      var self = this;
      var gfm = options.gfm;
      var commonmark = options.commonmark;
      var pedantic = options.pedantic;
      var markers = commonmark ? ['.', ')'] : ['.'];
      var siblings = parent && parent.children;
      var index = siblings && siblings.indexOf(node);
      var prev = siblings && siblings[index - 1];
      var next = siblings && siblings[index + 1];
      var length = value.length;
      var escapable = escapes(options);
      var position = -1;
      var queue = [];
      var escaped = queue;
      var afterNewLine;
      var character;
      var wordCharBefore;
      var wordCharAfter;
      var offset;
      var replace;
      var following;
      var count;

      /* Decide whether `value` starts at the beginning of a line: either
       * the previous text sibling ends in a line break (plus optional
       * white-space), or there is no previous sibling and the node sits
       * directly in a root / paragraph (or has no parent at all). */
      if (prev) {
        afterNewLine = text(prev) && /\n\s*$/.test(prev.value);
      } else {
        afterNewLine = !parent || parent.type === 'root' || parent.type === 'paragraph';
      }

      /* Escape a single character: with a backslash when it is in the
       * escapable list, otherwise with its entry in `entities`. */
      function one(character) {
        return escapable.indexOf(character) === -1 ?
          entities[character] : BACKSLASH + character;
      }

      while (++position < length) {
        character = value.charAt(position);
        replace = false;

        if (character === '\n') {
          afterNewLine = true;
        } else if (
          character === BACKSLASH ||
          character === '`' ||
          character === '*' ||
          character === '[' ||
          character === '<' ||
          (character === '&' && prefix(value.slice(position)) > 0) ||
          (character === ']' && self.inLink) ||
          (gfm && character === '~' && value.charAt(position + 1) === '~') ||
          (gfm && character === '|' && (self.inTable || alignment(value, position))) ||
          (
            character === '_' &&
            /* Delegate leading/trailing underscores
             * to the multinode version below. */
            position > 0 &&
            position < length - 1 &&
            (
              pedantic ||
              !alphanumeric(value.charAt(position - 1)) ||
              !alphanumeric(value.charAt(position + 1))
            )
          ) ||
          (gfm && !self.inLink && character === ':' && protocol(queue.join('')))
        ) {
          replace = true;
        } else if (afterNewLine) {
          if (
            character === '>' ||
            character === '#' ||
            BULLETS.indexOf(character) !== -1
          ) {
            replace = true;
          } else if (decimal(character)) {
            /* Skip over the run of digits; if it is followed by a list
             * marker and then white-space / end of value, escape the
             * marker. */
            offset = position + 1;

            while (offset < length) {
              if (!decimal(value.charAt(offset))) {
                break;
              }

              offset++;
            }

            if (markers.indexOf(value.charAt(offset)) !== -1) {
              /* Kept in its own local: storing this character in `next`
               * would discard the next sibling node that the multi-node
               * checks below depend on. */
              following = value.charAt(offset + 1);

              if (!following || following === ' ' || following === '\t' || following === '\n') {
                /* The digits are queued as one entry; the marker itself
                 * is queued (escaped) by the shared push below. */
                queue.push(value.slice(position, offset));
                position = offset;
                character = value.charAt(position);
                replace = true;
              }
            }
          }
        }

        if (afterNewLine && !whitespace(character)) {
          afterNewLine = false;
        }

        queue.push(replace ? one(character) : character);
      }

      /* Multi-node versions. */
      if (siblings && text(node)) {
        /* Check for an opening parentheses after a
         * link-reference (which can be joined by
         * white-space). */
        if (prev && prev.referenceType === 'shortcut') {
          position = -1;
          /* Number of queue entries.  Tracked separately from `length`
           * (the length of `value`): a multi-digit list marker collapses
           * several characters into one entry, so the two can differ, and
           * the checks further down index into `value`. */
          count = escaped.length;

          while (++position < count) {
            character = escaped[position];

            if (character === ' ' || character === '\t') {
              continue;
            }

            if (character === '(' || character === ':') {
              escaped[position] = one(character);
            }

            break;
          }

          /* If the current node is all spaces / tabs,
           * preceded by a shortcut, and followed by
           * a text starting with `(`, escape it. */
          if (
            text(next) &&
            position === count &&
            next.value.charAt(0) === '('
          ) {
            escaped.push(BACKSLASH);
          }
        }

        /* Ensure non-auto-links are not seen as links.
         * This pattern needs to check the preceding
         * nodes too. */
        if (
          gfm &&
          !self.inLink &&
          text(prev) &&
          value.charAt(0) === ':' &&
          protocol(prev.value.slice(-6))
        ) {
          escaped[0] = one(':');
        }

        /* Escape ampersand if it would otherwise
         * start an entity. */
        if (
          text(next) &&
          value.charAt(length - 1) === '&' &&
          prefix('&' + next.value) !== 0
        ) {
          escaped[escaped.length - 1] = one('&');
        }

        /* Escape double tildes in GFM. */
        if (
          gfm &&
          text(next) &&
          value.charAt(length - 1) === '~' &&
          next.value.charAt(0) === '~'
        ) {
          escaped.splice(escaped.length - 1, 0, BACKSLASH);
        }

        /* Escape underscores, but not mid-word (unless
         * in pedantic mode). */
        wordCharBefore = text(prev) && alphanumeric(prev.value.slice(-1));
        wordCharAfter = text(next) && alphanumeric(next.value.charAt(0));

        if (length === 1) {
          if (value === '_' && (pedantic || !wordCharBefore || !wordCharAfter)) {
            escaped.unshift(BACKSLASH);
          }
        } else {
          if (
            value.charAt(0) === '_' &&
            (pedantic || !wordCharBefore || !alphanumeric(value.charAt(1)))
          ) {
            escaped.unshift(BACKSLASH);
          }

          if (
            value.charAt(length - 1) === '_' &&
            (pedantic || !wordCharAfter || !alphanumeric(value.charAt(length - 2)))
          ) {
            escaped.splice(escaped.length - 1, 0, BACKSLASH);
          }
        }
      }

      return escaped.join('');
    }
  }
  /* Check if `index` in `value` is inside an alignment row: returns `true`
   * when every character on the line containing `index` is one of the
   * `ALLIGNMENT` characters, `false` otherwise. */
  function alignment(value, index) {
    var lineEnd = value.indexOf('\n', index);
    /* First character after the preceding line break (0 when none). */
    var cursor = value.lastIndexOf('\n', index) + 1;

    if (lineEnd === -1) {
      lineEnd = value.length;
    }

    for (; cursor < lineEnd; cursor++) {
      if (ALLIGNMENT.indexOf(value.charAt(cursor)) === -1) {
        return false;
      }
    }

    return true;
  }
  /* Check if `node` is a text node.  A falsy `node` is handed back
   * unchanged; otherwise the result is whether `node.type` is `'text'`. */
  function text(node) {
    if (!node) {
      return node;
    }

    return node.type === 'text';
  }
  /* Check if `value` ends in a protocol (`mailto`, `https` or `http`),
   * ignoring case.  Only the last six characters are inspected. */
  function protocol(value) {
    var tail = value.slice(-6).toLowerCase();

    if (tail === 'mailto') {
      return true;
    }

    if (tail.slice(-5) === 'https') {
      return true;
    }

    return tail.slice(-4) === 'http';
  }
  213. }