// turndown.es.js (22 KB)
  1. function extend (destination) {
  2. for (var i = 1; i < arguments.length; i++) {
  3. var source = arguments[i];
  4. for (var key in source) {
  5. if (source.hasOwnProperty(key)) destination[key] = source[key];
  6. }
  7. }
  8. return destination
  9. }
  10. function repeat (character, count) {
  11. return Array(count + 1).join(character)
  12. }
  13. var blockElements = [
  14. 'address', 'article', 'aside', 'audio', 'blockquote', 'body', 'canvas',
  15. 'center', 'dd', 'dir', 'div', 'dl', 'dt', 'fieldset', 'figcaption',
  16. 'figure', 'footer', 'form', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
  17. 'header', 'hgroup', 'hr', 'html', 'isindex', 'li', 'main', 'menu', 'nav',
  18. 'noframes', 'noscript', 'ol', 'output', 'p', 'pre', 'section', 'table',
  19. 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'ul'
  20. ];
  21. function isBlock (node) {
  22. return blockElements.indexOf(node.nodeName.toLowerCase()) !== -1
  23. }
  24. var voidElements = [
  25. 'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input',
  26. 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'
  27. ];
  28. function isVoid (node) {
  29. return voidElements.indexOf(node.nodeName.toLowerCase()) !== -1
  30. }
  31. var voidSelector = voidElements.join();
  32. function hasVoid (node) {
  33. return node.querySelector && node.querySelector(voidSelector)
  34. }
  35. var rules = {};
  36. rules.paragraph = {
  37. filter: 'p',
  38. replacement: function (content) {
  39. return '\n\n' + content + '\n\n'
  40. }
  41. };
  42. rules.lineBreak = {
  43. filter: 'br',
  44. replacement: function (content, node, options) {
  45. return options.br + '\n'
  46. }
  47. };
  48. rules.heading = {
  49. filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
  50. replacement: function (content, node, options) {
  51. var hLevel = Number(node.nodeName.charAt(1));
  52. if (options.headingStyle === 'setext' && hLevel < 3) {
  53. var underline = repeat((hLevel === 1 ? '=' : '-'), content.length);
  54. return (
  55. '\n\n' + content + '\n' + underline + '\n\n'
  56. )
  57. } else {
  58. return '\n\n' + repeat('#', hLevel) + ' ' + content + '\n\n'
  59. }
  60. }
  61. };
  62. rules.blockquote = {
  63. filter: 'blockquote',
  64. replacement: function (content) {
  65. content = content.replace(/^\n+|\n+$/g, '');
  66. content = content.replace(/^/gm, '> ');
  67. return '\n\n' + content + '\n\n'
  68. }
  69. };
  70. rules.list = {
  71. filter: ['ul', 'ol'],
  72. replacement: function (content, node) {
  73. var parent = node.parentNode;
  74. if (parent.nodeName === 'LI' && parent.lastElementChild === node) {
  75. return '\n' + content
  76. } else {
  77. return '\n\n' + content + '\n\n'
  78. }
  79. }
  80. };
  81. rules.listItem = {
  82. filter: 'li',
  83. replacement: function (content, node, options) {
  84. content = content
  85. .replace(/^\n+/, '') // remove leading newlines
  86. .replace(/\n+$/, '\n') // replace trailing newlines with just a single one
  87. .replace(/\n/gm, '\n '); // indent
  88. var prefix = options.bulletListMarker + ' ';
  89. var parent = node.parentNode;
  90. if (parent.nodeName === 'OL') {
  91. var start = parent.getAttribute('start');
  92. var index = Array.prototype.indexOf.call(parent.children, node);
  93. prefix = (start ? Number(start) + index : index + 1) + '. ';
  94. }
  95. return (
  96. prefix + content + (node.nextSibling && !/\n$/.test(content) ? '\n' : '')
  97. )
  98. }
  99. };
  100. rules.indentedCodeBlock = {
  101. filter: function (node, options) {
  102. return (
  103. options.codeBlockStyle === 'indented' &&
  104. node.nodeName === 'PRE' &&
  105. node.firstChild &&
  106. node.firstChild.nodeName === 'CODE'
  107. )
  108. },
  109. replacement: function (content, node, options) {
  110. return (
  111. '\n\n ' +
  112. node.firstChild.textContent.replace(/\n/g, '\n ') +
  113. '\n\n'
  114. )
  115. }
  116. };
  117. rules.fencedCodeBlock = {
  118. filter: function (node, options) {
  119. return (
  120. options.codeBlockStyle === 'fenced' &&
  121. node.nodeName === 'PRE' &&
  122. node.firstChild &&
  123. node.firstChild.nodeName === 'CODE'
  124. )
  125. },
  126. replacement: function (content, node, options) {
  127. var className = node.firstChild.className || '';
  128. var language = (className.match(/language-(\S+)/) || [null, ''])[1];
  129. var code = node.firstChild.textContent;
  130. var fenceChar = options.fence.charAt(0);
  131. var fenceSize = 3;
  132. var fenceInCodeRegex = new RegExp('^' + fenceChar + '{3,}', 'gm');
  133. var match;
  134. while ((match = fenceInCodeRegex.exec(code))) {
  135. if (match[0].length >= fenceSize) {
  136. fenceSize = match[0].length + 1;
  137. }
  138. }
  139. var fence = repeat(fenceChar, fenceSize);
  140. return (
  141. '\n\n' + fence + language + '\n' +
  142. code.replace(/\n$/, '') +
  143. '\n' + fence + '\n\n'
  144. )
  145. }
  146. };
  147. rules.horizontalRule = {
  148. filter: 'hr',
  149. replacement: function (content, node, options) {
  150. return '\n\n' + options.hr + '\n\n'
  151. }
  152. };
  153. rules.inlineLink = {
  154. filter: function (node, options) {
  155. return (
  156. options.linkStyle === 'inlined' &&
  157. node.nodeName === 'A' &&
  158. node.getAttribute('href')
  159. )
  160. },
  161. replacement: function (content, node) {
  162. var href = node.getAttribute('href');
  163. var title = node.title ? ' "' + node.title + '"' : '';
  164. return '[' + content + '](' + href + title + ')'
  165. }
  166. };
  167. rules.referenceLink = {
  168. filter: function (node, options) {
  169. return (
  170. options.linkStyle === 'referenced' &&
  171. node.nodeName === 'A' &&
  172. node.getAttribute('href')
  173. )
  174. },
  175. replacement: function (content, node, options) {
  176. var href = node.getAttribute('href');
  177. var title = node.title ? ' "' + node.title + '"' : '';
  178. var replacement;
  179. var reference;
  180. switch (options.linkReferenceStyle) {
  181. case 'collapsed':
  182. replacement = '[' + content + '][]';
  183. reference = '[' + content + ']: ' + href + title;
  184. break
  185. case 'shortcut':
  186. replacement = '[' + content + ']';
  187. reference = '[' + content + ']: ' + href + title;
  188. break
  189. default:
  190. var id = this.references.length + 1;
  191. replacement = '[' + content + '][' + id + ']';
  192. reference = '[' + id + ']: ' + href + title;
  193. }
  194. this.references.push(reference);
  195. return replacement
  196. },
  197. references: [],
  198. append: function (options) {
  199. var references = '';
  200. if (this.references.length) {
  201. references = '\n\n' + this.references.join('\n') + '\n\n';
  202. this.references = []; // Reset references
  203. }
  204. return references
  205. }
  206. };
  207. rules.emphasis = {
  208. filter: ['em', 'i'],
  209. replacement: function (content, node, options) {
  210. if (!content.trim()) return ''
  211. return options.emDelimiter + content + options.emDelimiter
  212. }
  213. };
  214. rules.strong = {
  215. filter: ['strong', 'b'],
  216. replacement: function (content, node, options) {
  217. if (!content.trim()) return ''
  218. return options.strongDelimiter + content + options.strongDelimiter
  219. }
  220. };
  221. rules.code = {
  222. filter: function (node) {
  223. var hasSiblings = node.previousSibling || node.nextSibling;
  224. var isCodeBlock = node.parentNode.nodeName === 'PRE' && !hasSiblings;
  225. return node.nodeName === 'CODE' && !isCodeBlock
  226. },
  227. replacement: function (content) {
  228. if (!content.trim()) return ''
  229. var delimiter = '`';
  230. var leadingSpace = '';
  231. var trailingSpace = '';
  232. var matches = content.match(/`+/gm);
  233. if (matches) {
  234. if (/^`/.test(content)) leadingSpace = ' ';
  235. if (/`$/.test(content)) trailingSpace = ' ';
  236. while (matches.indexOf(delimiter) !== -1) delimiter = delimiter + '`';
  237. }
  238. return delimiter + leadingSpace + content + trailingSpace + delimiter
  239. }
  240. };
  241. rules.image = {
  242. filter: 'img',
  243. replacement: function (content, node) {
  244. var alt = node.alt || '';
  245. var src = node.getAttribute('src') || '';
  246. var title = node.title || '';
  247. var titlePart = title ? ' "' + title + '"' : '';
  248. return src ? '![' + alt + ']' + '(' + src + titlePart + ')' : ''
  249. }
  250. };
  251. /**
  252. * Manages a collection of rules used to convert HTML to Markdown
  253. */
  254. function Rules (options) {
  255. this.options = options;
  256. this._keep = [];
  257. this._remove = [];
  258. this.blankRule = {
  259. replacement: options.blankReplacement
  260. };
  261. this.keepReplacement = options.keepReplacement;
  262. this.defaultRule = {
  263. replacement: options.defaultReplacement
  264. };
  265. this.array = [];
  266. for (var key in options.rules) this.array.push(options.rules[key]);
  267. }
  268. Rules.prototype = {
  269. add: function (key, rule) {
  270. this.array.unshift(rule);
  271. },
  272. keep: function (filter) {
  273. this._keep.unshift({
  274. filter: filter,
  275. replacement: this.keepReplacement
  276. });
  277. },
  278. remove: function (filter) {
  279. this._remove.unshift({
  280. filter: filter,
  281. replacement: function () {
  282. return ''
  283. }
  284. });
  285. },
  286. forNode: function (node) {
  287. if (node.isBlank) return this.blankRule
  288. var rule;
  289. if ((rule = findRule(this.array, node, this.options))) return rule
  290. if ((rule = findRule(this._keep, node, this.options))) return rule
  291. if ((rule = findRule(this._remove, node, this.options))) return rule
  292. return this.defaultRule
  293. },
  294. forEach: function (fn) {
  295. for (var i = 0; i < this.array.length; i++) fn(this.array[i], i);
  296. }
  297. };
  298. function findRule (rules, node, options) {
  299. for (var i = 0; i < rules.length; i++) {
  300. var rule = rules[i];
  301. if (filterValue(rule, node, options)) return rule
  302. }
  303. return void 0
  304. }
  305. function filterValue (rule, node, options) {
  306. var filter = rule.filter;
  307. if (typeof filter === 'string') {
  308. if (filter === node.nodeName.toLowerCase()) return true
  309. } else if (Array.isArray(filter)) {
  310. if (filter.indexOf(node.nodeName.toLowerCase()) > -1) return true
  311. } else if (typeof filter === 'function') {
  312. if (filter.call(rule, node, options)) return true
  313. } else {
  314. throw new TypeError('`filter` needs to be a string, array, or function')
  315. }
  316. }
/**
 * The collapseWhitespace function is adapted from collapse-whitespace
 * by Luc Thevenard.
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2014 Luc Thevenard <lucthevenard@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
  343. /**
  344. * collapseWhitespace(options) removes extraneous whitespace from an the given element.
  345. *
  346. * @param {Object} options
  347. */
  348. function collapseWhitespace (options) {
  349. var element = options.element;
  350. var isBlock = options.isBlock;
  351. var isVoid = options.isVoid;
  352. var isPre = options.isPre || function (node) {
  353. return node.nodeName === 'PRE'
  354. };
  355. if (!element.firstChild || isPre(element)) return
  356. var prevText = null;
  357. var prevVoid = false;
  358. var prev = null;
  359. var node = next(prev, element, isPre);
  360. while (node !== element) {
  361. if (node.nodeType === 3 || node.nodeType === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
  362. var text = node.data.replace(/[ \r\n\t]+/g, ' ');
  363. if ((!prevText || / $/.test(prevText.data)) &&
  364. !prevVoid && text[0] === ' ') {
  365. text = text.substr(1);
  366. }
  367. // `text` might be empty at this point.
  368. if (!text) {
  369. node = remove(node);
  370. continue
  371. }
  372. node.data = text;
  373. prevText = node;
  374. } else if (node.nodeType === 1) { // Node.ELEMENT_NODE
  375. if (isBlock(node) || node.nodeName === 'BR') {
  376. if (prevText) {
  377. prevText.data = prevText.data.replace(/ $/, '');
  378. }
  379. prevText = null;
  380. prevVoid = false;
  381. } else if (isVoid(node)) {
  382. // Avoid trimming space around non-block, non-BR void elements.
  383. prevText = null;
  384. prevVoid = true;
  385. }
  386. } else {
  387. node = remove(node);
  388. continue
  389. }
  390. var nextNode = next(prev, node, isPre);
  391. prev = node;
  392. node = nextNode;
  393. }
  394. if (prevText) {
  395. prevText.data = prevText.data.replace(/ $/, '');
  396. if (!prevText.data) {
  397. remove(prevText);
  398. }
  399. }
  400. }
  401. /**
  402. * remove(node) removes the given node from the DOM and returns the
  403. * next node in the sequence.
  404. *
  405. * @param {Node} node
  406. * @return {Node} node
  407. */
  408. function remove (node) {
  409. var next = node.nextSibling || node.parentNode;
  410. node.parentNode.removeChild(node);
  411. return next
  412. }
  413. /**
  414. * next(prev, current, isPre) returns the next node in the sequence, given the
  415. * current and previous nodes.
  416. *
  417. * @param {Node} prev
  418. * @param {Node} current
  419. * @param {Function} isPre
  420. * @return {Node}
  421. */
  422. function next (prev, current, isPre) {
  423. if ((prev && prev.parentNode === current) || isPre(current)) {
  424. return current.nextSibling || current.parentNode
  425. }
  426. return current.firstChild || current.nextSibling || current.parentNode
  427. }
  428. /*
  429. * Set up window for Node.js
  430. */
  431. var root = (typeof window !== 'undefined' ? window : {});
  432. /*
  433. * Parsing HTML strings
  434. */
  435. function canParseHTMLNatively () {
  436. var Parser = root.DOMParser;
  437. var canParse = false;
  438. // Adapted from https://gist.github.com/1129031
  439. // Firefox/Opera/IE throw errors on unsupported types
  440. try {
  441. // WebKit returns null on unsupported types
  442. if (new Parser().parseFromString('', 'text/html')) {
  443. canParse = true;
  444. }
  445. } catch (e) {}
  446. return canParse
  447. }
  448. function createHTMLParser () {
  449. var Parser = function () {};
  450. {
  451. var JSDOM = require('jsdom').JSDOM;
  452. Parser.prototype.parseFromString = function (string) {
  453. return new JSDOM(string).window.document
  454. };
  455. }
  456. return Parser
  457. }
  458. var HTMLParser = canParseHTMLNatively() ? root.DOMParser : createHTMLParser();
  459. function RootNode (input) {
  460. var root;
  461. if (typeof input === 'string') {
  462. var doc = htmlParser().parseFromString(
  463. // DOM parsers arrange elements in the <head> and <body>.
  464. // Wrapping in a custom element ensures elements are reliably arranged in
  465. // a single element.
  466. '<x-turndown id="turndown-root">' + input + '</x-turndown>',
  467. 'text/html'
  468. );
  469. root = doc.getElementById('turndown-root');
  470. } else {
  471. root = input.cloneNode(true);
  472. }
  473. collapseWhitespace({
  474. element: root,
  475. isBlock: isBlock,
  476. isVoid: isVoid
  477. });
  478. return root
  479. }
  480. var _htmlParser;
  481. function htmlParser () {
  482. _htmlParser = _htmlParser || new HTMLParser();
  483. return _htmlParser
  484. }
  485. function Node (node) {
  486. node.isBlock = isBlock(node);
  487. node.isCode = node.nodeName.toLowerCase() === 'code' || node.parentNode.isCode;
  488. node.isBlank = isBlank(node);
  489. node.flankingWhitespace = flankingWhitespace(node);
  490. return node
  491. }
  492. function isBlank (node) {
  493. return (
  494. ['A', 'TH', 'TD', 'IFRAME', 'SCRIPT', 'AUDIO', 'VIDEO'].indexOf(node.nodeName) === -1 &&
  495. /^\s*$/i.test(node.textContent) &&
  496. !isVoid(node) &&
  497. !hasVoid(node)
  498. )
  499. }
  500. function flankingWhitespace (node) {
  501. var leading = '';
  502. var trailing = '';
  503. if (!node.isBlock) {
  504. var hasLeading = /^\s/.test(node.textContent);
  505. var hasTrailing = /\s$/.test(node.textContent);
  506. var blankWithSpaces = node.isBlank && hasLeading && hasTrailing;
  507. if (hasLeading && !isFlankedByWhitespace('left', node)) {
  508. leading = ' ';
  509. }
  510. if (!blankWithSpaces && hasTrailing && !isFlankedByWhitespace('right', node)) {
  511. trailing = ' ';
  512. }
  513. }
  514. return { leading: leading, trailing: trailing }
  515. }
  516. function isFlankedByWhitespace (side, node) {
  517. var sibling;
  518. var regExp;
  519. var isFlanked;
  520. if (side === 'left') {
  521. sibling = node.previousSibling;
  522. regExp = / $/;
  523. } else {
  524. sibling = node.nextSibling;
  525. regExp = /^ /;
  526. }
  527. if (sibling) {
  528. if (sibling.nodeType === 3) {
  529. isFlanked = regExp.test(sibling.nodeValue);
  530. } else if (sibling.nodeType === 1 && !isBlock(sibling)) {
  531. isFlanked = regExp.test(sibling.textContent);
  532. }
  533. }
  534. return isFlanked
  535. }
  536. var reduce = Array.prototype.reduce;
  537. var leadingNewLinesRegExp = /^\n*/;
  538. var trailingNewLinesRegExp = /\n*$/;
  539. var escapes = [
  540. [/\\/g, '\\\\'],
  541. [/\*/g, '\\*'],
  542. [/^-/g, '\\-'],
  543. [/^\+ /g, '\\+ '],
  544. [/^(=+)/g, '\\$1'],
  545. [/^(#{1,6}) /g, '\\$1 '],
  546. [/`/g, '\\`'],
  547. [/^~~~/g, '\\~~~'],
  548. [/\[/g, '\\['],
  549. [/\]/g, '\\]'],
  550. [/^>/g, '\\>'],
  551. [/_/g, '\\_'],
  552. [/^(\d+)\. /g, '$1\\. ']
  553. ];
  554. function TurndownService (options) {
  555. if (!(this instanceof TurndownService)) return new TurndownService(options)
  556. var defaults = {
  557. rules: rules,
  558. headingStyle: 'setext',
  559. hr: '* * *',
  560. bulletListMarker: '*',
  561. codeBlockStyle: 'indented',
  562. fence: '```',
  563. emDelimiter: '_',
  564. strongDelimiter: '**',
  565. linkStyle: 'inlined',
  566. linkReferenceStyle: 'full',
  567. br: ' ',
  568. blankReplacement: function (content, node) {
  569. return node.isBlock ? '\n\n' : ''
  570. },
  571. keepReplacement: function (content, node) {
  572. return node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML
  573. },
  574. defaultReplacement: function (content, node) {
  575. return node.isBlock ? '\n\n' + content + '\n\n' : content
  576. }
  577. };
  578. this.options = extend({}, defaults, options);
  579. this.rules = new Rules(this.options);
  580. }
  581. TurndownService.prototype = {
  582. /**
  583. * The entry point for converting a string or DOM node to Markdown
  584. * @public
  585. * @param {String|HTMLElement} input The string or DOM node to convert
  586. * @returns A Markdown representation of the input
  587. * @type String
  588. */
  589. turndown: function (input) {
  590. if (!canConvert(input)) {
  591. throw new TypeError(
  592. input + ' is not a string, or an element/document/fragment node.'
  593. )
  594. }
  595. if (input === '') return ''
  596. var output = process.call(this, new RootNode(input));
  597. return postProcess.call(this, output)
  598. },
  599. /**
  600. * Add one or more plugins
  601. * @public
  602. * @param {Function|Array} plugin The plugin or array of plugins to add
  603. * @returns The Turndown instance for chaining
  604. * @type Object
  605. */
  606. use: function (plugin) {
  607. if (Array.isArray(plugin)) {
  608. for (var i = 0; i < plugin.length; i++) this.use(plugin[i]);
  609. } else if (typeof plugin === 'function') {
  610. plugin(this);
  611. } else {
  612. throw new TypeError('plugin must be a Function or an Array of Functions')
  613. }
  614. return this
  615. },
  616. /**
  617. * Adds a rule
  618. * @public
  619. * @param {String} key The unique key of the rule
  620. * @param {Object} rule The rule
  621. * @returns The Turndown instance for chaining
  622. * @type Object
  623. */
  624. addRule: function (key, rule) {
  625. this.rules.add(key, rule);
  626. return this
  627. },
  628. /**
  629. * Keep a node (as HTML) that matches the filter
  630. * @public
  631. * @param {String|Array|Function} filter The unique key of the rule
  632. * @returns The Turndown instance for chaining
  633. * @type Object
  634. */
  635. keep: function (filter) {
  636. this.rules.keep(filter);
  637. return this
  638. },
  639. /**
  640. * Remove a node that matches the filter
  641. * @public
  642. * @param {String|Array|Function} filter The unique key of the rule
  643. * @returns The Turndown instance for chaining
  644. * @type Object
  645. */
  646. remove: function (filter) {
  647. this.rules.remove(filter);
  648. return this
  649. },
  650. /**
  651. * Escapes Markdown syntax
  652. * @public
  653. * @param {String} string The string to escape
  654. * @returns A string with Markdown syntax escaped
  655. * @type String
  656. */
  657. escape: function (string) {
  658. return escapes.reduce(function (accumulator, escape) {
  659. return accumulator.replace(escape[0], escape[1])
  660. }, string)
  661. }
  662. };
  663. /**
  664. * Reduces a DOM node down to its Markdown string equivalent
  665. * @private
  666. * @param {HTMLElement} parentNode The node to convert
  667. * @returns A Markdown representation of the node
  668. * @type String
  669. */
  670. function process (parentNode) {
  671. var self = this;
  672. return reduce.call(parentNode.childNodes, function (output, node) {
  673. node = new Node(node);
  674. var replacement = '';
  675. if (node.nodeType === 3) {
  676. replacement = node.isCode ? node.nodeValue : self.escape(node.nodeValue);
  677. } else if (node.nodeType === 1) {
  678. replacement = replacementForNode.call(self, node);
  679. }
  680. return join(output, replacement)
  681. }, '')
  682. }
  683. /**
  684. * Appends strings as each rule requires and trims the output
  685. * @private
  686. * @param {String} output The conversion output
  687. * @returns A trimmed version of the ouput
  688. * @type String
  689. */
  690. function postProcess (output) {
  691. var self = this;
  692. this.rules.forEach(function (rule) {
  693. if (typeof rule.append === 'function') {
  694. output = join(output, rule.append(self.options));
  695. }
  696. });
  697. return output.replace(/^[\t\r\n]+/, '').replace(/[\t\r\n\s]+$/, '')
  698. }
  699. /**
  700. * Converts an element node to its Markdown equivalent
  701. * @private
  702. * @param {HTMLElement} node The node to convert
  703. * @returns A Markdown representation of the node
  704. * @type String
  705. */
  706. function replacementForNode (node) {
  707. var rule = this.rules.forNode(node);
  708. var content = process.call(this, node);
  709. var whitespace = node.flankingWhitespace;
  710. if (whitespace.leading || whitespace.trailing) content = content.trim();
  711. return (
  712. whitespace.leading +
  713. rule.replacement(content, node, this.options) +
  714. whitespace.trailing
  715. )
  716. }
  717. /**
  718. * Determines the new lines between the current output and the replacement
  719. * @private
  720. * @param {String} output The current conversion output
  721. * @param {String} replacement The string to append to the output
  722. * @returns The whitespace to separate the current output and the replacement
  723. * @type String
  724. */
  725. function separatingNewlines (output, replacement) {
  726. var newlines = [
  727. output.match(trailingNewLinesRegExp)[0],
  728. replacement.match(leadingNewLinesRegExp)[0]
  729. ].sort();
  730. var maxNewlines = newlines[newlines.length - 1];
  731. return maxNewlines.length < 2 ? maxNewlines : '\n\n'
  732. }
  733. function join (string1, string2) {
  734. var separator = separatingNewlines(string1, string2);
  735. // Remove trailing/leading newlines and replace with separator
  736. string1 = string1.replace(trailingNewLinesRegExp, '');
  737. string2 = string2.replace(leadingNewLinesRegExp, '');
  738. return string1 + separator + string2
  739. }
  740. /**
  741. * Determines whether an input can be converted
  742. * @private
  743. * @param {String|HTMLElement} input Describe this parameter
  744. * @returns Describe what it returns
  745. * @type String|Object|Array|Boolean|Number
  746. */
  747. function canConvert (input) {
  748. return (
  749. input != null && (
  750. typeof input === 'string' ||
  751. (input.nodeType && (
  752. input.nodeType === 1 || input.nodeType === 9 || input.nodeType === 11
  753. ))
  754. )
  755. )
  756. }
  757. export default TurndownService;