turndown.cjs.js 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897
  1. 'use strict';
  /**
   * Copies the own enumerable properties of every source object onto
   * `destination`. Sources are applied left to right, so later sources
   * overwrite earlier ones.
   *
   * @param {Object} destination The object that receives the properties
   * @param {...Object} sources Any further arguments are read as sources
   * @returns {Object} The same `destination` object
   */
  function extend (destination) {
    for (var i = 1; i < arguments.length; i++) {
      var source = arguments[i];
      for (var key in source) {
        // Borrow hasOwnProperty from Object.prototype: calling it on the source
        // itself throws for prototype-less objects (Object.create(null)) and for
        // objects that carry their own `hasOwnProperty` key.
        if (Object.prototype.hasOwnProperty.call(source, key)) {
          destination[key] = source[key];
        }
      }
    }
    return destination
  }
  /**
   * Builds a string consisting of `count` copies of `character`.
   *
   * @param {String} character The text to repeat
   * @param {Number} count How many copies to produce
   * @returns {String} The repeated text
   */
  function repeat (character, count) {
    // Joining count + 1 empty slots yields exactly `count` separators.
    var slots = new Array(count + 1);
    return slots.join(character)
  }
  // Lower-case tag names that isBlock() treats as block-level elements.
  var blockElements = [
    'address', 'article', 'aside', 'audio', 'blockquote', 'body', 'canvas',
    'center', 'dd', 'dir', 'div', 'dl', 'dt', 'fieldset', 'figcaption',
    'figure', 'footer', 'form', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'header', 'hgroup', 'hr', 'html', 'isindex', 'li', 'main', 'menu', 'nav',
    'noframes', 'noscript', 'ol', 'output', 'p', 'pre', 'section', 'table',
    'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'ul'
  ];

  /**
   * Reports whether the node's lower-cased nodeName appears in `blockElements`.
   *
   * @param {Node} node The node to classify
   * @returns {Boolean} true when the node is a listed block element
   */
  function isBlock (node) {
    var tagName = node.nodeName.toLowerCase();
    return blockElements.indexOf(tagName) >= 0
  }
  // Lower-case tag names that isVoid() treats as void elements.
  var voidElements = [
    'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input',
    'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'
  ];

  /**
   * Reports whether the node's lower-cased nodeName appears in `voidElements`.
   *
   * @param {Node} node The node to classify
   * @returns {Boolean} true when the node is a listed void element
   */
  function isVoid (node) {
    var tagName = node.nodeName.toLowerCase();
    return voidElements.indexOf(tagName) >= 0
  }

  // Comma-separated selector list that matches any of the void elements.
  var voidSelector = voidElements.join(',');

  /**
   * Looks for a void element among the node's descendants.
   *
   * @param {Node} node The node to search
   * @returns The result of node.querySelector(voidSelector), or the falsy
   * `querySelector` value itself when the node has no such method
   */
  function hasVoid (node) {
    return node.querySelector
      ? node.querySelector(voidSelector)
      : node.querySelector
  }
  // Built-in conversion rules, keyed by rule name.
  var rules = {};

  // <p>: the content set apart by a blank line on either side.
  rules.paragraph = {
    filter: 'p',

    replacement: function (content) {
      var blankLine = '\n\n';
      return blankLine + content + blankLine
    }
  };
  // <br>: the configured `br` marker followed by a newline.
  rules.lineBreak = {
    filter: 'br',

    replacement: function (content, node, options) {
      var marker = options.br;
      return marker + '\n'
    }
  };
  // <h1>-<h6>: setext underlines for levels 1 and 2 when the `setext` heading
  // style is selected, ATX `#` prefixes in every other case.
  rules.heading = {
    filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],

    replacement: function (content, node, options) {
      // The level is the digit in the tag name (H1 -> 1).
      var level = Number(node.nodeName.charAt(1));
      var useSetext = options.headingStyle === 'setext' && level < 3;

      if (!useSetext) {
        return '\n\n' + repeat('#', level) + ' ' + content + '\n\n'
      }

      // Underline as wide as the heading text: '=' for h1, '-' for h2.
      var underlineChar = level === 1 ? '=' : '-';
      var underline = repeat(underlineChar, content.length);
      return '\n\n' + content + '\n' + underline + '\n\n'
    }
  };
  // <blockquote>: every line of the content prefixed with '> '.
  rules.blockquote = {
    filter: 'blockquote',

    replacement: function (content) {
      // Drop the surrounding newlines so they are not quoted as empty lines.
      var trimmed = content.replace(/^\n+|\n+$/g, '');
      var quoted = trimmed.replace(/^/gm, '> ');
      return '\n\n' + quoted + '\n\n'
    }
  };
  // <ul>/<ol>: the list content; a list that is the last element child of an
  // <li> gets a single leading newline instead of surrounding blank lines.
  rules.list = {
    filter: ['ul', 'ol'],

    replacement: function (content, node) {
      var container = node.parentNode;
      var closesListItem =
        container.nodeName === 'LI' && container.lastElementChild === node;

      return closesListItem
        ? '\n' + content
        : '\n\n' + content + '\n\n'
    }
  };
  // <li>: the item content behind a bullet (or an ordinal inside an <ol>).
  rules.listItem = {
    filter: 'li',

    /**
     * @param {String} content The converted content of the list item
     * @param {HTMLElement} node The <li> element
     * @param {Object} options Reads `bulletListMarker`
     * @returns {String} The Markdown list item
     */
    replacement: function (content, node, options) {
      content = content
        .replace(/^\n+/, '') // remove leading newlines
        .replace(/\n+$/, '\n') // replace trailing newlines with just a single one
        // Indent continuation lines by four spaces. A single space is narrower
        // than any list marker, so nested lists and further paragraphs would
        // not be parsed as belonging to this item.
        .replace(/\n/gm, '\n    ');

      var prefix = options.bulletListMarker + ' ';
      var parent = node.parentNode;
      if (parent.nodeName === 'OL') {
        // Number the item by its position, offset by the list's `start`
        // attribute when one is present.
        var start = parent.getAttribute('start');
        var index = Array.prototype.indexOf.call(parent.children, node);
        prefix = (start ? Number(start) + index : index + 1) + '. ';
      }

      return (
        prefix + content + (node.nextSibling && !/\n$/.test(content) ? '\n' : '')
      )
    }
  };
  // <pre><code>: emitted as an indented code block when
  // options.codeBlockStyle is 'indented'.
  rules.indentedCodeBlock = {
    /**
     * @param {HTMLElement} node The candidate node
     * @param {Object} options Reads `codeBlockStyle`
     * @returns A truthy value when the node is a <pre> whose first child is a
     * <code> element and the indented style is selected
     */
    filter: function (node, options) {
      return (
        options.codeBlockStyle === 'indented' &&
        node.nodeName === 'PRE' &&
        node.firstChild &&
        node.firstChild.nodeName === 'CODE'
      )
    },

    /**
     * @param {String} content Unused; the raw text of the <code> child is read
     * instead
     * @param {HTMLElement} node The <pre> element
     * @returns {String} The code with every line indented by four spaces
     */
    replacement: function (content, node, options) {
      // Markdown only recognises an indented code block when each line starts
      // with at least four spaces; a one-space indent renders as a paragraph.
      return (
        '\n\n    ' +
        node.firstChild.textContent.replace(/\n/g, '\n    ') +
        '\n\n'
      )
    }
  };
  // <pre><code>: emitted as a fenced code block when options.codeBlockStyle
  // is 'fenced'.
  rules.fencedCodeBlock = {
    filter: function (node, options) {
      return (
        options.codeBlockStyle === 'fenced' &&
        node.nodeName === 'PRE' &&
        node.firstChild &&
        node.firstChild.nodeName === 'CODE'
      )
    },

    replacement: function (content, node, options) {
      var codeNode = node.firstChild;

      // The info string comes from a `language-xyz` class on the <code> child.
      var classMatch = (codeNode.className || '').match(/language-(\S+)/);
      var language = (classMatch || [null, ''])[1];

      var code = codeNode.textContent;
      var fenceChar = options.fence.charAt(0);

      // Make the fence one character longer than the longest run of fence
      // characters that starts a line inside the code.
      var fenceInCodeRegex = new RegExp('^' + fenceChar + '{3,}', 'gm');
      var fenceSize = 3;
      for (
        var match = fenceInCodeRegex.exec(code);
        match;
        match = fenceInCodeRegex.exec(code)
      ) {
        fenceSize = Math.max(fenceSize, match[0].length + 1);
      }

      var fence = repeat(fenceChar, fenceSize);
      var body = code.replace(/\n$/, '');

      return '\n\n' + fence + language + '\n' + body + '\n' + fence + '\n\n'
    }
  };
  // <hr>: the configured `hr` string on a line of its own.
  rules.horizontalRule = {
    filter: 'hr',

    replacement: function (content, node, options) {
      var blankLine = '\n\n';
      return blankLine + options.hr + blankLine
    }
  };
  // <a href>: emitted as [text](href "title") when options.linkStyle is
  // 'inlined'.
  rules.inlineLink = {
    filter: function (node, options) {
      if (options.linkStyle !== 'inlined') return false
      if (node.nodeName !== 'A') return false
      // Anchors without an href are left to other rules.
      return node.getAttribute('href')
    },

    replacement: function (content, node) {
      var href = node.getAttribute('href');
      var title = '';
      if (node.title) {
        title = ' "' + node.title + '"';
      }
      return '[' + content + '](' + href + title + ')'
    }
  };
  // <a href>: emitted as a reference-style link when options.linkStyle is
  // 'referenced'. Definitions are collected on the rule and returned by
  // append().
  rules.referenceLink = {
    filter: function (node, options) {
      if (options.linkStyle !== 'referenced') return false
      if (node.nodeName !== 'A') return false
      return node.getAttribute('href')
    },

    replacement: function (content, node, options) {
      var href = node.getAttribute('href');
      var title = node.title ? ' "' + node.title + '"' : '';
      var style = options.linkReferenceStyle;
      var replacement;
      var label;

      if (style === 'collapsed') {
        replacement = '[' + content + '][]';
        label = content;
      } else if (style === 'shortcut') {
        replacement = '[' + content + ']';
        label = content;
      } else {
        // Any other style: number the reference by its position in the list.
        label = this.references.length + 1;
        replacement = '[' + content + '][' + label + ']';
      }

      this.references.push('[' + label + ']: ' + href + title);
      return replacement
    },

    // Reference definitions gathered by replacement() since the last append().
    references: [],

    append: function (options) {
      if (!this.references.length) return ''

      var definitions = '\n\n' + this.references.join('\n') + '\n\n';
      this.references = []; // Reset references
      return definitions
    }
  };
  // <em>/<i>: the content wrapped in the configured emphasis delimiter;
  // whitespace-only content produces nothing.
  rules.emphasis = {
    filter: ['em', 'i'],

    replacement: function (content, node, options) {
      var delimiter = options.emDelimiter;
      return content.trim() ? delimiter + content + delimiter : ''
    }
  };
  // <strong>/<b>: the content wrapped in the configured strong delimiter;
  // whitespace-only content produces nothing.
  rules.strong = {
    filter: ['strong', 'b'],

    replacement: function (content, node, options) {
      var delimiter = options.strongDelimiter;
      return content.trim() ? delimiter + content + delimiter : ''
    }
  };
  // <code>: inline code spans. A <code> that is the only child of a <pre> is
  // excluded here.
  rules.code = {
    filter: function (node) {
      var isOnlyChild = !(node.previousSibling || node.nextSibling);
      var insidePre = node.parentNode.nodeName === 'PRE';
      return node.nodeName === 'CODE' && !(insidePre && isOnlyChild)
    },

    replacement: function (content) {
      if (!content.trim()) return ''

      var delimiter = '`';
      var padStart = '';
      var padEnd = '';
      var backtickRuns = content.match(/`+/gm);

      if (backtickRuns) {
        // Keep a backtick at either edge from merging with the delimiter.
        if (content.charAt(0) === '`') padStart = ' ';
        if (content.charAt(content.length - 1) === '`') padEnd = ' ';

        // Lengthen the delimiter until it differs from every run of backticks
        // found in the content.
        while (backtickRuns.indexOf(delimiter) !== -1) {
          delimiter += '`';
        }
      }

      return delimiter + padStart + content + padEnd + delimiter
    }
  };
  // <img>: emitted as ![alt](src "title"); images without a src produce
  // nothing.
  rules.image = {
    filter: 'img',

    replacement: function (content, node) {
      var alt = node.alt || '';
      var src = node.getAttribute('src') || '';
      var title = node.title || '';

      if (!src) return ''

      var titlePart = title ? ' "' + title + '"' : '';
      return '![' + alt + '](' + src + titlePart + ')'
    }
  };
  /**
   * Manages a collection of rules used to convert HTML to Markdown.
   *
   * @param {Object} options Reads `rules`, `blankReplacement`,
   * `keepReplacement` and `defaultReplacement`
   */
  function Rules (options) {
    this.options = options;

    // Filters registered through keep() and remove(), newest first.
    this._keep = [];
    this._remove = [];

    // Fallback rules returned by forNode().
    this.blankRule = { replacement: options.blankReplacement };
    this.keepReplacement = options.keepReplacement;
    this.defaultRule = { replacement: options.defaultReplacement };

    // Conversion rules in lookup order, seeded from options.rules.
    this.array = [];
    var ruleSet = options.rules;
    for (var name in ruleSet) this.array.push(ruleSet[name]);
  }

  Rules.prototype = {
    // Puts a rule ahead of every existing one; `key` is not used.
    add: function (key, rule) {
      this.array.unshift(rule);
    },

    // Registers a filter whose nodes are rendered with keepReplacement.
    keep: function (filter) {
      var keepRule = { filter: filter, replacement: this.keepReplacement };
      this._keep.unshift(keepRule);
    },

    // Registers a filter whose nodes are replaced with an empty string.
    remove: function (filter) {
      var removeRule = {
        filter: filter,
        replacement: function () {
          return ''
        }
      };
      this._remove.unshift(removeRule);
    },

    // Picks the rule for a node: blank nodes first, then the conversion rules,
    // the keep filters, the remove filters and finally the default rule.
    forNode: function (node) {
      if (node.isBlank) return this.blankRule

      return (
        findRule(this.array, node, this.options) ||
        findRule(this._keep, node, this.options) ||
        findRule(this._remove, node, this.options) ||
        this.defaultRule
      )
    },

    // Calls fn(rule, index) for each conversion rule in lookup order.
    forEach: function (fn) {
      for (var index = 0; index < this.array.length; index++) {
        fn(this.array[index], index);
      }
    }
  };
  /**
   * Returns the first rule in `rules` whose filter accepts the node.
   *
   * @param {Array} rules The rules to search, in priority order
   * @param {Node} node The node being converted
   * @param {Object} options Passed on to function filters
   * @returns The matching rule, or undefined when none matches
   */
  function findRule (rules, node, options) {
    var index = 0;
    while (index < rules.length) {
      var candidate = rules[index];
      if (filterValue(candidate, node, options)) return candidate
      index++;
    }
    return undefined
  }
  /**
   * Tests a node against a rule's filter. A string filter is compared with the
   * lower-cased nodeName, an array filter must contain it, and a function
   * filter is called with the rule as `this`.
   *
   * @param {Object} rule The rule whose `filter` is evaluated
   * @param {Node} node The node being tested
   * @param {Object} options Passed to function filters
   * @returns true on a match, undefined otherwise
   * @throws {TypeError} When the filter is not a string, array or function
   */
  function filterValue (rule, node, options) {
    var filter = rule.filter;
    var matched;

    if (typeof filter === 'string') {
      matched = filter === node.nodeName.toLowerCase();
    } else if (Array.isArray(filter)) {
      matched = filter.indexOf(node.nodeName.toLowerCase()) > -1;
    } else if (typeof filter === 'function') {
      matched = filter.call(rule, node, options);
    } else {
      throw new TypeError('`filter` needs to be a string, array, or function')
    }

    // A miss deliberately falls through and yields undefined.
    if (matched) return true
  }
  318. /**
  319. * The collapseWhitespace function is adapted from collapse-whitespace
  320. * by Luc Thevenard.
  321. *
  322. * The MIT License (MIT)
  323. *
  324. * Copyright (c) 2014 Luc Thevenard <lucthevenard@gmail.com>
  325. *
  326. * Permission is hereby granted, free of charge, to any person obtaining a copy
  327. * of this software and associated documentation files (the "Software"), to deal
  328. * in the Software without restriction, including without limitation the rights
  329. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  330. * copies of the Software, and to permit persons to whom the Software is
  331. * furnished to do so, subject to the following conditions:
  332. *
  333. * The above copyright notice and this permission notice shall be included in
  334. * all copies or substantial portions of the Software.
  335. *
  336. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  337. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  338. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  339. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  340. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  341. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  342. * THE SOFTWARE.
  343. */
  /**
   * collapseWhitespace(options) removes extraneous whitespace from the given
   * element. It walks the element's descendants in document order, collapsing
   * each run of spaces, tabs and line breaks in text nodes to a single space
   * and trimming that space where it borders a block element, a <br> or the
   * start or end of the element.
   *
   * @param {Object} options
   * @param {Node} options.element The element to clean up, modified in place
   * @param {Function} options.isBlock Predicate for block elements
   * @param {Function} options.isVoid Predicate for void elements
   * @param {Function} [options.isPre] Predicate for preformatted elements;
   * defaults to testing for a PRE nodeName
   */
  function collapseWhitespace (options) {
    var element = options.element;
    var isBlock = options.isBlock;
    var isVoid = options.isVoid;
    var isPre = options.isPre || function (node) {
      return node.nodeName === 'PRE'
    };

    if (!element.firstChild || isPre(element)) return

    var lastText = null; // most recent text node kept on the current line
    var afterVoid = false; // true right after an inline void element
    var previous = null;
    var current = next(previous, element, isPre);

    while (current !== element) {
      var type = current.nodeType;

      if (type === 3 || type === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
        var collapsed = current.data.replace(/[ \r\n\t]+/g, ' ');

        // Drop a leading space when nothing precedes this text or the
        // preceding text already ends with a space.
        var followsSpace = !lastText || / $/.test(lastText.data);
        if (followsSpace && !afterVoid && collapsed[0] === ' ') {
          collapsed = collapsed.slice(1);
        }

        // `collapsed` might be empty at this point.
        if (!collapsed) {
          current = remove(current);
          continue
        }

        current.data = collapsed;
        lastText = current;
      } else if (type === 1) { // Node.ELEMENT_NODE
        if (isBlock(current) || current.nodeName === 'BR') {
          // A block boundary or line break ends the line: trim the space
          // trailing the text before it.
          if (lastText) {
            lastText.data = lastText.data.replace(/ $/, '');
          }
          lastText = null;
          afterVoid = false;
        } else if (isVoid(current)) {
          // Avoid trimming space around non-block, non-BR void elements.
          lastText = null;
          afterVoid = true;
        }
      } else {
        // Every other node type is removed from the tree.
        current = remove(current);
        continue
      }

      var upcoming = next(previous, current, isPre);
      previous = current;
      current = upcoming;
    }

    // Trim the space left at the very end, removing the node if it empties.
    if (lastText) {
      lastText.data = lastText.data.replace(/ $/, '');
      if (!lastText.data) {
        remove(lastText);
      }
    }
  }
  /**
   * remove(node) detaches the given node from its parent and returns the node
   * at which the traversal continues: the next sibling, or the parent when
   * there is no next sibling.
   *
   * @param {Node} node
   * @return {Node} node
   */
  function remove (node) {
    var parent = node.parentNode;
    // Work out the successor before detaching the node.
    var successor = node.nextSibling || parent;
    parent.removeChild(node);
    return successor
  }
  /**
   * next(prev, current, isPre) returns the next node in the sequence, given the
   * current and previous nodes. Children are skipped when the walk has just
   * come back up from one of them or when `current` is preformatted.
   *
   * @param {Node} prev
   * @param {Node} current
   * @param {Function} isPre
   * @return {Node}
   */
  function next (prev, current, isPre) {
    var skipChildren = (prev && prev.parentNode === current) || isPre(current);

    if (!skipChildren && current.firstChild) {
      return current.firstChild
    }
    return current.nextSibling || current.parentNode
  }
  /*
   * Set up window for Node.js
   */
  var root = typeof window === 'undefined' ? {} : window;

  /*
   * Parsing HTML strings
   */

  /**
   * Probes whether root.DOMParser can parse a 'text/html' string.
   *
   * @returns {Boolean} true when parsing yields a truthy result, false when it
   * yields a falsy one or throws
   */
  function canParseHTMLNatively () {
    var Parser = root.DOMParser;

    // Adapted from https://gist.github.com/1129031
    // Firefox/Opera/IE throw errors on unsupported types
    try {
      // WebKit returns null on unsupported types
      if (new Parser().parseFromString('', 'text/html')) return true
    } catch (e) {}

    return false
  }
  /**
   * Builds a stand-in for DOMParser backed by jsdom.
   *
   * @returns {Function} A constructor whose instances expose
   * parseFromString(string), returning the document jsdom parsed
   */
  function createHTMLParser () {
    function Parser () {}

    var JSDOM = require('jsdom').JSDOM;
    Parser.prototype.parseFromString = function (string) {
      return new JSDOM(string).window.document
    };

    return Parser
  }

  // Use the native parser when it handles HTML, the jsdom one otherwise.
  var HTMLParser;
  if (canParseHTMLNatively()) {
    HTMLParser = root.DOMParser;
  } else {
    HTMLParser = createHTMLParser();
  }
  /**
   * Produces the root element that conversion starts from: strings are parsed
   * into a wrapper element, nodes are deep-cloned. Whitespace in the result is
   * collapsed before it is returned.
   *
   * @param {String|Node} input The HTML string or node to prepare
   * @returns {Node} The prepared root node
   */
  function RootNode (input) {
    var rootElement;

    if (typeof input !== 'string') {
      rootElement = input.cloneNode(true);
    } else {
      // DOM parsers arrange elements in the <head> and <body>.
      // Wrapping in a custom element ensures elements are reliably arranged in
      // a single element.
      var wrapped = '<x-turndown id="turndown-root">' + input + '</x-turndown>';
      var doc = htmlParser().parseFromString(wrapped, 'text/html');
      rootElement = doc.getElementById('turndown-root');
    }

    collapseWhitespace({
      element: rootElement,
      isBlock: isBlock,
      isVoid: isVoid
    });

    return rootElement
  }
  // Parser instance shared by every call to htmlParser().
  var _htmlParser;

  /**
   * Returns the shared HTMLParser instance, creating it on first use.
   */
  function htmlParser () {
    if (!_htmlParser) {
      _htmlParser = new HTMLParser();
    }
    return _htmlParser
  }
  /**
   * Annotates a DOM node with the flags the converter reads later: `isBlock`,
   * `isCode`, `isBlank` and `flankingWhitespace`.
   *
   * @param {Node} node The node to annotate, modified in place
   * @returns {Node} The same node
   */
  function Node (node) {
    node.isBlock = isBlock(node);

    // A node counts as code when it is a <code> element or its parent does.
    var isCodeElement = node.nodeName.toLowerCase() === 'code';
    node.isCode = isCodeElement || node.parentNode.isCode;

    node.isBlank = isBlank(node);
    node.flankingWhitespace = flankingWhitespace(node);
    return node
  }
  /**
   * Reports whether a node is blank: its text is empty or whitespace only, it
   * is not one of the element types exempted below, and it neither is nor
   * contains a void element.
   *
   * @param {Node} node The node to inspect
   * @returns {Boolean}
   */
  function isBlank (node) {
    // These elements are never treated as blank, even when they hold no text.
    var neverBlank = ['A', 'TH', 'TD', 'IFRAME', 'SCRIPT', 'AUDIO', 'VIDEO'];

    if (neverBlank.indexOf(node.nodeName) !== -1) return false
    if (!/^\s*$/i.test(node.textContent)) return false
    if (isVoid(node)) return false
    return !hasVoid(node)
  }
  /**
   * Decides which single spaces must be re-added around an inline node's
   * converted content. Block nodes never receive flanking whitespace.
   *
   * @param {Node} node A node already carrying `isBlock` and `isBlank`
   * @returns {Object} `{ leading, trailing }`, each '' or ' '
   */
  function flankingWhitespace (node) {
    var flank = { leading: '', trailing: '' };
    if (node.isBlock) return flank

    var text = node.textContent;
    var startsWithSpace = /^\s/.test(text);
    var endsWithSpace = /\s$/.test(text);

    // A blank node with whitespace at both edges contributes one space only.
    var blankWithSpaces = node.isBlank && startsWithSpace && endsWithSpace;

    if (startsWithSpace && !isFlankedByWhitespace('left', node)) {
      flank.leading = ' ';
    }
    if (!blankWithSpaces && endsWithSpace && !isFlankedByWhitespace('right', node)) {
      flank.trailing = ' ';
    }

    return flank
  }
  /**
   * Checks whether the sibling on the given side already supplies a space next
   * to the node.
   *
   * @param {String} side 'left' for the previous sibling; any other value
   * selects the next sibling
   * @param {Node} node The node whose neighbour is inspected
   * @returns A boolean for text siblings and non-block element siblings,
   * undefined when there is no sibling or it is of another kind
   */
  function isFlankedByWhitespace (side, node) {
    var onLeft = side === 'left';
    var sibling = onLeft ? node.previousSibling : node.nextSibling;
    // Look at the edge of the sibling that touches the node.
    var edgeSpace = onLeft ? / $/ : /^ /;

    if (!sibling) return undefined

    if (sibling.nodeType === 3) {
      return edgeSpace.test(sibling.nodeValue)
    }
    if (sibling.nodeType === 1 && !isBlock(sibling)) {
      return edgeSpace.test(sibling.textContent)
    }
    return undefined
  }
  // Borrowed so array-like childNodes collections can be reduced.
  var reduce = Array.prototype.reduce;

  // Match the run of newlines (possibly empty) at either end of a string.
  var leadingNewLinesRegExp = /^\n*/;
  var trailingNewLinesRegExp = /\n*$/;

  // [pattern, replacement] pairs applied in order by TurndownService#escape.
  // The backslash entry comes first so that backslashes added by later entries
  // are not escaped a second time.
  var escapes = [
    [/\\/g, '\\\\'], // backslashes
    [/\*/g, '\\*'], // asterisks
    [/^-/g, '\\-'], // leading hyphen
    [/^\+ /g, '\\+ '], // leading plus followed by a space
    [/^(=+)/g, '\\$1'], // leading run of equals signs
    [/^(#{1,6}) /g, '\\$1 '], // leading one to six hashes followed by a space
    [/`/g, '\\`'], // backticks
    [/^~~~/g, '\\~~~'], // leading triple tilde
    [/\[/g, '\\['], // opening square brackets
    [/\]/g, '\\]'], // closing square brackets
    [/^>/g, '\\>'], // leading greater-than sign
    [/_/g, '\\_'], // underscores
    [/^(\d+)\. /g, '$1\\. '] // leading digits, a dot and a space
  ];
  /**
   * Creates a converter. May be called with or without `new`.
   *
   * @param {Object} [options] Overrides for the defaults declared below
   * @returns {TurndownService}
   */
  function TurndownService (options) {
    if (!(this instanceof TurndownService)) return new TurndownService(options)

    var defaults = {
      rules: rules,
      headingStyle: 'setext',
      hr: '* * *',
      bulletListMarker: '*',
      codeBlockStyle: 'indented',
      fence: '```',
      emDelimiter: '_',
      strongDelimiter: '**',
      linkStyle: 'inlined',
      linkReferenceStyle: 'full',
      // Two trailing spaces: Markdown only treats a line ending as a hard line
      // break when at least two spaces precede the newline.
      br: '  ',
      // Blank nodes: a paragraph break for blocks, nothing for inline nodes.
      blankReplacement: function (content, node) {
        return node.isBlock ? '\n\n' : ''
      },
      // Kept nodes are emitted as their original HTML.
      keepReplacement: function (content, node) {
        return node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML
      },
      // Nodes without a matching rule pass their converted content through.
      defaultReplacement: function (content, node) {
        return node.isBlock ? '\n\n' + content + '\n\n' : content
      }
    };

    this.options = extend({}, defaults, options);
    this.rules = new Rules(this.options);
  }
  TurndownService.prototype = {
    /**
     * The entry point for converting a string or DOM node to Markdown
     * @public
     * @param {String|HTMLElement} input The string or DOM node to convert
     * @returns A Markdown representation of the input
     * @type String
     */
    turndown: function (input) {
      if (!canConvert(input)) {
        throw new TypeError(
          input + ' is not a string, or an element/document/fragment node.'
        )
      }

      // An empty string needs no parsing.
      if (input === '') return ''

      var rootNode = new RootNode(input);
      var markdown = process.call(this, rootNode);
      return postProcess.call(this, markdown)
    },

    /**
     * Add one or more plugins
     * @public
     * @param {Function|Array} plugin The plugin or array of plugins to add
     * @returns The Turndown instance for chaining
     * @type Object
     */
    use: function (plugin) {
      if (typeof plugin === 'function') {
        plugin(this);
        return this
      }

      if (!Array.isArray(plugin)) {
        throw new TypeError('plugin must be a Function or an Array of Functions')
      }

      // Register each entry in turn; nested arrays are handled recursively.
      for (var index = 0; index < plugin.length; index++) {
        this.use(plugin[index]);
      }
      return this
    },

    /**
     * Adds a rule
     * @public
     * @param {String} key The unique key of the rule
     * @param {Object} rule The rule
     * @returns The Turndown instance for chaining
     * @type Object
     */
    addRule: function (key, rule) {
      this.rules.add(key, rule);
      return this
    },

    /**
     * Keep a node (as HTML) that matches the filter
     * @public
     * @param {String|Array|Function} filter The filter selecting nodes to keep
     * @returns The Turndown instance for chaining
     * @type Object
     */
    keep: function (filter) {
      this.rules.keep(filter);
      return this
    },

    /**
     * Remove a node that matches the filter
     * @public
     * @param {String|Array|Function} filter The filter selecting nodes to remove
     * @returns The Turndown instance for chaining
     * @type Object
     */
    remove: function (filter) {
      this.rules.remove(filter);
      return this
    },

    /**
     * Escapes Markdown syntax
     * @public
     * @param {String} string The string to escape
     * @returns A string with Markdown syntax escaped
     * @type String
     */
    escape: function (string) {
      var escaped = string;
      // Apply each [pattern, replacement] pair in table order.
      for (var index = 0; index < escapes.length; index++) {
        var pair = escapes[index];
        escaped = escaped.replace(pair[0], pair[1]);
      }
      return escaped
    }
  };
  /**
   * Reduces a DOM node down to its Markdown string equivalent by converting
   * each child node in order and joining the results.
   * @private
   * @param {HTMLElement} parentNode The node to convert
   * @returns A Markdown representation of the node
   * @type String
   */
  function process (parentNode) {
    var self = this;

    function appendChild (output, child) {
      var node = new Node(child);
      var replacement = '';

      switch (node.nodeType) {
        case 3:
          // Text inside code is emitted verbatim; other text is escaped.
          replacement = node.isCode ? node.nodeValue : self.escape(node.nodeValue);
          break
        case 1:
          replacement = replacementForNode.call(self, node);
          break
      }

      return join(output, replacement)
    }

    return reduce.call(parentNode.childNodes, appendChild, '')
  }
  /**
   * Appends strings as each rule requires and trims the output
   * @private
   * @param {String} output The conversion output
   * @returns A trimmed version of the output
   * @type String
   */
  function postProcess (output) {
    var service = this;

    // Rules may contribute trailing content through an optional append().
    service.rules.forEach(function (rule) {
      if (typeof rule.append !== 'function') return
      output = join(output, rule.append(service.options));
    });

    // Leading tabs and line breaks go; leading spaces are left in place. All
    // trailing whitespace goes.
    var withoutLeading = output.replace(/^[\t\r\n]+/, '');
    return withoutLeading.replace(/[\t\r\n\s]+$/, '')
  }
  /**
   * Converts an element node to its Markdown equivalent
   * @private
   * @param {HTMLElement} node The node to convert
   * @returns A Markdown representation of the node
   * @type String
   */
  function replacementForNode (node) {
    var rule = this.rules.forNode(node);
    var content = process.call(this, node);
    var flank = node.flankingWhitespace;

    // When a flanking space is re-added outside the replacement, strip the
    // whitespace at the edges of the content.
    var hasFlank = flank.leading || flank.trailing;
    if (hasFlank) content = content.trim();

    var converted = rule.replacement(content, node, this.options);
    return flank.leading + converted + flank.trailing
  }
  /**
   * Determines the new lines between the current output and the replacement:
   * the longer of the output's trailing run and the replacement's leading run
   * of newlines, capped at two.
   * @private
   * @param {String} output The current conversion output
   * @param {String} replacement The string to append to the output
   * @returns The whitespace to separate the current output and the replacement
   * @type String
   */
  function separatingNewlines (output, replacement) {
    var trailing = output.match(trailingNewLinesRegExp)[0];
    var leading = replacement.match(leadingNewLinesRegExp)[0];

    // Both runs consist solely of newlines, so comparing lengths picks the
    // same string a lexicographic sort would place last.
    var longest = leading.length > trailing.length ? leading : trailing;
    return longest.length < 2 ? longest : '\n\n'
  }
  /**
   * Concatenates two strings, replacing the newlines where they meet with the
   * separator chosen by separatingNewlines().
   *
   * @param {String} string1 The text so far
   * @param {String} string2 The text to append
   * @returns {String} The joined text
   */
  function join (string1, string2) {
    var separator = separatingNewlines(string1, string2);

    // Remove trailing/leading newlines and replace with separator
    var head = string1.replace(trailingNewLinesRegExp, '');
    var tail = string2.replace(leadingNewLinesRegExp, '');

    return head + separator + tail
  }
  /**
   * Determines whether an input can be converted
   * @private
   * @param {String|HTMLElement} input The candidate input
   * @returns true for strings and for nodes whose nodeType is 1 (element),
   * 9 (document) or 11 (document fragment); a falsy value otherwise
   * @type Boolean
   */
  function canConvert (input) {
    if (input == null) return false
    if (typeof input === 'string') return true

    var type = input.nodeType;
    return type && (type === 1 || type === 9 || type === 11)
  }
  758. module.exports = TurndownService;