turndown.browser.umd.js 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927
  1. (function (global, factory) {
  2. typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory() :
  3. typeof define === 'function' && define.amd ? define(factory) :
  4. (global = global || self, global.TurndownService = factory());
  5. }(this, (function () { 'use strict';
  /**
   * Copies the own enumerable properties of every source object onto
   * `destination`. Later sources override earlier ones. Inherited properties
   * are not copied, and `null`/`undefined` sources are skipped.
   *
   * @param {Object} destination The object that receives the properties
   * @param {...Object} sources Objects to copy from (passed as extra arguments)
   * @returns {Object} The same `destination` object, mutated
   */
  function extend (destination) {
    // Borrow the method from Object.prototype: calling source.hasOwnProperty
    // directly throws for objects created with Object.create(null) and gives
    // wrong answers when a source defines its own `hasOwnProperty` key.
    var hasOwn = Object.prototype.hasOwnProperty;
    for (var i = 1; i < arguments.length; i++) {
      var source = arguments[i];
      for (var key in source) {
        if (hasOwn.call(source, key)) destination[key] = source[key];
      }
    }
    return destination;
  }
  /**
   * Builds a string made of `count` copies of `character`.
   *
   * @param {String} character The text to repeat
   * @param {Number} count How many copies to produce
   * @returns {String} The repeated text
   */
  function repeat (character, count) {
    // Joining N + 1 empty slots produces exactly N separators.
    var slots = new Array(count + 1);
    return slots.join(character);
  }
  // Lower-case tag names that isBlock() reports as block-level elements.
  var blockElements = [
    'address', 'article', 'aside', 'audio', 'blockquote',
    'body', 'canvas', 'center', 'dd', 'dir',
    'div', 'dl', 'dt', 'fieldset', 'figcaption',
    'figure', 'footer', 'form', 'frameset',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'header', 'hgroup', 'hr', 'html', 'isindex',
    'li', 'main', 'menu', 'nav', 'noframes',
    'noscript', 'ol', 'output', 'p', 'pre',
    'section', 'table', 'tbody', 'td', 'tfoot',
    'th', 'thead', 'tr', 'ul'
  ];

  /**
   * Tells whether a node's tag name is in the block-element list.
   *
   * @param {Node} node Any object exposing `nodeName`
   * @returns {Boolean} true when the lower-cased tag name is listed
   */
  function isBlock (node) {
    var tagName = node.nodeName.toLowerCase();
    return blockElements.indexOf(tagName) >= 0;
  }
  // Lower-case tag names of elements that never have content.
  var voidElements = [
    'area', 'base', 'br', 'col',
    'command', 'embed', 'hr', 'img',
    'input', 'keygen', 'link', 'meta',
    'param', 'source', 'track', 'wbr'
  ];

  /**
   * Tells whether a node's tag name is in the void-element list.
   *
   * @param {Node} node Any object exposing `nodeName`
   * @returns {Boolean} true when the lower-cased tag name is listed
   */
  function isVoid (node) {
    var tagName = node.nodeName.toLowerCase();
    return voidElements.indexOf(tagName) > -1;
  }

  // Comma-separated selector list that matches any void element.
  var voidSelector = voidElements.join(',');

  /**
   * Looks for a void element among a node's descendants.
   *
   * @param {Node} node The node to search in
   * @returns {Element|null|undefined} The first matching descendant; null when
   *   there is none; the falsy `querySelector` value when the node cannot be
   *   queried at all
   */
  function hasVoid (node) {
    var query = node.querySelector;
    if (!query) return query;
    return node.querySelector(voidSelector);
  }
  // Registry of the built-in conversion rules, keyed by rule name.
  var rules = {};

  // <p>: the content is set off from its neighbours by blank lines.
  rules.paragraph = {
    filter: 'p',

    replacement: function (content) {
      var blankLine = '\n\n';
      return blankLine + content + blankLine;
    }
  };

  // <br>: the configured line-break marker followed by a newline.
  rules.lineBreak = {
    filter: 'br',

    replacement: function (content, node, options) {
      var marker = options.br;
      return marker + '\n';
    }
  };
  // <h1>-<h6>: setext underlines for levels 1 and 2 when that style is
  // selected, ATX "#" prefixes in every other case.
  rules.heading = {
    filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],

    replacement: function (content, node, options) {
      // The heading level is the digit in the tag name (H1 -> 1).
      var level = Number(node.nodeName.charAt(1));
      var useSetext = options.headingStyle === 'setext' && level < 3;

      if (!useSetext) {
        return '\n\n' + repeat('#', level) + ' ' + content + '\n\n';
      }

      var underlineChar = level === 1 ? '=' : '-';
      var underline = repeat(underlineChar, content.length);
      return '\n\n' + content + '\n' + underline + '\n\n';
    }
  };
  // <blockquote>: every line of the content is prefixed with "> ".
  rules.blockquote = {
    filter: 'blockquote',

    replacement: function (content) {
      // Drop the surrounding newlines first so that no empty quoted lines
      // are produced at either end.
      var trimmed = content.replace(/^\n+|\n+$/g, '');
      var quoted = trimmed.replace(/^/gm, '> ');
      return '\n\n' + quoted + '\n\n';
    }
  };
  // <ul>/<ol>: a list that is the last element of an <li> is attached with a
  // single newline; any other list is surrounded by blank lines.
  rules.list = {
    filter: ['ul', 'ol'],

    replacement: function (content, node) {
      var container = node.parentNode;
      var closesListItem =
        container.nodeName === 'LI' && container.lastElementChild === node;

      return closesListItem ? '\n' + content : '\n\n' + content + '\n\n';
    }
  };
  /**
   * <li>: renders one list item.
   *
   * The marker is `options.bulletListMarker` for unordered lists, or the item
   * number (honouring the parent's `start` attribute) for ordered lists.
   * Continuation lines are indented by the width of the marker so that
   * nested lists and additional paragraphs stay inside the item.
   */
  rules.listItem = {
    filter: 'li',

    replacement: function (content, node, options) {
      var prefix = options.bulletListMarker + ' ';
      var parent = node.parentNode;

      if (parent.nodeName === 'OL') {
        var start = parent.getAttribute('start');
        var first = start ? parseInt(start, 10) : 1;
        // A non-numeric `start` is invalid; count from 1 rather than
        // emitting a "NaN." marker.
        if (isNaN(first)) first = 1;
        var index = Array.prototype.indexOf.call(parent.children, node);
        prefix = (first + index) + '. ';
      }

      // Markdown only treats a following line as part of the item when it is
      // indented at least as far as the item's content, i.e. by the marker
      // width. A fixed one-space indent is too small for every marker.
      var indent = new Array(prefix.length + 1).join(' ');

      content = content
        .replace(/^\n+/, '') // remove leading newlines
        .replace(/\n+$/, '\n') // replace trailing newlines with just a single one
        .replace(/\n/gm, '\n' + indent); // indent continuation lines

      return (
        prefix + content + (node.nextSibling && !/\n$/.test(content) ? '\n' : '')
      );
    }
  };
  /**
   * <pre><code>: rendered as an indented code block when
   * `options.codeBlockStyle` is 'indented'. The text of the <code> child is
   * used verbatim (the already-converted `content` is ignored).
   */
  rules.indentedCodeBlock = {
    filter: function (node, options) {
      return (
        options.codeBlockStyle === 'indented' &&
        node.nodeName === 'PRE' &&
        node.firstChild &&
        node.firstChild.nodeName === 'CODE'
      );
    },

    replacement: function (content, node, options) {
      // Markdown requires at least four spaces of indentation for an
      // indented code block; a smaller indent is parsed as a paragraph.
      var indent = '    ';
      var code = node.firstChild.textContent;
      return '\n\n' + indent + code.replace(/\n/g, '\n' + indent) + '\n\n';
    }
  };
  // <pre><code>: rendered as a fenced code block when
  // `options.codeBlockStyle` is 'fenced'.
  rules.fencedCodeBlock = {
    filter: function (node, options) {
      if (options.codeBlockStyle !== 'fenced') return false;
      if (node.nodeName !== 'PRE') return false;
      return node.firstChild && node.firstChild.nodeName === 'CODE';
    },

    replacement: function (content, node, options) {
      var className = node.firstChild.className || '';
      var languageMatch = className.match(/language-(\S+)/);
      var language = languageMatch ? languageMatch[1] : '';
      var code = node.firstChild.textContent;

      var fenceChar = options.fence.charAt(0);
      // Runs of three or more fence characters at the start of a line would
      // close the block early, so the fence must be longer than any of them.
      var runs = code.match(new RegExp('^' + fenceChar + '{3,}', 'gm')) || [];
      var fenceSize = 3;
      for (var i = 0; i < runs.length; i++) {
        if (runs[i].length >= fenceSize) fenceSize = runs[i].length + 1;
      }
      var fence = repeat(fenceChar, fenceSize);

      return (
        '\n\n' + fence + language + '\n' +
        code.replace(/\n$/, '') +
        '\n' + fence + '\n\n'
      );
    }
  };
  // <hr>: the configured rule marker on a line of its own.
  rules.horizontalRule = {
    filter: 'hr',

    replacement: function (content, node, options) {
      var blankLine = '\n\n';
      return blankLine + options.hr + blankLine;
    }
  };
  // <a href>: rendered as [text](href "title") when `options.linkStyle` is
  // 'inlined'.
  rules.inlineLink = {
    filter: function (node, options) {
      if (options.linkStyle !== 'inlined') return false;
      if (node.nodeName !== 'A') return false;
      // Anchors without an href are left to other rules.
      return node.getAttribute('href');
    },

    replacement: function (content, node) {
      var target = node.getAttribute('href');
      var titlePart = '';
      if (node.title) titlePart = ' "' + node.title + '"';
      return '[' + content + '](' + target + titlePart + ')';
    }
  };
  // <a href>: rendered as a reference-style link when `options.linkStyle` is
  // 'referenced'. The definitions are collected in `references` and emitted
  // by `append`.
  rules.referenceLink = {
    filter: function (node, options) {
      if (options.linkStyle !== 'referenced') return false;
      if (node.nodeName !== 'A') return false;
      return node.getAttribute('href');
    },

    replacement: function (content, node, options) {
      var target = node.getAttribute('href');
      var titlePart = node.title ? ' "' + node.title + '"' : '';
      var style = options.linkReferenceStyle;
      var inlineText;
      var definition;

      if (style === 'collapsed') {
        inlineText = '[' + content + '][]';
        definition = '[' + content + ']: ' + target + titlePart;
      } else if (style === 'shortcut') {
        inlineText = '[' + content + ']';
        definition = '[' + content + ']: ' + target + titlePart;
      } else {
        // Any other style: number the references in order of appearance.
        var id = this.references.length + 1;
        inlineText = '[' + content + '][' + id + ']';
        definition = '[' + id + ']: ' + target + titlePart;
      }

      this.references.push(definition);
      return inlineText;
    },

    references: [],

    append: function (options) {
      if (!this.references.length) return '';

      var block = '\n\n' + this.references.join('\n') + '\n\n';
      this.references = []; // Reset references
      return block;
    }
  };
  // <em>/<i>: content wrapped in the emphasis delimiter; nothing is emitted
  // for whitespace-only content.
  rules.emphasis = {
    filter: ['em', 'i'],

    replacement: function (content, node, options) {
      var isEmpty = !content.trim();
      return isEmpty ? '' : options.emDelimiter + content + options.emDelimiter;
    }
  };

  // <strong>/<b>: content wrapped in the strong delimiter; nothing is
  // emitted for whitespace-only content.
  rules.strong = {
    filter: ['strong', 'b'],

    replacement: function (content, node, options) {
      var isEmpty = !content.trim();
      return isEmpty ? '' : options.strongDelimiter + content + options.strongDelimiter;
    }
  };
  // <code> used inline. A <code> that is the only child of a <pre> is a code
  // block and is not matched here.
  rules.code = {
    filter: function (node) {
      var isOnlyChild = !(node.previousSibling || node.nextSibling);
      var insidePre = node.parentNode.nodeName === 'PRE';
      if (node.nodeName !== 'CODE') return false;
      return !(insidePre && isOnlyChild);
    },

    replacement: function (content) {
      if (!content.trim()) return '';

      var backtickRuns = content.match(/`+/gm);
      if (!backtickRuns) return '`' + content + '`';

      // Content touching the delimiter with a backtick needs a space of
      // padding, and the delimiter must differ from every run inside it.
      var leadingSpace = /^`/.test(content) ? ' ' : '';
      var trailingSpace = /`$/.test(content) ? ' ' : '';
      var delimiter = '`';
      while (backtickRuns.indexOf(delimiter) !== -1) delimiter += '`';

      return delimiter + leadingSpace + content + trailingSpace + delimiter;
    }
  };
  // <img>: rendered as ![alt](src "title"); images without a src are dropped.
  rules.image = {
    filter: 'img',

    replacement: function (content, node) {
      var alt = node.alt || '';
      var src = node.getAttribute('src') || '';
      var title = node.title || '';

      if (!src) return '';

      var titlePart = '';
      if (title) titlePart = ' "' + title + '"';
      return '![' + alt + '](' + src + titlePart + ')';
    }
  };
  /**
   * Holds the rules used to convert HTML to Markdown and picks the one that
   * applies to a given node.
   *
   * @param {Object} options Conversion options; `rules`, `blankReplacement`,
   *   `keepReplacement` and `defaultReplacement` are read here
   */
  function Rules (options) {
    this.options = options;
    this._keep = [];
    this._remove = [];

    this.blankRule = { replacement: options.blankReplacement };
    this.keepReplacement = options.keepReplacement;
    this.defaultRule = { replacement: options.defaultReplacement };

    this.array = [];
    var builtIn = options.rules;
    for (var name in builtIn) {
      this.array.push(builtIn[name]);
    }
  }

  Rules.prototype = {
    // Registers a rule ahead of all existing ones. `key` is not stored.
    add: function (key, rule) {
      this.array.unshift(rule);
    },

    // Registers a filter whose matches are rendered with keepReplacement.
    keep: function (filter) {
      var keepRule = { filter: filter, replacement: this.keepReplacement };
      this._keep.unshift(keepRule);
    },

    // Registers a filter whose matches produce no output.
    remove: function (filter) {
      var removeRule = {
        filter: filter,
        replacement: function () {
          return '';
        }
      };
      this._remove.unshift(removeRule);
    },

    // Returns the rule for a node: the blank rule for blank nodes, otherwise
    // the first match among regular, keep and remove rules (in that order),
    // otherwise the default rule.
    forNode: function (node) {
      if (node.isBlank) return this.blankRule;

      var groups = [this.array, this._keep, this._remove];
      for (var i = 0; i < groups.length; i++) {
        var match = findRule(groups[i], node, this.options);
        if (match) return match;
      }
      return this.defaultRule;
    },

    // Calls fn(rule, index) for every regular rule.
    forEach: function (fn) {
      var index = 0;
      while (index < this.array.length) {
        fn(this.array[index], index);
        index++;
      }
    }
  };
  /**
   * Returns the first rule in `rules` whose filter matches `node`.
   *
   * @param {Array} rules Rules to try, in order
   * @param {Node} node The node to match
   * @param {Object} options Options handed to function filters
   * @returns {Object|undefined} The matching rule, or undefined
   */
  function findRule (rules, node, options) {
    var index = 0;
    while (index < rules.length) {
      var candidate = rules[index];
      if (filterValue(candidate, node, options)) return candidate;
      index++;
    }
    return undefined;
  }

  /**
   * Evaluates a rule's filter against a node. A string filter is compared
   * with the lower-cased tag name, an array filter must contain it, and a
   * function filter is called with the rule as `this`.
   *
   * @param {Object} rule The rule whose `filter` is evaluated
   * @param {Node} node The node to test
   * @param {Object} options Options handed to function filters
   * @returns {Boolean|undefined} true on a match, undefined otherwise
   * @throws {TypeError} When the filter is not a string, array or function
   */
  function filterValue (rule, node, options) {
    var filter = rule.filter;
    var matched;

    if (typeof filter === 'string') {
      matched = filter === node.nodeName.toLowerCase();
    } else if (Array.isArray(filter)) {
      matched = filter.indexOf(node.nodeName.toLowerCase()) > -1;
    } else if (typeof filter === 'function') {
      matched = filter.call(rule, node, options);
    } else {
      throw new TypeError('`filter` needs to be a string, array, or function');
    }

    if (matched) return true;
  }
  322. /**
  323. * The collapseWhitespace function is adapted from collapse-whitespace
  324. * by Luc Thevenard.
  325. *
  326. * The MIT License (MIT)
  327. *
  328. * Copyright (c) 2014 Luc Thevenard <lucthevenard@gmail.com>
  329. *
  330. * Permission is hereby granted, free of charge, to any person obtaining a copy
  331. * of this software and associated documentation files (the "Software"), to deal
  332. * in the Software without restriction, including without limitation the rights
  333. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  334. * copies of the Software, and to permit persons to whom the Software is
  335. * furnished to do so, subject to the following conditions:
  336. *
  337. * The above copyright notice and this permission notice shall be included in
  338. * all copies or substantial portions of the Software.
  339. *
  340. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  341. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  342. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  343. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  344. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  345. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  346. * THE SOFTWARE.
  347. */
  /**
   * collapseWhitespace(options) removes extraneous whitespace from the given
   * element, editing the text nodes of its subtree in place.
   *
   * @param {Object} options
   * @param {Node} options.element The root of the subtree to clean
   * @param {Function} options.isBlock Predicate: is the node a block element?
   * @param {Function} options.isVoid Predicate: is the node a void element?
   * @param {Function} [options.isPre] Predicate: must the node's content be
   *   left untouched? Defaults to a check for <pre>.
   */
  function collapseWhitespace (options) {
    var element = options.element;
    var isBlock = options.isBlock;
    var isVoid = options.isVoid;
    var isPre = options.isPre || function (candidate) {
      return candidate.nodeName === 'PRE';
    };

    if (!element.firstChild || isPre(element)) return;

    var lastText = null; // most recent text node that may still lose a trailing space
    var afterVoid = false; // true right after an inline void element
    var previous = null;
    var current = next(previous, element, isPre);

    while (current !== element) {
      var kind = current.nodeType;

      if (kind === 3 || kind === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
        var text = current.data.replace(/[ \r\n\t]+/g, ' ');

        // Drop a leading space when the preceding text already ends with one
        // (or there is no preceding text), unless a void element intervenes.
        var followsSpace = !lastText || / $/.test(lastText.data);
        if (followsSpace && !afterVoid && text[0] === ' ') {
          text = text.slice(1);
        }

        // `text` might be empty at this point.
        if (!text) {
          current = remove(current);
          continue;
        }

        current.data = text;
        lastText = current;
      } else if (kind === 1) { // Node.ELEMENT_NODE
        if (isBlock(current) || current.nodeName === 'BR') {
          if (lastText) {
            lastText.data = lastText.data.replace(/ $/, '');
          }
          lastText = null;
          afterVoid = false;
        } else if (isVoid(current)) {
          // Avoid trimming space around non-block, non-BR void elements.
          lastText = null;
          afterVoid = true;
        }
      } else {
        // Anything that is neither text nor element is discarded.
        current = remove(current);
        continue;
      }

      var upcoming = next(previous, current, isPre);
      previous = current;
      current = upcoming;
    }

    if (lastText) {
      lastText.data = lastText.data.replace(/ $/, '');
      if (!lastText.data) {
        remove(lastText);
      }
    }
  }
  /**
   * remove(node) detaches the given node from its parent and returns the
   * node at which a traversal should continue: the next sibling if there is
   * one, the parent otherwise.
   *
   * @param {Node} node
   * @return {Node} node
   */
  function remove (node) {
    var parent = node.parentNode;
    // Must be read before the node is detached.
    var successor = node.nextSibling || parent;
    parent.removeChild(node);
    return successor;
  }
  /**
   * next(prev, current, isPre) returns the node that follows `current` in
   * the traversal. The children of `current` are skipped when the traversal
   * has just come back up from one of them (`prev` is a child of `current`)
   * or when `isPre(current)` is truthy.
   *
   * @param {Node} prev
   * @param {Node} current
   * @param {Function} isPre
   * @return {Node}
   */
  function next (prev, current, isPre) {
    var skipChildren = (prev && prev.parentNode === current) || isPre(current);
    if (!skipChildren && current.firstChild) {
      return current.firstChild;
    }
    return current.nextSibling || current.parentNode;
  }
  /*
   * The global `window` when one exists, an empty object otherwise, so that
   * property lookups on it are always safe.
   */
  var root = typeof window === 'undefined' ? {} : window;

  /*
   * Parsing HTML strings
   */

  /**
   * Detects whether `root.DOMParser` can parse 'text/html'.
   *
   * @returns {Boolean} true when parsing an empty HTML string yields a
   *   truthy result
   */
  function canParseHTMLNatively () {
    var Parser = root.DOMParser;

    // Adapted from https://gist.github.com/1129031
    try {
      // WebKit returns null on unsupported types
      return Boolean(new Parser().parseFromString('', 'text/html'));
    } catch (e) {
      // Firefox/Opera/IE throw errors on unsupported types, and a missing
      // DOMParser throws as well; either way native parsing is unavailable.
      return false;
    }
  }
  /**
   * Builds a minimal stand-in for DOMParser whose parseFromString(string)
   * writes the markup into a fresh document and returns that document.
   *
   * @returns {Function} The parser constructor
   */
  function createHTMLParser () {
    var Parser = function () {};

    if (!shouldUseActiveX()) {
      Parser.prototype.parseFromString = function (string) {
        var doc = document.implementation.createHTMLDocument('');
        doc.open();
        doc.write(string);
        doc.close();
        return doc;
      };
      return Parser;
    }

    Parser.prototype.parseFromString = function (string) {
      var doc = new window.ActiveXObject('htmlfile');
      doc.designMode = 'on'; // disable on-page scripts
      doc.open();
      doc.write(string);
      doc.close();
      return doc;
    };
    return Parser;
  }

  /**
   * Decides whether the ActiveX 'htmlfile' fallback is needed: only when
   * opening a freshly created HTML document throws and `window.ActiveXObject`
   * is available.
   *
   * @returns {Boolean}
   */
  function shouldUseActiveX () {
    try {
      document.implementation.createHTMLDocument('').open();
      return false;
    } catch (e) {
      return window.ActiveXObject ? true : false;
    }
  }
  // Parser constructor: the native DOMParser when it handles 'text/html',
  // the document-based fallback otherwise.
  var HTMLParser = canParseHTMLNatively() ? root.DOMParser : createHTMLParser();

  /**
   * Produces the root node for a conversion: a parsed wrapper element for
   * string input, a deep clone for node input. Whitespace in the result is
   * collapsed before it is returned.
   *
   * @param {String|Node} input HTML markup or a DOM node
   * @returns {Node} The root node to convert
   */
  function RootNode (input) {
    var root;

    if (typeof input !== 'string') {
      root = input.cloneNode(true);
    } else {
      // DOM parsers arrange elements in the <head> and <body>.
      // Wrapping in a custom element ensures elements are reliably arranged in
      // a single element.
      var wrapped = '<x-turndown id="turndown-root">' + input + '</x-turndown>';
      var doc = htmlParser().parseFromString(wrapped, 'text/html');
      root = doc.getElementById('turndown-root');
    }

    collapseWhitespace({ element: root, isBlock: isBlock, isVoid: isVoid });
    return root;
  }

  // Parser instance, created on first use and then reused.
  var _htmlParser;

  function htmlParser () {
    if (!_htmlParser) _htmlParser = new HTMLParser();
    return _htmlParser;
  }
  /**
   * Annotates a DOM node in place with the flags the converter relies on:
   * `isBlock`, `isCode`, `isBlank` and `flankingWhitespace`.
   *
   * @param {Node} node The node to annotate
   * @returns {Node} The same node
   */
  function Node (node) {
    node.isBlock = isBlock(node);

    // A node counts as code when it is a <code> element or sits inside one.
    var tagName = node.nodeName.toLowerCase();
    node.isCode = tagName === 'code' || node.parentNode.isCode;

    // flankingWhitespace() reads isBlock and isBlank, so it must come last.
    node.isBlank = isBlank(node);
    node.flankingWhitespace = flankingWhitespace(node);
    return node;
  }
  /**
   * Tells whether a node has nothing to render: its text is only whitespace,
   * it is not (and does not contain) a void element, and it is not one of
   * the elements that are kept even when empty.
   *
   * @param {Node} node
   * @returns {Boolean}
   */
  function isBlank (node) {
    var neverBlank = ['A', 'TH', 'TD', 'IFRAME', 'SCRIPT', 'AUDIO', 'VIDEO'];

    if (neverBlank.indexOf(node.nodeName) !== -1) return false;
    if (!/^\s*$/i.test(node.textContent)) return false;
    if (isVoid(node)) return false;
    return !hasVoid(node);
  }
  /**
   * Works out which single spaces must be emitted around an inline node so
   * that whitespace at the edges of its text is not lost. Block nodes never
   * get any.
   *
   * @param {Node} node A node already carrying `isBlock` and `isBlank`
   * @returns {{leading: String, trailing: String}} Each is '' or ' '
   */
  function flankingWhitespace (node) {
    var result = { leading: '', trailing: '' };
    if (node.isBlock) return result;

    var startsWithSpace = /^\s/.test(node.textContent);
    var endsWithSpace = /\s$/.test(node.textContent);
    // A blank node made of whitespace would otherwise be counted twice.
    var blankWithSpaces = node.isBlank && startsWithSpace && endsWithSpace;

    if (startsWithSpace && !isFlankedByWhitespace('left', node)) {
      result.leading = ' ';
    }
    if (!blankWithSpaces && endsWithSpace && !isFlankedByWhitespace('right', node)) {
      result.trailing = ' ';
    }
    return result;
  }

  /**
   * Tells whether the sibling on the given side already supplies a space at
   * the edge facing the node.
   *
   * @param {String} side 'left' for the previous sibling, anything else for
   *   the next sibling
   * @param {Node} node
   * @returns {Boolean|undefined} undefined when there is no sibling, or the
   *   sibling is neither a text node nor a non-block element
   */
  function isFlankedByWhitespace (side, node) {
    var onLeft = side === 'left';
    var sibling = onLeft ? node.previousSibling : node.nextSibling;
    var edgeSpace = onLeft ? / $/ : /^ /;

    if (!sibling) return undefined;
    if (sibling.nodeType === 3) {
      return edgeSpace.test(sibling.nodeValue);
    }
    if (sibling.nodeType === 1 && !isBlock(sibling)) {
      return edgeSpace.test(sibling.textContent);
    }
    return undefined;
  }
  // Borrowed so that array-likes such as NodeList can be reduced.
  var reduce = Array.prototype.reduce;

  // Newline runs at the start and at the end of a string.
  var leadingNewLinesRegExp = /^\n*/;
  var trailingNewLinesRegExp = /\n*$/;

  // [pattern, replacement] pairs applied in order by TurndownService#escape.
  // Backslashes come first so the backslashes added later are not doubled.
  var escapes = [
    [/\\/g, '\\\\'], // backslash
    [/\*/g, '\\*'], // asterisk
    [/^-/g, '\\-'], // leading hyphen
    [/^\+ /g, '\\+ '], // leading "+ "
    [/^(=+)/g, '\\$1'], // leading run of "="
    [/^(#{1,6}) /g, '\\$1 '], // leading "#" run followed by a space
    [/`/g, '\\`'], // backtick
    [/^~~~/g, '\\~~~'], // leading "~~~"
    [/\[/g, '\\['], // opening bracket
    [/\]/g, '\\]'], // closing bracket
    [/^>/g, '\\>'], // leading ">"
    [/_/g, '\\_'], // underscore
    [/^(\d+)\. /g, '$1\\. '] // leading "N. "
  ];
  /**
   * Creates a converter. May be called with or without `new`.
   *
   * @param {Object} [options] Overrides for the defaults listed below; they
   *   are shallow-merged over the defaults
   */
  function TurndownService (options) {
    if (!(this instanceof TurndownService)) return new TurndownService(options)

    var defaults = {
      rules: rules,
      headingStyle: 'setext',
      hr: '* * *',
      bulletListMarker: '*',
      codeBlockStyle: 'indented',
      fence: '```',
      emDelimiter: '_',
      strongDelimiter: '**',
      linkStyle: 'inlined',
      linkReferenceStyle: 'full',
      // Emitted before the newline of a <br>. Markdown only treats a line
      // ending as a hard break when it is preceded by two or more spaces; a
      // single space would silently turn every <br> into a soft break.
      br: '  ',
      // Blank nodes: blocks keep their separation, inline nodes vanish.
      blankReplacement: function (content, node) {
        return node.isBlock ? '\n\n' : ''
      },
      // Kept nodes are emitted as their original HTML.
      keepReplacement: function (content, node) {
        return node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML
      },
      // Nodes without a matching rule contribute only their content.
      defaultReplacement: function (content, node) {
        return node.isBlock ? '\n\n' + content + '\n\n' : content
      }
    };

    this.options = extend({}, defaults, options);
    this.rules = new Rules(this.options);
  }
  TurndownService.prototype = {
    /**
     * Converts a string of HTML or a DOM node to Markdown.
     * @public
     * @param {String|HTMLElement} input The string or DOM node to convert
     * @returns A Markdown representation of the input
     * @type String
     */
    turndown: function (input) {
      if (!canConvert(input)) {
        var message = input + ' is not a string, or an element/document/fragment node.';
        throw new TypeError(message);
      }

      // Nothing to parse.
      if (input === '') return '';

      var markdown = process.call(this, new RootNode(input));
      return postProcess.call(this, markdown);
    },

    /**
     * Applies one plugin, or every plugin of an array, to this instance.
     * @public
     * @param {Function|Array} plugin The plugin or array of plugins to add
     * @returns The Turndown instance for chaining
     * @type Object
     */
    use: function (plugin) {
      if (typeof plugin === 'function') {
        plugin(this);
      } else if (Array.isArray(plugin)) {
        for (var i = 0; i < plugin.length; i++) {
          this.use(plugin[i]);
        }
      } else {
        throw new TypeError('plugin must be a Function or an Array of Functions');
      }
      return this;
    },

    /**
     * Registers a rule ahead of the existing ones.
     * @public
     * @param {String} key The unique key of the rule
     * @param {Object} rule The rule
     * @returns The Turndown instance for chaining
     * @type Object
     */
    addRule: function (key, rule) {
      this.rules.add(key, rule);
      return this;
    },

    /**
     * Keeps nodes matching the filter as HTML in the output.
     * @public
     * @param {String|Array|Function} filter Selects the nodes to keep
     * @returns The Turndown instance for chaining
     * @type Object
     */
    keep: function (filter) {
      this.rules.keep(filter);
      return this;
    },

    /**
     * Drops nodes matching the filter from the output.
     * @public
     * @param {String|Array|Function} filter Selects the nodes to remove
     * @returns The Turndown instance for chaining
     * @type Object
     */
    remove: function (filter) {
      this.rules.remove(filter);
      return this;
    },

    /**
     * Escapes Markdown syntax by applying every escape pattern in order.
     * @public
     * @param {String} string The string to escape
     * @returns A string with Markdown syntax escaped
     * @type String
     */
    escape: function (string) {
      var escaped = string;
      for (var i = 0; i < escapes.length; i++) {
        var pattern = escapes[i][0];
        var replacement = escapes[i][1];
        escaped = escaped.replace(pattern, replacement);
      }
      return escaped;
    }
  };
  /**
   * Converts the children of a node to Markdown and joins the pieces.
   * Text nodes are escaped unless they sit inside code; element nodes are
   * converted through their rule; other node types contribute nothing.
   * @private
   * @param {HTMLElement} parentNode The node whose children are converted
   * @returns A Markdown representation of the node's children
   * @type String
   */
  function process (parentNode) {
    var children = parentNode.childNodes;
    var total = children.length;
    var output = '';

    for (var i = 0; i < total; i++) {
      var node = new Node(children[i]);
      var replacement = '';

      if (node.nodeType === 3) {
        replacement = node.isCode ? node.nodeValue : this.escape(node.nodeValue);
      } else if (node.nodeType === 1) {
        replacement = replacementForNode.call(this, node);
      }

      output = join(output, replacement);
    }

    return output;
  }
  /**
   * Appends whatever each rule's `append` hook returns, then trims the
   * output.
   * @private
   * @param {String} output The conversion output
   * @returns A trimmed version of the output
   * @type String
   */
  function postProcess (output) {
    var service = this;

    this.rules.forEach(function (rule) {
      if (typeof rule.append !== 'function') return;
      output = join(output, rule.append(service.options));
    });

    // Leading spaces are kept on purpose; only tabs and line breaks are
    // stripped from the start, while all whitespace is stripped from the end.
    var withoutLeading = output.replace(/^[\t\r\n]+/, '');
    return withoutLeading.replace(/[\t\r\n\s]+$/, '');
  }
  /**
   * Converts an element node to Markdown using the rule selected for it,
   * restoring any flanking whitespace around the result.
   * @private
   * @param {HTMLElement} node The node to convert
   * @returns A Markdown representation of the node
   * @type String
   */
  function replacementForNode (node) {
    var rule = this.rules.forNode(node);
    var content = process.call(this, node);
    var flanking = node.flankingWhitespace;

    // The spaces are re-added outside the replacement, so they must not
    // also remain inside the content.
    var hasFlanking = flanking.leading || flanking.trailing;
    if (hasFlanking) content = content.trim();

    var markdown = rule.replacement(content, node, this.options);
    return flanking.leading + markdown + flanking.trailing;
  }
  /**
   * Determines the newlines to place between the current output and the
   * replacement: the longer of the output's trailing run and the
   * replacement's leading run, capped at two.
   * @private
   * @param {String} output The current conversion output
   * @param {String} replacement The string to append to the output
   * @returns The whitespace to separate the current output and the replacement
   * @type String
   */
  function separatingNewlines (output, replacement) {
    var trailing = output.match(trailingNewLinesRegExp)[0];
    var leading = replacement.match(leadingNewLinesRegExp)[0];

    // Both runs consist solely of "\n", so the longer one is the maximum.
    var longest = leading.length > trailing.length ? leading : trailing;
    return longest.length < 2 ? longest : '\n\n';
  }
  /**
   * Concatenates two Markdown fragments, replacing the newlines where they
   * meet with the separator computed by separatingNewlines().
   *
   * @param {String} string1 The text so far
   * @param {String} string2 The text to append
   * @returns {String} The joined text
   */
  function join (string1, string2) {
    var separator = separatingNewlines(string1, string2);

    // Remove trailing/leading newlines and replace with separator
    var head = string1.replace(trailingNewLinesRegExp, '');
    var tail = string2.replace(leadingNewLinesRegExp, '');

    return head + separator + tail;
  }
  /**
   * Determines whether an input can be converted: any string, or an object
   * whose `nodeType` is 1 (element), 9 (document) or 11 (document fragment).
   * @private
   * @param {String|HTMLElement} input The value passed to turndown()
   * @returns A truthy value when the input is convertible, a falsy one otherwise
   * @type Boolean|Number|undefined
   */
  function canConvert (input) {
    if (input == null) return false;
    if (typeof input === 'string') return true;

    var type = input.nodeType;
    return type && (type === 1 || type === 9 || type === 11);
  }
  785. return TurndownService;
  786. })));