/* index.js (3.2 KB) */
  1. 'use strict'
  2. var entities = require('character-entities-html4')
  3. var legacy = require('character-entities-legacy')
  4. var hexadecimal = require('is-hexadecimal')
  5. var alphanumerical = require('is-alphanumerical')
  6. var dangerous = require('./dangerous.json')
  /* Expose `encode` as the module's export, with `escape` attached to it as
   * a property. */
  module.exports = encode
  encode.escape = escape
  /* Cached `hasOwnProperty`, used for own-key checks on the lookup maps. */
  var own = {}.hasOwnProperty
  /* List of enforced escapes: the characters encoded in the first pass of
   * `encode` when no `subset` option is given. */
  var escapes = ['"', "'", '<', '>', '&', '`']
  /* Map of characters to names, built by `construct()` (declared further
   * down; the call works here because function declarations are hoisted). */
  var characters = construct()
  /* Default escapes: `escapes` compiled once into a global character-class
   * expression. */
  var defaultEscapes = toExpression(escapes)
  /* Surrogate pairs: a high surrogate immediately followed by a low one. */
  var surrogatePair = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g
  /* Characters encoded in the final pass of `encode`: ASCII control
   * characters U+0001–U+0009, U+000B, U+000C and U+000E–U+001F, DEL
   * (U+007F), the code points U+0081, U+008D, U+008F, U+0090 and U+009D,
   * and everything from U+00A0 through U+FFFF.  That last range includes
   * lone surrogates; `encode` replaces well-formed pairs before applying
   * this expression. */
  // eslint-disable-next-line no-control-regex, unicorn/no-hex-escape
  var bmp = /[\x01-\t\x0B\f\x0E-\x1F\x7F\x81\x8D\x8F\x90\x9D\xA0-\uFFFF]/g
  /* Encode special characters in `value` as HTML character references.
   *
   * `value` is the string to encode.  `options` is optional; this function
   * reads `subset` (list of characters to encode instead of the default
   * escapes), `escapeOnly` and `omitOptionalSemicolons` from it, and hands
   * the whole object on to `one`, which reads `useNamedReferences`,
   * `useShortestReferences` and `attribute`.
   *
   * Returns the encoded string.  When `subset` or `escapeOnly` is given,
   * only the first pass (subset or default escapes) is applied. */
  function encode(value, options) {
    var config = options || {}
    var custom = config.subset
    var pattern = custom ? toExpression(custom) : defaultEscapes
    var stopAfterEscapes = config.escapeOnly
    var dropSemicolons = config.omitOptionalSemicolons
    var escaped = value.replace(pattern, encodeSingle)
    var withoutPairs

    if (custom || stopAfterEscapes) {
      return escaped
    }

    /* Well-formed surrogate pairs go first, as one reference for the whole
     * code point; whatever `bmp` still matches afterwards is encoded one
     * code unit at a time. */
    withoutPairs = escaped.replace(surrogatePair, function(pair, offset, whole) {
      var high = pair.charCodeAt(0) - 0xd800
      var low = pair.charCodeAt(1) - 0xdc00

      /* A pair is two code units long, so the following character sits at
       * `offset + 2`. */
      return toHexReference(
        high * 0x400 + low + 0x10000,
        whole.charAt(offset + 2),
        dropSemicolons
      )
    })

    return withoutPairs.replace(bmp, encodeSingle)

    /* Replacer shared by the single-character passes: encodes `char`,
     * passing along the character that follows it in the string being
     * searched. */
    function encodeSingle(char, offset, whole) {
      return one(char, whole.charAt(offset + 1), config)
    }
  }
  /* Shortcut to escape special characters in HTML: runs `encode` on `value`
   * with `escapeOnly` and `useNamedReferences` both switched on, and returns
   * its result. */
  function escape(value) {
    var escapeSettings = {escapeOnly: true, useNamedReferences: true}

    return encode(value, escapeSettings)
  }
  /* Encode the single character `char` according to `options`.
   *
   * `next` is the character following `char` in the input (or an empty
   * string); it is forwarded to the reference builders, which use it when
   * deciding whether the trailing semicolon can be left out.
   *
   * A named reference is considered only when `useNamedReferences` or
   * `useShortestReferences` is set and `char` has an entry in `characters`.
   * With `useShortestReferences`, the named form is returned only if it is
   * strictly shorter than the hexadecimal one. */
  function one(char, next, options) {
    var preferShortest = options.useShortestReferences
    var dropSemicolons = options.omitOptionalSemicolons
    var allowNamed = preferShortest || options.useNamedReferences
    var namedForm =
      allowNamed && own.call(characters, char)
        ? toNamed(characters[char], next, dropSemicolons, options.attribute)
        : undefined
    var hexForm

    /* Named form available and no length comparison requested: done. */
    if (namedForm && !preferShortest) {
      return namedForm
    }

    hexForm = toHexReference(char.charCodeAt(0), next, dropSemicolons)

    return namedForm && namedForm.length < hexForm.length ? namedForm : hexForm
  }
  /* Transform the entity `name` into a named character reference.
   *
   * The terminating semicolon is dropped only when `omit` is set, `name` is
   * an own key of `legacy`, `name` is not listed in `dangerous`, and —
   * if `attribute` is set — there is a following character `next` that is
   * neither `=` nor alphanumerical.  Otherwise the reference ends in `;`. */
  function toNamed(name, next, omit, attribute) {
    var bare = '&' + name
    var terminated = bare + ';'

    if (!omit || !own.call(legacy, name) || dangerous.indexOf(name) !== -1) {
      return terminated
    }

    if (attribute && (!next || next === '=' || alphanumerical(next))) {
      return terminated
    }

    return bare
  }
  /* Transform the numeric `code` into a hexadecimal character reference
   * (`&#x…`, upper-case digits).
   *
   * The terminating semicolon is left out only when `omit` is set and the
   * following character `next` exists and is not a hexadecimal digit. */
  function toHexReference(code, next, omit) {
    var reference = '&#x' + code.toString(16).toUpperCase()
    var semicolonOptional = omit && next && !hexadecimal(next)

    if (semicolonOptional) {
      return reference
    }

    return reference + ';'
  }
  /* Create a global expression matching any one of `characters`.
   *
   * `characters` is a list of single-character strings.  Characters that
   * carry meaning inside a character class (`\`, `]`, `^` and `-`) are
   * escaped, so that each entry is matched literally: without this, `]` or
   * `\` yields a broken or throwing pattern, a leading `^` negates the
   * class, and a `-` between two entries silently turns into a range.
   *
   * An empty list gives an expression that matches nothing. */
  function toExpression(characters) {
    var source = characters.join('').replace(/[\\\]^-]/g, '\\$&')

    return new RegExp('[' + source + ']', 'g')
  }
  /* Construct the map of characters to entity names by inverting
   * `entities` (which is keyed by name).  Returns a new plain object keyed
   * by character. */
  function construct() {
    var byCharacter = {}
    var entityName
    var character

    for (entityName in entities) {
      character = entities[entityName]
      byCharacter[character] = entityName
    }

    return byCharacter
  }