decodeHtml.ts 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. import { ParserOptions } from '@vue/compiler-core'
  2. import namedCharacterReferences from './namedChars.json'
  // Length of the longest key in the named character reference table.
  // Computed lazily on first use (instead of at module load) so that this
  // file stays tree-shakable for browser builds.
  let maxCRNameLength: number

  /**
   * Decodes HTML character references (`&name;`, `&#NNN;`, `&#xHHH;`) found in
   * `rawText` and returns the decoded string. Text that is not part of a
   * recognised reference is copied through unchanged.
   *
   * @param rawText - The text to decode.
   * @param asAttr - When truthy, a named reference that is not terminated by
   *   `;` and is immediately followed by `=` or an ASCII alphanumeric is left
   *   undecoded.
   * @returns The decoded text.
   */
  export const decodeHtml: ParserOptions['decodeEntities'] = (
    rawText,
    asAttr
  ) => {
    const namedRefs = namedCharacterReferences as Record<string, string>
    const hasOwn = Object.prototype.hasOwnProperty

    let offset = 0
    const end = rawText.length
    let decodedText = ''

    // Consumes `length` characters from the front of the remaining input.
    function advance(length: number) {
      offset += length
      rawText = rawText.slice(length)
    }

    while (offset < end) {
      const head = /&(?:#x?)?/i.exec(rawText)
      if (!head || offset + head.index >= end) {
        // No further reference candidates: flush whatever is left.
        const remaining = end - offset
        decodedText += rawText.slice(0, remaining)
        advance(remaining)
        break
      }

      // Copy the plain text in front of the "&" and advance to it.
      decodedText += rawText.slice(0, head.index)
      advance(head.index)

      if (head[0] === '&') {
        // Named character reference.
        // `charAt` yields '' past the end of the input, so a trailing "&" is
        // handled here rather than relying on `undefined` being stringified.
        if (!/[0-9a-z]/i.test(rawText.charAt(1))) {
          decodedText += '&'
          advance(1)
          continue
        }

        if (!maxCRNameLength) {
          maxCRNameLength = Object.keys(namedRefs).reduce(
            (max, key) => Math.max(max, key.length),
            0
          )
        }

        // Longest match first: try successively shorter prefixes until one is
        // a key of the table. If none matches, `name` ends up being the single
        // character after "&", which is then emitted literally below.
        let name = ''
        let value: string | undefined = undefined
        for (let length = maxCRNameLength; !value && length > 0; --length) {
          name = rawText.slice(1, 1 + length)
          // Own-property check: a bare index lookup would also hit inherited
          // keys such as "constructor" or "toString" and emit a stringified
          // function for input like "&constructor".
          if (hasOwn.call(namedRefs, name)) {
            value = namedRefs[name]
          }
        }

        const keepLiteralInAttr =
          asAttr &&
          !name.endsWith(';') &&
          /[=a-z0-9]/i.test(rawText[name.length + 1] || '')

        if (value && !keepLiteralInAttr) {
          decodedText += value
        } else {
          decodedText += '&' + name
        }
        advance(1 + name.length)
      } else {
        // Numeric character reference.
        // The head regex is case-insensitive, so the prefix may be "&#x" or
        // "&#X"; both introduce a hexadecimal reference.
        const hex = head[0].toLowerCase() === '&#x'
        const pattern = hex ? /^&#x([0-9a-f]+);?/i : /^&#([0-9]+);?/
        const body = pattern.exec(rawText)
        if (!body) {
          // "&#" / "&#x" without digits: emit the prefix as written.
          decodedText += head[0]
          advance(head[0].length)
        } else {
          // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
          let cp = Number.parseInt(body[1], hex ? 16 : 10)
          if (cp === 0 || cp > 0x10ffff || (cp >= 0xd800 && cp <= 0xdfff)) {
            // NULL, out-of-range and surrogate values become U+FFFD.
            cp = 0xfffd
          } else if (
            (cp >= 0xfdd0 && cp <= 0xfdef) ||
            (cp & 0xfffe) === 0xfffe
          ) {
            // Noncharacters are emitted as-is.
          } else if (
            (cp >= 0x01 && cp <= 0x08) ||
            cp === 0x0b ||
            (cp >= 0x0d && cp <= 0x1f) ||
            (cp >= 0x7f && cp <= 0x9f)
          ) {
            // Control range: substitute when the table has a replacement.
            cp = CCR_REPLACEMENTS[cp] || cp
          }
          decodedText += String.fromCodePoint(cp)
          advance(body[0].length)
        }
      }
    }
    return decodedText
  }
  // Replacement table for numeric character references in the 0x80-0x9F
  // range: maps the referenced code point to the code point emitted instead.
  // Code points without an entry have no replacement.
  // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
  const CCR_REPLACEMENTS: Record<number, number | undefined> = {
    0x80: 0x20ac, // EURO SIGN
    0x82: 0x201a, // SINGLE LOW-9 QUOTATION MARK
    0x83: 0x0192, // LATIN SMALL LETTER F WITH HOOK
    0x84: 0x201e, // DOUBLE LOW-9 QUOTATION MARK
    0x85: 0x2026, // HORIZONTAL ELLIPSIS
    0x86: 0x2020, // DAGGER
    0x87: 0x2021, // DOUBLE DAGGER
    0x88: 0x02c6, // MODIFIER LETTER CIRCUMFLEX ACCENT
    0x89: 0x2030, // PER MILLE SIGN
    0x8a: 0x0160, // LATIN CAPITAL LETTER S WITH CARON
    0x8b: 0x2039, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    0x8c: 0x0152, // LATIN CAPITAL LIGATURE OE
    0x8e: 0x017d, // LATIN CAPITAL LETTER Z WITH CARON
    0x91: 0x2018, // LEFT SINGLE QUOTATION MARK
    0x92: 0x2019, // RIGHT SINGLE QUOTATION MARK
    0x93: 0x201c, // LEFT DOUBLE QUOTATION MARK
    0x94: 0x201d, // RIGHT DOUBLE QUOTATION MARK
    0x95: 0x2022, // BULLET
    0x96: 0x2013, // EN DASH
    0x97: 0x2014, // EM DASH
    0x98: 0x02dc, // SMALL TILDE
    0x99: 0x2122, // TRADE MARK SIGN
    0x9a: 0x0161, // LATIN SMALL LETTER S WITH CARON
    0x9b: 0x203a, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    0x9c: 0x0153, // LATIN SMALL LIGATURE OE
    0x9e: 0x017e, // LATIN SMALL LETTER Z WITH CARON
    0x9f: 0x0178 // LATIN CAPITAL LETTER Y WITH DIAERESIS
  }