// parse.ts (28 KB)
  1. import { ParserOptions } from './options'
  2. import { NO, isArray } from '@vue/shared'
  3. import { ErrorCodes, createCompilerError, defaultOnError } from './errors'
  4. import {
  5. assert,
  6. advancePositionWithMutation,
  7. advancePositionWithClone,
  8. isCoreComponent
  9. } from './utils'
  10. import {
  11. Namespaces,
  12. AttributeNode,
  13. CommentNode,
  14. DirectiveNode,
  15. ElementNode,
  16. ElementTypes,
  17. ExpressionNode,
  18. NodeTypes,
  19. Position,
  20. RootNode,
  21. SourceLocation,
  22. TextNode,
  23. TemplateChildNode,
  24. InterpolationNode
  25. } from './ast'
  26. import { extend } from '@vue/shared'
  27. type OptionalOptions = 'isNativeTag' | 'isBuiltInComponent'
  28. type MergedParserOptions = Omit<Required<ParserOptions>, OptionalOptions> &
  29. Pick<ParserOptions, OptionalOptions>
  30. export const defaultParserOptions: MergedParserOptions = {
  31. delimiters: [`{{`, `}}`],
  32. getNamespace: () => Namespaces.HTML,
  33. getTextMode: () => TextModes.DATA,
  34. isVoidTag: NO,
  35. isPreTag: NO,
  36. isCustomElement: NO,
  37. namedCharacterReferences: {
  38. 'gt;': '>',
  39. 'lt;': '<',
  40. 'amp;': '&',
  41. 'apos;': "'",
  42. 'quot;': '"'
  43. },
  44. maxCRNameLength: 5,
  45. onError: defaultOnError
  46. }
  47. export const enum TextModes {
  48. // | Elements | Entities | End sign | Inside of
  49. DATA, // | ✔ | ✔ | End tags of ancestors |
  50. RCDATA, // | ✘ | ✔ | End tag of the parent | <textarea>
  51. RAWTEXT, // | ✘ | ✘ | End tag of the parent | <style>,<script>
  52. CDATA,
  53. ATTRIBUTE_VALUE
  54. }
  55. interface ParserContext {
  56. options: MergedParserOptions
  57. readonly originalSource: string
  58. source: string
  59. offset: number
  60. line: number
  61. column: number
  62. inPre: boolean
  63. }
  64. export function baseParse(
  65. content: string,
  66. options: ParserOptions = {}
  67. ): RootNode {
  68. const context = createParserContext(content, options)
  69. const start = getCursor(context)
  70. return {
  71. type: NodeTypes.ROOT,
  72. children: parseChildren(context, TextModes.DATA, []),
  73. helpers: [],
  74. components: [],
  75. directives: [],
  76. hoists: [],
  77. imports: [],
  78. cached: 0,
  79. codegenNode: undefined,
  80. loc: getSelection(context, start)
  81. }
  82. }
  83. function createParserContext(
  84. content: string,
  85. options: ParserOptions
  86. ): ParserContext {
  87. return {
  88. options: {
  89. ...defaultParserOptions,
  90. ...options
  91. },
  92. column: 1,
  93. line: 1,
  94. offset: 0,
  95. originalSource: content,
  96. source: content,
  97. inPre: false
  98. }
  99. }
  100. function parseChildren(
  101. context: ParserContext,
  102. mode: TextModes,
  103. ancestors: ElementNode[]
  104. ): TemplateChildNode[] {
  105. const parent = last(ancestors)
  106. const ns = parent ? parent.ns : Namespaces.HTML
  107. const nodes: TemplateChildNode[] = []
  108. while (!isEnd(context, mode, ancestors)) {
  109. __TEST__ && assert(context.source.length > 0)
  110. const s = context.source
  111. let node: TemplateChildNode | TemplateChildNode[] | undefined = undefined
  112. if (mode === TextModes.DATA) {
  113. if (!context.inPre && startsWith(s, context.options.delimiters[0])) {
  114. // '{{'
  115. node = parseInterpolation(context, mode)
  116. } else if (s[0] === '<') {
  117. // https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
  118. if (s.length === 1) {
  119. emitError(context, ErrorCodes.EOF_BEFORE_TAG_NAME, 1)
  120. } else if (s[1] === '!') {
  121. // https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
  122. if (startsWith(s, '<!--')) {
  123. node = parseComment(context)
  124. } else if (startsWith(s, '<!DOCTYPE')) {
  125. // Ignore DOCTYPE by a limitation.
  126. node = parseBogusComment(context)
  127. } else if (startsWith(s, '<![CDATA[')) {
  128. if (ns !== Namespaces.HTML) {
  129. node = parseCDATA(context, ancestors)
  130. } else {
  131. emitError(context, ErrorCodes.CDATA_IN_HTML_CONTENT)
  132. node = parseBogusComment(context)
  133. }
  134. } else {
  135. emitError(context, ErrorCodes.INCORRECTLY_OPENED_COMMENT)
  136. node = parseBogusComment(context)
  137. }
  138. } else if (s[1] === '/') {
  139. // https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state
  140. if (s.length === 2) {
  141. emitError(context, ErrorCodes.EOF_BEFORE_TAG_NAME, 2)
  142. } else if (s[2] === '>') {
  143. emitError(context, ErrorCodes.MISSING_END_TAG_NAME, 2)
  144. advanceBy(context, 3)
  145. continue
  146. } else if (/[a-z]/i.test(s[2])) {
  147. emitError(context, ErrorCodes.X_INVALID_END_TAG)
  148. parseTag(context, TagType.End, parent)
  149. continue
  150. } else {
  151. emitError(
  152. context,
  153. ErrorCodes.INVALID_FIRST_CHARACTER_OF_TAG_NAME,
  154. 2
  155. )
  156. node = parseBogusComment(context)
  157. }
  158. } else if (/[a-z]/i.test(s[1])) {
  159. node = parseElement(context, ancestors)
  160. } else if (s[1] === '?') {
  161. emitError(
  162. context,
  163. ErrorCodes.UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME,
  164. 1
  165. )
  166. node = parseBogusComment(context)
  167. } else {
  168. emitError(context, ErrorCodes.INVALID_FIRST_CHARACTER_OF_TAG_NAME, 1)
  169. }
  170. }
  171. }
  172. if (!node) {
  173. node = parseText(context, mode)
  174. }
  175. if (isArray(node)) {
  176. for (let i = 0; i < node.length; i++) {
  177. pushNode(nodes, node[i])
  178. }
  179. } else {
  180. pushNode(nodes, node)
  181. }
  182. }
  183. // Whitespace management for more efficient output
  184. // (same as v2 whitespace: 'condense')
  185. let removedWhitespace = false
  186. if (
  187. mode !== TextModes.RAWTEXT &&
  188. (!parent || !context.options.isPreTag(parent.tag))
  189. ) {
  190. for (let i = 0; i < nodes.length; i++) {
  191. const node = nodes[i]
  192. if (node.type === NodeTypes.TEXT) {
  193. if (!node.content.trim()) {
  194. const prev = nodes[i - 1]
  195. const next = nodes[i + 1]
  196. // If:
  197. // - the whitespace is the first or last node, or:
  198. // - the whitespace is adjacent to a comment, or:
  199. // - the whitespace is between two elements AND contains newline
  200. // Then the whitespace is ignored.
  201. if (
  202. !prev ||
  203. !next ||
  204. prev.type === NodeTypes.COMMENT ||
  205. next.type === NodeTypes.COMMENT ||
  206. (prev.type === NodeTypes.ELEMENT &&
  207. next.type === NodeTypes.ELEMENT &&
  208. /[\r\n]/.test(node.content))
  209. ) {
  210. removedWhitespace = true
  211. nodes[i] = null as any
  212. } else {
  213. // Otherwise, condensed consecutive whitespace inside the text down to
  214. // a single space
  215. node.content = ' '
  216. }
  217. } else {
  218. node.content = node.content.replace(/\s+/g, ' ')
  219. }
  220. }
  221. }
  222. }
  223. return removedWhitespace ? nodes.filter(node => node !== null) : nodes
  224. }
  225. function pushNode(nodes: TemplateChildNode[], node: TemplateChildNode): void {
  226. // ignore comments in production
  227. /* istanbul ignore next */
  228. if (!__DEV__ && node.type === NodeTypes.COMMENT) {
  229. return
  230. }
  231. if (node.type === NodeTypes.TEXT) {
  232. const prev = last(nodes)
  233. // Merge if both this and the previous node are text and those are
  234. // consecutive. This happens for cases like "a < b".
  235. if (
  236. prev &&
  237. prev.type === NodeTypes.TEXT &&
  238. prev.loc.end.offset === node.loc.start.offset
  239. ) {
  240. prev.content += node.content
  241. prev.loc.end = node.loc.end
  242. prev.loc.source += node.loc.source
  243. return
  244. }
  245. }
  246. nodes.push(node)
  247. }
  248. function parseCDATA(
  249. context: ParserContext,
  250. ancestors: ElementNode[]
  251. ): TemplateChildNode[] {
  252. __TEST__ &&
  253. assert(last(ancestors) == null || last(ancestors)!.ns !== Namespaces.HTML)
  254. __TEST__ && assert(startsWith(context.source, '<![CDATA['))
  255. advanceBy(context, 9)
  256. const nodes = parseChildren(context, TextModes.CDATA, ancestors)
  257. if (context.source.length === 0) {
  258. emitError(context, ErrorCodes.EOF_IN_CDATA)
  259. } else {
  260. __TEST__ && assert(startsWith(context.source, ']]>'))
  261. advanceBy(context, 3)
  262. }
  263. return nodes
  264. }
  265. function parseComment(context: ParserContext): CommentNode {
  266. __TEST__ && assert(startsWith(context.source, '<!--'))
  267. const start = getCursor(context)
  268. let content: string
  269. // Regular comment.
  270. const match = /--(\!)?>/.exec(context.source)
  271. if (!match) {
  272. content = context.source.slice(4)
  273. advanceBy(context, context.source.length)
  274. emitError(context, ErrorCodes.EOF_IN_COMMENT)
  275. } else {
  276. if (match.index <= 3) {
  277. emitError(context, ErrorCodes.ABRUPT_CLOSING_OF_EMPTY_COMMENT)
  278. }
  279. if (match[1]) {
  280. emitError(context, ErrorCodes.INCORRECTLY_CLOSED_COMMENT)
  281. }
  282. content = context.source.slice(4, match.index)
  283. // Advancing with reporting nested comments.
  284. const s = context.source.slice(0, match.index)
  285. let prevIndex = 1,
  286. nestedIndex = 0
  287. while ((nestedIndex = s.indexOf('<!--', prevIndex)) !== -1) {
  288. advanceBy(context, nestedIndex - prevIndex + 1)
  289. if (nestedIndex + 4 < s.length) {
  290. emitError(context, ErrorCodes.NESTED_COMMENT)
  291. }
  292. prevIndex = nestedIndex + 1
  293. }
  294. advanceBy(context, match.index + match[0].length - prevIndex + 1)
  295. }
  296. return {
  297. type: NodeTypes.COMMENT,
  298. content,
  299. loc: getSelection(context, start)
  300. }
  301. }
  302. function parseBogusComment(context: ParserContext): CommentNode | undefined {
  303. __TEST__ && assert(/^<(?:[\!\?]|\/[^a-z>])/i.test(context.source))
  304. const start = getCursor(context)
  305. const contentStart = context.source[1] === '?' ? 1 : 2
  306. let content: string
  307. const closeIndex = context.source.indexOf('>')
  308. if (closeIndex === -1) {
  309. content = context.source.slice(contentStart)
  310. advanceBy(context, context.source.length)
  311. } else {
  312. content = context.source.slice(contentStart, closeIndex)
  313. advanceBy(context, closeIndex + 1)
  314. }
  315. return {
  316. type: NodeTypes.COMMENT,
  317. content,
  318. loc: getSelection(context, start)
  319. }
  320. }
  321. function parseElement(
  322. context: ParserContext,
  323. ancestors: ElementNode[]
  324. ): ElementNode | undefined {
  325. __TEST__ && assert(/^<[a-z]/i.test(context.source))
  326. // Start tag.
  327. const wasInPre = context.inPre
  328. const parent = last(ancestors)
  329. const element = parseTag(context, TagType.Start, parent)
  330. const isPreBoundary = context.inPre && !wasInPre
  331. if (element.isSelfClosing || context.options.isVoidTag(element.tag)) {
  332. return element
  333. }
  334. // Children.
  335. ancestors.push(element)
  336. const mode = context.options.getTextMode(element.tag, element.ns, parent)
  337. const children = parseChildren(context, mode, ancestors)
  338. ancestors.pop()
  339. element.children = children
  340. // End tag.
  341. if (startsWithEndTagOpen(context.source, element.tag)) {
  342. parseTag(context, TagType.End, parent)
  343. } else {
  344. emitError(context, ErrorCodes.X_MISSING_END_TAG, 0, element.loc.start)
  345. if (context.source.length === 0 && element.tag.toLowerCase() === 'script') {
  346. const first = children[0]
  347. if (first && startsWith(first.loc.source, '<!--')) {
  348. emitError(context, ErrorCodes.EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT)
  349. }
  350. }
  351. }
  352. element.loc = getSelection(context, element.loc.start)
  353. if (isPreBoundary) {
  354. context.inPre = false
  355. }
  356. return element
  357. }
  358. const enum TagType {
  359. Start,
  360. End
  361. }
  362. /**
  363. * Parse a tag (E.g. `<div id=a>`) with that type (start tag or end tag).
  364. */
  365. function parseTag(
  366. context: ParserContext,
  367. type: TagType,
  368. parent: ElementNode | undefined
  369. ): ElementNode {
  370. __TEST__ && assert(/^<\/?[a-z]/i.test(context.source))
  371. __TEST__ &&
  372. assert(
  373. type === (startsWith(context.source, '</') ? TagType.End : TagType.Start)
  374. )
  375. // Tag open.
  376. const start = getCursor(context)
  377. const match = /^<\/?([a-z][^\t\r\n\f />]*)/i.exec(context.source)!
  378. const tag = match[1]
  379. const ns = context.options.getNamespace(tag, parent)
  380. advanceBy(context, match[0].length)
  381. advanceSpaces(context)
  382. // save current state in case we need to re-parse attributes with v-pre
  383. const cursor = getCursor(context)
  384. const currentSource = context.source
  385. // Attributes.
  386. let props = parseAttributes(context, type)
  387. // check v-pre
  388. if (
  389. !context.inPre &&
  390. props.some(p => p.type === NodeTypes.DIRECTIVE && p.name === 'pre')
  391. ) {
  392. context.inPre = true
  393. // reset context
  394. extend(context, cursor)
  395. context.source = currentSource
  396. // re-parse attrs and filter out v-pre itself
  397. props = parseAttributes(context, type).filter(p => p.name !== 'v-pre')
  398. }
  399. // Tag close.
  400. let isSelfClosing = false
  401. if (context.source.length === 0) {
  402. emitError(context, ErrorCodes.EOF_IN_TAG)
  403. } else {
  404. isSelfClosing = startsWith(context.source, '/>')
  405. if (type === TagType.End && isSelfClosing) {
  406. emitError(context, ErrorCodes.END_TAG_WITH_TRAILING_SOLIDUS)
  407. }
  408. advanceBy(context, isSelfClosing ? 2 : 1)
  409. }
  410. let tagType = ElementTypes.ELEMENT
  411. const options = context.options
  412. if (!context.inPre && !options.isCustomElement(tag)) {
  413. if (options.isNativeTag) {
  414. if (!options.isNativeTag(tag)) tagType = ElementTypes.COMPONENT
  415. } else if (
  416. isCoreComponent(tag) ||
  417. (options.isBuiltInComponent && options.isBuiltInComponent(tag)) ||
  418. /^[A-Z]/.test(tag)
  419. ) {
  420. tagType = ElementTypes.COMPONENT
  421. }
  422. if (tag === 'slot') {
  423. tagType = ElementTypes.SLOT
  424. } else if (tag === 'template') {
  425. tagType = ElementTypes.TEMPLATE
  426. }
  427. }
  428. return {
  429. type: NodeTypes.ELEMENT,
  430. ns,
  431. tag,
  432. tagType,
  433. props,
  434. isSelfClosing,
  435. children: [],
  436. loc: getSelection(context, start),
  437. codegenNode: undefined // to be created during transform phase
  438. }
  439. }
  440. function parseAttributes(
  441. context: ParserContext,
  442. type: TagType
  443. ): (AttributeNode | DirectiveNode)[] {
  444. const props = []
  445. const attributeNames = new Set<string>()
  446. while (
  447. context.source.length > 0 &&
  448. !startsWith(context.source, '>') &&
  449. !startsWith(context.source, '/>')
  450. ) {
  451. if (startsWith(context.source, '/')) {
  452. emitError(context, ErrorCodes.UNEXPECTED_SOLIDUS_IN_TAG)
  453. advanceBy(context, 1)
  454. advanceSpaces(context)
  455. continue
  456. }
  457. if (type === TagType.End) {
  458. emitError(context, ErrorCodes.END_TAG_WITH_ATTRIBUTES)
  459. }
  460. const attr = parseAttribute(context, attributeNames)
  461. if (type === TagType.Start) {
  462. props.push(attr)
  463. }
  464. if (/^[^\t\r\n\f />]/.test(context.source)) {
  465. emitError(context, ErrorCodes.MISSING_WHITESPACE_BETWEEN_ATTRIBUTES)
  466. }
  467. advanceSpaces(context)
  468. }
  469. return props
  470. }
  471. function parseAttribute(
  472. context: ParserContext,
  473. nameSet: Set<string>
  474. ): AttributeNode | DirectiveNode {
  475. __TEST__ && assert(/^[^\t\r\n\f />]/.test(context.source))
  476. // Name.
  477. const start = getCursor(context)
  478. const match = /^[^\t\r\n\f />][^\t\r\n\f />=]*/.exec(context.source)!
  479. const name = match[0]
  480. if (nameSet.has(name)) {
  481. emitError(context, ErrorCodes.DUPLICATE_ATTRIBUTE)
  482. }
  483. nameSet.add(name)
  484. if (name[0] === '=') {
  485. emitError(context, ErrorCodes.UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME)
  486. }
  487. {
  488. const pattern = /["'<]/g
  489. let m: RegExpExecArray | null
  490. while ((m = pattern.exec(name)) !== null) {
  491. emitError(
  492. context,
  493. ErrorCodes.UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
  494. m.index
  495. )
  496. }
  497. }
  498. advanceBy(context, name.length)
  499. // Value
  500. let value:
  501. | {
  502. content: string
  503. isQuoted: boolean
  504. loc: SourceLocation
  505. }
  506. | undefined = undefined
  507. if (/^[\t\r\n\f ]*=/.test(context.source)) {
  508. advanceSpaces(context)
  509. advanceBy(context, 1)
  510. advanceSpaces(context)
  511. value = parseAttributeValue(context)
  512. if (!value) {
  513. emitError(context, ErrorCodes.MISSING_ATTRIBUTE_VALUE)
  514. }
  515. }
  516. const loc = getSelection(context, start)
  517. if (!context.inPre && /^(v-|:|@|#)/.test(name)) {
  518. const match = /(?:^v-([a-z0-9-]+))?(?:(?::|^@|^#)([^\.]+))?(.+)?$/i.exec(
  519. name
  520. )!
  521. let arg: ExpressionNode | undefined
  522. if (match[2]) {
  523. const startOffset = name.indexOf(match[2])
  524. const loc = getSelection(
  525. context,
  526. getNewPosition(context, start, startOffset),
  527. getNewPosition(context, start, startOffset + match[2].length)
  528. )
  529. let content = match[2]
  530. let isStatic = true
  531. if (content.startsWith('[')) {
  532. isStatic = false
  533. if (!content.endsWith(']')) {
  534. emitError(
  535. context,
  536. ErrorCodes.X_MISSING_DYNAMIC_DIRECTIVE_ARGUMENT_END
  537. )
  538. }
  539. content = content.substr(1, content.length - 2)
  540. }
  541. arg = {
  542. type: NodeTypes.SIMPLE_EXPRESSION,
  543. content,
  544. isStatic,
  545. isConstant: isStatic,
  546. loc
  547. }
  548. }
  549. if (value && value.isQuoted) {
  550. const valueLoc = value.loc
  551. valueLoc.start.offset++
  552. valueLoc.start.column++
  553. valueLoc.end = advancePositionWithClone(valueLoc.start, value.content)
  554. valueLoc.source = valueLoc.source.slice(1, -1)
  555. }
  556. return {
  557. type: NodeTypes.DIRECTIVE,
  558. name:
  559. match[1] ||
  560. (startsWith(name, ':')
  561. ? 'bind'
  562. : startsWith(name, '@')
  563. ? 'on'
  564. : 'slot'),
  565. exp: value && {
  566. type: NodeTypes.SIMPLE_EXPRESSION,
  567. content: value.content,
  568. isStatic: false,
  569. // Treat as non-constant by default. This can be potentially set to
  570. // true by `transformExpression` to make it eligible for hoisting.
  571. isConstant: false,
  572. loc: value.loc
  573. },
  574. arg,
  575. modifiers: match[3] ? match[3].substr(1).split('.') : [],
  576. loc
  577. }
  578. }
  579. return {
  580. type: NodeTypes.ATTRIBUTE,
  581. name,
  582. value: value && {
  583. type: NodeTypes.TEXT,
  584. content: value.content,
  585. loc: value.loc
  586. },
  587. loc
  588. }
  589. }
  590. function parseAttributeValue(
  591. context: ParserContext
  592. ):
  593. | {
  594. content: string
  595. isQuoted: boolean
  596. loc: SourceLocation
  597. }
  598. | undefined {
  599. const start = getCursor(context)
  600. let content: string
  601. const quote = context.source[0]
  602. const isQuoted = quote === `"` || quote === `'`
  603. if (isQuoted) {
  604. // Quoted value.
  605. advanceBy(context, 1)
  606. const endIndex = context.source.indexOf(quote)
  607. if (endIndex === -1) {
  608. content = parseTextData(
  609. context,
  610. context.source.length,
  611. TextModes.ATTRIBUTE_VALUE
  612. )
  613. } else {
  614. content = parseTextData(context, endIndex, TextModes.ATTRIBUTE_VALUE)
  615. advanceBy(context, 1)
  616. }
  617. } else {
  618. // Unquoted
  619. const match = /^[^\t\r\n\f >]+/.exec(context.source)
  620. if (!match) {
  621. return undefined
  622. }
  623. let unexpectedChars = /["'<=`]/g
  624. let m: RegExpExecArray | null
  625. while ((m = unexpectedChars.exec(match[0])) !== null) {
  626. emitError(
  627. context,
  628. ErrorCodes.UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE,
  629. m.index
  630. )
  631. }
  632. content = parseTextData(context, match[0].length, TextModes.ATTRIBUTE_VALUE)
  633. }
  634. return { content, isQuoted, loc: getSelection(context, start) }
  635. }
  636. function parseInterpolation(
  637. context: ParserContext,
  638. mode: TextModes
  639. ): InterpolationNode | undefined {
  640. const [open, close] = context.options.delimiters
  641. __TEST__ && assert(startsWith(context.source, open))
  642. const closeIndex = context.source.indexOf(close, open.length)
  643. if (closeIndex === -1) {
  644. emitError(context, ErrorCodes.X_MISSING_INTERPOLATION_END)
  645. return undefined
  646. }
  647. const start = getCursor(context)
  648. advanceBy(context, open.length)
  649. const innerStart = getCursor(context)
  650. const innerEnd = getCursor(context)
  651. const rawContentLength = closeIndex - open.length
  652. const rawContent = context.source.slice(0, rawContentLength)
  653. const preTrimContent = parseTextData(context, rawContentLength, mode)
  654. const content = preTrimContent.trim()
  655. const startOffset = preTrimContent.indexOf(content)
  656. if (startOffset > 0) {
  657. advancePositionWithMutation(innerStart, rawContent, startOffset)
  658. }
  659. const endOffset =
  660. rawContentLength - (preTrimContent.length - content.length - startOffset)
  661. advancePositionWithMutation(innerEnd, rawContent, endOffset)
  662. advanceBy(context, close.length)
  663. return {
  664. type: NodeTypes.INTERPOLATION,
  665. content: {
  666. type: NodeTypes.SIMPLE_EXPRESSION,
  667. isStatic: false,
  668. // Set `isConstant` to false by default and will decide in transformExpression
  669. isConstant: false,
  670. content,
  671. loc: getSelection(context, innerStart, innerEnd)
  672. },
  673. loc: getSelection(context, start)
  674. }
  675. }
  676. function parseText(context: ParserContext, mode: TextModes): TextNode {
  677. __TEST__ && assert(context.source.length > 0)
  678. const endTokens = ['<', context.options.delimiters[0]]
  679. if (mode === TextModes.CDATA) {
  680. endTokens.push(']]>')
  681. }
  682. let endIndex = context.source.length
  683. for (let i = 0; i < endTokens.length; i++) {
  684. const index = context.source.indexOf(endTokens[i], 1)
  685. if (index !== -1 && endIndex > index) {
  686. endIndex = index
  687. }
  688. }
  689. __TEST__ && assert(endIndex > 0)
  690. const start = getCursor(context)
  691. const content = parseTextData(context, endIndex, mode)
  692. return {
  693. type: NodeTypes.TEXT,
  694. content,
  695. loc: getSelection(context, start)
  696. }
  697. }
  698. /**
  699. * Get text data with a given length from the current location.
  700. * This translates HTML entities in the text data.
  701. */
  702. function parseTextData(
  703. context: ParserContext,
  704. length: number,
  705. mode: TextModes
  706. ): string {
  707. let rawText = context.source.slice(0, length)
  708. if (
  709. mode === TextModes.RAWTEXT ||
  710. mode === TextModes.CDATA ||
  711. rawText.indexOf('&') === -1
  712. ) {
  713. advanceBy(context, length)
  714. return rawText
  715. }
  716. // DATA or RCDATA containing "&"". Entity decoding required.
  717. const end = context.offset + length
  718. let decodedText = ''
  719. function advance(length: number) {
  720. advanceBy(context, length)
  721. rawText = rawText.slice(length)
  722. }
  723. while (context.offset < end) {
  724. const head = /&(?:#x?)?/i.exec(rawText)
  725. if (!head || context.offset + head.index >= end) {
  726. const remaining = end - context.offset
  727. decodedText += rawText.slice(0, remaining)
  728. advance(remaining)
  729. break
  730. }
  731. // Advance to the "&".
  732. decodedText += rawText.slice(0, head.index)
  733. advance(head.index)
  734. if (head[0] === '&') {
  735. // Named character reference.
  736. let name = '',
  737. value: string | undefined = undefined
  738. if (/[0-9a-z]/i.test(rawText[1])) {
  739. for (
  740. let length = context.options.maxCRNameLength;
  741. !value && length > 0;
  742. --length
  743. ) {
  744. name = rawText.substr(1, length)
  745. value = context.options.namedCharacterReferences[name]
  746. }
  747. if (value) {
  748. const semi = name.endsWith(';')
  749. if (
  750. mode === TextModes.ATTRIBUTE_VALUE &&
  751. !semi &&
  752. /[=a-z0-9]/i.test(rawText[1 + name.length] || '')
  753. ) {
  754. decodedText += '&' + name
  755. advance(1 + name.length)
  756. } else {
  757. decodedText += value
  758. advance(1 + name.length)
  759. if (!semi) {
  760. emitError(
  761. context,
  762. ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE
  763. )
  764. }
  765. }
  766. } else {
  767. emitError(context, ErrorCodes.UNKNOWN_NAMED_CHARACTER_REFERENCE)
  768. decodedText += '&' + name
  769. advance(1 + name.length)
  770. }
  771. } else {
  772. decodedText += '&'
  773. advance(1)
  774. }
  775. } else {
  776. // Numeric character reference.
  777. const hex = head[0] === '&#x'
  778. const pattern = hex ? /^&#x([0-9a-f]+);?/i : /^&#([0-9]+);?/
  779. const body = pattern.exec(rawText)
  780. if (!body) {
  781. decodedText += head[0]
  782. emitError(
  783. context,
  784. ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE
  785. )
  786. advance(head[0].length)
  787. } else {
  788. // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
  789. let cp = Number.parseInt(body[1], hex ? 16 : 10)
  790. if (cp === 0) {
  791. emitError(context, ErrorCodes.NULL_CHARACTER_REFERENCE)
  792. cp = 0xfffd
  793. } else if (cp > 0x10ffff) {
  794. emitError(
  795. context,
  796. ErrorCodes.CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE
  797. )
  798. cp = 0xfffd
  799. } else if (cp >= 0xd800 && cp <= 0xdfff) {
  800. emitError(context, ErrorCodes.SURROGATE_CHARACTER_REFERENCE)
  801. cp = 0xfffd
  802. } else if ((cp >= 0xfdd0 && cp <= 0xfdef) || (cp & 0xfffe) === 0xfffe) {
  803. emitError(context, ErrorCodes.NONCHARACTER_CHARACTER_REFERENCE)
  804. } else if (
  805. (cp >= 0x01 && cp <= 0x08) ||
  806. cp === 0x0b ||
  807. (cp >= 0x0d && cp <= 0x1f) ||
  808. (cp >= 0x7f && cp <= 0x9f)
  809. ) {
  810. emitError(context, ErrorCodes.CONTROL_CHARACTER_REFERENCE)
  811. cp = CCR_REPLACEMENTS[cp] || cp
  812. }
  813. decodedText += String.fromCodePoint(cp)
  814. advance(body[0].length)
  815. if (!body![0].endsWith(';')) {
  816. emitError(
  817. context,
  818. ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE
  819. )
  820. }
  821. }
  822. }
  823. }
  824. return decodedText
  825. }
  826. function getCursor(context: ParserContext): Position {
  827. const { column, line, offset } = context
  828. return { column, line, offset }
  829. }
  830. function getSelection(
  831. context: ParserContext,
  832. start: Position,
  833. end?: Position
  834. ): SourceLocation {
  835. end = end || getCursor(context)
  836. return {
  837. start,
  838. end,
  839. source: context.originalSource.slice(start.offset, end.offset)
  840. }
  841. }
  842. function last<T>(xs: T[]): T | undefined {
  843. return xs[xs.length - 1]
  844. }
  845. function startsWith(source: string, searchString: string): boolean {
  846. return source.startsWith(searchString)
  847. }
  848. function advanceBy(context: ParserContext, numberOfCharacters: number): void {
  849. const { source } = context
  850. __TEST__ && assert(numberOfCharacters <= source.length)
  851. advancePositionWithMutation(context, source, numberOfCharacters)
  852. context.source = source.slice(numberOfCharacters)
  853. }
  854. function advanceSpaces(context: ParserContext): void {
  855. const match = /^[\t\r\n\f ]+/.exec(context.source)
  856. if (match) {
  857. advanceBy(context, match[0].length)
  858. }
  859. }
  860. function getNewPosition(
  861. context: ParserContext,
  862. start: Position,
  863. numberOfCharacters: number
  864. ): Position {
  865. return advancePositionWithClone(
  866. start,
  867. context.originalSource.slice(start.offset, numberOfCharacters),
  868. numberOfCharacters
  869. )
  870. }
  871. function emitError(
  872. context: ParserContext,
  873. code: ErrorCodes,
  874. offset?: number,
  875. loc: Position = getCursor(context)
  876. ): void {
  877. if (offset) {
  878. loc.offset += offset
  879. loc.column += offset
  880. }
  881. context.options.onError(
  882. createCompilerError(code, {
  883. start: loc,
  884. end: loc,
  885. source: ''
  886. })
  887. )
  888. }
  889. function isEnd(
  890. context: ParserContext,
  891. mode: TextModes,
  892. ancestors: ElementNode[]
  893. ): boolean {
  894. const s = context.source
  895. switch (mode) {
  896. case TextModes.DATA:
  897. if (startsWith(s, '</')) {
  898. //TODO: probably bad performance
  899. for (let i = ancestors.length - 1; i >= 0; --i) {
  900. if (startsWithEndTagOpen(s, ancestors[i].tag)) {
  901. return true
  902. }
  903. }
  904. }
  905. break
  906. case TextModes.RCDATA:
  907. case TextModes.RAWTEXT: {
  908. const parent = last(ancestors)
  909. if (parent && startsWithEndTagOpen(s, parent.tag)) {
  910. return true
  911. }
  912. break
  913. }
  914. case TextModes.CDATA:
  915. if (startsWith(s, ']]>')) {
  916. return true
  917. }
  918. break
  919. }
  920. return !s
  921. }
  922. function startsWithEndTagOpen(source: string, tag: string): boolean {
  923. return (
  924. startsWith(source, '</') &&
  925. source.substr(2, tag.length).toLowerCase() === tag.toLowerCase() &&
  926. /[\t\n\f />]/.test(source[2 + tag.length] || '>')
  927. )
  928. }
  929. // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
  930. const CCR_REPLACEMENTS: { [key: number]: number | undefined } = {
  931. 0x80: 0x20ac,
  932. 0x82: 0x201a,
  933. 0x83: 0x0192,
  934. 0x84: 0x201e,
  935. 0x85: 0x2026,
  936. 0x86: 0x2020,
  937. 0x87: 0x2021,
  938. 0x88: 0x02c6,
  939. 0x89: 0x2030,
  940. 0x8a: 0x0160,
  941. 0x8b: 0x2039,
  942. 0x8c: 0x0152,
  943. 0x8e: 0x017d,
  944. 0x91: 0x2018,
  945. 0x92: 0x2019,
  946. 0x93: 0x201c,
  947. 0x94: 0x201d,
  948. 0x95: 0x2022,
  949. 0x96: 0x2013,
  950. 0x97: 0x2014,
  951. 0x98: 0x02dc,
  952. 0x99: 0x2122,
  953. 0x9a: 0x0161,
  954. 0x9b: 0x203a,
  955. 0x9c: 0x0153,
  956. 0x9e: 0x017e,
  957. 0x9f: 0x0178
  958. }