parse.ts 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020
  1. import { NO } from '@vue/shared'
  2. import {
  3. ErrorCodes,
  4. createCompilerError,
  5. defaultOnError,
  6. CompilerError
  7. } from './errors'
  8. import {
  9. assert,
  10. advancePositionWithMutation,
  11. advancePositionWithClone
  12. } from './utils'
  13. import {
  14. Namespace,
  15. Namespaces,
  16. AttributeNode,
  17. CommentNode,
  18. DirectiveNode,
  19. ElementNode,
  20. ElementTypes,
  21. ExpressionNode,
  22. NodeTypes,
  23. Position,
  24. RootNode,
  25. SourceLocation,
  26. TextNode,
  27. TemplateChildNode,
  28. InterpolationNode
  29. } from './ast'
  30. import { extend } from '@vue/shared'
  /**
   * Options accepted by `parse`. Every member is optional; when the parser
   * context is created, missing members are taken from `defaultParserOptions`.
   */
  export interface ParserOptions {
    /** Tells whether `tag` is a void tag, e.g. img, br, hr. */
    isVoidTag?: (tag: string) => boolean

    /** Tells whether `tag` is a native tag, e.g. loading-indicator in weex. */
    isNativeTag?: (tag: string) => boolean

    /** Picks the namespace for `tag`, given its parent element (if any). */
    getNamespace?: (tag: string, parent: ElementNode | undefined) => Namespace

    /** Picks the text mode used to parse the children of `tag`. */
    getTextMode?: (tag: string, ns: Namespace) => TextModes

    /** Opening and closing interpolation delimiters, e.g. ['{{', '}}']. */
    delimiters?: [string, string]

    /** When truthy, text nodes flagged `isEmpty` are dropped from the output. */
    ignoreSpaces?: boolean

    /**
     * Map to HTML entities, keyed by the text that follows `&`.
     * E.g., `{ "amp;": "&" }`
     * The full set is https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references
     */
    namedCharacterReferences?: { [name: string]: string | undefined }

    /** Called with every error the parser reports. */
    onError?: (error: CompilerError) => void
  }
  /**
   * Parser options after defaults have been applied: every option is
   * required except `isNativeTag`, which stays optional.
   */
  type MergedParserOptions = Omit<Required<ParserOptions>, 'isNativeTag'> &
    Pick<ParserOptions, 'isNativeTag'>
  /**
   * Option values used for anything the caller of `parse` leaves out.
   */
  export const defaultParserOptions: MergedParserOptions = {
    // Mustache-style interpolation.
    delimiters: ['{{', '}}'],
    // Drop text nodes flagged as empty.
    ignoreSpaces: true,
    // Every tag lives in the HTML namespace.
    getNamespace: () => Namespaces.HTML,
    // Every element's children are parsed in DATA mode.
    getTextMode: () => TextModes.DATA,
    // `NO` comes from @vue/shared.
    isVoidTag: NO,
    // Keys are the text following `&` (including the trailing `;`).
    namedCharacterReferences: {
      'gt;': '>',
      'lt;': '<',
      'amp;': '&',
      'apos;': "'",
      'quot;': '"'
    },
    onError: defaultOnError
  }
  /**
   * How a run of character data is tokenized.
   *
   *                 | Elements | Entities | End sign              | Inside of
   * DATA            | ✔        | ✔        | End tags of ancestors |
   * RCDATA          | ✘        | ✔        | End tag of the parent | <textarea>
   * RAWTEXT         | ✘        | ✘        | End tag of the parent | <style>,<script>
   * CDATA           | ✘        | ✘        | `]]>`                 | <![CDATA[ ... ]]>
   * ATTRIBUTE_VALUE | -        | ✔        | -                     | attribute values
   *
   * ATTRIBUTE_VALUE is only passed to `parseTextData`, where it changes how
   * named character references without a trailing `;` are handled.
   */
  export const enum TextModes {
    DATA = 0,
    RCDATA = 1,
    RAWTEXT = 2,
    CDATA = 3,
    ATTRIBUTE_VALUE = 4
  }
  /**
   * Mutable state threaded through every parse function.
   */
  interface ParserContext {
    /** Caller options merged over `defaultParserOptions`. */
    options: MergedParserOptions
    /** The complete input; used to cut `loc.source` snippets. */
    readonly originalSource: string
    /** The part of the input that has not been consumed yet. */
    source: string
    /** Cursor: number of characters consumed so far (starts at 0). */
    offset: number
    /** Cursor: current line (starts at 1). */
    line: number
    /** Cursor: current column (starts at 1). */
    column: number
    /** Length of the longest key in `namedCharacterReferences`. */
    maxCRNameLength: number
    /** True while parsing inside an element that carries `v-pre`. */
    inPre: boolean
  }
  /**
   * Parses a template string into a root AST node.
   *
   * @param content - the template source
   * @param options - parser options; omitted members fall back to defaults
   * @returns a ROOT node whose `loc` spans everything that was consumed
   */
  export function parse(content: string, options: ParserOptions = {}): RootNode {
    const context = createParserContext(content, options)
    const start = getCursor(context)
    // Children must be parsed first so that `loc` ends where parsing stopped.
    const children = parseChildren(context, TextModes.DATA, [])
    const loc = getSelection(context, start)
    return {
      type: NodeTypes.ROOT,
      children,
      helpers: [],
      components: [],
      directives: [],
      hoists: [],
      codegenNode: undefined,
      loc
    }
  }
  /**
   * Builds the initial parser state for `content`: the cursor sits at
   * line 1, column 1, offset 0 and nothing has been consumed.
   */
  function createParserContext(
    content: string,
    options: ParserOptions
  ): ParserContext {
    const references =
      options.namedCharacterReferences ||
      defaultParserOptions.namedCharacterReferences

    // Longest reference name; bounds the lookup window in parseTextData.
    let longestName = 0
    for (const name of Object.keys(references)) {
      longestName = Math.max(longestName, name.length)
    }

    return {
      options: {
        ...defaultParserOptions,
        ...options
      },
      column: 1,
      line: 1,
      offset: 0,
      originalSource: content,
      source: content,
      maxCRNameLength: longestName,
      inPre: false
    }
  }
  /**
   * Parses child nodes until `isEnd` reports the end of the current scope.
   *
   * @param context - parser state (consumed as nodes are produced)
   * @param mode - text mode governing how the content is tokenized
   * @param ancestors - stack of open elements, innermost last
   * @returns the child nodes, after the filtering/merging done by `pushNode`
   */
  function parseChildren(
    context: ParserContext,
    mode: TextModes,
    ancestors: ElementNode[]
  ): TemplateChildNode[] {
    const parent = last(ancestors)
    const ns = parent ? parent.ns : Namespaces.HTML
    const nodes: TemplateChildNode[] = []

    while (!isEnd(context, mode, ancestors)) {
      __DEV__ && assert(context.source.length > 0)
      const s = context.source
      let node: TemplateChildNode | TemplateChildNode[] | undefined

      if (!context.inPre && startsWith(s, context.options.delimiters[0])) {
        // '{{'
        node = parseInterpolation(context, mode)
      } else if (mode === TextModes.DATA && s[0] === '<') {
        // https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
        if (s.length === 1) {
          emitError(context, ErrorCodes.EOF_BEFORE_TAG_NAME, 1)
        } else {
          switch (s[1]) {
            case '!':
              // https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
              if (startsWith(s, '<!--')) {
                node = parseComment(context)
              } else if (startsWith(s, '<!DOCTYPE')) {
                // Ignore DOCTYPE by a limitation.
                node = parseBogusComment(context)
              } else if (startsWith(s, '<![CDATA[')) {
                if (ns !== Namespaces.HTML) {
                  node = parseCDATA(context, ancestors)
                } else {
                  emitError(context, ErrorCodes.CDATA_IN_HTML_CONTENT)
                  node = parseBogusComment(context)
                }
              } else {
                emitError(context, ErrorCodes.INCORRECTLY_OPENED_COMMENT)
                node = parseBogusComment(context)
              }
              break

            case '/':
              // https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state
              if (s.length === 2) {
                emitError(context, ErrorCodes.EOF_BEFORE_TAG_NAME, 2)
              } else if (s[2] === '>') {
                // "</>" is dropped entirely.
                emitError(context, ErrorCodes.MISSING_END_TAG_NAME, 2)
                advanceBy(context, 3)
                continue
              } else if (/[a-z]/i.test(s[2])) {
                // An end tag that isEnd() did not accept: consume and discard it.
                emitError(context, ErrorCodes.X_INVALID_END_TAG)
                parseTag(context, TagType.End, parent)
                continue
              } else {
                emitError(
                  context,
                  ErrorCodes.INVALID_FIRST_CHARACTER_OF_TAG_NAME,
                  2
                )
                node = parseBogusComment(context)
              }
              break

            case '?':
              emitError(
                context,
                ErrorCodes.UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME,
                1
              )
              node = parseBogusComment(context)
              break

            default:
              if (/[a-z]/i.test(s[1])) {
                node = parseElement(context, ancestors)
              } else {
                // Not a tag after all; the '<' is picked up as text below.
                emitError(
                  context,
                  ErrorCodes.INVALID_FIRST_CHARACTER_OF_TAG_NAME,
                  1
                )
              }
          }
        }
      }

      // Anything that did not produce a node is plain text.
      if (!node) {
        node = parseText(context, mode)
      }

      const produced = Array.isArray(node) ? node : [node]
      for (let i = 0; i < produced.length; i++) {
        pushNode(context, nodes, produced[i])
      }
    }

    return nodes
  }
  /**
   * Appends `node` to `nodes`, applying the parser's output rules:
   * comments are dropped outside of dev builds, empty text is dropped when
   * `ignoreSpaces` is set, and a text node that starts exactly where the
   * previous text node ends is merged into it.
   */
  function pushNode(
    context: ParserContext,
    nodes: TemplateChildNode[],
    node: TemplateChildNode
  ): void {
    // ignore comments in production
    /* istanbul ignore next */
    if (!__DEV__ && node.type === NodeTypes.COMMENT) {
      return
    }

    if (node.type === NodeTypes.TEXT) {
      if (context.options.ignoreSpaces && node.isEmpty) {
        return
      }

      // Merge if both this and the previous node are text and those are
      // consecutive. This happens on "a < b" or something like.
      const prev = last(nodes)
      if (
        prev &&
        prev.type === NodeTypes.TEXT &&
        prev.loc.end.offset === node.loc.start.offset
      ) {
        prev.content += node.content
        prev.isEmpty = prev.content.trim().length === 0
        prev.loc.end = node.loc.end
        prev.loc.source += node.loc.source
        return
      }
    }

    nodes.push(node)
  }
  /**
   * Parses a `<![CDATA[ ... ]]>` section and returns the nodes found inside.
   * Reports EOF_IN_CDATA when the input ends before `]]>`.
   */
  function parseCDATA(
    context: ParserContext,
    ancestors: ElementNode[]
  ): TemplateChildNode[] {
    __DEV__ &&
      assert(last(ancestors) == null || last(ancestors)!.ns !== Namespaces.HTML)
    __DEV__ && assert(startsWith(context.source, '<![CDATA['))

    // Skip the 9 characters of '<![CDATA['.
    advanceBy(context, 9)
    const nodes = parseChildren(context, TextModes.CDATA, ancestors)

    if (context.source.length > 0) {
      __DEV__ && assert(startsWith(context.source, ']]>'))
      advanceBy(context, 3)
    } else {
      emitError(context, ErrorCodes.EOF_IN_CDATA)
    }
    return nodes
  }
  /**
   * Parses a `<!-- ... -->` comment starting at the cursor.
   * Reports unterminated, abruptly closed (`<!-->`), incorrectly closed
   * (`--!>`) and nested comments.
   */
  function parseComment(context: ParserContext): CommentNode {
    __DEV__ && assert(startsWith(context.source, '<!--'))
    const start = getCursor(context)
    let content: string

    // Regular comment.
    const closer = /--(\!)?>/.exec(context.source)
    if (closer) {
      if (closer.index <= 3) {
        emitError(context, ErrorCodes.ABRUPT_CLOSING_OF_EMPTY_COMMENT)
      }
      if (closer[1]) {
        emitError(context, ErrorCodes.INCORRECTLY_CLOSED_COMMENT)
      }
      content = context.source.slice(4, closer.index)

      // Advancing with reporting nested comments: the cursor is moved to each
      // inner '<!--' in turn so that the error is reported at its position.
      const body = context.source.slice(0, closer.index)
      let prevIndex = 1
      let nestedIndex = body.indexOf('<!--', prevIndex)
      while (nestedIndex !== -1) {
        advanceBy(context, nestedIndex - prevIndex + 1)
        if (nestedIndex + 4 < body.length) {
          emitError(context, ErrorCodes.NESTED_COMMENT)
        }
        prevIndex = nestedIndex + 1
        nestedIndex = body.indexOf('<!--', prevIndex)
      }
      // Consume whatever remains, including the closing sequence.
      advanceBy(context, closer.index + closer[0].length - prevIndex + 1)
    } else {
      // No closing sequence: the comment runs to the end of the input.
      content = context.source.slice(4)
      advanceBy(context, context.source.length)
      emitError(context, ErrorCodes.EOF_IN_COMMENT)
    }

    return {
      type: NodeTypes.COMMENT,
      content,
      loc: getSelection(context, start)
    }
  }
  /**
   * Consumes malformed markup (`<!...`, `<?...`, `</` followed by a
   * non-letter) up to and including the next `>` — or to the end of the
   * input when there is none — and returns it as a comment node.
   */
  function parseBogusComment(context: ParserContext): CommentNode | undefined {
    __DEV__ && assert(/^<(?:[\!\?]|\/[^a-z>])/i.test(context.source))
    const start = getCursor(context)

    // For '<?' the content starts at the '?'; otherwise two characters are skipped.
    const contentStart = context.source[1] === '?' ? 1 : 2
    const closeIndex = context.source.indexOf('>')
    const isClosed = closeIndex !== -1

    const content = isClosed
      ? context.source.slice(contentStart, closeIndex)
      : context.source.slice(contentStart)
    advanceBy(context, isClosed ? closeIndex + 1 : context.source.length)

    return {
      type: NodeTypes.COMMENT,
      content,
      loc: getSelection(context, start)
    }
  }
  /**
   * Parses one element: its start tag, its children and its end tag.
   *
   * Self-closing elements and void tags (per `options.isVoidTag`) are
   * returned right after the start tag, without children. A missing end tag
   * is reported as X_MISSING_END_TAG.
   *
   * `parseTag` switches `context.inPre` on when the start tag carries `v-pre`;
   * this function switches it off again once the element is finished — also
   * for self-closing and void elements, which have no children the flag
   * could apply to.
   *
   * @param context - parser state
   * @param ancestors - stack of open elements, innermost last
   * @returns the parsed element
   */
  function parseElement(
    context: ParserContext,
    ancestors: ElementNode[]
  ): ElementNode | undefined {
    __DEV__ && assert(/^<[a-z]/i.test(context.source))

    // Start tag.
    const wasInPre = context.inPre
    const parent = last(ancestors)
    const element = parseTag(context, TagType.Start, parent)
    // True when this very element turned v-pre mode on.
    const isPreBoundary = context.inPre && !wasInPre

    if (element.isSelfClosing || context.options.isVoidTag(element.tag)) {
      // Leave v-pre mode here too; otherwise it would stay active for every
      // node that follows, e.g. after `<img v-pre/>`.
      if (isPreBoundary) {
        context.inPre = false
      }
      return element
    }

    // Children.
    ancestors.push(element)
    const mode = context.options.getTextMode(element.tag, element.ns)
    const children = parseChildren(context, mode, ancestors)
    ancestors.pop()
    element.children = children

    // End tag.
    if (startsWithEndTagOpen(context.source, element.tag)) {
      parseTag(context, TagType.End, parent)
    } else {
      emitError(context, ErrorCodes.X_MISSING_END_TAG)
      if (context.source.length === 0 && element.tag.toLowerCase() === 'script') {
        const first = children[0]
        if (first && startsWith(first.loc.source, '<!--')) {
          emitError(context, ErrorCodes.EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT)
        }
      }
    }

    // Widen the location so that it covers children and end tag as well.
    element.loc = getSelection(context, element.loc.start)

    if (isPreBoundary) {
      context.inPre = false
    }
    return element
  }
  /** Which kind of tag `parseTag` is asked to parse. */
  const enum TagType {
    /** `<div ...>` */
    Start = 0,
    /** `</div>` */
    End = 1
  }
  /**
   * Parse a tag (E.g. `<div id=a>`) with that type (start tag or end tag).
   *
   * Consumes the tag name, its attributes and the closing `>` / `/>`.
   * When a `v-pre` directive is found (and v-pre mode is not active yet),
   * `context.inPre` is switched on and the attributes are parsed a second
   * time so that they are read as plain attributes.
   *
   * @returns an element node without children; `loc` covers this tag only
   */
  function parseTag(
    context: ParserContext,
    type: TagType,
    parent: ElementNode | undefined
  ): ElementNode {
    __DEV__ && assert(/^<\/?[a-z]/i.test(context.source))
    __DEV__ &&
      assert(
        type === (startsWith(context.source, '</') ? TagType.End : TagType.Start)
      )

    // Tag open.
    const start = getCursor(context)
    const [opening, tag] = /^<\/?([a-z][^\t\r\n\f />]*)/i.exec(context.source)!
    const ns = context.options.getNamespace(tag, parent)
    advanceBy(context, opening.length)
    advanceSpaces(context)

    // save current state in case we need to re-parse attributes with v-pre
    const attrsCursor = getCursor(context)
    const attrsSource = context.source

    // Attributes.
    let props = parseAttributes(context, type)

    // check v-pre
    const turnsOnPre =
      !context.inPre &&
      props.some(p => p.type === NodeTypes.DIRECTIVE && p.name === 'pre')
    if (turnsOnPre) {
      context.inPre = true
      // reset context
      extend(context, attrsCursor)
      context.source = attrsSource
      // re-parse attrs and filter out v-pre itself
      props = parseAttributes(context, type).filter(p => p.name !== 'v-pre')
    }

    // Tag close.
    let isSelfClosing = false
    if (context.source.length > 0) {
      isSelfClosing = startsWith(context.source, '/>')
      if (type === TagType.End && isSelfClosing) {
        emitError(context, ErrorCodes.END_TAG_WITH_TRAILING_SOLIDUS)
      }
      advanceBy(context, isSelfClosing ? 2 : 1)
    } else {
      emitError(context, ErrorCodes.EOF_IN_TAG)
    }

    // Classify the tag; inside v-pre everything stays a plain element.
    let tagType = ElementTypes.ELEMENT
    if (!context.inPre) {
      // Without an isNativeTag option, a capitalized name means "component".
      const isComponent = context.options.isNativeTag
        ? !context.options.isNativeTag(tag)
        : /^[A-Z]/.test(tag)
      if (isComponent) {
        tagType = ElementTypes.COMPONENT
      }

      if (tag === 'slot') {
        tagType = ElementTypes.SLOT
      } else if (tag === 'template') {
        tagType = ElementTypes.TEMPLATE
      }
    }

    return {
      type: NodeTypes.ELEMENT,
      ns,
      tag,
      tagType,
      props,
      isSelfClosing,
      children: [],
      loc: getSelection(context, start),
      codegenNode: undefined // to be created during transform phase
    }
  }
  /**
   * Parses attributes until `>`, `/>` or the end of the input is reached
   * (the terminator itself is not consumed).
   *
   * For end tags the attributes are still consumed, but each one is reported
   * as END_TAG_WITH_ATTRIBUTES and none is returned.
   */
  function parseAttributes(
    context: ParserContext,
    type: TagType
  ): (AttributeNode | DirectiveNode)[] {
    const props: (AttributeNode | DirectiveNode)[] = []
    const attributeNames = new Set<string>()

    const atTagEnd = (): boolean =>
      context.source.length === 0 ||
      startsWith(context.source, '>') ||
      startsWith(context.source, '/>')

    while (!atTagEnd()) {
      // A '/' that is not part of '/>' is skipped with an error.
      if (startsWith(context.source, '/')) {
        emitError(context, ErrorCodes.UNEXPECTED_SOLIDUS_IN_TAG)
        advanceBy(context, 1)
        advanceSpaces(context)
        continue
      }

      if (type === TagType.End) {
        emitError(context, ErrorCodes.END_TAG_WITH_ATTRIBUTES)
      }

      const attr = parseAttribute(context, attributeNames)
      if (type === TagType.Start) {
        props.push(attr)
      }

      // Something other than whitespace, '/' or '>' directly after an attribute.
      if (/^[^\t\r\n\f />]/.test(context.source)) {
        emitError(context, ErrorCodes.MISSING_WHITESPACE_BETWEEN_ATTRIBUTES)
      }
      advanceSpaces(context)
    }

    return props
  }
  /**
   * Parses a single attribute (name plus optional `=value`) at the cursor.
   *
   * Outside of v-pre mode, names starting with `v-`, `:`, `@` or `#` produce
   * a directive node with name, optional argument and modifiers; everything
   * else produces a plain attribute node.
   *
   * @param context - parser state
   * @param nameSet - names already seen on the current tag; used to report
   *   DUPLICATE_ATTRIBUTE and extended with this attribute's name
   * @returns the attribute or directive node
   */
  function parseAttribute(
    context: ParserContext,
    nameSet: Set<string>
  ): AttributeNode | DirectiveNode {
    __DEV__ && assert(/^[^\t\r\n\f />]/.test(context.source))

    // Name.
    const start = getCursor(context)
    const match = /^[^\t\r\n\f />][^\t\r\n\f />=]*/.exec(context.source)!
    const name = match[0]

    if (nameSet.has(name)) {
      emitError(context, ErrorCodes.DUPLICATE_ATTRIBUTE)
    }
    nameSet.add(name)

    if (name[0] === '=') {
      emitError(context, ErrorCodes.UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME)
    }
    {
      // Report every quote or '<' inside the name at its own offset.
      const pattern = /["'<]/g
      let m: RegExpExecArray | null
      while ((m = pattern.exec(name)) !== null) {
        emitError(
          context,
          ErrorCodes.UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
          m.index
        )
      }
    }

    advanceBy(context, name.length)

    // Value
    let value:
      | {
          content: string
          isQuoted: boolean
          loc: SourceLocation
        }
      | undefined = undefined

    if (/^[\t\r\n\f ]*=/.test(context.source)) {
      advanceSpaces(context)
      advanceBy(context, 1)
      advanceSpaces(context)
      value = parseAttributeValue(context)
      if (!value) {
        emitError(context, ErrorCodes.MISSING_ATTRIBUTE_VALUE)
      }
    }
    const loc = getSelection(context, start)

    if (!context.inPre && /^(v-|:|@|#)/.test(name)) {
      // Groups: [1] directive name after `v-`, [2] argument, [3] modifiers.
      const dirMatch = /(?:^v-([a-z0-9-]+))?(?:(?::|^@|^#)([^\.]+))?(.+)?$/i.exec(
        name
      )!

      let arg: ExpressionNode | undefined

      if (dirMatch[2]) {
        const rawArg = dirMatch[2]
        // The pattern is anchored at the end of the name, so the argument sits
        // directly in front of the modifiers. Deriving the offset from that
        // (instead of searching for the argument text) stays correct when the
        // same text also occurs earlier in the name, as in `v-slot:s`.
        const modifiersLength = dirMatch[3] ? dirMatch[3].length : 0
        const startOffset = name.length - modifiersLength - rawArg.length
        const argLoc = getSelection(
          context,
          getNewPosition(context, start, startOffset),
          getNewPosition(context, start, startOffset + rawArg.length)
        )

        let content = rawArg
        let isStatic = true

        if (content.startsWith('[')) {
          // Dynamic argument: `[expr]`.
          isStatic = false

          if (content.endsWith(']')) {
            content = content.slice(1, -1)
          } else {
            emitError(
              context,
              ErrorCodes.X_MISSING_DYNAMIC_DIRECTIVE_ARGUMENT_END
            )
            // There is no closing bracket to strip; only drop the opening one.
            content = content.slice(1)
          }
        }

        arg = {
          type: NodeTypes.SIMPLE_EXPRESSION,
          content,
          isStatic,
          loc: argLoc
        }
      }

      if (value && value.isQuoted) {
        // Shrink the value location so that it excludes the quotes.
        const valueLoc = value.loc
        valueLoc.start.offset++
        valueLoc.start.column++
        valueLoc.end = advancePositionWithClone(valueLoc.start, value.content)
        valueLoc.source = valueLoc.source.slice(1, -1)
      }

      return {
        type: NodeTypes.DIRECTIVE,
        // Shorthands: `:` is bind, `@` is on, otherwise (`#`) slot.
        name:
          dirMatch[1] ||
          (startsWith(name, ':')
            ? 'bind'
            : startsWith(name, '@')
              ? 'on'
              : 'slot'),
        exp: value && {
          type: NodeTypes.SIMPLE_EXPRESSION,
          content: value.content,
          isStatic: false,
          loc: value.loc
        },
        arg,
        modifiers: dirMatch[3] ? dirMatch[3].substr(1).split('.') : [],
        loc
      }
    }

    return {
      type: NodeTypes.ATTRIBUTE,
      name,
      value: value && {
        type: NodeTypes.TEXT,
        content: value.content,
        isEmpty: value.content.trim().length === 0,
        loc: value.loc
      },
      loc
    }
  }
  /**
   * Parses an attribute value at the cursor, quoted or unquoted.
   *
   * @returns the decoded content, whether it was quoted, and its location
   *   (including the quotes, if any); `undefined` when there is no value
   */
  function parseAttributeValue(
    context: ParserContext
  ):
    | {
        content: string
        isQuoted: boolean
        loc: SourceLocation
      }
    | undefined {
    const start = getCursor(context)
    const quote = context.source[0]
    const isQuoted = quote === `"` || quote === `'`
    let content: string

    if (!isQuoted) {
      // Unquoted
      const match = /^[^\t\r\n\f >]+/.exec(context.source)
      if (!match) {
        return undefined
      }
      const raw = match[0]

      // Report each character that is not allowed in an unquoted value.
      const unexpectedChars = /["'<=`]/g
      for (
        let m = unexpectedChars.exec(raw);
        m !== null;
        m = unexpectedChars.exec(raw)
      ) {
        emitError(
          context,
          ErrorCodes.UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE,
          m.index
        )
      }
      content = parseTextData(context, raw.length, TextModes.ATTRIBUTE_VALUE)
    } else {
      // Quoted value.
      advanceBy(context, 1)
      const endIndex = context.source.indexOf(quote)
      if (endIndex !== -1) {
        content = parseTextData(context, endIndex, TextModes.ATTRIBUTE_VALUE)
        // Skip the closing quote.
        advanceBy(context, 1)
      } else {
        // No closing quote: the value runs to the end of the input.
        content = parseTextData(
          context,
          context.source.length,
          TextModes.ATTRIBUTE_VALUE
        )
      }
    }

    return { content, isQuoted, loc: getSelection(context, start) }
  }
  /**
   * Parses an interpolation (`{{ expr }}` with the default delimiters).
   *
   * The expression content is trimmed, and its location is narrowed to the
   * trimmed text. When the closing delimiter is missing, an error is reported
   * and `undefined` is returned without consuming anything.
   */
  function parseInterpolation(
    context: ParserContext,
    mode: TextModes
  ): InterpolationNode | undefined {
    const [open, close] = context.options.delimiters
    __DEV__ && assert(startsWith(context.source, open))

    const closeIndex = context.source.indexOf(close, open.length)
    if (closeIndex === -1) {
      emitError(context, ErrorCodes.X_MISSING_INTERPOLATION_END)
      return undefined
    }

    const start = getCursor(context)
    advanceBy(context, open.length)

    // Both positions start right after the opening delimiter and are moved
    // below to the boundaries of the trimmed expression.
    const innerStart = getCursor(context)
    const innerEnd = getCursor(context)

    const rawContentLength = closeIndex - open.length
    const rawContent = context.source.slice(0, rawContentLength)
    const preTrimContent = parseTextData(context, rawContentLength, mode)
    const content = preTrimContent.trim()

    const leading = preTrimContent.indexOf(content)
    if (leading > 0) {
      advancePositionWithMutation(innerStart, rawContent, leading)
    }

    const trailing = preTrimContent.length - content.length - leading
    advancePositionWithMutation(innerEnd, rawContent, rawContentLength - trailing)

    advanceBy(context, close.length)

    return {
      type: NodeTypes.INTERPOLATION,
      content: {
        type: NodeTypes.SIMPLE_EXPRESSION,
        isStatic: false,
        content,
        loc: getSelection(context, innerStart, innerEnd)
      },
      loc: getSelection(context, start)
    }
  }
  /**
   * Parses a run of text. The run always contains at least one character and
   * ends before the next `<`, the next opening delimiter, the next `]]>`
   * (CDATA mode only) or at the end of the input, whichever comes first.
   */
  function parseText(context: ParserContext, mode: TextModes): TextNode {
    __DEV__ && assert(context.source.length > 0)

    const [open] = context.options.delimiters
    const src = context.source

    // Searching from index 1 guarantees that the first character is consumed.
    const stops = [src.indexOf('<', 1), src.indexOf(open, 1)]
    if (mode === TextModes.CDATA) {
      stops.push(src.indexOf(']]>'))
    }

    let endIndex = src.length
    for (const stop of stops) {
      if (stop !== -1 && stop < endIndex) {
        endIndex = stop
      }
    }
    __DEV__ && assert(endIndex > 0)

    const start = getCursor(context)
    const content = parseTextData(context, endIndex, mode)

    return {
      type: NodeTypes.TEXT,
      content,
      loc: getSelection(context, start),
      isEmpty: !content.trim()
    }
  }
  /**
   * Get text data with a given length from the current location.
   * This translates HTML entities in the text data.
   *
   * In RAWTEXT and CDATA mode the text is returned verbatim. In every other
   * mode named (`&amp;`) and numeric (`&#38;`, `&#x26;`) character
   * references are decoded; malformed references are reported through
   * `emitError` and kept or replaced as described at each branch.
   *
   * @param context - parser state; the cursor is advanced past the text
   * @param length - number of source characters that make up the text
   * @param mode - current text mode
   * @returns the (decoded) text
   */
  function parseTextData(
    context: ParserContext,
    length: number,
    mode: TextModes
  ): string {
    if (mode === TextModes.RAWTEXT || mode === TextModes.CDATA) {
      const text = context.source.slice(0, length)
      advanceBy(context, length)
      return text
    }

    // DATA, RCDATA or ATTRIBUTE_VALUE. Entity decoding required.
    const end = context.offset + length
    let text: string = ''

    while (context.offset < end) {
      const head = /&(?:#x?)?/i.exec(context.source)
      if (!head || context.offset + head.index >= end) {
        // No further reference inside the range: take the rest verbatim.
        const remaining = end - context.offset
        text += context.source.slice(0, remaining)
        advanceBy(context, remaining)
        break
      }

      // Advance to the "&".
      text += context.source.slice(0, head.index)
      advanceBy(context, head.index)

      if (head[0] === '&') {
        // Named character reference.
        let name = '',
          value: string | undefined = undefined
        // `|| ''` matters when the "&" is the last character of the input:
        // testing `undefined` would match the letters of "undefined" and a
        // bare "&" would be reported as an unknown reference.
        if (/[0-9a-z]/i.test(context.source[1] || '')) {
          // Try the longest candidate name first, then shorter ones.
          for (
            let nameLength = context.maxCRNameLength;
            !value && nameLength > 0;
            --nameLength
          ) {
            name = context.source.substr(1, nameLength)
            value = context.options.namedCharacterReferences[name]
          }
          if (value) {
            const semi = name.endsWith(';')
            if (
              mode === TextModes.ATTRIBUTE_VALUE &&
              !semi &&
              /[=a-z0-9]/i.test(context.source[1 + name.length] || '')
            ) {
              // In attribute values a reference without ";" that is followed
              // by "=" or an alphanumeric is kept as literal text.
              text += '&'
              text += name
              advanceBy(context, 1 + name.length)
            } else {
              text += value
              advanceBy(context, 1 + name.length)
              if (!semi) {
                emitError(
                  context,
                  ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE
                )
              }
            }
          } else {
            // Unknown name: keep the "&" and the character after it as text.
            emitError(context, ErrorCodes.UNKNOWN_NAMED_CHARACTER_REFERENCE)
            text += '&'
            text += name
            advanceBy(context, 1 + name.length)
          }
        } else {
          // A bare "&".
          text += '&'
          advanceBy(context, 1)
        }
      } else {
        // Numeric character reference.
        // `head` is matched case-insensitively, so accept "&#X" as well.
        const hex = head[0].toLowerCase() === '&#x'
        const pattern = hex ? /^&#x([0-9a-f]+);?/i : /^&#([0-9]+);?/
        const body = pattern.exec(context.source)
        if (!body) {
          // "&#" / "&#x" without digits: keep it as text.
          text += head[0]
          emitError(
            context,
            ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE
          )
          advanceBy(context, head[0].length)
        } else {
          // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
          let cp = Number.parseInt(body[1], hex ? 16 : 10)
          if (cp === 0) {
            emitError(context, ErrorCodes.NULL_CHARACTER_REFERENCE)
            cp = 0xfffd
          } else if (cp > 0x10ffff) {
            emitError(
              context,
              ErrorCodes.CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE
            )
            cp = 0xfffd
          } else if (cp >= 0xd800 && cp <= 0xdfff) {
            emitError(context, ErrorCodes.SURROGATE_CHARACTER_REFERENCE)
            cp = 0xfffd
          } else if ((cp >= 0xfdd0 && cp <= 0xfdef) || (cp & 0xfffe) === 0xfffe) {
            // Reported, but the code point is kept.
            emitError(context, ErrorCodes.NONCHARACTER_CHARACTER_REFERENCE)
          } else if (
            (cp >= 0x01 && cp <= 0x08) ||
            cp === 0x0b ||
            (cp >= 0x0d && cp <= 0x1f) ||
            (cp >= 0x7f && cp <= 0x9f)
          ) {
            emitError(context, ErrorCodes.CONTROL_CHARACTER_REFERENCE)
            cp = CCR_REPLACEMENTS[cp] || cp
          }
          text += String.fromCodePoint(cp)
          advanceBy(context, body[0].length)
          if (!body[0].endsWith(';')) {
            emitError(
              context,
              ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE
            )
          }
        }
      }
    }
    return text
  }
  /** Returns a copy of the context's current position. */
  function getCursor(context: ParserContext): Position {
    return {
      column: context.column,
      line: context.line,
      offset: context.offset
    }
  }
  /**
   * Builds a source location from `start` to `end`, with the matching slice
   * of the original input as its `source`.
   *
   * @param end - defaults to the current cursor position
   */
  function getSelection(
    context: ParserContext,
    start: Position,
    end?: Position
  ): SourceLocation {
    const stop = end || getCursor(context)
    const source = context.originalSource.slice(start.offset, stop.offset)
    return { start, end: stop, source }
  }
  /** Returns the final element of `xs`, or `undefined` for an empty array. */
  function last<T>(xs: T[]): T | undefined {
    return xs.length > 0 ? xs[xs.length - 1] : undefined
  }
  /** Tells whether `source` begins with `searchString`. */
  function startsWith(source: string, searchString: string): boolean {
    return source.slice(0, searchString.length) === searchString
  }
  /**
   * Consumes `numberOfCharacters` characters: updates the cursor held in the
   * context and removes the characters from the front of `context.source`.
   */
  function advanceBy(context: ParserContext, numberOfCharacters: number): void {
    const pending = context.source
    __DEV__ && assert(numberOfCharacters <= pending.length)
    // The context itself serves as the position object that gets mutated.
    advancePositionWithMutation(context, pending, numberOfCharacters)
    context.source = pending.slice(numberOfCharacters)
  }
  /** Consumes any run of tab, CR, LF, form feed or space at the cursor. */
  function advanceSpaces(context: ParserContext): void {
    const spaces = /^[\t\r\n\f ]+/.exec(context.source)
    if (!spaces) {
      return
    }
    advanceBy(context, spaces[0].length)
  }
  /**
   * Returns the position that lies `numberOfCharacters` after `start`,
   * leaving `start` itself untouched.
   *
   * @param context - parser state; only `originalSource` is read
   * @param start - position to measure from
   * @param numberOfCharacters - distance to move, in characters
   */
  function getNewPosition(
    context: ParserContext,
    start: Position,
    numberOfCharacters: number
  ): Position {
    // Hand over exactly the text being skipped. `slice` takes an absolute end
    // index, so the length has to be added to the start offset.
    const skipped = context.originalSource.slice(
      start.offset,
      start.offset + numberOfCharacters
    )
    return advancePositionWithClone(start, skipped, numberOfCharacters)
  }
  /**
   * Reports a parse error through `options.onError`.
   *
   * @param code - which error occurred
   * @param offset - optional distance from the cursor, in characters on the
   *   current line, at which the error is located
   */
  function emitError(
    context: ParserContext,
    code: ErrorCodes,
    offset?: number
  ): void {
    const loc = getCursor(context)
    const shift = offset || 0
    loc.offset += shift
    loc.column += shift

    // The error location is a zero-length range at that position.
    const error = createCompilerError(code, {
      start: loc,
      end: loc,
      source: ''
    })
    context.options.onError(error)
  }
  /**
   * Tells whether the children of the current scope end at the cursor.
   *
   * Besides running out of input, the end is signalled by: the end tag of
   * any ancestor (DATA), the end tag of the parent (RCDATA, RAWTEXT), or
   * `]]>` (CDATA).
   */
  function isEnd(
    context: ParserContext,
    mode: TextModes,
    ancestors: ElementNode[]
  ): boolean {
    const s = context.source

    if (mode === TextModes.DATA) {
      if (startsWith(s, '</')) {
        //TODO: probably bad performance
        let i = ancestors.length
        while (i-- > 0) {
          if (startsWithEndTagOpen(s, ancestors[i].tag)) {
            return true
          }
        }
      }
    } else if (mode === TextModes.RCDATA || mode === TextModes.RAWTEXT) {
      const parent = last(ancestors)
      if (parent && startsWithEndTagOpen(s, parent.tag)) {
        return true
      }
    } else if (mode === TextModes.CDATA) {
      if (startsWith(s, ']]>')) {
        return true
      }
    }

    // Otherwise the scope only ends together with the input.
    return !s
  }
  /**
   * Tells whether `source` starts with the end tag of `tag`, i.e. `</`
   * followed by the tag name (compared case-insensitively) and then a
   * whitespace character, `/`, `>` or the end of the input.
   */
  function startsWithEndTagOpen(source: string, tag: string): boolean {
    return (
      startsWith(source, '</') &&
      source.substr(2, tag.length).toLowerCase() === tag.toLowerCase() &&
      // Same whitespace set as everywhere else in the parser (tab, CR, LF,
      // form feed, space); a missing character counts as '>'.
      /[\t\r\n\f />]/.test(source[2 + tag.length] || '>')
    )
  }
  /**
   * Replacement code points for numeric character references in the
   * 0x80-0x9f range, looked up by `parseTextData` for control character
   * references. Code points without an entry are kept as they are.
   *
   * https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
   */
  const CCR_REPLACEMENTS: { [key: number]: number | undefined } = {
    0x80: 0x20ac, // €
    0x82: 0x201a, // ‚
    0x83: 0x0192, // ƒ
    0x84: 0x201e, // „
    0x85: 0x2026, // …
    0x86: 0x2020, // †
    0x87: 0x2021, // ‡
    0x88: 0x02c6, // ˆ
    0x89: 0x2030, // ‰
    0x8a: 0x0160, // Š
    0x8b: 0x2039, // ‹
    0x8c: 0x0152, // Œ
    0x8e: 0x017d, // Ž
    0x91: 0x2018, // ‘
    0x92: 0x2019, // ’
    0x93: 0x201c, // “
    0x94: 0x201d, // ”
    0x95: 0x2022, // •
    0x96: 0x2013, // –
    0x97: 0x2014, // —
    0x98: 0x02dc, // ˜
    0x99: 0x2122, // ™
    0x9a: 0x0161, // š
    0x9b: 0x203a, // ›
    0x9c: 0x0153, // œ
    0x9e: 0x017e, // ž
    0x9f: 0x0178 // Ÿ
  }