| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274 |
- /**
- * Not type-checking this file because it's mostly vendor code.
- */
- /*!
- * HTML Parser By John Resig (ejohn.org)
- * Modified by Juriy "kangax" Zaytsev
- * Original code by Erik Arvidsson, Mozilla Public License
- * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
- */
- import { makeMap, no } from 'shared/util'
- import { isNonPhrasingTag, canBeLeftOpenTag } from 'web/util/index'
// Regular expressions used to tokenize tags and attributes.

// An attribute name: any run of characters that is not whitespace,
// a quote, an angle bracket, "/" or "=".
const singleAttrIdentifier = /([^\s"'<>\/=]+)/
const singleAttrAssign = /(?:=)/
// The three accepted spellings of an attribute value, in the order
// their capture groups appear in `attribute`.
const singleAttrValues = [
  /"([^"]*)"+/.source, // double-quoted
  /'([^']*)'+/.source, // single-quoted
  /([^\s"'=<>`]+)/.source // unquoted
]
// Matches one attribute at the start of the input.
// Groups: 1 = name, 2 = "=", 3 / 4 / 5 = double-quoted / single-quoted /
// unquoted value (at most one of the three participates).
const attribute = new RegExp(
  `^\\s*${singleAttrIdentifier.source}` +
  `(?:\\s*(${singleAttrAssign.source})` +
  `\\s*(?:${singleAttrValues.join('|')}))?`
)

// could use https://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-QName
// but for Vue templates we can enforce a simple charset
const ncname = '[a-zA-Z_][\\w\\-\\.]*'
// Optional "prefix:" followed by a name, captured as a single group.
const qnameCapture = `((?:${ncname}\\:)?${ncname})`
// "<name" at the start of the input; group 1 is the (qualified) tag name.
const startTagOpen = new RegExp(`^<${qnameCapture}`)
// ">" or "/>" closing a start tag; group 1 is "/" for the self-closing form.
const startTagClose = /^\s*(\/?)>/
// "</name ...>" at the start of the input; group 1 is the tag name.
const endTag = new RegExp(`^<\\/${qnameCapture}[^>]*>`)
const doctype = /^<!DOCTYPE [^>]+>/i
// Feature detection: the optional group in /x(.)?/ does not take part in
// the match against 'x'. The flag becomes true when the engine reports
// that group as '' to the replace callback.
let IS_REGEX_CAPTURING_BROKEN = false
'x'.replace(/x(.)?/g, (whole, group) => {
  IS_REGEX_CAPTURING_BROKEN = group === ''
})
// Cache of the "closing tag" regexes that parseHTML builds for special
// elements, keyed by lower-cased tag name.
const reCache = {}

// Special Elements (can contain anything): while one of these is the
// innermost open tag, parseHTML scans straight to its closing tag.
const isSpecialTag = makeMap('script,style', true)
// Entity patterns restored by decodeAttr. They must match the *encoded*
// form (&amp; / &lt; / &gt;); matching the bare characters would replace
// each character with itself and decode nothing.
const ampRE = /&amp;/g
const ltRE = /&lt;/g
const gtRE = /&gt;/g

/**
 * Decode the HTML entities in an attribute value.
 *
 * @param {string} value - raw attribute value.
 * @param {boolean} shouldDecodeTags - when truthy, `&lt;` and `&gt;` are
 *   turned back into `<` and `>` as well.
 * @returns {string} the value with `&amp;` (and optionally the tag
 *   entities) decoded.
 */
function decodeAttr (value, shouldDecodeTags) {
  let decoded = value
  if (shouldDecodeTags) {
    decoded = decoded.replace(ltRE, '<').replace(gtRE, '>')
  }
  // `&amp;` goes last so that a double-encoded `&amp;lt;` is decoded
  // exactly once (to `&lt;`) instead of collapsing all the way to `<`.
  return decoded.replace(ampRE, '&')
}
/**
 * Tokenize an HTML string from left to right, reporting what is found
 * through the callbacks in `options`.
 *
 * @param {string} html - markup to tokenize.
 * @param {Object} options
 * @param {boolean} [options.expectHTML] - when truthy, an open `p` is closed
 *   before a tag accepted by isNonPhrasingTag, and a tag accepted by
 *   canBeLeftOpenTag closes an innermost open tag of the same name.
 * @param {Function} [options.isUnaryTag] - predicate for tags that are never
 *   pushed on the open-element stack; defaults to `no`.
 * @param {boolean} [options.isFromDOM] - when truthy, attribute values are
 *   passed through decodeAttr.
 * @param {boolean} [options.shouldDecodeTags] - forwarded to decodeAttr.
 * @param {Function} [options.start] - called as
 *   (tagName, attrs, unary, start, end) for every start tag.
 * @param {Function} [options.end] - called as (tagName, start, end) for
 *   every element that gets closed.
 * @param {Function} [options.chars] - called as (text) for text content.
 * @throws {Error} when a pass over the remaining input consumes nothing.
 */
export function parseHTML (html, options) {
  const stack = []
  const {
    expectHTML,
    isUnaryTag: customIsUnaryTag,
    isFromDOM,
    shouldDecodeTags
  } = options
  const isUnaryTag = customIsUnaryTag || no
  let index = 0
  let last, lastTag

  while (html) {
    last = html
    if (lastTag && isSpecialTag(lastTag)) {
      // Innermost open element is a special tag: skip to its closing tag.
      consumeSpecialContent()
    } else {
      const textEnd = html.indexOf('<')
      if (textEnd === 0 && consumeMarkup()) {
        continue
      }
      // Everything up to the next "<" (or the rest of the input) is text.
      // textEnd is deliberately the value computed before consumeMarkup ran.
      let text
      if (textEnd < 0) {
        text = html
        html = ''
      } else {
        text = html.substring(0, textEnd)
        advance(textEnd)
      }
      if (options.chars) {
        options.chars(text)
      }
    }
    // Guard against looping forever on input nothing above could consume.
    if (html === last) {
      throw new Error('Error parsing template:\n\n' + html)
    }
  }

  // Clean up any remaining tags
  parseEndTag()

  // Drop the first n characters of the remaining input.
  function advance (n) {
    index += n
    html = html.substring(n)
  }

  // Try, in order: comment, conditional comment, doctype, end tag,
  // start tag. Returns true when one of them was consumed.
  function consumeMarkup () {
    // Comment:
    if (/^<!--/.test(html)) {
      const commentEnd = html.indexOf('-->')
      if (commentEnd >= 0) {
        advance(commentEnd + 3)
        return true
      }
    }

    // http://en.wikipedia.org/wiki/Conditional_comment#Downlevel-revealed_conditional_comment
    if (/^<!\[/.test(html)) {
      const conditionalEnd = html.indexOf(']>')
      if (conditionalEnd >= 0) {
        advance(conditionalEnd + 2)
        return true
      }
    }

    // Doctype:
    const doctypeMatch = html.match(doctype)
    if (doctypeMatch) {
      advance(doctypeMatch[0].length)
      return true
    }

    // End tag:
    const endTagMatch = html.match(endTag)
    if (endTagMatch) {
      const tagStart = index
      advance(endTagMatch[0].length)
      parseEndTag(endTagMatch[0], endTagMatch[1], tagStart, index)
      return true
    }

    // Start tag:
    const startTagMatch = parseStartTag()
    if (startTagMatch) {
      handleStartTag(startTagMatch)
      return true
    }

    return false
  }

  // Report everything up to the closing tag of the current special
  // element as one text chunk, then close that element.
  function consumeSpecialContent () {
    const stackedTag = lastTag.toLowerCase()
    let closingRE = reCache[stackedTag]
    if (!closingRE) {
      closingRE = reCache[stackedTag] = new RegExp('([\\s\\S]*?)(</' + stackedTag + '[^>]*>)', 'i')
    }
    let endTagLength = 0
    const rest = html.replace(closingRE, (all, text, closingTag) => {
      endTagLength = closingTag.length
      let content = text
      if (stackedTag !== 'script' && stackedTag !== 'style' && stackedTag !== 'noscript') {
        // Unwrap comment and CDATA markers, keeping what they enclose.
        content = content
          .replace(/<!--([\s\S]*?)-->/g, '$1')
          .replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, '$1')
      }
      if (options.chars) {
        options.chars(content)
      }
      return ''
    })
    index += html.length - rest.length
    html = rest
    parseEndTag('</' + stackedTag + '>', stackedTag, index - endTagLength, index)
  }

  // Consume "<name attr... >" and return a description of it. Returns
  // undefined when the input does not start a tag or the tag is never
  // closed; in the latter case the consumed input stays consumed.
  function parseStartTag () {
    const opening = html.match(startTagOpen)
    if (!opening) {
      return
    }
    const match = {
      tagName: opening[1],
      attrs: [],
      start: index
    }
    advance(opening[0].length)

    let end = html.match(startTagClose)
    while (!end) {
      const attr = html.match(attribute)
      if (!attr) {
        break
      }
      advance(attr[0].length)
      match.attrs.push(attr)
      end = html.match(startTagClose)
    }
    if (!end) {
      return
    }

    match.unarySlash = end[1]
    advance(end[0].length)
    match.end = index
    return match
  }

  // Turn a parseStartTag result into a `start` callback, updating the
  // open-element stack for non-unary tags.
  function handleStartTag (match) {
    const { tagName } = match

    if (expectHTML) {
      if (lastTag === 'p' && isNonPhrasingTag(tagName)) {
        parseEndTag('', lastTag)
      }
      if (canBeLeftOpenTag(tagName) && lastTag === tagName) {
        parseEndTag('', tagName)
      }
    }

    const unary = isUnaryTag(tagName) ||
      (tagName === 'html' && lastTag === 'head') ||
      !!match.unarySlash

    const attrs = match.attrs.map(function (args) {
      // hackish work around FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778
      if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
        for (let group = 3; group <= 5; group++) {
          if (args[group] === '') { delete args[group] }
        }
      }
      // Whichever of the three value groups participated, else ''.
      const value = args[3] || args[4] || args[5] || ''
      return {
        name: args[1],
        value: isFromDOM ? decodeAttr(value, shouldDecodeTags) : value
      }
    })

    if (!unary) {
      stack.push({ tag: tagName, attrs: attrs })
      lastTag = tagName
    }

    if (options.start) {
      options.start(tagName, attrs, unary, match.start, match.end)
    }
  }

  // Close the nearest open element named tagName together with everything
  // opened after it. Without a tagName, close every open element.
  // start / end default to the current index.
  function parseEndTag (tag, tagName, start, end) {
    const from = start == null ? index : start
    const to = end == null ? index : end

    // If no tag name is provided, clean shop (pos stays 0); otherwise
    // find the closest opened tag of the same type.
    let pos = 0
    if (tagName) {
      const needle = tagName.toLowerCase()
      pos = stack.length - 1
      while (pos >= 0 && stack[pos].tag.toLowerCase() !== needle) {
        pos--
      }
    }

    if (pos >= 0) {
      // Close all the open elements, up the stack
      for (let i = stack.length - 1; i >= pos; i--) {
        if (options.end) {
          options.end(stack[i].tag, from, to)
        }
      }
      // Remove the open elements from the stack
      stack.length = pos
      lastTag = pos && stack[pos - 1].tag
      return
    }

    // No matching open element: a stray </br> is reported as a unary
    // start tag and a stray </p> as an empty element.
    const lowered = tagName.toLowerCase()
    if (lowered === 'br') {
      if (options.start) {
        options.start(tagName, [], true, from, to)
      }
    } else if (lowered === 'p') {
      if (options.start) {
        options.start(tagName, [], false, from, to)
      }
      if (options.end) {
        options.end(tagName, from, to)
      }
    }
  }
}
|