vendor/github.com/russross/blackfriday/v2/inline.go
changeset 256 6d9efbef00a9
parent 251 1c52a0eeb952
child 260 445e01aede7e
equal deleted inserted replaced
255:4f153a23adab 256:6d9efbef00a9
       
     1 //
       
     2 // Blackfriday Markdown Processor
       
     3 // Available at http://github.com/russross/blackfriday
       
     4 //
       
     5 // Copyright © 2011 Russ Ross <russ@russross.com>.
       
     6 // Distributed under the Simplified BSD License.
       
     7 // See README.md for details.
       
     8 //
       
     9 
       
    10 //
       
    11 // Functions to parse inline elements.
       
    12 //
       
    13 
       
    14 package blackfriday
       
    15 
       
    16 import (
       
    17 	"bytes"
       
    18 	"regexp"
       
    19 	"strconv"
       
    20 )
       
    21 
       
    22 var (
       
    23 	urlRe    = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
       
    24 	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)
       
    25 
       
    26 	// https://www.w3.org/TR/html5/syntax.html#character-references
       
    27 	// highest unicode code point in 17 planes (2^20): 1,114,112d =
       
    28 	// 7 dec digits or 6 hex digits
       
    29 	// named entity references can be 2-31 characters with stuff like &lt;
       
    30 	// at one end and &CounterClockwiseContourIntegral; at the other. There
       
    31 	// are also sometimes numbers at the end, although this isn't inherent
       
    32 	// in the specification; there are never numbers anywhere else in
       
    33 	// current character references, though; see &frac34; and &blk12;, etc.
       
    34 	// https://www.w3.org/TR/html5/syntax.html#named-character-references
       
    35 	//
       
    36 	// entity := "&" (named group | number ref) ";"
       
    37 	// named group := [a-zA-Z]{2,31}[0-9]{0,2}
       
    38 	// number ref := "#" (dec ref | hex ref)
       
    39 	// dec ref := [0-9]{1,7}
       
    40 	// hex ref := ("x" | "X") [0-9a-fA-F]{1,6}
       
    41 	htmlEntityRe = regexp.MustCompile(`&([a-zA-Z]{2,31}[0-9]{0,2}|#([0-9]{1,7}|[xX][0-9a-fA-F]{1,6}));`)
       
    42 )
       
    43 
       
    44 // Functions to parse text within a block
       
    45 // Each function returns the number of chars taken care of
       
    46 // data is the complete block being rendered
       
    47 // offset is the number of valid chars before the current cursor
       
    48 
       
    49 func (p *Markdown) inline(currBlock *Node, data []byte) {
       
    50 	// handlers might call us recursively: enforce a maximum depth
       
    51 	if p.nesting >= p.maxNesting || len(data) == 0 {
       
    52 		return
       
    53 	}
       
    54 	p.nesting++
       
    55 	beg, end := 0, 0
       
    56 	for end < len(data) {
       
    57 		handler := p.inlineCallback[data[end]]
       
    58 		if handler != nil {
       
    59 			if consumed, node := handler(p, data, end); consumed == 0 {
       
    60 				// No action from the callback.
       
    61 				end++
       
    62 			} else {
       
    63 				// Copy inactive chars into the output.
       
    64 				currBlock.AppendChild(text(data[beg:end]))
       
    65 				if node != nil {
       
    66 					currBlock.AppendChild(node)
       
    67 				}
       
    68 				// Skip past whatever the callback used.
       
    69 				beg = end + consumed
       
    70 				end = beg
       
    71 			}
       
    72 		} else {
       
    73 			end++
       
    74 		}
       
    75 	}
       
    76 	if beg < len(data) {
       
    77 		if data[end-1] == '\n' {
       
    78 			end--
       
    79 		}
       
    80 		currBlock.AppendChild(text(data[beg:end]))
       
    81 	}
       
    82 	p.nesting--
       
    83 }
       
    84 
       
    85 // single and double emphasis parsing
       
    86 func emphasis(p *Markdown, data []byte, offset int) (int, *Node) {
       
    87 	data = data[offset:]
       
    88 	c := data[0]
       
    89 
       
    90 	if len(data) > 2 && data[1] != c {
       
    91 		// whitespace cannot follow an opening emphasis;
       
    92 		// strikethrough only takes two characters '~~'
       
    93 		if c == '~' || isspace(data[1]) {
       
    94 			return 0, nil
       
    95 		}
       
    96 		ret, node := helperEmphasis(p, data[1:], c)
       
    97 		if ret == 0 {
       
    98 			return 0, nil
       
    99 		}
       
   100 
       
   101 		return ret + 1, node
       
   102 	}
       
   103 
       
   104 	if len(data) > 3 && data[1] == c && data[2] != c {
       
   105 		if isspace(data[2]) {
       
   106 			return 0, nil
       
   107 		}
       
   108 		ret, node := helperDoubleEmphasis(p, data[2:], c)
       
   109 		if ret == 0 {
       
   110 			return 0, nil
       
   111 		}
       
   112 
       
   113 		return ret + 2, node
       
   114 	}
       
   115 
       
   116 	if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
       
   117 		if c == '~' || isspace(data[3]) {
       
   118 			return 0, nil
       
   119 		}
       
   120 		ret, node := helperTripleEmphasis(p, data, 3, c)
       
   121 		if ret == 0 {
       
   122 			return 0, nil
       
   123 		}
       
   124 
       
   125 		return ret + 3, node
       
   126 	}
       
   127 
       
   128 	return 0, nil
       
   129 }
       
   130 
       
   131 func codeSpan(p *Markdown, data []byte, offset int) (int, *Node) {
       
   132 	data = data[offset:]
       
   133 
       
   134 	nb := 0
       
   135 
       
   136 	// count the number of backticks in the delimiter
       
   137 	for nb < len(data) && data[nb] == '`' {
       
   138 		nb++
       
   139 	}
       
   140 
       
   141 	// find the next delimiter
       
   142 	i, end := 0, 0
       
   143 	for end = nb; end < len(data) && i < nb; end++ {
       
   144 		if data[end] == '`' {
       
   145 			i++
       
   146 		} else {
       
   147 			i = 0
       
   148 		}
       
   149 	}
       
   150 
       
   151 	// no matching delimiter?
       
   152 	if i < nb && end >= len(data) {
       
   153 		return 0, nil
       
   154 	}
       
   155 
       
   156 	// trim outside whitespace
       
   157 	fBegin := nb
       
   158 	for fBegin < end && data[fBegin] == ' ' {
       
   159 		fBegin++
       
   160 	}
       
   161 
       
   162 	fEnd := end - nb
       
   163 	for fEnd > fBegin && data[fEnd-1] == ' ' {
       
   164 		fEnd--
       
   165 	}
       
   166 
       
   167 	// render the code span
       
   168 	if fBegin != fEnd {
       
   169 		code := NewNode(Code)
       
   170 		code.Literal = data[fBegin:fEnd]
       
   171 		return end, code
       
   172 	}
       
   173 
       
   174 	return end, nil
       
   175 }
       
   176 
       
   177 // newline preceded by two spaces becomes <br>
       
   178 func maybeLineBreak(p *Markdown, data []byte, offset int) (int, *Node) {
       
   179 	origOffset := offset
       
   180 	for offset < len(data) && data[offset] == ' ' {
       
   181 		offset++
       
   182 	}
       
   183 
       
   184 	if offset < len(data) && data[offset] == '\n' {
       
   185 		if offset-origOffset >= 2 {
       
   186 			return offset - origOffset + 1, NewNode(Hardbreak)
       
   187 		}
       
   188 		return offset - origOffset, nil
       
   189 	}
       
   190 	return 0, nil
       
   191 }
       
   192 
       
   193 // newline without two spaces works when HardLineBreak is enabled
       
   194 func lineBreak(p *Markdown, data []byte, offset int) (int, *Node) {
       
   195 	if p.extensions&HardLineBreak != 0 {
       
   196 		return 1, NewNode(Hardbreak)
       
   197 	}
       
   198 	return 0, nil
       
   199 }
       
   200 
       
   201 type linkType int
       
   202 
       
   203 const (
       
   204 	linkNormal linkType = iota
       
   205 	linkImg
       
   206 	linkDeferredFootnote
       
   207 	linkInlineFootnote
       
   208 )
       
   209 
       
   210 func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
       
   211 	if t == linkDeferredFootnote {
       
   212 		return false
       
   213 	}
       
   214 	return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
       
   215 }
       
   216 
       
   217 func maybeImage(p *Markdown, data []byte, offset int) (int, *Node) {
       
   218 	if offset < len(data)-1 && data[offset+1] == '[' {
       
   219 		return link(p, data, offset)
       
   220 	}
       
   221 	return 0, nil
       
   222 }
       
   223 
       
   224 func maybeInlineFootnote(p *Markdown, data []byte, offset int) (int, *Node) {
       
   225 	if offset < len(data)-1 && data[offset+1] == '[' {
       
   226 		return link(p, data, offset)
       
   227 	}
       
   228 	return 0, nil
       
   229 }
       
   230 
       
   231 // '[': parse a link or an image or a footnote
       
   232 func link(p *Markdown, data []byte, offset int) (int, *Node) {
       
   233 	// no links allowed inside regular links, footnote, and deferred footnotes
       
   234 	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
       
   235 		return 0, nil
       
   236 	}
       
   237 
       
   238 	var t linkType
       
   239 	switch {
       
   240 	// special case: ![^text] == deferred footnote (that follows something with
       
   241 	// an exclamation point)
       
   242 	case p.extensions&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^':
       
   243 		t = linkDeferredFootnote
       
   244 	// ![alt] == image
       
   245 	case offset >= 0 && data[offset] == '!':
       
   246 		t = linkImg
       
   247 		offset++
       
   248 	// ^[text] == inline footnote
       
   249 	// [^refId] == deferred footnote
       
   250 	case p.extensions&Footnotes != 0:
       
   251 		if offset >= 0 && data[offset] == '^' {
       
   252 			t = linkInlineFootnote
       
   253 			offset++
       
   254 		} else if len(data)-1 > offset && data[offset+1] == '^' {
       
   255 			t = linkDeferredFootnote
       
   256 		}
       
   257 	// [text] == regular link
       
   258 	default:
       
   259 		t = linkNormal
       
   260 	}
       
   261 
       
   262 	data = data[offset:]
       
   263 
       
   264 	var (
       
   265 		i                       = 1
       
   266 		noteID                  int
       
   267 		title, link, altContent []byte
       
   268 		textHasNl               = false
       
   269 	)
       
   270 
       
   271 	if t == linkDeferredFootnote {
       
   272 		i++
       
   273 	}
       
   274 
       
   275 	// look for the matching closing bracket
       
   276 	for level := 1; level > 0 && i < len(data); i++ {
       
   277 		switch {
       
   278 		case data[i] == '\n':
       
   279 			textHasNl = true
       
   280 
       
   281 		case data[i-1] == '\\':
       
   282 			continue
       
   283 
       
   284 		case data[i] == '[':
       
   285 			level++
       
   286 
       
   287 		case data[i] == ']':
       
   288 			level--
       
   289 			if level <= 0 {
       
   290 				i-- // compensate for extra i++ in for loop
       
   291 			}
       
   292 		}
       
   293 	}
       
   294 
       
   295 	if i >= len(data) {
       
   296 		return 0, nil
       
   297 	}
       
   298 
       
   299 	txtE := i
       
   300 	i++
       
   301 	var footnoteNode *Node
       
   302 
       
   303 	// skip any amount of whitespace or newline
       
   304 	// (this is much more lax than original markdown syntax)
       
   305 	for i < len(data) && isspace(data[i]) {
       
   306 		i++
       
   307 	}
       
   308 
       
   309 	// inline style link
       
   310 	switch {
       
   311 	case i < len(data) && data[i] == '(':
       
   312 		// skip initial whitespace
       
   313 		i++
       
   314 
       
   315 		for i < len(data) && isspace(data[i]) {
       
   316 			i++
       
   317 		}
       
   318 
       
   319 		linkB := i
       
   320 
       
   321 		// look for link end: ' " )
       
   322 	findlinkend:
       
   323 		for i < len(data) {
       
   324 			switch {
       
   325 			case data[i] == '\\':
       
   326 				i += 2
       
   327 
       
   328 			case data[i] == ')' || data[i] == '\'' || data[i] == '"':
       
   329 				break findlinkend
       
   330 
       
   331 			default:
       
   332 				i++
       
   333 			}
       
   334 		}
       
   335 
       
   336 		if i >= len(data) {
       
   337 			return 0, nil
       
   338 		}
       
   339 		linkE := i
       
   340 
       
   341 		// look for title end if present
       
   342 		titleB, titleE := 0, 0
       
   343 		if data[i] == '\'' || data[i] == '"' {
       
   344 			i++
       
   345 			titleB = i
       
   346 
       
   347 		findtitleend:
       
   348 			for i < len(data) {
       
   349 				switch {
       
   350 				case data[i] == '\\':
       
   351 					i += 2
       
   352 
       
   353 				case data[i] == ')':
       
   354 					break findtitleend
       
   355 
       
   356 				default:
       
   357 					i++
       
   358 				}
       
   359 			}
       
   360 
       
   361 			if i >= len(data) {
       
   362 				return 0, nil
       
   363 			}
       
   364 
       
   365 			// skip whitespace after title
       
   366 			titleE = i - 1
       
   367 			for titleE > titleB && isspace(data[titleE]) {
       
   368 				titleE--
       
   369 			}
       
   370 
       
   371 			// check for closing quote presence
       
   372 			if data[titleE] != '\'' && data[titleE] != '"' {
       
   373 				titleB, titleE = 0, 0
       
   374 				linkE = i
       
   375 			}
       
   376 		}
       
   377 
       
   378 		// remove whitespace at the end of the link
       
   379 		for linkE > linkB && isspace(data[linkE-1]) {
       
   380 			linkE--
       
   381 		}
       
   382 
       
   383 		// remove optional angle brackets around the link
       
   384 		if data[linkB] == '<' {
       
   385 			linkB++
       
   386 		}
       
   387 		if data[linkE-1] == '>' {
       
   388 			linkE--
       
   389 		}
       
   390 
       
   391 		// build escaped link and title
       
   392 		if linkE > linkB {
       
   393 			link = data[linkB:linkE]
       
   394 		}
       
   395 
       
   396 		if titleE > titleB {
       
   397 			title = data[titleB:titleE]
       
   398 		}
       
   399 
       
   400 		i++
       
   401 
       
   402 	// reference style link
       
   403 	case isReferenceStyleLink(data, i, t):
       
   404 		var id []byte
       
   405 		altContentConsidered := false
       
   406 
       
   407 		// look for the id
       
   408 		i++
       
   409 		linkB := i
       
   410 		for i < len(data) && data[i] != ']' {
       
   411 			i++
       
   412 		}
       
   413 		if i >= len(data) {
       
   414 			return 0, nil
       
   415 		}
       
   416 		linkE := i
       
   417 
       
   418 		// find the reference
       
   419 		if linkB == linkE {
       
   420 			if textHasNl {
       
   421 				var b bytes.Buffer
       
   422 
       
   423 				for j := 1; j < txtE; j++ {
       
   424 					switch {
       
   425 					case data[j] != '\n':
       
   426 						b.WriteByte(data[j])
       
   427 					case data[j-1] != ' ':
       
   428 						b.WriteByte(' ')
       
   429 					}
       
   430 				}
       
   431 
       
   432 				id = b.Bytes()
       
   433 			} else {
       
   434 				id = data[1:txtE]
       
   435 				altContentConsidered = true
       
   436 			}
       
   437 		} else {
       
   438 			id = data[linkB:linkE]
       
   439 		}
       
   440 
       
   441 		// find the reference with matching id
       
   442 		lr, ok := p.getRef(string(id))
       
   443 		if !ok {
       
   444 			return 0, nil
       
   445 		}
       
   446 
       
   447 		// keep link and title from reference
       
   448 		link = lr.link
       
   449 		title = lr.title
       
   450 		if altContentConsidered {
       
   451 			altContent = lr.text
       
   452 		}
       
   453 		i++
       
   454 
       
   455 	// shortcut reference style link or reference or inline footnote
       
   456 	default:
       
   457 		var id []byte
       
   458 
       
   459 		// craft the id
       
   460 		if textHasNl {
       
   461 			var b bytes.Buffer
       
   462 
       
   463 			for j := 1; j < txtE; j++ {
       
   464 				switch {
       
   465 				case data[j] != '\n':
       
   466 					b.WriteByte(data[j])
       
   467 				case data[j-1] != ' ':
       
   468 					b.WriteByte(' ')
       
   469 				}
       
   470 			}
       
   471 
       
   472 			id = b.Bytes()
       
   473 		} else {
       
   474 			if t == linkDeferredFootnote {
       
   475 				id = data[2:txtE] // get rid of the ^
       
   476 			} else {
       
   477 				id = data[1:txtE]
       
   478 			}
       
   479 		}
       
   480 
       
   481 		footnoteNode = NewNode(Item)
       
   482 		if t == linkInlineFootnote {
       
   483 			// create a new reference
       
   484 			noteID = len(p.notes) + 1
       
   485 
       
   486 			var fragment []byte
       
   487 			if len(id) > 0 {
       
   488 				if len(id) < 16 {
       
   489 					fragment = make([]byte, len(id))
       
   490 				} else {
       
   491 					fragment = make([]byte, 16)
       
   492 				}
       
   493 				copy(fragment, slugify(id))
       
   494 			} else {
       
   495 				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteID))...)
       
   496 			}
       
   497 
       
   498 			ref := &reference{
       
   499 				noteID:   noteID,
       
   500 				hasBlock: false,
       
   501 				link:     fragment,
       
   502 				title:    id,
       
   503 				footnote: footnoteNode,
       
   504 			}
       
   505 
       
   506 			p.notes = append(p.notes, ref)
       
   507 
       
   508 			link = ref.link
       
   509 			title = ref.title
       
   510 		} else {
       
   511 			// find the reference with matching id
       
   512 			lr, ok := p.getRef(string(id))
       
   513 			if !ok {
       
   514 				return 0, nil
       
   515 			}
       
   516 
       
   517 			if t == linkDeferredFootnote {
       
   518 				lr.noteID = len(p.notes) + 1
       
   519 				lr.footnote = footnoteNode
       
   520 				p.notes = append(p.notes, lr)
       
   521 			}
       
   522 
       
   523 			// keep link and title from reference
       
   524 			link = lr.link
       
   525 			// if inline footnote, title == footnote contents
       
   526 			title = lr.title
       
   527 			noteID = lr.noteID
       
   528 		}
       
   529 
       
   530 		// rewind the whitespace
       
   531 		i = txtE + 1
       
   532 	}
       
   533 
       
   534 	var uLink []byte
       
   535 	if t == linkNormal || t == linkImg {
       
   536 		if len(link) > 0 {
       
   537 			var uLinkBuf bytes.Buffer
       
   538 			unescapeText(&uLinkBuf, link)
       
   539 			uLink = uLinkBuf.Bytes()
       
   540 		}
       
   541 
       
   542 		// links need something to click on and somewhere to go
       
   543 		if len(uLink) == 0 || (t == linkNormal && txtE <= 1) {
       
   544 			return 0, nil
       
   545 		}
       
   546 	}
       
   547 
       
   548 	// call the relevant rendering function
       
   549 	var linkNode *Node
       
   550 	switch t {
       
   551 	case linkNormal:
       
   552 		linkNode = NewNode(Link)
       
   553 		linkNode.Destination = normalizeURI(uLink)
       
   554 		linkNode.Title = title
       
   555 		if len(altContent) > 0 {
       
   556 			linkNode.AppendChild(text(altContent))
       
   557 		} else {
       
   558 			// links cannot contain other links, so turn off link parsing
       
   559 			// temporarily and recurse
       
   560 			insideLink := p.insideLink
       
   561 			p.insideLink = true
       
   562 			p.inline(linkNode, data[1:txtE])
       
   563 			p.insideLink = insideLink
       
   564 		}
       
   565 
       
   566 	case linkImg:
       
   567 		linkNode = NewNode(Image)
       
   568 		linkNode.Destination = uLink
       
   569 		linkNode.Title = title
       
   570 		linkNode.AppendChild(text(data[1:txtE]))
       
   571 		i++
       
   572 
       
   573 	case linkInlineFootnote, linkDeferredFootnote:
       
   574 		linkNode = NewNode(Link)
       
   575 		linkNode.Destination = link
       
   576 		linkNode.Title = title
       
   577 		linkNode.NoteID = noteID
       
   578 		linkNode.Footnote = footnoteNode
       
   579 		if t == linkInlineFootnote {
       
   580 			i++
       
   581 		}
       
   582 
       
   583 	default:
       
   584 		return 0, nil
       
   585 	}
       
   586 
       
   587 	return i, linkNode
       
   588 }
       
   589 
       
   590 func (p *Markdown) inlineHTMLComment(data []byte) int {
       
   591 	if len(data) < 5 {
       
   592 		return 0
       
   593 	}
       
   594 	if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
       
   595 		return 0
       
   596 	}
       
   597 	i := 5
       
   598 	// scan for an end-of-comment marker, across lines if necessary
       
   599 	for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
       
   600 		i++
       
   601 	}
       
   602 	// no end-of-comment marker
       
   603 	if i >= len(data) {
       
   604 		return 0
       
   605 	}
       
   606 	return i + 1
       
   607 }
       
   608 
       
   609 func stripMailto(link []byte) []byte {
       
   610 	if bytes.HasPrefix(link, []byte("mailto://")) {
       
   611 		return link[9:]
       
   612 	} else if bytes.HasPrefix(link, []byte("mailto:")) {
       
   613 		return link[7:]
       
   614 	} else {
       
   615 		return link
       
   616 	}
       
   617 }
       
   618 
       
   619 // autolinkType specifies a kind of autolink that gets detected.
       
   620 type autolinkType int
       
   621 
       
   622 // These are the possible flag values for the autolink renderer.
       
   623 const (
       
   624 	notAutolink autolinkType = iota
       
   625 	normalAutolink
       
   626 	emailAutolink
       
   627 )
       
   628 
       
   629 // '<' when tags or autolinks are allowed
       
   630 func leftAngle(p *Markdown, data []byte, offset int) (int, *Node) {
       
   631 	data = data[offset:]
       
   632 	altype, end := tagLength(data)
       
   633 	if size := p.inlineHTMLComment(data); size > 0 {
       
   634 		end = size
       
   635 	}
       
   636 	if end > 2 {
       
   637 		if altype != notAutolink {
       
   638 			var uLink bytes.Buffer
       
   639 			unescapeText(&uLink, data[1:end+1-2])
       
   640 			if uLink.Len() > 0 {
       
   641 				link := uLink.Bytes()
       
   642 				node := NewNode(Link)
       
   643 				node.Destination = link
       
   644 				if altype == emailAutolink {
       
   645 					node.Destination = append([]byte("mailto:"), link...)
       
   646 				}
       
   647 				node.AppendChild(text(stripMailto(link)))
       
   648 				return end, node
       
   649 			}
       
   650 		} else {
       
   651 			htmlTag := NewNode(HTMLSpan)
       
   652 			htmlTag.Literal = data[:end]
       
   653 			return end, htmlTag
       
   654 		}
       
   655 	}
       
   656 
       
   657 	return end, nil
       
   658 }
       
   659 
       
   660 // '\\' backslash escape
       
   661 var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")
       
   662 
       
   663 func escape(p *Markdown, data []byte, offset int) (int, *Node) {
       
   664 	data = data[offset:]
       
   665 
       
   666 	if len(data) > 1 {
       
   667 		if p.extensions&BackslashLineBreak != 0 && data[1] == '\n' {
       
   668 			return 2, NewNode(Hardbreak)
       
   669 		}
       
   670 		if bytes.IndexByte(escapeChars, data[1]) < 0 {
       
   671 			return 0, nil
       
   672 		}
       
   673 
       
   674 		return 2, text(data[1:2])
       
   675 	}
       
   676 
       
   677 	return 2, nil
       
   678 }
       
   679 
       
   680 func unescapeText(ob *bytes.Buffer, src []byte) {
       
   681 	i := 0
       
   682 	for i < len(src) {
       
   683 		org := i
       
   684 		for i < len(src) && src[i] != '\\' {
       
   685 			i++
       
   686 		}
       
   687 
       
   688 		if i > org {
       
   689 			ob.Write(src[org:i])
       
   690 		}
       
   691 
       
   692 		if i+1 >= len(src) {
       
   693 			break
       
   694 		}
       
   695 
       
   696 		ob.WriteByte(src[i+1])
       
   697 		i += 2
       
   698 	}
       
   699 }
       
   700 
       
   701 // '&' escaped when it doesn't belong to an entity
       
   702 // valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
       
   703 func entity(p *Markdown, data []byte, offset int) (int, *Node) {
       
   704 	data = data[offset:]
       
   705 
       
   706 	end := 1
       
   707 
       
   708 	if end < len(data) && data[end] == '#' {
       
   709 		end++
       
   710 	}
       
   711 
       
   712 	for end < len(data) && isalnum(data[end]) {
       
   713 		end++
       
   714 	}
       
   715 
       
   716 	if end < len(data) && data[end] == ';' {
       
   717 		end++ // real entity
       
   718 	} else {
       
   719 		return 0, nil // lone '&'
       
   720 	}
       
   721 
       
   722 	ent := data[:end]
       
   723 	// undo &amp; escaping or it will be converted to &amp;amp; by another
       
   724 	// escaper in the renderer
       
   725 	if bytes.Equal(ent, []byte("&amp;")) {
       
   726 		ent = []byte{'&'}
       
   727 	}
       
   728 
       
   729 	return end, text(ent)
       
   730 }
       
   731 
       
   732 func linkEndsWithEntity(data []byte, linkEnd int) bool {
       
   733 	entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1)
       
   734 	return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
       
   735 }
       
   736 
       
   737 // hasPrefixCaseInsensitive is a custom implementation of
       
   738 //     strings.HasPrefix(strings.ToLower(s), prefix)
       
   739 // we rolled our own because ToLower pulls in a huge machinery of lowercasing
       
   740 // anything from Unicode and that's very slow. Since this func will only be
       
   741 // used on ASCII protocol prefixes, we can take shortcuts.
       
   742 func hasPrefixCaseInsensitive(s, prefix []byte) bool {
       
   743 	if len(s) < len(prefix) {
       
   744 		return false
       
   745 	}
       
   746 	delta := byte('a' - 'A')
       
   747 	for i, b := range prefix {
       
   748 		if b != s[i] && b != s[i]+delta {
       
   749 			return false
       
   750 		}
       
   751 	}
       
   752 	return true
       
   753 }
       
   754 
       
   755 var protocolPrefixes = [][]byte{
       
   756 	[]byte("http://"),
       
   757 	[]byte("https://"),
       
   758 	[]byte("ftp://"),
       
   759 	[]byte("file://"),
       
   760 	[]byte("mailto:"),
       
   761 }
       
   762 
       
   763 const shortestPrefix = 6 // len("ftp://"), the shortest of the above
       
   764 
       
   765 func maybeAutoLink(p *Markdown, data []byte, offset int) (int, *Node) {
       
   766 	// quick check to rule out most false hits
       
   767 	if p.insideLink || len(data) < offset+shortestPrefix {
       
   768 		return 0, nil
       
   769 	}
       
   770 	for _, prefix := range protocolPrefixes {
       
   771 		endOfHead := offset + 8 // 8 is the len() of the longest prefix
       
   772 		if endOfHead > len(data) {
       
   773 			endOfHead = len(data)
       
   774 		}
       
   775 		if hasPrefixCaseInsensitive(data[offset:endOfHead], prefix) {
       
   776 			return autoLink(p, data, offset)
       
   777 		}
       
   778 	}
       
   779 	return 0, nil
       
   780 }
       
   781 
       
   782 func autoLink(p *Markdown, data []byte, offset int) (int, *Node) {
       
   783 	// Now a more expensive check to see if we're not inside an anchor element
       
   784 	anchorStart := offset
       
   785 	offsetFromAnchor := 0
       
   786 	for anchorStart > 0 && data[anchorStart] != '<' {
       
   787 		anchorStart--
       
   788 		offsetFromAnchor++
       
   789 	}
       
   790 
       
   791 	anchorStr := anchorRe.Find(data[anchorStart:])
       
   792 	if anchorStr != nil {
       
   793 		anchorClose := NewNode(HTMLSpan)
       
   794 		anchorClose.Literal = anchorStr[offsetFromAnchor:]
       
   795 		return len(anchorStr) - offsetFromAnchor, anchorClose
       
   796 	}
       
   797 
       
   798 	// scan backward for a word boundary
       
   799 	rewind := 0
       
   800 	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
       
   801 		rewind++
       
   802 	}
       
   803 	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
       
   804 		return 0, nil
       
   805 	}
       
   806 
       
   807 	origData := data
       
   808 	data = data[offset-rewind:]
       
   809 
       
   810 	if !isSafeLink(data) {
       
   811 		return 0, nil
       
   812 	}
       
   813 
       
   814 	linkEnd := 0
       
   815 	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
       
   816 		linkEnd++
       
   817 	}
       
   818 
       
   819 	// Skip punctuation at the end of the link
       
   820 	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
       
   821 		linkEnd--
       
   822 	}
       
   823 
       
   824 	// But don't skip semicolon if it's a part of escaped entity:
       
   825 	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
       
   826 		linkEnd--
       
   827 	}
       
   828 
       
   829 	// See if the link finishes with a punctuation sign that can be closed.
       
   830 	var copen byte
       
   831 	switch data[linkEnd-1] {
       
   832 	case '"':
       
   833 		copen = '"'
       
   834 	case '\'':
       
   835 		copen = '\''
       
   836 	case ')':
       
   837 		copen = '('
       
   838 	case ']':
       
   839 		copen = '['
       
   840 	case '}':
       
   841 		copen = '{'
       
   842 	default:
       
   843 		copen = 0
       
   844 	}
       
   845 
       
   846 	if copen != 0 {
       
   847 		bufEnd := offset - rewind + linkEnd - 2
       
   848 
       
   849 		openDelim := 1
       
   850 
       
   851 		/* Try to close the final punctuation sign in this same line;
       
   852 		 * if we managed to close it outside of the URL, that means that it's
       
   853 		 * not part of the URL. If it closes inside the URL, that means it
       
   854 		 * is part of the URL.
       
   855 		 *
       
   856 		 * Examples:
       
   857 		 *
       
   858 		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
       
   859 		 *              => http://www.pokemon.com/Pikachu_(Electric)
       
   860 		 *
       
   861 		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
       
   862 		 *              => http://www.pokemon.com/Pikachu_(Electric)
       
   863 		 *
       
   864 		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
       
   865 		 *              => http://www.pokemon.com/Pikachu_(Electric))
       
   866 		 *
       
   867 		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
       
   868 		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
       
   869 		 */
       
   870 
       
   871 		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
       
   872 			if origData[bufEnd] == data[linkEnd-1] {
       
   873 				openDelim++
       
   874 			}
       
   875 
       
   876 			if origData[bufEnd] == copen {
       
   877 				openDelim--
       
   878 			}
       
   879 
       
   880 			bufEnd--
       
   881 		}
       
   882 
       
   883 		if openDelim == 0 {
       
   884 			linkEnd--
       
   885 		}
       
   886 	}
       
   887 
       
   888 	var uLink bytes.Buffer
       
   889 	unescapeText(&uLink, data[:linkEnd])
       
   890 
       
   891 	if uLink.Len() > 0 {
       
   892 		node := NewNode(Link)
       
   893 		node.Destination = uLink.Bytes()
       
   894 		node.AppendChild(text(uLink.Bytes()))
       
   895 		return linkEnd, node
       
   896 	}
       
   897 
       
   898 	return linkEnd, nil
       
   899 }
       
   900 
       
   901 func isEndOfLink(char byte) bool {
       
   902 	return isspace(char) || char == '<'
       
   903 }
       
   904 
       
   905 var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}
       
   906 var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")}
       
   907 
       
   908 func isSafeLink(link []byte) bool {
       
   909 	for _, path := range validPaths {
       
   910 		if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) {
       
   911 			if len(link) == len(path) {
       
   912 				return true
       
   913 			} else if isalnum(link[len(path)]) {
       
   914 				return true
       
   915 			}
       
   916 		}
       
   917 	}
       
   918 
       
   919 	for _, prefix := range validUris {
       
   920 		// TODO: handle unicode here
       
   921 		// case-insensitive prefix test
       
   922 		if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
       
   923 			return true
       
   924 		}
       
   925 	}
       
   926 
       
   927 	return false
       
   928 }
       
   929 
       
   930 // return the length of the given tag, or 0 is it's not valid
       
   931 func tagLength(data []byte) (autolink autolinkType, end int) {
       
   932 	var i, j int
       
   933 
       
   934 	// a valid tag can't be shorter than 3 chars
       
   935 	if len(data) < 3 {
       
   936 		return notAutolink, 0
       
   937 	}
       
   938 
       
   939 	// begins with a '<' optionally followed by '/', followed by letter or number
       
   940 	if data[0] != '<' {
       
   941 		return notAutolink, 0
       
   942 	}
       
   943 	if data[1] == '/' {
       
   944 		i = 2
       
   945 	} else {
       
   946 		i = 1
       
   947 	}
       
   948 
       
   949 	if !isalnum(data[i]) {
       
   950 		return notAutolink, 0
       
   951 	}
       
   952 
       
   953 	// scheme test
       
   954 	autolink = notAutolink
       
   955 
       
   956 	// try to find the beginning of an URI
       
   957 	for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
       
   958 		i++
       
   959 	}
       
   960 
       
   961 	if i > 1 && i < len(data) && data[i] == '@' {
       
   962 		if j = isMailtoAutoLink(data[i:]); j != 0 {
       
   963 			return emailAutolink, i + j
       
   964 		}
       
   965 	}
       
   966 
       
   967 	if i > 2 && i < len(data) && data[i] == ':' {
       
   968 		autolink = normalAutolink
       
   969 		i++
       
   970 	}
       
   971 
       
   972 	// complete autolink test: no whitespace or ' or "
       
   973 	switch {
       
   974 	case i >= len(data):
       
   975 		autolink = notAutolink
       
   976 	case autolink != notAutolink:
       
   977 		j = i
       
   978 
       
   979 		for i < len(data) {
       
   980 			if data[i] == '\\' {
       
   981 				i += 2
       
   982 			} else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
       
   983 				break
       
   984 			} else {
       
   985 				i++
       
   986 			}
       
   987 
       
   988 		}
       
   989 
       
   990 		if i >= len(data) {
       
   991 			return autolink, 0
       
   992 		}
       
   993 		if i > j && data[i] == '>' {
       
   994 			return autolink, i + 1
       
   995 		}
       
   996 
       
   997 		// one of the forbidden chars has been found
       
   998 		autolink = notAutolink
       
   999 	}
       
  1000 	i += bytes.IndexByte(data[i:], '>')
       
  1001 	if i < 0 {
       
  1002 		return autolink, 0
       
  1003 	}
       
  1004 	return autolink, i + 1
       
  1005 }
       
  1006 
       
  1007 // look for the address part of a mail autolink and '>'
       
  1008 // this is less strict than the original markdown e-mail address matching
       
  1009 func isMailtoAutoLink(data []byte) int {
       
  1010 	nb := 0
       
  1011 
       
  1012 	// address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
       
  1013 	for i := 0; i < len(data); i++ {
       
  1014 		if isalnum(data[i]) {
       
  1015 			continue
       
  1016 		}
       
  1017 
       
  1018 		switch data[i] {
       
  1019 		case '@':
       
  1020 			nb++
       
  1021 
       
  1022 		case '-', '.', '_':
       
  1023 			break
       
  1024 
       
  1025 		case '>':
       
  1026 			if nb == 1 {
       
  1027 				return i + 1
       
  1028 			}
       
  1029 			return 0
       
  1030 		default:
       
  1031 			return 0
       
  1032 		}
       
  1033 	}
       
  1034 
       
  1035 	return 0
       
  1036 }
       
  1037 
       
  1038 // look for the next emph char, skipping other constructs
       
  1039 func helperFindEmphChar(data []byte, c byte) int {
       
  1040 	i := 0
       
  1041 
       
  1042 	for i < len(data) {
       
  1043 		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
       
  1044 			i++
       
  1045 		}
       
  1046 		if i >= len(data) {
       
  1047 			return 0
       
  1048 		}
       
  1049 		// do not count escaped chars
       
  1050 		if i != 0 && data[i-1] == '\\' {
       
  1051 			i++
       
  1052 			continue
       
  1053 		}
       
  1054 		if data[i] == c {
       
  1055 			return i
       
  1056 		}
       
  1057 
       
  1058 		if data[i] == '`' {
       
  1059 			// skip a code span
       
  1060 			tmpI := 0
       
  1061 			i++
       
  1062 			for i < len(data) && data[i] != '`' {
       
  1063 				if tmpI == 0 && data[i] == c {
       
  1064 					tmpI = i
       
  1065 				}
       
  1066 				i++
       
  1067 			}
       
  1068 			if i >= len(data) {
       
  1069 				return tmpI
       
  1070 			}
       
  1071 			i++
       
  1072 		} else if data[i] == '[' {
       
  1073 			// skip a link
       
  1074 			tmpI := 0
       
  1075 			i++
       
  1076 			for i < len(data) && data[i] != ']' {
       
  1077 				if tmpI == 0 && data[i] == c {
       
  1078 					tmpI = i
       
  1079 				}
       
  1080 				i++
       
  1081 			}
       
  1082 			i++
       
  1083 			for i < len(data) && (data[i] == ' ' || data[i] == '\n') {
       
  1084 				i++
       
  1085 			}
       
  1086 			if i >= len(data) {
       
  1087 				return tmpI
       
  1088 			}
       
  1089 			if data[i] != '[' && data[i] != '(' { // not a link
       
  1090 				if tmpI > 0 {
       
  1091 					return tmpI
       
  1092 				}
       
  1093 				continue
       
  1094 			}
       
  1095 			cc := data[i]
       
  1096 			i++
       
  1097 			for i < len(data) && data[i] != cc {
       
  1098 				if tmpI == 0 && data[i] == c {
       
  1099 					return i
       
  1100 				}
       
  1101 				i++
       
  1102 			}
       
  1103 			if i >= len(data) {
       
  1104 				return tmpI
       
  1105 			}
       
  1106 			i++
       
  1107 		}
       
  1108 	}
       
  1109 	return 0
       
  1110 }
       
  1111 
       
  1112 func helperEmphasis(p *Markdown, data []byte, c byte) (int, *Node) {
       
  1113 	i := 0
       
  1114 
       
  1115 	// skip one symbol if coming from emph3
       
  1116 	if len(data) > 1 && data[0] == c && data[1] == c {
       
  1117 		i = 1
       
  1118 	}
       
  1119 
       
  1120 	for i < len(data) {
       
  1121 		length := helperFindEmphChar(data[i:], c)
       
  1122 		if length == 0 {
       
  1123 			return 0, nil
       
  1124 		}
       
  1125 		i += length
       
  1126 		if i >= len(data) {
       
  1127 			return 0, nil
       
  1128 		}
       
  1129 
       
  1130 		if i+1 < len(data) && data[i+1] == c {
       
  1131 			i++
       
  1132 			continue
       
  1133 		}
       
  1134 
       
  1135 		if data[i] == c && !isspace(data[i-1]) {
       
  1136 
       
  1137 			if p.extensions&NoIntraEmphasis != 0 {
       
  1138 				if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
       
  1139 					continue
       
  1140 				}
       
  1141 			}
       
  1142 
       
  1143 			emph := NewNode(Emph)
       
  1144 			p.inline(emph, data[:i])
       
  1145 			return i + 1, emph
       
  1146 		}
       
  1147 	}
       
  1148 
       
  1149 	return 0, nil
       
  1150 }
       
  1151 
       
  1152 func helperDoubleEmphasis(p *Markdown, data []byte, c byte) (int, *Node) {
       
  1153 	i := 0
       
  1154 
       
  1155 	for i < len(data) {
       
  1156 		length := helperFindEmphChar(data[i:], c)
       
  1157 		if length == 0 {
       
  1158 			return 0, nil
       
  1159 		}
       
  1160 		i += length
       
  1161 
       
  1162 		if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
       
  1163 			nodeType := Strong
       
  1164 			if c == '~' {
       
  1165 				nodeType = Del
       
  1166 			}
       
  1167 			node := NewNode(nodeType)
       
  1168 			p.inline(node, data[:i])
       
  1169 			return i + 2, node
       
  1170 		}
       
  1171 		i++
       
  1172 	}
       
  1173 	return 0, nil
       
  1174 }
       
  1175 
       
  1176 func helperTripleEmphasis(p *Markdown, data []byte, offset int, c byte) (int, *Node) {
       
  1177 	i := 0
       
  1178 	origData := data
       
  1179 	data = data[offset:]
       
  1180 
       
  1181 	for i < len(data) {
       
  1182 		length := helperFindEmphChar(data[i:], c)
       
  1183 		if length == 0 {
       
  1184 			return 0, nil
       
  1185 		}
       
  1186 		i += length
       
  1187 
       
  1188 		// skip whitespace preceded symbols
       
  1189 		if data[i] != c || isspace(data[i-1]) {
       
  1190 			continue
       
  1191 		}
       
  1192 
       
  1193 		switch {
       
  1194 		case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
       
  1195 			// triple symbol found
       
  1196 			strong := NewNode(Strong)
       
  1197 			em := NewNode(Emph)
       
  1198 			strong.AppendChild(em)
       
  1199 			p.inline(em, data[:i])
       
  1200 			return i + 3, strong
       
  1201 		case (i+1 < len(data) && data[i+1] == c):
       
  1202 			// double symbol found, hand over to emph1
       
  1203 			length, node := helperEmphasis(p, origData[offset-2:], c)
       
  1204 			if length == 0 {
       
  1205 				return 0, nil
       
  1206 			}
       
  1207 			return length - 2, node
       
  1208 		default:
       
  1209 			// single symbol found, hand over to emph2
       
  1210 			length, node := helperDoubleEmphasis(p, origData[offset-1:], c)
       
  1211 			if length == 0 {
       
  1212 				return 0, nil
       
  1213 			}
       
  1214 			return length - 1, node
       
  1215 		}
       
  1216 	}
       
  1217 	return 0, nil
       
  1218 }
       
  1219 
       
  1220 func text(s []byte) *Node {
       
  1221 	node := NewNode(Text)
       
  1222 	node.Literal = s
       
  1223 	return node
       
  1224 }
       
  1225 
       
  1226 func normalizeURI(s []byte) []byte {
       
  1227 	return s // TODO: implement
       
  1228 }