vendor/github.com/russross/blackfriday/inline.go
changeset 246 0998f404dd31
parent 245 910f00ab2799
child 247 1ca743b3eb80
equal deleted inserted replaced
245:910f00ab2799 246:0998f404dd31
     1 //
       
     2 // Blackfriday Markdown Processor
       
     3 // Available at http://github.com/russross/blackfriday
       
     4 //
       
     5 // Copyright © 2011 Russ Ross <russ@russross.com>.
       
     6 // Distributed under the Simplified BSD License.
       
     7 // See README.md for details.
       
     8 //
       
     9 
       
    10 //
       
    11 // Functions to parse inline elements.
       
    12 //
       
    13 
       
    14 package blackfriday
       
    15 
       
    16 import (
       
    17 	"bytes"
       
    18 	"regexp"
       
    19 	"strconv"
       
    20 )
       
    21 
       
    22 var (
       
    23 	urlRe    = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
       
    24 	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)
       
    25 
       
    26 	// TODO: improve this regexp to catch all possible entities:
       
    27 	htmlEntityRe = regexp.MustCompile(`&[a-z]{2,5};`)
       
    28 )
       
    29 
       
    30 // Functions to parse text within a block
       
    31 // Each function returns the number of chars taken care of
       
    32 // data is the complete block being rendered
       
    33 // offset is the number of valid chars before the current cursor
       
    34 
       
    35 func (p *Markdown) inline(currBlock *Node, data []byte) {
       
    36 	// handlers might call us recursively: enforce a maximum depth
       
    37 	if p.nesting >= p.maxNesting || len(data) == 0 {
       
    38 		return
       
    39 	}
       
    40 	p.nesting++
       
    41 	beg, end := 0, 0
       
    42 	for end < len(data) {
       
    43 		handler := p.inlineCallback[data[end]]
       
    44 		if handler != nil {
       
    45 			if consumed, node := handler(p, data, end); consumed == 0 {
       
    46 				// No action from the callback.
       
    47 				end++
       
    48 			} else {
       
    49 				// Copy inactive chars into the output.
       
    50 				currBlock.AppendChild(text(data[beg:end]))
       
    51 				if node != nil {
       
    52 					currBlock.AppendChild(node)
       
    53 				}
       
    54 				// Skip past whatever the callback used.
       
    55 				beg = end + consumed
       
    56 				end = beg
       
    57 			}
       
    58 		} else {
       
    59 			end++
       
    60 		}
       
    61 	}
       
    62 	if beg < len(data) {
       
    63 		if data[end-1] == '\n' {
       
    64 			end--
       
    65 		}
       
    66 		currBlock.AppendChild(text(data[beg:end]))
       
    67 	}
       
    68 	p.nesting--
       
    69 }
       
    70 
       
    71 // single and double emphasis parsing
       
    72 func emphasis(p *Markdown, data []byte, offset int) (int, *Node) {
       
    73 	data = data[offset:]
       
    74 	c := data[0]
       
    75 
       
    76 	if len(data) > 2 && data[1] != c {
       
    77 		// whitespace cannot follow an opening emphasis;
       
    78 		// strikethrough only takes two characters '~~'
       
    79 		if c == '~' || isspace(data[1]) {
       
    80 			return 0, nil
       
    81 		}
       
    82 		ret, node := helperEmphasis(p, data[1:], c)
       
    83 		if ret == 0 {
       
    84 			return 0, nil
       
    85 		}
       
    86 
       
    87 		return ret + 1, node
       
    88 	}
       
    89 
       
    90 	if len(data) > 3 && data[1] == c && data[2] != c {
       
    91 		if isspace(data[2]) {
       
    92 			return 0, nil
       
    93 		}
       
    94 		ret, node := helperDoubleEmphasis(p, data[2:], c)
       
    95 		if ret == 0 {
       
    96 			return 0, nil
       
    97 		}
       
    98 
       
    99 		return ret + 2, node
       
   100 	}
       
   101 
       
   102 	if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
       
   103 		if c == '~' || isspace(data[3]) {
       
   104 			return 0, nil
       
   105 		}
       
   106 		ret, node := helperTripleEmphasis(p, data, 3, c)
       
   107 		if ret == 0 {
       
   108 			return 0, nil
       
   109 		}
       
   110 
       
   111 		return ret + 3, node
       
   112 	}
       
   113 
       
   114 	return 0, nil
       
   115 }
       
   116 
       
   117 func codeSpan(p *Markdown, data []byte, offset int) (int, *Node) {
       
   118 	data = data[offset:]
       
   119 
       
   120 	nb := 0
       
   121 
       
   122 	// count the number of backticks in the delimiter
       
   123 	for nb < len(data) && data[nb] == '`' {
       
   124 		nb++
       
   125 	}
       
   126 
       
   127 	// find the next delimiter
       
   128 	i, end := 0, 0
       
   129 	for end = nb; end < len(data) && i < nb; end++ {
       
   130 		if data[end] == '`' {
       
   131 			i++
       
   132 		} else {
       
   133 			i = 0
       
   134 		}
       
   135 	}
       
   136 
       
   137 	// no matching delimiter?
       
   138 	if i < nb && end >= len(data) {
       
   139 		return 0, nil
       
   140 	}
       
   141 
       
   142 	// trim outside whitespace
       
   143 	fBegin := nb
       
   144 	for fBegin < end && data[fBegin] == ' ' {
       
   145 		fBegin++
       
   146 	}
       
   147 
       
   148 	fEnd := end - nb
       
   149 	for fEnd > fBegin && data[fEnd-1] == ' ' {
       
   150 		fEnd--
       
   151 	}
       
   152 
       
   153 	// render the code span
       
   154 	if fBegin != fEnd {
       
   155 		code := NewNode(Code)
       
   156 		code.Literal = data[fBegin:fEnd]
       
   157 		return end, code
       
   158 	}
       
   159 
       
   160 	return end, nil
       
   161 }
       
   162 
       
   163 // newline preceded by two spaces becomes <br>
       
   164 func maybeLineBreak(p *Markdown, data []byte, offset int) (int, *Node) {
       
   165 	origOffset := offset
       
   166 	for offset < len(data) && data[offset] == ' ' {
       
   167 		offset++
       
   168 	}
       
   169 
       
   170 	if offset < len(data) && data[offset] == '\n' {
       
   171 		if offset-origOffset >= 2 {
       
   172 			return offset - origOffset + 1, NewNode(Hardbreak)
       
   173 		}
       
   174 		return offset - origOffset, nil
       
   175 	}
       
   176 	return 0, nil
       
   177 }
       
   178 
       
   179 // newline without two spaces works when HardLineBreak is enabled
       
   180 func lineBreak(p *Markdown, data []byte, offset int) (int, *Node) {
       
   181 	if p.extensions&HardLineBreak != 0 {
       
   182 		return 1, NewNode(Hardbreak)
       
   183 	}
       
   184 	return 0, nil
       
   185 }
       
   186 
       
   187 type linkType int
       
   188 
       
   189 const (
       
   190 	linkNormal linkType = iota
       
   191 	linkImg
       
   192 	linkDeferredFootnote
       
   193 	linkInlineFootnote
       
   194 )
       
   195 
       
   196 func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
       
   197 	if t == linkDeferredFootnote {
       
   198 		return false
       
   199 	}
       
   200 	return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
       
   201 }
       
   202 
       
   203 func maybeImage(p *Markdown, data []byte, offset int) (int, *Node) {
       
   204 	if offset < len(data)-1 && data[offset+1] == '[' {
       
   205 		return link(p, data, offset)
       
   206 	}
       
   207 	return 0, nil
       
   208 }
       
   209 
       
   210 func maybeInlineFootnote(p *Markdown, data []byte, offset int) (int, *Node) {
       
   211 	if offset < len(data)-1 && data[offset+1] == '[' {
       
   212 		return link(p, data, offset)
       
   213 	}
       
   214 	return 0, nil
       
   215 }
       
   216 
       
   217 // '[': parse a link or an image or a footnote
       
   218 func link(p *Markdown, data []byte, offset int) (int, *Node) {
       
   219 	// no links allowed inside regular links, footnote, and deferred footnotes
       
   220 	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
       
   221 		return 0, nil
       
   222 	}
       
   223 
       
   224 	var t linkType
       
   225 	switch {
       
   226 	// special case: ![^text] == deferred footnote (that follows something with
       
   227 	// an exclamation point)
       
   228 	case p.extensions&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^':
       
   229 		t = linkDeferredFootnote
       
   230 	// ![alt] == image
       
   231 	case offset >= 0 && data[offset] == '!':
       
   232 		t = linkImg
       
   233 		offset++
       
   234 	// ^[text] == inline footnote
       
   235 	// [^refId] == deferred footnote
       
   236 	case p.extensions&Footnotes != 0:
       
   237 		if offset >= 0 && data[offset] == '^' {
       
   238 			t = linkInlineFootnote
       
   239 			offset++
       
   240 		} else if len(data)-1 > offset && data[offset+1] == '^' {
       
   241 			t = linkDeferredFootnote
       
   242 		}
       
   243 	// [text] == regular link
       
   244 	default:
       
   245 		t = linkNormal
       
   246 	}
       
   247 
       
   248 	data = data[offset:]
       
   249 
       
   250 	var (
       
   251 		i                       = 1
       
   252 		noteID                  int
       
   253 		title, link, altContent []byte
       
   254 		textHasNl               = false
       
   255 	)
       
   256 
       
   257 	if t == linkDeferredFootnote {
       
   258 		i++
       
   259 	}
       
   260 
       
   261 	// look for the matching closing bracket
       
   262 	for level := 1; level > 0 && i < len(data); i++ {
       
   263 		switch {
       
   264 		case data[i] == '\n':
       
   265 			textHasNl = true
       
   266 
       
   267 		case data[i-1] == '\\':
       
   268 			continue
       
   269 
       
   270 		case data[i] == '[':
       
   271 			level++
       
   272 
       
   273 		case data[i] == ']':
       
   274 			level--
       
   275 			if level <= 0 {
       
   276 				i-- // compensate for extra i++ in for loop
       
   277 			}
       
   278 		}
       
   279 	}
       
   280 
       
   281 	if i >= len(data) {
       
   282 		return 0, nil
       
   283 	}
       
   284 
       
   285 	txtE := i
       
   286 	i++
       
   287 	var footnoteNode *Node
       
   288 
       
   289 	// skip any amount of whitespace or newline
       
   290 	// (this is much more lax than original markdown syntax)
       
   291 	for i < len(data) && isspace(data[i]) {
       
   292 		i++
       
   293 	}
       
   294 
       
   295 	// inline style link
       
   296 	switch {
       
   297 	case i < len(data) && data[i] == '(':
       
   298 		// skip initial whitespace
       
   299 		i++
       
   300 
       
   301 		for i < len(data) && isspace(data[i]) {
       
   302 			i++
       
   303 		}
       
   304 
       
   305 		linkB := i
       
   306 
       
   307 		// look for link end: ' " )
       
   308 	findlinkend:
       
   309 		for i < len(data) {
       
   310 			switch {
       
   311 			case data[i] == '\\':
       
   312 				i += 2
       
   313 
       
   314 			case data[i] == ')' || data[i] == '\'' || data[i] == '"':
       
   315 				break findlinkend
       
   316 
       
   317 			default:
       
   318 				i++
       
   319 			}
       
   320 		}
       
   321 
       
   322 		if i >= len(data) {
       
   323 			return 0, nil
       
   324 		}
       
   325 		linkE := i
       
   326 
       
   327 		// look for title end if present
       
   328 		titleB, titleE := 0, 0
       
   329 		if data[i] == '\'' || data[i] == '"' {
       
   330 			i++
       
   331 			titleB = i
       
   332 
       
   333 		findtitleend:
       
   334 			for i < len(data) {
       
   335 				switch {
       
   336 				case data[i] == '\\':
       
   337 					i += 2
       
   338 
       
   339 				case data[i] == ')':
       
   340 					break findtitleend
       
   341 
       
   342 				default:
       
   343 					i++
       
   344 				}
       
   345 			}
       
   346 
       
   347 			if i >= len(data) {
       
   348 				return 0, nil
       
   349 			}
       
   350 
       
   351 			// skip whitespace after title
       
   352 			titleE = i - 1
       
   353 			for titleE > titleB && isspace(data[titleE]) {
       
   354 				titleE--
       
   355 			}
       
   356 
       
   357 			// check for closing quote presence
       
   358 			if data[titleE] != '\'' && data[titleE] != '"' {
       
   359 				titleB, titleE = 0, 0
       
   360 				linkE = i
       
   361 			}
       
   362 		}
       
   363 
       
   364 		// remove whitespace at the end of the link
       
   365 		for linkE > linkB && isspace(data[linkE-1]) {
       
   366 			linkE--
       
   367 		}
       
   368 
       
   369 		// remove optional angle brackets around the link
       
   370 		if data[linkB] == '<' {
       
   371 			linkB++
       
   372 		}
       
   373 		if data[linkE-1] == '>' {
       
   374 			linkE--
       
   375 		}
       
   376 
       
   377 		// build escaped link and title
       
   378 		if linkE > linkB {
       
   379 			link = data[linkB:linkE]
       
   380 		}
       
   381 
       
   382 		if titleE > titleB {
       
   383 			title = data[titleB:titleE]
       
   384 		}
       
   385 
       
   386 		i++
       
   387 
       
   388 	// reference style link
       
   389 	case isReferenceStyleLink(data, i, t):
       
   390 		var id []byte
       
   391 		altContentConsidered := false
       
   392 
       
   393 		// look for the id
       
   394 		i++
       
   395 		linkB := i
       
   396 		for i < len(data) && data[i] != ']' {
       
   397 			i++
       
   398 		}
       
   399 		if i >= len(data) {
       
   400 			return 0, nil
       
   401 		}
       
   402 		linkE := i
       
   403 
       
   404 		// find the reference
       
   405 		if linkB == linkE {
       
   406 			if textHasNl {
       
   407 				var b bytes.Buffer
       
   408 
       
   409 				for j := 1; j < txtE; j++ {
       
   410 					switch {
       
   411 					case data[j] != '\n':
       
   412 						b.WriteByte(data[j])
       
   413 					case data[j-1] != ' ':
       
   414 						b.WriteByte(' ')
       
   415 					}
       
   416 				}
       
   417 
       
   418 				id = b.Bytes()
       
   419 			} else {
       
   420 				id = data[1:txtE]
       
   421 				altContentConsidered = true
       
   422 			}
       
   423 		} else {
       
   424 			id = data[linkB:linkE]
       
   425 		}
       
   426 
       
   427 		// find the reference with matching id
       
   428 		lr, ok := p.getRef(string(id))
       
   429 		if !ok {
       
   430 			return 0, nil
       
   431 		}
       
   432 
       
   433 		// keep link and title from reference
       
   434 		link = lr.link
       
   435 		title = lr.title
       
   436 		if altContentConsidered {
       
   437 			altContent = lr.text
       
   438 		}
       
   439 		i++
       
   440 
       
   441 	// shortcut reference style link or reference or inline footnote
       
   442 	default:
       
   443 		var id []byte
       
   444 
       
   445 		// craft the id
       
   446 		if textHasNl {
       
   447 			var b bytes.Buffer
       
   448 
       
   449 			for j := 1; j < txtE; j++ {
       
   450 				switch {
       
   451 				case data[j] != '\n':
       
   452 					b.WriteByte(data[j])
       
   453 				case data[j-1] != ' ':
       
   454 					b.WriteByte(' ')
       
   455 				}
       
   456 			}
       
   457 
       
   458 			id = b.Bytes()
       
   459 		} else {
       
   460 			if t == linkDeferredFootnote {
       
   461 				id = data[2:txtE] // get rid of the ^
       
   462 			} else {
       
   463 				id = data[1:txtE]
       
   464 			}
       
   465 		}
       
   466 
       
   467 		footnoteNode = NewNode(Item)
       
   468 		if t == linkInlineFootnote {
       
   469 			// create a new reference
       
   470 			noteID = len(p.notes) + 1
       
   471 
       
   472 			var fragment []byte
       
   473 			if len(id) > 0 {
       
   474 				if len(id) < 16 {
       
   475 					fragment = make([]byte, len(id))
       
   476 				} else {
       
   477 					fragment = make([]byte, 16)
       
   478 				}
       
   479 				copy(fragment, slugify(id))
       
   480 			} else {
       
   481 				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteID))...)
       
   482 			}
       
   483 
       
   484 			ref := &reference{
       
   485 				noteID:   noteID,
       
   486 				hasBlock: false,
       
   487 				link:     fragment,
       
   488 				title:    id,
       
   489 				footnote: footnoteNode,
       
   490 			}
       
   491 
       
   492 			p.notes = append(p.notes, ref)
       
   493 
       
   494 			link = ref.link
       
   495 			title = ref.title
       
   496 		} else {
       
   497 			// find the reference with matching id
       
   498 			lr, ok := p.getRef(string(id))
       
   499 			if !ok {
       
   500 				return 0, nil
       
   501 			}
       
   502 
       
   503 			if t == linkDeferredFootnote {
       
   504 				lr.noteID = len(p.notes) + 1
       
   505 				lr.footnote = footnoteNode
       
   506 				p.notes = append(p.notes, lr)
       
   507 			}
       
   508 
       
   509 			// keep link and title from reference
       
   510 			link = lr.link
       
   511 			// if inline footnote, title == footnote contents
       
   512 			title = lr.title
       
   513 			noteID = lr.noteID
       
   514 		}
       
   515 
       
   516 		// rewind the whitespace
       
   517 		i = txtE + 1
       
   518 	}
       
   519 
       
   520 	var uLink []byte
       
   521 	if t == linkNormal || t == linkImg {
       
   522 		if len(link) > 0 {
       
   523 			var uLinkBuf bytes.Buffer
       
   524 			unescapeText(&uLinkBuf, link)
       
   525 			uLink = uLinkBuf.Bytes()
       
   526 		}
       
   527 
       
   528 		// links need something to click on and somewhere to go
       
   529 		if len(uLink) == 0 || (t == linkNormal && txtE <= 1) {
       
   530 			return 0, nil
       
   531 		}
       
   532 	}
       
   533 
       
   534 	// call the relevant rendering function
       
   535 	var linkNode *Node
       
   536 	switch t {
       
   537 	case linkNormal:
       
   538 		linkNode = NewNode(Link)
       
   539 		linkNode.Destination = normalizeURI(uLink)
       
   540 		linkNode.Title = title
       
   541 		if len(altContent) > 0 {
       
   542 			linkNode.AppendChild(text(altContent))
       
   543 		} else {
       
   544 			// links cannot contain other links, so turn off link parsing
       
   545 			// temporarily and recurse
       
   546 			insideLink := p.insideLink
       
   547 			p.insideLink = true
       
   548 			p.inline(linkNode, data[1:txtE])
       
   549 			p.insideLink = insideLink
       
   550 		}
       
   551 
       
   552 	case linkImg:
       
   553 		linkNode = NewNode(Image)
       
   554 		linkNode.Destination = uLink
       
   555 		linkNode.Title = title
       
   556 		linkNode.AppendChild(text(data[1:txtE]))
       
   557 		i++
       
   558 
       
   559 	case linkInlineFootnote, linkDeferredFootnote:
       
   560 		linkNode = NewNode(Link)
       
   561 		linkNode.Destination = link
       
   562 		linkNode.Title = title
       
   563 		linkNode.NoteID = noteID
       
   564 		linkNode.Footnote = footnoteNode
       
   565 		if t == linkInlineFootnote {
       
   566 			i++
       
   567 		}
       
   568 
       
   569 	default:
       
   570 		return 0, nil
       
   571 	}
       
   572 
       
   573 	return i, linkNode
       
   574 }
       
   575 
       
   576 func (p *Markdown) inlineHTMLComment(data []byte) int {
       
   577 	if len(data) < 5 {
       
   578 		return 0
       
   579 	}
       
   580 	if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
       
   581 		return 0
       
   582 	}
       
   583 	i := 5
       
   584 	// scan for an end-of-comment marker, across lines if necessary
       
   585 	for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
       
   586 		i++
       
   587 	}
       
   588 	// no end-of-comment marker
       
   589 	if i >= len(data) {
       
   590 		return 0
       
   591 	}
       
   592 	return i + 1
       
   593 }
       
   594 
       
   595 func stripMailto(link []byte) []byte {
       
   596 	if bytes.HasPrefix(link, []byte("mailto://")) {
       
   597 		return link[9:]
       
   598 	} else if bytes.HasPrefix(link, []byte("mailto:")) {
       
   599 		return link[7:]
       
   600 	} else {
       
   601 		return link
       
   602 	}
       
   603 }
       
   604 
       
   605 // autolinkType specifies a kind of autolink that gets detected.
       
   606 type autolinkType int
       
   607 
       
   608 // These are the possible flag values for the autolink renderer.
       
   609 const (
       
   610 	notAutolink autolinkType = iota
       
   611 	normalAutolink
       
   612 	emailAutolink
       
   613 )
       
   614 
       
   615 // '<' when tags or autolinks are allowed
       
   616 func leftAngle(p *Markdown, data []byte, offset int) (int, *Node) {
       
   617 	data = data[offset:]
       
   618 	altype, end := tagLength(data)
       
   619 	if size := p.inlineHTMLComment(data); size > 0 {
       
   620 		end = size
       
   621 	}
       
   622 	if end > 2 {
       
   623 		if altype != notAutolink {
       
   624 			var uLink bytes.Buffer
       
   625 			unescapeText(&uLink, data[1:end+1-2])
       
   626 			if uLink.Len() > 0 {
       
   627 				link := uLink.Bytes()
       
   628 				node := NewNode(Link)
       
   629 				node.Destination = link
       
   630 				if altype == emailAutolink {
       
   631 					node.Destination = append([]byte("mailto:"), link...)
       
   632 				}
       
   633 				node.AppendChild(text(stripMailto(link)))
       
   634 				return end, node
       
   635 			}
       
   636 		} else {
       
   637 			htmlTag := NewNode(HTMLSpan)
       
   638 			htmlTag.Literal = data[:end]
       
   639 			return end, htmlTag
       
   640 		}
       
   641 	}
       
   642 
       
   643 	return end, nil
       
   644 }
       
   645 
       
   646 // '\\' backslash escape
       
   647 var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")
       
   648 
       
   649 func escape(p *Markdown, data []byte, offset int) (int, *Node) {
       
   650 	data = data[offset:]
       
   651 
       
   652 	if len(data) > 1 {
       
   653 		if p.extensions&BackslashLineBreak != 0 && data[1] == '\n' {
       
   654 			return 2, NewNode(Hardbreak)
       
   655 		}
       
   656 		if bytes.IndexByte(escapeChars, data[1]) < 0 {
       
   657 			return 0, nil
       
   658 		}
       
   659 
       
   660 		return 2, text(data[1:2])
       
   661 	}
       
   662 
       
   663 	return 2, nil
       
   664 }
       
   665 
       
   666 func unescapeText(ob *bytes.Buffer, src []byte) {
       
   667 	i := 0
       
   668 	for i < len(src) {
       
   669 		org := i
       
   670 		for i < len(src) && src[i] != '\\' {
       
   671 			i++
       
   672 		}
       
   673 
       
   674 		if i > org {
       
   675 			ob.Write(src[org:i])
       
   676 		}
       
   677 
       
   678 		if i+1 >= len(src) {
       
   679 			break
       
   680 		}
       
   681 
       
   682 		ob.WriteByte(src[i+1])
       
   683 		i += 2
       
   684 	}
       
   685 }
       
   686 
       
   687 // '&' escaped when it doesn't belong to an entity
       
   688 // valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
       
   689 func entity(p *Markdown, data []byte, offset int) (int, *Node) {
       
   690 	data = data[offset:]
       
   691 
       
   692 	end := 1
       
   693 
       
   694 	if end < len(data) && data[end] == '#' {
       
   695 		end++
       
   696 	}
       
   697 
       
   698 	for end < len(data) && isalnum(data[end]) {
       
   699 		end++
       
   700 	}
       
   701 
       
   702 	if end < len(data) && data[end] == ';' {
       
   703 		end++ // real entity
       
   704 	} else {
       
   705 		return 0, nil // lone '&'
       
   706 	}
       
   707 
       
   708 	ent := data[:end]
       
   709 	// undo &amp; escaping or it will be converted to &amp;amp; by another
       
   710 	// escaper in the renderer
       
   711 	if bytes.Equal(ent, []byte("&amp;")) {
       
   712 		ent = []byte{'&'}
       
   713 	}
       
   714 
       
   715 	return end, text(ent)
       
   716 }
       
   717 
       
   718 func linkEndsWithEntity(data []byte, linkEnd int) bool {
       
   719 	entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1)
       
   720 	return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
       
   721 }
       
   722 
       
   723 // hasPrefixCaseInsensitive is a custom implementation of
       
   724 //     strings.HasPrefix(strings.ToLower(s), prefix)
       
   725 // we rolled our own because ToLower pulls in a huge machinery of lowercasing
       
   726 // anything from Unicode and that's very slow. Since this func will only be
       
   727 // used on ASCII protocol prefixes, we can take shortcuts.
       
   728 func hasPrefixCaseInsensitive(s, prefix []byte) bool {
       
   729 	if len(s) < len(prefix) {
       
   730 		return false
       
   731 	}
       
   732 	delta := byte('a' - 'A')
       
   733 	for i, b := range prefix {
       
   734 		if b != s[i] && b != s[i]+delta {
       
   735 			return false
       
   736 		}
       
   737 	}
       
   738 	return true
       
   739 }
       
   740 
       
   741 var protocolPrefixes = [][]byte{
       
   742 	[]byte("http://"),
       
   743 	[]byte("https://"),
       
   744 	[]byte("ftp://"),
       
   745 	[]byte("file://"),
       
   746 	[]byte("mailto:"),
       
   747 }
       
   748 
       
   749 const shortestPrefix = 6 // len("ftp://"), the shortest of the above
       
   750 
       
   751 func maybeAutoLink(p *Markdown, data []byte, offset int) (int, *Node) {
       
   752 	// quick check to rule out most false hits
       
   753 	if p.insideLink || len(data) < offset+shortestPrefix {
       
   754 		return 0, nil
       
   755 	}
       
   756 	for _, prefix := range protocolPrefixes {
       
   757 		endOfHead := offset + 8 // 8 is the len() of the longest prefix
       
   758 		if endOfHead > len(data) {
       
   759 			endOfHead = len(data)
       
   760 		}
       
   761 		if hasPrefixCaseInsensitive(data[offset:endOfHead], prefix) {
       
   762 			return autoLink(p, data, offset)
       
   763 		}
       
   764 	}
       
   765 	return 0, nil
       
   766 }
       
   767 
       
   768 func autoLink(p *Markdown, data []byte, offset int) (int, *Node) {
       
   769 	// Now a more expensive check to see if we're not inside an anchor element
       
   770 	anchorStart := offset
       
   771 	offsetFromAnchor := 0
       
   772 	for anchorStart > 0 && data[anchorStart] != '<' {
       
   773 		anchorStart--
       
   774 		offsetFromAnchor++
       
   775 	}
       
   776 
       
   777 	anchorStr := anchorRe.Find(data[anchorStart:])
       
   778 	if anchorStr != nil {
       
   779 		anchorClose := NewNode(HTMLSpan)
       
   780 		anchorClose.Literal = anchorStr[offsetFromAnchor:]
       
   781 		return len(anchorStr) - offsetFromAnchor, anchorClose
       
   782 	}
       
   783 
       
   784 	// scan backward for a word boundary
       
   785 	rewind := 0
       
   786 	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
       
   787 		rewind++
       
   788 	}
       
   789 	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
       
   790 		return 0, nil
       
   791 	}
       
   792 
       
   793 	origData := data
       
   794 	data = data[offset-rewind:]
       
   795 
       
   796 	if !isSafeLink(data) {
       
   797 		return 0, nil
       
   798 	}
       
   799 
       
   800 	linkEnd := 0
       
   801 	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
       
   802 		linkEnd++
       
   803 	}
       
   804 
       
   805 	// Skip punctuation at the end of the link
       
   806 	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
       
   807 		linkEnd--
       
   808 	}
       
   809 
       
   810 	// But don't skip semicolon if it's a part of escaped entity:
       
   811 	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
       
   812 		linkEnd--
       
   813 	}
       
   814 
       
   815 	// See if the link finishes with a punctuation sign that can be closed.
       
   816 	var copen byte
       
   817 	switch data[linkEnd-1] {
       
   818 	case '"':
       
   819 		copen = '"'
       
   820 	case '\'':
       
   821 		copen = '\''
       
   822 	case ')':
       
   823 		copen = '('
       
   824 	case ']':
       
   825 		copen = '['
       
   826 	case '}':
       
   827 		copen = '{'
       
   828 	default:
       
   829 		copen = 0
       
   830 	}
       
   831 
       
   832 	if copen != 0 {
       
   833 		bufEnd := offset - rewind + linkEnd - 2
       
   834 
       
   835 		openDelim := 1
       
   836 
       
   837 		/* Try to close the final punctuation sign in this same line;
       
   838 		 * if we managed to close it outside of the URL, that means that it's
       
   839 		 * not part of the URL. If it closes inside the URL, that means it
       
   840 		 * is part of the URL.
       
   841 		 *
       
   842 		 * Examples:
       
   843 		 *
       
   844 		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
       
   845 		 *              => http://www.pokemon.com/Pikachu_(Electric)
       
   846 		 *
       
   847 		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
       
   848 		 *              => http://www.pokemon.com/Pikachu_(Electric)
       
   849 		 *
       
   850 		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
       
   851 		 *              => http://www.pokemon.com/Pikachu_(Electric))
       
   852 		 *
       
   853 		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
       
   854 		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
       
   855 		 */
       
   856 
       
   857 		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
       
   858 			if origData[bufEnd] == data[linkEnd-1] {
       
   859 				openDelim++
       
   860 			}
       
   861 
       
   862 			if origData[bufEnd] == copen {
       
   863 				openDelim--
       
   864 			}
       
   865 
       
   866 			bufEnd--
       
   867 		}
       
   868 
       
   869 		if openDelim == 0 {
       
   870 			linkEnd--
       
   871 		}
       
   872 	}
       
   873 
       
   874 	var uLink bytes.Buffer
       
   875 	unescapeText(&uLink, data[:linkEnd])
       
   876 
       
   877 	if uLink.Len() > 0 {
       
   878 		node := NewNode(Link)
       
   879 		node.Destination = uLink.Bytes()
       
   880 		node.AppendChild(text(uLink.Bytes()))
       
   881 		return linkEnd, node
       
   882 	}
       
   883 
       
   884 	return linkEnd, nil
       
   885 }
       
   886 
       
   887 func isEndOfLink(char byte) bool {
       
   888 	return isspace(char) || char == '<'
       
   889 }
       
   890 
       
   891 var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}
       
   892 var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")}
       
   893 
       
   894 func isSafeLink(link []byte) bool {
       
   895 	for _, path := range validPaths {
       
   896 		if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) {
       
   897 			if len(link) == len(path) {
       
   898 				return true
       
   899 			} else if isalnum(link[len(path)]) {
       
   900 				return true
       
   901 			}
       
   902 		}
       
   903 	}
       
   904 
       
   905 	for _, prefix := range validUris {
       
   906 		// TODO: handle unicode here
       
   907 		// case-insensitive prefix test
       
   908 		if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
       
   909 			return true
       
   910 		}
       
   911 	}
       
   912 
       
   913 	return false
       
   914 }
       
   915 
       
   916 // return the length of the given tag, or 0 is it's not valid
       
   917 func tagLength(data []byte) (autolink autolinkType, end int) {
       
   918 	var i, j int
       
   919 
       
   920 	// a valid tag can't be shorter than 3 chars
       
   921 	if len(data) < 3 {
       
   922 		return notAutolink, 0
       
   923 	}
       
   924 
       
   925 	// begins with a '<' optionally followed by '/', followed by letter or number
       
   926 	if data[0] != '<' {
       
   927 		return notAutolink, 0
       
   928 	}
       
   929 	if data[1] == '/' {
       
   930 		i = 2
       
   931 	} else {
       
   932 		i = 1
       
   933 	}
       
   934 
       
   935 	if !isalnum(data[i]) {
       
   936 		return notAutolink, 0
       
   937 	}
       
   938 
       
   939 	// scheme test
       
   940 	autolink = notAutolink
       
   941 
       
   942 	// try to find the beginning of an URI
       
   943 	for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
       
   944 		i++
       
   945 	}
       
   946 
       
   947 	if i > 1 && i < len(data) && data[i] == '@' {
       
   948 		if j = isMailtoAutoLink(data[i:]); j != 0 {
       
   949 			return emailAutolink, i + j
       
   950 		}
       
   951 	}
       
   952 
       
   953 	if i > 2 && i < len(data) && data[i] == ':' {
       
   954 		autolink = normalAutolink
       
   955 		i++
       
   956 	}
       
   957 
       
   958 	// complete autolink test: no whitespace or ' or "
       
   959 	switch {
       
   960 	case i >= len(data):
       
   961 		autolink = notAutolink
       
   962 	case autolink != notAutolink:
       
   963 		j = i
       
   964 
       
   965 		for i < len(data) {
       
   966 			if data[i] == '\\' {
       
   967 				i += 2
       
   968 			} else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
       
   969 				break
       
   970 			} else {
       
   971 				i++
       
   972 			}
       
   973 
       
   974 		}
       
   975 
       
   976 		if i >= len(data) {
       
   977 			return autolink, 0
       
   978 		}
       
   979 		if i > j && data[i] == '>' {
       
   980 			return autolink, i + 1
       
   981 		}
       
   982 
       
   983 		// one of the forbidden chars has been found
       
   984 		autolink = notAutolink
       
   985 	}
       
   986 	i += bytes.IndexByte(data[i:], '>')
       
   987 	if i < 0 {
       
   988 		return autolink, 0
       
   989 	}
       
   990 	return autolink, i + 1
       
   991 }
       
   992 
       
   993 // look for the address part of a mail autolink and '>'
       
   994 // this is less strict than the original markdown e-mail address matching
       
   995 func isMailtoAutoLink(data []byte) int {
       
   996 	nb := 0
       
   997 
       
   998 	// address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
       
   999 	for i := 0; i < len(data); i++ {
       
  1000 		if isalnum(data[i]) {
       
  1001 			continue
       
  1002 		}
       
  1003 
       
  1004 		switch data[i] {
       
  1005 		case '@':
       
  1006 			nb++
       
  1007 
       
  1008 		case '-', '.', '_':
       
  1009 			break
       
  1010 
       
  1011 		case '>':
       
  1012 			if nb == 1 {
       
  1013 				return i + 1
       
  1014 			}
       
  1015 			return 0
       
  1016 		default:
       
  1017 			return 0
       
  1018 		}
       
  1019 	}
       
  1020 
       
  1021 	return 0
       
  1022 }
       
  1023 
       
  1024 // look for the next emph char, skipping other constructs
       
  1025 func helperFindEmphChar(data []byte, c byte) int {
       
  1026 	i := 0
       
  1027 
       
  1028 	for i < len(data) {
       
  1029 		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
       
  1030 			i++
       
  1031 		}
       
  1032 		if i >= len(data) {
       
  1033 			return 0
       
  1034 		}
       
  1035 		// do not count escaped chars
       
  1036 		if i != 0 && data[i-1] == '\\' {
       
  1037 			i++
       
  1038 			continue
       
  1039 		}
       
  1040 		if data[i] == c {
       
  1041 			return i
       
  1042 		}
       
  1043 
       
  1044 		if data[i] == '`' {
       
  1045 			// skip a code span
       
  1046 			tmpI := 0
       
  1047 			i++
       
  1048 			for i < len(data) && data[i] != '`' {
       
  1049 				if tmpI == 0 && data[i] == c {
       
  1050 					tmpI = i
       
  1051 				}
       
  1052 				i++
       
  1053 			}
       
  1054 			if i >= len(data) {
       
  1055 				return tmpI
       
  1056 			}
       
  1057 			i++
       
  1058 		} else if data[i] == '[' {
       
  1059 			// skip a link
       
  1060 			tmpI := 0
       
  1061 			i++
       
  1062 			for i < len(data) && data[i] != ']' {
       
  1063 				if tmpI == 0 && data[i] == c {
       
  1064 					tmpI = i
       
  1065 				}
       
  1066 				i++
       
  1067 			}
       
  1068 			i++
       
  1069 			for i < len(data) && (data[i] == ' ' || data[i] == '\n') {
       
  1070 				i++
       
  1071 			}
       
  1072 			if i >= len(data) {
       
  1073 				return tmpI
       
  1074 			}
       
  1075 			if data[i] != '[' && data[i] != '(' { // not a link
       
  1076 				if tmpI > 0 {
       
  1077 					return tmpI
       
  1078 				}
       
  1079 				continue
       
  1080 			}
       
  1081 			cc := data[i]
       
  1082 			i++
       
  1083 			for i < len(data) && data[i] != cc {
       
  1084 				if tmpI == 0 && data[i] == c {
       
  1085 					return i
       
  1086 				}
       
  1087 				i++
       
  1088 			}
       
  1089 			if i >= len(data) {
       
  1090 				return tmpI
       
  1091 			}
       
  1092 			i++
       
  1093 		}
       
  1094 	}
       
  1095 	return 0
       
  1096 }
       
  1097 
       
  1098 func helperEmphasis(p *Markdown, data []byte, c byte) (int, *Node) {
       
  1099 	i := 0
       
  1100 
       
  1101 	// skip one symbol if coming from emph3
       
  1102 	if len(data) > 1 && data[0] == c && data[1] == c {
       
  1103 		i = 1
       
  1104 	}
       
  1105 
       
  1106 	for i < len(data) {
       
  1107 		length := helperFindEmphChar(data[i:], c)
       
  1108 		if length == 0 {
       
  1109 			return 0, nil
       
  1110 		}
       
  1111 		i += length
       
  1112 		if i >= len(data) {
       
  1113 			return 0, nil
       
  1114 		}
       
  1115 
       
  1116 		if i+1 < len(data) && data[i+1] == c {
       
  1117 			i++
       
  1118 			continue
       
  1119 		}
       
  1120 
       
  1121 		if data[i] == c && !isspace(data[i-1]) {
       
  1122 
       
  1123 			if p.extensions&NoIntraEmphasis != 0 {
       
  1124 				if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
       
  1125 					continue
       
  1126 				}
       
  1127 			}
       
  1128 
       
  1129 			emph := NewNode(Emph)
       
  1130 			p.inline(emph, data[:i])
       
  1131 			return i + 1, emph
       
  1132 		}
       
  1133 	}
       
  1134 
       
  1135 	return 0, nil
       
  1136 }
       
  1137 
       
  1138 func helperDoubleEmphasis(p *Markdown, data []byte, c byte) (int, *Node) {
       
  1139 	i := 0
       
  1140 
       
  1141 	for i < len(data) {
       
  1142 		length := helperFindEmphChar(data[i:], c)
       
  1143 		if length == 0 {
       
  1144 			return 0, nil
       
  1145 		}
       
  1146 		i += length
       
  1147 
       
  1148 		if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
       
  1149 			nodeType := Strong
       
  1150 			if c == '~' {
       
  1151 				nodeType = Del
       
  1152 			}
       
  1153 			node := NewNode(nodeType)
       
  1154 			p.inline(node, data[:i])
       
  1155 			return i + 2, node
       
  1156 		}
       
  1157 		i++
       
  1158 	}
       
  1159 	return 0, nil
       
  1160 }
       
  1161 
       
  1162 func helperTripleEmphasis(p *Markdown, data []byte, offset int, c byte) (int, *Node) {
       
  1163 	i := 0
       
  1164 	origData := data
       
  1165 	data = data[offset:]
       
  1166 
       
  1167 	for i < len(data) {
       
  1168 		length := helperFindEmphChar(data[i:], c)
       
  1169 		if length == 0 {
       
  1170 			return 0, nil
       
  1171 		}
       
  1172 		i += length
       
  1173 
       
  1174 		// skip whitespace preceded symbols
       
  1175 		if data[i] != c || isspace(data[i-1]) {
       
  1176 			continue
       
  1177 		}
       
  1178 
       
  1179 		switch {
       
  1180 		case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
       
  1181 			// triple symbol found
       
  1182 			strong := NewNode(Strong)
       
  1183 			em := NewNode(Emph)
       
  1184 			strong.AppendChild(em)
       
  1185 			p.inline(em, data[:i])
       
  1186 			return i + 3, strong
       
  1187 		case (i+1 < len(data) && data[i+1] == c):
       
  1188 			// double symbol found, hand over to emph1
       
  1189 			length, node := helperEmphasis(p, origData[offset-2:], c)
       
  1190 			if length == 0 {
       
  1191 				return 0, nil
       
  1192 			}
       
  1193 			return length - 2, node
       
  1194 		default:
       
  1195 			// single symbol found, hand over to emph2
       
  1196 			length, node := helperDoubleEmphasis(p, origData[offset-1:], c)
       
  1197 			if length == 0 {
       
  1198 				return 0, nil
       
  1199 			}
       
  1200 			return length - 1, node
       
  1201 		}
       
  1202 	}
       
  1203 	return 0, nil
       
  1204 }
       
  1205 
       
  1206 func text(s []byte) *Node {
       
  1207 	node := NewNode(Text)
       
  1208 	node.Literal = s
       
  1209 	return node
       
  1210 }
       
  1211 
       
  1212 func normalizeURI(s []byte) []byte {
       
  1213 	return s // TODO: implement
       
  1214 }