vendor/github.com/russross/blackfriday/inline.go
changeset 251 1c52a0eeb952
equal deleted inserted replaced
250:c040f992052f 251:1c52a0eeb952
       
     1 //
       
     2 // Blackfriday Markdown Processor
       
     3 // Available at http://github.com/russross/blackfriday
       
     4 //
       
     5 // Copyright © 2011 Russ Ross <russ@russross.com>.
       
     6 // Distributed under the Simplified BSD License.
       
     7 // See README.md for details.
       
     8 //
       
     9 
       
    10 //
       
    11 // Functions to parse inline elements.
       
    12 //
       
    13 
       
    14 package blackfriday
       
    15 
       
    16 import (
       
    17 	"bytes"
       
    18 	"regexp"
       
    19 	"strconv"
       
    20 )
       
    21 
       
    // urlRe is the regular-expression source for a URL as it may appear inside
    // an anchor element: either an http, https or ftp scheme followed by "://",
    // or a single leading '/', followed by one or more URL characters.
    var urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`

    // anchorRe matches, anchored at the start of the input, a complete
    // <a href="URL" [title="..."]>URL</a> element whose href and text both match
    // urlRe.
    var anchorRe = regexp.MustCompile(
    	`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` +
    		urlRe +
    		`<\/a>)`,
    )
       
    26 
       
    27 // Functions to parse text within a block
       
    28 // Each function returns the number of chars taken care of
       
    29 // data is the complete block being rendered
       
    30 // offset is the number of valid chars before the current cursor
       
    31 
       
    32 func (p *parser) inline(out *bytes.Buffer, data []byte) {
       
    33 	// this is called recursively: enforce a maximum depth
       
    34 	if p.nesting >= p.maxNesting {
       
    35 		return
       
    36 	}
       
    37 	p.nesting++
       
    38 
       
    39 	i, end := 0, 0
       
    40 	for i < len(data) {
       
    41 		// copy inactive chars into the output
       
    42 		for end < len(data) && p.inlineCallback[data[end]] == nil {
       
    43 			end++
       
    44 		}
       
    45 
       
    46 		p.r.NormalText(out, data[i:end])
       
    47 
       
    48 		if end >= len(data) {
       
    49 			break
       
    50 		}
       
    51 		i = end
       
    52 
       
    53 		// call the trigger
       
    54 		handler := p.inlineCallback[data[end]]
       
    55 		if consumed := handler(p, out, data, i); consumed == 0 {
       
    56 			// no action from the callback; buffer the byte for later
       
    57 			end = i + 1
       
    58 		} else {
       
    59 			// skip past whatever the callback used
       
    60 			i += consumed
       
    61 			end = i
       
    62 		}
       
    63 	}
       
    64 
       
    65 	p.nesting--
       
    66 }
       
    67 
       
    68 // single and double emphasis parsing
       
    69 func emphasis(p *parser, out *bytes.Buffer, data []byte, offset int) int {
       
    70 	data = data[offset:]
       
    71 	c := data[0]
       
    72 	ret := 0
       
    73 
       
    74 	if len(data) > 2 && data[1] != c {
       
    75 		// whitespace cannot follow an opening emphasis;
       
    76 		// strikethrough only takes two characters '~~'
       
    77 		if c == '~' || isspace(data[1]) {
       
    78 			return 0
       
    79 		}
       
    80 		if ret = helperEmphasis(p, out, data[1:], c); ret == 0 {
       
    81 			return 0
       
    82 		}
       
    83 
       
    84 		return ret + 1
       
    85 	}
       
    86 
       
    87 	if len(data) > 3 && data[1] == c && data[2] != c {
       
    88 		if isspace(data[2]) {
       
    89 			return 0
       
    90 		}
       
    91 		if ret = helperDoubleEmphasis(p, out, data[2:], c); ret == 0 {
       
    92 			return 0
       
    93 		}
       
    94 
       
    95 		return ret + 2
       
    96 	}
       
    97 
       
    98 	if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
       
    99 		if c == '~' || isspace(data[3]) {
       
   100 			return 0
       
   101 		}
       
   102 		if ret = helperTripleEmphasis(p, out, data, 3, c); ret == 0 {
       
   103 			return 0
       
   104 		}
       
   105 
       
   106 		return ret + 3
       
   107 	}
       
   108 
       
   109 	return 0
       
   110 }
       
   111 
       
   112 func codeSpan(p *parser, out *bytes.Buffer, data []byte, offset int) int {
       
   113 	data = data[offset:]
       
   114 
       
   115 	nb := 0
       
   116 
       
   117 	// count the number of backticks in the delimiter
       
   118 	for nb < len(data) && data[nb] == '`' {
       
   119 		nb++
       
   120 	}
       
   121 
       
   122 	// find the next delimiter
       
   123 	i, end := 0, 0
       
   124 	for end = nb; end < len(data) && i < nb; end++ {
       
   125 		if data[end] == '`' {
       
   126 			i++
       
   127 		} else {
       
   128 			i = 0
       
   129 		}
       
   130 	}
       
   131 
       
   132 	// no matching delimiter?
       
   133 	if i < nb && end >= len(data) {
       
   134 		return 0
       
   135 	}
       
   136 
       
   137 	// trim outside whitespace
       
   138 	fBegin := nb
       
   139 	for fBegin < end && data[fBegin] == ' ' {
       
   140 		fBegin++
       
   141 	}
       
   142 
       
   143 	fEnd := end - nb
       
   144 	for fEnd > fBegin && data[fEnd-1] == ' ' {
       
   145 		fEnd--
       
   146 	}
       
   147 
       
   148 	// render the code span
       
   149 	if fBegin != fEnd {
       
   150 		p.r.CodeSpan(out, data[fBegin:fEnd])
       
   151 	}
       
   152 
       
   153 	return end
       
   154 
       
   155 }
       
   156 
       
   157 // newline preceded by two spaces becomes <br>
       
   158 // newline without two spaces works when EXTENSION_HARD_LINE_BREAK is enabled
       
   159 func lineBreak(p *parser, out *bytes.Buffer, data []byte, offset int) int {
       
   160 	// remove trailing spaces from out
       
   161 	outBytes := out.Bytes()
       
   162 	end := len(outBytes)
       
   163 	eol := end
       
   164 	for eol > 0 && outBytes[eol-1] == ' ' {
       
   165 		eol--
       
   166 	}
       
   167 	out.Truncate(eol)
       
   168 
       
   169 	precededByTwoSpaces := offset >= 2 && data[offset-2] == ' ' && data[offset-1] == ' '
       
   170 	precededByBackslash := offset >= 1 && data[offset-1] == '\\' // see http://spec.commonmark.org/0.18/#example-527
       
   171 	precededByBackslash = precededByBackslash && p.flags&EXTENSION_BACKSLASH_LINE_BREAK != 0
       
   172 
       
   173 	if p.flags&EXTENSION_JOIN_LINES != 0 {
       
   174 		return 1
       
   175 	}
       
   176 
       
   177 	// should there be a hard line break here?
       
   178 	if p.flags&EXTENSION_HARD_LINE_BREAK == 0 && !precededByTwoSpaces && !precededByBackslash {
       
   179 		return 0
       
   180 	}
       
   181 
       
   182 	if precededByBackslash && eol > 0 {
       
   183 		out.Truncate(eol - 1)
       
   184 	}
       
   185 	p.r.LineBreak(out)
       
   186 	return 1
       
   187 }
       
   188 
       
   // linkType identifies which bracket construct the link parser is handling.
   type linkType int

   const (
   	// linkNormal is a regular link: [text](dest) or [text][id].
   	linkNormal = linkType(iota)
   	// linkImg is an image: ![alt](src).
   	linkImg
   	// linkDeferredFootnote is a reference to a footnote defined elsewhere: [^refId].
   	linkDeferredFootnote
   	// linkInlineFootnote is a footnote whose text is given in place: ^[text].
   	linkInlineFootnote
   )
       
   197 
       
   198 func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
       
   199 	if t == linkDeferredFootnote {
       
   200 		return false
       
   201 	}
       
   202 	return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
       
   203 }
       
   204 
       
   205 // '[': parse a link or an image or a footnote
       
   206 func link(p *parser, out *bytes.Buffer, data []byte, offset int) int {
       
   207 	// no links allowed inside regular links, footnote, and deferred footnotes
       
   208 	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
       
   209 		return 0
       
   210 	}
       
   211 
       
   212 	var t linkType
       
   213 	switch {
       
   214 	// special case: ![^text] == deferred footnote (that follows something with
       
   215 	// an exclamation point)
       
   216 	case p.flags&EXTENSION_FOOTNOTES != 0 && len(data)-1 > offset && data[offset+1] == '^':
       
   217 		t = linkDeferredFootnote
       
   218 	// ![alt] == image
       
   219 	case offset > 0 && data[offset-1] == '!':
       
   220 		t = linkImg
       
   221 	// ^[text] == inline footnote
       
   222 	// [^refId] == deferred footnote
       
   223 	case p.flags&EXTENSION_FOOTNOTES != 0:
       
   224 		if offset > 0 && data[offset-1] == '^' {
       
   225 			t = linkInlineFootnote
       
   226 		} else if len(data)-1 > offset && data[offset+1] == '^' {
       
   227 			t = linkDeferredFootnote
       
   228 		}
       
   229 	// [text] == regular link
       
   230 	default:
       
   231 		t = linkNormal
       
   232 	}
       
   233 
       
   234 	data = data[offset:]
       
   235 
       
   236 	var (
       
   237 		i                       = 1
       
   238 		noteId                  int
       
   239 		title, link, altContent []byte
       
   240 		textHasNl               = false
       
   241 	)
       
   242 
       
   243 	if t == linkDeferredFootnote {
       
   244 		i++
       
   245 	}
       
   246 
       
   247 	brace := 0
       
   248 
       
   249 	// look for the matching closing bracket
       
   250 	for level := 1; level > 0 && i < len(data); i++ {
       
   251 		switch {
       
   252 		case data[i] == '\n':
       
   253 			textHasNl = true
       
   254 
       
   255 		case data[i-1] == '\\':
       
   256 			continue
       
   257 
       
   258 		case data[i] == '[':
       
   259 			level++
       
   260 
       
   261 		case data[i] == ']':
       
   262 			level--
       
   263 			if level <= 0 {
       
   264 				i-- // compensate for extra i++ in for loop
       
   265 			}
       
   266 		}
       
   267 	}
       
   268 
       
   269 	if i >= len(data) {
       
   270 		return 0
       
   271 	}
       
   272 
       
   273 	txtE := i
       
   274 	i++
       
   275 
       
   276 	// skip any amount of whitespace or newline
       
   277 	// (this is much more lax than original markdown syntax)
       
   278 	for i < len(data) && isspace(data[i]) {
       
   279 		i++
       
   280 	}
       
   281 
       
   282 	switch {
       
   283 	// inline style link
       
   284 	case i < len(data) && data[i] == '(':
       
   285 		// skip initial whitespace
       
   286 		i++
       
   287 
       
   288 		for i < len(data) && isspace(data[i]) {
       
   289 			i++
       
   290 		}
       
   291 
       
   292 		linkB := i
       
   293 
       
   294 		// look for link end: ' " ), check for new opening braces and take this
       
   295 		// into account, this may lead for overshooting and probably will require
       
   296 		// some fine-tuning.
       
   297 	findlinkend:
       
   298 		for i < len(data) {
       
   299 			switch {
       
   300 			case data[i] == '\\':
       
   301 				i += 2
       
   302 
       
   303 			case data[i] == '(':
       
   304 				brace++
       
   305 				i++
       
   306 
       
   307 			case data[i] == ')':
       
   308 				if brace <= 0 {
       
   309 					break findlinkend
       
   310 				}
       
   311 				brace--
       
   312 				i++
       
   313 
       
   314 			case data[i] == '\'' || data[i] == '"':
       
   315 				break findlinkend
       
   316 
       
   317 			default:
       
   318 				i++
       
   319 			}
       
   320 		}
       
   321 
       
   322 		if i >= len(data) {
       
   323 			return 0
       
   324 		}
       
   325 		linkE := i
       
   326 
       
   327 		// look for title end if present
       
   328 		titleB, titleE := 0, 0
       
   329 		if data[i] == '\'' || data[i] == '"' {
       
   330 			i++
       
   331 			titleB = i
       
   332 
       
   333 		findtitleend:
       
   334 			for i < len(data) {
       
   335 				switch {
       
   336 				case data[i] == '\\':
       
   337 					i += 2
       
   338 
       
   339 				case data[i] == ')':
       
   340 					break findtitleend
       
   341 
       
   342 				default:
       
   343 					i++
       
   344 				}
       
   345 			}
       
   346 
       
   347 			if i >= len(data) {
       
   348 				return 0
       
   349 			}
       
   350 
       
   351 			// skip whitespace after title
       
   352 			titleE = i - 1
       
   353 			for titleE > titleB && isspace(data[titleE]) {
       
   354 				titleE--
       
   355 			}
       
   356 
       
   357 			// check for closing quote presence
       
   358 			if data[titleE] != '\'' && data[titleE] != '"' {
       
   359 				titleB, titleE = 0, 0
       
   360 				linkE = i
       
   361 			}
       
   362 		}
       
   363 
       
   364 		// remove whitespace at the end of the link
       
   365 		for linkE > linkB && isspace(data[linkE-1]) {
       
   366 			linkE--
       
   367 		}
       
   368 
       
   369 		// remove optional angle brackets around the link
       
   370 		if data[linkB] == '<' {
       
   371 			linkB++
       
   372 		}
       
   373 		if data[linkE-1] == '>' {
       
   374 			linkE--
       
   375 		}
       
   376 
       
   377 		// build escaped link and title
       
   378 		if linkE > linkB {
       
   379 			link = data[linkB:linkE]
       
   380 		}
       
   381 
       
   382 		if titleE > titleB {
       
   383 			title = data[titleB:titleE]
       
   384 		}
       
   385 
       
   386 		i++
       
   387 
       
   388 	// reference style link
       
   389 	case isReferenceStyleLink(data, i, t):
       
   390 		var id []byte
       
   391 		altContentConsidered := false
       
   392 
       
   393 		// look for the id
       
   394 		i++
       
   395 		linkB := i
       
   396 		for i < len(data) && data[i] != ']' {
       
   397 			i++
       
   398 		}
       
   399 		if i >= len(data) {
       
   400 			return 0
       
   401 		}
       
   402 		linkE := i
       
   403 
       
   404 		// find the reference
       
   405 		if linkB == linkE {
       
   406 			if textHasNl {
       
   407 				var b bytes.Buffer
       
   408 
       
   409 				for j := 1; j < txtE; j++ {
       
   410 					switch {
       
   411 					case data[j] != '\n':
       
   412 						b.WriteByte(data[j])
       
   413 					case data[j-1] != ' ':
       
   414 						b.WriteByte(' ')
       
   415 					}
       
   416 				}
       
   417 
       
   418 				id = b.Bytes()
       
   419 			} else {
       
   420 				id = data[1:txtE]
       
   421 				altContentConsidered = true
       
   422 			}
       
   423 		} else {
       
   424 			id = data[linkB:linkE]
       
   425 		}
       
   426 
       
   427 		// find the reference with matching id
       
   428 		lr, ok := p.getRef(string(id))
       
   429 		if !ok {
       
   430 			return 0
       
   431 		}
       
   432 
       
   433 		// keep link and title from reference
       
   434 		link = lr.link
       
   435 		title = lr.title
       
   436 		if altContentConsidered {
       
   437 			altContent = lr.text
       
   438 		}
       
   439 		i++
       
   440 
       
   441 	// shortcut reference style link or reference or inline footnote
       
   442 	default:
       
   443 		var id []byte
       
   444 
       
   445 		// craft the id
       
   446 		if textHasNl {
       
   447 			var b bytes.Buffer
       
   448 
       
   449 			for j := 1; j < txtE; j++ {
       
   450 				switch {
       
   451 				case data[j] != '\n':
       
   452 					b.WriteByte(data[j])
       
   453 				case data[j-1] != ' ':
       
   454 					b.WriteByte(' ')
       
   455 				}
       
   456 			}
       
   457 
       
   458 			id = b.Bytes()
       
   459 		} else {
       
   460 			if t == linkDeferredFootnote {
       
   461 				id = data[2:txtE] // get rid of the ^
       
   462 			} else {
       
   463 				id = data[1:txtE]
       
   464 			}
       
   465 		}
       
   466 
       
   467 		if t == linkInlineFootnote {
       
   468 			// create a new reference
       
   469 			noteId = len(p.notes) + 1
       
   470 
       
   471 			var fragment []byte
       
   472 			if len(id) > 0 {
       
   473 				if len(id) < 16 {
       
   474 					fragment = make([]byte, len(id))
       
   475 				} else {
       
   476 					fragment = make([]byte, 16)
       
   477 				}
       
   478 				copy(fragment, slugify(id))
       
   479 			} else {
       
   480 				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteId))...)
       
   481 			}
       
   482 
       
   483 			ref := &reference{
       
   484 				noteId:   noteId,
       
   485 				hasBlock: false,
       
   486 				link:     fragment,
       
   487 				title:    id,
       
   488 			}
       
   489 
       
   490 			p.notes = append(p.notes, ref)
       
   491 			p.notesRecord[string(ref.link)] = struct{}{}
       
   492 
       
   493 			link = ref.link
       
   494 			title = ref.title
       
   495 		} else {
       
   496 			// find the reference with matching id
       
   497 			lr, ok := p.getRef(string(id))
       
   498 			if !ok {
       
   499 				return 0
       
   500 			}
       
   501 
       
   502 			if t == linkDeferredFootnote && !p.isFootnote(lr) {
       
   503 				lr.noteId = len(p.notes) + 1
       
   504 				p.notes = append(p.notes, lr)
       
   505 				p.notesRecord[string(lr.link)] = struct{}{}
       
   506 			}
       
   507 
       
   508 			// keep link and title from reference
       
   509 			link = lr.link
       
   510 			// if inline footnote, title == footnote contents
       
   511 			title = lr.title
       
   512 			noteId = lr.noteId
       
   513 		}
       
   514 
       
   515 		// rewind the whitespace
       
   516 		i = txtE + 1
       
   517 	}
       
   518 
       
   519 	// build content: img alt is escaped, link content is parsed
       
   520 	var content bytes.Buffer
       
   521 	if txtE > 1 {
       
   522 		if t == linkImg {
       
   523 			content.Write(data[1:txtE])
       
   524 		} else {
       
   525 			// links cannot contain other links, so turn off link parsing temporarily
       
   526 			insideLink := p.insideLink
       
   527 			p.insideLink = true
       
   528 			p.inline(&content, data[1:txtE])
       
   529 			p.insideLink = insideLink
       
   530 		}
       
   531 	}
       
   532 
       
   533 	var uLink []byte
       
   534 	if t == linkNormal || t == linkImg {
       
   535 		if len(link) > 0 {
       
   536 			var uLinkBuf bytes.Buffer
       
   537 			unescapeText(&uLinkBuf, link)
       
   538 			uLink = uLinkBuf.Bytes()
       
   539 		}
       
   540 
       
   541 		// links need something to click on and somewhere to go
       
   542 		if len(uLink) == 0 || (t == linkNormal && content.Len() == 0) {
       
   543 			return 0
       
   544 		}
       
   545 	}
       
   546 
       
   547 	// call the relevant rendering function
       
   548 	switch t {
       
   549 	case linkNormal:
       
   550 		if len(altContent) > 0 {
       
   551 			p.r.Link(out, uLink, title, altContent)
       
   552 		} else {
       
   553 			p.r.Link(out, uLink, title, content.Bytes())
       
   554 		}
       
   555 
       
   556 	case linkImg:
       
   557 		outSize := out.Len()
       
   558 		outBytes := out.Bytes()
       
   559 		if outSize > 0 && outBytes[outSize-1] == '!' {
       
   560 			out.Truncate(outSize - 1)
       
   561 		}
       
   562 
       
   563 		p.r.Image(out, uLink, title, content.Bytes())
       
   564 
       
   565 	case linkInlineFootnote:
       
   566 		outSize := out.Len()
       
   567 		outBytes := out.Bytes()
       
   568 		if outSize > 0 && outBytes[outSize-1] == '^' {
       
   569 			out.Truncate(outSize - 1)
       
   570 		}
       
   571 
       
   572 		p.r.FootnoteRef(out, link, noteId)
       
   573 
       
   574 	case linkDeferredFootnote:
       
   575 		p.r.FootnoteRef(out, link, noteId)
       
   576 
       
   577 	default:
       
   578 		return 0
       
   579 	}
       
   580 
       
   581 	return i
       
   582 }
       
   583 
       
   584 func (p *parser) inlineHTMLComment(out *bytes.Buffer, data []byte) int {
       
   585 	if len(data) < 5 {
       
   586 		return 0
       
   587 	}
       
   588 	if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
       
   589 		return 0
       
   590 	}
       
   591 	i := 5
       
   592 	// scan for an end-of-comment marker, across lines if necessary
       
   593 	for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
       
   594 		i++
       
   595 	}
       
   596 	// no end-of-comment marker
       
   597 	if i >= len(data) {
       
   598 		return 0
       
   599 	}
       
   600 	return i + 1
       
   601 }
       
   602 
       
   603 // '<' when tags or autolinks are allowed
       
   604 func leftAngle(p *parser, out *bytes.Buffer, data []byte, offset int) int {
       
   605 	data = data[offset:]
       
   606 	altype := LINK_TYPE_NOT_AUTOLINK
       
   607 	end := tagLength(data, &altype)
       
   608 	if size := p.inlineHTMLComment(out, data); size > 0 {
       
   609 		end = size
       
   610 	}
       
   611 	if end > 2 {
       
   612 		if altype != LINK_TYPE_NOT_AUTOLINK {
       
   613 			var uLink bytes.Buffer
       
   614 			unescapeText(&uLink, data[1:end+1-2])
       
   615 			if uLink.Len() > 0 {
       
   616 				p.r.AutoLink(out, uLink.Bytes(), altype)
       
   617 			}
       
   618 		} else {
       
   619 			p.r.RawHtmlTag(out, data[:end])
       
   620 		}
       
   621 	}
       
   622 
       
   623 	return end
       
   624 }
       
   625 
       
   626 // '\\' backslash escape
       
   627 var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")
       
   628 
       
   629 func escape(p *parser, out *bytes.Buffer, data []byte, offset int) int {
       
   630 	data = data[offset:]
       
   631 
       
   632 	if len(data) > 1 {
       
   633 		if bytes.IndexByte(escapeChars, data[1]) < 0 {
       
   634 			return 0
       
   635 		}
       
   636 
       
   637 		p.r.NormalText(out, data[1:2])
       
   638 	}
       
   639 
       
   640 	return 2
       
   641 }
       
   642 
       
   // unescapeText appends src to ob with backslash escapes resolved: every
   // backslash is dropped and the byte after it is copied verbatim, whatever it
   // is. A backslash that is the last byte of src is dropped.
   func unescapeText(ob *bytes.Buffer, src []byte) {
   	for len(src) > 0 {
   		slash := bytes.IndexByte(src, '\\')
   		if slash < 0 {
   			// no more escapes: copy the remainder
   			ob.Write(src)
   			return
   		}

   		if slash > 0 {
   			ob.Write(src[:slash])
   		}

   		if slash+1 >= len(src) {
   			// trailing backslash with nothing to escape
   			return
   		}

   		ob.WriteByte(src[slash+1])
   		src = src[slash+2:]
   	}
   }
       
   663 
       
   664 // '&' escaped when it doesn't belong to an entity
       
   665 // valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
       
   666 func entity(p *parser, out *bytes.Buffer, data []byte, offset int) int {
       
   667 	data = data[offset:]
       
   668 
       
   669 	end := 1
       
   670 
       
   671 	if end < len(data) && data[end] == '#' {
       
   672 		end++
       
   673 	}
       
   674 
       
   675 	for end < len(data) && isalnum(data[end]) {
       
   676 		end++
       
   677 	}
       
   678 
       
   679 	if end < len(data) && data[end] == ';' {
       
   680 		end++ // real entity
       
   681 	} else {
       
   682 		return 0 // lone '&'
       
   683 	}
       
   684 
       
   685 	p.r.Entity(out, data[:end])
       
   686 
       
   687 	return end
       
   688 }
       
   689 
       
   690 func linkEndsWithEntity(data []byte, linkEnd int) bool {
       
   691 	entityRanges := htmlEntity.FindAllIndex(data[:linkEnd], -1)
       
   692 	return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
       
   693 }
       
   694 
       
   695 func autoLink(p *parser, out *bytes.Buffer, data []byte, offset int) int {
       
   696 	// quick check to rule out most false hits on ':'
       
   697 	if p.insideLink || len(data) < offset+3 || data[offset+1] != '/' || data[offset+2] != '/' {
       
   698 		return 0
       
   699 	}
       
   700 
       
   701 	// Now a more expensive check to see if we're not inside an anchor element
       
   702 	anchorStart := offset
       
   703 	offsetFromAnchor := 0
       
   704 	for anchorStart > 0 && data[anchorStart] != '<' {
       
   705 		anchorStart--
       
   706 		offsetFromAnchor++
       
   707 	}
       
   708 
       
   709 	anchorStr := anchorRe.Find(data[anchorStart:])
       
   710 	if anchorStr != nil {
       
   711 		out.Write(anchorStr[offsetFromAnchor:])
       
   712 		return len(anchorStr) - offsetFromAnchor
       
   713 	}
       
   714 
       
   715 	// scan backward for a word boundary
       
   716 	rewind := 0
       
   717 	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
       
   718 		rewind++
       
   719 	}
       
   720 	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
       
   721 		return 0
       
   722 	}
       
   723 
       
   724 	origData := data
       
   725 	data = data[offset-rewind:]
       
   726 
       
   727 	if !isSafeLink(data) {
       
   728 		return 0
       
   729 	}
       
   730 
       
   731 	linkEnd := 0
       
   732 	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
       
   733 		linkEnd++
       
   734 	}
       
   735 
       
   736 	// Skip punctuation at the end of the link
       
   737 	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
       
   738 		linkEnd--
       
   739 	}
       
   740 
       
   741 	// But don't skip semicolon if it's a part of escaped entity:
       
   742 	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
       
   743 		linkEnd--
       
   744 	}
       
   745 
       
   746 	// See if the link finishes with a punctuation sign that can be closed.
       
   747 	var copen byte
       
   748 	switch data[linkEnd-1] {
       
   749 	case '"':
       
   750 		copen = '"'
       
   751 	case '\'':
       
   752 		copen = '\''
       
   753 	case ')':
       
   754 		copen = '('
       
   755 	case ']':
       
   756 		copen = '['
       
   757 	case '}':
       
   758 		copen = '{'
       
   759 	default:
       
   760 		copen = 0
       
   761 	}
       
   762 
       
   763 	if copen != 0 {
       
   764 		bufEnd := offset - rewind + linkEnd - 2
       
   765 
       
   766 		openDelim := 1
       
   767 
       
   768 		/* Try to close the final punctuation sign in this same line;
       
   769 		 * if we managed to close it outside of the URL, that means that it's
       
   770 		 * not part of the URL. If it closes inside the URL, that means it
       
   771 		 * is part of the URL.
       
   772 		 *
       
   773 		 * Examples:
       
   774 		 *
       
   775 		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
       
   776 		 *              => http://www.pokemon.com/Pikachu_(Electric)
       
   777 		 *
       
   778 		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
       
   779 		 *              => http://www.pokemon.com/Pikachu_(Electric)
       
   780 		 *
       
   781 		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
       
   782 		 *              => http://www.pokemon.com/Pikachu_(Electric))
       
   783 		 *
       
   784 		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
       
   785 		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
       
   786 		 */
       
   787 
       
   788 		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
       
   789 			if origData[bufEnd] == data[linkEnd-1] {
       
   790 				openDelim++
       
   791 			}
       
   792 
       
   793 			if origData[bufEnd] == copen {
       
   794 				openDelim--
       
   795 			}
       
   796 
       
   797 			bufEnd--
       
   798 		}
       
   799 
       
   800 		if openDelim == 0 {
       
   801 			linkEnd--
       
   802 		}
       
   803 	}
       
   804 
       
   805 	// we were triggered on the ':', so we need to rewind the output a bit
       
   806 	if out.Len() >= rewind {
       
   807 		out.Truncate(len(out.Bytes()) - rewind)
       
   808 	}
       
   809 
       
   810 	var uLink bytes.Buffer
       
   811 	unescapeText(&uLink, data[:linkEnd])
       
   812 
       
   813 	if uLink.Len() > 0 {
       
   814 		p.r.AutoLink(out, uLink.Bytes(), LINK_TYPE_NORMAL)
       
   815 	}
       
   816 
       
   817 	return linkEnd - rewind
       
   818 }
       
   819 
       
   820 func isEndOfLink(char byte) bool {
       
   821 	return isspace(char) || char == '<'
       
   822 }
       
   823 
       
   var (
   	// validUris lists, in lower case, the URI prefixes accepted by isSafeLink.
   	validUris = [][]byte{
   		[]byte("http://"),
   		[]byte("https://"),
   		[]byte("ftp://"),
   		[]byte("mailto://"),
   	}

   	// validPaths lists the path prefixes accepted by isSafeLink.
   	validPaths = [][]byte{
   		[]byte("/"),
   		[]byte("./"),
   		[]byte("../"),
   	}
   )
       
   826 
       
   827 func isSafeLink(link []byte) bool {
       
   828 	for _, path := range validPaths {
       
   829 		if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) {
       
   830 			if len(link) == len(path) {
       
   831 				return true
       
   832 			} else if isalnum(link[len(path)]) {
       
   833 				return true
       
   834 			}
       
   835 		}
       
   836 	}
       
   837 
       
   838 	for _, prefix := range validUris {
       
   839 		// TODO: handle unicode here
       
   840 		// case-insensitive prefix test
       
   841 		if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
       
   842 			return true
       
   843 		}
       
   844 	}
       
   845 
       
   846 	return false
       
   847 }
       
   848 
       
   849 // return the length of the given tag, or 0 is it's not valid
       
   850 func tagLength(data []byte, autolink *int) int {
       
   851 	var i, j int
       
   852 
       
   853 	// a valid tag can't be shorter than 3 chars
       
   854 	if len(data) < 3 {
       
   855 		return 0
       
   856 	}
       
   857 
       
   858 	// begins with a '<' optionally followed by '/', followed by letter or number
       
   859 	if data[0] != '<' {
       
   860 		return 0
       
   861 	}
       
   862 	if data[1] == '/' {
       
   863 		i = 2
       
   864 	} else {
       
   865 		i = 1
       
   866 	}
       
   867 
       
   868 	if !isalnum(data[i]) {
       
   869 		return 0
       
   870 	}
       
   871 
       
   872 	// scheme test
       
   873 	*autolink = LINK_TYPE_NOT_AUTOLINK
       
   874 
       
   875 	// try to find the beginning of an URI
       
   876 	for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
       
   877 		i++
       
   878 	}
       
   879 
       
   880 	if i > 1 && i < len(data) && data[i] == '@' {
       
   881 		if j = isMailtoAutoLink(data[i:]); j != 0 {
       
   882 			*autolink = LINK_TYPE_EMAIL
       
   883 			return i + j
       
   884 		}
       
   885 	}
       
   886 
       
   887 	if i > 2 && i < len(data) && data[i] == ':' {
       
   888 		*autolink = LINK_TYPE_NORMAL
       
   889 		i++
       
   890 	}
       
   891 
       
   892 	// complete autolink test: no whitespace or ' or "
       
   893 	switch {
       
   894 	case i >= len(data):
       
   895 		*autolink = LINK_TYPE_NOT_AUTOLINK
       
   896 	case *autolink != 0:
       
   897 		j = i
       
   898 
       
   899 		for i < len(data) {
       
   900 			if data[i] == '\\' {
       
   901 				i += 2
       
   902 			} else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
       
   903 				break
       
   904 			} else {
       
   905 				i++
       
   906 			}
       
   907 
       
   908 		}
       
   909 
       
   910 		if i >= len(data) {
       
   911 			return 0
       
   912 		}
       
   913 		if i > j && data[i] == '>' {
       
   914 			return i + 1
       
   915 		}
       
   916 
       
   917 		// one of the forbidden chars has been found
       
   918 		*autolink = LINK_TYPE_NOT_AUTOLINK
       
   919 	}
       
   920 
       
   921 	// look for something looking like a tag end
       
   922 	for i < len(data) && data[i] != '>' {
       
   923 		i++
       
   924 	}
       
   925 	if i >= len(data) {
       
   926 		return 0
       
   927 	}
       
   928 	return i + 1
       
   929 }
       
   930 
       
   931 // look for the address part of a mail autolink and '>'
       
   932 // this is less strict than the original markdown e-mail address matching
       
   933 func isMailtoAutoLink(data []byte) int {
       
   934 	nb := 0
       
   935 
       
   936 	// address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
       
   937 	for i := 0; i < len(data); i++ {
       
   938 		if isalnum(data[i]) {
       
   939 			continue
       
   940 		}
       
   941 
       
   942 		switch data[i] {
       
   943 		case '@':
       
   944 			nb++
       
   945 
       
   946 		case '-', '.', '_':
       
   947 			// Do nothing.
       
   948 
       
   949 		case '>':
       
   950 			if nb == 1 {
       
   951 				return i + 1
       
   952 			} else {
       
   953 				return 0
       
   954 			}
       
   955 		default:
       
   956 			return 0
       
   957 		}
       
   958 	}
       
   959 
       
   960 	return 0
       
   961 }
       
   962 
       
   963 // look for the next emph char, skipping other constructs
       
   964 func helperFindEmphChar(data []byte, c byte) int {
       
   965 	i := 0
       
   966 
       
   967 	for i < len(data) {
       
   968 		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
       
   969 			i++
       
   970 		}
       
   971 		if i >= len(data) {
       
   972 			return 0
       
   973 		}
       
   974 		// do not count escaped chars
       
   975 		if i != 0 && data[i-1] == '\\' {
       
   976 			i++
       
   977 			continue
       
   978 		}
       
   979 		if data[i] == c {
       
   980 			return i
       
   981 		}
       
   982 
       
   983 		if data[i] == '`' {
       
   984 			// skip a code span
       
   985 			tmpI := 0
       
   986 			i++
       
   987 			for i < len(data) && data[i] != '`' {
       
   988 				if tmpI == 0 && data[i] == c {
       
   989 					tmpI = i
       
   990 				}
       
   991 				i++
       
   992 			}
       
   993 			if i >= len(data) {
       
   994 				return tmpI
       
   995 			}
       
   996 			i++
       
   997 		} else if data[i] == '[' {
       
   998 			// skip a link
       
   999 			tmpI := 0
       
  1000 			i++
       
  1001 			for i < len(data) && data[i] != ']' {
       
  1002 				if tmpI == 0 && data[i] == c {
       
  1003 					tmpI = i
       
  1004 				}
       
  1005 				i++
       
  1006 			}
       
  1007 			i++
       
  1008 			for i < len(data) && (data[i] == ' ' || data[i] == '\n') {
       
  1009 				i++
       
  1010 			}
       
  1011 			if i >= len(data) {
       
  1012 				return tmpI
       
  1013 			}
       
  1014 			if data[i] != '[' && data[i] != '(' { // not a link
       
  1015 				if tmpI > 0 {
       
  1016 					return tmpI
       
  1017 				} else {
       
  1018 					continue
       
  1019 				}
       
  1020 			}
       
  1021 			cc := data[i]
       
  1022 			i++
       
  1023 			for i < len(data) && data[i] != cc {
       
  1024 				if tmpI == 0 && data[i] == c {
       
  1025 					return i
       
  1026 				}
       
  1027 				i++
       
  1028 			}
       
  1029 			if i >= len(data) {
       
  1030 				return tmpI
       
  1031 			}
       
  1032 			i++
       
  1033 		}
       
  1034 	}
       
  1035 	return 0
       
  1036 }
       
  1037 
       
  1038 func helperEmphasis(p *parser, out *bytes.Buffer, data []byte, c byte) int {
       
  1039 	i := 0
       
  1040 
       
  1041 	// skip one symbol if coming from emph3
       
  1042 	if len(data) > 1 && data[0] == c && data[1] == c {
       
  1043 		i = 1
       
  1044 	}
       
  1045 
       
  1046 	for i < len(data) {
       
  1047 		length := helperFindEmphChar(data[i:], c)
       
  1048 		if length == 0 {
       
  1049 			return 0
       
  1050 		}
       
  1051 		i += length
       
  1052 		if i >= len(data) {
       
  1053 			return 0
       
  1054 		}
       
  1055 
       
  1056 		if i+1 < len(data) && data[i+1] == c {
       
  1057 			i++
       
  1058 			continue
       
  1059 		}
       
  1060 
       
  1061 		if data[i] == c && !isspace(data[i-1]) {
       
  1062 
       
  1063 			if p.flags&EXTENSION_NO_INTRA_EMPHASIS != 0 {
       
  1064 				if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
       
  1065 					continue
       
  1066 				}
       
  1067 			}
       
  1068 
       
  1069 			var work bytes.Buffer
       
  1070 			p.inline(&work, data[:i])
       
  1071 			p.r.Emphasis(out, work.Bytes())
       
  1072 			return i + 1
       
  1073 		}
       
  1074 	}
       
  1075 
       
  1076 	return 0
       
  1077 }
       
  1078 
       
  1079 func helperDoubleEmphasis(p *parser, out *bytes.Buffer, data []byte, c byte) int {
       
  1080 	i := 0
       
  1081 
       
  1082 	for i < len(data) {
       
  1083 		length := helperFindEmphChar(data[i:], c)
       
  1084 		if length == 0 {
       
  1085 			return 0
       
  1086 		}
       
  1087 		i += length
       
  1088 
       
  1089 		if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
       
  1090 			var work bytes.Buffer
       
  1091 			p.inline(&work, data[:i])
       
  1092 
       
  1093 			if work.Len() > 0 {
       
  1094 				// pick the right renderer
       
  1095 				if c == '~' {
       
  1096 					p.r.StrikeThrough(out, work.Bytes())
       
  1097 				} else {
       
  1098 					p.r.DoubleEmphasis(out, work.Bytes())
       
  1099 				}
       
  1100 			}
       
  1101 			return i + 2
       
  1102 		}
       
  1103 		i++
       
  1104 	}
       
  1105 	return 0
       
  1106 }
       
  1107 
       
  1108 func helperTripleEmphasis(p *parser, out *bytes.Buffer, data []byte, offset int, c byte) int {
       
  1109 	i := 0
       
  1110 	origData := data
       
  1111 	data = data[offset:]
       
  1112 
       
  1113 	for i < len(data) {
       
  1114 		length := helperFindEmphChar(data[i:], c)
       
  1115 		if length == 0 {
       
  1116 			return 0
       
  1117 		}
       
  1118 		i += length
       
  1119 
       
  1120 		// skip whitespace preceded symbols
       
  1121 		if data[i] != c || isspace(data[i-1]) {
       
  1122 			continue
       
  1123 		}
       
  1124 
       
  1125 		switch {
       
  1126 		case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
       
  1127 			// triple symbol found
       
  1128 			var work bytes.Buffer
       
  1129 
       
  1130 			p.inline(&work, data[:i])
       
  1131 			if work.Len() > 0 {
       
  1132 				p.r.TripleEmphasis(out, work.Bytes())
       
  1133 			}
       
  1134 			return i + 3
       
  1135 		case (i+1 < len(data) && data[i+1] == c):
       
  1136 			// double symbol found, hand over to emph1
       
  1137 			length = helperEmphasis(p, out, origData[offset-2:], c)
       
  1138 			if length == 0 {
       
  1139 				return 0
       
  1140 			} else {
       
  1141 				return length - 2
       
  1142 			}
       
  1143 		default:
       
  1144 			// single symbol found, hand over to emph2
       
  1145 			length = helperDoubleEmphasis(p, out, origData[offset-1:], c)
       
  1146 			if length == 0 {
       
  1147 				return 0
       
  1148 			} else {
       
  1149 				return length - 1
       
  1150 			}
       
  1151 		}
       
  1152 	}
       
  1153 	return 0
       
  1154 }