vendor/github.com/russross/blackfriday/markdown.go
changeset 246 0998f404dd31
parent 245 910f00ab2799
child 247 1ca743b3eb80
equal deleted inserted replaced
245:910f00ab2799 246:0998f404dd31
     1 // Blackfriday Markdown Processor
       
     2 // Available at http://github.com/russross/blackfriday
       
     3 //
       
     4 // Copyright © 2011 Russ Ross <russ@russross.com>.
       
     5 // Distributed under the Simplified BSD License.
       
     6 // See README.md for details.
       
     7 
       
     8 package blackfriday
       
     9 
       
    10 import (
       
    11 	"bytes"
       
    12 	"fmt"
       
    13 	"io"
       
    14 	"strings"
       
    15 	"unicode/utf8"
       
    16 )
       
    17 
       
    18 //
       
    19 // Markdown parsing and processing
       
    20 //
       
    21 
       
// Version is the version string of the package. It appears in the rendered
// document when the CompletePage flag is on.
const Version = "2.0"
       
    25 
       
// Extensions is a bitwise OR'ed collection of enabled Blackfriday
// extensions.
type Extensions int

// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
//
// NoExtensions occupies the first line of the const block (iota == 0), so
// the flag sequence starts with NoIntraEmphasis at iota == 1, i.e. 1 << 1;
// bit 0 is not assigned to any extension.
const (
	NoExtensions           Extensions = 0
	NoIntraEmphasis        Extensions = 1 << iota // Ignore emphasis markers inside words
	Tables                                        // Render tables
	FencedCode                                    // Render fenced code blocks
	Autolink                                      // Detect embedded URLs that are not explicitly marked
	Strikethrough                                 // Strikethrough text using ~~test~~
	LaxHTMLBlocks                                 // Loosen up HTML block parsing rules
	SpaceHeadings                                 // Be strict about prefix heading rules
	HardLineBreak                                 // Translate newlines into line breaks
	TabSizeEight                                  // Expand tabs to eight spaces instead of four
	Footnotes                                     // Pandoc-style footnotes
	NoEmptyLineBeforeBlock                        // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
	HeadingIDs                                    // Specify heading IDs with {#id}
	Titleblock                                    // Titleblock ala pandoc
	AutoHeadingIDs                                // Create the heading ID from the text
	BackslashLineBreak                            // Translate trailing backslashes into line breaks
	DefinitionLists                               // Render definition lists

	// CommonHTMLFlags is the set of renderer flags that Run passes to its
	// default HTML renderer.
	CommonHTMLFlags HTMLFlags = UseXHTML | Smartypants |
		SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes

	// CommonExtensions is the set of extensions that Run enables by default.
	CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
		Autolink | Strikethrough | SpaceHeadings | HeadingIDs |
		BackslashLineBreak | DefinitionLists
)
       
    58 
       
// ListType contains bitwise OR'ed flags for list and list item objects.
type ListType int

// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
//
// The values are consecutive bits starting at 1 << 0; the blank line inside
// the block does not interrupt the iota sequence.
const (
	ListTypeOrdered ListType = 1 << iota
	ListTypeDefinition
	ListTypeTerm

	ListItemContainsBlock
	ListItemBeginningOfList // TODO: figure out if this is of any use now
	ListItemEndOfList
)
       
    74 
       
// CellAlignFlags holds a type of alignment in a table cell.
type CellAlignFlags int

// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
//
// TableAlignmentCenter is defined as the union of the left and right bits,
// so it is numerically distinct from both of them.
const (
	TableAlignmentLeft CellAlignFlags = 1 << iota
	TableAlignmentRight
	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
)
       
    86 
       
// The size of a tab stop, expressed in spaces.
const (
	TabSizeDefault = 4
	TabSizeDouble  = 8
)
       
    92 
       
    93 // blockTags is a set of tags that are recognized as HTML block tags.
       
    94 // Any of these can be included in markdown text without special escaping.
       
    95 var blockTags = map[string]struct{}{
       
    96 	"blockquote": struct{}{},
       
    97 	"del":        struct{}{},
       
    98 	"div":        struct{}{},
       
    99 	"dl":         struct{}{},
       
   100 	"fieldset":   struct{}{},
       
   101 	"form":       struct{}{},
       
   102 	"h1":         struct{}{},
       
   103 	"h2":         struct{}{},
       
   104 	"h3":         struct{}{},
       
   105 	"h4":         struct{}{},
       
   106 	"h5":         struct{}{},
       
   107 	"h6":         struct{}{},
       
   108 	"iframe":     struct{}{},
       
   109 	"ins":        struct{}{},
       
   110 	"math":       struct{}{},
       
   111 	"noscript":   struct{}{},
       
   112 	"ol":         struct{}{},
       
   113 	"pre":        struct{}{},
       
   114 	"p":          struct{}{},
       
   115 	"script":     struct{}{},
       
   116 	"style":      struct{}{},
       
   117 	"table":      struct{}{},
       
   118 	"ul":         struct{}{},
       
   119 
       
   120 	// HTML5
       
   121 	"address":    struct{}{},
       
   122 	"article":    struct{}{},
       
   123 	"aside":      struct{}{},
       
   124 	"canvas":     struct{}{},
       
   125 	"figcaption": struct{}{},
       
   126 	"figure":     struct{}{},
       
   127 	"footer":     struct{}{},
       
   128 	"header":     struct{}{},
       
   129 	"hgroup":     struct{}{},
       
   130 	"main":       struct{}{},
       
   131 	"nav":        struct{}{},
       
   132 	"output":     struct{}{},
       
   133 	"progress":   struct{}{},
       
   134 	"section":    struct{}{},
       
   135 	"video":      struct{}{},
       
   136 }
       
   137 
       
// Renderer is the rendering interface. This is mostly of interest if you are
// implementing a new rendering format.
//
// Only an HTML implementation is provided in this repository, see the README
// for external implementations.
//
// Run calls the methods in this order: RenderHeader once, RenderNode for the
// nodes visited by a walk over the tree, then RenderFooter once.
type Renderer interface {
	// RenderNode is the main rendering method. It will be called once for
	// every leaf node and twice for every non-leaf node (first with
	// entering=true, then with entering=false). The method should write its
	// rendition of the node to the supplied writer w. The returned
	// WalkStatus is handed back to the tree walk that invoked the method.
	RenderNode(w io.Writer, node *Node, entering bool) WalkStatus

	// RenderHeader is a method that allows the renderer to produce some
	// content preceding the main body of the output document. The header is
	// understood in the broad sense here. For example, the default HTML
	// renderer will write not only the HTML document preamble, but also the
	// table of contents if it was requested.
	//
	// The method will be passed an entire document tree, in case a particular
	// implementation needs to inspect it to produce output.
	//
	// The output should be written to the supplied writer w. If your
	// implementation has no header to write, supply an empty implementation.
	RenderHeader(w io.Writer, ast *Node)

	// RenderFooter is a symmetric counterpart of RenderHeader: it is called
	// after the main body has been rendered and receives the same tree.
	RenderFooter(w io.Writer, ast *Node)
}
       
   166 
       
// inlineParser is the callback type used for inline parsing. One such
// function is registered for each byte that triggers a response when parsing
// inline data (see the inlineCallback table filled in by New).
//
// NOTE(review): the int result is presumably the number of input bytes the
// callback consumed and the *Node the element it produced -- confirm against
// the inline parser implementations.
type inlineParser func(p *Markdown, data []byte, offset int) (int, *Node)
       
   170 
       
// Markdown is a type that holds extensions and the runtime state used by
// Parse, and the renderer. You can not use it directly, construct it with New.
type Markdown struct {
	// renderer is the output backend; it is set through WithRenderer or
	// WithNoExtensions and used by Run.
	renderer Renderer
	// referenceOverride, when non-nil, is consulted by getRef before refs.
	referenceOverride ReferenceOverrideFunc
	// refs holds link references (and footnotes) keyed by lower-cased id.
	refs map[string]*reference
	// inlineCallback maps a trigger byte to its inline parser; New fills it.
	inlineCallback [256]inlineParser
	// extensions is the set of enabled extension flags.
	extensions Extensions
	nesting    int
	// maxNesting is initialized to 16 by New.
	maxNesting int
	insideLink bool

	// Footnotes need to be ordered as well as available to quickly check for
	// presence. If a ref is also a footnote, it's stored both in refs and here
	// in notes. Slice is nil if footnotes not enabled.
	notes []*reference

	// Block-parsing state. New points doc, tip, oldTip and
	// lastMatchedContainer at the same freshly created Document node.
	doc                  *Node
	tip                  *Node // = doc
	oldTip               *Node
	lastMatchedContainer *Node // = doc
	allClosed            bool
}
       
   194 
       
   195 func (p *Markdown) getRef(refid string) (ref *reference, found bool) {
       
   196 	if p.referenceOverride != nil {
       
   197 		r, overridden := p.referenceOverride(refid)
       
   198 		if overridden {
       
   199 			if r == nil {
       
   200 				return nil, false
       
   201 			}
       
   202 			return &reference{
       
   203 				link:     []byte(r.Link),
       
   204 				title:    []byte(r.Title),
       
   205 				noteID:   0,
       
   206 				hasBlock: false,
       
   207 				text:     []byte(r.Text)}, true
       
   208 		}
       
   209 	}
       
   210 	// refs are case insensitive
       
   211 	ref, found = p.refs[strings.ToLower(refid)]
       
   212 	return ref, found
       
   213 }
       
   214 
       
   215 func (p *Markdown) finalize(block *Node) {
       
   216 	above := block.Parent
       
   217 	block.open = false
       
   218 	p.tip = above
       
   219 }
       
   220 
       
   221 func (p *Markdown) addChild(node NodeType, offset uint32) *Node {
       
   222 	return p.addExistingChild(NewNode(node), offset)
       
   223 }
       
   224 
       
   225 func (p *Markdown) addExistingChild(node *Node, offset uint32) *Node {
       
   226 	for !p.tip.canContain(node.Type) {
       
   227 		p.finalize(p.tip)
       
   228 	}
       
   229 	p.tip.AppendChild(node)
       
   230 	p.tip = node
       
   231 	return node
       
   232 }
       
   233 
       
   234 func (p *Markdown) closeUnmatchedBlocks() {
       
   235 	if !p.allClosed {
       
   236 		for p.oldTip != p.lastMatchedContainer {
       
   237 			parent := p.oldTip.Parent
       
   238 			p.finalize(p.oldTip)
       
   239 			p.oldTip = parent
       
   240 		}
       
   241 		p.allClosed = true
       
   242 	}
       
   243 }
       
   244 
       
   245 //
       
   246 //
       
   247 // Public interface
       
   248 //
       
   249 //
       
   250 
       
// Reference represents the details of a link.
// It is the value a ReferenceOverrideFunc returns to supply a reference
// programmatically; see WithRefOverride for the use-case.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string
	// Title is the alternate text describing the link in more detail.
	Title string
	// Text is the optional text to override the ref with if the syntax used was
	// [refid][]
	Text string
}
       
   262 
       
// ReferenceOverrideFunc is expected to be called with a reference string and
// return either a valid Reference type that the reference string maps to or
// nil. If overridden is false, the default reference logic will be executed.
// If overridden is true and ref is nil, the reference is treated as not
// found. See WithRefOverride for more details on the use-case.
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
       
   268 
       
   269 // New constructs a Markdown processor. You can use the same With* functions as
       
   270 // for Run() to customize parser's behavior and the renderer.
       
   271 func New(opts ...Option) *Markdown {
       
   272 	var p Markdown
       
   273 	for _, opt := range opts {
       
   274 		opt(&p)
       
   275 	}
       
   276 	p.refs = make(map[string]*reference)
       
   277 	p.maxNesting = 16
       
   278 	p.insideLink = false
       
   279 	docNode := NewNode(Document)
       
   280 	p.doc = docNode
       
   281 	p.tip = docNode
       
   282 	p.oldTip = docNode
       
   283 	p.lastMatchedContainer = docNode
       
   284 	p.allClosed = true
       
   285 	// register inline parsers
       
   286 	p.inlineCallback[' '] = maybeLineBreak
       
   287 	p.inlineCallback['*'] = emphasis
       
   288 	p.inlineCallback['_'] = emphasis
       
   289 	if p.extensions&Strikethrough != 0 {
       
   290 		p.inlineCallback['~'] = emphasis
       
   291 	}
       
   292 	p.inlineCallback['`'] = codeSpan
       
   293 	p.inlineCallback['\n'] = lineBreak
       
   294 	p.inlineCallback['['] = link
       
   295 	p.inlineCallback['<'] = leftAngle
       
   296 	p.inlineCallback['\\'] = escape
       
   297 	p.inlineCallback['&'] = entity
       
   298 	p.inlineCallback['!'] = maybeImage
       
   299 	p.inlineCallback['^'] = maybeInlineFootnote
       
   300 	if p.extensions&Autolink != 0 {
       
   301 		p.inlineCallback['h'] = maybeAutoLink
       
   302 		p.inlineCallback['m'] = maybeAutoLink
       
   303 		p.inlineCallback['f'] = maybeAutoLink
       
   304 		p.inlineCallback['H'] = maybeAutoLink
       
   305 		p.inlineCallback['M'] = maybeAutoLink
       
   306 		p.inlineCallback['F'] = maybeAutoLink
       
   307 	}
       
   308 	if p.extensions&Footnotes != 0 {
       
   309 		p.notes = make([]*reference, 0)
       
   310 	}
       
   311 	return &p
       
   312 }
       
   313 
       
// Option customizes the Markdown processor's default behavior. Options are
// passed to New or Run and are applied to the processor in the order given.
type Option func(*Markdown)
       
   316 
       
   317 // WithRenderer allows you to override the default renderer.
       
   318 func WithRenderer(r Renderer) Option {
       
   319 	return func(p *Markdown) {
       
   320 		p.renderer = r
       
   321 	}
       
   322 }
       
   323 
       
   324 // WithExtensions allows you to pick some of the many extensions provided by
       
   325 // Blackfriday. You can bitwise OR them.
       
   326 func WithExtensions(e Extensions) Option {
       
   327 	return func(p *Markdown) {
       
   328 		p.extensions = e
       
   329 	}
       
   330 }
       
   331 
       
   332 // WithNoExtensions turns off all extensions and custom behavior.
       
   333 func WithNoExtensions() Option {
       
   334 	return func(p *Markdown) {
       
   335 		p.extensions = NoExtensions
       
   336 		p.renderer = NewHTMLRenderer(HTMLRendererParameters{
       
   337 			Flags: HTMLFlagsNone,
       
   338 		})
       
   339 	}
       
   340 }
       
   341 
       
   342 // WithRefOverride sets an optional function callback that is called every
       
   343 // time a reference is resolved.
       
   344 //
       
   345 // In Markdown, the link reference syntax can be made to resolve a link to
       
   346 // a reference instead of an inline URL, in one of the following ways:
       
   347 //
       
   348 //  * [link text][refid]
       
   349 //  * [refid][]
       
   350 //
       
   351 // Usually, the refid is defined at the bottom of the Markdown document. If
       
   352 // this override function is provided, the refid is passed to the override
       
   353 // function first, before consulting the defined refids at the bottom. If
       
   354 // the override function indicates an override did not occur, the refids at
       
   355 // the bottom will be used to fill in the link details.
       
   356 func WithRefOverride(o ReferenceOverrideFunc) Option {
       
   357 	return func(p *Markdown) {
       
   358 		p.referenceOverride = o
       
   359 	}
       
   360 }
       
   361 
       
   362 // Run is the main entry point to Blackfriday. It parses and renders a
       
   363 // block of markdown-encoded text.
       
   364 //
       
   365 // The simplest invocation of Run takes one argument, input:
       
   366 //     output := Run(input)
       
   367 // This will parse the input with CommonExtensions enabled and render it with
       
   368 // the default HTMLRenderer (with CommonHTMLFlags).
       
   369 //
       
   370 // Variadic arguments opts can customize the default behavior. Since Markdown
       
   371 // type does not contain exported fields, you can not use it directly. Instead,
       
   372 // use the With* functions. For example, this will call the most basic
       
   373 // functionality, with no extensions:
       
   374 //     output := Run(input, WithNoExtensions())
       
   375 //
       
   376 // You can use any number of With* arguments, even contradicting ones. They
       
   377 // will be applied in order of appearance and the latter will override the
       
   378 // former:
       
   379 //     output := Run(input, WithNoExtensions(), WithExtensions(exts),
       
   380 //         WithRenderer(yourRenderer))
       
   381 func Run(input []byte, opts ...Option) []byte {
       
   382 	r := NewHTMLRenderer(HTMLRendererParameters{
       
   383 		Flags: CommonHTMLFlags,
       
   384 	})
       
   385 	optList := []Option{WithRenderer(r), WithExtensions(CommonExtensions)}
       
   386 	optList = append(optList, opts...)
       
   387 	parser := New(optList...)
       
   388 	ast := parser.Parse(input)
       
   389 	var buf bytes.Buffer
       
   390 	parser.renderer.RenderHeader(&buf, ast)
       
   391 	ast.Walk(func(node *Node, entering bool) WalkStatus {
       
   392 		return parser.renderer.RenderNode(&buf, node, entering)
       
   393 	})
       
   394 	parser.renderer.RenderFooter(&buf, ast)
       
   395 	return buf.Bytes()
       
   396 }
       
   397 
       
   398 // Parse is an entry point to the parsing part of Blackfriday. It takes an
       
   399 // input markdown document and produces a syntax tree for its contents. This
       
   400 // tree can then be rendered with a default or custom renderer, or
       
   401 // analyzed/transformed by the caller to whatever non-standard needs they have.
       
   402 // The return value is the root node of the syntax tree.
       
   403 func (p *Markdown) Parse(input []byte) *Node {
       
   404 	p.block(input)
       
   405 	// Walk the tree and finish up some of unfinished blocks
       
   406 	for p.tip != nil {
       
   407 		p.finalize(p.tip)
       
   408 	}
       
   409 	// Walk the tree again and process inline markdown in each block
       
   410 	p.doc.Walk(func(node *Node, entering bool) WalkStatus {
       
   411 		if node.Type == Paragraph || node.Type == Heading || node.Type == TableCell {
       
   412 			p.inline(node, node.content)
       
   413 			node.content = nil
       
   414 		}
       
   415 		return GoToNext
       
   416 	})
       
   417 	p.parseRefsToAST()
       
   418 	return p.doc
       
   419 }
       
   420 
       
// parseRefsToAST turns the footnotes collected in p.notes into a trailing
// ordered list in the document tree. It does nothing when the Footnotes
// extension is disabled or when no footnotes were recorded.
func (p *Markdown) parseRefsToAST() {
	if p.extensions&Footnotes == 0 || len(p.notes) == 0 {
		return
	}
	// Point the tip back at the document root before adding the list.
	p.tip = p.doc
	block := p.addBlock(List, nil)
	block.IsFootnotesList = true
	block.ListFlags = ListTypeOrdered
	// Only the first item is flagged as the beginning of the list; the flag
	// is cleared at the end of the first iteration.
	flags := ListItemBeginningOfList
	// Note: this loop is intentionally explicit, not range-form. This is
	// because the body of the loop will append nested footnotes to p.notes and
	// we need to process those late additions. Range form would only walk over
	// the fixed initial set.
	for i := 0; i < len(p.notes); i++ {
		ref := p.notes[i]
		p.addExistingChild(ref.footnote, 0)
		// NOTE: this block shadows the list node declared above; inside the
		// loop it is the footnote's item node.
		block := ref.footnote
		block.ListFlags = flags | ListTypeOrdered
		block.RefLink = ref.link
		if ref.hasBlock {
			// NOTE(review): ListItemContainsBlock is OR'ed into flags after
			// ListFlags was assigned above and is cleared again below, so it
			// never ends up on any item -- confirm whether that is intended.
			flags |= ListItemContainsBlock
			p.block(ref.title)
		} else {
			p.inline(block, ref.title)
		}
		flags &^= ListItemBeginningOfList | ListItemContainsBlock
	}
	// block is the footnotes list again here: close it and move the tip up.
	above := block.Parent
	finalizeList(block)
	p.tip = above
	// Process inline markdown in the paragraphs and headings of the list.
	block.Walk(func(node *Node, entering bool) WalkStatus {
		if node.Type == Paragraph || node.Type == Heading {
			p.inline(node, node.content)
			node.content = nil
		}
		return GoToNext
	})
}
       
   459 
       
   460 //
       
   461 // Link references
       
   462 //
       
   463 // This section implements support for references that (usually) appear
       
   464 // as footnotes in a document, and can be referenced anywhere in the document.
       
   465 // The basic format is:
       
   466 //
       
   467 //    [1]: http://www.google.com/ "Google"
       
   468 //    [2]: http://www.github.com/ "Github"
       
   469 //
       
   470 // Anywhere in the document, the reference can be linked by referring to its
       
   471 // label, i.e., 1 and 2 in this example, as in:
       
   472 //
       
   473 //    This library is hosted on [Github][2], a git hosting site.
       
   474 //
       
   475 // Actual footnotes as specified in Pandoc and supported by some other Markdown
       
   476 // libraries such as php-markdown are also taken care of. They look like this:
       
   477 //
       
   478 //    This sentence needs a bit of further explanation.[^note]
       
   479 //
       
   480 //    [^note]: This is the explanation.
       
   481 //
       
   482 // Footnotes should be placed at the end of the document in an ordered list.
       
   483 // Inline footnotes such as:
       
   484 //
       
   485 //    Inline footnotes^[Not supported.] also exist.
       
   486 //
       
   487 // are not yet supported.
       
   488 
       
// reference holds all information necessary for reference-style links or
// footnotes.
//
// Consider this markdown with reference-style links:
//
//     [link][ref]
//
//     [ref]: /url/ "tooltip title"
//
// It will be ultimately converted to this HTML:
//
//     <p><a href="/url/" title="tooltip title">link</a></p>
//
// And a reference structure will be populated as follows:
//
//     p.refs["ref"] = &reference{
//         link: "/url/",
//         title: "tooltip title",
//     }
//
// Alternatively, reference can contain information about a footnote. Consider
// this markdown:
//
//     Text needing a footnote.[^a]
//
//     [^a]: This is the note
//
// A reference structure will be populated as follows:
//
//     p.refs["a"] = &reference{
//         link: "a",
//         title: "This is the note",
//         noteID: <some positive int>,
//     }
//
// TODO: As you can see, it begs for splitting into two dedicated structures
// for refs and for footnotes.
type reference struct {
	link     []byte // link destination, or the footnote id for footnotes
	title    []byte // link title, or the footnote's text for footnotes
	noteID   int    // 0 if not a footnote ref
	hasBlock bool
	footnote *Node // a link to the Item node within a list of footnotes

	text []byte // only gets populated by refOverride feature with Reference.Text
}
       
   535 
       
   536 func (r *reference) String() string {
       
   537 	return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}",
       
   538 		r.link, r.title, r.text, r.noteID, r.hasBlock)
       
   539 }
       
   540 
       
   541 // Check whether or not data starts with a reference link.
       
   542 // If so, it is parsed and stored in the list of references
       
   543 // (in the render struct).
       
   544 // Returns the number of bytes to skip to move past it,
       
   545 // or zero if the first line is not a reference.
       
   546 func isReference(p *Markdown, data []byte, tabSize int) int {
       
   547 	// up to 3 optional leading spaces
       
   548 	if len(data) < 4 {
       
   549 		return 0
       
   550 	}
       
   551 	i := 0
       
   552 	for i < 3 && data[i] == ' ' {
       
   553 		i++
       
   554 	}
       
   555 
       
   556 	noteID := 0
       
   557 
       
   558 	// id part: anything but a newline between brackets
       
   559 	if data[i] != '[' {
       
   560 		return 0
       
   561 	}
       
   562 	i++
       
   563 	if p.extensions&Footnotes != 0 {
       
   564 		if i < len(data) && data[i] == '^' {
       
   565 			// we can set it to anything here because the proper noteIds will
       
   566 			// be assigned later during the second pass. It just has to be != 0
       
   567 			noteID = 1
       
   568 			i++
       
   569 		}
       
   570 	}
       
   571 	idOffset := i
       
   572 	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
       
   573 		i++
       
   574 	}
       
   575 	if i >= len(data) || data[i] != ']' {
       
   576 		return 0
       
   577 	}
       
   578 	idEnd := i
       
   579 	// footnotes can have empty ID, like this: [^], but a reference can not be
       
   580 	// empty like this: []. Break early if it's not a footnote and there's no ID
       
   581 	if noteID == 0 && idOffset == idEnd {
       
   582 		return 0
       
   583 	}
       
   584 	// spacer: colon (space | tab)* newline? (space | tab)*
       
   585 	i++
       
   586 	if i >= len(data) || data[i] != ':' {
       
   587 		return 0
       
   588 	}
       
   589 	i++
       
   590 	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
       
   591 		i++
       
   592 	}
       
   593 	if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
       
   594 		i++
       
   595 		if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
       
   596 			i++
       
   597 		}
       
   598 	}
       
   599 	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
       
   600 		i++
       
   601 	}
       
   602 	if i >= len(data) {
       
   603 		return 0
       
   604 	}
       
   605 
       
   606 	var (
       
   607 		linkOffset, linkEnd   int
       
   608 		titleOffset, titleEnd int
       
   609 		lineEnd               int
       
   610 		raw                   []byte
       
   611 		hasBlock              bool
       
   612 	)
       
   613 
       
   614 	if p.extensions&Footnotes != 0 && noteID != 0 {
       
   615 		linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
       
   616 		lineEnd = linkEnd
       
   617 	} else {
       
   618 		linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
       
   619 	}
       
   620 	if lineEnd == 0 {
       
   621 		return 0
       
   622 	}
       
   623 
       
   624 	// a valid ref has been found
       
   625 
       
   626 	ref := &reference{
       
   627 		noteID:   noteID,
       
   628 		hasBlock: hasBlock,
       
   629 	}
       
   630 
       
   631 	if noteID > 0 {
       
   632 		// reusing the link field for the id since footnotes don't have links
       
   633 		ref.link = data[idOffset:idEnd]
       
   634 		// if footnote, it's not really a title, it's the contained text
       
   635 		ref.title = raw
       
   636 	} else {
       
   637 		ref.link = data[linkOffset:linkEnd]
       
   638 		ref.title = data[titleOffset:titleEnd]
       
   639 	}
       
   640 
       
   641 	// id matches are case-insensitive
       
   642 	id := string(bytes.ToLower(data[idOffset:idEnd]))
       
   643 
       
   644 	p.refs[id] = ref
       
   645 
       
   646 	return lineEnd
       
   647 }
       
   648 
       
   649 func scanLinkRef(p *Markdown, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
       
   650 	// link: whitespace-free sequence, optionally between angle brackets
       
   651 	if data[i] == '<' {
       
   652 		i++
       
   653 	}
       
   654 	linkOffset = i
       
   655 	for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
       
   656 		i++
       
   657 	}
       
   658 	linkEnd = i
       
   659 	if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
       
   660 		linkOffset++
       
   661 		linkEnd--
       
   662 	}
       
   663 
       
   664 	// optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
       
   665 	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
       
   666 		i++
       
   667 	}
       
   668 	if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
       
   669 		return
       
   670 	}
       
   671 
       
   672 	// compute end-of-line
       
   673 	if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
       
   674 		lineEnd = i
       
   675 	}
       
   676 	if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
       
   677 		lineEnd++
       
   678 	}
       
   679 
       
   680 	// optional (space|tab)* spacer after a newline
       
   681 	if lineEnd > 0 {
       
   682 		i = lineEnd + 1
       
   683 		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
       
   684 			i++
       
   685 		}
       
   686 	}
       
   687 
       
   688 	// optional title: any non-newline sequence enclosed in '"() alone on its line
       
   689 	if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
       
   690 		i++
       
   691 		titleOffset = i
       
   692 
       
   693 		// look for EOL
       
   694 		for i < len(data) && data[i] != '\n' && data[i] != '\r' {
       
   695 			i++
       
   696 		}
       
   697 		if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
       
   698 			titleEnd = i + 1
       
   699 		} else {
       
   700 			titleEnd = i
       
   701 		}
       
   702 
       
   703 		// step back
       
   704 		i--
       
   705 		for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
       
   706 			i--
       
   707 		}
       
   708 		if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
       
   709 			lineEnd = titleEnd
       
   710 			titleEnd = i
       
   711 		}
       
   712 	}
       
   713 
       
   714 	return
       
   715 }
       
   716 
       
   717 // The first bit of this logic is the same as Parser.listItem, but the rest
       
   718 // is much simpler. This function simply finds the entire block and shifts it
       
   719 // over by one tab if it is indeed a block (just returns the line if it's not).
       
   720 // blockEnd is the end of the section in the input buffer, and contents is the
       
   721 // extracted text that was shifted over one tab. It will need to be rendered at
       
   722 // the end of the document.
       
   723 func scanFootnote(p *Markdown, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
       
   724 	if i == 0 || len(data) == 0 {
       
   725 		return
       
   726 	}
       
   727 
       
   728 	// skip leading whitespace on first line
       
   729 	for i < len(data) && data[i] == ' ' {
       
   730 		i++
       
   731 	}
       
   732 
       
   733 	blockStart = i
       
   734 
       
   735 	// find the end of the line
       
   736 	blockEnd = i
       
   737 	for i < len(data) && data[i-1] != '\n' {
       
   738 		i++
       
   739 	}
       
   740 
       
   741 	// get working buffer
       
   742 	var raw bytes.Buffer
       
   743 
       
   744 	// put the first line into the working buffer
       
   745 	raw.Write(data[blockEnd:i])
       
   746 	blockEnd = i
       
   747 
       
   748 	// process the following lines
       
   749 	containsBlankLine := false
       
   750 
       
   751 gatherLines:
       
   752 	for blockEnd < len(data) {
       
   753 		i++
       
   754 
       
   755 		// find the end of this line
       
   756 		for i < len(data) && data[i-1] != '\n' {
       
   757 			i++
       
   758 		}
       
   759 
       
   760 		// if it is an empty line, guess that it is part of this item
       
   761 		// and move on to the next line
       
   762 		if p.isEmpty(data[blockEnd:i]) > 0 {
       
   763 			containsBlankLine = true
       
   764 			blockEnd = i
       
   765 			continue
       
   766 		}
       
   767 
       
   768 		n := 0
       
   769 		if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
       
   770 			// this is the end of the block.
       
   771 			// we don't want to include this last line in the index.
       
   772 			break gatherLines
       
   773 		}
       
   774 
       
   775 		// if there were blank lines before this one, insert a new one now
       
   776 		if containsBlankLine {
       
   777 			raw.WriteByte('\n')
       
   778 			containsBlankLine = false
       
   779 		}
       
   780 
       
   781 		// get rid of that first tab, write to buffer
       
   782 		raw.Write(data[blockEnd+n : i])
       
   783 		hasBlock = true
       
   784 
       
   785 		blockEnd = i
       
   786 	}
       
   787 
       
   788 	if data[blockEnd-1] != '\n' {
       
   789 		raw.WriteByte('\n')
       
   790 	}
       
   791 
       
   792 	contents = raw.Bytes()
       
   793 
       
   794 	return
       
   795 }
       
   796 
       
   797 //
       
   798 //
       
   799 // Miscellaneous helper functions
       
   800 //
       
   801 //
       
   802 
       
   803 // Test if a character is a punctuation symbol.
       
   804 // Taken from a private function in regexp in the stdlib.
       
   805 func ispunct(c byte) bool {
       
   806 	for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") {
       
   807 		if c == r {
       
   808 			return true
       
   809 		}
       
   810 	}
       
   811 	return false
       
   812 }
       
   813 
       
   814 // Test if a character is a whitespace character.
       
   815 func isspace(c byte) bool {
       
   816 	return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'
       
   817 }
       
   818 
       
   819 // Test if a character is letter.
       
   820 func isletter(c byte) bool {
       
   821 	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
       
   822 }
       
   823 
       
   824 // Test if a character is a letter or a digit.
       
   825 // TODO: check when this is looking for ASCII alnum and when it should use unicode
       
   826 func isalnum(c byte) bool {
       
   827 	return (c >= '0' && c <= '9') || isletter(c)
       
   828 }
       
   829 
       
   830 // Replace tab characters with spaces, aligning to the next TAB_SIZE column.
       
   831 // always ends output with a newline
       
   832 func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
       
   833 	// first, check for common cases: no tabs, or only tabs at beginning of line
       
   834 	i, prefix := 0, 0
       
   835 	slowcase := false
       
   836 	for i = 0; i < len(line); i++ {
       
   837 		if line[i] == '\t' {
       
   838 			if prefix == i {
       
   839 				prefix++
       
   840 			} else {
       
   841 				slowcase = true
       
   842 				break
       
   843 			}
       
   844 		}
       
   845 	}
       
   846 
       
   847 	// no need to decode runes if all tabs are at the beginning of the line
       
   848 	if !slowcase {
       
   849 		for i = 0; i < prefix*tabSize; i++ {
       
   850 			out.WriteByte(' ')
       
   851 		}
       
   852 		out.Write(line[prefix:])
       
   853 		return
       
   854 	}
       
   855 
       
   856 	// the slow case: we need to count runes to figure out how
       
   857 	// many spaces to insert for each tab
       
   858 	column := 0
       
   859 	i = 0
       
   860 	for i < len(line) {
       
   861 		start := i
       
   862 		for i < len(line) && line[i] != '\t' {
       
   863 			_, size := utf8.DecodeRune(line[i:])
       
   864 			i += size
       
   865 			column++
       
   866 		}
       
   867 
       
   868 		if i > start {
       
   869 			out.Write(line[start:i])
       
   870 		}
       
   871 
       
   872 		if i >= len(line) {
       
   873 			break
       
   874 		}
       
   875 
       
   876 		for {
       
   877 			out.WriteByte(' ')
       
   878 			column++
       
   879 			if column%tabSize == 0 {
       
   880 				break
       
   881 			}
       
   882 		}
       
   883 
       
   884 		i++
       
   885 	}
       
   886 }
       
   887 
       
   888 // Find if a line counts as indented or not.
       
   889 // Returns number of characters the indent is (0 = not indented).
       
   890 func isIndented(data []byte, indentSize int) int {
       
   891 	if len(data) == 0 {
       
   892 		return 0
       
   893 	}
       
   894 	if data[0] == '\t' {
       
   895 		return 1
       
   896 	}
       
   897 	if len(data) < indentSize {
       
   898 		return 0
       
   899 	}
       
   900 	for i := 0; i < indentSize; i++ {
       
   901 		if data[i] != ' ' {
       
   902 			return 0
       
   903 		}
       
   904 	}
       
   905 	return indentSize
       
   906 }
       
   907 
       
   908 // Create a url-safe slug for fragments
       
   909 func slugify(in []byte) []byte {
       
   910 	if len(in) == 0 {
       
   911 		return in
       
   912 	}
       
   913 	out := make([]byte, 0, len(in))
       
   914 	sym := false
       
   915 
       
   916 	for _, ch := range in {
       
   917 		if isalnum(ch) {
       
   918 			sym = false
       
   919 			out = append(out, ch)
       
   920 		} else if sym {
       
   921 			continue
       
   922 		} else {
       
   923 			out = append(out, '-')
       
   924 			sym = true
       
   925 		}
       
   926 	}
       
   927 	var a, b int
       
   928 	var ch byte
       
   929 	for a, ch = range out {
       
   930 		if ch != '-' {
       
   931 			break
       
   932 		}
       
   933 	}
       
   934 	for b = len(out) - 1; b > 0; b-- {
       
   935 		if out[b] != '-' {
       
   936 			break
       
   937 		}
       
   938 	}
       
   939 	return out[a : b+1]
       
   940 }