1 // Blackfriday Markdown Processor |
|
2 // Available at http://github.com/russross/blackfriday |
|
3 // |
|
4 // Copyright © 2011 Russ Ross <russ@russross.com>. |
|
5 // Distributed under the Simplified BSD License. |
|
6 // See README.md for details. |
|
7 |
|
8 package blackfriday |
|
9 |
|
10 import ( |
|
11 "bytes" |
|
12 "fmt" |
|
13 "io" |
|
14 "strings" |
|
15 "unicode/utf8" |
|
16 ) |
|
17 |
|
18 // |
|
19 // Markdown parsing and processing |
|
20 // |
|
21 |
|
// Version is the version string of the package. It appears in the rendered
// document when the CompletePage flag is on.
const Version = "2.0"
|
25 |
|
// Extensions is a bitwise OR'ed collection of enabled Blackfriday
// extensions.
type Extensions int

// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
//
// NoExtensions occupies the first position of the const block with an
// explicit value of 0, so iota is already 1 on the NoIntraEmphasis line: the
// first real flag has the value 1 << 1 and every following flag is the next
// higher bit.
//
// CommonHTMLFlags and CommonExtensions are the defaults that Run applies when
// no options override them.
const (
	NoExtensions           Extensions = 0
	NoIntraEmphasis        Extensions = 1 << iota // Ignore emphasis markers inside words
	Tables                                        // Render tables
	FencedCode                                    // Render fenced code blocks
	Autolink                                      // Detect embedded URLs that are not explicitly marked
	Strikethrough                                 // Strikethrough text using ~~test~~
	LaxHTMLBlocks                                 // Loosen up HTML block parsing rules
	SpaceHeadings                                 // Be strict about prefix heading rules
	HardLineBreak                                 // Translate newlines into line breaks
	TabSizeEight                                  // Expand tabs to eight spaces instead of four
	Footnotes                                     // Pandoc-style footnotes
	NoEmptyLineBeforeBlock                        // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
	HeadingIDs                                    // specify heading IDs with {#id}
	Titleblock                                    // Titleblock ala pandoc
	AutoHeadingIDs                                // Create the heading ID from the text
	BackslashLineBreak                            // Translate trailing backslashes into line breaks
	DefinitionLists                               // Render definition lists

	CommonHTMLFlags HTMLFlags = UseXHTML | Smartypants |
		SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes

	CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
		Autolink | Strikethrough | SpaceHeadings | HeadingIDs |
		BackslashLineBreak | DefinitionLists
)
|
58 |
|
// ListType contains bitwise or'ed flags for list and list item objects.
type ListType int

// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
//
// Each constant is a distinct single bit, starting at 1 for ListTypeOrdered.
const (
	ListTypeOrdered ListType = 1 << iota
	ListTypeDefinition
	ListTypeTerm

	ListItemContainsBlock
	ListItemBeginningOfList // TODO: figure out if this is of any use now
	ListItemEndOfList
)
|
74 |
|
// CellAlignFlags holds a type of alignment in a table cell.
type CellAlignFlags int

// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
//
// TableAlignmentCenter is defined as the union of the left and right bits, so
// its numeric value is 3.
const (
	TableAlignmentLeft CellAlignFlags = 1 << iota
	TableAlignmentRight
	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
)
|
86 |
|
// The size of a tab stop, expressed as a number of columns.
const (
	TabSizeDefault = 4
	TabSizeDouble  = 8
)
|
92 |
|
// blockTags is the set of tag names that are treated as HTML block tags.
// Tags listed here may appear in markdown text without special escaping.
// The map is used purely as a set: only key presence matters.
var blockTags = map[string]struct{}{
	"blockquote": {},
	"del":        {},
	"div":        {},
	"dl":         {},
	"fieldset":   {},
	"form":       {},
	"h1":         {},
	"h2":         {},
	"h3":         {},
	"h4":         {},
	"h5":         {},
	"h6":         {},
	"iframe":     {},
	"ins":        {},
	"math":       {},
	"noscript":   {},
	"ol":         {},
	"pre":        {},
	"p":          {},
	"script":     {},
	"style":      {},
	"table":      {},
	"ul":         {},

	// HTML5
	"address":    {},
	"article":    {},
	"aside":      {},
	"canvas":     {},
	"figcaption": {},
	"figure":     {},
	"footer":     {},
	"header":     {},
	"hgroup":     {},
	"main":       {},
	"nav":        {},
	"output":     {},
	"progress":   {},
	"section":    {},
	"video":      {},
}
|
137 |
|
// Renderer is the rendering interface. This is mostly of interest if you are
// implementing a new rendering format.
//
// Only an HTML implementation is provided in this repository, see the README
// for external implementations.
//
// Run drives a Renderer in this order: RenderHeader once, RenderNode for each
// step of the tree walk, then RenderFooter once, all writing to the same
// writer.
type Renderer interface {
	// RenderNode is the main rendering method. It will be called once for
	// every leaf node and twice for every non-leaf node (first with
	// entering=true, then with entering=false). The method should write its
	// rendition of the node to the supplied writer w.
	//
	// The returned WalkStatus is handed back to the tree walk that invoked
	// the method.
	RenderNode(w io.Writer, node *Node, entering bool) WalkStatus

	// RenderHeader is a method that allows the renderer to produce some
	// content preceding the main body of the output document. The header is
	// understood in the broad sense here. For example, the default HTML
	// renderer will write not only the HTML document preamble, but also the
	// table of contents if it was requested.
	//
	// The method will be passed an entire document tree, in case a particular
	// implementation needs to inspect it to produce output.
	//
	// The output should be written to the supplied writer w. If your
	// implementation has no header to write, supply an empty implementation.
	RenderHeader(w io.Writer, ast *Node)

	// RenderFooter is a symmetric counterpart of RenderHeader: it is called
	// after the document body has been rendered and receives the same tree.
	RenderFooter(w io.Writer, ast *Node)
}
|
166 |
|
// inlineParser is the signature of the callback functions used for inline
// parsing. One such function is registered for each byte that triggers a
// response when parsing inline data (see the inlineCallback table filled in
// by New).
//
// NOTE(review): presumably the returned int is the number of bytes consumed
// and the returned node the parsed result (nil when nothing was produced) --
// confirm against the inline parser implementation.
type inlineParser func(p *Markdown, data []byte, offset int) (int, *Node)
|
170 |
|
// Markdown is a type that holds extensions and the runtime state used by
// Parse, and the renderer. You can not use it directly, construct it with New.
type Markdown struct {
	renderer          Renderer              // output renderer; Run calls its Render* methods
	referenceOverride ReferenceOverrideFunc // optional hook consulted by getRef before refs
	refs              map[string]*reference // reference definitions keyed by lower-cased id
	inlineCallback    [256]inlineParser     // inline parser per trigger byte; nil where none is registered
	extensions        Extensions            // enabled extension flags
	nesting           int                   // presumably the current inline nesting depth -- verify
	maxNesting        int                   // New sets this to 16
	insideLink        bool                  // presumably true while link text is being parsed -- verify

	// Footnotes need to be ordered as well as available to quickly check for
	// presence. If a ref is also a footnote, it's stored both in refs and here
	// in notes. Slice is nil if footnotes not enabled.
	notes []*reference

	// Block-parsing state. New points doc, tip, oldTip and
	// lastMatchedContainer at the same freshly created Document node and
	// sets allClosed to true.
	doc                  *Node
	tip                  *Node // = doc
	oldTip               *Node
	lastMatchedContainer *Node // = doc
	allClosed            bool
}
|
194 |
|
195 func (p *Markdown) getRef(refid string) (ref *reference, found bool) { |
|
196 if p.referenceOverride != nil { |
|
197 r, overridden := p.referenceOverride(refid) |
|
198 if overridden { |
|
199 if r == nil { |
|
200 return nil, false |
|
201 } |
|
202 return &reference{ |
|
203 link: []byte(r.Link), |
|
204 title: []byte(r.Title), |
|
205 noteID: 0, |
|
206 hasBlock: false, |
|
207 text: []byte(r.Text)}, true |
|
208 } |
|
209 } |
|
210 // refs are case insensitive |
|
211 ref, found = p.refs[strings.ToLower(refid)] |
|
212 return ref, found |
|
213 } |
|
214 |
|
215 func (p *Markdown) finalize(block *Node) { |
|
216 above := block.Parent |
|
217 block.open = false |
|
218 p.tip = above |
|
219 } |
|
220 |
|
221 func (p *Markdown) addChild(node NodeType, offset uint32) *Node { |
|
222 return p.addExistingChild(NewNode(node), offset) |
|
223 } |
|
224 |
|
225 func (p *Markdown) addExistingChild(node *Node, offset uint32) *Node { |
|
226 for !p.tip.canContain(node.Type) { |
|
227 p.finalize(p.tip) |
|
228 } |
|
229 p.tip.AppendChild(node) |
|
230 p.tip = node |
|
231 return node |
|
232 } |
|
233 |
|
234 func (p *Markdown) closeUnmatchedBlocks() { |
|
235 if !p.allClosed { |
|
236 for p.oldTip != p.lastMatchedContainer { |
|
237 parent := p.oldTip.Parent |
|
238 p.finalize(p.oldTip) |
|
239 p.oldTip = parent |
|
240 } |
|
241 p.allClosed = true |
|
242 } |
|
243 } |
|
244 |
|
245 // |
|
246 // |
|
247 // Public interface |
|
248 // |
|
249 // |
|
250 |
|
// Reference represents the details of a link.
// See the documentation in Options for more details on use-case.
//
// Values of this type are returned by a ReferenceOverrideFunc; the parser
// copies Link, Title and Text into its internal reference representation.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string
	// Title is the alternate text describing the link in more detail.
	Title string
	// Text is the optional text to override the ref with if the syntax used was
	// [refid][]
	Text string
}
|
262 |
|
// ReferenceOverrideFunc is expected to be called with a reference string and
// return either a valid Reference type that the reference string maps to or
// nil. If overridden is false, the default reference logic will be executed.
// If overridden is true and ref is nil, the reference is treated as not
// found and the references defined in the document are not consulted.
// See the documentation in Options for more details on use-case.
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
|
268 |
|
269 // New constructs a Markdown processor. You can use the same With* functions as |
|
270 // for Run() to customize parser's behavior and the renderer. |
|
271 func New(opts ...Option) *Markdown { |
|
272 var p Markdown |
|
273 for _, opt := range opts { |
|
274 opt(&p) |
|
275 } |
|
276 p.refs = make(map[string]*reference) |
|
277 p.maxNesting = 16 |
|
278 p.insideLink = false |
|
279 docNode := NewNode(Document) |
|
280 p.doc = docNode |
|
281 p.tip = docNode |
|
282 p.oldTip = docNode |
|
283 p.lastMatchedContainer = docNode |
|
284 p.allClosed = true |
|
285 // register inline parsers |
|
286 p.inlineCallback[' '] = maybeLineBreak |
|
287 p.inlineCallback['*'] = emphasis |
|
288 p.inlineCallback['_'] = emphasis |
|
289 if p.extensions&Strikethrough != 0 { |
|
290 p.inlineCallback['~'] = emphasis |
|
291 } |
|
292 p.inlineCallback['`'] = codeSpan |
|
293 p.inlineCallback['\n'] = lineBreak |
|
294 p.inlineCallback['['] = link |
|
295 p.inlineCallback['<'] = leftAngle |
|
296 p.inlineCallback['\\'] = escape |
|
297 p.inlineCallback['&'] = entity |
|
298 p.inlineCallback['!'] = maybeImage |
|
299 p.inlineCallback['^'] = maybeInlineFootnote |
|
300 if p.extensions&Autolink != 0 { |
|
301 p.inlineCallback['h'] = maybeAutoLink |
|
302 p.inlineCallback['m'] = maybeAutoLink |
|
303 p.inlineCallback['f'] = maybeAutoLink |
|
304 p.inlineCallback['H'] = maybeAutoLink |
|
305 p.inlineCallback['M'] = maybeAutoLink |
|
306 p.inlineCallback['F'] = maybeAutoLink |
|
307 } |
|
308 if p.extensions&Footnotes != 0 { |
|
309 p.notes = make([]*reference, 0) |
|
310 } |
|
311 return &p |
|
312 } |
|
313 |
|
// Option customizes the Markdown processor's default behavior. New applies
// the options in the order they are given, so a later option overrides an
// earlier one that sets the same field.
type Option func(*Markdown)
|
316 |
|
317 // WithRenderer allows you to override the default renderer. |
|
318 func WithRenderer(r Renderer) Option { |
|
319 return func(p *Markdown) { |
|
320 p.renderer = r |
|
321 } |
|
322 } |
|
323 |
|
324 // WithExtensions allows you to pick some of the many extensions provided by |
|
325 // Blackfriday. You can bitwise OR them. |
|
326 func WithExtensions(e Extensions) Option { |
|
327 return func(p *Markdown) { |
|
328 p.extensions = e |
|
329 } |
|
330 } |
|
331 |
|
332 // WithNoExtensions turns off all extensions and custom behavior. |
|
333 func WithNoExtensions() Option { |
|
334 return func(p *Markdown) { |
|
335 p.extensions = NoExtensions |
|
336 p.renderer = NewHTMLRenderer(HTMLRendererParameters{ |
|
337 Flags: HTMLFlagsNone, |
|
338 }) |
|
339 } |
|
340 } |
|
341 |
|
342 // WithRefOverride sets an optional function callback that is called every |
|
343 // time a reference is resolved. |
|
344 // |
|
345 // In Markdown, the link reference syntax can be made to resolve a link to |
|
346 // a reference instead of an inline URL, in one of the following ways: |
|
347 // |
|
348 // * [link text][refid] |
|
349 // * [refid][] |
|
350 // |
|
351 // Usually, the refid is defined at the bottom of the Markdown document. If |
|
352 // this override function is provided, the refid is passed to the override |
|
353 // function first, before consulting the defined refids at the bottom. If |
|
354 // the override function indicates an override did not occur, the refids at |
|
355 // the bottom will be used to fill in the link details. |
|
356 func WithRefOverride(o ReferenceOverrideFunc) Option { |
|
357 return func(p *Markdown) { |
|
358 p.referenceOverride = o |
|
359 } |
|
360 } |
|
361 |
|
362 // Run is the main entry point to Blackfriday. It parses and renders a |
|
363 // block of markdown-encoded text. |
|
364 // |
|
365 // The simplest invocation of Run takes one argument, input: |
|
366 // output := Run(input) |
|
367 // This will parse the input with CommonExtensions enabled and render it with |
|
368 // the default HTMLRenderer (with CommonHTMLFlags). |
|
369 // |
|
370 // Variadic arguments opts can customize the default behavior. Since Markdown |
|
371 // type does not contain exported fields, you can not use it directly. Instead, |
|
372 // use the With* functions. For example, this will call the most basic |
|
373 // functionality, with no extensions: |
|
374 // output := Run(input, WithNoExtensions()) |
|
375 // |
|
376 // You can use any number of With* arguments, even contradicting ones. They |
|
377 // will be applied in order of appearance and the latter will override the |
|
378 // former: |
|
379 // output := Run(input, WithNoExtensions(), WithExtensions(exts), |
|
380 // WithRenderer(yourRenderer)) |
|
381 func Run(input []byte, opts ...Option) []byte { |
|
382 r := NewHTMLRenderer(HTMLRendererParameters{ |
|
383 Flags: CommonHTMLFlags, |
|
384 }) |
|
385 optList := []Option{WithRenderer(r), WithExtensions(CommonExtensions)} |
|
386 optList = append(optList, opts...) |
|
387 parser := New(optList...) |
|
388 ast := parser.Parse(input) |
|
389 var buf bytes.Buffer |
|
390 parser.renderer.RenderHeader(&buf, ast) |
|
391 ast.Walk(func(node *Node, entering bool) WalkStatus { |
|
392 return parser.renderer.RenderNode(&buf, node, entering) |
|
393 }) |
|
394 parser.renderer.RenderFooter(&buf, ast) |
|
395 return buf.Bytes() |
|
396 } |
|
397 |
|
398 // Parse is an entry point to the parsing part of Blackfriday. It takes an |
|
399 // input markdown document and produces a syntax tree for its contents. This |
|
400 // tree can then be rendered with a default or custom renderer, or |
|
401 // analyzed/transformed by the caller to whatever non-standard needs they have. |
|
402 // The return value is the root node of the syntax tree. |
|
403 func (p *Markdown) Parse(input []byte) *Node { |
|
404 p.block(input) |
|
405 // Walk the tree and finish up some of unfinished blocks |
|
406 for p.tip != nil { |
|
407 p.finalize(p.tip) |
|
408 } |
|
409 // Walk the tree again and process inline markdown in each block |
|
410 p.doc.Walk(func(node *Node, entering bool) WalkStatus { |
|
411 if node.Type == Paragraph || node.Type == Heading || node.Type == TableCell { |
|
412 p.inline(node, node.content) |
|
413 node.content = nil |
|
414 } |
|
415 return GoToNext |
|
416 }) |
|
417 p.parseRefsToAST() |
|
418 return p.doc |
|
419 } |
|
420 |
|
// parseRefsToAST appends the collected footnotes to the document as an
// ordered list flagged with IsFootnotesList. It does nothing when the
// Footnotes extension is disabled or no footnotes were recorded in p.notes.
//
// Each footnote's pre-created Item node (ref.footnote) is attached to the
// list; its text (stored in ref.title) is parsed as block content when the
// footnote has a block and as inline content otherwise. Finally the inline
// parser is run over the paragraphs and headings produced for the list.
func (p *Markdown) parseRefsToAST() {
	if p.extensions&Footnotes == 0 || len(p.notes) == 0 {
		return
	}
	// Restart block construction at the document root so the list becomes a
	// child of the document.
	p.tip = p.doc
	block := p.addBlock(List, nil)
	block.IsFootnotesList = true
	block.ListFlags = ListTypeOrdered
	flags := ListItemBeginningOfList
	// Note: this loop is intentionally explicit, not range-form. This is
	// because the body of the loop will append nested footnotes to p.notes and
	// we need to process those late additions. Range form would only walk over
	// the fixed initial set.
	for i := 0; i < len(p.notes); i++ {
		ref := p.notes[i]
		p.addExistingChild(ref.footnote, 0)
		// This block shadows the outer list node for the rest of the
		// iteration; here it is the footnote's Item node.
		block := ref.footnote
		block.ListFlags = flags | ListTypeOrdered
		block.RefLink = ref.link
		if ref.hasBlock {
			// NOTE(review): the item's ListFlags were already assigned above
			// and the bit is cleared again at the end of the iteration, so
			// ListItemContainsBlock never reaches any item's ListFlags.
			flags |= ListItemContainsBlock
			p.block(ref.title)
		} else {
			p.inline(block, ref.title)
		}
		// Only the first item carries ListItemBeginningOfList.
		flags &^= ListItemBeginningOfList | ListItemContainsBlock
	}
	// block is the footnotes list again (the loop-local shadow is gone).
	above := block.Parent
	finalizeList(block)
	p.tip = above
	block.Walk(func(node *Node, entering bool) WalkStatus {
		if node.Type == Paragraph || node.Type == Heading {
			p.inline(node, node.content)
			node.content = nil
		}
		return GoToNext
	})
}
|
459 |
|
460 // |
|
461 // Link references |
|
462 // |
|
463 // This section implements support for references that (usually) appear |
|
464 // as footnotes in a document, and can be referenced anywhere in the document. |
|
465 // The basic format is: |
|
466 // |
|
467 // [1]: http://www.google.com/ "Google" |
|
468 // [2]: http://www.github.com/ "Github" |
|
469 // |
|
470 // Anywhere in the document, the reference can be linked by referring to its |
|
471 // label, i.e., 1 and 2 in this example, as in: |
|
472 // |
|
473 // This library is hosted on [Github][2], a git hosting site. |
|
474 // |
|
475 // Actual footnotes as specified in Pandoc and supported by some other Markdown |
|
476 // libraries such as php-markdown are also taken care of. They look like this: |
|
477 // |
|
478 // This sentence needs a bit of further explanation.[^note] |
|
479 // |
|
480 // [^note]: This is the explanation. |
|
481 // |
|
482 // Footnotes should be placed at the end of the document in an ordered list. |
|
483 // Inline footnotes such as: |
|
484 // |
|
485 // Inline footnotes^[Not supported.] also exist. |
|
486 // |
|
487 // are not yet supported. |
|
488 |
|
// reference holds all information necessary for reference-style links or
// footnotes.
//
// Consider this markdown with reference-style links:
//
//	[link][ref]
//
//	[ref]: /url/ "tooltip title"
//
// It will be ultimately converted to this HTML:
//
//	<p><a href="/url/" title="tooltip title">link</a></p>
//
// And a reference structure will be populated as follows:
//
//	p.refs["ref"] = &reference{
//	    link: "/url/",
//	    title: "tooltip title",
//	}
//
// Alternatively, reference can contain information about a footnote. Consider
// this markdown:
//
//	Text needing a footnote.[^a]
//
//	[^a]: This is the note
//
// A reference structure will be populated as follows:
//
//	p.refs["a"] = &reference{
//	    link: "a",
//	    title: "This is the note",
//	    noteID: <some positive int>,
//	}
//
// TODO: As you can see, it begs for splitting into two dedicated structures
// for refs and for footnotes.
type reference struct {
	link     []byte // link destination; for footnotes, the footnote id
	title    []byte // link title; for footnotes, the footnote's text
	noteID   int    // 0 if not a footnote ref
	hasBlock bool   // for footnotes: whether the text continues in indented lines
	footnote *Node  // a link to the Item node within a list of footnotes

	text []byte // only gets populated by refOverride feature with Reference.Text
}
|
535 |
|
536 func (r *reference) String() string { |
|
537 return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}", |
|
538 r.link, r.title, r.text, r.noteID, r.hasBlock) |
|
539 } |
|
540 |
|
// isReference checks whether or not data starts with a reference definition:
// a link reference ("[id]: url "title"") or, when the Footnotes extension is
// enabled, a footnote ("[^id]: text").
//
// If so, the definition is parsed and stored in p.refs under its lower-cased
// id. tabSize is only used for footnotes, where it is passed to scanFootnote
// as the indent size of continuation lines.
//
// Returns the number of bytes to skip to move past the definition, or zero if
// the first line is not a reference.
func isReference(p *Markdown, data []byte, tabSize int) int {
	// up to 3 optional leading spaces
	if len(data) < 4 {
		return 0
	}
	i := 0
	for i < 3 && data[i] == ' ' {
		i++
	}

	noteID := 0

	// id part: anything but a newline between brackets
	if data[i] != '[' {
		return 0
	}
	i++
	if p.extensions&Footnotes != 0 {
		if i < len(data) && data[i] == '^' {
			// we can set it to anything here because the proper noteIds will
			// be assigned later during the second pass. It just has to be != 0
			noteID = 1
			i++
		}
	}
	idOffset := i
	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
		i++
	}
	if i >= len(data) || data[i] != ']' {
		return 0
	}
	idEnd := i
	// footnotes can have empty ID, like this: [^], but a reference can not be
	// empty like this: []. Break early if it's not a footnote and there's no ID
	if noteID == 0 && idOffset == idEnd {
		return 0
	}
	// spacer: colon (space | tab)* newline? (space | tab)*
	i++
	if i >= len(data) || data[i] != ':' {
		return 0
	}
	i++
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}
	if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
		i++
		// treat "\r\n" as a single line break
		if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
			i++
		}
	}
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}
	// nothing follows the spacer: not a definition
	if i >= len(data) {
		return 0
	}

	var (
		linkOffset, linkEnd   int
		titleOffset, titleEnd int
		lineEnd               int
		raw                   []byte
		hasBlock              bool
	)

	if p.extensions&Footnotes != 0 && noteID != 0 {
		linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
		lineEnd = linkEnd
	} else {
		linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
	}
	// both scanners report failure by leaving the end offset at zero
	if lineEnd == 0 {
		return 0
	}

	// a valid ref has been found

	ref := &reference{
		noteID:   noteID,
		hasBlock: hasBlock,
	}

	if noteID > 0 {
		// reusing the link field for the id since footnotes don't have links
		ref.link = data[idOffset:idEnd]
		// if footnote, it's not really a title, it's the contained text
		ref.title = raw
	} else {
		ref.link = data[linkOffset:linkEnd]
		ref.title = data[titleOffset:titleEnd]
	}

	// id matches are case-insensitive
	id := string(bytes.ToLower(data[idOffset:idEnd]))

	p.refs[id] = ref

	return lineEnd
}
|
648 |
|
649 func scanLinkRef(p *Markdown, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) { |
|
650 // link: whitespace-free sequence, optionally between angle brackets |
|
651 if data[i] == '<' { |
|
652 i++ |
|
653 } |
|
654 linkOffset = i |
|
655 for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' { |
|
656 i++ |
|
657 } |
|
658 linkEnd = i |
|
659 if data[linkOffset] == '<' && data[linkEnd-1] == '>' { |
|
660 linkOffset++ |
|
661 linkEnd-- |
|
662 } |
|
663 |
|
664 // optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) |
|
665 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { |
|
666 i++ |
|
667 } |
|
668 if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' { |
|
669 return |
|
670 } |
|
671 |
|
672 // compute end-of-line |
|
673 if i >= len(data) || data[i] == '\r' || data[i] == '\n' { |
|
674 lineEnd = i |
|
675 } |
|
676 if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' { |
|
677 lineEnd++ |
|
678 } |
|
679 |
|
680 // optional (space|tab)* spacer after a newline |
|
681 if lineEnd > 0 { |
|
682 i = lineEnd + 1 |
|
683 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { |
|
684 i++ |
|
685 } |
|
686 } |
|
687 |
|
688 // optional title: any non-newline sequence enclosed in '"() alone on its line |
|
689 if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') { |
|
690 i++ |
|
691 titleOffset = i |
|
692 |
|
693 // look for EOL |
|
694 for i < len(data) && data[i] != '\n' && data[i] != '\r' { |
|
695 i++ |
|
696 } |
|
697 if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' { |
|
698 titleEnd = i + 1 |
|
699 } else { |
|
700 titleEnd = i |
|
701 } |
|
702 |
|
703 // step back |
|
704 i-- |
|
705 for i > titleOffset && (data[i] == ' ' || data[i] == '\t') { |
|
706 i-- |
|
707 } |
|
708 if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') { |
|
709 lineEnd = titleEnd |
|
710 titleEnd = i |
|
711 } |
|
712 } |
|
713 |
|
714 return |
|
715 } |
|
716 |
|
// scanFootnote extracts the text of a footnote definition that starts at
// offset i in data.
//
// The first bit of this logic is the same as Parser.listItem, but the rest
// is much simpler. This function simply finds the entire block and shifts it
// over by one tab if it is indeed a block (just returns the line if it's not).
//
// blockStart is the offset of the first non-space byte at or after i.
// blockEnd is the end of the section in the input buffer, and contents is the
// extracted text that was shifted over one tab. It will need to be rendered at
// the end of the document. hasBlock reports whether at least one indented
// continuation line was added to contents.
//
// All results are zero when i == 0 or data is empty.
func scanFootnote(p *Markdown, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
	if i == 0 || len(data) == 0 {
		return
	}

	// skip leading whitespace on first line (spaces only)
	for i < len(data) && data[i] == ' ' {
		i++
	}

	blockStart = i

	// find the end of the line; i stops just past the '\n' (or at len(data))
	blockEnd = i
	for i < len(data) && data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[blockEnd:i])
	blockEnd = i

	// process the following lines
	containsBlankLine := false

gatherLines:
	for blockEnd < len(data) {
		i++

		// find the end of this line
		for i < len(data) && data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[blockEnd:i]) > 0 {
			containsBlankLine = true
			blockEnd = i
			continue
		}

		n := 0
		if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
			// this is the end of the block.
			// we don't want to include this last line in the index.
			break gatherLines
		}

		// if there were blank lines before this one, insert a new one now
		// (a run of blank lines collapses into a single '\n')
		if containsBlankLine {
			raw.WriteByte('\n')
			containsBlankLine = false
		}

		// get rid of that first tab, write to buffer
		raw.Write(data[blockEnd+n : i])
		hasBlock = true

		blockEnd = i
	}

	// make sure the extracted text ends with a newline
	if data[blockEnd-1] != '\n' {
		raw.WriteByte('\n')
	}

	contents = raw.Bytes()

	return
}
|
796 |
|
797 // |
|
798 // |
|
799 // Miscellaneous helper functions |
|
800 // |
|
801 // |
|
802 |
|
// ispunct reports whether c is one of the 32 ASCII punctuation symbols.
// The set is taken from a private function in regexp in the stdlib.
func ispunct(c byte) bool {
	switch c {
	case '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/',
		':', ';', '<', '=', '>', '?', '@',
		'[', '\\', ']', '^', '_', '`',
		'{', '|', '}', '~':
		return true
	}
	return false
}
|
813 |
|
// isspace reports whether c is an ASCII whitespace character: space, tab,
// line feed, carriage return, form feed or vertical tab.
func isspace(c byte) bool {
	switch c {
	case ' ', '\t', '\n', '\r', '\f', '\v':
		return true
	}
	return false
}
|
818 |
|
// isletter reports whether c is an ASCII letter (a-z or A-Z).
func isletter(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
|
823 |
|
824 // Test if a character is a letter or a digit. |
|
825 // TODO: check when this is looking for ASCII alnum and when it should use unicode |
|
826 func isalnum(c byte) bool { |
|
827 return (c >= '0' && c <= '9') || isletter(c) |
|
828 } |
|
829 |
|
// expandTabs writes line to out with every tab character replaced by spaces.
//
// When tabs occur only at the very beginning of the line (or not at all),
// each leading tab becomes exactly tabSize spaces and the remainder is copied
// unchanged. Otherwise columns are counted in runes and each tab is padded
// with at least one space up to the next multiple of tabSize.
//
// Nothing besides the expanded line is written; in particular no newline is
// appended.
func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
	// Count the tabs that open the line.
	leading := 0
	for leading < len(line) && line[leading] == '\t' {
		leading++
	}

	// Fast path: no tab after the leading run, so no rune decoding is needed.
	if bytes.IndexByte(line[leading:], '\t') < 0 {
		for pad := leading * tabSize; pad > 0; pad-- {
			out.WriteByte(' ')
		}
		out.Write(line[leading:])
		return
	}

	// Slow path: track the column (in runes) to know how far each tab
	// reaches.
	col := 0
	for pos := 0; pos < len(line); {
		if line[pos] == '\t' {
			// A tab always yields at least one space, then pads up to the
			// next tab stop.
			out.WriteByte(' ')
			col++
			for col%tabSize != 0 {
				out.WriteByte(' ')
				col++
			}
			pos++
			continue
		}

		// Copy the run of non-tab bytes, advancing one column per rune.
		begin := pos
		for pos < len(line) && line[pos] != '\t' {
			_, width := utf8.DecodeRune(line[pos:])
			pos += width
			col++
		}
		out.Write(line[begin:pos])
	}
}
|
887 |
|
// isIndented reports whether a line counts as indented and, if so, how many
// bytes the indent occupies: 1 for a leading tab, indentSize for a run of
// indentSize leading spaces, and 0 when the line is not indented.
func isIndented(data []byte, indentSize int) int {
	switch {
	case len(data) == 0:
		return 0
	case data[0] == '\t':
		return 1
	case len(data) < indentSize:
		return 0
	}
	// Every one of the first indentSize bytes has to be a space.
	for k := indentSize - 1; k >= 0; k-- {
		if data[k] != ' ' {
			return 0
		}
	}
	return indentSize
}
|
907 |
|
908 // Create a url-safe slug for fragments |
|
909 func slugify(in []byte) []byte { |
|
910 if len(in) == 0 { |
|
911 return in |
|
912 } |
|
913 out := make([]byte, 0, len(in)) |
|
914 sym := false |
|
915 |
|
916 for _, ch := range in { |
|
917 if isalnum(ch) { |
|
918 sym = false |
|
919 out = append(out, ch) |
|
920 } else if sym { |
|
921 continue |
|
922 } else { |
|
923 out = append(out, '-') |
|
924 sym = true |
|
925 } |
|
926 } |
|
927 var a, b int |
|
928 var ch byte |
|
929 for a, ch = range out { |
|
930 if ch != '-' { |
|
931 break |
|
932 } |
|
933 } |
|
934 for b = len(out) - 1; b > 0; b-- { |
|
935 if out[b] != '-' { |
|
936 break |
|
937 } |
|
938 } |
|
939 return out[a : b+1] |
|
940 } |
|