251
|
1 |
//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//

//
//
// Markdown parsing and processing
//
//
|
15 |
|
|
16 |
package blackfriday |
|
17 |
|
|
18 |
import ( |
|
19 |
"bytes" |
|
20 |
"fmt" |
|
21 |
"strings" |
|
22 |
"unicode/utf8" |
|
23 |
) |
|
24 |
|
|
25 |
// VERSION is the version string of this package.
const VERSION = "1.5"
|
26 |
|
|
27 |
// These are the supported markdown parsing extensions. |
|
28 |
// OR these values together to select multiple extensions. |
|
29 |
const ( |
|
30 |
EXTENSION_NO_INTRA_EMPHASIS = 1 << iota // ignore emphasis markers inside words |
|
31 |
EXTENSION_TABLES // render tables |
|
32 |
EXTENSION_FENCED_CODE // render fenced code blocks |
|
33 |
EXTENSION_AUTOLINK // detect embedded URLs that are not explicitly marked |
|
34 |
EXTENSION_STRIKETHROUGH // strikethrough text using ~~test~~ |
|
35 |
EXTENSION_LAX_HTML_BLOCKS // loosen up HTML block parsing rules |
|
36 |
EXTENSION_SPACE_HEADERS // be strict about prefix header rules |
|
37 |
EXTENSION_HARD_LINE_BREAK // translate newlines into line breaks |
|
38 |
EXTENSION_TAB_SIZE_EIGHT // expand tabs to eight spaces instead of four |
|
39 |
EXTENSION_FOOTNOTES // Pandoc-style footnotes |
|
40 |
EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block |
|
41 |
EXTENSION_HEADER_IDS // specify header IDs with {#id} |
|
42 |
EXTENSION_TITLEBLOCK // Titleblock ala pandoc |
|
43 |
EXTENSION_AUTO_HEADER_IDS // Create the header ID from the text |
|
44 |
EXTENSION_BACKSLASH_LINE_BREAK // translate trailing backslashes into line breaks |
|
45 |
EXTENSION_DEFINITION_LISTS // render definition lists |
|
46 |
EXTENSION_JOIN_LINES // delete newline and join lines |
|
47 |
|
|
48 |
commonHtmlFlags = 0 | |
|
49 |
HTML_USE_XHTML | |
|
50 |
HTML_USE_SMARTYPANTS | |
|
51 |
HTML_SMARTYPANTS_FRACTIONS | |
|
52 |
HTML_SMARTYPANTS_DASHES | |
|
53 |
HTML_SMARTYPANTS_LATEX_DASHES |
|
54 |
|
|
55 |
commonExtensions = 0 | |
|
56 |
EXTENSION_NO_INTRA_EMPHASIS | |
|
57 |
EXTENSION_TABLES | |
|
58 |
EXTENSION_FENCED_CODE | |
|
59 |
EXTENSION_AUTOLINK | |
|
60 |
EXTENSION_STRIKETHROUGH | |
|
61 |
EXTENSION_SPACE_HEADERS | |
|
62 |
EXTENSION_HEADER_IDS | |
|
63 |
EXTENSION_BACKSLASH_LINE_BREAK | |
|
64 |
EXTENSION_DEFINITION_LISTS |
|
65 |
) |
|
66 |
|
|
67 |
// Link kinds handed to the link renderer (the kind argument of
// Renderer.AutoLink).
//
// The values are consecutive integers, not bit flags: exactly one of them is
// passed at a time. They mostly matter when writing a new output format.
const (
	LINK_TYPE_NOT_AUTOLINK = iota
	LINK_TYPE_NORMAL
	LINK_TYPE_EMAIL
)
|
75 |
|
|
76 |
// Flags handed to the ListItem renderer.
//
// Each constant is a separate bit, so several may be ORed together. They
// mostly matter when writing a new output format.
const (
	LIST_TYPE_ORDERED = 1 << iota
	LIST_TYPE_DEFINITION
	LIST_TYPE_TERM
	LIST_ITEM_CONTAINS_BLOCK
	LIST_ITEM_BEGINNING_OF_LIST
	LIST_ITEM_END_OF_LIST
)
|
87 |
|
|
88 |
// Alignment values handed to the table cell renderers.
//
// A cell carries a single alignment value. TABLE_ALIGNMENT_CENTER is defined
// as the combination of the left and right bits. These mostly matter when
// writing a new output format.
const (
	TABLE_ALIGNMENT_LEFT = 1 << iota
	TABLE_ALIGNMENT_RIGHT
	TABLE_ALIGNMENT_CENTER = (TABLE_ALIGNMENT_LEFT | TABLE_ALIGNMENT_RIGHT)
)
|
96 |
|
|
97 |
// Tab stop widths, in columns. firstPass uses TAB_SIZE_DEFAULT unless
// EXTENSION_TAB_SIZE_EIGHT is set, in which case it uses TAB_SIZE_EIGHT.
const (
	TAB_SIZE_DEFAULT = 4
	TAB_SIZE_EIGHT   = 8
)
|
102 |
|
|
103 |
// blockTags is the set of tag names recognized as HTML block-level tags.
// Any of these can be included in markdown text without special escaping.
// The map is used purely as a set: only key presence matters.
var blockTags = map[string]struct{}{
	"blockquote": {},
	"del":        {},
	"div":        {},
	"dl":         {},
	"fieldset":   {},
	"form":       {},
	"h1":         {},
	"h2":         {},
	"h3":         {},
	"h4":         {},
	"h5":         {},
	"h6":         {},
	"iframe":     {},
	"ins":        {},
	"math":       {},
	"noscript":   {},
	"ol":         {},
	"pre":        {},
	"p":          {},
	"script":     {},
	"style":      {},
	"table":      {},
	"ul":         {},

	// HTML5
	"address":    {},
	"article":    {},
	"aside":      {},
	"canvas":     {},
	"figcaption": {},
	"figure":     {},
	"footer":     {},
	"header":     {},
	"hgroup":     {},
	"main":       {},
	"nav":        {},
	"output":     {},
	"progress":   {},
	"section":    {},
	"video":      {},
}
|
147 |
|
|
148 |
// Renderer is the interface an output format implements.
// It is mostly of interest when implementing a new rendering format;
// Html and Latex implementations are currently provided.
//
// Element contents reach a callback in one of two forms:
//
//   - a byte slice holding the already rendered contents of the element, or
//   - a func() bool that, when invoked, writes the contents of the element
//     directly to the output buffer and reports success. If it returns
//     false, the rendering function should reset the output buffer as
//     though it had never been called.
type Renderer interface {
	// Block-level callbacks.
	BlockCode(out *bytes.Buffer, text []byte, infoString string)
	BlockQuote(out *bytes.Buffer, text []byte)
	BlockHtml(out *bytes.Buffer, text []byte)
	Header(out *bytes.Buffer, text func() bool, level int, id string)
	HRule(out *bytes.Buffer)
	List(out *bytes.Buffer, text func() bool, flags int)
	ListItem(out *bytes.Buffer, text []byte, flags int)
	Paragraph(out *bytes.Buffer, text func() bool)
	Table(out *bytes.Buffer, header []byte, body []byte, columnData []int)
	TableRow(out *bytes.Buffer, text []byte)
	TableHeaderCell(out *bytes.Buffer, text []byte, flags int)
	TableCell(out *bytes.Buffer, text []byte, flags int)
	Footnotes(out *bytes.Buffer, text func() bool)
	FootnoteItem(out *bytes.Buffer, name, text []byte, flags int)
	TitleBlock(out *bytes.Buffer, text []byte)

	// Span-level callbacks.
	AutoLink(out *bytes.Buffer, link []byte, kind int)
	CodeSpan(out *bytes.Buffer, text []byte)
	DoubleEmphasis(out *bytes.Buffer, text []byte)
	Emphasis(out *bytes.Buffer, text []byte)
	Image(out *bytes.Buffer, link []byte, title []byte, alt []byte)
	LineBreak(out *bytes.Buffer)
	Link(out *bytes.Buffer, link []byte, title []byte, content []byte)
	RawHtmlTag(out *bytes.Buffer, tag []byte)
	TripleEmphasis(out *bytes.Buffer, text []byte)
	StrikeThrough(out *bytes.Buffer, text []byte)
	FootnoteRef(out *bytes.Buffer, ref []byte, id int)

	// Low-level callbacks.
	Entity(out *bytes.Buffer, entity []byte)
	NormalText(out *bytes.Buffer, text []byte)

	// Document header and footer; secondPass calls these before and after
	// the document body.
	DocumentHeader(out *bytes.Buffer)
	DocumentFooter(out *bytes.Buffer)

	// GetFlags returns the renderer's flag set.
	GetFlags() int
}
|
201 |
|
|
202 |
// Callback functions for inline parsing. One such function is defined |
|
203 |
// for each character that triggers a response when parsing inline data. |
|
204 |
type inlineParser func(p *parser, out *bytes.Buffer, data []byte, offset int) int |
|
205 |
|
|
206 |
// Parser holds runtime state used by the parser. |
|
207 |
// This is constructed by the Markdown function. |
|
208 |
type parser struct { |
|
209 |
r Renderer |
|
210 |
refOverride ReferenceOverrideFunc |
|
211 |
refs map[string]*reference |
|
212 |
inlineCallback [256]inlineParser |
|
213 |
flags int |
|
214 |
nesting int |
|
215 |
maxNesting int |
|
216 |
insideLink bool |
|
217 |
|
|
218 |
// Footnotes need to be ordered as well as available to quickly check for |
|
219 |
// presence. If a ref is also a footnote, it's stored both in refs and here |
|
220 |
// in notes. Slice is nil if footnotes not enabled. |
|
221 |
notes []*reference |
|
222 |
notesRecord map[string]struct{} |
|
223 |
} |
|
224 |
|
|
225 |
func (p *parser) getRef(refid string) (ref *reference, found bool) { |
|
226 |
if p.refOverride != nil { |
|
227 |
r, overridden := p.refOverride(refid) |
|
228 |
if overridden { |
|
229 |
if r == nil { |
|
230 |
return nil, false |
|
231 |
} |
|
232 |
return &reference{ |
|
233 |
link: []byte(r.Link), |
|
234 |
title: []byte(r.Title), |
|
235 |
noteId: 0, |
|
236 |
hasBlock: false, |
|
237 |
text: []byte(r.Text)}, true |
|
238 |
} |
|
239 |
} |
|
240 |
// refs are case insensitive |
|
241 |
ref, found = p.refs[strings.ToLower(refid)] |
|
242 |
return ref, found |
|
243 |
} |
|
244 |
|
|
245 |
func (p *parser) isFootnote(ref *reference) bool { |
|
246 |
_, ok := p.notesRecord[string(ref.link)] |
|
247 |
return ok |
|
248 |
} |
|
249 |
|
|
250 |
// |
|
251 |
// |
|
252 |
// Public interface |
|
253 |
// |
|
254 |
// |
|
255 |
|
|
256 |
// Reference describes the target of a link reference.
// See the documentation in Options for more details on the use case.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string
	// Title is the alternate text describing the link in more detail.
	Title string
	// Text is the optional text that replaces the ref when the syntax used
	// was [refid][].
	Text string
}
|
267 |
|
|
268 |
// ReferenceOverrideFunc is expected to be called with a reference string and |
|
269 |
// return either a valid Reference type that the reference string maps to or |
|
270 |
// nil. If overridden is false, the default reference logic will be executed. |
|
271 |
// See the documentation in Options for more details on use-case. |
|
272 |
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool) |
|
273 |
|
|
274 |
// Options represents configurable overrides and callbacks (in addition to the |
|
275 |
// extension flag set) for configuring a Markdown parse. |
|
276 |
type Options struct { |
|
277 |
// Extensions is a flag set of bit-wise ORed extension bits. See the |
|
278 |
// EXTENSION_* flags defined in this package. |
|
279 |
Extensions int |
|
280 |
|
|
281 |
// ReferenceOverride is an optional function callback that is called every |
|
282 |
// time a reference is resolved. |
|
283 |
// |
|
284 |
// In Markdown, the link reference syntax can be made to resolve a link to |
|
285 |
// a reference instead of an inline URL, in one of the following ways: |
|
286 |
// |
|
287 |
// * [link text][refid] |
|
288 |
// * [refid][] |
|
289 |
// |
|
290 |
// Usually, the refid is defined at the bottom of the Markdown document. If |
|
291 |
// this override function is provided, the refid is passed to the override |
|
292 |
// function first, before consulting the defined refids at the bottom. If |
|
293 |
// the override function indicates an override did not occur, the refids at |
|
294 |
// the bottom will be used to fill in the link details. |
|
295 |
ReferenceOverride ReferenceOverrideFunc |
|
296 |
} |
|
297 |
|
|
298 |
// MarkdownBasic is a convenience function for simple rendering. |
|
299 |
// It processes markdown input with no extensions enabled. |
|
300 |
func MarkdownBasic(input []byte) []byte { |
|
301 |
// set up the HTML renderer |
|
302 |
htmlFlags := HTML_USE_XHTML |
|
303 |
renderer := HtmlRenderer(htmlFlags, "", "") |
|
304 |
|
|
305 |
// set up the parser |
|
306 |
return MarkdownOptions(input, renderer, Options{Extensions: 0}) |
|
307 |
} |
|
308 |
|
|
309 |
// Call Markdown with most useful extensions enabled |
|
310 |
// MarkdownCommon is a convenience function for simple rendering. |
|
311 |
// It processes markdown input with common extensions enabled, including: |
|
312 |
// |
|
313 |
// * Smartypants processing with smart fractions and LaTeX dashes |
|
314 |
// |
|
315 |
// * Intra-word emphasis suppression |
|
316 |
// |
|
317 |
// * Tables |
|
318 |
// |
|
319 |
// * Fenced code blocks |
|
320 |
// |
|
321 |
// * Autolinking |
|
322 |
// |
|
323 |
// * Strikethrough support |
|
324 |
// |
|
325 |
// * Strict header parsing |
|
326 |
// |
|
327 |
// * Custom Header IDs |
|
328 |
func MarkdownCommon(input []byte) []byte { |
|
329 |
// set up the HTML renderer |
|
330 |
renderer := HtmlRenderer(commonHtmlFlags, "", "") |
|
331 |
return MarkdownOptions(input, renderer, Options{ |
|
332 |
Extensions: commonExtensions}) |
|
333 |
} |
|
334 |
|
|
335 |
// Markdown is the main rendering function. |
|
336 |
// It parses and renders a block of markdown-encoded text. |
|
337 |
// The supplied Renderer is used to format the output, and extensions dictates |
|
338 |
// which non-standard extensions are enabled. |
|
339 |
// |
|
340 |
// To use the supplied Html or LaTeX renderers, see HtmlRenderer and |
|
341 |
// LatexRenderer, respectively. |
|
342 |
func Markdown(input []byte, renderer Renderer, extensions int) []byte { |
|
343 |
return MarkdownOptions(input, renderer, Options{ |
|
344 |
Extensions: extensions}) |
|
345 |
} |
|
346 |
|
|
347 |
// MarkdownOptions is just like Markdown but takes additional options through |
|
348 |
// the Options struct. |
|
349 |
func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte { |
|
350 |
// no point in parsing if we can't render |
|
351 |
if renderer == nil { |
|
352 |
return nil |
|
353 |
} |
|
354 |
|
|
355 |
extensions := opts.Extensions |
|
356 |
|
|
357 |
// fill in the render structure |
|
358 |
p := new(parser) |
|
359 |
p.r = renderer |
|
360 |
p.flags = extensions |
|
361 |
p.refOverride = opts.ReferenceOverride |
|
362 |
p.refs = make(map[string]*reference) |
|
363 |
p.maxNesting = 16 |
|
364 |
p.insideLink = false |
|
365 |
|
|
366 |
// register inline parsers |
|
367 |
p.inlineCallback['*'] = emphasis |
|
368 |
p.inlineCallback['_'] = emphasis |
|
369 |
if extensions&EXTENSION_STRIKETHROUGH != 0 { |
|
370 |
p.inlineCallback['~'] = emphasis |
|
371 |
} |
|
372 |
p.inlineCallback['`'] = codeSpan |
|
373 |
p.inlineCallback['\n'] = lineBreak |
|
374 |
p.inlineCallback['['] = link |
|
375 |
p.inlineCallback['<'] = leftAngle |
|
376 |
p.inlineCallback['\\'] = escape |
|
377 |
p.inlineCallback['&'] = entity |
|
378 |
|
|
379 |
if extensions&EXTENSION_AUTOLINK != 0 { |
|
380 |
p.inlineCallback[':'] = autoLink |
|
381 |
} |
|
382 |
|
|
383 |
if extensions&EXTENSION_FOOTNOTES != 0 { |
|
384 |
p.notes = make([]*reference, 0) |
|
385 |
p.notesRecord = make(map[string]struct{}) |
|
386 |
} |
|
387 |
|
|
388 |
first := firstPass(p, input) |
|
389 |
second := secondPass(p, first) |
|
390 |
return second |
|
391 |
} |
|
392 |
|
|
393 |
// first pass: |
|
394 |
// - normalize newlines |
|
395 |
// - extract references (outside of fenced code blocks) |
|
396 |
// - expand tabs (outside of fenced code blocks) |
|
397 |
// - copy everything else |
|
398 |
func firstPass(p *parser, input []byte) []byte { |
|
399 |
var out bytes.Buffer |
|
400 |
tabSize := TAB_SIZE_DEFAULT |
|
401 |
if p.flags&EXTENSION_TAB_SIZE_EIGHT != 0 { |
|
402 |
tabSize = TAB_SIZE_EIGHT |
|
403 |
} |
|
404 |
beg := 0 |
|
405 |
lastFencedCodeBlockEnd := 0 |
|
406 |
for beg < len(input) { |
|
407 |
// Find end of this line, then process the line. |
|
408 |
end := beg |
|
409 |
for end < len(input) && input[end] != '\n' && input[end] != '\r' { |
|
410 |
end++ |
|
411 |
} |
|
412 |
|
|
413 |
if p.flags&EXTENSION_FENCED_CODE != 0 { |
|
414 |
// track fenced code block boundaries to suppress tab expansion |
|
415 |
// and reference extraction inside them: |
|
416 |
if beg >= lastFencedCodeBlockEnd { |
|
417 |
if i := p.fencedCodeBlock(&out, input[beg:], false); i > 0 { |
|
418 |
lastFencedCodeBlockEnd = beg + i |
|
419 |
} |
|
420 |
} |
|
421 |
} |
|
422 |
|
|
423 |
// add the line body if present |
|
424 |
if end > beg { |
|
425 |
if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks. |
|
426 |
out.Write(input[beg:end]) |
|
427 |
} else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 { |
|
428 |
beg += refEnd |
|
429 |
continue |
|
430 |
} else { |
|
431 |
expandTabs(&out, input[beg:end], tabSize) |
|
432 |
} |
|
433 |
} |
|
434 |
|
|
435 |
if end < len(input) && input[end] == '\r' { |
|
436 |
end++ |
|
437 |
} |
|
438 |
if end < len(input) && input[end] == '\n' { |
|
439 |
end++ |
|
440 |
} |
|
441 |
out.WriteByte('\n') |
|
442 |
|
|
443 |
beg = end |
|
444 |
} |
|
445 |
|
|
446 |
// empty input? |
|
447 |
if out.Len() == 0 { |
|
448 |
out.WriteByte('\n') |
|
449 |
} |
|
450 |
|
|
451 |
return out.Bytes() |
|
452 |
} |
|
453 |
|
|
454 |
// second pass: actual rendering |
|
455 |
func secondPass(p *parser, input []byte) []byte { |
|
456 |
var output bytes.Buffer |
|
457 |
|
|
458 |
p.r.DocumentHeader(&output) |
|
459 |
p.block(&output, input) |
|
460 |
|
|
461 |
if p.flags&EXTENSION_FOOTNOTES != 0 && len(p.notes) > 0 { |
|
462 |
p.r.Footnotes(&output, func() bool { |
|
463 |
flags := LIST_ITEM_BEGINNING_OF_LIST |
|
464 |
for i := 0; i < len(p.notes); i += 1 { |
|
465 |
ref := p.notes[i] |
|
466 |
var buf bytes.Buffer |
|
467 |
if ref.hasBlock { |
|
468 |
flags |= LIST_ITEM_CONTAINS_BLOCK |
|
469 |
p.block(&buf, ref.title) |
|
470 |
} else { |
|
471 |
p.inline(&buf, ref.title) |
|
472 |
} |
|
473 |
p.r.FootnoteItem(&output, ref.link, buf.Bytes(), flags) |
|
474 |
flags &^= LIST_ITEM_BEGINNING_OF_LIST | LIST_ITEM_CONTAINS_BLOCK |
|
475 |
} |
|
476 |
|
|
477 |
return true |
|
478 |
}) |
|
479 |
} |
|
480 |
|
|
481 |
p.r.DocumentFooter(&output) |
|
482 |
|
|
483 |
if p.nesting != 0 { |
|
484 |
panic("Nesting level did not end at zero") |
|
485 |
} |
|
486 |
|
|
487 |
return output.Bytes() |
|
488 |
} |
|
489 |
|
|
490 |
//
// Link references
//
// This section implements support for references that (usually) appear
// as footnotes in a document, and can be referenced anywhere in the document.
// The basic format is:
//
//    [1]: http://www.google.com/ "Google"
//    [2]: http://www.github.com/ "Github"
//
// Anywhere in the document, the reference can be linked by referring to its
// label, i.e., 1 and 2 in this example, as in:
//
//    This library is hosted on [Github][2], a git hosting site.
//
// Actual footnotes as specified in Pandoc and supported by some other Markdown
// libraries such as php-markdown are also taken care of. They look like this:
//
//    This sentence needs a bit of further explanation.[^note]
//
//    [^note]: This is the explanation.
//
// Footnotes should be placed at the end of the document in an ordered list.
// Inline footnotes such as:
//
//    Inline footnotes^[Not supported.] also exist.
//
// are not yet supported.
|
518 |
|
|
519 |
// reference is the parsed form of a link reference or footnote definition.
// For footnotes, link holds the footnote id and title holds the footnote
// text (see isReference).
type reference struct {
	link     []byte
	title    []byte
	noteId   int // 0 if not a footnote ref
	hasBlock bool
	text     []byte
}

// String returns a debug representation of r listing every field, with the
// byte slices shown as quoted strings.
func (r *reference) String() string {
	const layout = "{link: %q, title: %q, text: %q, noteId: %d, hasBlock: %v}"
	return fmt.Sprintf(layout, r.link, r.title, r.text, r.noteId, r.hasBlock)
}
|
532 |
|
|
533 |
// Check whether or not data starts with a reference link. |
|
534 |
// If so, it is parsed and stored in the list of references |
|
535 |
// (in the render struct). |
|
536 |
// Returns the number of bytes to skip to move past it, |
|
537 |
// or zero if the first line is not a reference. |
|
538 |
func isReference(p *parser, data []byte, tabSize int) int { |
|
539 |
// up to 3 optional leading spaces |
|
540 |
if len(data) < 4 { |
|
541 |
return 0 |
|
542 |
} |
|
543 |
i := 0 |
|
544 |
for i < 3 && data[i] == ' ' { |
|
545 |
i++ |
|
546 |
} |
|
547 |
|
|
548 |
noteId := 0 |
|
549 |
|
|
550 |
// id part: anything but a newline between brackets |
|
551 |
if data[i] != '[' { |
|
552 |
return 0 |
|
553 |
} |
|
554 |
i++ |
|
555 |
if p.flags&EXTENSION_FOOTNOTES != 0 { |
|
556 |
if i < len(data) && data[i] == '^' { |
|
557 |
// we can set it to anything here because the proper noteIds will |
|
558 |
// be assigned later during the second pass. It just has to be != 0 |
|
559 |
noteId = 1 |
|
560 |
i++ |
|
561 |
} |
|
562 |
} |
|
563 |
idOffset := i |
|
564 |
for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' { |
|
565 |
i++ |
|
566 |
} |
|
567 |
if i >= len(data) || data[i] != ']' { |
|
568 |
return 0 |
|
569 |
} |
|
570 |
idEnd := i |
|
571 |
|
|
572 |
// spacer: colon (space | tab)* newline? (space | tab)* |
|
573 |
i++ |
|
574 |
if i >= len(data) || data[i] != ':' { |
|
575 |
return 0 |
|
576 |
} |
|
577 |
i++ |
|
578 |
for i < len(data) && (data[i] == ' ' || data[i] == '\t') { |
|
579 |
i++ |
|
580 |
} |
|
581 |
if i < len(data) && (data[i] == '\n' || data[i] == '\r') { |
|
582 |
i++ |
|
583 |
if i < len(data) && data[i] == '\n' && data[i-1] == '\r' { |
|
584 |
i++ |
|
585 |
} |
|
586 |
} |
|
587 |
for i < len(data) && (data[i] == ' ' || data[i] == '\t') { |
|
588 |
i++ |
|
589 |
} |
|
590 |
if i >= len(data) { |
|
591 |
return 0 |
|
592 |
} |
|
593 |
|
|
594 |
var ( |
|
595 |
linkOffset, linkEnd int |
|
596 |
titleOffset, titleEnd int |
|
597 |
lineEnd int |
|
598 |
raw []byte |
|
599 |
hasBlock bool |
|
600 |
) |
|
601 |
|
|
602 |
if p.flags&EXTENSION_FOOTNOTES != 0 && noteId != 0 { |
|
603 |
linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize) |
|
604 |
lineEnd = linkEnd |
|
605 |
} else { |
|
606 |
linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i) |
|
607 |
} |
|
608 |
if lineEnd == 0 { |
|
609 |
return 0 |
|
610 |
} |
|
611 |
|
|
612 |
// a valid ref has been found |
|
613 |
|
|
614 |
ref := &reference{ |
|
615 |
noteId: noteId, |
|
616 |
hasBlock: hasBlock, |
|
617 |
} |
|
618 |
|
|
619 |
if noteId > 0 { |
|
620 |
// reusing the link field for the id since footnotes don't have links |
|
621 |
ref.link = data[idOffset:idEnd] |
|
622 |
// if footnote, it's not really a title, it's the contained text |
|
623 |
ref.title = raw |
|
624 |
} else { |
|
625 |
ref.link = data[linkOffset:linkEnd] |
|
626 |
ref.title = data[titleOffset:titleEnd] |
|
627 |
} |
|
628 |
|
|
629 |
// id matches are case-insensitive |
|
630 |
id := string(bytes.ToLower(data[idOffset:idEnd])) |
|
631 |
|
|
632 |
p.refs[id] = ref |
|
633 |
|
|
634 |
return lineEnd |
|
635 |
} |
|
636 |
|
|
637 |
func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) { |
|
638 |
// link: whitespace-free sequence, optionally between angle brackets |
|
639 |
if data[i] == '<' { |
|
640 |
i++ |
|
641 |
} |
|
642 |
linkOffset = i |
|
643 |
if i == len(data) { |
|
644 |
return |
|
645 |
} |
|
646 |
for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' { |
|
647 |
i++ |
|
648 |
} |
|
649 |
linkEnd = i |
|
650 |
if data[linkOffset] == '<' && data[linkEnd-1] == '>' { |
|
651 |
linkOffset++ |
|
652 |
linkEnd-- |
|
653 |
} |
|
654 |
|
|
655 |
// optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) |
|
656 |
for i < len(data) && (data[i] == ' ' || data[i] == '\t') { |
|
657 |
i++ |
|
658 |
} |
|
659 |
if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' { |
|
660 |
return |
|
661 |
} |
|
662 |
|
|
663 |
// compute end-of-line |
|
664 |
if i >= len(data) || data[i] == '\r' || data[i] == '\n' { |
|
665 |
lineEnd = i |
|
666 |
} |
|
667 |
if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' { |
|
668 |
lineEnd++ |
|
669 |
} |
|
670 |
|
|
671 |
// optional (space|tab)* spacer after a newline |
|
672 |
if lineEnd > 0 { |
|
673 |
i = lineEnd + 1 |
|
674 |
for i < len(data) && (data[i] == ' ' || data[i] == '\t') { |
|
675 |
i++ |
|
676 |
} |
|
677 |
} |
|
678 |
|
|
679 |
// optional title: any non-newline sequence enclosed in '"() alone on its line |
|
680 |
if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') { |
|
681 |
i++ |
|
682 |
titleOffset = i |
|
683 |
|
|
684 |
// look for EOL |
|
685 |
for i < len(data) && data[i] != '\n' && data[i] != '\r' { |
|
686 |
i++ |
|
687 |
} |
|
688 |
if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' { |
|
689 |
titleEnd = i + 1 |
|
690 |
} else { |
|
691 |
titleEnd = i |
|
692 |
} |
|
693 |
|
|
694 |
// step back |
|
695 |
i-- |
|
696 |
for i > titleOffset && (data[i] == ' ' || data[i] == '\t') { |
|
697 |
i-- |
|
698 |
} |
|
699 |
if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') { |
|
700 |
lineEnd = titleEnd |
|
701 |
titleEnd = i |
|
702 |
} |
|
703 |
} |
|
704 |
|
|
705 |
return |
|
706 |
} |
|
707 |
|
|
708 |
// The first bit of this logic is the same as (*parser).listItem, but the rest |
|
709 |
// is much simpler. This function simply finds the entire block and shifts it |
|
710 |
// over by one tab if it is indeed a block (just returns the line if it's not). |
|
711 |
// blockEnd is the end of the section in the input buffer, and contents is the |
|
712 |
// extracted text that was shifted over one tab. It will need to be rendered at |
|
713 |
// the end of the document. |
|
714 |
func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) { |
|
715 |
if i == 0 || len(data) == 0 { |
|
716 |
return |
|
717 |
} |
|
718 |
|
|
719 |
// skip leading whitespace on first line |
|
720 |
for i < len(data) && data[i] == ' ' { |
|
721 |
i++ |
|
722 |
} |
|
723 |
|
|
724 |
blockStart = i |
|
725 |
|
|
726 |
// find the end of the line |
|
727 |
blockEnd = i |
|
728 |
for i < len(data) && data[i-1] != '\n' { |
|
729 |
i++ |
|
730 |
} |
|
731 |
|
|
732 |
// get working buffer |
|
733 |
var raw bytes.Buffer |
|
734 |
|
|
735 |
// put the first line into the working buffer |
|
736 |
raw.Write(data[blockEnd:i]) |
|
737 |
blockEnd = i |
|
738 |
|
|
739 |
// process the following lines |
|
740 |
containsBlankLine := false |
|
741 |
|
|
742 |
gatherLines: |
|
743 |
for blockEnd < len(data) { |
|
744 |
i++ |
|
745 |
|
|
746 |
// find the end of this line |
|
747 |
for i < len(data) && data[i-1] != '\n' { |
|
748 |
i++ |
|
749 |
} |
|
750 |
|
|
751 |
// if it is an empty line, guess that it is part of this item |
|
752 |
// and move on to the next line |
|
753 |
if p.isEmpty(data[blockEnd:i]) > 0 { |
|
754 |
containsBlankLine = true |
|
755 |
blockEnd = i |
|
756 |
continue |
|
757 |
} |
|
758 |
|
|
759 |
n := 0 |
|
760 |
if n = isIndented(data[blockEnd:i], indentSize); n == 0 { |
|
761 |
// this is the end of the block. |
|
762 |
// we don't want to include this last line in the index. |
|
763 |
break gatherLines |
|
764 |
} |
|
765 |
|
|
766 |
// if there were blank lines before this one, insert a new one now |
|
767 |
if containsBlankLine { |
|
768 |
raw.WriteByte('\n') |
|
769 |
containsBlankLine = false |
|
770 |
} |
|
771 |
|
|
772 |
// get rid of that first tab, write to buffer |
|
773 |
raw.Write(data[blockEnd+n : i]) |
|
774 |
hasBlock = true |
|
775 |
|
|
776 |
blockEnd = i |
|
777 |
} |
|
778 |
|
|
779 |
if data[blockEnd-1] != '\n' { |
|
780 |
raw.WriteByte('\n') |
|
781 |
} |
|
782 |
|
|
783 |
contents = raw.Bytes() |
|
784 |
|
|
785 |
return |
|
786 |
} |
|
787 |
|
|
788 |
// |
|
789 |
// |
|
790 |
// Miscellaneous helper functions |
|
791 |
// |
|
792 |
// |
|
793 |
|
|
794 |
// ispunct reports whether c is one of the ASCII punctuation symbols
// !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ (the set comes from a private function
// in the standard library's regexp package).
func ispunct(c byte) bool {
	// the 32 symbols form four contiguous ASCII ranges
	switch {
	case c >= '!' && c <= '/':
		return true
	case c >= ':' && c <= '@':
		return true
	case c >= '[' && c <= '`':
		return true
	case c >= '{' && c <= '~':
		return true
	}
	return false
}
|
804 |
|
|
805 |
// Test if a character is a whitespace character. |
|
806 |
func isspace(c byte) bool { |
|
807 |
return ishorizontalspace(c) || isverticalspace(c) |
|
808 |
} |
|
809 |
|
|
810 |
// ishorizontalspace reports whether c is a space or a tab.
func ishorizontalspace(c byte) bool {
	switch c {
	case ' ', '\t':
		return true
	}
	return false
}
|
814 |
|
|
815 |
// isverticalspace reports whether c is a line feed, carriage return, form
// feed or vertical tab.
func isverticalspace(c byte) bool {
	switch c {
	case '\n', '\r', '\f', '\v':
		return true
	}
	return false
}
|
819 |
|
|
820 |
// isletter reports whether c is an ASCII letter (a-z or A-Z).
func isletter(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
|
824 |
|
|
825 |
// Test if a character is a letter or a digit. |
|
826 |
// TODO: check when this is looking for ASCII alnum and when it should use unicode |
|
827 |
func isalnum(c byte) bool { |
|
828 |
return (c >= '0' && c <= '9') || isletter(c) |
|
829 |
} |
|
830 |
|
|
831 |
// expandTabs writes line to out with every tab replaced by spaces up to the
// next multiple of tabSize columns. Columns are counted in runes, not bytes.
// Nothing but the expanded line is written; in particular no newline is
// appended.
func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
	// Common cases first: no tabs at all, or tabs only at the very start
	// of the line. A tab anywhere else forces the rune-counting path.
	leadingTabs := 0
	interiorTab := false
	for pos, b := range line {
		if b != '\t' {
			continue
		}
		if pos != leadingTabs {
			interiorTab = true
			break
		}
		leadingTabs++
	}

	// Leading tabs each start on a tab stop, so every one of them is
	// exactly tabSize spaces wide and no rune decoding is needed.
	if !interiorTab {
		for n := leadingTabs * tabSize; n > 0; n-- {
			out.WriteByte(' ')
		}
		out.Write(line[leadingTabs:])
		return
	}

	// Slow path: count runes to know how many spaces each tab stands for.
	column := 0
	for pos := 0; pos < len(line); {
		// copy the run of non-tab text, one column per rune
		runStart := pos
		for pos < len(line) && line[pos] != '\t' {
			_, width := utf8.DecodeRune(line[pos:])
			pos += width
			column++
		}
		if pos > runStart {
			out.Write(line[runStart:pos])
		}

		if pos >= len(line) {
			break
		}

		// pad with at least one space up to the next tab stop
		for {
			out.WriteByte(' ')
			column++
			if column%tabSize == 0 {
				break
			}
		}

		// step over the tab itself
		pos++
	}
}
|
888 |
|
|
889 |
// isIndented reports how many leading bytes of data make up one level of
// indentation: 1 when data starts with a tab, indentSize when it starts with
// indentSize spaces, and 0 when the line is not indented (or is empty).
func isIndented(data []byte, indentSize int) int {
	switch {
	case len(data) == 0:
		return 0
	case data[0] == '\t':
		// a single tab counts as a full indent
		return 1
	case len(data) < indentSize:
		// too short to hold indentSize spaces
		return 0
	}

	for i := 0; i < indentSize; i++ {
		if data[i] != ' ' {
			return 0
		}
	}
	return indentSize
}
|
908 |
|
|
909 |
// Create a url-safe slug for fragments |
|
910 |
func slugify(in []byte) []byte { |
|
911 |
if len(in) == 0 { |
|
912 |
return in |
|
913 |
} |
|
914 |
out := make([]byte, 0, len(in)) |
|
915 |
sym := false |
|
916 |
|
|
917 |
for _, ch := range in { |
|
918 |
if isalnum(ch) { |
|
919 |
sym = false |
|
920 |
out = append(out, ch) |
|
921 |
} else if sym { |
|
922 |
continue |
|
923 |
} else { |
|
924 |
out = append(out, '-') |
|
925 |
sym = true |
|
926 |
} |
|
927 |
} |
|
928 |
var a, b int |
|
929 |
var ch byte |
|
930 |
for a, ch = range out { |
|
931 |
if ch != '-' { |
|
932 |
break |
|
933 |
} |
|
934 |
} |
|
935 |
for b = len(out) - 1; b > 0; b-- { |
|
936 |
if out[b] != '-' { |
|
937 |
break |
|
938 |
} |
|
939 |
} |
|
940 |
return out[a : b+1] |
|
941 |
} |