root/HTTP/markdown.lua

Revision 1443 (checked in by rsz, 2 months ago)

cleanup

Line 
1 #!/usr/bin/env lua
2
3 --[[
4 # markdown.lua -- version 0.32
5
6 <http://www.frykholm.se/files/markdown.lua>
7
8 **Author:** Niklas Frykholm, <niklas@frykholm.se> 
9 **Date:** 31 May 2008
10
11 This is an implementation of the popular text markup language Markdown in pure Lua.
12 Markdown can convert documents written in a simple and easy to read text format
13 to well-formatted HTML. For a more thorough description of Markdown and the Markdown
14 syntax, see <http://daringfireball.net/projects/markdown>.
15
16 The original Markdown source is written in Perl and makes heavy use of advanced
17 regular expression techniques (such as negative look-ahead, etc) which are not available
18 in Lua's simple regex engine. Therefore this Lua port has been rewritten from the ground
19 up. It is probably not completely bug free. If you notice any bugs, please report them to
20 me. A unit test that exposes the error is helpful.
21
22 ## Usage
23
24     require "markdown"
25     markdown(source)
26
27 ``markdown.lua`` exposes a single global function named ``markdown(s)`` which applies the
28 Markdown transformation to the specified string.
29
30 ``markdown.lua`` can also be used directly from the command line:
31
32         lua markdown.lua test.md
33
34 Creates a file ``test.html`` with the converted content of ``test.md``. Run:
35
36     lua markdown.lua -h
37
38 For a description of the command-line options.
39
40 ``markdown.lua`` uses the same license as Lua, the MIT license.
41
42 ## License
43
44 Copyright &copy; 2008 Niklas Frykholm.
45
46 Permission is hereby granted, free of charge, to any person obtaining a copy of this
47 software and associated documentation files (the "Software"), to deal in the Software
48 without restriction, including without limitation the rights to use, copy, modify, merge,
49 publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
50 to whom the Software is furnished to do so, subject to the following conditions:
51
52 The above copyright notice and this permission notice shall be included in all copies
53 or substantial portions of the Software.
54
55 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
56 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
57 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
58 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
59 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
60 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
61 THE SOFTWARE.
62
63 ## Version history
64
65 -       **0.32** -- 31 May 2008
66         - Fix for links containing brackets
67 -       **0.31** -- 1 Mar 2008
68         -       Fix for link definitions followed by spaces
69 -       **0.30** -- 25 Feb 2008
70         -       Consistent behavior with Markdown when the same link reference is reused
71 -       **0.29** -- 24 Feb 2008
72         -       Fix for <pre> blocks with spaces in them
73 -       **0.28** -- 18 Feb 2008
74         -       Fix for link encoding
75 -       **0.27** -- 14 Feb 2008
76         -       Fix for link database links with ()
77 -       **0.26** -- 06 Feb 2008
78         -       Fix for nested italic and bold markers
79 -       **0.25** -- 24 Jan 2008
80         -       Fix for encoding of naked <
81 -       **0.24** -- 21 Jan 2008
82         -       Fix for link behavior.
83 -       **0.23** -- 10 Jan 2008
84         -       Fix for a regression bug in longer expressions in italic or bold.
85 -       **0.22** -- 27 Dec 2007
86         -       Fix for crash when processing blocks with a percent sign in them.
87 -       **0.21** -- 27 Dec 2007
88         -       Fix for combined strong and emphasis tags
89 -       **0.20** -- 13 Oct 2007
90         -       Fix for < as well in image titles, now matches Dingus behavior
91 -       **0.19** -- 28 Sep 2007
92         -       Fix for quotation marks " and ampersands & in link and image titles.
93 -       **0.18** -- 28 Jul 2007
94         -       Does not crash on unmatched tags (behaves like standard markdown)
95 -       **0.17** -- 12 Apr 2007
96         -       Fix for links with %20 in them.
97 -       **0.16** -- 12 Apr 2007
98         -       Do not require arg global to exist.
99 -       **0.15** -- 28 Aug 2006
100         -       Better handling of links with underscores in them.
101 -       **0.14** -- 22 Aug 2006
102         -       Bug for *`foo()`*
103 -       **0.13** -- 12 Aug 2006
104         -       Added -l option for including stylesheet inline in document.
105         -       Fixed bug in -s flag.
106         -       Fixed emphasis bug.
107 -       **0.12** -- 15 May 2006
108         -       Fixed several bugs to comply with MarkdownTest 1.0 <http://six.pairlist.net/pipermail/markdown-discuss/2004-December/000909.html>
109 -       **0.11** -- 12 May 2006
110         -       Fixed bug for escaping `*` and `_` inside code spans.
111         -       Added license terms.
112         -       Changed join() to table.concat().
113 -       **0.10** -- 3 May 2006
114         -       Initial public release.
115
116 // Niklas
117 ]]
118
119
-- Everything defined below lands in the private table M rather than in the
-- real global table. Reads of names that M does not hold fall through to _G
-- via the __index metamethod, so the standard library stays reachable.
local MT = {__index = _G}
local M = setmetatable({}, MT)
-- Make M the environment of this chunk (Lua 5.1 setfenv).
setfenv(1, M)
125
126 ----------------------------------------------------------------------
127 -- Utility functions
128 ----------------------------------------------------------------------
129
-- Locks table t against the creation of new keys: any attempt to assign to a
-- key that does not already exist in t raises an error that is reported at the
-- position of the offending assignment. Keys that already exist can still be
-- reassigned. This is useful for detecting variables that have accidentally
-- been made global.
--
-- If t already has a metatable, its __index is carried over to the new
-- metatable so that existing lookups keep working; other metamethods of the
-- old metatable are not preserved.
function lock(t)
        -- Kept local on purpose: the handler must not itself become a new
        -- entry in the environment that is being locked.
        local function lock_new_index(_, k)
                -- tostring() keeps the message usable for keys that cannot be
                -- concatenated directly (booleans, tables, functions, ...).
                error("module has been locked -- " .. tostring(k) .. " must be declared local", 2)
        end

        local mt = {__newindex = lock_new_index}
        local previous = getmetatable(t)
        if previous then mt.__index = previous.__index end
        setmetatable(t, mt)
end
142
-- Builds a new table that has the same keys as t, where each value is the
-- result of calling f(value, key) on the corresponding entry of t.
-- t itself is not modified by map.
function map(t, f)
        local mapped = {}
        for key, value in pairs(t) do
                local result = f(value, key)
                mapped[key] = result
        end
        return mapped
end
149
-- Returns its argument unchanged. Handy wherever a transformation function
-- is required but nothing should be done.
function identity(text)
        return text
end
152
-- Functional-style conditional: yields a when t is truthy, b otherwise.
-- NOTE: both a and b are ordinary arguments, so they are always evaluated
-- by the caller (there is no short-circuit evaluation).
function iff(t, a, b)
        if not t then
                return b
        end
        return a
end
155
-- Splits text at every occurrence of sep and returns the pieces as an array.
-- sep defaults to "\n" and is interpreted as a Lua pattern by string.find.
-- The separators themselves are not included in the pieces; text that ends
-- with a separator yields a trailing empty string.
function split(text, sep)
        sep = sep or "\n"
        local pieces = {}
        local cursor = 1
        local first, last = text:find(sep, cursor)
        while first do
                pieces[#pieces + 1] = text:sub(cursor, first - 1)
                cursor = last + 1
                first, last = text:find(sep, cursor)
        end
        -- Whatever remains after the final separator is the last piece.
        pieces[#pieces + 1] = text:sub(cursor)
        return pieces
end
169
-- Replaces every tab with spaces, padding to the next multiple of four
-- columns (a tab always expands to between one and four spaces).
function detab(text)
        local tab_width = 4
        -- prefix is the run of non-newline characters preceding the tab within
        -- this match; its length determines how far away the next tab stop is.
        local function expand(prefix)
                local padding = tab_width - #prefix % tab_width
                return prefix .. string.rep(" ", padding)
        end
        local expanded = text:gsub("([^\n]-)\t", expand)
        return expanded
end
181
-- Runs string.find on s (starting at index) once for each pattern in the list
-- and returns the complete result of the match that starts earliest: start
-- position, end position and any captures. When several patterns match at the
-- same position, the one listed first wins. Returns nothing if no pattern
-- matches.
function find_first(s, patterns, index)
        local best = {}
        for _, pattern in ipairs(patterns) do
                local hit = {s:find(pattern, index)}
                local hit_start = hit[1]
                if hit_start ~= nil then
                        if best[1] == nil or hit_start < best[1] then
                                best = hit
                        end
                end
        end
        return unpack(best)
end
191
-- Array splicing helper with two modes.
--
-- With a replacement array: the elements in the inclusive range [start, stop]
-- are removed from array, the elements of replacement are inserted in their
-- place (in their original order), and the modified array is returned. The
-- array is changed in place.
--
-- Without a replacement array: a new array holding the elements of the range
-- [start, stop] is returned and array is left untouched.
function splice(array, start, stop, replacement)
        if not replacement then
                local section = {}
                for i = start, stop do
                        section[#section + 1] = array[i]
                end
                return section
        end

        -- Remove the old range; each removal shifts the tail down, so the
        -- position to remove stays at start.
        for _ = start, stop do
                table.remove(array, start)
        end
        -- Insert each replacement element at its own offset. Inserting every
        -- element at start would reverse the order of a multi-element
        -- replacement.
        for offset, value in ipairs(replacement) do
                table.insert(array, start + offset - 1, value)
        end
        return array
end
214
-- Outdents the text one step: strips between one and four leading spaces
-- from the start of every line that begins with a space.
function outdent(text)
        -- A newline is prepended so that the first line is handled by the same
        -- pattern as all the others; it is dropped again before returning.
        local shifted = ("\n" .. text):gsub("\n  ? ? ?", "\n")
        return shifted:sub(2)
end
222
-- Indents the text one step by inserting four spaces after every newline.
-- The first line is therefore left as it is; only continuation lines move.
function indent(text)
        local indented = text:gsub("\n", "\n    ")
        return indented
end
228
-- Performs a simple tokenization of html data and returns a list of tokens.
-- Each token is a table with a type field ("tag" or "text") and a text field
-- holding the original characters of the token.
--
-- Comments (<!-- ... -->), processing instructions (<? ... ?>) and ordinary
-- tags (a balanced <...> pair) become "tag" tokens. A "<" that opens a
-- construct which is never closed is emitted as a one-character "text" token
-- and scanning resumes right after it. The final token is always a "text"
-- token with the remaining input, which may be empty.
function tokenize_html(html)
        local tokens = {}
        local function emit(kind, first, last)
                tokens[#tokens + 1] = {type = kind, text = html:sub(first, last)}
        end

        local cursor = 1
        while true do
                local open = find_first(html, {"<!%-%-", "<[a-z/!$]", "<%?"}, cursor)
                if not open then
                        emit("text", cursor)
                        break
                end
                -- Plain text between the previous token and this tag start.
                if open ~= cursor then emit("text", cursor, open - 1) end

                -- Choose the closing pattern that belongs to the opener.
                local closer
                if html:match("^<!%-%-", open) then
                        closer = "%-%->"
                elseif html:match("^<%?", open) then
                        closer = "?>"
                else
                        closer = "%b<>"
                end

                local _, close = html:find(closer, open)
                if close then
                        emit("tag", open, close)
                        cursor = close + 1
                else
                        -- Unterminated construct: keep the "<" as literal text.
                        emit("text", open, open)
                        cursor = open + 1
                end
        end
        return tokens
end
262
263 ----------------------------------------------------------------------
264 -- Hash
265 ----------------------------------------------------------------------
266
267 -- This is used to "hash" data into alphanumeric strings that are unique
268 -- in the document. (Note that this is not a cryptographic hash, the hash
269 -- function is not one-way.) The hash procedure is used to protect parts
270 -- of the document from further processing.
271
-- State of the hashing module. All fields are reset by init_hash().
local HASH = {
        -- Maps an already hashed string to the id that was handed out for it.
        table = {},

        -- Incremented for every new string that gets hashed.
        counter = 0,

        -- Unique prefix of all hash values. It is chosen so that hash values
        -- do not accidentally coincide with a string that already exists in
        -- the document.
        identifier = "",

        -- Set to true once init_hash() has run.
        inited = false
}
287
-- Initializes hashing for a document. Resets the hash state and picks an
-- identifier of the form "HASH<n>" (smallest n starting from 0) that does not
-- occur anywhere in text, so generated hash values cannot collide with
-- existing document content.
function init_hash(text)
        HASH.inited = true
        HASH.identifier = ""
        HASH.counter = 0
        HASH.table = {}

        local attempt = 0
        local candidate = "HASH" .. attempt
        -- Plain (non-pattern) search; move on to the next number on a hit.
        while text:find(candidate, 1, true) do
                attempt = attempt + 1
                candidate = "HASH" .. attempt
        end
        HASH.identifier = candidate
end
306
-- Returns the hash value for s. The same string always yields the same value
-- within one init_hash() session; a new value has the form
-- <identifier><counter>X. Fails an assertion if init_hash() has not run.
function hash(s)
        assert(HASH.inited)
        local known = HASH.table[s]
        if known then return known end

        HASH.counter = HASH.counter + 1
        local id = HASH.identifier .. HASH.counter .. "X"
        HASH.table[s] = id
        return id
end
317
318 ----------------------------------------------------------------------
319 -- Protection
320 ----------------------------------------------------------------------
321
322 -- The protection module is used to "protect" parts of a document
323 -- so that they are not modified by subsequent processing steps.
324 -- Protected parts are saved in a table for later unprotection
325
-- Protection data
local PD = {
        -- Names of the block level tags whose content gets protected.
        tags = {
                "p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote",
                "pre", "table", "dl", "ol", "ul", "script", "noscript", "form",
                "fieldset", "iframe", "math", "ins", "del"
        },

        -- Protected text, keyed by the hash that stands in for it in the
        -- document.
        blocks = {}
}
336
-- Builds the Lua pattern for a block tag whose opening and closing tags both
-- start in the leftmost column. Because the closing tag must directly follow
-- a newline, indented nested tags of the same name are skipped over, i.e.
-- <div>
--    A nested block.
--    <div>
--        Nested data.
--     </div>
-- </div>
-- is matched as a whole.
function block_pattern(tag)
        local template = "\n<%s.-\n</%s>[ \t]*\n"
        return template:format(tag, tag)
end
348
-- Builds the Lua pattern for a block tag that starts right after a newline
-- and whose closing tag is followed only by spaces or tabs up to the next
-- newline. Unlike block_pattern, the closing tag may sit anywhere on its line.
function line_pattern(tag)
        local template = "\n<%s.-</%s>[ \t]*\n"
        return template:format(tag, tag)
end
353
-- Protects the characters from start to stop in text: the range is stored in
-- PD.blocks under its hash and the hash is put into the returned text in its
-- place.
--
-- The characters at positions start and stop themselves are kept in the text
-- around the hash (they are also part of the stored range). The patterns used
-- by protect() begin and end with a newline, so the hash ends up alone on its
-- own line.
function protect_range(text, start, stop)
        local fragment = text:sub(start, stop)
        local placeholder = hash(fragment)
        PD.blocks[placeholder] = fragment
        local head = text:sub(1, start)
        local tail = text:sub(stop)
        return head .. placeholder .. tail
end
363
-- Protects every part of text that matches any of the patterns. The search is
-- restarted from the beginning of the text after each protection, and the
-- earliest match is always handled first. Returns the protected text.
function protect_matches(text, patterns)
        local start, stop = find_first(text, patterns)
        while start do
                text = protect_range(text, start, stop)
                start, stop = find_first(text, patterns)
        end
        return text
end
374
-- Protects block level html in the specified text and returns the result.
-- The passes run in a fixed order:
--   1. block tags that may contain nested, indented tags of the same name,
--   2. block tags at the line level,
--   3. <hr> tags,
--   4. html comments.
function protect(text)
        local passes = {
                map(PD.tags, block_pattern),
                map(PD.tags, line_pattern),
                {"\n<hr[^>]->[ \t]*\n"},
                {"\n<!%-%-.-%-%->[ \t]*\n"},
        }
        for _, patterns in ipairs(passes) do
                text = protect_matches(text, patterns)
        end
        return text
end
386
-- Tells whether s is a hash produced by protection. The result is the stored
-- original text (a truthy value) when it is, and nil when it is not.
function is_protected(s)
        local original = PD.blocks[s]
        return original
end
391
-- Unprotects text by replacing every protection hash that occurs in it with
-- the original content stored for that hash. Returns the expanded text.
function unprotect(text)
        for placeholder, original in pairs(PD.blocks) do
                -- A function replacement inserts the stored text verbatim, so
                -- percent signs in it need no escaping.
                text = text:gsub(placeholder, function() return original end)
        end
        return text
end
400
401
402 ----------------------------------------------------------------------
403 -- Block transform
404 ----------------------------------------------------------------------
405
406 -- The block transform functions transform the text on the block level.
407 -- They work with the text as an array of lines rather than as individual
408 -- characters.
409
-- Returns true if line is a ruler made of char characters, false otherwise.
-- A ruler consists solely of spaces and char characters and contains at
-- least three char characters.
function is_ruler_of(line, char)
        -- char is always escaped with % so that pattern magic characters such
        -- as * and - are taken literally.
        local c = "%" .. char
        local only_ruler_chars = "^[ " .. c .. "]*$"
        local at_least_three = c .. ".*" .. c .. ".*" .. c
        return line:match(only_ruler_chars) ~= nil
                and line:match(at_least_three) ~= nil
end
418
-- Identifies the block level formatting present in a single line.
-- Returns an info table that always has the fields line (the original line),
-- text (the line content with block markers stripped where applicable) and
-- type. Depending on type further fields are set:
--   "indented"   outdented  - the line without its first four characters
--   "ruler"      ruler_char - the character the ruler is made of
--   "blank"
--   "header"     level      - the number of leading # characters
--   "list_item"  list_type  - "numeric" (with number) or "bullet" (with bullet)
--   "blockquote"
--   "raw"        html       - the unprotected content of a protection hash
--   "normal"
-- The checks are made in the order listed above and the first hit wins.
function classify(line)
        local info = {line = line, text = line}

        if line:sub(1, 4) == "    " then
                info.type = "indented"
                info.outdented = line:sub(5)
                return info
        end

        for _, ruler_char in ipairs({'*', '-', '_', '='}) do
                if is_ruler_of(line, ruler_char) then
                        info.type = "ruler"
                        info.ruler_char = ruler_char
                        return info
                end
        end

        if line == "" then
                info.type = "blank"
                return info
        end

        -- "# Title" style header, with optional closing #s.
        local hashes, title = line:match("^(#+)[ \t]*(.-)[ \t]*#*[ \t]*$")
        if hashes then
                info.type = "header"
                info.level = hashes:len()
                info.text = title
                return info
        end

        -- "1. item", indented by at most three spaces.
        local number, numbered_text = line:match("^ ? ? ?(%d+)%.[ \t]+(.+)")
        if number then
                info.type = "list_item"
                info.list_type = "numeric"
                info.number = 0 + number
                info.text = numbered_text
                return info
        end

        -- "* item", "+ item" or "- item", indented by at most three spaces.
        local bullet, bullet_text = line:match("^ ? ? ?([%*%+%-])[ \t]+(.+)")
        if bullet then
                info.type = "list_item"
                info.list_type = "bullet"
                info.bullet = bullet
                info.text = bullet_text
                return info
        end

        local quoted = line:match("^>[ \t]?(.*)")
        if quoted then
                info.type = "blockquote"
                info.text = quoted
                return info
        end

        if is_protected(line) then
                info.type = "raw"
                info.html = unprotect(line)
                return info
        end

        info.type = "normal"
        return info
end
483
-- Finds headers consisting of a normal line followed by a ruler of "=" or "-"
-- characters and converts each such pair into a single header entry
-- (level 1 for "=", level 2 for "-"). The array is modified in place and
-- also returned.
function headers(array)
        local i = 1
        -- #array is re-read on every pass because converted rulers are removed.
        while i < #array do
                local current, following = array[i], array[i + 1]
                local ruler = following.type == "ruler" and following.ruler_char
                if current.type == "normal" and (ruler == "-" or ruler == "=") then
                        table.remove(array, i + 1)
                        array[i] = {
                                line = current.line,
                                text = current.line,
                                type = "header",
                                level = iff(ruler == "=", 1, 2)
                        }
                end
                i = i + 1
        end
        return array
end
502
-- Finds list blocks in an array of classified lines and replaces each of them
-- with a single "raw" entry holding the generated <ol>/<ul> html. List items
-- that are left over because they do not belong to any list are turned into
-- normal lines. Returns the resulting array.
--
-- sublist should be true when processing the content of a list item; a
-- sublist may then start at any list item line without a preceding blank.
function lists(array, sublist)
        -- True if any of the entries is a blank line.
        local function has_blank(entries)
                for _, entry in ipairs(entries) do
                        if entry.type == "blank" then return true end
                end
                return false
        end

        -- Groups the entries of a list into one array per item. Each list_item
        -- line after the first one starts a new group.
        local function group_items(entries)
                local groups = {}
                local current = {entries[1]}
                for i = 2, #entries do
                        local entry = entries[i]
                        if entry.type == "list_item" then
                                groups[#groups + 1] = current
                                current = {entry}
                        else
                                current[#current + 1] = entry
                        end
                end
                groups[#groups + 1] = current
                return groups
        end

        -- Renders the lines of one item as an <li> element. With block set the
        -- content goes through the full block transform, otherwise only nested
        -- lists are processed and no paragraph tags are generated.
        local function render_item(lines, block)
                while lines[#lines].type == "blank" do
                        table.remove(lines)
                end

                -- The first line contributes its text without the list marker,
                -- the following lines are outdented one step.
                local parts = {lines[1].text}
                for i = 2, #lines do
                        parts[i] = outdent(lines[i].line)
                end
                local itemtext = table.concat(parts, "\n")

                if block then
                        itemtext = block_transform(itemtext, true)
                else
                        local inner = lists(map(split(itemtext), classify), true)
                        itemtext = table.concat(blocks_to_html(inner, true), "\n")
                end
                -- Content containing a <pre> block must not be re-indented.
                if not itemtext:find("<pre>") then itemtext = indent(itemtext) end
                return "    <li>" .. itemtext .. "</li>"
        end

        -- Renders the complete list held in entries as <ol> or <ul> html. The
        -- type of the first item decides which one is used. A list with at
        -- least one blank line in it gets block-level items.
        local function render_list(entries)
                local block_list = has_blank(entries)
                local rendered = {}
                for _, item in ipairs(group_items(entries)) do
                        rendered[#rendered + 1] = render_item(item, block_list) .. "\n"
                end
                local body = table.concat(rendered)
                if entries[1].list_type == "numeric" then
                        return "<ol>\n" .. body .. "</ol>"
                end
                return "<ul>\n" .. body .. "</ul>"
        end

        -- Index of the line where the first list starts, or nil. A list starts
        -- with a list item on the first line or a list item preceded by a
        -- blank line; inside a sublist any list item starts a list.
        local function list_start(entries, in_sublist)
                if entries[1].type == "list_item" then return 1 end
                if in_sublist then
                        for i = 1, #entries do
                                if entries[i].type == "list_item" then return i end
                        end
                else
                        for i = 2, #entries do
                                if entries[i].type == "list_item" and entries[i - 1].type == "blank" then
                                        return i
                                end
                        end
                end
                return nil
        end

        -- Index of the last line of the list that starts at start. The list
        -- ends before the first blank line that is followed by something
        -- other than a list item, an indented line or another blank line, or
        -- at the end of the array. Trailing blank lines are not part of it.
        local function list_end(entries, start)
                local last = #entries
                for i = start, #entries - 1 do
                        local next_type = entries[i + 1].type
                        if entries[i].type == "blank" and next_type ~= "list_item"
                                and next_type ~= "indented" and next_type ~= "blank" then
                                last = i - 1
                                break
                        end
                end
                while last > start and entries[last].type == "blank" do
                        last = last - 1
                end
                return last
        end

        -- Replace lists one at a time until none is left.
        while true do
                local start = list_start(array, sublist)
                if not start then break end
                local stop = list_end(array, start)
                local html = render_list(splice(array, start, stop))
                local raw = {line = html, type = "raw", html = html}
                array = splice(array, start, stop, {raw})
        end

        -- Convert any remaining list items to normal
        for _, entry in ipairs(array) do
                if entry.type == "list_item" then entry.type = "normal" end
        end

        return array
end
625
-- Finds runs of blockquote lines and replaces each run with a single "raw"
-- entry that holds the generated <blockquote> html. Returns the resulting
-- array of lines.
function blockquotes(lines)
        -- Returns the first and last index of the first blockquote in entries,
        -- or nil if there is none. A blockquote continues over blank and
        -- blockquote lines, and over normal lines that do not follow a blank
        -- line. Trailing blank lines are excluded.
        local function locate(entries)
                local first
                for i, entry in ipairs(entries) do
                        if entry.type == "blockquote" then
                                first = i
                                break
                        end
                end
                if not first then return nil end

                local last = #entries
                for i = first + 1, #entries do
                        local kind = entries[i].type
                        if kind ~= "blank" and kind ~= "blockquote" then
                                if kind ~= "normal" or entries[i - 1].type == "blank" then
                                        last = i - 1
                                        break
                                end
                        end
                end
                while entries[last].type == "blank" do last = last - 1 end
                return first, last
        end

        -- Runs the quoted text through the block transform and wraps the
        -- result in a <blockquote> element.
        local function render(entries)
                local texts = {}
                for i = 1, #entries do
                        texts[i] = entries[i].text
                end
                local html = block_transform(table.concat(texts, "\n"))
                -- Content containing a <pre> block must not be re-indented.
                if not html:find("<pre>") then html = indent(html) end
                return "<blockquote>\n    " .. html .. "\n</blockquote>"
        end

        local start, stop = locate(lines)
        while start do
                local html = render(splice(lines, start, stop))
                local raw = {line = html, type = "raw", html = html}
                lines = splice(lines, start, stop, {raw})
                start, stop = locate(lines)
        end
        return lines
end
675
-- Finds runs of indented lines and replaces each run with a single "raw"
-- entry that holds the generated <pre><code> html. Returns the resulting
-- array of lines.
function codeblocks(lines)
        -- Returns the first and last index of the first code block in entries,
        -- or nil if there is none. A code block starts at an indented line and
        -- continues over indented and blank lines; trailing blank lines are
        -- excluded.
        local function locate(entries)
                local first
                for i, entry in ipairs(entries) do
                        if entry.type == "indented" then
                                first = i
                                break
                        end
                end
                if not first then return nil end

                local last = #entries
                for i = first + 1, #entries do
                        local kind = entries[i].type
                        if kind ~= "indented" and kind ~= "blank" then
                                last = i - 1
                                break
                        end
                end
                while entries[last].type == "blank" do last = last - 1 end
                return first, last
        end

        -- Outdents, encodes and detabs every line and wraps the result in
        -- <pre><code>. (encode_code is defined outside this part of the file.)
        local function render(entries)
                local rendered = {}
                for i = 1, #entries do
                        rendered[i] = detab(encode_code(outdent(entries[i].line)))
                end
                return "<pre><code>" .. table.concat(rendered, "\n") .. "\n</code></pre>"
        end

        local start, stop = locate(lines)
        while start do
                local html = render(splice(lines, start, stop))
                local raw = {line = html, type = "raw", html = html}
                lines = splice(lines, start, stop, {raw})
                start, stop = locate(lines)
        end
        return lines
end
717
-- Converts an array of classified lines to an array of html strings.
--   normal  - consecutive normal lines are joined with newlines, span
--             transformed and wrapped in <p> unless no_paragraphs is set
--   header  - <hN> element with the span transformed header text
--   raw     - the html stored in the entry
--   ruler   - <hr/>
--   others  - the original line, unchanged
function blocks_to_html(lines, no_paragraphs)
        local out = {}
        local count = #lines
        local i = 1
        while i <= count do
                local entry = lines[i]
                local kind = entry.type
                if kind == "normal" then
                        -- Swallow the directly following normal lines as well.
                        local paragraph = {entry.line}
                        while i < count and lines[i + 1].type == "normal" do
                                i = i + 1
                                paragraph[#paragraph + 1] = lines[i].line
                        end
                        local html = span_transform(table.concat(paragraph, "\n"))
                        if not no_paragraphs then
                                html = "<p>" .. html .. "</p>"
                        end
                        out[#out + 1] = html
                elseif kind == "header" then
                        local tag = "h" .. entry.level
                        out[#out + 1] = "<" .. tag .. ">" .. span_transform(entry.text) .. "</" .. tag .. ">"
                elseif kind == "raw" then
                        out[#out + 1] = entry.html
                elseif kind == "ruler" then
                        out[#out + 1] = "<hr/>"
                else
                        out[#out + 1] = entry.line
                end
                i = i + 1
        end
        return out
end
751
-- Performs all the block level transforms on text and returns the resulting
-- html as a single string. The text is split into lines and classified, then
-- headers, lists, code blocks and blockquotes are processed in that order
-- before the lines are converted to html and joined with newlines.
-- sublist is handed on to lists().
function block_transform(text, sublist)
        local classified = map(split(text), classify)
        local with_headers = headers(classified)
        local with_lists = lists(with_headers, sublist)
        local with_code = codeblocks(with_lists)
        local with_quotes = blockquotes(with_code)
        return table.concat(blocks_to_html(with_quotes), "\n")
end
764
-- Debug helper: prints index, type and text (or, if there is no text, the
-- original line) of every entry in a line array, to inspect the result of
-- partial transforms.
function print_lines(lines)
        for index, entry in ipairs(lines) do
                local shown = entry.text or entry.line
                print(index, entry.type, shown)
        end
end
772
773 ----------------------------------------------------------------------
774 -- Span transform
775 ----------------------------------------------------------------------
776
777 -- Functions for transforming the text at the span level.
778
-- Characters that have a special meaning in markdown and may therefore
-- need to be escaped.
escape_chars = "'\\`*_{}[]()>#+-.!'"
-- Lookup from a piece of text to the value hash() produced for it;
-- (re)built by init_escape_table().
escape_table = {}

-- Reset escape_table and give every character of escape_chars an entry
-- holding hash(character).
function init_escape_table()
        escape_table = {}
        for ch in escape_chars:gmatch(".") do
                escape_table[ch] = hash(ch)
        end
end
791
-- Make sure `text` has an entry in the escape table, creating one with
-- hash(text) when it is missing, and return the value stored for it.
function add_escape(text)
        local token = escape_table[text]
        if not token then
                token = hash(text)
                escape_table[text] = token
        end
        return token
end
799
-- Escape characters that should not be disturbed by markdown.
--
-- The text is split with tokenize_html(). Tokens of type "tag" get their
-- "*" and "_" replaced by the matching escape_table values so they do not
-- conflict with the use of those characters in markdown; every other token
-- is passed through encode_backslash_escapes().
-- Returns the processed tokens joined back into one string.
function escape_special_chars(text)
        local tokens = tokenize_html(text)

        -- Collect the pieces and join once at the end; appending with ".."
        -- inside the loop would re-copy the growing result for every token.
        local pieces = {}
        for _, token in ipairs(tokens) do
                local t = token.text
                if token.type == "tag" then
                        -- In tags, encode * and _ so they don't conflict with their use in markdown.
                        t = t:gsub("%*", escape_table["*"])
                        t = t:gsub("%_", escape_table["_"])
                else
                        t = encode_backslash_escapes(t)
                end
                pieces[#pieces + 1] = t
        end
        return table.concat(pieces)
end
818
-- Encode backslash-escaped characters in the markdown source: for each
-- character c listed in escape_chars, every "\c" sequence in t is replaced
-- by the value stored for c in escape_table. Returns the encoded string.
function encode_backslash_escapes(t)
        for ch in escape_chars:gmatch(".") do
                local escaped_sequence = "\\%" .. ch
                t = t:gsub(escaped_sequence, escape_table[ch])
        end
        return t
end
827
-- Unescape characters that have been encoded: every value of escape_table
-- found in t is replaced by the text it stands for. Passes are repeated
-- until one leaves the string unchanged, since restored text may itself
-- contain further encoded values.
function unescape_special_chars(t)
        local before
        repeat
                before = t
                for original, token in pairs(escape_table) do
                        -- "%" is special in a gsub replacement string, so double it.
                        local replacement = original:gsub("%%", "%%%%")
                        t = t:gsub(token, replacement)
                end
        until t == before
        return t
end
838
-- Encode/escape certain characters inside Markdown code runs.
-- The point is that in code, these characters are literals,
-- and lose their special Markdown meanings.
--
-- "&", "<" and ">" are turned into HTML entities first; afterwards every
-- key of escape_table that occurs in the result is replaced by its value.
-- Returns the encoded string.
function encode_code(s)
        s = s:gsub("%&", "&amp;")
        s = s:gsub("<", "&lt;")
        s = s:gsub(">", "&gt;")
        for k,v in pairs(escape_table) do
                -- escape_table also holds multi-character keys registered through
                -- add_escape(), so escape every punctuation character of the key
                -- (not just the first one) to match it as literal text.
                local literal = k:gsub("%p", "%%%0")
                s = s:gsub(literal, v)
        end
        return s
end
851
-- Handle backtick blocks.
--
-- Replaces code spans in s by the escaped form of "<code>...</code>" and
-- returns the new string. A span opens with a run of one or more backticks
-- and closes with the next run of exactly the same number of backticks,
-- which has to appear before the next newline. Leading and trailing blanks
-- of the code are dropped and the code is passed through encode_code().
function code_spans(s)
        -- Hide escaped backslashes and escaped backticks so they cannot act as
        -- span delimiters.
        s = s:gsub("\\\\", escape_table["\\"])
        s = s:gsub("\\`", escape_table["`"])

        -- Find the first run of exactly `count` backticks at or after `init`.
        -- Longer or shorter runs are skipped: they are content, not the closer.
        local function find_closing_run(count, init)
                local from = init
                while true do
                        local first, last = s:find("`+", from)
                        if not first then return nil end
                        if last - first + 1 == count then return first, last end
                        from = last + 1
                end
        end

        local pos = 1
        while true do
                local start, stop = s:find("`+", pos)
                if not start then return s end
                local count = stop - start + 1
                -- Find a matching number of backticks
                local estart, estop = find_closing_run(count, stop+1)
                local brstart = s:find("\n", stop+1)
                if estart and (not brstart or estart < brstart) then
                        local code = s:sub(stop+1, estart-1)
                        code = code:gsub("^[ \t]+", "")
                        code = code:gsub("[ \t]+$", "")
                        -- Inside code the backslash is literal: put it back in front of
                        -- what was hidden above.
                        code = code:gsub(escape_table["\\"], escape_table["\\"] .. escape_table["\\"])
                        code = code:gsub(escape_table["`"], escape_table["\\"] .. escape_table["`"])
                        code = "<code>" .. encode_code(code) .. "</code>"
                        code = add_escape(code)
                        s = s:sub(1, start-1) .. code .. s:sub(estop+1)
                        -- Continue scanning right after the inserted replacement.
                        pos = start + code:len()
                else
                        pos = stop + 1
                end
        end
end
881
-- Encode text that ends up inside an HTML attribute value:
-- & becomes &amp;, " becomes &quot; and < becomes &lt;.
-- A nil (or false) argument is returned unchanged.
function encode_alt(s)
        if s then
                local substitutions = { {'&', '&amp;'}, {'"', '&quot;'}, {'<', '&lt;'} }
                for _, pair in ipairs(substitutions) do
                        s = s:gsub(pair[1], pair[2])
                end
        end
        return s
end
890
-- Handle image references
--
-- Rewrites both image forms found in `text` as <img> tags and returns the
-- resulting string:
--   ![alt][id]            reference style; url and title come from link_database
--   ![alt](url "title")   inline style; the title is optional
-- Each generated tag is passed through add_escape(). A reference whose id has
-- no url in link_database is left as it was written.
function images(text)
        local function reference_link(alt, id)
                local raw_alt = alt:match("%b[]"):sub(2,-2)
                id = id:match("%[(.*)%]"):lower()
                -- An empty id (![alt][]) means the alt text itself is the id.
                -- Use the unencoded alt text of this image, not the whole input.
                if id == "" then id = raw_alt:lower() end
                -- As before, looking up an unknown id leaves an empty entry behind.
                link_database[id] = link_database[id] or {}
                if not link_database[id].url then return nil end
                local url = encode_alt(link_database[id].url)
                local title = encode_alt(link_database[id].title)
                if title then title = " title=\"" .. title .. "\"" else title = "" end
                return add_escape ('<img src="' .. url .. '" alt="' .. encode_alt(raw_alt) .. '"' .. title .. "/>")
        end

        local function inline_link(alt, link)
                alt = encode_alt(alt:match("%b[]"):sub(2,-2))
                -- First try "(url 'title')"; fall back to a bare "(url)".
                local url, title = link:match("%(<?(.-)>?[ \t]*['\"](.+)['\"]")
                url = url or link:match("%(<?(.-)>?%)")
                url = encode_alt(url)
                title = encode_alt(title)
                if title then
                        return add_escape('<img src="' .. url .. '" alt="' .. alt .. '" title="' .. title .. '"/>')
                else
                        return add_escape('<img src="' .. url .. '" alt="' .. alt .. '"/>')
                end
        end

        text = text:gsub("!(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link)
        text = text:gsub("!(%b[])(%b())", inline_link)
        return text
end
923
-- Handle anchor references
--
-- Rewrites both link forms found in `text` as <a> elements and returns the
-- resulting string:
--   [text][id]            reference style; url and title come from link_database
--   [text](url "title")   inline style; the title is optional
-- The opening and the closing tag are each passed through add_escape(); the
-- link text between them is left as is. A reference whose id has no url in
-- link_database is left as it was written.
function anchors(text)
        local function reference_link(label, id)
                label = label:match("%b[]"):sub(2,-2)
                id = id:match("%b[]"):sub(2,-2):lower()
                -- An empty id ([text][]) means the link text itself is the id.
                if id == "" then id = label:lower() end
                -- As before, looking up an unknown id leaves an empty entry behind.
                link_database[id] = link_database[id] or {}
                if not link_database[id].url then return nil end
                local url = encode_alt(link_database[id].url)
                local title = encode_alt(link_database[id].title)
                if title then title = " title=\"" .. title .. "\"" else title = "" end
                return add_escape("<a href=\"" .. url .. "\"" .. title .. ">") .. label .. add_escape("</a>")
        end

        local function inline_link(label, link)
                label = label:match("%b[]"):sub(2,-2)
                -- First try "(url 'title')"; fall back to a bare "(url)".
                local url, title = link:match("%(<?(.-)>?[ \t]*['\"](.+)['\"]")
                title = encode_alt(title)
                url  = url or  link:match("%(<?(.-)>?%)") or ""
                url = encode_alt(url)
                if title then
                        -- The closing tag is escaped here too, like in every other branch.
                        return add_escape("<a href=\"" .. url .. "\" title=\"" .. title .. "\">") .. label .. add_escape("</a>")
                else
                        return add_escape("<a href=\"" .. url .. "\">") .. label .. add_escape("</a>")
                end
        end

        text = text:gsub("(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link)
        text = text:gsub("(%b[])(%b())", inline_link)
        return text
end
956
957 -- Handle auto links, i.e. <http://www.google.com/>.
958 function auto_links(text)
959         local function link(s)
960                 return add_escape("<a href=\"" .. s .. "\">") .. s .. "</a>"
961         end
962         -- Encode chars as a mix of dec and hex entities to (perhaps) fool
963         -- spambots.
964         local function encode_email_address(s)
965                 -- Use a deterministic encoding to make unit testing possible.
966                 -- Code 45% hex, 45% dec, 10% plain.
967                 local hex = {code = function(c) return "&#x" .. string.format("%x", c:byte()) .. ";" end, count = 1, rate = 0.45}
968                 local dec = {code = function(c) return "&#" .. c:byte() .. ";" end, count = 0, rate = 0.45}
969                 local plain = {code = function(c) return c end, count = 0, rate = 0.1}
970                 local codes = {hex, dec, plain}
971                 local function swap(t,k1,k2) local temp = t[k2] t[k2] = t[k1] t[k1] = temp end
972                
973                 local out = ""
974                 for i = 1,s:len() do
975                         for _,code in ipairs(codes) do code.count = code.count + code.rate end
976                         if codes[1].count < codes[2].count then swap(codes,1,2) end
977                         if codes[2].count < codes[3].count then swap(codes,2,3) end
978                         if codes[1].count < codes[2].count then swap(codes,1,2) end
979                        
980                         local code = codes[1]
981                         local c = s:sub(i,i)
982                         -- Force encoding of "@" to make email address more invisible.
983                         if c == "@" and code == plain then code = codes[2] end
984                         out = out .. code.code(c)
985                         code.count = code.count - 1
986                 end     
987                 return out
988         end
989         local function mail(s)
990                 s = unescape_special_chars(s)
991                 local address = encode_email_address("mailto:" .. s)
992                 local text = encode_email_address(s)
993                 return add_escape("<a href=\"" .. address .. "\">") .. text .