Skip to content

Commit 6d5b96e

Browse files
author
Waylan Limberg
committed
Added better documentation to pre & post processors and fixed inheritance issues.
1 parent 393ef60 commit 6d5b96e

File tree

1 file changed

+128
-90
lines changed

1 file changed

+128
-90
lines changed

markdown.py

Lines changed: 128 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -405,85 +405,43 @@ def toxml(self):
405405
Preprocessors munge source text before we start doing anything too
406406
complicated.
407407
408-
Each preprocessor implements a "run" method that takes a pointer to a
409-
list of lines of the document, modifies it as necessary and returns
410-
either the same pointer or a pointer to a new list. Preprocessors
411-
must extend markdown.Preprocessor.
408+
There are two types of preprocessors: TextPreprocessor and Preprocessor.
412409
413410
"""
414411

415412

416-
class Preprocessor:
417-
pass
418-
419-
420-
class HeaderPreprocessor (Preprocessor):
421-
422-
"""
423-
Replaces underlined headers with hashed headers to avoid
424-
the need for lookahead later.
425-
"""
426-
427-
def run (self, lines):
428-
429-
i = -1
430-
while i+1 < len(lines):
431-
i = i+1
432-
if not lines[i].strip():
433-
continue
434-
435-
if lines[i].startswith("#"):
436-
lines.insert(i+1, "\n")
437-
438-
if (i+1 <= len(lines)
439-
and lines[i+1]
440-
and lines[i+1][0] in ['-', '=']):
441-
442-
underline = lines[i+1].strip()
443-
444-
if underline == "="*len(underline):
445-
lines[i] = "# " + lines[i].strip()
446-
lines[i+1] = ""
447-
elif underline == "-"*len(underline):
448-
lines[i] = "## " + lines[i].strip()
449-
lines[i+1] = ""
450-
451-
return lines
452-
453-
HEADER_PREPROCESSOR = HeaderPreprocessor()
454-
455-
class LinePreprocessor (Preprocessor):
456-
"""Deals with HR lines (needs to be done before processing lists)"""
413+
class TextPreprocessor:
414+
'''
415+
TextPreprocessors are run before the text is broken into lines.
416+
417+
Each TextPreprocessor implements a "run" method that takes a pointer to a
418+
text string of the document, modifies it as necessary and returns
419+
either the same pointer or a pointer to a new string.
420+
421+
TextPreprocessors must extend markdown.TextPreprocessor.
422+
'''
457423

458-
blockquote_re = re.compile(r'^(> )+')
424+
def run(self, text):
425+
pass
459426

460-
def run (self, lines):
461-
for i in range(len(lines)):
462-
prefix = ''
463-
m = self.blockquote_re.search(lines[i])
464-
if m : prefix = m.group(0)
465-
if self._isLine(lines[i][len(prefix):]):
466-
lines[i] = prefix + self.stash.store("<hr />", safe=True)
467-
return lines
468427

469-
def _isLine(self, block):
470-
"""Determines if a block should be replaced with an <HR>"""
471-
if block.startswith(" "): return 0 # a code block
472-
text = "".join([x for x in block if not x.isspace()])
473-
if len(text) <= 2:
474-
return 0
475-
for pattern in ['isline1', 'isline2', 'isline3']:
476-
m = RE.regExp[pattern].match(text)
477-
if (m and m.group(1)):
478-
return 1
479-
else:
480-
return 0
428+
class Preprocessor:
429+
'''
430+
Preprocessors are run after the text is broken into lines.
481431
482-
LINE_PREPROCESSOR = LinePreprocessor()
432+
Each preprocessor implements a "run" method that takes a pointer to a
433+
list of lines of the document, modifies it as necessary and returns
434+
either the same pointer or a pointer to a new list.
435+
436+
Preprocessors must extend markdown.Preprocessor.
437+
'''
483438

439+
def run(self, lines):
440+
pass
441+
484442

485-
class HtmlBlockPreprocessor (Preprocessor):
486-
"""Removes html blocks from self.lines"""
443+
class HtmlBlockPreprocessor(TextPreprocessor):
444+
"""Removes html blocks from the source text and stores them."""
487445

488446
def _get_left_tag(self, block):
489447
return block[1:].replace(">", " ", 1).split()[0].lower()
@@ -510,7 +468,7 @@ def _is_oneliner(self, tag):
510468
return (tag in ['hr', 'hr/'])
511469

512470

513-
def run (self, text):
471+
def run(self, text):
514472

515473
new_blocks = []
516474
text = text.split("\n\n")
@@ -580,7 +538,76 @@ def run (self, text):
580538
HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor()
581539

582540

583-
class ReferencePreprocessor (Preprocessor):
541+
class HeaderPreprocessor(Preprocessor):
542+
543+
"""
544+
Replaces underlined headers with hashed headers to avoid
545+
the need for lookahead later.
546+
"""
547+
548+
def run (self, lines):
549+
550+
i = -1
551+
while i+1 < len(lines):
552+
i = i+1
553+
if not lines[i].strip():
554+
continue
555+
556+
if lines[i].startswith("#"):
557+
lines.insert(i+1, "\n")
558+
559+
if (i+1 <= len(lines)
560+
and lines[i+1]
561+
and lines[i+1][0] in ['-', '=']):
562+
563+
underline = lines[i+1].strip()
564+
565+
if underline == "="*len(underline):
566+
lines[i] = "# " + lines[i].strip()
567+
lines[i+1] = ""
568+
elif underline == "-"*len(underline):
569+
lines[i] = "## " + lines[i].strip()
570+
lines[i+1] = ""
571+
572+
return lines
573+
574+
HEADER_PREPROCESSOR = HeaderPreprocessor()
575+
576+
577+
class LinePreprocessor(Preprocessor):
578+
"""Deals with HR lines (needs to be done before processing lists)"""
579+
580+
blockquote_re = re.compile(r'^(> )+')
581+
582+
def run (self, lines):
583+
for i in range(len(lines)):
584+
prefix = ''
585+
m = self.blockquote_re.search(lines[i])
586+
if m : prefix = m.group(0)
587+
if self._isLine(lines[i][len(prefix):]):
588+
lines[i] = prefix + self.stash.store("<hr />", safe=True)
589+
return lines
590+
591+
def _isLine(self, block):
592+
"""Determines if a block should be replaced with an <HR>"""
593+
if block.startswith(" "): return 0 # a code block
594+
text = "".join([x for x in block if not x.isspace()])
595+
if len(text) <= 2:
596+
return 0
597+
for pattern in ['isline1', 'isline2', 'isline3']:
598+
m = RE.regExp[pattern].match(text)
599+
if (m and m.group(1)):
600+
return 1
601+
else:
602+
return 0
603+
604+
LINE_PREPROCESSOR = LinePreprocessor()
605+
606+
607+
class ReferencePreprocessor(Preprocessor):
608+
'''
609+
Removes reference definitions from the text and stores them for later use.
610+
'''
584611

585612
def run (self, lines):
586613

@@ -889,36 +916,47 @@ def handleMatch(self, m, doc):
889916
======================================================================
890917
891918
Markdown also allows post-processors, which are similar to
892-
preprocessors in that they need to implement a "run" method. Unlike
893-
pre-processors, they take a NanoDom document as a parameter and work
894-
with that.
895-
896-
Post-Processor should extend markdown.Postprocessor.
919+
preprocessors in that they need to implement a "run" method. However,
920+
they are run after core processing.
897921
898-
There are currently no standard post-processors, but the footnote
899-
extension below uses one.
922+
There are two types of post-processors: Postprocessor and TextPostprocessor.
900923
"""
901924

925+
902926
class Postprocessor:
903-
pass
927+
'''
928+
Postprocessors are run before the dom is converted back into text.
929+
930+
Each Postprocessor implements a "run" method that takes a pointer to a
931+
NanoDom document, modifies it as necessary and returns a NanoDom
932+
document.
933+
934+
Postprocessors must extend markdown.Postprocessor.
904935
936+
There are currently no standard post-processors, but the footnote
937+
extension uses one.
938+
'''
905939

906-
"""
907-
======================================================================
908-
======================== TEXT-POST-PROCESSORS ========================
909-
======================================================================
940+
def run(self, dom):
941+
pass
910942

911-
Markdown also allows text-post-processors, which are similar to
912-
textpreprocessors in that they need to implement a "run" method.
913-
Unlike post-processors, they take a text string as a parameter and
914-
should return a string.
915943

916-
Text-Post-Processors should extend markdown.Postprocessor.
917944

918-
"""
945+
class TextPostprocessor:
946+
'''
947+
TextPostprocessors are run after the dom is converted back into text.
948+
949+
Each TextPostprocessor implements a "run" method that takes a pointer to a
950+
text string, modifies it as necessary and returns a text string.
951+
952+
TextPostprocessors must extend markdown.TextPostprocessor.
953+
'''
954+
955+
def run(self, text):
956+
pass
919957

920958

921-
class RawHtmlTextPostprocessor(Postprocessor):
959+
class RawHtmlTextPostprocessor(TextPostprocessor):
922960

923961
def __init__(self):
924962
pass

0 commit comments

Comments
 (0)