haskell学习笔记:从Python源代码生成类图(继承关系图)

我想搞一个通过reST直接写论文的东东,想法如下:

digraph name{
    node[shape=box]
    reST -> latex -> pdf
}

而现状是: 从latex到pdf有了 模板 ,而Sphinx或者直接docutils都可以生成pdf, 但还不能直接生成模板所需要的那个样子(主要是表格和插图引用有问题),所以想系统的学习下docutils的源代码好hack,找了一个旧版本的 Presentation ,把代码也从 sf.net 下面搞到本地了,然后搞明白其关键的数据结构是 文档树 也就是 nodes.py所定义的数据结构。

说了半天还没有到正题,nodes.py有2205行,132个类定义,我想搞一个UML类图反映继承关系,正好用用这几天刚学的haskell来测试下。哦, 忘了提一个附加背景知识了, plantuml 利用 graphviz 可以把文本转换成UML图,那么我的问题就分解为如何利用haskell来生成plantuml所需要的输入文本。

代码如下:

import Text.Regex
import Data.List(dropWhileEnd,isPrefixOf)

-- | Translate Python source text into PlantUML inheritance edges.
--
-- Only lines that begin with @class @ in column 0 are examined. Every
-- (parent, child) pair reported by 'getClassNameAndParents' for such a line
-- becomes one output line of the form @"Parent" <|-- "Child"@. Lines that
-- yield no parents contribute nothing to the result.
pytouml :: String -> String
pytouml source = unlines (concatMap edgesFor classLines)
  where
    -- Candidate class headers: the keyword must sit at the very start of the line.
    classLines = [l | l <- lines source, "class " `isPrefixOf` l]

    -- One PlantUML edge per base class named on the header line.
    edgesFor l =
      let (child, bases) = getClassNameAndParents l
      in [quoted base ++ " <|-- " ++ quoted child | base <- bases]

    -- Wrap a name in double quotes.
    quoted name = "\"" ++ name ++ "\""



-- | Split one Python @class@ header line into the class name and its bases.
--
-- The line must start (after optional spaces) with @class@ followed by an
-- identifier, and a @:@ must appear somewhere after the name; otherwise
-- @("", [])@ is returned. An optional parenthesised list sitting directly in
-- front of that colon is split on commas to obtain the base-class names.
--
-- >>> getClassNameAndParents "class Text(Node, reprunicode):"
-- ("Text",["Node","reprunicode"])
-- >>> getClassNameAndParents "class NodeVisitor:"
-- ("NodeVisitor",[])
-- >>> getClassNameAndParents "def f():"
-- ("",[])
getClassNameAndParents :: String -> (String, [String])
getClassNameAndParents s =
  case matchRegexAll classHeadRe s of
    Just (_, _, rest, name : _) ->
      case matchRegex baseListRe rest of
        -- Two capture groups: the whole "(...)" and its inner text. Both are
        -- "" when the header has no parentheses at all.
        Just (_ : inner : _) -> (name, parseBases inner)
        _ -> ("", [])
    _ -> ("", [])
  where
    idPattern = "([a-zA-Z_][a-zA-Z0-9_]*)"
    classHeadRe = mkRegex $ "^ *class +" ++ idPattern

    -- [^()]* instead of .+ : a greedy .+ could run past the first ')' up to a
    -- later "):" on the same line (e.g. inside a trailing comment) and drag
    -- unrelated text into the base list. It also lets "()" match as an empty
    -- base list.
    baseListRe = mkRegex " *(\\(([^()]*)\\))? *:"

    trim = dropWhileEnd (' ' ==) . dropWhile (' ' ==)

    -- Drop empty entries (left behind by a trailing comma or blank list) and
    -- keyword arguments such as @metaclass=Meta@, which are not base classes.
    isBase b = not (null b) && '=' `notElem` b

    parseBases = filter isBase . map trim . splitRegex (mkRegex ",")

利用GHCi生成out.txt:

*Main Text.Regex Data.List> :l ~/haskell/pytouml.hs
[1 of 1] Compiling Main             ( /home/somebody/haskell/pytouml.hs, interpreted )
Ok, modules loaded: Main.
*Main Text.Regex Data.List> s <- readFile "nodes.py"
*Main Text.Regex Data.List> writeFile "out.txt" $ pytouml s

生成的UML效果图

"object" <|--"Node"
"Node" <|--"Text"
"reprunicode" <|--"Text"
"Node" <|--"Element"
"Element" <|--"TextElement"
"TextElement" <|--"FixedTextElement"
"PreBibliographic" <|--"Decorative"
"Body" <|--"General"
"Body" <|--"Sequential"
"Body" <|--"Admonition"
"Body" <|--"Special"
"PreBibliographic" <|--"Invisible"
"Resolvable" <|--"Referential"
"Resolvable" <|--"Targetable"
"Root" <|--"document"
"Structural" <|--"document"
"Element" <|--"document"
"Titular" <|--"title"
"PreBibliographic" <|--"title"
"TextElement" <|--"title"
"Titular" <|--"subtitle"
"PreBibliographic" <|--"subtitle"
"TextElement" <|--"subtitle"
"Titular" <|--"rubric"
"TextElement" <|--"rubric"
"Bibliographic" <|--"docinfo"
"Element" <|--"docinfo"
"Bibliographic" <|--"author"
"TextElement" <|--"author"
"Bibliographic" <|--"authors"
"Element" <|--"authors"
"Bibliographic" <|--"organization"
"TextElement" <|--"organization"
"Bibliographic" <|--"address"
"FixedTextElement" <|--"address"
"Bibliographic" <|--"contact"
"TextElement" <|--"contact"
"Bibliographic" <|--"version"
"TextElement" <|--"version"
"Bibliographic" <|--"revision"
"TextElement" <|--"revision"
"Bibliographic" <|--"status"
"TextElement" <|--"status"
"Bibliographic" <|--"date"
"TextElement" <|--"date"
"Bibliographic" <|--"copyright"
"TextElement" <|--"copyright"
"Decorative" <|--"decoration"
"Element" <|--"decoration"
"Decorative" <|--"header"
"Element" <|--"header"
"Decorative" <|--"footer"
"Element" <|--"footer"
"Structural" <|--"section"
"Element" <|--"section"
"Structural" <|--"topic"
"Element" <|--"topic"
"Structural" <|--"sidebar"
"Element" <|--"sidebar"
"Structural" <|--"transition"
"Element" <|--"transition"
"General" <|--"paragraph"
"TextElement" <|--"paragraph"
"General" <|--"compound"
"Element" <|--"compound"
"General" <|--"container"
"Element" <|--"container"
"Sequential" <|--"bullet_list"
"Element" <|--"bullet_list"
"Sequential" <|--"enumerated_list"
"Element" <|--"enumerated_list"
"Part" <|--"list_item"
"Element" <|--"list_item"
"Sequential" <|--"definition_list"
"Element" <|--"definition_list"
"Part" <|--"definition_list_item"
"Element" <|--"definition_list_item"
"Part" <|--"term"
"TextElement" <|--"term"
"Part" <|--"classifier"
"TextElement" <|--"classifier"
"Part" <|--"definition"
"Element" <|--"definition"
"Sequential" <|--"field_list"
"Element" <|--"field_list"
"Part" <|--"field"
"Element" <|--"field"
"Part" <|--"field_name"
"TextElement" <|--"field_name"
"Part" <|--"field_body"
"Element" <|--"field_body"
"Part" <|--"option"
"Element" <|--"option"
"Part" <|--"option_argument"
"TextElement" <|--"option_argument"
"Part" <|--"option_group"
"Element" <|--"option_group"
"Sequential" <|--"option_list"
"Element" <|--"option_list"
"Part" <|--"option_list_item"
"Element" <|--"option_list_item"
"Part" <|--"option_string"
"TextElement" <|--"option_string"
"Part" <|--"description"
"Element" <|--"description"
"General" <|--"literal_block"
"FixedTextElement" <|--"literal_block"
"General" <|--"doctest_block"
"FixedTextElement" <|--"doctest_block"
"General" <|--"math_block"
"FixedTextElement" <|--"math_block"
"General" <|--"line_block"
"Element" <|--"line_block"
"Part" <|--"line"
"TextElement" <|--"line"
"General" <|--"block_quote"
"Element" <|--"block_quote"
"Part" <|--"attribution"
"TextElement" <|--"attribution"
"Admonition" <|--"attention"
"Element" <|--"attention"
"Admonition" <|--"caution"
"Element" <|--"caution"
"Admonition" <|--"danger"
"Element" <|--"danger"
"Admonition" <|--"error"
"Element" <|--"error"
"Admonition" <|--"important"
"Element" <|--"important"
"Admonition" <|--"note"
"Element" <|--"note"
"Admonition" <|--"tip"
"Element" <|--"tip"
"Admonition" <|--"hint"
"Element" <|--"hint"
"Admonition" <|--"warning"
"Element" <|--"warning"
"Admonition" <|--"admonition"
"Element" <|--"admonition"
"Special" <|--"comment"
"Invisible" <|--"comment"
"FixedTextElement" <|--"comment"
"Special" <|--"substitution_definition"
"Invisible" <|--"substitution_definition"
"TextElement" <|--"substitution_definition"
"Special" <|--"target"
"Invisible" <|--"target"
"Inline" <|--"target"
"TextElement" <|--"target"
"Targetable" <|--"target"
"General" <|--"footnote"
"BackLinkable" <|--"footnote"
"Element" <|--"footnote"
"Labeled" <|--"footnote"
"Targetable" <|--"footnote"
"General" <|--"citation"
"BackLinkable" <|--"citation"
"Element" <|--"citation"
"Labeled" <|--"citation"
"Targetable" <|--"citation"
"Part" <|--"label"
"TextElement" <|--"label"
"General" <|--"figure"
"Element" <|--"figure"
"Part" <|--"caption"
"TextElement" <|--"caption"
"Part" <|--"legend"
"Element" <|--"legend"
"General" <|--"table"
"Element" <|--"table"
"Part" <|--"tgroup"
"Element" <|--"tgroup"
"Part" <|--"colspec"
"Element" <|--"colspec"
"Part" <|--"thead"
"Element" <|--"thead"
"Part" <|--"tbody"
"Element" <|--"tbody"
"Part" <|--"row"
"Element" <|--"row"
"Part" <|--"entry"
"Element" <|--"entry"
"Special" <|--"system_message"
"BackLinkable" <|--"system_message"
"PreBibliographic" <|--"system_message"
"Element" <|--"system_message"
"Special" <|--"pending"
"Invisible" <|--"pending"
"Element" <|--"pending"
"Special" <|--"raw"
"Inline" <|--"raw"
"PreBibliographic" <|--"raw"
"FixedTextElement" <|--"raw"
"Inline" <|--"emphasis"
"TextElement" <|--"emphasis"
"Inline" <|--"strong"
"TextElement" <|--"strong"
"Inline" <|--"literal"
"TextElement" <|--"literal"
"General" <|--"reference"
"Inline" <|--"reference"
"Referential" <|--"reference"
"TextElement" <|--"reference"
"Inline" <|--"footnote_reference"
"Referential" <|--"footnote_reference"
"TextElement" <|--"footnote_reference"
"Inline" <|--"citation_reference"
"Referential" <|--"citation_reference"
"TextElement" <|--"citation_reference"
"Inline" <|--"substitution_reference"
"TextElement" <|--"substitution_reference"
"Inline" <|--"title_reference"
"TextElement" <|--"title_reference"
"Inline" <|--"abbreviation"
"TextElement" <|--"abbreviation"
"Inline" <|--"acronym"
"TextElement" <|--"acronym"
"Inline" <|--"superscript"
"TextElement" <|--"superscript"
"Inline" <|--"subscript"
"TextElement" <|--"subscript"
"Inline" <|--"math"
"TextElement" <|--"math"
"General" <|--"image"
"Inline" <|--"image"
"Element" <|--"image"
"Inline" <|--"inline"
"TextElement" <|--"inline"
"Inline" <|--"problematic"
"TextElement" <|--"problematic"
"Inline" <|--"generated"
"TextElement" <|--"generated"
"NodeVisitor" <|--"SparseNodeVisitor"
"NodeVisitor" <|--"GenericNodeVisitor"
"GenericNodeVisitor" <|--"TreeCopyVisitor"
"Exception" <|--"TreePruningException"
"TreePruningException" <|--"SkipChildren"
"TreePruningException" <|--"SkipSiblings"
"TreePruningException" <|--"SkipNode"
"TreePruningException" <|--"SkipDeparture"
"TreePruningException" <|--"NodeFound"
"TreePruningException" <|--"StopTraversal"