doc/doxygen/doxy2swig/doxy2swig.py

   1 #!/usr/bin/env python
   2 """Doxygen XML to SWIG docstring converter.
   3
   4 Usage:
   5
   6   doxy2swig.py [options] input.xml output.i
   7
   8 Converts Doxygen generated XML files into a file containing docstrings
   9 that can be used by SWIG-1.3.x.  Note that you need to get SWIG
  10 version > 1.3.23 or use Robin Dunn's docstring patch to be able to use
  11 the resulting output.
  12
  13 input.xml is your doxygen generated XML file and output.i is where the
  14 output will be written (the file will be clobbered).
  15
  16 """
  17 ######################################################################
  18 #
  19 # This code is implemented using Mark Pilgrim's code as a guideline:
  20 #   http://www.faqs.org/docs/diveintopython/kgp_divein.html
  21 #
  22 # Author: Prabhu Ramachandran
  23 # License: BSD style
  24 #
  25 # Thanks:
  26 #   Johan Hake:  the include_function_definition feature
  27 #   Bill Spotz:  bug reports and testing.
  28 #   Sebastian Henschel:   Misc. enhancements.
  29 #
  30 ######################################################################
  31
  32 from xml.dom import minidom
  33 import re
  34 import textwrap
  35 import sys
  36 import types
  37 import os.path
  38 import optparse
  39
  40
  41 def my_open_read(source):
  42     if hasattr(source, "read"):
  43         return source
  44     else:
  45         return open(source)
  46
  47 def my_open_write(dest):
  48     if hasattr(dest, "write"):
  49         return dest
  50     else:
  51         return open(dest, 'w')
  52
  53
  54 class Doxy2SWIG:
  55     """Converts Doxygen generated XML files into a file containing
  56     docstrings that can be used by SWIG-1.3.x that have support for
  57     feature("docstring").  Once the data is parsed it is stored in
  58     self.pieces.
  59
  60     """
  61
  62     def __init__(self, src, include_function_definition=True, quiet=False):
  63         """Initialize the instance given a source object.  `src` can
  64         be a file or filename.  If you do not want to include function
  65         definitions from doxygen then set
  66         `include_function_definition` to `False`.  This is handy since
  67         this allows you to use the swig generated function definition
  68         using %feature("autodoc", [0,1]).
  69
  70         """
  71         f = my_open_read(src)
  72         self.my_dir = os.path.dirname(f.name)
  73         self.xmldoc = minidom.parse(f).documentElement
  74         f.close()
  75
  76         self.pieces = []
  77         self.pieces.append('\n// File: %s\n'%\
  78                            os.path.basename(f.name))
  79
  80         self.space_re = re.compile(r'\s+')
  81         self.lead_spc = re.compile(r'^(%feature\S+\s+\S+\s*?)"\s+(\S)')
  82         self.multi = 0
  83         self.ignores = ['inheritancegraph', 'param', 'listofallmembers',
  84                         'innerclass', 'name', 'declname', 'incdepgraph',
  85                         'invincdepgraph', 'programlisting', 'type',
  86                         'references', 'referencedby', 'location',
  87                         'collaborationgraph', 'reimplements',
  88                         'reimplementedby', 'derivedcompoundref',
  89                         'basecompoundref']
  90         #self.generics = []
  91         self.include_function_definition = include_function_definition
  92         if not include_function_definition:
  93             self.ignores.append('argsstring')
  94
  95         self.quiet = quiet
  96
  97
  98     def generate(self):
  99         """Parses the file set in the initialization.  The resulting
 100         data is stored in `self.pieces`.
 101
 102         """
 103         self.parse(self.xmldoc)
 104
 105     def parse(self, node):
 106         """Parse a given node.  This function in turn calls the
 107         `parse_<nodeType>` functions which handle the respective
 108         nodes.
 109
 110         """
 111         pm = getattr(self, "parse_%s"%node.__class__.__name__)
 112         pm(node)
 113
 114     def parse_Document(self, node):
 115         self.parse(node.documentElement)
 116
 117     def parse_Text(self, node):
 118         txt = node.data
 119         txt = txt.replace('\\', r'\\\\')
 120         txt = txt.replace('"', r'\"')
 121         # ignore pure whitespace
 122         m = self.space_re.match(txt)
 123         if m and len(m.group()) == len(txt):
 124             pass
 125         else:
 126             self.add_text(textwrap.fill(txt, break_long_words=False))
 127
 128     def parse_Element(self, node):
 129         """Parse an `ELEMENT_NODE`.  This calls specific
 130         `do_<tagName>` handers for different elements.  If no handler
 131         is available the `generic_parse` method is called.  All
 132         tagNames specified in `self.ignores` are simply ignored.
 133
 134         """
 135         name = node.tagName
 136         ignores = self.ignores
 137         if name in ignores:
 138             return
 139         attr = "do_%s" % name
 140         if hasattr(self, attr):
 141             handlerMethod = getattr(self, attr)
 142             handlerMethod(node)
 143         else:
 144             self.generic_parse(node)
 145             #if name not in self.generics: self.generics.append(name)
 146
 147     def parse_Comment(self, node):
 148         """Parse a `COMMENT_NODE`.  This does nothing for now."""
 149         return
 150
 151     def add_text(self, value):
 152         """Adds text corresponding to `value` into `self.pieces`."""
 153         if type(value) in (types.ListType, types.TupleType):
 154             self.pieces.extend(value)
 155         else:
 156             self.pieces.append(value)
 157
 158     def get_specific_nodes(self, node, names):
 159         """Given a node and a sequence of strings in `names`, return a
 160         dictionary containing the names as keys and child
 161         `ELEMENT_NODEs`, that have a `tagName` equal to the name.
 162
 163         """
 164         nodes = [(x.tagName, x) for x in node.childNodes \
 165                  if x.nodeType == x.ELEMENT_NODE and \
 166                  x.tagName in names]
 167         return dict(nodes)
 168
 169     def generic_parse(self, node, pad=0):
 170         """A Generic parser for arbitrary tags in a node.
 171
 172         Parameters:
 173
 174          - node:  A node in the DOM.
 175          - pad: `int` (default: 0)
 176
 177            If 0 the node data is not padded with newlines.  If 1 it
 178            appends a newline after parsing the childNodes.  If 2 it
 179            pads before and after the nodes are processed.  Defaults to
 180            0.
 181
 182         """
 183         npiece = 0
 184         if pad:
 185             npiece = len(self.pieces)
 186             if pad == 2:
 187                 self.add_text('\n')
 188         for n in node.childNodes:
 189             self.parse(n)
 190         if pad:
 191             if len(self.pieces) > npiece:
 192                 self.add_text('\n')
 193
 194     def space_parse(self, node):
 195         self.add_text(' ')
 196         self.generic_parse(node)
 197
 198     do_ref = space_parse
 199     do_emphasis = space_parse
 200     do_bold = space_parse
 201     do_computeroutput = space_parse
 202     do_formula = space_parse
 203
 204     def do_compoundname(self, node):
 205         self.add_text('\n\n')
 206         data = node.firstChild.data
 207         self.add_text('%%feature("docstring") %s "\n'%data)
 208
 209     def do_compounddef(self, node):
 210         kind = node.attributes['kind'].value
 211         if kind in ('class', 'struct'):
 212             prot = node.attributes['prot'].value
 213             if prot <> 'public':
 214                 return
 215             names = ('compoundname', 'briefdescription',
 216                      'detaileddescription', 'includes')
 217             first = self.get_specific_nodes(node, names)
 218             for n in names:
 219                 if first.has_key(n):
 220                     self.parse(first[n])
 221             self.add_text(['";','\n'])
 222             for n in node.childNodes:
 223                 if n not in first.values():
 224                     self.parse(n)
 225         elif kind in ('file', 'namespace'):
 226             nodes = node.getElementsByTagName('sectiondef')
 227             for n in nodes:
 228                 self.parse(n)
 229
 230     def do_includes(self, node):
 231         self.add_text('C++ includes: ')
 232         self.generic_parse(node, pad=1)
 233
 234     def do_parameterlist(self, node):
 235         text='unknown'
 236         for key, val in node.attributes.items():
 237             if key == 'kind':
 238                 if val == 'param': text = 'Parameters'
 239                 elif val == 'exception': text = 'Exceptions'
 240                 else: text = val
 241                 break
 242         self.add_text(['\n', '\n', text, ':', '\n'])
 243         self.generic_parse(node, pad=1)
 244
 245     def do_para(self, node):
 246         self.add_text('\n')
 247         self.generic_parse(node, pad=1)
 248
 249     def do_parametername(self, node):
 250         self.add_text('\n')
 251         try:
 252             data=node.firstChild.data
 253         except AttributeError: # perhaps a <ref> tag in it
 254             data=node.firstChild.firstChild.data
 255         if data.find('Exception') != -1:
 256             self.add_text(data)
 257         else:
 258             self.add_text("%s: "%data)
 259
 260     def do_parameterdefinition(self, node):
 261         self.generic_parse(node, pad=1)
 262
 263     def do_detaileddescription(self, node):
 264         self.generic_parse(node, pad=1)
 265
 266     def do_briefdescription(self, node):
 267         self.generic_parse(node, pad=1)
 268
 269     def do_memberdef(self, node):
 270         prot = node.attributes['prot'].value
 271         id = node.attributes['id'].value
 272         kind = node.attributes['kind'].value
 273         tmp = node.parentNode.parentNode.parentNode
 274         compdef = tmp.getElementsByTagName('compounddef')[0]
 275         cdef_kind = compdef.attributes['kind'].value
 276
 277         if prot == 'public':
 278             first = self.get_specific_nodes(node, ('definition', 'name'))
 279             name = first['name'].firstChild.data
 280             if name[:8] == 'operator': # Don't handle operators yet.
 281                 return
 282
 283             if not first.has_key('definition') or \
 284                    kind in ['variable', 'typedef']:
 285                 return
 286
 287             if self.include_function_definition:
 288                 defn = first['definition'].firstChild.data
 289             else:
 290                 defn = ""
 291             self.add_text('\n')
 292             self.add_text('%feature("docstring") ')
 293
 294             anc = node.parentNode.parentNode
 295             if cdef_kind in ('file', 'namespace'):
 296                 ns_node = anc.getElementsByTagName('innernamespace')
 297                 if not ns_node and cdef_kind == 'namespace':
 298                     ns_node = anc.getElementsByTagName('compoundname')
 299                 if ns_node:
 300                     ns = ns_node[0].firstChild.data
 301                     self.add_text(' %s::%s "\n%s'%(ns, name, defn))
 302                 else:
 303                     self.add_text(' %s "\n%s'%(name, defn))
 304             elif cdef_kind in ('class', 'struct'):
 305                 # Get the full function name.
 306                 anc_node = anc.getElementsByTagName('compoundname')
 307                 cname = anc_node[0].firstChild.data
 308                 self.add_text(' %s::%s "\n%s'%(cname, name, defn))
 309
 310             for n in node.childNodes:
 311                 if n not in first.values():
 312                     self.parse(n)
 313             self.add_text(['";', '\n'])
 314
 315     def do_definition(self, node):
 316         data = node.firstChild.data
 317         self.add_text('%s "\n%s'%(data, data))
 318
 319     def do_sectiondef(self, node):
 320         kind = node.attributes['kind'].value
 321         if kind in ('public-func', 'func', 'user-defined', ''):
 322             self.generic_parse(node)
 323
 324     def do_header(self, node):
 325         """For a user defined section def a header field is present
 326         which should not be printed as such, so we comment it in the
 327         output."""
 328         data = node.firstChild.data
 329         self.add_text('\n/*\n %s \n*/\n'%data)
 330         # If our immediate sibling is a 'description' node then we
 331         # should comment that out also and remove it from the parent
 332         # node's children.
 333         parent = node.parentNode
 334         idx = parent.childNodes.index(node)
 335         if len(parent.childNodes) >= idx + 2:
 336             nd = parent.childNodes[idx+2]
 337             if nd.nodeName == 'description':
 338                 nd = parent.removeChild(nd)
 339                 self.add_text('\n/*')
 340                 self.generic_parse(nd)
 341                 self.add_text('\n*/\n')
 342
 343     def do_simplesect(self, node):
 344         kind = node.attributes['kind'].value
 345         if kind in ('date', 'rcs', 'version'):
 346             pass
 347         elif kind == 'warning':
 348             self.add_text(['\n', 'WARNING: '])
 349             self.generic_parse(node)
 350         elif kind == 'see':
 351             self.add_text('\n')
 352             self.add_text('See: ')
 353             self.generic_parse(node)
 354         elif kind == 'return':
 355             self.add_text(['\n', '===> Returns: '])
 356             self.generic_parse(node)
 357         else:
 358             self.generic_parse(node)
 359
 360     def do_argsstring(self, node):
 361         self.generic_parse(node, pad=1)
 362
 363     def do_member(self, node):
 364         kind = node.attributes['kind'].value
 365         refid = node.attributes['refid'].value
 366         if kind == 'function' and refid[:9] == 'namespace':
 367             self.generic_parse(node)
 368
 369     def do_doxygenindex(self, node):
 370         self.multi = 1
 371         comps = node.getElementsByTagName('compound')
 372         for c in comps:
 373             refid = c.attributes['refid'].value
 374             fname = refid + '.xml'
 375             if not os.path.exists(fname):
 376                 fname = os.path.join(self.my_dir,  fname)
 377             if not self.quiet:
 378                 print "parsing file: %s"%fname
 379             p = Doxy2SWIG(fname, self.include_function_definition, self.quiet)
 380             p.generate()
 381             self.pieces.extend(self.clean_pieces(p.pieces))
 382
 383     def write(self, fname):
 384         o = my_open_write(fname)
 385         if self.multi:
 386             o.write("".join(self.pieces))
 387         else:
 388             o.write("".join(self.clean_pieces(self.pieces)))
 389         o.close()
 390
 391     def clean_pieces(self, pieces):
 392         """Cleans the list of strings given as `pieces`.  It replaces
 393         multiple newlines by a maximum of 2 and returns a new list.
 394         It also wraps the paragraphs nicely.
 395
 396         """
 397         ret = []
 398         count = 0
 399         for i in pieces:
 400             if i == '\n':
 401                 count = count + 1
 402             else:
 403                 if i == '";':
 404                     if count:
 405                         ret.append('\n')
 406                 elif count > 2:
 407                     ret.append('\n\n')
 408                 elif count:
 409                     ret.append('\n'*count)
 410                 count = 0
 411                 ret.append(i)
 412
 413         _data = "".join(ret)
 414         ret = []
 415         for i in _data.split('\n\n'):
 416             if i == 'Parameters:' or i == 'Exceptions:':
 417                 ret.extend([i, '\n-----------', '\n\n'])
 418             elif i.find('// File:') > -1: # leave comments alone.
 419                 ret.extend([i, '\n'])
 420             else:
 421                 _tmp = textwrap.fill(i.strip(), break_long_words=False)
 422                 _tmp = self.lead_spc.sub(r'\1"\2', _tmp)
 423                 ret.extend([_tmp, '\n\n'])
 424         return ret
 425
 426
 427 def convert(input, output, include_function_definition=True, quiet=False):
 428     p = Doxy2SWIG(input, include_function_definition, quiet)
 429     p.generate()
 430     p.write(output)
 431
 432 def main():
 433     usage = __doc__
 434     parser = optparse.OptionParser(usage)
 435     parser.add_option("-n", '--no-function-definition',
 436                       action='store_true',
 437                       default=False,
 438                       dest='func_def',
 439                       help='do not include doxygen function definitions')
 440     parser.add_option("-q", '--quiet',
 441                       action='store_true',
 442                       default=False,
 443                       dest='quiet',
 444                       help='be quiet and minimize output')
 445
 446     options, args = parser.parse_args()
 447     if len(args) != 2:
 448         parser.error("error: no input and output specified")
 449
 450     convert(args[0], args[1], not options.func_def, options.quiet)
 451
 452
 453 if __name__ == '__main__':
 454     main()
 455