Misc/doxy2swig.py

   1 #!/usr/bin/env python
   2 # Copyright (C) 2006-2013  CEA/DEN, EDF R&D
   3 #
   4 # This library is free software; you can redistribute it and/or
   5 # modify it under the terms of the GNU Lesser General Public
   6 # License as published by the Free Software Foundation; either
   7 # version 2.1 of the License.
   8 #
   9 # This library is distributed in the hope that it will be useful,
  10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12 # Lesser General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU Lesser General Public
  15 # License along with this library; if not, write to the Free Software
  16 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  17 #
  18 # See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
  19 #
  20
  21 """Doxygen XML to SWIG docstring converter.
  22
  23 Usage:
  24
  25   doxy2swig.py [options] input.xml output.i
  26
  27 Converts Doxygen generated XML files into a file containing docstrings
  28 that can be used by SWIG-1.3.x.  Note that you need to get SWIG
  29 version > 1.3.23 or use Robin Dunn's docstring patch to be able to use
  30 the resulting output.
  31
  32 input.xml is your doxygen generated XML file and output.i is where the
  33 output will be written (the file will be clobbered).
  34
  35 """
  36 ######################################################################
  37 #
  38 # This code is implemented using Mark Pilgrim's code as a guideline:
  39 #   http://www.faqs.org/docs/diveintopython/kgp_divein.html
  40 #
  41 # Author: Prabhu Ramachandran
  42 # License: BSD style
  43 #
  44 # Thanks:
  45 #   Johan Hake:  the include_function_definition feature
  46 #   Bill Spotz:  bug reports and testing.
  47 #
  48 ######################################################################
  49
  50 from xml.dom import minidom
  51 import re
  52 import textwrap
  53 import sys
  54 import types
  55 import os.path
  56 import optparse
  57
  58
  59 def my_open_read(source):
  60     if hasattr(source, "read"):
  61         return source
  62     else:
  63         return open(source)
  64
  65 def my_open_write(dest):
  66     if hasattr(dest, "write"):
  67         return dest
  68     else:
  69         return open(dest, 'w')
  70
  71
  72 class Doxy2SWIG:
  73     """Converts Doxygen generated XML files into a file containing
  74     docstrings that can be used by SWIG-1.3.x that have support for
  75     feature("docstring").  Once the data is parsed it is stored in
  76     self.pieces.
  77
  78     """
  79
  80     def __init__(self, src, include_function_definition=True, quiet=False):
  81         """Initialize the instance given a source object.  `src` can
  82         be a file or filename.  If you do not want to include function
  83         definitions from doxygen then set
  84         `include_function_definition` to `False`.  This is handy since
  85         this allows you to use the swig generated function definition
  86         using %feature("autodoc", [0,1]).
  87
  88         """
  89         f = my_open_read(src)
  90         self.my_dir = os.path.dirname(f.name)
  91         self.xmldoc = minidom.parse(f).documentElement
  92         f.close()
  93
  94         self.pieces = []
  95         self.pieces.append('\n// File: %s\n'%\
  96                            os.path.basename(f.name))
  97
  98         self.space_re = re.compile(r'\s+')
  99         self.lead_spc = re.compile(r'^(%feature\S+\s+\S+\s*?)"\s+(\S)')
 100         self.multi = 0
 101         self.ignores = ['inheritancegraph', 'param', 'listofallmembers',
 102                         'innerclass', 'name', 'declname', 'incdepgraph',
 103                         'invincdepgraph', 'programlisting', 'type',
 104                         'references', 'referencedby', 'location',
 105                         'collaborationgraph', 'reimplements',
 106                         'reimplementedby', 'derivedcompoundref',
 107                         'basecompoundref']
 108         #self.generics = []
 109         self.include_function_definition = include_function_definition
 110         if not include_function_definition:
 111             self.ignores.append('argsstring')
 112
 113         self.quiet = quiet
 114
 115
 116     def generate(self):
 117         """Parses the file set in the initialization.  The resulting
 118         data is stored in `self.pieces`.
 119
 120         """
 121         self.parse(self.xmldoc)
 122
 123     def parse(self, node):
 124         """Parse a given node.  This function in turn calls the
 125         `parse_<nodeType>` functions which handle the respective
 126         nodes.
 127
 128         """
 129         pm = getattr(self, "parse_%s"%node.__class__.__name__)
 130         pm(node)
 131
 132     def parse_Document(self, node):
 133         self.parse(node.documentElement)
 134
 135     def parse_Text(self, node):
 136         txt = node.data
 137         txt = txt.replace('\\', r'\\\\')
 138         txt = txt.replace('"', r'\"')
 139         # ignore pure whitespace
 140         m = self.space_re.match(txt)
 141         if m and len(m.group()) == len(txt):
 142             pass
 143         else:
 144             self.add_text(textwrap.fill(txt, break_long_words=False))
 145
 146     def parse_Element(self, node):
 147         """Parse an `ELEMENT_NODE`.  This calls specific
 148         `do_<tagName>` handers for different elements.  If no handler
 149         is available the `generic_parse` method is called.  All
 150         tagNames specified in `self.ignores` are simply ignored.
 151
 152         """
 153         name = node.tagName
 154         ignores = self.ignores
 155         if name in ignores:
 156             return
 157         attr = "do_%s" % name
 158         if hasattr(self, attr):
 159             handlerMethod = getattr(self, attr)
 160             handlerMethod(node)
 161         else:
 162             self.generic_parse(node)
 163             #if name not in self.generics: self.generics.append(name)
 164
 165     def parse_Comment(self, node):
 166         """Parse a `COMMENT_NODE`.  This does nothing for now."""
 167         return
 168
 169     def add_text(self, value):
 170         """Adds text corresponding to `value` into `self.pieces`."""
 171         if type(value) in (types.ListType, types.TupleType):
 172             self.pieces.extend(value)
 173         else:
 174             self.pieces.append(value)
 175
 176     def get_specific_nodes(self, node, names):
 177         """Given a node and a sequence of strings in `names`, return a
 178         dictionary containing the names as keys and child
 179         `ELEMENT_NODEs`, that have a `tagName` equal to the name.
 180
 181         """
 182         nodes = [(x.tagName, x) for x in node.childNodes \
 183                  if x.nodeType == x.ELEMENT_NODE and \
 184                  x.tagName in names]
 185         return dict(nodes)
 186
 187     def generic_parse(self, node, pad=0):
 188         """A Generic parser for arbitrary tags in a node.
 189
 190         Parameters:
 191
 192          - node:  A node in the DOM.
 193          - pad: `int` (default: 0)
 194
 195            If 0 the node data is not padded with newlines.  If 1 it
 196            appends a newline after parsing the childNodes.  If 2 it
 197            pads before and after the nodes are processed.  Defaults to
 198            0.
 199
 200         """
 201         npiece = 0
 202         if pad:
 203             npiece = len(self.pieces)
 204             if pad == 2:
 205                 self.add_text('\n')
 206         for n in node.childNodes:
 207             self.parse(n)
 208         if pad:
 209             if len(self.pieces) > npiece:
 210                 self.add_text('\n')
 211
 212     def space_parse(self, node):
 213         self.add_text(' ')
 214         self.generic_parse(node)
 215
 216     do_ref = space_parse
 217     do_emphasis = space_parse
 218     do_bold = space_parse
 219     do_computeroutput = space_parse
 220     do_formula = space_parse
 221
 222     def do_compoundname(self, node):
 223         self.add_text('\n\n')
 224         data = node.firstChild.data
 225         self.add_text('%%feature("docstring") %s "\n'%data)
 226
 227     def do_compounddef(self, node):
 228         kind = node.attributes['kind'].value
 229         if kind in ('class', 'struct'):
 230             prot = node.attributes['prot'].value
 231             if prot <> 'public':
 232                 return
 233             names = ('compoundname', 'briefdescription',
 234                      'detaileddescription', 'includes')
 235             first = self.get_specific_nodes(node, names)
 236             for n in names:
 237                 if first.has_key(n):
 238                     self.parse(first[n])
 239             self.add_text(['";','\n'])
 240             for n in node.childNodes:
 241                 if n not in first.values():
 242                     self.parse(n)
 243         elif kind in ('file', 'namespace'):
 244             nodes = node.getElementsByTagName('sectiondef')
 245             for n in nodes:
 246                 self.parse(n)
 247
 248     def do_includes(self, node):
 249         self.add_text('C++ includes: ')
 250         self.generic_parse(node, pad=1)
 251
 252     def do_parameterlist(self, node):
 253         text='unknown'
 254         for key, val in node.attributes.items():
 255             if key == 'kind':
 256                 if val == 'param': text = 'Parameters'
 257                 elif val == 'exception': text = 'Exceptions'
 258                 else: text = val
 259                 break
 260         self.add_text(['\n', '\n', text, ':', '\n'])
 261         self.generic_parse(node, pad=1)
 262
 263     def do_para(self, node):
 264         self.add_text('\n')
 265         self.generic_parse(node, pad=1)
 266
 267     def do_parametername(self, node):
 268         self.add_text('\n')
 269         try:
 270             data=node.firstChild.data
 271         except AttributeError: # perhaps a <ref> tag in it
 272             data=node.firstChild.firstChild.data
 273         if data.find('Exception') != -1:
 274             self.add_text(data)
 275         else:
 276             self.add_text("%s: "%data)
 277
 278     def do_parameterdefinition(self, node):
 279         self.generic_parse(node, pad=1)
 280
 281     def do_detaileddescription(self, node):
 282         self.generic_parse(node, pad=1)
 283
 284     def do_briefdescription(self, node):
 285         self.generic_parse(node, pad=1)
 286
 287     def do_memberdef(self, node):
 288         prot = node.attributes['prot'].value
 289         id = node.attributes['id'].value
 290         kind = node.attributes['kind'].value
 291         tmp = node.parentNode.parentNode.parentNode
 292         compdef = tmp.getElementsByTagName('compounddef')[0]
 293         cdef_kind = compdef.attributes['kind'].value
 294
 295         if prot == 'public':
 296             first = self.get_specific_nodes(node, ('definition', 'name'))
 297             name = first['name'].firstChild.data
 298             if name[:8] == 'operator': # Don't handle operators yet.
 299                 return
 300
 301             if not first.has_key('definition') or \
 302                    kind in ['variable', 'typedef']:
 303                 return
 304
 305             if self.include_function_definition:
 306                 defn = first['definition'].firstChild.data
 307             else:
 308                 defn = ""
 309             self.add_text('\n')
 310             self.add_text('%feature("docstring") ')
 311
 312             anc = node.parentNode.parentNode
 313             if cdef_kind in ('file', 'namespace'):
 314                 ns_node = anc.getElementsByTagName('innernamespace')
 315                 if not ns_node and cdef_kind == 'namespace':
 316                     ns_node = anc.getElementsByTagName('compoundname')
 317                 if ns_node:
 318                     ns = ns_node[0].firstChild.data
 319                     self.add_text(' %s::%s "\n%s'%(ns, name, defn))
 320                 else:
 321                     self.add_text(' %s "\n%s'%(name, defn))
 322             elif cdef_kind in ('class', 'struct'):
 323                 # Get the full function name.
 324                 anc_node = anc.getElementsByTagName('compoundname')
 325                 cname = anc_node[0].firstChild.data
 326                 self.add_text(' %s::%s "\n%s'%(cname, name, defn))
 327
 328             for n in node.childNodes:
 329                 if n not in first.values():
 330                     self.parse(n)
 331             self.add_text(['";', '\n'])
 332
 333     def do_definition(self, node):
 334         data = node.firstChild.data
 335         self.add_text('%s "\n%s'%(data, data))
 336
 337     def do_sectiondef(self, node):
 338         kind = node.attributes['kind'].value
 339         if kind in ('public-func', 'func', 'user-defined', ''):
 340             self.generic_parse(node)
 341
 342     def do_header(self, node):
 343         """For a user defined section def a header field is present
 344         which should not be printed as such, so we comment it in the
 345         output."""
 346         data = node.firstChild.data
 347         self.add_text('\n/*\n %s \n*/\n'%data)
 348         # If our immediate sibling is a 'description' node then we
 349         # should comment that out also and remove it from the parent
 350         # node's children.
 351         parent = node.parentNode
 352         idx = parent.childNodes.index(node)
 353         if len(parent.childNodes) >= idx + 2:
 354             nd = parent.childNodes[idx+2]
 355             if nd.nodeName == 'description':
 356                 nd = parent.removeChild(nd)
 357                 self.add_text('\n/*')
 358                 self.generic_parse(nd)
 359                 self.add_text('\n*/\n')
 360
 361     def do_simplesect(self, node):
 362         kind = node.attributes['kind'].value
 363         if kind in ('date', 'rcs', 'version'):
 364             pass
 365         elif kind == 'warning':
 366             self.add_text(['\n', 'WARNING: '])
 367             self.generic_parse(node)
 368         elif kind == 'see':
 369             self.add_text('\n')
 370             self.add_text('See: ')
 371             self.generic_parse(node)
 372         else:
 373             self.generic_parse(node)
 374
 375     def do_argsstring(self, node):
 376         self.generic_parse(node, pad=1)
 377
 378     def do_member(self, node):
 379         kind = node.attributes['kind'].value
 380         refid = node.attributes['refid'].value
 381         if kind == 'function' and refid[:9] == 'namespace':
 382             self.generic_parse(node)
 383
 384     def do_doxygenindex(self, node):
 385         self.multi = 1
 386         comps = node.getElementsByTagName('compound')
 387         for c in comps:
 388             refid = c.attributes['refid'].value
 389             fname = refid + '.xml'
 390             if not os.path.exists(fname):
 391                 fname = os.path.join(self.my_dir,  fname)
 392             if not self.quiet:
 393                 print "parsing file: %s"%fname
 394             p = Doxy2SWIG(fname, self.include_function_definition, self.quiet)
 395             p.generate()
 396             self.pieces.extend(self.clean_pieces(p.pieces))
 397
 398     def write(self, fname):
 399         o = my_open_write(fname)
 400         if self.multi:
 401             o.write("".join(self.pieces))
 402         else:
 403             o.write("".join(self.clean_pieces(self.pieces)))
 404         o.close()
 405
 406     def clean_pieces(self, pieces):
 407         """Cleans the list of strings given as `pieces`.  It replaces
 408         multiple newlines by a maximum of 2 and returns a new list.
 409         It also wraps the paragraphs nicely.
 410
 411         """
 412         ret = []
 413         count = 0
 414         for i in pieces:
 415             if i == '\n':
 416                 count = count + 1
 417             else:
 418                 if i == '";':
 419                     if count:
 420                         ret.append('\n')
 421                 elif count > 2:
 422                     ret.append('\n\n')
 423                 elif count:
 424                     ret.append('\n'*count)
 425                 count = 0
 426                 ret.append(i)
 427
 428         _data = "".join(ret)
 429         ret = []
 430         for i in _data.split('\n\n'):
 431             if i == 'Parameters:' or i == 'Exceptions:':
 432                 ret.extend([i, '\n-----------', '\n\n'])
 433             elif i.find('// File:') > -1: # leave comments alone.
 434                 ret.extend([i, '\n'])
 435             else:
 436                 _tmp = textwrap.fill(i.strip(), break_long_words=False)
 437                 _tmp = self.lead_spc.sub(r'\1"\2', _tmp)
 438                 ret.extend([_tmp, '\n\n'])
 439         return ret
 440
 441
 442 def convert(input, output, include_function_definition=True, quiet=False):
 443     p = Doxy2SWIG(input, include_function_definition, quiet)
 444     p.generate()
 445     p.write(output)
 446
 447 def main():
 448     usage = __doc__
 449     parser = optparse.OptionParser(usage)
 450     parser.add_option("-n", '--no-function-definition',
 451                       action='store_true',
 452                       default=False,
 453                       dest='func_def',
 454                       help='do not include doxygen function definitions')
 455     parser.add_option("-q", '--quiet',
 456                       action='store_true',
 457                       default=False,
 458                       dest='quiet',
 459                       help='be quiet and minimise output')
 460
 461     options, args = parser.parse_args()
 462     if len(args) != 2:
 463         parser.error("error: no input and output specified")
 464
 465     convert(args[0], args[1], not options.func_def, options.quiet)
 466
 467
 468 if __name__ == '__main__':
 469     main()