Misc/doxy2swig.py

   1 #!/usr/bin/env python3
   2 # Copyright (C) 2006-2023  CEA, EDF
   3 #
   4 # This library is free software; you can redistribute it and/or
   5 # modify it under the terms of the GNU Lesser General Public
   6 # License as published by the Free Software Foundation; either
   7 # version 2.1 of the License, or (at your option) any later version.
   8 #
   9 # This library is distributed in the hope that it will be useful,
  10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12 # Lesser General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU Lesser General Public
  15 # License along with this library; if not, write to the Free Software
  16 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  17 #
  18 # See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
  19 #
  20
  21 """Doxygen XML to SWIG docstring converter.
  22
  23 Converts Doxygen generated XML files into a file containing docstrings
  24 that can be used by SWIG-1.3.x.  Note that you need to get SWIG
  25 version > 1.3.23 or use Robin Dunn's docstring patch to be able to use
  26 the resulting output.
  27
  28 input.xml is your doxygen generated XML file and output.i is where the
  29 output will be written (the file will be clobbered).
  30
  31 """
  32
# One-line command synopsis; main() hands it to argparse as the `usage` text.
__usage__ = "doxy2swig.py [options] input.xml output.i"
  34 ######################################################################
  35 #
  36 # This code is implemented using Mark Pilgrim's code as a guideline:
  37 #   http://www.faqs.org/docs/diveintopython/kgp_divein.html
  38 #
  39 # Author: Prabhu Ramachandran
  40 # License: BSD style
  41 #
  42 # Thanks:
  43 #   Johan Hake:  the include_function_definition feature
  44 #   Bill Spotz:  bug reports and testing.
  45 #
  46 ######################################################################
  47
  48 from xml.dom import minidom
  49 import re
  50 import textwrap
  51 import sys
  52 import types
  53 import os.path
  54 import argparse
  55
  56
  57 def my_open_read(source):
  58     if hasattr(source, "read"):
  59         return source
  60     else:
  61         return open(source, encoding='utf8')
  62
  63 def my_open_write(dest):
  64     if hasattr(dest, "write"):
  65         return dest
  66     else:
  67         return open(dest, 'w', encoding='utf8')
  68
  69
  70 class Doxy2SWIG:
  71     """Converts Doxygen generated XML files into a file containing
  72     docstrings that can be used by SWIG-1.3.x that have support for
  73     feature("docstring").  Once the data is parsed it is stored in
  74     self.pieces.
  75
  76     """
  77
  78     def __init__(self, src, include_function_definition=True, quiet=False):
  79         """Initialize the instance given a source object.  `src` can
  80         be a file or filename.  If you do not want to include function
  81         definitions from doxygen then set
  82         `include_function_definition` to `False`.  This is handy since
  83         this allows you to use the swig generated function definition
  84         using %feature("autodoc", [0,1]).
  85
  86         """
  87         f = my_open_read(src)
  88         self.my_dir = os.path.dirname(f.name)
  89         self.xmldoc = minidom.parse(f).documentElement
  90         f.close()
  91
  92         self.pieces = []
  93         self.pieces.append('\n// File: %s\n'%\
  94                            os.path.basename(f.name))
  95
  96         self.space_re = re.compile(r'\s+')
  97         self.lead_spc = re.compile(r'^(%feature\S+\s+\S+\s*?)"\s+(\S)')
  98         self.multi = 0
  99         self.ignores = ['inheritancegraph', 'param', 'listofallmembers',
 100                         'innerclass', 'name', 'declname', 'incdepgraph',
 101                         'invincdepgraph', 'programlisting', 'type',
 102                         'references', 'referencedby', 'location',
 103                         'collaborationgraph', 'reimplements',
 104                         'reimplementedby', 'derivedcompoundref',
 105                         'basecompoundref']
 106         #self.generics = []
 107         self.include_function_definition = include_function_definition
 108         if not include_function_definition:
 109             self.ignores.append('argsstring')
 110
 111         self.quiet = quiet
 112
 113
 114     def generate(self):
 115         """Parses the file set in the initialization.  The resulting
 116         data is stored in `self.pieces`.
 117
 118         """
 119         self.parse(self.xmldoc)
 120
 121     def parse(self, node):
 122         """Parse a given node.  This function in turn calls the
 123         `parse_<nodeType>` functions which handle the respective
 124         nodes.
 125
 126         """
 127         pm = getattr(self, "parse_%s"%node.__class__.__name__)
 128         pm(node)
 129
 130     def parse_Document(self, node):
 131         self.parse(node.documentElement)
 132
 133     def parse_Text(self, node):
 134         txt = node.data
 135         txt = txt.replace('\\', r'\\\\')
 136         txt = txt.replace('"', r'\"')
 137         # ignore pure whitespace
 138         m = self.space_re.match(txt)
 139         if m and len(m.group()) == len(txt):
 140             pass
 141         else:
 142             self.add_text(textwrap.fill(txt, break_long_words=False))
 143
 144     def parse_Element(self, node):
 145         """Parse an `ELEMENT_NODE`.  This calls specific
 146         `do_<tagName>` handers for different elements.  If no handler
 147         is available the `generic_parse` method is called.  All
 148         tagNames specified in `self.ignores` are simply ignored.
 149
 150         """
 151         name = node.tagName
 152         ignores = self.ignores
 153         if name in ignores:
 154             return
 155         attr = "do_%s" % name
 156         if hasattr(self, attr):
 157             handlerMethod = getattr(self, attr)
 158             handlerMethod(node)
 159         else:
 160             self.generic_parse(node)
 161             #if name not in self.generics: self.generics.append(name)
 162
 163     def parse_Comment(self, node):
 164         """Parse a `COMMENT_NODE`.  This does nothing for now."""
 165         return
 166
 167     def add_text(self, value):
 168         """Adds text corresponding to `value` into `self.pieces`."""
 169         if type(value) in (list, tuple):
 170             self.pieces.extend(value)
 171         else:
 172             self.pieces.append(value)
 173
 174     def get_specific_nodes(self, node, names):
 175         """Given a node and a sequence of strings in `names`, return a
 176         dictionary containing the names as keys and child
 177         `ELEMENT_NODEs`, that have a `tagName` equal to the name.
 178
 179         """
 180         nodes = [(x.tagName, x) for x in node.childNodes \
 181                  if x.nodeType == x.ELEMENT_NODE and \
 182                  x.tagName in names]
 183         return dict(nodes)
 184
 185     def generic_parse(self, node, pad=0):
 186         """A Generic parser for arbitrary tags in a node.
 187
 188         Parameters:
 189
 190          - node:  A node in the DOM.
 191          - pad: `int` (default: 0)
 192
 193            If 0 the node data is not padded with newlines.  If 1 it
 194            appends a newline after parsing the childNodes.  If 2 it
 195            pads before and after the nodes are processed.  Defaults to
 196            0.
 197
 198         """
 199         npiece = 0
 200         if pad:
 201             npiece = len(self.pieces)
 202             if pad == 2:
 203                 self.add_text('\n')
 204         for n in node.childNodes:
 205             self.parse(n)
 206         if pad:
 207             if len(self.pieces) > npiece:
 208                 self.add_text('\n')
 209
 210     def space_parse(self, node):
 211         self.add_text(' ')
 212         self.generic_parse(node)
 213
 214     do_ref = space_parse
 215     do_emphasis = space_parse
 216     do_bold = space_parse
 217     do_computeroutput = space_parse
 218     do_formula = space_parse
 219
 220     def do_compoundname(self, node):
 221         self.add_text('\n\n')
 222         data = node.firstChild.data
 223         self.add_text('%%feature("docstring") %s "\n'%data)
 224
 225     def do_compounddef(self, node):
 226         kind = node.attributes['kind'].value
 227         if kind in ('class', 'struct'):
 228             prot = node.attributes['prot'].value
 229             if prot != 'public':
 230                 return
 231             names = ('compoundname', 'briefdescription',
 232                      'detaileddescription', 'includes')
 233             first = self.get_specific_nodes(node, names)
 234             for n in names:
 235                 if n in first:
 236                     self.parse(first[n])
 237             self.add_text(['";','\n'])
 238             for n in node.childNodes:
 239                 if n not in list(first.values()):
 240                     self.parse(n)
 241         elif kind in ('file', 'namespace'):
 242             nodes = node.getElementsByTagName('sectiondef')
 243             for n in nodes:
 244                 self.parse(n)
 245
 246     def do_includes(self, node):
 247         self.add_text('C++ includes: ')
 248         self.generic_parse(node, pad=1)
 249
 250     def do_parameterlist(self, node):
 251         text='unknown'
 252         for key, val in list(node.attributes.items()):
 253             if key == 'kind':
 254                 if val == 'param': text = 'Parameters'
 255                 elif val == 'exception': text = 'Exceptions'
 256                 else: text = val
 257                 break
 258         self.add_text(['\n', '\n', text, ':', '\n'])
 259         self.generic_parse(node, pad=1)
 260
 261     def do_para(self, node):
 262         self.add_text('\n')
 263         self.generic_parse(node, pad=1)
 264
 265     def do_parametername(self, node):
 266         self.add_text('\n')
 267         try:
 268             data=node.firstChild.data
 269         except AttributeError: # perhaps a <ref> tag in it
 270             data=node.firstChild.firstChild.data
 271         if data.find('Exception') != -1:
 272             self.add_text(data)
 273         else:
 274             self.add_text("%s: "%data)
 275
 276     def do_parameterdefinition(self, node):
 277         self.generic_parse(node, pad=1)
 278
 279     def do_detaileddescription(self, node):
 280         self.generic_parse(node, pad=1)
 281
 282     def do_briefdescription(self, node):
 283         self.generic_parse(node, pad=1)
 284
 285     def do_memberdef(self, node):
 286         prot = node.attributes['prot'].value
 287         id = node.attributes['id'].value
 288         kind = node.attributes['kind'].value
 289         tmp = node.parentNode.parentNode.parentNode
 290         compdef = tmp.getElementsByTagName('compounddef')[0]
 291         cdef_kind = compdef.attributes['kind'].value
 292
 293         if prot == 'public':
 294             first = self.get_specific_nodes(node, ('definition', 'name'))
 295             name = first['name'].firstChild.data
 296             if name[:8] == 'operator': # Don't handle operators yet.
 297                 return
 298
 299             if 'definition' not in first or \
 300                    kind in ['variable', 'typedef']:
 301                 return
 302
 303             if self.include_function_definition:
 304                 defn = first['definition'].firstChild.data
 305             else:
 306                 defn = ""
 307             self.add_text('\n')
 308             self.add_text('%feature("docstring") ')
 309
 310             anc = node.parentNode.parentNode
 311             if cdef_kind in ('file', 'namespace'):
 312                 ns_node = anc.getElementsByTagName('innernamespace')
 313                 if not ns_node and cdef_kind == 'namespace':
 314                     ns_node = anc.getElementsByTagName('compoundname')
 315                 if ns_node:
 316                     ns = ns_node[0].firstChild.data
 317                     self.add_text(' %s::%s "\n%s'%(ns, name, defn))
 318                 else:
 319                     self.add_text(' %s "\n%s'%(name, defn))
 320             elif cdef_kind in ('class', 'struct'):
 321                 # Get the full function name.
 322                 anc_node = anc.getElementsByTagName('compoundname')
 323                 cname = anc_node[0].firstChild.data
 324                 self.add_text(' %s::%s "\n%s'%(cname, name, defn))
 325
 326             for n in node.childNodes:
 327                 if n not in list(first.values()):
 328                     self.parse(n)
 329             self.add_text(['";', '\n'])
 330
 331     def do_definition(self, node):
 332         data = node.firstChild.data
 333         self.add_text('%s "\n%s'%(data, data))
 334
 335     def do_sectiondef(self, node):
 336         kind = node.attributes['kind'].value
 337         if kind in ('public-func', 'func', 'user-defined', ''):
 338             self.generic_parse(node)
 339
 340     def do_header(self, node):
 341         """For a user defined section def a header field is present
 342         which should not be printed as such, so we comment it in the
 343         output."""
 344         data = node.firstChild.data
 345         self.add_text('\n/*\n %s \n*/\n'%data)
 346         # If our immediate sibling is a 'description' node then we
 347         # should comment that out also and remove it from the parent
 348         # node's children.
 349         parent = node.parentNode
 350         idx = parent.childNodes.index(node)
 351         if len(parent.childNodes) >= idx + 2:
 352             nd = parent.childNodes[idx+2]
 353             if nd.nodeName == 'description':
 354                 nd = parent.removeChild(nd)
 355                 self.add_text('\n/*')
 356                 self.generic_parse(nd)
 357                 self.add_text('\n*/\n')
 358
 359     def do_simplesect(self, node):
 360         kind = node.attributes['kind'].value
 361         if kind in ('date', 'rcs', 'version'):
 362             pass
 363         elif kind == 'warning':
 364             self.add_text(['\n', 'WARNING: '])
 365             self.generic_parse(node)
 366         elif kind == 'see':
 367             self.add_text('\n')
 368             self.add_text('See: ')
 369             self.generic_parse(node)
 370         else:
 371             self.generic_parse(node)
 372
 373     def do_argsstring(self, node):
 374         self.generic_parse(node, pad=1)
 375
 376     def do_member(self, node):
 377         kind = node.attributes['kind'].value
 378         refid = node.attributes['refid'].value
 379         if kind == 'function' and refid[:9] == 'namespace':
 380             self.generic_parse(node)
 381
 382     def do_doxygenindex(self, node):
 383         self.multi = 1
 384         comps = node.getElementsByTagName('compound')
 385         for c in comps:
 386             refid = c.attributes['refid'].value
 387             fname = refid + '.xml'
 388             if not os.path.exists(fname):
 389                 fname = os.path.join(self.my_dir,  fname)
 390             if not self.quiet:
 391                 print("parsing file: %s"%fname)
 392             p = Doxy2SWIG(fname, self.include_function_definition, self.quiet)
 393             p.generate()
 394             self.pieces.extend(self.clean_pieces(p.pieces))
 395
 396     def write(self, fname):
 397         o = my_open_write(fname)
 398         if self.multi:
 399             o.write("".join(self.pieces))
 400         else:
 401             o.write("".join(self.clean_pieces(self.pieces)))
 402         o.close()
 403
 404     def clean_pieces(self, pieces):
 405         """Cleans the list of strings given as `pieces`.  It replaces
 406         multiple newlines by a maximum of 2 and returns a new list.
 407         It also wraps the paragraphs nicely.
 408
 409         """
 410         ret = []
 411         count = 0
 412         for i in pieces:
 413             if i == '\n':
 414                 count = count + 1
 415             else:
 416                 if i == '";':
 417                     if count:
 418                         ret.append('\n')
 419                 elif count > 2:
 420                     ret.append('\n\n')
 421                 elif count:
 422                     ret.append('\n'*count)
 423                 count = 0
 424                 ret.append(i)
 425
 426         _data = "".join(ret)
 427         ret = []
 428         for i in _data.split('\n\n'):
 429             if i == 'Parameters:' or i == 'Exceptions:':
 430                 ret.extend([i, '\n-----------', '\n\n'])
 431             elif i.find('// File:') > -1: # leave comments alone.
 432                 ret.extend([i, '\n'])
 433             else:
 434                 _tmp = textwrap.fill(i.strip(), break_long_words=False)
 435                 _tmp = self.lead_spc.sub(r'\1"\2', _tmp)
 436                 ret.extend([_tmp, '\n\n'])
 437         return ret
 438
 439
 440 def convert(input, output, include_function_definition=True, quiet=False):
 441     p = Doxy2SWIG(input, include_function_definition, quiet)
 442     p.generate()
 443     p.write(output)
 444
 445 def main():
 446     parser = argparse.ArgumentParser(description=__doc__, usage = __usage__)
 447     parser.add_argument("-n", '--no-function-definition',
 448                         action='store_true',
 449                         default=False,
 450                         dest='func_def',
 451                         help='do not include doxygen function definitions')
 452     parser.add_argument("-q", '--quiet',
 453                         action='store_true',
 454                         default=False,
 455                         dest='quiet',
 456                         help='be quiet and minimise output')
 457     parser.add_argument('input')
 458     parser.add_argument('ouput')
 459
 460     args = parser.parse_args()
 461
 462     convert(args.input, args.output, not options.func_def, options.quiet)
 463
 464
 465 if __name__ == '__main__':
 466     main()