From 09669178d1c257215276c0092186a82126430c03 Mon Sep 17 00:00:00 2001
From: Serge Rehbinder <serge.rehbinder@cea.fr>
Date: Thu, 25 Feb 2016 16:19:37 +0100
Subject: [PATCH] Add the patch command

---
 commands/patch.py                  |  168 ++++
 commands/source.py                 |   10 +-
 data/modules/MODULE_ARCHIVE.pyconf |    1 +
 src/patching.py                    | 1154 ++++++++++++++++++++++++++++
 4 files changed, 1327 insertions(+), 6 deletions(-)
 create mode 100644 commands/patch.py
 create mode 100755 src/patching.py
diff --git a/commands/patch.py b/commands/patch.py
new file mode 100644
index 0000000..a1eae4d
--- /dev/null
+++ b/commands/patch.py
@@ -0,0 +1,168 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+#  Copyright (C) 2010-2012  CEA/DEN
+#
+#  This library is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU Lesser General Public
+#  License as published by the Free Software Foundation; either
+#  version 2.1 of the License.
+#
+#  This library is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  Lesser General Public License for more details.
+#
+#  You should have received a copy of the GNU Lesser General Public
+#  License along with this library; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+
+import os
+import subprocess
+
+import src
+
+# Define all possible option for log command :  sat log <options>
+parser = src.options.Options()
+parser.add_option('m', 'module', 'list2', 'modules',
+    _('modules to get the sources. This option can be'
+    ' passed several time to get the sources of several modules.'))
+parser.add_option('', 'no_sample', 'boolean', 'no_sample', 
+    _("do not get sources from sample modules."))
+
+def apply_patch(config, module_info, logger):
+    if not "patches" in module_info or len(module_info.patches) == 0:
+        msg = _("No patch for the %s module") % module_info.name
+        logger.write(msg, 3)
+        logger.write("\n", 1)
+        return True, ""
+
+    if not os.path.exists(module_info.source_dir):
+        msg = _("No sources found for the %s module\n") % module_info.name
+        logger.write(src.printcolors.printcWarning(msg), 1)
+        return False, ""
+
+    retcode = []
+    res = []
+    for patch in module_info.patches:
+        details = []
+
+        if os.path.isfile(patch) and patch.endswith(".patch"):
+            #patch_exe = "patch" # old patch command (now replace by patch.py)
+            patch_exe = os.path.join(config.VARS.srcDir, "patching.py")
+            patch_cmd = "python %s -p1 -- < %s" % (patch_exe, patch)
+
+            logger.write(("    >%s\n" % patch_cmd),5)
+            res_cmd = (subprocess.call(patch_cmd, 
+                                   shell=True, 
+                                   cwd=module_info.source_dir,
+                                   stdout=logger.logTxtFile, 
+                                   stderr=subprocess.STDOUT) == 0)        
+        else:
+            res_cmd = False
+            details.append("  " + 
+                src.printcolors.printcError(_("Not a valid patch: %s") % patch))
+
+        res.append(res_cmd)
+        
+        if res_cmd:
+            message = (_("Apply patch %s") % 
+                       src.printcolors.printcHighlight(patch))
+        else:
+            message = src.printcolors.printcWarning(
+                                        _("Failed to apply patch %s") % patch)
+
+        if config.USER.output_level >= 3:
+            retcode.append("  %s" % message)
+        else:
+            retcode.append("%s: %s" % (module_info.name, message))
+        
+        if len(details) > 0:
+            retcode.extend(details)
+
+    res = not (False in res)
+    
+    return res, "\n".join(retcode) + "\n"
+
+def description():
+    '''method that is called when salomeTools is called with --help option.
+    
+    :return: The text to display for the patch command description.
+    :rtype: str
+    '''
+    return _("The patch command apply the patches on the sources of "
+             "the application modules if there is any")
+  
+def run(args, runner, logger):
+    '''method that is called when salomeTools is called with patch parameter.
+    '''
+    # Parse the options
+    (options, args) = parser.parse_args(args)
+    
+    # check that the command has been called with an application
+    src.check_config_has_application( runner.cfg )
+
+    # Print some informations
+    logger.write('Patching sources of the application %s\n' % 
+                src.printcolors.printcLabel(runner.cfg.VARS.application), 1)
+
+    src.printcolors.print_value(logger, 'out_dir', 
+                                runner.cfg.APPLICATION.out_dir, 2)
+    logger.write("\n", 2, False)
+
+    # Get the modules to be prepared, regarding the options
+    if options.modules is None:
+        # No options, get all modules sources
+        modules = runner.cfg.APPLICATION.modules
+    else:
+        # if option --modules, check that all modules of the command line
+        # are present in the application.
+        modules = options.modules
+        for m in modules:
+            if m not in runner.cfg.APPLICATION.modules:
+                raise src.SatException(_("Module %(module)s "
+                            "not defined in appplication %(application)s") %
+                { 'module': m, 'application': runner.cfg.VARS.application} )
+    
+    # Construct the list of tuple containing 
+    # the modules name and their definition
+    modules_infos = src.module.get_modules_infos(modules, runner.cfg)
+
+    # if the --no_sample option is invoked, suppress the sample modules from 
+    # the list
+    if options.no_sample:
+        modules_infos = filter(lambda l: not src.module.module_is_sample(l[1]),
+                         modules_infos)
+    
+    # Get the maximum name length in order to format the terminal display
+    max_module_name_len = 1
+    if len(modules_infos) > 0:
+        max_module_name_len = max(map(lambda l: len(l), modules_infos[0])) + 4
+    
+    # The loop on all the modules on which to apply the patches
+    good_result = 0
+    for module_name, module_info in modules_infos:
+        # display and log
+        logger.write('%s: ' % src.printcolors.printcLabel(module_name), 3)
+        logger.write(' ' * (max_module_name_len - len(module_name)), 3, False)
+        logger.write("\n", 4, False)
+        return_code, patch_res = apply_patch(runner.cfg, module_info, logger)
+        logger.write(patch_res, 1, False)
+        if return_code:
+            good_result += 1
+    
+    # Display the results (how much passed, how much failed, etc...)
+
+    logger.write("\n", 2, False)
+    if good_result == len(modules_infos):
+        status = src.OK_STATUS
+        res_count = "%d / %d" % (good_result, good_result)
+    else:
+        status = src.KO_STATUS
+        res_count = "%d / %d" % (good_result, len(modules))
+    
+    # write results
+    logger.write("Patching sources of the application:", 1)
+    logger.write(" " + src.printcolors.printc(status), 1, False)
+    logger.write(" (%s)\n" % res_count, 1, False)
+    
+    return len(modules_infos) - good_result
\ No newline at end of file
diff --git a/commands/source.py b/commands/source.py
index e64ea26..a3b68ec 100644
--- a/commands/source.py
+++ b/commands/source.py
@@ -359,11 +359,9 @@ def get_all_module_sources(config, modules, force, logger):
         max_module_name_len = max(map(lambda l: len(l), modules[0])) + 4
     
     # The loop on all the modules from which to get the sources
-    for module in modules:
+    for module_name, module_info in modules:
         # get module name, module informations and the directory where to put
         # the sources
-        module_name = module[0]
-        module_info = module[1]
         source_dir = src.Path(module_info.source_dir)
 
         # display and log
@@ -437,7 +435,7 @@ def run(args, runner, logger):
     src.check_config_has_application( runner.cfg )
 
     # Print some informations
-    logger.write(_('Preparing sources of application %s\n') % 
+    logger.write(_('Preparing sources of the application %s\n') % 
                 src.printcolors.printcLabel(runner.cfg.VARS.application), 1)
     src.printcolors.print_value(logger, 'out_dir', 
                                 runner.cfg.APPLICATION.out_dir, 2)
@@ -468,7 +466,7 @@ def run(args, runner, logger):
     # the modules name and their definition
     modules_infos = src.module.get_modules_infos(modules, runner.cfg)
 
-    # if the --no_sample option is invoked, supress the sample modules from 
+    # if the --no_sample option is invoked, suppress the sample modules from 
     # the list
     if options.no_sample:
         modules_infos = filter(lambda l: not src.module.module_is_sample(l[1]),
@@ -498,7 +496,7 @@ def run(args, runner, logger):
     result = len(modules) - good_result
 
     # write results
-    logger.write(_("Getting sources of application:"), 1)
+    logger.write(_("Getting sources of the application:"), 1)
     logger.write(" " + src.printcolors.printc(status), 1, False)
     logger.write(" (%s)\n" % res_count, 1, False)
 
diff --git a/data/modules/MODULE_ARCHIVE.pyconf b/data/modules/MODULE_ARCHIVE.pyconf
index 0c88156..77ef5d0 100644
--- a/data/modules/MODULE_ARCHIVE.pyconf
+++ b/data/modules/MODULE_ARCHIVE.pyconf
@@ -28,5 +28,6 @@ MODULES :
 	    module_type : "sample"
 	    source_dir : $APPLICATION.out_dir + $VARS.sep + 'SOURCES' + $VARS.sep + $name
 	    build_dir : $APPLICATION.out_dir + $VARS.sep + 'BUILD' + $VARS.sep + $name
+	    patches : ['/home/salome/salomeTools-4.3.7/data/compil_scripts/patches/scipy.patch', '/export/home/serioja/MODULE_ARCHIVE.patch']
 	}
 }
diff --git a/src/patching.py b/src/patching.py
new file mode 100755
index 0000000..21c1a69
--- /dev/null
+++ b/src/patching.py
@@ -0,0 +1,1154 @@
+#!/usr/bin/env python
+""" Patch utility to apply unified diffs
+
+    Brute-force line-by-line non-recursive parsing 
+
+    Copyright (c) 2008-2014 anatoly techtonik
+    Available under the terms of MIT license
+
+    Project home: http://code.google.com/p/python-patch/
+
+
+    $Id$
+    $HeadURL$
+"""
+
+__author__ = "anatoly techtonik <techtonik@gmail.com>"
+__version__ = "1.14dev"
+
+import copy
+import logging
+import re
+# cStringIO doesn't support unicode in 2.5
+from StringIO import StringIO
+import urllib2
+
+from os.path import exists, isfile, abspath
+import os
+import posixpath
+import shutil
+
+
+#------------------------------------------------
+# Logging is controlled by logger named after the
+# module name (e.g. 'patch' for patch.py module)
+
+debugmode = False
+
+logger = logging.getLogger(__name__)
+
+debug = logger.debug
+info = logger.info
+warning = logger.warning
+
+class NullHandler(logging.Handler):
+  """ Copied from Python 2.7 to avoid getting
+      `No handlers could be found for logger "patch"`
+      http://bugs.python.org/issue16539
+  """
+  def handle(self, record):
+    pass
+  def emit(self, record):
+    pass
+  def createLock(self):
+    self.lock = None
+
+logger.addHandler(NullHandler())
+
+#------------------------------------------------
+# Constants for Patch/PatchSet types
+
+DIFF = PLAIN = "plain"
+GIT = "git"
+HG = MERCURIAL = "mercurial"
+SVN = SUBVERSION = "svn"
+# mixed type is only actual when PatchSet contains
+# Patches of different type
+MIXED = MIXED = "mixed"
+
+
+#------------------------------------------------
+# Helpers (these could come with Python stdlib)
+
+# x...() function are used to work with paths in
+# cross-platform manner - all paths use forward
+# slashes even on Windows.
+
+def xisabs(filename):
+  """ Cross-platform version of `os.path.isabs()`
+      Returns True if `filename` is absolute on
+      Linux, OS X or Windows.
+  """
+  if filename.startswith('/'):     # Linux/Unix
+    return True
+  elif filename.startswith('\\'):  # Windows
+    return True
+  elif re.match(r'\w:[\\/]', filename): # Windows
+    return True
+  return False
+
+def xnormpath(path):
+  """ Cross-platform version of os.path.normpath """
+  # replace escapes and Windows slashes
+  normalized = posixpath.normpath(path).replace('\\', '/')
+  # fold the result
+  return posixpath.normpath(normalized)
+
+def xstrip(filename):
+  """ Make relative path out of absolute by stripping
+      prefixes used on Linux, OS X and Windows.
+
+      This function is critical for security.
+  """
+  while xisabs(filename):
+    # strip windows drive with all slashes
+    if re.match(r'\w:[\\/]', filename):
+      filename = re.sub(r'^\w+:[\\/]+', '', filename)
+    # strip all slashes
+    elif re.match(r'[\\/]', filename):
+      filename = re.sub(r'^[\\/]+', '', filename)
+  return filename
+
+#-----------------------------------------------
+# Main API functions
+
+def fromfile(filename):
+  """ Parse patch file. If successful, returns
+      PatchSet() object. Otherwise returns False.
+  """
+  patchset = PatchSet()
+  debug("reading %s" % filename)
+  fp = open(filename, "rb")
+  res = patchset.parse(fp)
+  fp.close()
+  if res == True:
+    return patchset
+  return False
+
+
+def fromstring(s):
+  """ Parse text string and return PatchSet()
+      object (or False if parsing fails)
+  """
+  ps = PatchSet( StringIO(s) )
+  if ps.errors == 0:
+    return ps
+  return False
+
+
+def fromurl(url):
+  """ Parse patch from an URL, return False
+      if an error occured. Note that this also
+      can throw urlopen() exceptions.
+  """
+  ps = PatchSet( urllib2.urlopen(url) )
+  if ps.errors == 0:
+    return ps
+  return False
+
+
+# --- Utility functions ---
+# [ ] reuse more universal pathsplit()
+def pathstrip(path, n):
+  """ Strip n leading components from the given path """
+  pathlist = [path]
+  while os.path.dirname(pathlist[0]) != '':
+    pathlist[0:1] = os.path.split(pathlist[0])
+  return '/'.join(pathlist[n:])
+# --- /Utility function ---
+
+
+class Hunk(object):
+  """ Parsed hunk data container (hunk starts with @@ -R +R @@) """
+
+  def __init__(self):
+    self.startsrc=None #: line count starts with 1
+    self.linessrc=None
+    self.starttgt=None
+    self.linestgt=None
+    self.invalid=False
+    self.desc=''
+    self.text=[]
+
+#  def apply(self, estream):
+#    """ write hunk data into enumerable stream
+#        return strings one by one until hunk is
+#        over
+#
+#        enumerable stream are tuples (lineno, line)
+#        where lineno starts with 0
+#    """
+#    pass
+
+
+class Patch(object):
+  """ Patch for a single file.
+      If used as an iterable, returns hunks.
+  """
+  def __init__(self):
+    self.source = None 
+    self.target = None
+    self.hunks = []
+    self.hunkends = []
+    self.header = []
+
+    self.type = None
+
+  def __iter__(self):
+    for h in self.hunks:
+      yield h
+
+
+class PatchSet(object):
+  """ PatchSet is a patch parser and container.
+      When used as an iterable, returns patches.
+  """
+
+  def __init__(self, stream=None):
+    # --- API accessible fields ---
+
+    # name of the PatchSet (filename or ...)
+    self.name = None
+    # patch set type - one of constants
+    self.type = None
+
+    # list of Patch objects
+    self.items = []
+
+    self.errors = 0    # fatal parsing errors
+    self.warnings = 0  # non-critical warnings
+    # --- /API ---
+
+    if stream:
+      self.parse(stream)
+
+  def __len__(self):
+    return len(self.items)
+
+  def __iter__(self):
+    for i in self.items:
+      yield i
+
+  def parse(self, stream):
+    """ parse unified diff
+        return True on success
+    """
+    lineends = dict(lf=0, crlf=0, cr=0)
+    nexthunkno = 0    #: even if index starts with 0 user messages number hunks from 1
+
+    p = None
+    hunk = None
+    # hunkactual variable is used to calculate hunk lines for comparison
+    hunkactual = dict(linessrc=None, linestgt=None)
+
+
+    class wrapumerate(enumerate):
+      """Enumerate wrapper that uses boolean end of stream status instead of
+      StopIteration exception, and properties to access line information.
+      """
+
+      def __init__(self, *args, **kwargs):
+        # we don't call parent, it is magically created by __new__ method
+
+        self._exhausted = False
+        self._lineno = False     # after end of stream equal to the num of lines
+        self._line = False       # will be reset to False after end of stream
+
+      def next(self):
+        """Try to read the next line and return True if it is available,
+           False if end of stream is reached."""
+        if self._exhausted:
+          return False
+
+        try:
+          self._lineno, self._line = super(wrapumerate, self).next()
+        except StopIteration:
+          self._exhausted = True
+          self._line = False
+          return False
+        return True
+
+      @property
+      def is_empty(self):
+        return self._exhausted
+
+      @property
+      def line(self):
+        return self._line
+
+      @property
+      def lineno(self):
+        return self._lineno
+
+    # define states (possible file regions) that direct parse flow
+    headscan  = True  # start with scanning header
+    filenames = False # lines starting with --- and +++
+
+    hunkhead = False  # @@ -R +R @@ sequence
+    hunkbody = False  #
+    hunkskip = False  # skipping invalid hunk mode
+
+    hunkparsed = False # state after successfully parsed hunk
+
+    # regexp to match start of hunk, used groups - 1,3,4,6
+    re_hunk_start = re.compile("^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@")
+    
+    self.errors = 0
+    # temp buffers for header and filenames info
+    header = []
+    srcname = None
+    tgtname = None
+
+    # start of main cycle
+    # each parsing block already has line available in fe.line
+    fe = wrapumerate(stream)
+    while fe.next():
+
+      # -- deciders: these only switch state to decide who should process
+      # --           line fetched at the start of this cycle
+      if hunkparsed:
+        hunkparsed = False
+        if re_hunk_start.match(fe.line):
+            hunkhead = True
+        elif fe.line.startswith("--- "):
+            filenames = True
+        else:
+            headscan = True
+      # -- ------------------------------------
+
+      # read out header
+      if headscan:
+        while not fe.is_empty and not fe.line.startswith("--- "):
+            header.append(fe.line)
+            fe.next()
+        if fe.is_empty:
+            if p == None:
+              debug("no patch data found")  # error is shown later
+              self.errors += 1
+            else:
+              info("%d unparsed bytes left at the end of stream" % len(''.join(header)))
+              self.warnings += 1
+              # TODO check for \No new line at the end.. 
+              # TODO test for unparsed bytes
+              # otherwise error += 1
+            # this is actually a loop exit
+            continue
+
+        headscan = False
+        # switch to filenames state
+        filenames = True
+
+      line = fe.line
+      lineno = fe.lineno
+
+
+      # hunkskip and hunkbody code skipped until definition of hunkhead is parsed
+      if hunkbody:
+        # [x] treat empty lines inside hunks as containing single space
+        #     (this happens when diff is saved by copy/pasting to editor
+        #      that strips trailing whitespace)
+        if line.strip("\r\n") == "":
+            debug("expanding empty line in a middle of hunk body")
+            self.warnings += 1
+            line = ' ' + line
+
+        # process line first
+        if re.match(r"^[- \+\\]", line):
+            # gather stats about line endings
+            if line.endswith("\r\n"):
+              p.hunkends["crlf"] += 1
+            elif line.endswith("\n"):
+              p.hunkends["lf"] += 1
+            elif line.endswith("\r"):
+              p.hunkends["cr"] += 1
+              
+            if line.startswith("-"):
+              hunkactual["linessrc"] += 1
+            elif line.startswith("+"):
+              hunkactual["linestgt"] += 1
+            elif not line.startswith("\\"):
+              hunkactual["linessrc"] += 1
+              hunkactual["linestgt"] += 1
+            hunk.text.append(line)
+            # todo: handle \ No newline cases
+        else:
+            warning("invalid hunk no.%d at %d for target file %s" % (nexthunkno, lineno+1, p.target))
+            # add hunk status node
+            hunk.invalid = True
+            p.hunks.append(hunk)
+            self.errors += 1
+            # switch to hunkskip state
+            hunkbody = False
+            hunkskip = True
+
+        # check exit conditions
+        if hunkactual["linessrc"] > hunk.linessrc or hunkactual["linestgt"] > hunk.linestgt:
+            warning("extra lines for hunk no.%d at %d for target %s" % (nexthunkno, lineno+1, p.target))
+            # add hunk status node
+            hunk.invalid = True
+            p.hunks.append(hunk)
+            self.errors += 1
+            # switch to hunkskip state
+            hunkbody = False
+            hunkskip = True
+        elif hunk.linessrc == hunkactual["linessrc"] and hunk.linestgt == hunkactual["linestgt"]:
+            # hunk parsed successfully
+            p.hunks.append(hunk)
+            # switch to hunkparsed state
+            hunkbody = False
+            hunkparsed = True
+
+            # detect mixed window/unix line ends
+            ends = p.hunkends
+            if ((ends["cr"]!=0) + (ends["crlf"]!=0) + (ends["lf"]!=0)) > 1:
+              warning("inconsistent line ends in patch hunks for %s" % p.source)
+              self.warnings += 1
+            if debugmode:
+              debuglines = dict(ends)
+              debuglines.update(file=p.target, hunk=nexthunkno)
+              debug("crlf: %(crlf)d  lf: %(lf)d  cr: %(cr)d\t - file: %(file)s hunk: %(hunk)d" % debuglines)
+            # fetch next line
+            continue
+
+      if hunkskip:
+        if re_hunk_start.match(line):
+          # switch to hunkhead state
+          hunkskip = False
+          hunkhead = True
+        elif line.startswith("--- "):
+          # switch to filenames state
+          hunkskip = False
+          filenames = True
+          if debugmode and len(self.items) > 0:
+            debug("- %2d hunks for %s" % (len(p.hunks), p.source))
+
+      if filenames:
+        if line.startswith("--- "):
+          if srcname != None:
+            # XXX testcase
+            warning("skipping false patch for %s" % srcname)
+            srcname = None
+            # XXX header += srcname
+            # double source filename line is encountered
+            # attempt to restart from this second line
+          re_filename = "^--- ([^\t]+)"
+          match = re.match(re_filename, line)
+          # todo: support spaces in filenames
+          if match:
+            srcname = match.group(1).strip()
+          else:
+            warning("skipping invalid filename at line %d" % lineno)
+            self.errors += 1
+            # XXX p.header += line
+            # switch back to headscan state
+            filenames = False
+            headscan = True
+        elif not line.startswith("+++ "):
+          if srcname != None:
+            warning("skipping invalid patch with no target for %s" % srcname)
+            self.errors += 1
+            srcname = None
+            # XXX header += srcname
+            # XXX header += line
+          else:
+            # this should be unreachable
+            warning("skipping invalid target patch")
+          filenames = False
+          headscan = True
+        else:
+          if tgtname != None:
+            # XXX seems to be a dead branch  
+            warning("skipping invalid patch - double target at line %d" % lineno)
+            self.errors += 1
+            srcname = None
+            tgtname = None
+            # XXX header += srcname
+            # XXX header += tgtname
+            # XXX header += line
+            # double target filename line is encountered
+            # switch back to headscan state
+            filenames = False
+            headscan = True
+          else:
+            re_filename = "^\+\+\+ ([^\t]+)"
+            match = re.match(re_filename, line)
+            if not match:
+              warning("skipping invalid patch - no target filename at line %d" % lineno)
+              self.errors += 1
+              srcname = None
+              # switch back to headscan state
+              filenames = False
+              headscan = True
+            else:
+              if p: # for the first run p is None
+                self.items.append(p)
+              p = Patch()
+              p.source = srcname
+              srcname = None
+              p.target = match.group(1).strip()
+              p.header = header
+              header = []
+              # switch to hunkhead state
+              filenames = False
+              hunkhead = True
+              nexthunkno = 0
+              p.hunkends = lineends.copy()
+              continue
+
+      if hunkhead:
+        match = re.match("^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@(.*)", line)
+        if not match:
+          if not p.hunks:
+            warning("skipping invalid patch with no hunks for file %s" % p.source)
+            self.errors += 1
+            # XXX review switch
+            # switch to headscan state
+            hunkhead = False
+            headscan = True
+            continue
+          else:
+            # TODO review condition case
+            # switch to headscan state
+            hunkhead = False
+            headscan = True
+        else:
+          hunk = Hunk()
+          hunk.startsrc = int(match.group(1))
+          hunk.linessrc = 1
+          if match.group(3): hunk.linessrc = int(match.group(3))
+          hunk.starttgt = int(match.group(4))
+          hunk.linestgt = 1
+          if match.group(6): hunk.linestgt = int(match.group(6))
+          hunk.invalid = False
+          hunk.desc = match.group(7)[1:].rstrip()
+          hunk.text = []
+
+          hunkactual["linessrc"] = hunkactual["linestgt"] = 0
+
+          # switch to hunkbody state
+          hunkhead = False
+          hunkbody = True
+          nexthunkno += 1
+          continue
+
+    # /while fe.next()
+
+    if p:
+      self.items.append(p)
+
+    if not hunkparsed:
+      if hunkskip:
+        warning("warning: finished with errors, some hunks may be invalid")
+      elif headscan:
+        if len(self.items) == 0:
+          warning("error: no patch data found!")
+          return False
+        else: # extra data at the end of file
+          pass 
+      else:
+        warning("error: patch stream is incomplete!")
+        self.errors += 1
+        if len(self.items) == 0:
+          return False
+
+    if debugmode and len(self.items) > 0:
+        debug("- %2d hunks for %s" % (len(p.hunks), p.source))
+
+    # XXX fix total hunks calculation
+    debug("total files: %d  total hunks: %d" % (len(self.items),
+        sum(len(p.hunks) for p in self.items)))
+
+    # ---- detect patch and patchset types ----
+    for idx, p in enumerate(self.items):
+      self.items[idx].type = self._detect_type(p)
+
+    types = set([p.type for p in self.items])
+    if len(types) > 1:
+      self.type = MIXED
+    else:
+      self.type = types.pop()
+    # --------
+
+    self._normalize_filenames()
+    
+    return (self.errors == 0)
+
+  def _detect_type(self, p):
+    """ detect and return type for the specified Patch object
+        analyzes header and filenames info
+
+        NOTE: must be run before filenames are normalized
+    """
+
+    # check for SVN
+    #  - header starts with Index:
+    #  - next line is ===... delimiter
+    #  - filename is followed by revision number
+    # TODO add SVN revision
+    if (len(p.header) > 1 and p.header[-2].startswith("Index: ")
+          and p.header[-1].startswith("="*67)):
+        return SVN
+
+    # common checks for both HG and GIT
+    DVCS = ((p.source.startswith('a/') or p.source == '/dev/null')
+        and (p.target.startswith('b/') or p.target == '/dev/null'))
+
+    # GIT type check
+    #  - header[-2] is like "diff --git a/oldname b/newname"
+    #  - header[-1] is like "index <hash>..<hash> <mode>"
+    # TODO add git rename diffs and add/remove diffs
+    #      add git diff with spaced filename
+    # TODO http://www.kernel.org/pub/software/scm/git/docs/git-diff.html
+
+    # detect the start of diff header - there might be some comments before
+    if len(p.header) > 1:
+      for idx in reversed(range(len(p.header))):
+        if p.header[idx].startswith("diff --git"):
+          break
+      if re.match(r'diff --git a/[\w/.]+ b/[\w/.]+', p.header[idx]):
+        if (idx+1 < len(p.header)
+            and re.match(r'index \w{7}..\w{7} \d{6}', p.header[idx+1])):
+          if DVCS:
+            return GIT
+
+    # HG check
+    # 
+    #  - for plain HG format header is like "diff -r b2d9961ff1f5 filename"
+    #  - for Git-style HG patches it is "diff --git a/oldname b/newname"
+    #  - filename starts with a/, b/ or is equal to /dev/null
+    #  - exported changesets also contain the header
+    #    # HG changeset patch
+    #    # User name@example.com
+    #    ...   
+    # TODO add MQ
+    # TODO add revision info
+    if len(p.header) > 0:
+      if DVCS and re.match(r'diff -r \w{12} .*', p.header[-1]):
+        return HG
+      if DVCS and p.header[-1].startswith('diff --git a/'):
+        if len(p.header) == 1:  # native Git patch header len is 2
+          return HG
+        elif p.header[0].startswith('# HG changeset patch'):
+          return HG
+
+    return PLAIN
+
+
+  def _normalize_filenames(self):
+    """ sanitize filenames, normalizing paths, i.e.:
+        1. strip a/ and b/ prefixes from GIT and HG style patches
+        2. remove all references to parent directories (with warning)
+        3. translate any absolute paths to relative (with warning)
+
+        [x] always use forward slashes to be crossplatform
+            (diff/patch were born as a unix utility after all)
+        
+        return None
+    """
+    for i,p in enumerate(self.items):
+      if p.type in (HG, GIT):
+        # TODO: figure out how to deal with /dev/null entries
+        debug("stripping a/ and b/ prefixes")
+        if p.source != '/dev/null':
+          if not p.source.startswith("a/"):
+            warning("invalid source filename")
+          else:
+            p.source = p.source[2:]
+        if p.target != '/dev/null':
+          if not p.target.startswith("b/"):
+            warning("invalid target filename")
+          else:
+            p.target = p.target[2:]
+
+      p.source = xnormpath(p.source)
+      p.target = xnormpath(p.target)
+
+      sep = '/'  # sep value can be hardcoded, but it looks nice this way
+
+      # references to parent are not allowed
+      if p.source.startswith(".." + sep):
+        warning("error: stripping parent path for source file patch no.%d" % (i+1))
+        self.warnings += 1
+        while p.source.startswith(".." + sep):
+          p.source = p.source.partition(sep)[2]
+      if p.target.startswith(".." + sep):
+        warning("error: stripping parent path for target file patch no.%d" % (i+1))
+        self.warnings += 1
+        while p.target.startswith(".." + sep):
+          p.target = p.target.partition(sep)[2]
+      # absolute paths are not allowed
+      if xisabs(p.source) or xisabs(p.target):
+        warning("error: absolute paths are not allowed - file no.%d" % (i+1))
+        self.warnings += 1
+        if xisabs(p.source):
+          warning("stripping absolute path from source name '%s'" % p.source)
+          p.source = xstrip(p.source)
+        if xisabs(p.target):
+          warning("stripping absolute path from target name '%s'" % p.target)
+          p.target = xstrip(p.target)
+    
+      self.items[i].source = p.source
+      self.items[i].target = p.target
+
+
+  def diffstat(self):
+    """ calculate diffstat and return as a string
+        Notes:
+          - original diffstat ouputs target filename
+          - single + or - shouldn't escape histogram
+    """
+    names = []
+    insert = []
+    delete = []
+    delta = 0    # size change in bytes
+    namelen = 0
+    maxdiff = 0  # max number of changes for single file
+                 # (for histogram width calculation)
+    for patch in self.items:
+      i,d = 0,0
+      for hunk in patch.hunks:
+        for line in hunk.text:
+          if line.startswith('+'):
+            i += 1
+            delta += len(line)-1
+          elif line.startswith('-'):
+            d += 1
+            delta -= len(line)-1
+      names.append(patch.target)
+      insert.append(i)
+      delete.append(d)
+      namelen = max(namelen, len(patch.target))
+      maxdiff = max(maxdiff, i+d)
+    output = ''
+    statlen = len(str(maxdiff))  # stats column width
+    for i,n in enumerate(names):
+      # %-19s | %-4d %s
+      format = " %-" + str(namelen) + "s | %" + str(statlen) + "s %s\n"
+
+      hist = ''
+      # -- calculating histogram --
+      width = len(format % ('', '', ''))
+      histwidth = max(2, 80 - width)
+      if maxdiff < histwidth:
+        hist = "+"*insert[i] + "-"*delete[i]
+      else:
+        iratio = (float(insert[i]) / maxdiff) * histwidth
+        dratio = (float(delete[i]) / maxdiff) * histwidth
+
+        # make sure every entry gets at least one + or -
+        iwidth = 1 if 0 < iratio < 1 else int(iratio)
+        dwidth = 1 if 0 < dratio < 1 else int(dratio)
+        #print iratio, dratio, iwidth, dwidth, histwidth
+        hist = "+"*int(iwidth) + "-"*int(dwidth)
+      # -- /calculating +- histogram --
+      output += (format % (names[i], insert[i] + delete[i], hist))
+ 
+    output += (" %d files changed, %d insertions(+), %d deletions(-), %+d bytes"
+               % (len(names), sum(insert), sum(delete), delta))
+    return output
+
+
+  def findfile(self, old, new):
+    """ return name of file to be patched or None """
+    if exists(old):
+      return old
+    elif exists(new):
+      return new
+    else:
+      # [w] Google Code generates broken patches with its online editor
+      debug("broken patch from Google Code, stripping prefixes..")
+      if old.startswith('a/') and new.startswith('b/'):
+        old, new = old[2:], new[2:]
+        debug("   %s" % old)
+        debug("   %s" % new)
+        if exists(old):
+          return old
+        elif exists(new):
+          return new
+      return None
+
+
+  def apply(self, strip=0, root=None):
+    """ Apply parsed patch, optionally stripping leading components
+        from file paths. `root` parameter specifies working dir.
+        return True on success
+    """
+    if root:
+      prevdir = os.getcwd()
+      os.chdir(root)
+
+    total = len(self.items)
+    errors = 0
+    if strip:
+      # [ ] test strip level exceeds nesting level
+      #   [ ] test the same only for selected files
+      #     [ ] test if files end up being on the same level
+      try:
+        strip = int(strip)
+      except ValueError:
+        errors += 1
+        warning("error: strip parameter '%s' must be an integer" % strip)
+        strip = 0
+
+    #for fileno, filename in enumerate(self.source):
+    for i,p in enumerate(self.items):
+      if strip:
+        debug("stripping %s leading component(s) from:" % strip)
+        debug("   %s" % p.source)
+        debug("   %s" % p.target)
+        old = pathstrip(p.source, strip)
+        new = pathstrip(p.target, strip)
+      else:
+        old, new = p.source, p.target
+
+      filename = self.findfile(old, new)
+
+      if not filename:
+          warning("source/target file does not exist:\n  --- %s\n  +++ %s" % (old, new))
+          errors += 1
+          continue
+      if not isfile(filename):
+        warning("not a file - %s" % filename)
+        errors += 1
+        continue
+
+      # [ ] check absolute paths security here
+      debug("processing %d/%d:\t %s" % (i+1, total, filename))
+
+      # validate before patching
+      f2fp = open(filename)
+      hunkno = 0
+      hunk = p.hunks[hunkno]
+      hunkfind = []
+      hunkreplace = []
+      validhunks = 0
+      canpatch = False
+      for lineno, line in enumerate(f2fp):
+        if lineno+1 < hunk.startsrc:
+          continue
+        elif lineno+1 == hunk.startsrc:
+          hunkfind = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " -"]
+          hunkreplace = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " +"]
+          #pprint(hunkreplace)
+          hunklineno = 0
+
+          # todo \ No newline at end of file
+
+        # check hunks in source file
+        if lineno+1 < hunk.startsrc+len(hunkfind)-1:
+          if line.rstrip("\r\n") == hunkfind[hunklineno]:
+            hunklineno+=1
+          else:
+            info("file %d/%d:\t %s" % (i+1, total, filename))
+            info(" hunk no.%d doesn't match source file at line %d" % (hunkno+1, lineno))
+            info("  expected: %s" % hunkfind[hunklineno])
+            info("  actual  : %s" % line.rstrip("\r\n"))
+            # not counting this as error, because file may already be patched.
+            # check if file is already patched is done after the number of
+            # invalid hunks if found
+            # TODO: check hunks against source/target file in one pass
+            #   API - check(stream, srchunks, tgthunks)
+            #           return tuple (srcerrs, tgterrs)
+
+            # continue to check other hunks for completeness
+            hunkno += 1
+            if hunkno < len(p.hunks):
+              hunk = p.hunks[hunkno]
+              continue
+            else:
+              break
+
+        # check if processed line is the last line
+        if lineno+1 == hunk.startsrc+len(hunkfind)-1:
+          debug(" hunk no.%d for file %s  -- is ready to be patched" % (hunkno+1, filename))
+          hunkno+=1
+          validhunks+=1
+          if hunkno < len(p.hunks):
+            hunk = p.hunks[hunkno]
+          else:
+            if validhunks == len(p.hunks):
+              # patch file
+              canpatch = True
+              break
+      else:
+        if hunkno < len(p.hunks):
+          warning("premature end of source file %s at hunk %d" % (filename, hunkno+1))
+          errors += 1
+
+      f2fp.close()
+
+      if validhunks < len(p.hunks):
+        if self._match_file_hunks(filename, p.hunks):
+          warning("already patched  %s" % filename)
+        else:
+          warning("source file is different - %s" % filename)
+          errors += 1
+      if canpatch:
+        backupname = filename+".orig"
+        if exists(backupname):
+          warning("can't backup original file to %s - aborting" % backupname)
+        else:
+          import shutil
+          shutil.move(filename, backupname)
+          if self.write_hunks(backupname, filename, p.hunks):
+            info("successfully patched %d/%d:\t %s" % (i+1, total, filename))
+            os.unlink(backupname)
+          else:
+            errors += 1
+            warning("error patching file %s" % filename)
+            shutil.copy(filename, filename+".invalid")
+            warning("invalid version is saved to %s" % filename+".invalid")
+            # todo: proper rejects
+            shutil.move(backupname, filename)
+
+    if root:
+      os.chdir(prevdir)
+
+    # todo: check for premature eof
+    return (errors == 0)
+
+
+  def _reverse(self):
+    """ reverse patch direction (this doesn't touch filenames) """
+    for p in self.items:
+      for h in p.hunks:
+        h.startsrc, h.starttgt = h.starttgt, h.startsrc
+        h.linessrc, h.linestgt = h.linestgt, h.linessrc
+        for i,line in enumerate(h.text):
+          if line[0] == '+':
+            h.text[i] = '-' + line[1:]
+          elif line[0] == '-':
+            h.text[i] = '+' +line[1:]
+
+  def revert(self, strip=0, root=None):
+    """ apply patch in reverse order """
+    reverted = copy.deepcopy(self)
+    reverted._reverse()
+    return reverted.apply(strip, root)
+
+
+  def can_patch(self, filename):
+    """ Check if specified filename can be patched. Returns None if file can
+    not be found among source filenames. False if patch can not be applied
+    clearly. True otherwise.
+
+    :returns: True, False or None
+    """
+    filename = abspath(filename)
+    for p in self.items:
+      if filename == abspath(p.source):
+        return self._match_file_hunks(filename, p.hunks)
+    return None
+
+
+  def _match_file_hunks(self, filepath, hunks):
+    matched = True
+    fp = open(abspath(filepath))
+
+    class NoMatch(Exception):
+      pass
+
+    lineno = 1
+    line = fp.readline()
+    hno = None
+    try:
+      for hno, h in enumerate(hunks):
+        # skip to first line of the hunk
+        while lineno < h.starttgt:
+          if not len(line): # eof
+            debug("check failed - premature eof before hunk: %d" % (hno+1))
+            raise NoMatch
+          line = fp.readline()
+          lineno += 1
+        for hline in h.text:
+          if hline.startswith("-"):
+            continue
+          if not len(line):
+            debug("check failed - premature eof on hunk: %d" % (hno+1))
+            # todo: \ No newline at the end of file
+            raise NoMatch
+          if line.rstrip("\r\n") != hline[1:].rstrip("\r\n"):
+            debug("file is not patched - failed hunk: %d" % (hno+1))
+            raise NoMatch
+          line = fp.readline()
+          lineno += 1
+
+    except NoMatch:
+      matched = False
+      # todo: display failed hunk, i.e. expected/found
+
+    fp.close()
+    return matched
+
+
+  def patch_stream(self, instream, hunks):
+    """ Generator that yields stream patched with hunks iterable
+    
+        Converts lineends in hunk lines to the best suitable format
+        autodetected from input
+    """
+
+    # todo: At the moment substituted lineends may not be the same
+    #       at the start and at the end of patching. Also issue a
+    #       warning/throw about mixed lineends (is it really needed?)
+
+    hunks = iter(hunks)
+
+    srclineno = 1
+
+    lineends = {'\n':0, '\r\n':0, '\r':0}
+    def get_line():
+      """
+      local utility function - return line from source stream
+      collecting line end statistics on the way
+      """
+      line = instream.readline()
+        # 'U' mode works only with text files
+      if line.endswith("\r\n"):
+        lineends["\r\n"] += 1
+      elif line.endswith("\n"):
+        lineends["\n"] += 1
+      elif line.endswith("\r"):
+        lineends["\r"] += 1
+      return line
+
+    for hno, h in enumerate(hunks):
+      debug("hunk %d" % (hno+1))
+      # skip to line just before hunk starts
+      while srclineno < h.startsrc:
+        yield get_line()
+        srclineno += 1
+
+      for hline in h.text:
+        # todo: check \ No newline at the end of file
+        if hline.startswith("-") or hline.startswith("\\"):
+          get_line()
+          srclineno += 1
+          continue
+        else:
+          if not hline.startswith("+"):
+            get_line()
+            srclineno += 1
+          line2write = hline[1:]
+          # detect if line ends are consistent in source file
+          if sum([bool(lineends[x]) for x in lineends]) == 1:
+            newline = [x for x in lineends if lineends[x] != 0][0]
+            yield line2write.rstrip("\r\n")+newline
+          else: # newlines are mixed
+            yield line2write
+     
+    for line in instream:
+      yield line
+
+
+  def write_hunks(self, srcname, tgtname, hunks):
+    src = open(srcname, "rb")
+    tgt = open(tgtname, "wb")
+
+    debug("processing target file %s" % tgtname)
+
+    tgt.writelines(self.patch_stream(src, hunks))
+
+    tgt.close()
+    src.close()
+    # [ ] TODO: add test for permission copy
+    shutil.copymode(srcname, tgtname)
+    return True
+
+
+  def dump(self):
+    for p in self.items:
+      for headline in p.header:
+        print headline.rstrip('\n')
+      print '--- ' + p.source
+      print '+++ ' + p.target
+      for h in p.hunks:
+        print '@@ -%s,%s +%s,%s @@' % (h.startsrc, h.linessrc, h.starttgt, h.linestgt)
+        for line in h.text:
+          print line.rstrip('\n')
+
+
+def main():
+  from optparse import OptionParser
+  from os.path import exists
+  import sys
+
+  opt = OptionParser(usage="1. %prog [options] unified.diff\n"
+                    "       2. %prog [options] http://host/patch\n"
+                    "       3. %prog [options] -- < unified.diff",
+                     version="python-patch %s" % __version__)
+  opt.add_option("-q", "--quiet", action="store_const", dest="verbosity",
+                                  const=0, help="print only warnings and errors", default=1)
+  opt.add_option("-v", "--verbose", action="store_const", dest="verbosity",
+                                  const=2, help="be verbose")
+  opt.add_option("--debug", action="store_true", dest="debugmode", help="debug mode")
+  opt.add_option("--diffstat", action="store_true", dest="diffstat",
+                                           help="print diffstat and exit")
+  opt.add_option("-d", "--directory", metavar='DIR',
+                                           help="specify root directory for applying patch")
+  opt.add_option("-p", "--strip", type="int", metavar='N', default=0,
+                                           help="strip N path components from filenames")
+  opt.add_option("--revert", action="store_true",
+                                           help="apply patch in reverse order (unpatch)")
+  (options, args) = opt.parse_args()
+
+  if not args and sys.argv[-1:] != ['--']:
+    opt.print_version()
+    opt.print_help()
+    sys.exit()
+  readstdin = (sys.argv[-1:] == ['--'] and not args)
+
+  debugmode = options.debugmode
+
+  verbosity_levels = {0:logging.WARNING, 1:logging.INFO, 2:logging.DEBUG}
+  loglevel = verbosity_levels[options.verbosity]
+  logformat = "%(message)s"
+  if debugmode:
+    loglevel = logging.DEBUG
+    logformat = "%(levelname)8s %(message)s"
+  logger.setLevel(loglevel)
+  loghandler = logging.StreamHandler()
+  loghandler.setFormatter(logging.Formatter(logformat))
+  logger.addHandler(loghandler)
+
+
+  if readstdin:
+    patch = PatchSet(sys.stdin)
+  else:
+    patchfile = args[0]
+    urltest = patchfile.split(':')[0]
+    if (':' in patchfile and urltest.isalpha()
+        and len(urltest) > 1): # one char before : is a windows drive letter
+      patch = fromurl(patchfile)
+    else:
+      if not exists(patchfile) or not isfile(patchfile):
+        sys.exit("patch file does not exist - %s" % patchfile)
+      patch = fromfile(patchfile)
+
+  if options.diffstat:
+    print patch.diffstat()
+    sys.exit(0)
+
+  #pprint(patch)
+  if options.revert:
+    patch.revert(options.strip, root=options.directory) or sys.exit(-1)
+  else:
+    patch.apply(options.strip, root=options.directory) or sys.exit(-1)
+
+  # todo: document and test line ends handling logic - patch.py detects proper line-endings
+  #       for inserted hunks and issues a warning if patched file has incosistent line ends
+
+
+if __name__ == "__main__":
+  main()
+
+# Legend:
+# [ ]  - some thing to be done
+# [w]  - official wart, external or internal that is unlikely to be fixed
+
+# [ ] API break (2.x) wishlist
+# PatchSet.items  -->  PatchSet.patches
+
+# [ ] run --revert test for all dataset items
+# [ ] run .parse() / .dump() test for dataset
+
-- 
2.39.2