From: Ovidiu Mircescu <ovidiu.mircescu@edf.fr>
Date: Tue, 2 Jul 2019 15:58:44 +0000 (+0200)
Subject: Rollback the removal of the samplecsviterator.
X-Git-Tag: V9_4_0rc1~10
X-Git-Url: http://git.salome-platform.org/gitweb/?a=commitdiff_plain;h=5183cf2e734d96a962e90b1289644d5f653d49f9;p=tools%2Fydefx.git

Rollback the removal of the samplecsviterator.

Some refactoring.
---

diff --git a/src/cpp/TMonoPyJob.hxx b/src/cpp/TMonoPyJob.hxx
index d5bb00b..72b2e85 100644
--- a/src/cpp/TMonoPyJob.hxx
+++ b/src/cpp/TMonoPyJob.hxx
@@ -58,7 +58,7 @@ public:
   : MonoPyJob()
   , _sample(sample)
   {
-    if(_lastError.empty()) // no errors during parent contruction
+    if(_lastError.empty()) // no errors during parent construction
     {
       try
       {
diff --git a/src/pydefx/CMakeLists.txt b/src/pydefx/CMakeLists.txt
index e8b1d94..6d5d94c 100644
--- a/src/pydefx/CMakeLists.txt
+++ b/src/pydefx/CMakeLists.txt
@@ -23,6 +23,7 @@ SET(SCRIPTS
   pyscript.py
   pystudy.py
   sample.py
+  samplecsviterator.py
   samplecsvmanager.py
   defaultschemabuilder.py
   )
diff --git a/src/pydefx/pystudy.py b/src/pydefx/pystudy.py
index 6852dec..0b43a16 100644
--- a/src/pydefx/pystudy.py
+++ b/src/pydefx/pystudy.py
@@ -71,7 +71,7 @@ class PyStudy:
     """
     Recover a study from a result directory where a previous study was launched.
     """
-    self.sample = self.sampleManager.loadSample(path)
+    self.sample = self.sampleManager.restoreSample(path)
     job_string = loadJobString(path)
     launcher = salome.naming_service.Resolve('/SalomeLauncher')
     self.job_id = launcher.restoreJob(job_string)
@@ -94,7 +94,7 @@ class PyStudy:
       salome_params = launcher.getJobParameters(self.job_id)
       self.params = parameters.Parameters(salome_parameters=salome_params)
       #TODO: sampleManager should be loaded from result_directory
-      self.sample = self.sampleManager.loadSample(
+      self.sample = self.sampleManager.restoreSample(
                                                  salome_params.result_directory)
       self.getResult()
     else:
@@ -112,7 +112,7 @@ class PyStudy:
     salome_params = launcher.getJobParameters(job_id)
     self.params = parameters.Parameters(salome_parameters=salome_params)
     #TODO: sampleManager should be loaded from result_directory
-    self.sample = self.sampleManager.loadSample(salome_params.result_directory)
+    self.sample=self.sampleManager.restoreSample(salome_params.result_directory)
     self.script = None
     return
 
diff --git a/src/pydefx/samplecsviterator.py b/src/pydefx/samplecsviterator.py
new file mode 100644
index 0000000..e8063e5
--- /dev/null
+++ b/src/pydefx/samplecsviterator.py
@@ -0,0 +1,194 @@
+# Copyright (C) 2019  EDF R&D
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+#
+# See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+#
+import csv
+import numbers
+import pickle
+import os
+
+class SampleIterator:
+  """
+  Iterator used to iterate over the input values of a sample, adding an order
+  number. The order number is the id you get as the first parameter of the
+  function addResult.
+  """
+  DATAFILE = "idefixdata.csv"
+  OUTPUTNAMESFILE = "idefixoutputnames.csv"
+  RESULTDIR = "idefixresult" # directory which contains all the result files
+  RESULTFILE = "idefixresult.csv" # main result file - values for every point
+  GLOBALFILE = "idefixglobal"     # global result - one value for the whole simulation
+  ERRORCOLUMN = "idefix_error"
+  IDCOLUMN ="idefix_id"
+  ESCAPE_CHAR = "@"   # prefix a value that needs particular save/load procedure
+  PICK_CHAR = "p"     # @p : csv value saved in another file using pickle
+
+  def __init__(self, directory=None):
+    if directory:
+      datapath = os.path.join(directory, SampleIterator.DATAFILE)
+      outputnamespath = os.path.join(directory, SampleIterator.OUTPUTNAMESFILE)
+      self.directory = directory
+    else:
+      datapath = SampleIterator.DATAFILE
+      outputnamespath = SampleIterator.OUTPUTNAMESFILE
+      self.directory = None
+    self.result_file = None
+    self.datafile = open(datapath, newline='')
+    self.data     = csv.DictReader(self.datafile, quoting=csv.QUOTE_NONNUMERIC)
+    self.inputnames = self.data.fieldnames
+    self.outputnames = _loadOutputNames(outputnamespath)
+    self.iterNb = -1
+
+  def __next__(self):
+    self.iterNb += 1
+    return self.iterNb, next(self.data)
+
+  def __iter__(self):
+    return self
+
+  def writeHeaders(self):
+    """
+    This function can be called after the iterator is constructed and before
+    the first call to addResult in order to write the names of the parameters
+    in the result file.
+    """
+    if self.directory:
+      resultdir = os.path.join(self.directory, SampleIterator.RESULTDIR)
+      outputnamespath = os.path.join(self.directory,
+                                     SampleIterator.OUTPUTNAMESFILE)
+    else:
+      resultdir = SampleIterator.RESULTDIR
+      outputnamespath = SampleIterator.OUTPUTNAMESFILE
+    os.makedirs(resultdir, exist_ok=True)
+    resultpath = os.path.join(resultdir, SampleIterator.RESULTFILE)
+    result_columns = [SampleIterator.IDCOLUMN]
+    result_columns.extend(self.inputnames)
+    result_columns.extend(self.outputnames)
+    result_columns.append(SampleIterator.ERRORCOLUMN)
+    self.result_file = open(resultpath, 'w', newline='')
+    self.result_csv = csv.DictWriter( self.result_file,
+                                      fieldnames=result_columns,
+                                      quoting=csv.QUOTE_NONNUMERIC )
+    self.result_csv.writeheader()
+    self.result_file.flush()
+
+  def addResult(self, currentId, currentInput, currentOutput, currentError):
+    """
+    You need to construct the iterator and call writeHeaders before the first
+    call of this function.
+    currentId : int value
+    currentInput : dictionary {"input name":value}
+    currentOutput : result returned by _exec.  Can be a tuple, a simple value or
+    None in case of error.
+    currentError : string or None if no error
+    """
+    currentRecord = {}
+    currentRecord[SampleIterator.IDCOLUMN] = currentId
+    for name in self.inputnames:
+      currentRecord[name] = currentInput[name]
+    if currentError is None:
+      if len(self.outputnames) == 1 :
+        outputname = self.outputnames[0]
+        currentRecord[outputname] = _codeOutput(currentOutput,
+                                                currentId,
+                                                outputname,
+                                                self.directory)
+      elif len(self.outputnames) > 1 :
+        outputIter = iter(currentOutput)
+        for name in self.outputnames:
+          currentRecord[name] = _codeOutput(next(outputIter),
+                                            currentId,
+                                            name,
+                                            self.directory)
+    else:
+      for name in self.outputnames:
+        currentRecord[name] = None
+    currentRecord[SampleIterator.ERRORCOLUMN] = currentError
+    self.result_csv.writerow(currentRecord)
+    self.result_file.flush()
+
+  def terminate(self):
+    """
+    Call this function at the end of the simulation in order to close all
+    open files.
+    """
+    if not self.datafile is None:
+      self.datafile.close()
+      self.datafile = None
+    if not self.result_file is None:
+      self.result_file.close()
+      self.result_file = None
+
+
+# Private functions
+def _loadOutputNames(filepath):
+    outputnames = []
+    with open(filepath, "r") as namesfile:
+      for line in namesfile:
+        line = line.rstrip() # remove whitespaces at the end
+        outputnames.append(line)
+    return outputnames
+
+# Read and write results (output parameters)
+def _codeOutput(value, currentId, name, directory=None):
+  """
+  Define how a value should be saved.
+  value: object to be saved - value of a parameter
+  currentId: number of the current line (current point).
+  name: name of the parameter (name of the column in the csv file).
+  return: value (number or string) to be saved in the csv file.
+  """
+  res = None
+  if isinstance(value, numbers.Number):
+    res = value
+  elif isinstance(value, str):
+    res = value
+    if res[0:1] == SampleIterator.ESCAPE_CHAR :
+      res = SampleIterator.ESCAPE_CHAR + res
+  else:
+    file_name = "idefixresult-{}-{}.pick".format(name, currentId)
+    res = SampleIterator.ESCAPE_CHAR + SampleIterator.PICK_CHAR + file_name
+    file_path = os.path.join(SampleIterator.RESULTDIR, file_name)
+    if directory :
+      file_path = os.path.join(directory, file_path)
+    with open(file_path, "wb") as f:
+      pickle.dump(value, f)
+  return res
+
+def _decodeOutput(obj, resultdir):
+  """
+  Decode a value read from the csv file.
+  obj: object to decode (string or number).
+  resultdir : directory which contains the result files
+  return: decoded object.
+  """
+  res = None
+  if isinstance(obj, numbers.Number):
+    res = obj
+  elif isinstance(obj, str):
+    res = obj
+    if res[0:1] == SampleIterator.ESCAPE_CHAR :
+      res = res[1:]
+      if res[0:1] == SampleIterator.ESCAPE_CHAR :# obj = @@string begins with@
+        pass
+      elif res[0:1] == SampleIterator.PICK_CHAR:# obj = @pidefixresult-x-1.pick
+        file_path = os.path.join(resultdir, res[1:])
+        with open(file_path, "rb") as f:
+          res = pickle.load(f)
+      else:
+        raise Exception("Unknown escape value:" + obj)
+  return res
diff --git a/src/pydefx/samplecsvmanager.py b/src/pydefx/samplecsvmanager.py
index 6ffa78b..05db2f9 100644
--- a/src/pydefx/samplecsvmanager.py
+++ b/src/pydefx/samplecsvmanager.py
@@ -20,23 +20,29 @@ import csv
 import inspect
 import os
 import pathlib
-import numbers
-import pickle
 from . import sample
+from . import samplecsviterator
+SampleIterator = samplecsviterator.SampleIterator
 
 class SampleManager:
-  DATAFILE = "idefixdata.csv"
-  OUTPUTNAMESFILE = "idefixoutputnames.csv"
-  RESULTDIR = "idefixresult" # directory which contains all the result files
-  RESULTFILE = "idefixresult.csv" # main result file - values for every point
-  GLOBALFILE = "idefixglobal"     # global result - one value for the whole simulation
-  ERRORCOLUMN = "idefix_error"
-  IDCOLUMN ="idefix_id"
-  ESCAPE_CHAR = "@"   # prefix a value that needs particular save/load procedure
-  PICK_CHAR = "p"     # @p : csv value saved in another file using pickle
+  """
+  The SampleManager is used by the study for reading and writing a sample from
+  and to the file system. This SampleManager uses the csv format.
+  The following services are needed by the study:
+  - write the sample on the local file system (prepareRun).
+  - know what files were written in order to copy them on the remote file system
+  (return value of prepareRun).
+  - know what files contain the result in order to bring them back from the
+  remote file system to the local one (getResultFileName).
+  - load the results from the local file system to a sample (loadResult).
+  - restore a sample from a local directory when you want to recover a job
+  launched in a previous session (restoreSample).
+  - know the name of the module which contains the class SampleIterator in
+  order to iterate over the input values of the sample (getModuleName).
+  This name is written by the study in a configuration file and it is used by
+  the optimizer loop plugin.
+  """
   def __init__(self):
-    self.datafile = None
-    self.result_file = None
     pass
 
   # Functions used by the study
@@ -48,7 +54,7 @@ class SampleManager:
                copied. This directory should be already created.
     Return a list of files to add to the input files list of the job.
     """
-    datapath = os.path.join(directory, SampleManager.DATAFILE)
+    datapath = os.path.join(directory, SampleIterator.DATAFILE)
     with open(datapath, 'w', newline='') as csvfile:
       writer = csv.DictWriter(csvfile,
                               fieldnames=sample.getInputNames(),
@@ -56,14 +62,16 @@ class SampleManager:
       writer.writeheader()
       writer.writerows(sample.inputIterator())
 
-    outnamespath = os.path.join(directory, SampleManager.OUTPUTNAMESFILE)
+    outnamespath = os.path.join(directory, SampleIterator.OUTPUTNAMESFILE)
     with open(outnamespath, 'w') as outputfile:
       for v in sample.getOutputNames():
         outputfile.write(v+'\n')
     filename = inspect.getframeinfo(inspect.currentframe()).filename
+    install_directory = pathlib.Path(filename).resolve().parent
+    iteratorFile = os.path.join(install_directory, "samplecsviterator.py")
     return [datapath,
             outnamespath,
-            filename
+            iteratorFile
             ]
 
   def loadResult(self, sample, directory):
@@ -72,216 +80,56 @@ class SampleManager:
     The results are loaded into the sample.
     Return the modified sample.
     """
-    resultdir = os.path.join(directory, SampleManager.RESULTDIR)
-    datapath = os.path.join(resultdir, SampleManager.RESULTFILE)
+    resultdir = os.path.join(directory, SampleIterator.RESULTDIR)
+    datapath = os.path.join(resultdir, SampleIterator.RESULTFILE)
     with open(datapath, newline='') as datafile:
       data = csv.DictReader(datafile, quoting=csv.QUOTE_NONNUMERIC)
       for elt in data:
-        index = int(elt[SampleManager.IDCOLUMN]) # float to int
+        index = int(elt[SampleIterator.IDCOLUMN]) # float to int
         input_vals = {}
         for name in sample.getInputNames():
           input_vals[name] = elt[name]
         output_vals = {}
         for name in sample.getOutputNames():
-          output_vals[name] = self.decodeOutput(elt[name], resultdir)
+          output_vals[name] = samplecsviterator._decodeOutput(elt[name],
+                                                              resultdir)
         try:
           sample.checkId(index, input_vals)
         except Exception as err:
           extraInfo = "Error on processing file {} index number {}:".format(
                                                 datapath,       str(index))
           raise Exception(extraInfo + str(err))
-        sample.addResult(index, output_vals, elt[SampleManager.ERRORCOLUMN])
+        sample.addResult(index, output_vals, elt[SampleIterator.ERRORCOLUMN])
     return sample
 
-  def loadSample(self, directory):
+  def restoreSample(self, directory):
     """ The directory should contain the files created by prepareRun. A new
     sample object is created and returned from those files.
     This function is used to recover a previous run.
     """
-    sampleIt = self.initInputIterator(directory)
+    sampleIt = SampleIterator(directory)
     inputvalues = {}
-    for name in self.inputnames:
+    for name in sampleIt.inputnames:
       inputvalues[name] = []
     for newid, values in sampleIt:
-      for name in self.inputnames:
+      for name in sampleIt.inputnames:
         inputvalues[name].append(values[name])
     
-    result = sample.Sample(self.inputnames, self.outputnames)
+    result = sample.Sample(sampleIt.inputnames, sampleIt.outputnames)
     result.setInputValues(inputvalues)
-    self.terminate()
+    sampleIt.terminate()
     return result
 
   def getModuleName(self):
     """
-    Return the module name which contains the class SampleManager.
+    Return the module name which contains the class SampleIterator.
     """
-    return __name__
+    return "samplecsviterator"
   
   def getResultFileName(self):
     """
     Name of the file or directory which contains the result and needs to be
     copied from the remote computer.
     """
-    return SampleManager.RESULTDIR
-
-  # Functions used by the optimizerloop plugin
-  def initInputIterator(self, directory=None):
-    """
-    Iterate over the input values read from the csv file.
-    """
-    if directory:
-      datapath = os.path.join(directory, SampleManager.DATAFILE)
-      outputnamespath = os.path.join(directory, SampleManager.OUTPUTNAMESFILE)
-      self.directory = directory
-    else:
-      datapath = SampleManager.DATAFILE
-      outputnamespath = SampleManager.OUTPUTNAMESFILE
-      self.directory = None
-
-    self.datafile = open(datapath, newline='')
-    data     = csv.DictReader(self.datafile, quoting=csv.QUOTE_NONNUMERIC)
-    self.inputnames = data.fieldnames
-    self.outputnames = _loadOutputNames(outputnamespath)
-    return InputSampleIterator(data)
-
-  def writeHeaders(self):
-    """
-    This function can be called after initInputIterator and before the first
-    call to addResult in order to write the names of the parameters in the
-    result file.
-    """
-    if self.directory:
-      resultdir = os.path.join(self.directory, SampleManager.RESULTDIR)
-      outputnamespath = os.path.join(self.directory,
-                                     SampleManager.OUTPUTNAMESFILE)
-    else:
-      resultdir = SampleManager.RESULTDIR
-      outputnamespath = SampleManager.OUTPUTNAMESFILE
-    os.makedirs(resultdir, exist_ok=True)
-    resultpath = os.path.join(resultdir, SampleManager.RESULTFILE)
-    result_columns = [SampleManager.IDCOLUMN]
-    result_columns.extend(self.inputnames)
-    result_columns.extend(self.outputnames)
-    result_columns.append(SampleManager.ERRORCOLUMN)
-    self.result_file = open(resultpath, 'w', newline='')
-    self.result_csv = csv.DictWriter( self.result_file,
-                                      fieldnames=result_columns,
-                                      quoting=csv.QUOTE_NONNUMERIC )
-    self.result_csv.writeheader()
-    self.result_file.flush()
-
-  def addResult(self, currentId, currentInput, currentOutput, currentError):
-    """
-    You need to call initInputIterator and writeHeaders before the first call
-    of this function.
-    currentId : int value
-    currentInput : dictionary {"input name":value}
-    currentOutput : result returned by _exec.  Can be a tuple, a simple value or
-    None in case of error.
-    currentError : string or None if no error
-    """
-    currentRecord = {}
-    currentRecord[SampleManager.IDCOLUMN] = currentId
-    for name in self.inputnames:
-      currentRecord[name] = currentInput[name]
-    if currentError is None:
-      if len(self.outputnames) == 1 :
-        outputname = self.outputnames[0]
-        currentRecord[outputname] = self.codeOutput(currentOutput,
-                                                    currentId,
-                                                    outputname)
-      elif len(self.outputnames) > 1 :
-        outputIter = iter(currentOutput)
-        for name in self.outputnames:
-          currentRecord[name]=self.codeOutput(next(outputIter), currentId, name)
-    else:
-      for name in self.outputnames:
-        currentRecord[name] = None
-    currentRecord[SampleManager.ERRORCOLUMN] = currentError
-    self.result_csv.writerow(currentRecord)
-    self.result_file.flush()
-
-  def terminate(self):
-    """
-    Call this function at the end of the simulation in order to close every
-    open files.
-    """
-    if not self.datafile is None:
-      self.datafile.close()
-      self.datafile = None
-    if not self.result_file is None:
-      self.result_file.close()
-      self.result_file = None
-
-  # Read and write results (output parameters)
-  def codeOutput(self, value, currentId, name):
-    """
-    Define how a value should be saved.
-    value: object to be saved - value of a parameter
-    currentId: number of the current line (current point).
-    name: name of the parameter (name of the column in the csv file).
-    return: string to be saved in the csv file.
-    """
-    res = None
-    if isinstance(value, numbers.Number):
-      res = value
-    elif isinstance(value, str):
-      res = value
-      if res[0:1] == SampleManager.ESCAPE_CHAR :
-        res = SampleManager.ESCAPE_CHAR + res
-    else:
-      file_name = "idefixresult-{}-{}.pick".format(name, currentId)
-      res = SampleManager.ESCAPE_CHAR + SampleManager.PICK_CHAR + file_name
-      file_path = os.path.join(SampleManager.RESULTDIR, file_name)
-      if self.directory :
-        file_path = os.path.join(self.directory, file_path)
-      with open(file_path, "wb") as f:
-        pickle.dump(value, f)
-    return res
-
-  def decodeOutput(self, obj, resultdir):
-    """
-    Decode a value read from the csv file.
-    obj: object to decode (string or number).
-    resultdir : directory which contains the result files
-    return: decoded object.
-    """
-    res = None
-    if isinstance(obj, numbers.Number):
-      res = obj
-    elif isinstance(obj, str):
-      res = obj
-      if res[0:1] == SampleManager.ESCAPE_CHAR :
-        res = res[1:]
-        if res[0:1] == SampleManager.ESCAPE_CHAR :# obj = @@string begins with@
-          pass
-        elif res[0:1] == SampleManager.PICK_CHAR:# obj = @pidefixresult-x-1.pick
-          file_path = os.path.join(resultdir, res[1:])
-          with open(file_path, "rb") as f:
-            res = pickle.load(f)
-        else:
-          raise Exception("Unknown escape value:" + obj)
-    return res
-
-class InputSampleIterator:
-  """
-  Iterator used to iterate over the input values of a sample, adding an order
-  number.
-  """
-  def __init__(self, iterable):
-    self.it = iter(iterable)
-    self.iterNb = -1
-  def __next__(self):
-    self.iterNb += 1
-    return self.iterNb, next(self.it)
-  def __iter__(self):
-    return self
+    return SampleIterator.RESULTDIR
 
-# Private functions
-def _loadOutputNames(filepath):
-    outputnames = []
-    with open(filepath, "r") as namesfile:
-      for line in namesfile:
-        line = line.rstrip() # remove whitespaces at the end
-        outputnames.append(line)
-    return outputnames
diff --git a/src/pydefx/schemas/plugin.py b/src/pydefx/schemas/plugin.py
index a5fe5b5..d49a3e4 100644
--- a/src/pydefx/schemas/plugin.py
+++ b/src/pydefx/schemas/plugin.py
@@ -38,13 +38,12 @@ class myalgosync(SALOMERuntime.OptimizerAlgSync):
     """Start to fill the pool with samples to evaluate."""
     itModuleName = self.config["sampleIterator"]
     itModule = importlib.import_module(itModuleName)
-    self.manager = itModule.SampleManager()
-    self.iterator = self.manager.initInputIterator()
+    self.manager = itModule.SampleIterator()
     self.manager.writeHeaders()
     values=None
     for i in range(0, self.getNbOfBranches()):
       try:
-        newid, values = next(self.iterator)
+        newid, values = next(self.manager)
         self.pool.pushInSample(newid, pickle.dumps(values, protocol=0).decode())
       except StopIteration:
         pass
@@ -61,7 +60,7 @@ class myalgosync(SALOMERuntime.OptimizerAlgSync):
     error,result = pickle.loads(resultbyte)
     self.manager.addResult(currentId, sample, result, error)
     try:
-      newid, values = next(self.iterator)
+      newid, values = next(self.manager)
       self.pool.pushInSample(newid, pickle.dumps(values, protocol=0).decode())
     except StopIteration:
       pass