From: Ovidiu Mircescu Date: Tue, 2 Jul 2019 15:58:44 +0000 (+0200) Subject: Rollback the removal of the samplecsviterator. X-Git-Tag: V9_4_0rc1~10 X-Git-Url: http://git.salome-platform.org/gitweb/?a=commitdiff_plain;h=5183cf2e734d96a962e90b1289644d5f653d49f9;p=tools%2Fydefx.git Rollback the removal of the samplecsviterator. Some refactoring. --- diff --git a/src/cpp/TMonoPyJob.hxx b/src/cpp/TMonoPyJob.hxx index d5bb00b..72b2e85 100644 --- a/src/cpp/TMonoPyJob.hxx +++ b/src/cpp/TMonoPyJob.hxx @@ -58,7 +58,7 @@ public: : MonoPyJob() , _sample(sample) { - if(_lastError.empty()) // no errors during parent contruction + if(_lastError.empty()) // no errors during parent construction { try { diff --git a/src/pydefx/CMakeLists.txt b/src/pydefx/CMakeLists.txt index e8b1d94..6d5d94c 100644 --- a/src/pydefx/CMakeLists.txt +++ b/src/pydefx/CMakeLists.txt @@ -23,6 +23,7 @@ SET(SCRIPTS pyscript.py pystudy.py sample.py + samplecsviterator.py samplecsvmanager.py defaultschemabuilder.py ) diff --git a/src/pydefx/pystudy.py b/src/pydefx/pystudy.py index 6852dec..0b43a16 100644 --- a/src/pydefx/pystudy.py +++ b/src/pydefx/pystudy.py @@ -71,7 +71,7 @@ class PyStudy: """ Recover a study from a result directory where a previous study was launched. """ - self.sample = self.sampleManager.loadSample(path) + self.sample = self.sampleManager.restoreSample(path) job_string = loadJobString(path) launcher = salome.naming_service.Resolve('/SalomeLauncher') self.job_id = launcher.restoreJob(job_string) @@ -94,7 +94,7 @@ class PyStudy: salome_params = launcher.getJobParameters(self.job_id) self.params = parameters.Parameters(salome_parameters=salome_params) #TODO: sampleManager should be loaded from result_directory - self.sample = self.sampleManager.loadSample( + self.sample = self.sampleManager.restoreSample( salome_params.result_directory) self.getResult() else: @@ -112,7 +112,7 @@ class PyStudy: salome_params = launcher.getJobParameters(job_id) self.params = parameters.Parameters(salome_parameters=salome_params) #TODO: sampleManager should be loaded from result_directory - self.sample = self.sampleManager.loadSample(salome_params.result_directory) + self.sample=self.sampleManager.restoreSample(salome_params.result_directory) self.script = None return diff --git a/src/pydefx/samplecsviterator.py b/src/pydefx/samplecsviterator.py new file mode 100644 index 0000000..e8063e5 --- /dev/null +++ b/src/pydefx/samplecsviterator.py @@ -0,0 +1,194 @@ +# Copyright (C) 2019 EDF R&D +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +# +import csv +import numbers +import pickle +import os + +class SampleIterator: + """ + Iterator used to iterate over the input values of a sample, adding an order + number. The order number is the id you get as the first parameter of the + function addResult. + """ + DATAFILE = "idefixdata.csv" + OUTPUTNAMESFILE = "idefixoutputnames.csv" + RESULTDIR = "idefixresult" # directory which contains all the result files + RESULTFILE = "idefixresult.csv" # main result file - values for every point + GLOBALFILE = "idefixglobal" # global result - one value for the whole simulation + ERRORCOLUMN = "idefix_error" + IDCOLUMN ="idefix_id" + ESCAPE_CHAR = "@" # prefix a value that needs particular save/load procedure + PICK_CHAR = "p" # @p : csv value saved in another file using pickle + + def __init__(self, directory=None): + if directory: + datapath = os.path.join(directory, SampleIterator.DATAFILE) + outputnamespath = os.path.join(directory, SampleIterator.OUTPUTNAMESFILE) + self.directory = directory + else: + datapath = SampleIterator.DATAFILE + outputnamespath = SampleIterator.OUTPUTNAMESFILE + self.directory = None + self.result_file = None + self.datafile = open(datapath, newline='') + self.data = csv.DictReader(self.datafile, quoting=csv.QUOTE_NONNUMERIC) + self.inputnames = self.data.fieldnames + self.outputnames = _loadOutputNames(outputnamespath) + self.iterNb = -1 + + def __next__(self): + self.iterNb += 1 + return self.iterNb, next(self.data) + + def __iter__(self): + return self + + def writeHeaders(self): + """ + This function can be called after initInputIterator and before the first + call to addResult in order to write the names of the parameters in the + result file. + """ + if self.directory: + resultdir = os.path.join(self.directory, SampleIterator.RESULTDIR) + outputnamespath = os.path.join(self.directory, + SampleIterator.OUTPUTNAMESFILE) + else: + resultdir = SampleIterator.RESULTDIR + outputnamespath = SampleIterator.OUTPUTNAMESFILE + os.makedirs(resultdir, exist_ok=True) + resultpath = os.path.join(resultdir, SampleIterator.RESULTFILE) + result_columns = [SampleIterator.IDCOLUMN] + result_columns.extend(self.inputnames) + result_columns.extend(self.outputnames) + result_columns.append(SampleIterator.ERRORCOLUMN) + self.result_file = open(resultpath, 'w', newline='') + self.result_csv = csv.DictWriter( self.result_file, + fieldnames=result_columns, + quoting=csv.QUOTE_NONNUMERIC ) + self.result_csv.writeheader() + self.result_file.flush() + + def addResult(self, currentId, currentInput, currentOutput, currentError): + """ + You need to call initInputIterator and writeHeaders before the first call + of this function. + currentId : int value + currentInput : dictionary {"input name":value} + currentOutput : result returned by _exec. Can be a tuple, a simple value or + None in case of error. + currentError : string or None if no error + """ + currentRecord = {} + currentRecord[SampleIterator.IDCOLUMN] = currentId + for name in self.inputnames: + currentRecord[name] = currentInput[name] + if currentError is None: + if len(self.outputnames) == 1 : + outputname = self.outputnames[0] + currentRecord[outputname] = _codeOutput(currentOutput, + currentId, + outputname, + self.directory) + elif len(self.outputnames) > 1 : + outputIter = iter(currentOutput) + for name in self.outputnames: + currentRecord[name] = _codeOutput(next(outputIter), + currentId, + name, + self.directory) + else: + for name in self.outputnames: + currentRecord[name] = None + currentRecord[SampleIterator.ERRORCOLUMN] = currentError + self.result_csv.writerow(currentRecord) + self.result_file.flush() + + def terminate(self): + """ + Call this function at the end of the simulation in order to close every + open files. + """ + if not self.datafile is None: + self.datafile.close() + self.datafile = None + if not self.result_file is None: + self.result_file.close() + self.result_file = None + + +# Private functions +def _loadOutputNames(filepath): + outputnames = [] + with open(filepath, "r") as namesfile: + for line in namesfile: + line = line.rstrip() # remove whitespaces at the end + outputnames.append(line) + return outputnames + +# Read and write results (output parameters) +def _codeOutput(value, currentId, name, directory=None): + """ + Define how a value should be saved. + value: object to be saved - value of a parameter + currentId: number of the current line (current point). + name: name of the parameter (name of the column in the csv file). + return: string to be saved in the csv file. + """ + res = None + if isinstance(value, numbers.Number): + res = value + elif isinstance(value, str): + res = value + if res[0:1] == SampleIterator.ESCAPE_CHAR : + res = SampleIterator.ESCAPE_CHAR + res + else: + file_name = "idefixresult-{}-{}.pick".format(name, currentId) + res = SampleIterator.ESCAPE_CHAR + SampleIterator.PICK_CHAR + file_name + file_path = os.path.join(SampleIterator.RESULTDIR, file_name) + if directory : + file_path = os.path.join(directory, file_path) + with open(file_path, "wb") as f: + pickle.dump(value, f) + return res + +def _decodeOutput(obj, resultdir): + """ + Decode a value read from the csv file. + obj: object to decode (string or number). + resultdir : directory which contains the result files + return: decoded object. + """ + res = None + if isinstance(obj, numbers.Number): + res = obj + elif isinstance(obj, str): + res = obj + if res[0:1] == SampleIterator.ESCAPE_CHAR : + res = res[1:] + if res[0:1] == SampleIterator.ESCAPE_CHAR :# obj = @@string begins with@ + pass + elif res[0:1] == SampleIterator.PICK_CHAR:# obj = @pidefixresult-x-1.pick + file_path = os.path.join(resultdir, res[1:]) + with open(file_path, "rb") as f: + res = pickle.load(f) + else: + raise Exception("Unknown escape value:" + obj) + return res diff --git a/src/pydefx/samplecsvmanager.py b/src/pydefx/samplecsvmanager.py index 6ffa78b..05db2f9 100644 --- a/src/pydefx/samplecsvmanager.py +++ b/src/pydefx/samplecsvmanager.py @@ -20,23 +20,29 @@ import csv import inspect import os import pathlib -import numbers -import pickle from . import sample +from . import samplecsviterator +SampleIterator = samplecsviterator.SampleIterator class SampleManager: - DATAFILE = "idefixdata.csv" - OUTPUTNAMESFILE = "idefixoutputnames.csv" - RESULTDIR = "idefixresult" # directory which contains all the result files - RESULTFILE = "idefixresult.csv" # main result file - values for every point - GLOBALFILE = "idefixglobal" # global result - one value for the whole simulation - ERRORCOLUMN = "idefix_error" - IDCOLUMN ="idefix_id" - ESCAPE_CHAR = "@" # prefix a value that needs particular save/load procedure - PICK_CHAR = "p" # @p : csv value saved in another file using pickle + """ + The SampleManager is used by the study for reading and writing a sample from + and to the file system. This SampleManager uses the csv format. + The following services are needed by the study: + - write the sample on the local file system (prepareRun). + - know what files were written in order to copy them on the remote file system + (return value of prepareRun). + - know what files contain the result in order to bring them back from the + remote file system to the local one (getResultFileName). + - load the results from the local file system to a sample (loadResult). + - restore a sample from a local directory when you want to recover a job + launched in a previous session. + - the name of the module which contains the class SampleIterator in order to + iterate over the input values of the sample (getModuleName). + This name is written by the study in a configuration file and it is used by + the optimizer loop plugin. + """ def __init__(self): - self.datafile = None - self.result_file = None pass # Functions used by the study @@ -48,7 +54,7 @@ class SampleManager: copied. This directory should be already created. Return a list of files to add to the input files list of the job. """ - datapath = os.path.join(directory, SampleManager.DATAFILE) + datapath = os.path.join(directory, SampleIterator.DATAFILE) with open(datapath, 'w', newline='') as csvfile: writer = csv.DictWriter(csvfile, fieldnames=sample.getInputNames(), @@ -56,14 +62,16 @@ class SampleManager: writer.writeheader() writer.writerows(sample.inputIterator()) - outnamespath = os.path.join(directory, SampleManager.OUTPUTNAMESFILE) + outnamespath = os.path.join(directory, SampleIterator.OUTPUTNAMESFILE) with open(outnamespath, 'w') as outputfile: for v in sample.getOutputNames(): outputfile.write(v+'\n') filename = inspect.getframeinfo(inspect.currentframe()).filename + install_directory = pathlib.Path(filename).resolve().parent + iteratorFile = os.path.join(install_directory, "samplecsviterator.py") return [datapath, outnamespath, - filename + iteratorFile ] def loadResult(self, sample, directory): @@ -72,216 +80,56 @@ class SampleManager: The results are loaded into the sample. Return the modified sample. """ - resultdir = os.path.join(directory, SampleManager.RESULTDIR) - datapath = os.path.join(resultdir, SampleManager.RESULTFILE) + resultdir = os.path.join(directory, SampleIterator.RESULTDIR) + datapath = os.path.join(resultdir, SampleIterator.RESULTFILE) with open(datapath, newline='') as datafile: data = csv.DictReader(datafile, quoting=csv.QUOTE_NONNUMERIC) for elt in data: - index = int(elt[SampleManager.IDCOLUMN]) # float to int + index = int(elt[SampleIterator.IDCOLUMN]) # float to int input_vals = {} for name in sample.getInputNames(): input_vals[name] = elt[name] output_vals = {} for name in sample.getOutputNames(): - output_vals[name] = self.decodeOutput(elt[name], resultdir) + output_vals[name] = samplecsviterator._decodeOutput(elt[name], + resultdir) try: sample.checkId(index, input_vals) except Exception as err: extraInfo = "Error on processing file {} index number {}:".format( datapath, str(index)) raise Exception(extraInfo + str(err)) - sample.addResult(index, output_vals, elt[SampleManager.ERRORCOLUMN]) + sample.addResult(index, output_vals, elt[SampleIterator.ERRORCOLUMN]) return sample - def loadSample(self, directory): + def restoreSample(self, directory): """ The directory should contain the files created by prepareRun. A new sample object is created and returned from those files. This function is used to recover a previous run. """ - sampleIt = self.initInputIterator(directory) + sampleIt = SampleIterator(directory) inputvalues = {} - for name in self.inputnames: + for name in sampleIt.inputnames: inputvalues[name] = [] for newid, values in sampleIt: - for name in self.inputnames: + for name in sampleIt.inputnames: inputvalues[name].append(values[name]) - result = sample.Sample(self.inputnames, self.outputnames) + result = sample.Sample(sampleIt.inputnames, sampleIt.outputnames) result.setInputValues(inputvalues) - self.terminate() + sampleIt.terminate() return result def getModuleName(self): """ - Return the module name which contains the class SampleManager. + Return the module name which contains the class SampleIterator. """ - return __name__ + return "samplecsviterator" def getResultFileName(self): """ Name of the file or directory which contains the result and needs to be copied from the remote computer. """ - return SampleManager.RESULTDIR - - # Functions used by the optimizerloop plugin - def initInputIterator(self, directory=None): - """ - Iterate over the input values read from the csv file. - """ - if directory: - datapath = os.path.join(directory, SampleManager.DATAFILE) - outputnamespath = os.path.join(directory, SampleManager.OUTPUTNAMESFILE) - self.directory = directory - else: - datapath = SampleManager.DATAFILE - outputnamespath = SampleManager.OUTPUTNAMESFILE - self.directory = None - - self.datafile = open(datapath, newline='') - data = csv.DictReader(self.datafile, quoting=csv.QUOTE_NONNUMERIC) - self.inputnames = data.fieldnames - self.outputnames = _loadOutputNames(outputnamespath) - return InputSampleIterator(data) - - def writeHeaders(self): - """ - This function can be called after initInputIterator and before the first - call to addResult in order to write the names of the parameters in the - result file. - """ - if self.directory: - resultdir = os.path.join(self.directory, SampleManager.RESULTDIR) - outputnamespath = os.path.join(self.directory, - SampleManager.OUTPUTNAMESFILE) - else: - resultdir = SampleManager.RESULTDIR - outputnamespath = SampleManager.OUTPUTNAMESFILE - os.makedirs(resultdir, exist_ok=True) - resultpath = os.path.join(resultdir, SampleManager.RESULTFILE) - result_columns = [SampleManager.IDCOLUMN] - result_columns.extend(self.inputnames) - result_columns.extend(self.outputnames) - result_columns.append(SampleManager.ERRORCOLUMN) - self.result_file = open(resultpath, 'w', newline='') - self.result_csv = csv.DictWriter( self.result_file, - fieldnames=result_columns, - quoting=csv.QUOTE_NONNUMERIC ) - self.result_csv.writeheader() - self.result_file.flush() - - def addResult(self, currentId, currentInput, currentOutput, currentError): - """ - You need to call initInputIterator and writeHeaders before the first call - of this function. - currentId : int value - currentInput : dictionary {"input name":value} - currentOutput : result returned by _exec. Can be a tuple, a simple value or - None in case of error. - currentError : string or None if no error - """ - currentRecord = {} - currentRecord[SampleManager.IDCOLUMN] = currentId - for name in self.inputnames: - currentRecord[name] = currentInput[name] - if currentError is None: - if len(self.outputnames) == 1 : - outputname = self.outputnames[0] - currentRecord[outputname] = self.codeOutput(currentOutput, - currentId, - outputname) - elif len(self.outputnames) > 1 : - outputIter = iter(currentOutput) - for name in self.outputnames: - currentRecord[name]=self.codeOutput(next(outputIter), currentId, name) - else: - for name in self.outputnames: - currentRecord[name] = None - currentRecord[SampleManager.ERRORCOLUMN] = currentError - self.result_csv.writerow(currentRecord) - self.result_file.flush() - - def terminate(self): - """ - Call this function at the end of the simulation in order to close every - open files. - """ - if not self.datafile is None: - self.datafile.close() - self.datafile = None - if not self.result_file is None: - self.result_file.close() - self.result_file = None - - # Read and write results (output parameters) - def codeOutput(self, value, currentId, name): - """ - Define how a value should be saved. - value: object to be saved - value of a parameter - currentId: number of the current line (current point). - name: name of the parameter (name of the column in the csv file). - return: string to be saved in the csv file. - """ - res = None - if isinstance(value, numbers.Number): - res = value - elif isinstance(value, str): - res = value - if res[0:1] == SampleManager.ESCAPE_CHAR : - res = SampleManager.ESCAPE_CHAR + res - else: - file_name = "idefixresult-{}-{}.pick".format(name, currentId) - res = SampleManager.ESCAPE_CHAR + SampleManager.PICK_CHAR + file_name - file_path = os.path.join(SampleManager.RESULTDIR, file_name) - if self.directory : - file_path = os.path.join(self.directory, file_path) - with open(file_path, "wb") as f: - pickle.dump(value, f) - return res - - def decodeOutput(self, obj, resultdir): - """ - Decode a value read from the csv file. - obj: object to decode (string or number). - resultdir : directory which contains the result files - return: decoded object. - """ - res = None - if isinstance(obj, numbers.Number): - res = obj - elif isinstance(obj, str): - res = obj - if res[0:1] == SampleManager.ESCAPE_CHAR : - res = res[1:] - if res[0:1] == SampleManager.ESCAPE_CHAR :# obj = @@string begins with@ - pass - elif res[0:1] == SampleManager.PICK_CHAR:# obj = @pidefixresult-x-1.pick - file_path = os.path.join(resultdir, res[1:]) - with open(file_path, "rb") as f: - res = pickle.load(f) - else: - raise Exception("Unknown escape value:" + obj) - return res - -class InputSampleIterator: - """ - Iterator used to iterate over the input values of a sample, adding an order - number. - """ - def __init__(self, iterable): - self.it = iter(iterable) - self.iterNb = -1 - def __next__(self): - self.iterNb += 1 - return self.iterNb, next(self.it) - def __iter__(self): - return self + return SampleIterator.RESULTDIR -# Private functions -def _loadOutputNames(filepath): - outputnames = [] - with open(filepath, "r") as namesfile: - for line in namesfile: - line = line.rstrip() # remove whitespaces at the end - outputnames.append(line) - return outputnames diff --git a/src/pydefx/schemas/plugin.py b/src/pydefx/schemas/plugin.py index a5fe5b5..d49a3e4 100644 --- a/src/pydefx/schemas/plugin.py +++ b/src/pydefx/schemas/plugin.py @@ -38,13 +38,12 @@ class myalgosync(SALOMERuntime.OptimizerAlgSync): """Start to fill the pool with samples to evaluate.""" itModuleName = self.config["sampleIterator"] itModule = importlib.import_module(itModuleName) - self.manager = itModule.SampleManager() - self.iterator = self.manager.initInputIterator() + self.manager = itModule.SampleIterator() self.manager.writeHeaders() values=None for i in range(0, self.getNbOfBranches()): try: - newid, values = next(self.iterator) + newid, values = next(self.manager) self.pool.pushInSample(newid, pickle.dumps(values, protocol=0).decode()) except StopIteration: pass @@ -61,7 +60,7 @@ class myalgosync(SALOMERuntime.OptimizerAlgSync): error,result = pickle.loads(resultbyte) self.manager.addResult(currentId, sample, result, error) try: - newid, values = next(self.iterator) + newid, values = next(self.manager) self.pool.pushInSample(newid, pickle.dumps(values, protocol=0).decode()) except StopIteration: pass