From 3a3d5427f40725ca4199eb80800c690709197974 Mon Sep 17 00:00:00 2001 From: barate Date: Mon, 28 Jan 2013 16:20:27 +0000 Subject: [PATCH] Add very simple log system - Logs are no longer printed on stdout/stderr - LOG macro writes a log message in file /tmp/libbatch-log-date-XXXXXX --- src/CCC/BatchManager_CCC.cxx | 21 +++++---- src/CCC/JobInfo_CCC.cxx | 3 +- src/Core/BatchManager.cxx | 56 +++++----------------- src/Core/CMakeLists.txt | 5 +- src/Core/CommunicationProtocol.cxx | 9 ++-- src/Core/CommunicationProtocolRSH.cxx | 5 +- src/Core/FactBatchManager.cxx | 6 --- src/Core/Log.cxx | 66 ++++++++++++++++++++++++++ src/Core/Log.hxx | 65 +++++++++++++++++++++++++ src/LSF/BatchManager_LSF.cxx | 21 +++++---- src/LSF/JobInfo_LSF.cxx | 3 +- src/LoadLeveler/BatchManager_LL.cxx | 13 ++--- src/Local/BatchManager_Local.cxx | 68 +++++++++++++-------------- src/PBS/BatchManager_PBS.cxx | 13 ++--- src/PBS/JobInfo_PBS.cxx | 3 +- src/SGE/BatchManager_SGE.cxx | 15 +++--- src/SGE/JobInfo_SGE.cxx | 3 +- src/Slurm/BatchManager_Slurm.cxx | 13 ++--- src/Vishnu/BatchManager_Vishnu.cxx | 21 +++++---- 19 files changed, 258 insertions(+), 151 deletions(-) create mode 100644 src/Core/Log.cxx create mode 100644 src/Core/Log.hxx diff --git a/src/CCC/BatchManager_CCC.cxx b/src/CCC/BatchManager_CCC.cxx index 715a51f..e24c171 100644 --- a/src/CCC/BatchManager_CCC.cxx +++ b/src/CCC/BatchManager_CCC.cxx @@ -52,6 +52,7 @@ #include "BatchManager_CCC.hxx" #include "JobInfo_CCC.hxx" +#include "Log.hxx" using namespace std; @@ -82,24 +83,24 @@ namespace Batch { std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); // export input files on cluster - cerr << "Export des fichiers en entree" << endl; + LOG("Export des fichiers en entree"); exportInputFiles(job); // build batch script for job - cerr << "Construction du script de batch" << endl; + LOG("Construction du script de batch"); buildBatchScript(job); - cerr << "Script envoye" << endl; + LOG("Script envoye"); // define command to submit batch string subCommand = string("bash -l -c \\\"cd ") + workDir + "; ccc_msub " + fileNameToExecute + "_Batch.sh\\\""; string command = _protocol.getExecCommand(subCommand, _hostname, _username); command += " 2>&1"; - cerr << command.c_str() << endl; + LOG(command); // submit job string output; int status = Utils::getCommandOutput(command, output); - cout << output; + LOG(output); if (status != 0) throw RunTimeException("Can't submit job, error was: " + output); // find id of submitted job in output @@ -127,12 +128,12 @@ namespace Batch { // define command to delete batch string subCommand = string("bash -l -c \\\"ccc_mdel ") + iss.str() + string("\\\""); string command = _protocol.getExecCommand(subCommand, _hostname, _username); - cerr << command.c_str() << endl; + LOG(command); status = system(command.c_str()); if (status) throw RunTimeException("Error of connection on remote host"); - cerr << "jobId = " << ref << "killed" << endl; + LOG("jobId = " << ref << "killed"); } // Methode pour le controle des jobs : renvoie l'etat du job @@ -145,7 +146,7 @@ namespace Batch { // define command to query batch string subCommand = string("bash -l -c \\\"bjobs ") + iss.str() + string("\\\""); string command = _protocol.getExecCommand(subCommand, _hostname, _username); - cerr << command.c_str() << endl; + LOG(command); string output; int status = Utils::getCommandOutput(command, output); @@ -256,7 +257,7 @@ namespace Batch { tempOutputFile.close(); Utils::chmod(TmpFileName.c_str(), 0x1ED); - cerr << "Batch script file generated is: " << TmpFileName.c_str() << endl; + LOG("Batch script file generated is: " << TmpFileName.c_str()); int status = _protocol.copyFile(TmpFileName, "", "", workDir + "/" + rootNameToExecute + "_Batch.sh", @@ -275,7 +276,7 @@ namespace Batch { string subCommand = string("echo "); subCommand += tmpdir; string command = _protocol.getExecCommand(subCommand, _hostname, _username); - cerr << command.c_str() << endl; + LOG(command); string output; int status = Utils::getCommandOutput(command, output); diff --git a/src/CCC/JobInfo_CCC.cxx b/src/CCC/JobInfo_CCC.cxx index 947d366..510d967 100644 --- a/src/CCC/JobInfo_CCC.cxx +++ b/src/CCC/JobInfo_CCC.cxx @@ -37,6 +37,7 @@ #include "Parametre.hxx" #include "Environnement.hxx" #include "JobInfo_CCC.hxx" +#include "Log.hxx" using namespace std; @@ -81,7 +82,7 @@ namespace Batch { } else if (status == "ZOMBI") { // Zombie _param[STATE] = FAILED; } else { - cerr << "Unknown job state code: " << status << endl; + LOG("Unknown job state code: " << status); } if( status.find("RUN") != string::npos) diff --git a/src/Core/BatchManager.cxx b/src/Core/BatchManager.cxx index c369f56..7ee7a56 100644 --- a/src/Core/BatchManager.cxx +++ b/src/Core/BatchManager.cxx @@ -40,6 +40,7 @@ #include "BatchManager.hxx" #include "Utils.hxx" #include "NotYetImplementedException.hxx" +#include "Log.hxx" using namespace std; @@ -148,9 +149,8 @@ namespace Batch { bool timeoutReached = (testTimeout && time >= timeout); JobInfo jinfo = jobid.queryJob(); string state = jinfo.getParametre()[STATE].str(); - cout << "State is \"" << state << "\""; while (!timeoutReached && state != FINISHED && state != FAILED) { - cout << ", sleeping " << sleeptime << "s..." << endl; + LOG("State is \"" << state << "\"" << ", sleeping " << sleeptime << "s..."); Utils::sleep(sleeptime); time += sleeptime; timeoutReached = (testTimeout && time >= timeout); @@ -161,9 +161,8 @@ namespace Batch { sleeptime = maxSleepTime; jinfo = jobid.queryJob(); state = jinfo.getParametre()[STATE].str(); - cout << "State is \"" << state << "\""; } - cout << endl; + LOG("State is \"" << state << "\""); return state; } @@ -208,7 +207,7 @@ namespace Batch { string subCommand = string("chmod u+x ") + remoteExec; string command = _protocol.getExecCommand(subCommand, _hostname, _username); - cerr << command.c_str() << endl; + LOG(command); status = system(command.c_str()); if (status) { std::ostringstream oss; @@ -248,13 +247,8 @@ namespace Batch { // Create local result directory int status = CommunicationProtocol::getInstance(SH).makeDirectory(directory, "", ""); - if (status) { - string mess("Directory creation failed. Status is :"); - ostringstream status_str; - status_str << status; - mess += status_str.str(); - cerr << mess << endl; - } + if (status) + LOG("Directory creation failed. Status is: " << status); for(Vit=V.begin(); Vit!=V.end(); Vit++) { CoupleType cpt = *static_cast< CoupleType * >(*Vit); @@ -269,28 +263,15 @@ namespace Batch { } status = _protocol.copyFile(remotePath, _hostname, _username, localPath, "", ""); - if (status) { - // Try to get what we can (logs files) - // throw BatchException("Error of connection on remote host"); - std::string mess("Copy command failed ! status is :"); - ostringstream status_str; - status_str << status; - mess += status_str.str(); - cerr << mess << endl; - } + if (status) + LOG("Copy command failed. Status is: " << status); } // Copy logs status = _protocol.copyFile(string(params[WORKDIR]) + string("/logs"), _hostname, _username, directory, "", ""); - if (status) { - std::string mess("Copy logs directory failed ! status is :"); - ostringstream status_str; - status_str << status; - mess += status_str.str(); - cerr << mess << endl; - } - + if (status) + LOG("Copy logs directory failed. Status is: " << status); } bool BatchManager::importDumpStateFile( const Job & job, const string directory ) @@ -299,25 +280,14 @@ namespace Batch { // Create local result directory int status = CommunicationProtocol::getInstance(SH).makeDirectory(directory, "", ""); - if (status) { - string mess("Directory creation failed. Status is :"); - ostringstream status_str; - status_str << status; - mess += status_str.str(); - cerr << mess << endl; - } + if (status) + LOG("Directory creation failed. Status is: " << status); bool ret = true; status = _protocol.copyFile(string(params[WORKDIR]) + string("/dumpState*.xml"), _hostname, _username, directory, "", ""); if (status) { - // Try to get what we can (logs files) - // throw BatchException("Error of connection on remote host"); - std::string mess("Copy command failed ! status is :"); - ostringstream status_str; - status_str << status; - mess += status_str.str(); - cerr << mess << endl; + LOG("Copy command failed. Status is: " << status); ret = false; } return ret; diff --git a/src/Core/CMakeLists.txt b/src/Core/CMakeLists.txt index 7acb3ea..0e53214 100644 --- a/src/Core/CMakeLists.txt +++ b/src/Core/CMakeLists.txt @@ -47,10 +47,11 @@ SET(CLASS_LIST Core/APIInternalFailureException Core/StringType Core/TypeMismatchException Core/Versatile - Core/Utils ) -APPEND_CLASSES_TO_SRC_FILES(${CLASS_LIST}) +APPEND_CLASSES_TO_SRC_FILES(${CLASS_LIST} + Core/Utils + Core/Log) APPEND_CLASSES_TO_HDR_FILES(${CLASS_LIST}) APPEND_CLASSES_TO_HDR_FILES(Core/Defines) diff --git a/src/Core/CommunicationProtocol.cxx b/src/Core/CommunicationProtocol.cxx index b365bba..371ba8d 100644 --- a/src/Core/CommunicationProtocol.cxx +++ b/src/Core/CommunicationProtocol.cxx @@ -27,8 +27,6 @@ */ #include -#include -#include #include @@ -44,6 +42,7 @@ #endif #include "APIInternalFailureException.hxx" #include "RunTimeException.hxx" +#include "Log.hxx" using namespace std; @@ -105,7 +104,7 @@ namespace Batch { string command = commandStringFromArgs(getCopyCommandArgs(sourcePath, sourceHost, sourceUser, destinationPath, destinationHost, destinationUser)); - cout << command.c_str() << endl; + LOG(command); int status = system(command.c_str()); return status; } @@ -125,7 +124,7 @@ namespace Batch { const std::string & user) const { string command = getExecCommand(getRemoveSubCommand(path), host, user); - cout << command.c_str() << endl; + LOG(command); int status = system(command.c_str()); return status; } @@ -135,7 +134,7 @@ namespace Batch { const std::string & user) const { string command = getExecCommand(getMakeDirectorySubCommand(path), host, user); - cout << command.c_str() << endl; + LOG(command); int status = system(command.c_str()); return status; } diff --git a/src/Core/CommunicationProtocolRSH.cxx b/src/Core/CommunicationProtocolRSH.cxx index 59caf2d..7c69d15 100644 --- a/src/Core/CommunicationProtocolRSH.cxx +++ b/src/Core/CommunicationProtocolRSH.cxx @@ -36,6 +36,7 @@ #include #include "CommunicationProtocolRSH.hxx" +#include "Log.hxx" using namespace std; @@ -160,7 +161,7 @@ namespace Batch { newdir[0] = driveLetter; newdir[1] = ':'; newdir[2] = '\0'; - cout << "Changing directory: " << newdir << endl; + LOG("Changing directory: " << newdir); _chdir(newdir); } @@ -169,7 +170,7 @@ namespace Batch { // Go back to previous directory if necessary if (driveLetter != '\0' && driveLetter != currentDrive) { - cout << "Changing directory: " << cwd << endl; + LOG("Changing directory: " << cwd); _chdir(cwd); } diff --git a/src/Core/FactBatchManager.cxx b/src/Core/FactBatchManager.cxx index e286e5b..dae5f12 100644 --- a/src/Core/FactBatchManager.cxx +++ b/src/Core/FactBatchManager.cxx @@ -41,12 +41,6 @@ namespace Batch { FactBatchManager::FactBatchManager(const string & _t) : type(_t) { BatchManagerCatalog::getInstance().addFactBatchManager(type.c_str(), this); - - /* - ostringstream msg; - msg << "FactBatchManager of type '" << type << "' inserted into catalog"; - cerr << msg.str().c_str() << endl; - */ } // Destructeur diff --git a/src/Core/Log.cxx b/src/Core/Log.cxx new file mode 100644 index 0000000..7d4f359 --- /dev/null +++ b/src/Core/Log.cxx @@ -0,0 +1,66 @@ +// Copyright (C) 2007-2012 CEA/DEN, EDF R&D, OPEN CASCADE +// +// Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * Log.cxx : + * + * Created on: 28 jan. 2013 + * Author : Renaud BARATE - EDF R&D + */ + +#include "Utils.hxx" +#include "Log.hxx" + +using namespace std; + +namespace Batch { + + Log::Log() + { + const size_t BUFSIZE = 32; + char date[BUFSIZE]; + time_t curtime = time(NULL); + strftime(date, BUFSIZE, "%Y-%m-%d_%H-%M-%S", localtime(&curtime)); + string prefix = string("log-") + date; + Utils::createAndOpenTemporaryFile(prefix, _stream); + } + + Log::~Log() + { + _stream.close(); + } + + void Log::log(const string & msg) + { + const size_t BUFSIZE = 32; + char timestamp[BUFSIZE]; + time_t curtime = time(NULL); + strftime(timestamp, BUFSIZE, "%Y-%m-%d_%H-%M-%S", localtime(&curtime)); + getInstance()._stream << timestamp << ": " << msg << endl; + } + + Log & Log::getInstance() + { + static Log instance; + return instance; + } + +} diff --git a/src/Core/Log.hxx b/src/Core/Log.hxx new file mode 100644 index 0000000..6647d95 --- /dev/null +++ b/src/Core/Log.hxx @@ -0,0 +1,65 @@ +// Copyright (C) 2007-2012 CEA/DEN, EDF R&D, OPEN CASCADE +// +// Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * Log.hxx : + * + * Created on: 28 jan. 2013 + * Author : Renaud BARATE - EDF R&D + */ + +#ifndef _LOG_H_ +#define _LOG_H_ + +#include +#include +#include + +#include "Defines.hxx" + +#define LOG(msg) {std::ostringstream sstream; sstream << msg; Log::log(sstream.str());} + +namespace Batch { + + class BATCH_EXPORT Log + { + public: + virtual ~Log(); + + static void log(const std::string & msg); + + private: + + Log(); + + static Log & getInstance(); + + // Forbid the use of copy constructor and assignment operator + Log(const Log & orig) {} + void operator=(const Log & orig) {} + + std::ofstream _stream; + + }; + +} + +#endif diff --git a/src/LSF/BatchManager_LSF.cxx b/src/LSF/BatchManager_LSF.cxx index e9388fa..a1b0daf 100644 --- a/src/LSF/BatchManager_LSF.cxx +++ b/src/LSF/BatchManager_LSF.cxx @@ -38,6 +38,7 @@ #include #include "BatchManager_LSF.hxx" #include "JobInfo_LSF.hxx" +#include "Log.hxx" using namespace std; @@ -64,23 +65,23 @@ namespace Batch { const std::string workDir = params[WORKDIR]; // export input files on cluster - cerr << "Export des fichiers en entree" << endl; + LOG("Export des fichiers en entree"); exportInputFiles(job); // build batch script for job - cerr << "Construction du script de batch" << endl; + LOG("Construction du script de batch"); string scriptFile = buildSubmissionScript(job); - cerr << "Script envoye" << endl; + LOG("Script envoye"); // define command to submit batch string subCommand = string("cd ") + workDir + "; bsub < " + scriptFile; string command = _protocol.getExecCommand(subCommand, _hostname, _username); command += " 2>&1"; - cerr << command.c_str() << endl; + LOG(command); string output; int status = Utils::getCommandOutput(command, output); - cout << output; + LOG(output); if (status != 0) throw RunTimeException("Can't submit job, error was: " + output); // read id of submitted job in output @@ -103,12 +104,12 @@ namespace Batch { // define command to delete batch string subCommand = string("bkill ") + iss.str(); string command = _protocol.getExecCommand(subCommand, _hostname, _username); - cerr << command.c_str() << endl; + LOG(command); status = system(command.c_str()); if (status) throw RunTimeException("Error of connection on remote host"); - cerr << "jobId = " << ref << "killed" << endl; + LOG("jobId = " << ref << "killed"); } // Methode pour le controle des jobs : renvoie l'etat du job @@ -121,7 +122,7 @@ namespace Batch { // define command to query batch string subCommand = string("bjobs ") + iss.str(); string command = _protocol.getExecCommand(subCommand, _hostname, _username); - cerr << command.c_str() << endl; + LOG(command); string output; int status = Utils::getCommandOutput(command, output); @@ -251,7 +252,7 @@ namespace Batch { tempOutputFile.flush(); tempOutputFile.close(); - cerr << "Batch script file generated is: " << TmpFileName.c_str() << endl; + LOG("Batch script file generated is: " << TmpFileName.c_str()); string remoteFileName = rootNameToExecute + "_Batch.sh"; int status = _protocol.copyFile(TmpFileName, "", "", @@ -284,7 +285,7 @@ namespace Batch { string subCommand = string("echo $HOME"); string command = _protocol.getExecCommand(subCommand, _hostname, _username) + " > " + filelogtemp; - cerr << command.c_str() << endl; + LOG(command); int status = system(command.c_str()); if (status) throw RunTimeException("Error of launching home command on remote host"); diff --git a/src/LSF/JobInfo_LSF.cxx b/src/LSF/JobInfo_LSF.cxx index c9e6bb1..2413b48 100644 --- a/src/LSF/JobInfo_LSF.cxx +++ b/src/LSF/JobInfo_LSF.cxx @@ -34,6 +34,7 @@ #include #include "JobInfo_LSF.hxx" +#include "Log.hxx" using namespace std; @@ -82,7 +83,7 @@ namespace Batch { } else if (status == "ZOMBI") { // Zombie _param[STATE] = FAILED; } else { - cerr << "Unknown job state code: " << status << endl; + LOG("Unknown job state code: " << status); } if( status.find("RUN") != string::npos) diff --git a/src/LoadLeveler/BatchManager_LL.cxx b/src/LoadLeveler/BatchManager_LL.cxx index c5fe1b2..f62871e 100644 --- a/src/LoadLeveler/BatchManager_LL.cxx +++ b/src/LoadLeveler/BatchManager_LL.cxx @@ -37,6 +37,7 @@ #include "FactBatchManager_LL.hxx" #include "BatchManager_LL.hxx" #include "JobInfo_LL.hxx" +#include "Log.hxx" using namespace std; @@ -70,12 +71,12 @@ namespace Batch { // define command to submit batch string subCommand = string("cd ") + workDir + "; llsubmit " + cmdFile; string command = _protocol.getExecCommand(subCommand, _hostname, _username); - cerr << command.c_str() << endl; + LOG(command); // submit job string output; int status = Utils::getCommandOutput(command, output); - cout << output; + LOG(output); if (status != 0) throw RunTimeException("Can't submit job, error was: " + output); // find id of submitted job in output @@ -199,7 +200,7 @@ namespace Batch { tempOutputFile.flush(); tempOutputFile.close(); - cerr << "Batch script file generated is: " << tmpFileName << endl; + LOG("Batch script file generated is: " << tmpFileName); string remoteFileName = rootNameToExecute + "_LL.cmd"; int status = _protocol.copyFile(tmpFileName, "", "", @@ -216,13 +217,13 @@ namespace Batch { // define command to delete job string subCommand = "llcancel " + jobid.getReference(); string command = _protocol.getExecCommand(subCommand, _hostname, _username); - cerr << command.c_str() << endl; + LOG(command); int status = system(command.c_str()); if (status) throw RunTimeException("Can't delete job " + jobid.getReference()); - cerr << "job " << jobid.getReference() << " killed" << endl; + LOG("job " << jobid.getReference() << " killed"); } JobInfo BatchManager_LL::queryJob(const JobId & jobid) @@ -230,7 +231,7 @@ namespace Batch { // define command to query batch string subCommand = "llq -f %st " + jobid.getReference(); string command = _protocol.getExecCommand(subCommand, _hostname, _username); - cerr << command.c_str() << endl; + LOG(command); string output; int status = Utils::getCommandOutput(command, output); if (status != 0) diff --git a/src/Local/BatchManager_Local.cxx b/src/Local/BatchManager_Local.cxx index e47a42a..6d1ae1a 100644 --- a/src/Local/BatchManager_Local.cxx +++ b/src/Local/BatchManager_Local.cxx @@ -56,6 +56,7 @@ #include "BatchManager_Local.hxx" #include "RunTimeException.hxx" #include "Utils.hxx" +#include "Log.hxx" using namespace std; @@ -80,8 +81,8 @@ namespace Batch { pthread_mutex_lock(&_threads_mutex); string state = iter->second.param[STATE]; if (state != FINISHED && state != FAILED) { - UNDER_LOCK( cout << "Warning: Job " << iter->first << - " is not finished, it will now be canceled." << endl ); + UNDER_LOCK( LOG("Warning: Job " << iter->first << + " is not finished, it will now be canceled.")); pthread_cancel(iter->second.thread_id); pthread_cond_wait(&_threadSyncCondition, &_threads_mutex); } @@ -147,8 +148,7 @@ namespace Batch { pthread_cancel(_threads[id].thread_id); pthread_cond_wait(&_threadSyncCondition, &_threads_mutex); } else { - cout << "Cannot delete job " << jobid.getReference() << - ". Job is already finished." << endl; + LOG("Cannot delete job " << jobid.getReference() << ". Job is already finished."); } } pthread_mutex_unlock(&_threads_mutex); @@ -165,7 +165,7 @@ namespace Batch { istringstream iss(jobid.getReference()); iss >> id; - UNDER_LOCK( cout << "BatchManager is sending HOLD command to the thread " << id << endl ); + UNDER_LOCK( LOG("BatchManager is sending HOLD command to the thread " << id) ); // On introduit une commande dans la queue du thread // @@@ --------> SECTION CRITIQUE <-------- @@@ @@ -183,7 +183,7 @@ namespace Batch { istringstream iss(jobid.getReference()); iss >> id; - UNDER_LOCK( cout << "BatchManager is sending RELEASE command to the thread " << id << endl ); + UNDER_LOCK( LOG("BatchManager is sending RELEASE command to the thread " << id) ); // On introduit une commande dans la queue du thread // @@@ --------> SECTION CRITIQUE <-------- @@@ @@ -377,7 +377,7 @@ namespace Batch { tempOutputFile.flush(); tempOutputFile.close(); - cerr << "Batch script file generated is: " << tmpFileName << endl; + LOG("Batch script file generated is: " << tmpFileName); Utils::chmod(tmpFileName.c_str(), 0x1ED); int status = _bm._protocol.copyFile(tmpFileName, "", "", @@ -391,7 +391,7 @@ namespace Batch { // pscp does not preserve access permissions on files string subCommand = string("chmod u+x ") + remotePath; string command = _bm._protocol.getExecCommand(subCommand, _bm._hostname, _bm._username); - cerr << command.c_str() << endl; + LOG(command); status = system(command.c_str()); if (status) { std::ostringstream oss; @@ -452,7 +452,7 @@ namespace Batch { #else child = fork(); if (child < 0) { // erreur - UNDER_LOCK( cerr << "Fork impossible (rc=" << child << ")" << endl ); + UNDER_LOCK( LOG("Fork impossible (rc=" << child << ")") ); } else if (child > 0) { // pere p_ta->pere(child); @@ -481,7 +481,7 @@ namespace Batch { // On invoque la fonction de nettoyage de la memoire delete_on_exit(arg); - UNDER_LOCK( cout << "Father is leaving" << endl ); + UNDER_LOCK( LOG("Father is leaving") ); pthread_exit(NULL); return NULL; } @@ -524,22 +524,22 @@ namespace Batch { DWORD exitCode; GetExitCodeProcess(child, &exitCode); if (exitCode != STILL_ACTIVE) { - UNDER_LOCK( cout << "Father sees his child is DONE: exit code = " << exitCode << endl ); + UNDER_LOCK( LOG("Father sees his child is DONE: exit code = " << exitCode) ); break; } #else int child_rc = 0; pid_t child_wait_rc = waitpid(child, &child_rc, WNOHANG /* | WUNTRACED */); if (child_wait_rc > 0) { - UNDER_LOCK( cout << "Status is: " << WIFEXITED( child_rc) << endl); - UNDER_LOCK( cout << "Status is: " << WEXITSTATUS( child_rc) << endl); - UNDER_LOCK( cout << "Status is: " << WIFSIGNALED( child_rc) << endl); - UNDER_LOCK( cout << "Status is: " << WTERMSIG( child_rc) << endl); - UNDER_LOCK( cout << "Status is: " << WCOREDUMP( child_rc) << endl); - UNDER_LOCK( cout << "Status is: " << WIFSTOPPED( child_rc) << endl); - UNDER_LOCK( cout << "Status is: " << WSTOPSIG( child_rc) << endl); + UNDER_LOCK( LOG("Status is: " << WIFEXITED( child_rc)) ); + UNDER_LOCK( LOG("Status is: " << WEXITSTATUS( child_rc)) ); + UNDER_LOCK( LOG("Status is: " << WIFSIGNALED( child_rc)) ); + UNDER_LOCK( LOG("Status is: " << WTERMSIG( child_rc)) ); + UNDER_LOCK( LOG("Status is: " << WCOREDUMP( child_rc)) ); + UNDER_LOCK( LOG("Status is: " << WIFSTOPPED( child_rc)) ); + UNDER_LOCK( LOG("Status is: " << WSTOPSIG( child_rc)) ); #ifdef WIFCONTINUED - UNDER_LOCK( cout << "Status is: " << WIFCONTINUED( child_rc) << endl); // not compilable on sarge + UNDER_LOCK( LOG("Status is: " << WIFCONTINUED( child_rc)) ); // not compilable on sarge #endif if (WIFSTOPPED(child_rc)) { // NOTA : pour rentrer dans cette section, il faut que le flag WUNTRACED @@ -553,12 +553,12 @@ namespace Batch { _bm._threads[_id].param[STATE] = Batch::PAUSED; pthread_mutex_unlock(&_bm._threads_mutex); // @@@ --------> SECTION CRITIQUE <-------- @@@ - UNDER_LOCK( cout << "Father sees his child is STOPPED : " << child_wait_rc << endl ); + UNDER_LOCK( LOG("Father sees his child is STOPPED : " << child_wait_rc) ); } else { // Le fils est termine, on sort de la boucle et du if englobant - UNDER_LOCK( cout << "Father sees his child is DONE : " << child_wait_rc << " (child_rc=" << (WIFEXITED(child_rc) ? WEXITSTATUS(child_rc) : -1) << ")" << endl ); + UNDER_LOCK( LOG("Father sees his child is DONE : " << child_wait_rc << " (child_rc=" << (WIFEXITED(child_rc) ? WEXITSTATUS(child_rc) : -1) << ")") ); break; } } @@ -569,7 +569,7 @@ namespace Batch { _bm._threads[_id].hasFailed = true; pthread_mutex_unlock(&_bm._threads_mutex); // @@@ --------> SECTION CRITIQUE <-------- @@@ - UNDER_LOCK( cout << "Father sees his child is DEAD : " << child_wait_rc << " (Reason : " << strerror(errno) << ")" << endl ); + UNDER_LOCK( LOG("Father sees his child is DEAD : " << child_wait_rc << " (Reason : " << strerror(errno) << ")") ); break; } #endif @@ -590,7 +590,7 @@ namespace Batch { // << "maxwalltime = " << maxwalltime << endl // << "int(maxwalltime * 1.1) = " << int(maxwalltime * 1.1) << endl; if (child_elapsedtime_minutes > long((float)maxwalltime * 1.1) ) { // On se donne 10% de marge avant le KILL - UNDER_LOCK( cout << "Father is sending KILL command to the thread " << _id << endl ); + UNDER_LOCK( LOG("Father is sending KILL command to the thread " << _id) ); // On introduit une commande dans la queue du thread // @@@ --------> SECTION CRITIQUE <-------- @@@ pthread_mutex_lock(&_bm._threads_mutex); @@ -601,7 +601,7 @@ namespace Batch { } else if (child_elapsedtime_minutes > maxwalltime ) { - UNDER_LOCK( cout << "Father is sending TERM command to the thread " << _id << endl ); + UNDER_LOCK( LOG("Father is sending TERM command to the thread " << _id) ); // On introduit une commande dans la queue du thread // @@@ --------> SECTION CRITIQUE <-------- @@@ pthread_mutex_lock(&_bm._threads_mutex); @@ -624,26 +624,26 @@ namespace Batch { switch (cmd) { case NOP: - UNDER_LOCK( cout << "Father does nothing to his child" << endl ); + UNDER_LOCK( LOG("Father does nothing to his child") ); break; #ifndef WIN32 case HOLD: - UNDER_LOCK( cout << "Father is sending SIGSTOP signal to his child" << endl ); + UNDER_LOCK( LOG("Father is sending SIGSTOP signal to his child") ); kill(child, SIGSTOP); break; case RELEASE: - UNDER_LOCK( cout << "Father is sending SIGCONT signal to his child" << endl ); + UNDER_LOCK( LOG("Father is sending SIGCONT signal to his child") ); kill(child, SIGCONT); break; case TERM: - UNDER_LOCK( cout << "Father is sending SIGTERM signal to his child" << endl ); + UNDER_LOCK( LOG("Father is sending SIGTERM signal to his child") ); kill(child, SIGTERM); break; case KILL: - UNDER_LOCK( cout << "Father is sending SIGKILL signal to his child" << endl ); + UNDER_LOCK( LOG("Father is sending SIGKILL signal to his child") ); kill(child, SIGKILL); break; #endif @@ -696,7 +696,7 @@ namespace Batch { comstr += command[i] + " "; } argv[command.size()] = NULL; - UNDER_LOCK( cout << "*** debug_command = " << comstr << endl ); + UNDER_LOCK( LOG("*** debug_command = " << comstr) ); // On cree une session pour le fils de facon a ce qu'il ne soit pas // detruit lorsque le shell se termine (le shell ouvre une session et @@ -710,7 +710,7 @@ namespace Batch { // On execute la commande du fils execv(argv[0], argv); - UNDER_LOCK( cout << "*** debug_command = " << strerror(errno) << endl ); + UNDER_LOCK( LOG("*** debug_command = " << strerror(errno)) ); // No need to deallocate since nothing happens after a successful exec // Normalement on ne devrait jamais arriver ici @@ -719,7 +719,7 @@ namespace Batch { } catch (GenericException & e) { - std::cerr << "Caught exception : " << e.type << " : " << e.message << std::endl; + LOG("Caught exception : " << e.type << " : " << e.message); } exit(99); @@ -748,7 +748,7 @@ namespace Batch { comstr += command[i]; } - UNDER_LOCK( cout << "*** debug_command = " << comstr << endl ); + UNDER_LOCK( LOG("*** debug_command = " << comstr) ); STARTUPINFO si; ZeroMemory( &si, sizeof(si) ); @@ -769,7 +769,7 @@ namespace Batch { } catch (GenericException & e) { - std::cerr << "Caught exception : " << e.type << " : " << e.message << std::endl; + LOG("Caught exception : " << e.type << " : " << e.message); } return pi.hProcess; diff --git a/src/PBS/BatchManager_PBS.cxx b/src/PBS/BatchManager_PBS.cxx index 7dfd666..ae18c23 100644 --- a/src/PBS/BatchManager_PBS.cxx +++ b/src/PBS/BatchManager_PBS.cxx @@ -39,6 +39,7 @@ #include "BatchManager_PBS.hxx" #include "JobInfo_PBS.hxx" +#include "Log.hxx" using namespace std; @@ -74,12 +75,12 @@ namespace Batch { string subCommand = string("cd ") + workDir + "; qsub " + scriptFile; string command = _protocol.getExecCommand(subCommand, _hostname, _username); command += " 2>&1"; - cerr << command.c_str() << endl; + LOG(command); // submit job string output; int status = Utils::getCommandOutput(command, output); - cout << output; + LOG(output); if (status != 0) throw RunTimeException("Can't submit job, error was: " + output); // normally output contains only id of submitted job, we just need to remove the final \n @@ -100,12 +101,12 @@ namespace Batch { // define command to delete batch string subCommand = string("qdel ") + iss.str(); string command = _protocol.getExecCommand(subCommand, _hostname, _username); - cerr << command.c_str() << endl; + LOG(command); status = system(command.c_str()); if (status) throw RunTimeException("Error of connection on remote host"); - cerr << "jobId = " << ref << "killed" << endl; + LOG("jobId = " << ref << "killed"); } // Methode pour le controle des jobs : renvoie l'etat du job @@ -118,7 +119,7 @@ namespace Batch { // define command to query batch string subCommand = string("qstat -f ") + iss.str(); string command = _protocol.getExecCommand(subCommand, _hostname, _username); - cerr << command.c_str() << endl; + LOG(command); string output; int status = Utils::getCommandOutput(command, output); @@ -239,7 +240,7 @@ namespace Batch { tempOutputFile.flush(); tempOutputFile.close(); - cerr << "Batch script file generated is: " << TmpFileName.c_str() << endl; + LOG("Batch script file generated is: " << TmpFileName.c_str()); string remoteFileName = rootNameToExecute + "_Batch.sh"; int status = _protocol.copyFile(TmpFileName, "", "", diff --git a/src/PBS/JobInfo_PBS.cxx b/src/PBS/JobInfo_PBS.cxx index 514e258..179b2c3 100644 --- a/src/PBS/JobInfo_PBS.cxx +++ b/src/PBS/JobInfo_PBS.cxx @@ -35,6 +35,7 @@ #include #include "JobInfo_PBS.hxx" +#include "Log.hxx" using namespace std; @@ -80,7 +81,7 @@ namespace Batch { } else if (status == "W") { // Waiting _param[STATE] = PAUSED; } else { - cerr << "Unknown job state code: " << status << endl; + LOG("Unknown job state code: " << status); } } else { // On some batch managers, the job is deleted as soon as it is finished, diff --git a/src/SGE/BatchManager_SGE.cxx b/src/SGE/BatchManager_SGE.cxx index 72d8b3c..ac4bea7 100644 --- a/src/SGE/BatchManager_SGE.cxx +++ b/src/SGE/BatchManager_SGE.cxx @@ -51,6 +51,7 @@ #include "BatchManager_SGE.hxx" #include "JobInfo_SGE.hxx" +#include "Log.hxx" using namespace std; @@ -90,12 +91,12 @@ namespace Batch { string subCommand = string("bash -l -c \\\"cd ") + workDir + "; qsub " + fileNameToExecute + "_Batch.sh\\\""; string command = _protocol.getExecCommand(subCommand, _hostname, _username); command += " 2>&1"; - cerr << command.c_str() << endl; + LOG(command); // submit job string output; int status = Utils::getCommandOutput(command, output); - cout << output; + LOG(output); if (status != 0) throw RunTimeException("Can't submit job, error was: " + output); // find id of submitted job in output @@ -119,12 +120,12 @@ namespace Batch { // define command to delete batch string subCommand = string("bash -l -c \\\"qdel ") + iss.str() + string("\\\""); string command = _protocol.getExecCommand(subCommand, _hostname, _username); - cerr << command.c_str() << endl; + LOG(command); status = system(command.c_str()); if(status) throw RunTimeException("Error of connection on remote host"); - cerr << "jobId = " << ref << "killed" << endl; + LOG("jobId = " << ref << "killed"); } // Methode pour le controle des jobs : renvoie l'etat du job @@ -137,7 +138,7 @@ namespace Batch { // define command to query batch string subCommand = string("bash -l -c \\\"qstat | grep ") + iss.str() + string("\\\""); string command = _protocol.getExecCommand(subCommand, _hostname, _username); - cerr << command.c_str() << endl; + LOG(command); string output; int status = Utils::getCommandOutput(command, output); @@ -158,7 +159,7 @@ namespace Batch { { #ifndef WIN32 //TODO porting on Win32 platform - std::cerr << "BuildBatchScript" << std::endl; + LOG("BuildBatchScript"); Parametre params = job.getParametre(); // Job Parameters @@ -219,7 +220,7 @@ namespace Batch { tempOutputFile.close(); Utils::chmod(TmpFileName.c_str(), 0x1ED); - cerr << "Batch script file generated is: " << TmpFileName.c_str() << endl; + LOG("Batch script file generated is: " << TmpFileName.c_str()); int status = _protocol.copyFile(TmpFileName, "", "", workDir + "/" + rootNameToExecute + "_Batch.sh", diff --git a/src/SGE/JobInfo_SGE.cxx b/src/SGE/JobInfo_SGE.cxx index 2f14ac4..db8e1a1 100644 --- a/src/SGE/JobInfo_SGE.cxx +++ b/src/SGE/JobInfo_SGE.cxx @@ -39,6 +39,7 @@ #include "RunTimeException.hxx" #include "APIInternalFailureException.hxx" #include "JobInfo_SGE.hxx" +#include "Log.hxx" using namespace std; @@ -88,7 +89,7 @@ namespace Batch { } else if (status == "h") { // Hold _param[STATE] = PAUSED; } else { - cerr << "Unknown job state code: " << status << endl; + LOG("Unknown job state code: " << status); } } else { // TODO: Check this. I suppose that unknown jobs are finished ones. diff --git a/src/Slurm/BatchManager_Slurm.cxx b/src/Slurm/BatchManager_Slurm.cxx index dfce7cd..13cb2a6 100644 --- a/src/Slurm/BatchManager_Slurm.cxx +++ b/src/Slurm/BatchManager_Slurm.cxx @@ -32,6 +32,7 @@ #include #include #include +#include #include "BatchManager_Slurm.hxx" #include "JobInfo_Slurm.hxx" @@ -69,12 +70,12 @@ namespace Batch { string subCommand = string("cd ") + workDir + "; sbatch " + cmdFile; string command = _protocol.getExecCommand(subCommand, _hostname, _username); command += " 2>&1"; - cout << command.c_str() << endl; + LOG(command); // submit job string output; int status = Utils::getCommandOutput(command, output); - cout << output; + LOG(output); if (status != 0) throw RunTimeException("Can't submit job, error was: " + output); // find id of submitted job in output @@ -180,7 +181,7 @@ namespace Batch { tempOutputFile.flush(); tempOutputFile.close(); - cerr << "Batch script file generated is: " << tmpFileName << endl; + LOG("Batch script file generated is: " << tmpFileName); string remoteFileName = rootNameToExecute + "_slurm.cmd"; int status = _protocol.copyFile(tmpFileName, "", "", @@ -197,13 +198,13 @@ namespace Batch { // define command to delete job string subCommand = "scancel " + jobid.getReference(); string command = _protocol.getExecCommand(subCommand, _hostname, _username); - cerr << command.c_str() << endl; + LOG(command); int status = system(command.c_str()); if (status) throw RunTimeException("Can't delete job " + jobid.getReference()); - cerr << "job " << jobid.getReference() << " killed" << endl; + LOG("job " << jobid.getReference() << " killed"); } JobInfo BatchManager_Slurm::queryJob(const JobId & jobid) @@ -211,7 +212,7 @@ namespace Batch { // define command to query batch string subCommand = "squeue -o %t -j " + jobid.getReference(); string command = _protocol.getExecCommand(subCommand, _hostname, _username); - cerr << command.c_str() << endl; + LOG(command); string output; Utils::getCommandOutput(command, output); // We don't test the return code here because with jobs finished since a long time Slurm diff --git a/src/Vishnu/BatchManager_Vishnu.cxx b/src/Vishnu/BatchManager_Vishnu.cxx index 291f096..4b7fe00 100644 --- a/src/Vishnu/BatchManager_Vishnu.cxx +++ b/src/Vishnu/BatchManager_Vishnu.cxx @@ -40,6 +40,7 @@ #include "BatchManager_Vishnu.hxx" #include "JobInfo_Vishnu.hxx" +#include "Log.hxx" using namespace std; @@ -83,12 +84,12 @@ namespace Batch { subCommand += "vishnu_close"; string command = _protocol.getExecCommand(subCommand, _hostname, _username); command += " 2>&1"; - cerr << command.c_str() << endl; + LOG(command); // submit job string output; int status = Utils::getCommandOutput(command, output); - cout << output; + LOG(output); if (status != 0) throw RunTimeException("Can't submit job, error was: " + output); // find id of submitted job in output @@ -152,10 +153,10 @@ namespace Batch { // Execute command string command = _protocol.getExecCommand(subCommand, _hostname, _username); command += " 2>&1"; - cerr << command.c_str() << endl; + LOG(command); string output; int status = Utils::getCommandOutput(command, output); - cout << output; + LOG(output); if (status != 0) throw RunTimeException("Can't copy input files, error was: " + output); } @@ -237,7 +238,7 @@ namespace Batch { tempOutputFile.flush(); tempOutputFile.close(); - cerr << "Batch script file generated is: " << tmpFileName << endl; + LOG("Batch script file generated is: " << tmpFileName); return tmpFileName; } @@ -249,13 +250,13 @@ namespace Batch { subCommand += "vishnu_cancel_job " + _hostname + " " + jobid.getReference() + " && "; subCommand += "vishnu_close"; string command = _protocol.getExecCommand(subCommand, _hostname, _username); - cerr << command.c_str() << endl; + LOG(command); int status = system(command.c_str()); if (status) throw RunTimeException("Can't delete job " + jobid.getReference()); - cerr << "job " << jobid.getReference() << " killed" << endl; + LOG("job " << jobid.getReference() << " killed"); } JobInfo BatchManager_Vishnu::queryJob(const JobId & jobid) @@ -266,7 +267,7 @@ namespace Batch { subCommand += "vishnu_get_job_info " + _hostname + " " + jobid.getReference() + " && "; subCommand += "vishnu_close"; string command = _protocol.getExecCommand(subCommand, _hostname, _username); - cerr << command.c_str() << endl; + LOG(command); string output; int status = Utils::getCommandOutput(command, output); @@ -317,10 +318,10 @@ namespace Batch { // Execute command string command = _protocol.getExecCommand(subCommand, _hostname, _username); command += " 2>&1"; - cerr << command.c_str() << endl; + LOG(command); string output; status = Utils::getCommandOutput(command, output); - cout << output; + LOG(output); if (status != 0) throw RunTimeException("Can't import output files, error was: " + output); } -- 2.30.2