From: barate Date: Wed, 19 Oct 2011 08:15:51 +0000 (+0000) Subject: Change Vishnu parameters and add Vishnu file transfer X-Git-Tag: V1_4_0_VISHNU~1 X-Git-Url: http://git.salome-platform.org/gitweb/?a=commitdiff_plain;h=acc8ac19b30ac4b71de796c77f498a77832f1e8e;p=tools%2Flibbatch.git Change Vishnu parameters and add Vishnu file transfer --- diff --git a/src/Core/Batch_BatchManager.cxx b/src/Core/Batch_BatchManager.cxx index 6f34765..cbe725c 100644 --- a/src/Core/Batch_BatchManager.cxx +++ b/src/Core/Batch_BatchManager.cxx @@ -66,6 +66,7 @@ namespace Batch { // } BatchManager::BatchManager(const FactBatchManager * parent, const char * host) throw(InvalidArgumentException) : _hostname(host), jobid_map(), _parent(parent) { + /* #ifdef WIN32 WSADATA wsaData; WSAStartup(MAKEWORD(2, 2), &wsaData); // Initialize Winsock @@ -84,6 +85,7 @@ namespace Batch { msg += "\" unknown from the network"; throw InvalidArgumentException(msg.c_str()); } + */ } // Destructeur diff --git a/src/Core/Batch_BatchManager_eClient.hxx b/src/Core/Batch_BatchManager_eClient.hxx index b33ca17..d596f44 100644 --- a/src/Core/Batch_BatchManager_eClient.hxx +++ b/src/Core/Batch_BatchManager_eClient.hxx @@ -52,7 +52,7 @@ namespace Batch { const char * username="", CommunicationProtocolType protocolType = SSH, const char* mpiImpl="mpich1"); virtual ~BatchManager_eClient(); - void importOutputFiles( const Job & job, const std::string directory ); + virtual void importOutputFiles( const Job & job, const std::string directory ); bool importDumpStateFile( const Job & job, const std::string directory ); protected: diff --git a/src/Vishnu/Batch_BatchManager_eVishnu.cxx b/src/Vishnu/Batch_BatchManager_eVishnu.cxx index 94e1baf..dfd6615 100644 --- a/src/Vishnu/Batch_BatchManager_eVishnu.cxx +++ b/src/Vishnu/Batch_BatchManager_eVishnu.cxx @@ -66,7 +66,11 @@ namespace Batch { const string workDir = params[WORKDIR]; // export input files on cluster - exportInputFiles(job); + + // This part will be removed in the final version + copyInputFilesFromLocalToVishnu(job); + // This part will be kept in the final version + copyInputFilesFromVishnuToCluster(job); // build command file to submit the job and copy it on the server string cmdFile = buildCommandFile(job); @@ -75,12 +79,14 @@ namespace Batch { string logFile = generateTemporaryFileName("vishnu-submitlog"); // define command to submit batch - string subCommand = string("cd ") + workDir + "; "; + string subCommand = string("cd ") + params[VISHNU_HOST_WORKDIR].str() + "; "; subCommand += ". ~/.vishnu/vishnu.env ;"; - subCommand += "vishnu_connect -p 2 " + params[VISHNU_USERID].str() + + subCommand += "vishnu_connect -p 2 " + _username + " -w " + params[VISHNU_PASSWORD].str() + "; "; - subCommand += "vishnu_submit_job " + params[VISHNU_MACHINEID].str() + " " + cmdFile; - string command = _protocol.getExecCommand(subCommand, _hostname, _username); + subCommand += "vishnu_submit_job " + _hostname + " " + cmdFile; + string command = _protocol.getExecCommand(subCommand, + params[VISHNU_HOST_NAME], + params[VISHNU_HOST_LOGIN]); command += " > "; command += logFile; cerr << command.c_str() << endl; @@ -108,14 +114,137 @@ namespace Batch { if (jobref.size() == 0) throw EmulationException("Error in the submission of the job on the remote host"); - // Store the userId, password and machineId with the jobId in the job reference + // Store the Vishnu password, host name and login with the jobId in the job reference // (for further queries) - jobref = params[VISHNU_USERID].str() + ":" + params[VISHNU_PASSWORD].str() + ":" + - params[VISHNU_MACHINEID].str() + ":" + jobref; + jobref = params[VISHNU_PASSWORD].str() + ":" + params[VISHNU_HOST_NAME].str() + ":" + \ + params[VISHNU_HOST_LOGIN].str() + ":" + jobref; JobId id(this, jobref); return id; } + + void BatchManager_eVishnu::copyInputFilesFromLocalToVishnu(const Job& job) + { + int status; + Parametre params = job.getParametre(); + const Versatile & V = params[INFILE]; + Versatile::const_iterator Vit; + + // Create working directory on Vishnu host + status = _protocol.makeDirectory(params[VISHNU_HOST_WORKDIR], + params[VISHNU_HOST_NAME], + params[VISHNU_HOST_LOGIN]); + if(status) { + std::ostringstream oss; + oss << status; + std::string ex_mess("Error of connection on remote host ! status = "); + ex_mess += oss.str(); + throw EmulationException(ex_mess.c_str()); + } + + // copy executable + string executeFile = params[EXECUTABLE]; + if (executeFile.size() != 0) { + status = _protocol.copyFile(executeFile, "", "", + params[VISHNU_HOST_WORKDIR], + params[VISHNU_HOST_NAME], + params[VISHNU_HOST_LOGIN]); + if(status) { + std::ostringstream oss; + oss << status; + std::string ex_mess("Error of connection on remote host ! status = "); + ex_mess += oss.str(); + throw EmulationException(ex_mess.c_str()); + } + + } + + // copy filesToExportList + for(Vit=V.begin(); Vit!=V.end(); Vit++) { + CoupleType cpt = *static_cast< CoupleType * >(*Vit); + Couple inputFile = cpt; + + // remote file -> transform to get path on Vishnu host + size_t found = inputFile.getRemote().find_last_of("/"); + string remote = params[VISHNU_HOST_WORKDIR].str() + "/" + inputFile.getRemote().substr(found+1); + + status = _protocol.copyFile(inputFile.getLocal(), "", "", + remote, + params[VISHNU_HOST_NAME], + params[VISHNU_HOST_LOGIN]); + if(status) { + std::ostringstream oss; + oss << status; + std::string ex_mess("Error of connection on remote host ! status = "); + ex_mess += oss.str(); + throw EmulationException(ex_mess.c_str()); + } + } + + } + + void BatchManager_eVishnu::copyInputFilesFromVishnuToCluster(const Job& job) + { + int status; + Parametre params = job.getParametre(); + const Versatile & V = params[INFILE]; + Versatile::const_iterator Vit; + + // create remote directories + string subCommand = ". ~/.vishnu/vishnu.env ;"; + subCommand += "vishnu_connect -p 2 " + _username + " -w " + params[VISHNU_PASSWORD].str() + "; "; + subCommand += "vishnu_create_dir " + _hostname + ":" + params[WORKDIR].str() + "; "; + subCommand += "vishnu_create_dir " + _hostname + ":" + params[WORKDIR].str() + "/logs"; + string command = _protocol.getExecCommand(subCommand, + params[VISHNU_HOST_NAME], + params[VISHNU_HOST_LOGIN]); + cerr << command.c_str() << endl; + status = system(command.c_str()); + if (status != 0) + throw EmulationException("Can't create remote directories"); + + // copy executable + string executeFile = params[EXECUTABLE]; + if (executeFile.size() != 0) { + + // transform to get path on Vishnu host + size_t found = executeFile.find_last_of("/"); + string origin = params[VISHNU_HOST_WORKDIR].str() + "/" + executeFile.substr(found+1); + + string subCommand = ". ~/.vishnu/vishnu.env ;"; + subCommand += "vishnu_connect -p 2 " + _username + " -w " + params[VISHNU_PASSWORD].str() + "; "; + subCommand += "vishnu_copy_file " + origin + " " + _hostname + ":" + params[WORKDIR].str(); + string command = _protocol.getExecCommand(subCommand, + params[VISHNU_HOST_NAME], + params[VISHNU_HOST_LOGIN]); + cerr << command.c_str() << endl; + status = system(command.c_str()); + if (status != 0) + throw EmulationException("Can't copy executable"); + } + + // copy filesToExportList + for (Vit=V.begin(); Vit!=V.end(); Vit++) { + CoupleType cpt = *static_cast< CoupleType * >(*Vit); + Couple inputFile = cpt; + + // transform to get path on Vishnu host + size_t found = inputFile.getRemote().find_last_of("/"); + string origin = params[VISHNU_HOST_WORKDIR].str() + "/" + inputFile.getRemote().substr(found+1); + + string subCommand = ". ~/.vishnu/vishnu.env ;"; + subCommand += "vishnu_connect -p 2 " + _username + " -w " + params[VISHNU_PASSWORD].str() + "; "; + subCommand += "vishnu_copy_file " + origin + " " + _hostname + ":" + inputFile.getRemote(); + string command = _protocol.getExecCommand(subCommand, + params[VISHNU_HOST_NAME], + params[VISHNU_HOST_LOGIN]); + cerr << command.c_str() << endl; + status = system(command.c_str()); + if (status != 0) + throw EmulationException("Can't copy file"); + } + } + /** * Create Vishnu command file and copy it on the server. * Return the name of the remote file. @@ -149,27 +278,34 @@ namespace Batch { string tmpFileName = createAndOpenTemporaryFile("vishnu-script", tempOutputFile); tempOutputFile << "#!/bin/sh" << endl; - tempOutputFile << "%vishnuoutput=" << workDir << "/logs/output.log." << rootNameToExecute << endl; - tempOutputFile << "%vishnuerror=" << workDir << "/logs/error.log." << rootNameToExecute << endl; + tempOutputFile << "#% vishnu_output=" << workDir << "/logs/output.log." << rootNameToExecute << endl; + tempOutputFile << "#% vishnu_error=" << workDir << "/logs/error.log." << rootNameToExecute << endl; if (params.find(NAME) != params.end()) - tempOutputFile << "%vishnujob_name=\"" << params[NAME] << "\"" << endl; + tempOutputFile << "#% vishnu_job_name=\"" << params[NAME] << "\"" << endl; // Optional parameters int nbproc = 1; if (params.find(NBPROC) != params.end()) nbproc = params[NBPROC]; - int nodes_requested = (nbproc + _nb_proc_per_node -1) / _nb_proc_per_node; + //int nodes_requested = (nbproc + _nb_proc_per_node -1) / _nb_proc_per_node; //tempOutputFile << "#SBATCH --nodes=" << nodes_requested << endl; //tempOutputFile << "#SBATCH --ntasks-per-node=" << _nb_proc_per_node << endl; - if (params.find(MAXWALLTIME) != params.end()) - tempOutputFile << "%vishnuwallclocklimit=" << params[MAXWALLTIME] << endl; + if (params.find(MAXWALLTIME) != params.end()) { + long totalMinutes = params[MAXWALLTIME]; + long h = totalMinutes / 60; + long m = totalMinutes - h * 60; + tempOutputFile << "#% vishnu_wallclocklimit=" << h << ":"; + if (m < 10) + tempOutputFile << "0"; + tempOutputFile << m << ":00" << endl; + } //if (params.find(MAXRAMSIZE) != params.end()) // tempOutputFile << "#SBATCH --mem=" << params[MAXRAMSIZE] << endl; if (params.find(QUEUE) != params.end()) - tempOutputFile << "%vishnuqueue=" << params[QUEUE] << endl; + tempOutputFile << "#% vishnu_queue=" << params[QUEUE] << endl; // Define environment for the job Environnement env = job.getEnvironnement(); @@ -193,8 +329,9 @@ namespace Batch { string remoteFileName = rootNameToExecute + "_vishnu"; int status = _protocol.copyFile(tmpFileName, "", "", - workDir + "/" + remoteFileName, - _hostname, _username); + params[VISHNU_HOST_WORKDIR].str() + "/" + remoteFileName, + params[VISHNU_HOST_NAME], + params[VISHNU_HOST_LOGIN]); if (status) throw EmulationException("Cannot copy command file on host " + _hostname); @@ -203,19 +340,19 @@ namespace Batch { void BatchManager_eVishnu::deleteJob(const JobId & jobid) { - // split job reference in Vishnu userId / password / machineId / jobId - string vishnuUserId, vishnuPassword, vishnuMachineId, vishnuJobId; + // split job reference in Vishnu password / host name / host login / jobId + string vishnuPassword, vishnuHostName, vishnuHostLogin, vishnuJobId; istringstream iss(jobid.getReference()); - getline(iss, vishnuUserId, ':'); getline(iss, vishnuPassword, ':'); - getline(iss, vishnuMachineId, ':'); + getline(iss, vishnuHostName, ':'); + getline(iss, vishnuHostLogin, ':'); getline(iss, vishnuJobId, ':'); // define command to delete job string subCommand = ". ~/.vishnu/vishnu.env ;"; - subCommand += "vishnu_connect -p 2 " + vishnuUserId + " -w " + vishnuPassword + "; "; - subCommand += "vishnu_cancel_job " + vishnuMachineId + " " + vishnuJobId; - string command = _protocol.getExecCommand(subCommand, _hostname, _username); + subCommand += "vishnu_connect -p 2 " + _username + " -w " + vishnuPassword + "; "; + subCommand += "vishnu_cancel_job " + _hostname + " " + vishnuJobId; + string command = _protocol.getExecCommand(subCommand, vishnuHostName, vishnuHostLogin); cerr << command.c_str() << endl; int status = system(command.c_str()); @@ -255,19 +392,19 @@ namespace Batch { // define name of log file (local) string logFile = generateTemporaryFileName("vishnu-querylog-" + jobid.getReference()); - // split job reference in Vishnu userId / password / machineId / jobId - string vishnuUserId, vishnuPassword, vishnuMachineId, vishnuJobId; + // split job reference in Vishnu password / host name / host login / jobId + string vishnuPassword, vishnuHostName, vishnuHostLogin, vishnuJobId; istringstream iss(jobid.getReference()); - getline(iss, vishnuUserId, ':'); getline(iss, vishnuPassword, ':'); - getline(iss, vishnuMachineId, ':'); + getline(iss, vishnuHostName, ':'); + getline(iss, vishnuHostLogin, ':'); getline(iss, vishnuJobId, ':'); // define command to query batch string subCommand = ". ~/.vishnu/vishnu.env ;"; - subCommand += "vishnu_connect -p 2 " + vishnuUserId + " -w " + vishnuPassword + "; "; - subCommand += "vishnu_get_job_info " + vishnuMachineId + " " + vishnuJobId; - string command = _protocol.getExecCommand(subCommand, _hostname, _username); + subCommand += "vishnu_connect -p 2 " + _username + " -w " + vishnuPassword + "; "; + subCommand += "vishnu_get_job_info " + _hostname + " " + vishnuJobId; + string command = _protocol.getExecCommand(subCommand, vishnuHostName, vishnuHostLogin); command += " > "; command += logFile; cerr << command.c_str() << endl; @@ -284,4 +421,108 @@ namespace Batch { return JobId(this, reference); } + void BatchManager_eVishnu::importOutputFiles(const Job & job, const std::string directory) + { + // This part will be kept in the final version + copyOutputFilesFromClusterToVishnu(job); + + // This part will be removed in the final version + copyOutputFilesFromVishnuToLocal(job, directory); + } + + void BatchManager_eVishnu::copyOutputFilesFromClusterToVishnu(const Job & job) + { + int status; + Parametre params = job.getParametre(); + const Versatile & V = params[OUTFILE]; + Versatile::const_iterator Vit; + + for (Vit=V.begin(); Vit!=V.end(); Vit++) { + CoupleType cpt = *static_cast< CoupleType * >(*Vit); + Couple outputFile = cpt; + + // transform to get path on Vishnu host + size_t found = outputFile.getRemote().find_last_of("/"); + string dest = params[VISHNU_HOST_WORKDIR].str() + "/" + outputFile.getRemote().substr(found+1); + + string subCommand = ". ~/.vishnu/vishnu.env ;"; + subCommand += "vishnu_connect -p 2 " + _username + " -w " + params[VISHNU_PASSWORD].str() + "; "; + subCommand += "vishnu_copy_file " + _hostname + ":" + outputFile.getRemote() + " " + dest; + string command = _protocol.getExecCommand(subCommand, + params[VISHNU_HOST_NAME], + params[VISHNU_HOST_LOGIN]); + cerr << command.c_str() << endl; + status = system(command.c_str()); + if (status != 0) + throw EmulationException("Can't copy file"); + } + + // Copy logs + string subCommand = ". ~/.vishnu/vishnu.env ;"; + subCommand += "vishnu_connect -p 2 " + _username + " -w " + params[VISHNU_PASSWORD].str() + "; "; + subCommand += "vishnu_copy_file -r " +_hostname + ":" + params[WORKDIR].str() + "/logs" + " " + \ + params[VISHNU_HOST_WORKDIR].str(); + string command = _protocol.getExecCommand(subCommand, + params[VISHNU_HOST_NAME], + params[VISHNU_HOST_LOGIN]); + cerr << command.c_str() << endl; + status = system(command.c_str()); + if (status != 0) + throw EmulationException("Can't copy logs"); + } + + void BatchManager_eVishnu::copyOutputFilesFromVishnuToLocal(const Job & job, + const std::string directory) + { + Parametre params = job.getParametre(); + const Versatile & V = params[OUTFILE]; + Versatile::const_iterator Vit; + + // Create local result directory + int status = CommunicationProtocol::getInstance(SH).makeDirectory(directory, "", ""); + if (status) { + string mess("Directory creation failed. Status is :"); + ostringstream status_str; + status_str << status; + mess += status_str.str(); + cerr << mess << endl; + } + + for(Vit=V.begin(); Vit!=V.end(); Vit++) { + CoupleType cpt = *static_cast< CoupleType * >(*Vit); + Couple outputFile = cpt; + + // remote file -> transform to get path on Vishnu host + size_t found = outputFile.getRemote().find_last_of("/"); + string remote = params[VISHNU_HOST_WORKDIR].str() + "/" + outputFile.getRemote().substr(found+1); + + status = _protocol.copyFile(remote, + params[VISHNU_HOST_NAME], + params[VISHNU_HOST_LOGIN], + directory, "", ""); + if (status) { + // Try to get what we can (logs files) + // throw BatchException("Error of connection on remote host"); + std::string mess("Copy command failed ! status is :"); + ostringstream status_str; + status_str << status; + mess += status_str.str(); + cerr << mess << endl; + } + } + + // Copy logs + status = _protocol.copyFile(params[VISHNU_HOST_WORKDIR].str() + "/logs", + params[VISHNU_HOST_NAME], + params[VISHNU_HOST_LOGIN], + directory, "", ""); + if (status) { + std::string mess("Copy logs directory failed ! status is :"); + ostringstream status_str; + status_str << status; + mess += status_str.str(); + cerr << mess << endl; + } + } + } diff --git a/src/Vishnu/Batch_BatchManager_eVishnu.hxx b/src/Vishnu/Batch_BatchManager_eVishnu.hxx index b382d2f..195cb86 100644 --- a/src/Vishnu/Batch_BatchManager_eVishnu.hxx +++ b/src/Vishnu/Batch_BatchManager_eVishnu.hxx @@ -61,10 +61,15 @@ namespace Batch { virtual void alterJob(const JobId & jobid, const Environnement & env); virtual JobInfo queryJob(const JobId & jobid); virtual const JobId addJob(const Job & job, const std::string reference); + virtual void importOutputFiles(const Job & job, const std::string directory); protected: std::string buildCommandFile(const Job & job); + void copyInputFilesFromLocalToVishnu(const Job & job); + void copyInputFilesFromVishnuToCluster(const Job & job); + void copyOutputFilesFromClusterToVishnu(const Job & job); + void copyOutputFilesFromVishnuToLocal(const Job & job, const std::string directory); int _nb_proc_per_node; diff --git a/src/Vishnu/Batch_FactBatchManager_eVishnu.cxx b/src/Vishnu/Batch_FactBatchManager_eVishnu.cxx index bcbe521..b4dbb4e 100644 --- a/src/Vishnu/Batch_FactBatchManager_eVishnu.cxx +++ b/src/Vishnu/Batch_FactBatchManager_eVishnu.cxx @@ -33,18 +33,20 @@ namespace Batch { - def_Constant(VISHNU_USERID); def_Constant(VISHNU_PASSWORD); - def_Constant(VISHNU_MACHINEID); + def_Constant(VISHNU_HOST_NAME); + def_Constant(VISHNU_HOST_LOGIN); + def_Constant(VISHNU_HOST_WORKDIR); static FactBatchManager_eVishnu sFBM_eVishnu; FactBatchManager_eVishnu::FactBatchManager_eVishnu() : FactBatchManager_eClient("eVISHNU") { // Add specific parameters - ParameterTypeMap::getInstance().addParameter(VISHNU_USERID, STRING, 1); ParameterTypeMap::getInstance().addParameter(VISHNU_PASSWORD, STRING, 1); - ParameterTypeMap::getInstance().addParameter(VISHNU_MACHINEID, STRING, 1); + ParameterTypeMap::getInstance().addParameter(VISHNU_HOST_NAME, STRING, 1); + ParameterTypeMap::getInstance().addParameter(VISHNU_HOST_LOGIN, STRING, 1); + ParameterTypeMap::getInstance().addParameter(VISHNU_HOST_WORKDIR, STRING, 1); } FactBatchManager_eVishnu::~FactBatchManager_eVishnu() diff --git a/src/Vishnu/Batch_FactBatchManager_eVishnu.hxx b/src/Vishnu/Batch_FactBatchManager_eVishnu.hxx index 9a8bbb7..5e7fb41 100644 --- a/src/Vishnu/Batch_FactBatchManager_eVishnu.hxx +++ b/src/Vishnu/Batch_FactBatchManager_eVishnu.hxx @@ -36,9 +36,10 @@ namespace Batch { - decl_extern_Constant(VISHNU_USERID); decl_extern_Constant(VISHNU_PASSWORD); - decl_extern_Constant(VISHNU_MACHINEID); + decl_extern_Constant(VISHNU_HOST_NAME); + decl_extern_Constant(VISHNU_HOST_LOGIN); + decl_extern_Constant(VISHNU_HOST_WORKDIR); class BATCH_EXPORT FactBatchManager_eVishnu : public FactBatchManager_eClient { diff --git a/src/Vishnu/Test/Test_eVishnu.cxx b/src/Vishnu/Test/Test_eVishnu.cxx index c2d2927..8ca4cc8 100644 --- a/src/Vishnu/Test/Test_eVishnu.cxx +++ b/src/Vishnu/Test/Test_eVishnu.cxx @@ -83,9 +83,10 @@ int main(int argc, char** argv) const string & homedir = parser.getValue("TEST_EVISHNU_HOMEDIR"); const string & host = parser.getValue("TEST_EVISHNU_HOST"); const string & user = parser.getValue("TEST_EVISHNU_USER"); - const string & vishnu_userid = parser.getValue("TEST_EVISHNU_VISHNU_USERID"); const string & vishnu_password = parser.getValue("TEST_EVISHNU_VISHNU_PASSWORD"); - const string & vishnu_machineid = parser.getValue("TEST_EVISHNU_VISHNU_MACHINEID"); + const string & vishnu_host_name = parser.getValue("TEST_EVISHNU_VISHNU_HOST_NAME"); + const string & vishnu_host_login = parser.getValue("TEST_EVISHNU_VISHNU_HOST_LOGIN"); + const string & vishnu_host_workdir = parser.getValue("TEST_EVISHNU_VISHNU_HOST_WORKDIR"); int timeout = parser.getValueAsInt("TEST_EVISHNU_TIMEOUT"); // Define the job... @@ -93,7 +94,7 @@ int main(int argc, char** argv) // ... and its parameters ... Parametre p; p[EXECUTABLE] = "./test-script.sh"; - p[NAME] = string("Test eVISHNU ") + argv[1]; + p[NAME] = string("Test_eVISHNU_") + argv[1]; p[WORKDIR] = homedir + "/tmp/Batch"; p[INFILE] = Couple("seta.sh", "tmp/Batch/seta.sh"); p[INFILE] += Couple("setb.sh", "tmp/Batch/setb.sh"); @@ -103,9 +104,10 @@ int main(int argc, char** argv) p[MAXWALLTIME] = 1; p[MAXRAMSIZE] = 50; p[HOMEDIR] = homedir; - p["VISHNU_USERID"] = vishnu_userid; p["VISHNU_PASSWORD"] = vishnu_password; - p["VISHNU_MACHINEID"] = vishnu_machineid; + p["VISHNU_HOST_NAME"] = vishnu_host_name; + p["VISHNU_HOST_LOGIN"] = vishnu_host_login; + p["VISHNU_HOST_WORKDIR"] = vishnu_host_workdir; job.setParametre(p); // ... and its environment Environnement e;