Salome HOME
Copyright update 2020
[tools/libbatch.git] / src / COORM / BatchManager_COORM.cxx
1 // Copyright (C) 2012-2020  INRIA
2 //
3 // This library is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU Lesser General Public
5 // License as published by the Free Software Foundation; either
6 // version 2.1 of the License, or (at your option) any later version.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
16 //
17 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
18 //
19
20 #include <cstdlib>
21 #include <fstream>
22
23 #include <Constants.hxx>
24 #include <Utils.hxx>
25 #include <Log.hxx>
26
27 #include "BatchManager_COORM.hxx"
28 #include "JobInfo_COORM.hxx"
29
30 using namespace std;
31
32 namespace Batch
33 {
34       BatchManager_COORM::BatchManager_COORM(const FactBatchManager * parent, const char * host,
35                       const char * username,
36                       CommunicationProtocolType protocolType, const char * mpiImpl)
37               : BatchManager(parent, host, username, protocolType, mpiImpl)
38       {
39       }
40
41       BatchManager_COORM::~BatchManager_COORM()
42       {
43       }
44
45       // Soumet un job au gestionnaire
46       const JobId BatchManager_COORM::runJob(const Job & job)
47       {
48               Parametre params = job.getParametre();
49               const string workDir = params[WORKDIR];
50               const string fileToExecute = params[EXECUTABLE];
51
52               // For CooRM
53               const string launcherArgs = params[LAUNCHER_ARGS];
54               const string launcherFile  = params[LAUNCHER_FILE];
55
56               const string jobName = params[NAME];
57
58               string::size_type p1 = fileToExecute.find_last_of("/");
59               string::size_type p2 = fileToExecute.find_last_of(".");
60               std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1);
61
62               // For CooRM
63               p1 = launcherFile.find_last_of("/");
64               std::string fileNameToLaunch = launcherFile.substr(p1+1);
65
66               // build batch script for job
67               string scriptFile = buildBatchScript(job);
68
69               // Get REMOTE_COORM_PATH environment variable
70               const char * remote_coorm_path = getenv("REMOTE_COORM_PATH");
71               if (remote_coorm_path == NULL)
72               {
73                       throw RunTimeException("Unable to get REMOTE_COORM_PATH environment variable");
74               }
75
76               // We need omniORB to execute launcher.py
77               const string set_env_cmd = "source " + string(remote_coorm_path) + "/coorm_prerequis.sh;";
78
79
80               // define command to submit batch
81               string subCommand = set_env_cmd + "python " + workDir + "/" + fileNameToLaunch + " --name="+ jobName +
82                                   " --workdir=" + workDir + " --outputs=" + workDir + "/logs/outputs.log" +
83                                   " --errors=" + workDir + "/logs/errors.log" +
84                                   " --executable=" + scriptFile + " " + launcherArgs;
85               string command = _protocol.getExecCommand(subCommand, _hostname, _username);
86               command += " 2>&1";
87               LOG(command);
88
89               // submit job
90               string output;
91               int status = Utils::getCommandOutput(command, output);
92               LOG(output);
93               if (status != 0) throw RunTimeException("Can't submit job, error was: " + output);
94
95               // read id of submitted job in output
96               istringstream logfile(output);
97               string sline, idline, id;
98
99               if (logfile)
100               {
101                 while (getline(logfile, sline) && sline != "")
102                 {
103                         idline = sline;
104                 }
105
106                 vector<string> tokens;
107                 JobInfo::Tokenize(idline, tokens, "=");
108                 id = tokens[1] ;
109               }
110               else
111               {
112                 throw RunTimeException("Error in the submission of the job on the remote host");
113               }
114
115               JobId jobid(this, (string) id);
116               return jobid;
117               }
118
119               // retire un job du gestionnaire
120               void BatchManager_COORM::deleteJob(const JobId & jobid)
121               {
122               // Get REMOTE_COORM_PATH environment variable
123               const char * remote_coorm_path = getenv("REMOTE_COORM_PATH");
124               if (remote_coorm_path == NULL)
125               {
126                 throw RunTimeException("Unable to get REMOTE_COORM_PATH environment variable");
127               }
128
129               // We need omniORB to execute launcher.py
130               const string set_env_cmd = "source " + string(remote_coorm_path) + "/coorm_prerequis.sh;";
131
132               // define command to delete job
133               string subCommand = set_env_cmd + "python " + string(remote_coorm_path) + "/coormdel.py --jobID=" + jobid.getReference();
134               string command = _protocol.getExecCommand(subCommand, _hostname, _username);
135               LOG(command);
136
137               int status = system(command.c_str());
138               if (status)
139                 throw RunTimeException("Can't delete job " + jobid.getReference());
140
141               LOG("job " << jobid.getReference() << " killed");
142       }
143
144       // Renvoie l'etat du job
145       JobInfo BatchManager_COORM::queryJob(const JobId & jobid)
146       {
147         // Get REMOTE_COORM_PATH environment variable
148         const char * remote_coorm_path = getenv("REMOTE_COORM_PATH");
149         if (remote_coorm_path == NULL)
150         {
151           throw RunTimeException("Unable to get REMOTE_COORM_PATH environment variable");
152         }
153
154         // We need omniORB to execute launcher.py
155         const string set_env_cmd = "source " + string(remote_coorm_path) + "/coorm_prerequis.sh;";
156
157         // define command to query batch
158         string subCommand = set_env_cmd + "python " + string(remote_coorm_path) + "/coormstat.py --jobID=" + jobid.getReference();
159         string command = _protocol.getExecCommand(subCommand, _hostname, _username);
160         LOG(command);
161         string output;
162         int status = Utils::getCommandOutput(command, output);
163         if(status && status != 153 && status != 256*153)
164           throw RunTimeException("Error of connection on remote host");
165
166         JobInfo_COORM jobinfo = JobInfo_COORM(jobid.getReference(), output);
167         return jobinfo;
168       }
169
170       string BatchManager_COORM::buildBatchScript(const Job & job)
171       {
172         Parametre params = job.getParametre();
173
174         // Job Parameters
175         string workDir       = "";
176         string fileToExecute = "";
177         string tmpDir = "";
178         int nbproc               = 0;
179         int edt          = 0;
180         int mem              = 0;
181         string queue         = "";
182
183         // Mandatory parameters
184         if (params.find(WORKDIR) != params.end())
185           workDir = params[WORKDIR].str();
186         else
187           throw RunTimeException("params[WORKDIR] is not defined. Please define it, cannot submit this job.");
188         if (params.find(EXECUTABLE) != params.end())
189           fileToExecute = params[EXECUTABLE].str();
190         else
191           throw RunTimeException("params[EXECUTABLE] is not defined. Please define it, cannot submit this job.");
192
193         string::size_type p1 = fileToExecute.find_last_of("/");
194         string::size_type p2 = fileToExecute.find_last_of(".");
195         string rootNameToExecute = fileToExecute.substr(p1+1,p2-p1-1);
196         string fileNameToExecute = fileToExecute.substr(p1+1);
197
198         // Create batch submit file
199         ofstream tempOutputFile;
200         string tmpFileName = Utils::createAndOpenTemporaryFile("COORM-script", tempOutputFile);
201
202         tempOutputFile << "#!/bin/sh -f" << endl;
203         tempOutputFile << "export LIBBATCH_NODEFILE=$COORM_NODEFILE" << endl;
204         // Launch the executable
205         tempOutputFile << "cd " << tmpDir << endl;
206         tempOutputFile << "./" + fileNameToExecute << endl;
207         tempOutputFile.flush();
208         tempOutputFile.close();
209
210         Utils::chmod(tmpFileName.c_str(), 0x1ED);
211         LOG("Batch script file generated is: " << tmpFileName);
212
213         string remoteFileName = rootNameToExecute + "_Batch.sh";
214         int status = _protocol.copyFile(tmpFileName, "", "",
215                                         workDir + "/" + remoteFileName,
216                                         _hostname, _username);
217         if (status)
218           throw RunTimeException("Cannot copy batch submission file on host " + _hostname);
219
220         return remoteFileName;
221       }
222
223       void BatchManager_COORM::exportInputFiles(const Job & job)
224       {
225         BatchManager::exportInputFiles(job);
226
227         int status;
228         Parametre params = job.getParametre();
229
230         string launcherFile = params[LAUNCHER_FILE];
231
232         if (launcherFile.size() != 0)
233         {
234           // Copy launcherFile into batch working directory
235           status = _protocol.copyFile(launcherFile, "", "",
236                                       params[WORKDIR], _hostname, _username);
237           if (status) {
238             std::ostringstream oss;
239             oss << "Cannot copy file " << launcherFile << " on host " << _hostname;
240             oss << ". Return status is " << status;
241             throw RunTimeException(oss.str());
242           }
243
244           string remoteLauncher = launcherFile;
245           remoteLauncher = remoteLauncher.substr(remoteLauncher.rfind("/") + 1, remoteLauncher.length());
246           remoteLauncher = string(params[WORKDIR]) + "/" + remoteLauncher;
247
248           string subCommand = string("chmod u+x ") + remoteLauncher;
249           string command = _protocol.getExecCommand(subCommand, _hostname, _username);
250           LOG(command);
251           status = system(command.c_str());
252           if (status) {
253             std::ostringstream oss;
254             oss << "Cannot change permissions of file " << remoteLauncher << " on host " << _hostname;
255             oss << ". Return status is " << status;
256             throw RunTimeException(oss.str());
257           }
258         }
259       }
260 }