Salome HOME
Copyright update 2021
[tools/libbatch.git] / src / COORM / BatchManager_COORM.cxx
1 // Copyright (C) 2012-2021  INRIA
2 //
3 // This library is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU Lesser General Public
5 // License as published by the Free Software Foundation; either
6 // version 2.1 of the License, or (at your option) any later version.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
16 //
17 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
18 //
19
20 #include <cstdlib>
21 #include <fstream>
22
23 #include <Constants.hxx>
24 #include <Utils.hxx>
25 #include <Log.hxx>
26
27 #include "BatchManager_COORM.hxx"
28 #include "JobInfo_COORM.hxx"
29
30 using namespace std;
31
32 namespace Batch
33 {
34       BatchManager_COORM::BatchManager_COORM(const FactBatchManager * parent, const char * host,
35                       const char * username,
36                       CommunicationProtocolType protocolType, const char * mpiImpl)
37               : BatchManager(parent, host, username, protocolType, mpiImpl)
38       {
39       }
40
41       BatchManager_COORM::~BatchManager_COORM()
42       {
43       }
44
45       // Soumet un job au gestionnaire
46       const JobId BatchManager_COORM::runJob(const Job & job)
47       {
48               Parametre params = job.getParametre();
49               const string workDir = params[WORKDIR];
50               const string fileToExecute = params[EXECUTABLE];
51
52               // For CooRM
53               const string launcherArgs = params[LAUNCHER_ARGS];
54               const string launcherFile  = params[LAUNCHER_FILE];
55
56               const string jobName = params[NAME];
57
58               string::size_type p1 = fileToExecute.find_last_of("/");
59               string::size_type p2 = fileToExecute.find_last_of(".");
60               std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1);
61
62               // For CooRM
63               p1 = launcherFile.find_last_of("/");
64               std::string fileNameToLaunch = launcherFile.substr(p1+1);
65
66               // build batch script for job
67               string scriptFile = buildBatchScript(job);
68
69               // Get REMOTE_COORM_PATH environment variable
70               const char * remote_coorm_path = getenv("REMOTE_COORM_PATH");
71               if (remote_coorm_path == NULL)
72               {
73                       throw RunTimeException("Unable to get REMOTE_COORM_PATH environment variable");
74               }
75
76               // We need omniORB to execute launcher.py
77               const string set_env_cmd = "source " + string(remote_coorm_path) + "/coorm_prerequis.sh;";
78
79
80               // define command to submit batch
81               string subCommand = set_env_cmd + "python " + workDir + "/" + fileNameToLaunch + " --name="+ jobName +
82                                   " --workdir=" + workDir + " --outputs=" + workDir + "/logs/outputs.log" +
83                                   " --errors=" + workDir + "/logs/errors.log" +
84                                   " --executable=" + scriptFile + " " + launcherArgs;
85               string command = _protocol.getExecCommand(subCommand, _hostname, _username);
86               command += " 2>&1";
87               LOG(command);
88
89               // submit job
90               string output;
91               int status = Utils::getCommandOutput(command, output);
92               LOG(output);
93               if (status != 0) throw RunTimeException("Can't submit job, error was: " + output);
94
95               // read id of submitted job in output
96               istringstream logfile(output);
97               string sline, idline, id;
98
99               if (logfile)
100               {
101                 while (getline(logfile, sline) && sline != "")
102                 {
103                         idline = sline;
104                 }
105
106                 vector<string> tokens;
107                 JobInfo::Tokenize(idline, tokens, "=");
108                 id = tokens[1] ;
109               }
110               else
111               {
112                 throw RunTimeException("Error in the submission of the job on the remote host");
113               }
114
115               JobId jobid(this, (string) id);
116               return jobid;
117               }
118
119               // retire un job du gestionnaire
120               void BatchManager_COORM::deleteJob(const JobId & jobid)
121               {
122               // Get REMOTE_COORM_PATH environment variable
123               const char * remote_coorm_path = getenv("REMOTE_COORM_PATH");
124               if (remote_coorm_path == NULL)
125               {
126                 throw RunTimeException("Unable to get REMOTE_COORM_PATH environment variable");
127               }
128
129               // We need omniORB to execute launcher.py
130               const string set_env_cmd = "source " + string(remote_coorm_path) + "/coorm_prerequis.sh;";
131
132               // define command to delete job
133               string subCommand = set_env_cmd + "python " + string(remote_coorm_path) + "/coormdel.py --jobID=" + jobid.getReference();
134               string command = _protocol.getExecCommand(subCommand, _hostname, _username);
135               LOG(command);
136
137               int status = system(command.c_str());
138               if (status)
139                 throw RunTimeException("Can't delete job " + jobid.getReference());
140
141               LOG("job " << jobid.getReference() << " killed");
142       }
143
144       // Renvoie l'etat du job
145       JobInfo BatchManager_COORM::queryJob(const JobId & jobid)
146       {
147         // Get REMOTE_COORM_PATH environment variable
148         const char * remote_coorm_path = getenv("REMOTE_COORM_PATH");
149         if (remote_coorm_path == NULL)
150         {
151           throw RunTimeException("Unable to get REMOTE_COORM_PATH environment variable");
152         }
153
154         // We need omniORB to execute launcher.py
155         const string set_env_cmd = "source " + string(remote_coorm_path) + "/coorm_prerequis.sh;";
156
157         // define command to query batch
158         string subCommand = set_env_cmd + "python " + string(remote_coorm_path) + "/coormstat.py --jobID=" + jobid.getReference();
159         string command = _protocol.getExecCommand(subCommand, _hostname, _username);
160         LOG(command);
161         string output;
162         int status = Utils::getCommandOutput(command, output);
163         if(status && status != 153 && status != 256*153)
164           throw RunTimeException("Error of connection on remote host");
165
166         JobInfo_COORM jobinfo = JobInfo_COORM(jobid.getReference(), output);
167         return jobinfo;
168       }
169
170       string BatchManager_COORM::buildBatchScript(const Job & job)
171       {
172         Parametre params = job.getParametre();
173
174         // Job Parameters
175         string workDir       = "";
176         string fileToExecute = "";
177         string tmpDir = "";
178         int nbproc               = 0;
179         int edt          = 0;
180         int mem              = 0;
181         string queue         = "";
182         LIBBATCH_UNUSED(nbproc);
183         LIBBATCH_UNUSED(edt);
184         LIBBATCH_UNUSED(mem);
185
186         // Mandatory parameters
187         if (params.find(WORKDIR) != params.end())
188           workDir = params[WORKDIR].str();
189         else
190           throw RunTimeException("params[WORKDIR] is not defined. Please define it, cannot submit this job.");
191         if (params.find(EXECUTABLE) != params.end())
192           fileToExecute = params[EXECUTABLE].str();
193         else
194           throw RunTimeException("params[EXECUTABLE] is not defined. Please define it, cannot submit this job.");
195
196         string::size_type p1 = fileToExecute.find_last_of("/");
197         string::size_type p2 = fileToExecute.find_last_of(".");
198         string rootNameToExecute = fileToExecute.substr(p1+1,p2-p1-1);
199         string fileNameToExecute = fileToExecute.substr(p1+1);
200
201         // Create batch submit file
202         ofstream tempOutputFile;
203         string tmpFileName = Utils::createAndOpenTemporaryFile("COORM-script", tempOutputFile);
204
205         tempOutputFile << "#!/bin/sh -f" << endl;
206         tempOutputFile << "export LIBBATCH_NODEFILE=$COORM_NODEFILE" << endl;
207         // Launch the executable
208         tempOutputFile << "cd " << tmpDir << endl;
209         tempOutputFile << "./" + fileNameToExecute << endl;
210         tempOutputFile.flush();
211         tempOutputFile.close();
212
213         Utils::chmod(tmpFileName.c_str(), 0x1ED);
214         LOG("Batch script file generated is: " << tmpFileName);
215
216         string remoteFileName = rootNameToExecute + "_Batch.sh";
217         int status = _protocol.copyFile(tmpFileName, "", "",
218                                         workDir + "/" + remoteFileName,
219                                         _hostname, _username);
220         if (status)
221           throw RunTimeException("Cannot copy batch submission file on host " + _hostname);
222
223         return remoteFileName;
224       }
225
226       void BatchManager_COORM::exportInputFiles(const Job & job)
227       {
228         BatchManager::exportInputFiles(job);
229
230         int status;
231         Parametre params = job.getParametre();
232
233         string launcherFile = params[LAUNCHER_FILE];
234
235         if (launcherFile.size() != 0)
236         {
237           // Copy launcherFile into batch working directory
238           status = _protocol.copyFile(launcherFile, "", "",
239                                       params[WORKDIR], _hostname, _username);
240           if (status) {
241             std::ostringstream oss;
242             oss << "Cannot copy file " << launcherFile << " on host " << _hostname;
243             oss << ". Return status is " << status;
244             throw RunTimeException(oss.str());
245           }
246
247           string remoteLauncher = launcherFile;
248           remoteLauncher = remoteLauncher.substr(remoteLauncher.rfind("/") + 1, remoteLauncher.length());
249           remoteLauncher = string(params[WORKDIR]) + "/" + remoteLauncher;
250
251           string subCommand = string("chmod u+x ") + remoteLauncher;
252           string command = _protocol.getExecCommand(subCommand, _hostname, _username);
253           LOG(command);
254           status = system(command.c_str());
255           if (status) {
256             std::ostringstream oss;
257             oss << "Cannot change permissions of file " << remoteLauncher << " on host " << _hostname;
258             oss << ". Return status is " << status;
259             throw RunTimeException(oss.str());
260           }
261         }
262       }
263 }