1 // Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
2 // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License.
9 // This library is distributed in the hope that it will be useful
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
20 #include "BatchLight_BatchManager_PBS.hxx"
21 #include "BatchLight_BatchManager_SLURM.hxx"
22 #include "BatchLight_Job.hxx"
23 #include "SALOME_Launcher.hxx"
25 #include <sys/types.h>
30 #include "Utils_CorbaException.hxx"
31 #include "Batch_Date.hxx"
// Numeric constant that is not referenced anywhere in the visible part of this file.
// NOTE(review): the name suggests a timeout for launching a container, presumably in seconds — confirm the unit, or remove it if it is unused.
33 #define TIME_OUT_TO_LAUNCH_CONT 21
// Name under which the constructor registers the launcher servant in the naming
// service, and which Shutdown() passes to Destroy_Name().
37 const char *SALOME_Launcher::_LauncherNameInNS = "/SalomeLauncher";
39 //=============================================================================
43 * Define a CORBA single thread policy for the server, which avoid to deal
44 * with non thread-safe usage like Change_Directory in SALOME naming service
46 //=============================================================================
48 SALOME_Launcher::SALOME_Launcher(CORBA::ORB_ptr orb, PortableServer::POA_var poa)
50 MESSAGE("constructor");
51 _NS = new SALOME_NamingService(orb);
52 _ResManager = new SALOME_ResourcesManager(orb,poa,_NS);
53 _ContManager = new SALOME_ContainerManager(orb,poa,_ResManager,_NS);
54 _ResManager->_remove_ref();
55 _ContManager->_remove_ref();
57 _orb = CORBA::ORB::_duplicate(orb) ;
58 _poa = PortableServer::POA::_duplicate(poa) ;
59 PortableServer::ObjectId_var id = _poa->activate_object(this);
60 CORBA::Object_var obj = _poa->id_to_reference(id);
61 Engines::SalomeLauncher_var refContMan = Engines::SalomeLauncher::_narrow(obj);
63 _NS->Register(refContMan,_LauncherNameInNS);
64 MESSAGE("constructor end");
67 //=============================================================================
71 //=============================================================================
// Destructor: logs, then iterates over every entry of _batchmap
// (cluster name -> BatchLight::BatchManager pointer).
// NOTE(review): the loop body is not visible in this excerpt. Presumably it
// deletes each manager, since submitSalomeJob stores pointers returned by
// FactoryBatchManager in this map — confirm.
73 SALOME_Launcher::~SALOME_Launcher()
75 MESSAGE("destructor");
77 std::map < string, BatchLight::BatchManager * >::const_iterator it;
78 for(it=_batchmap.begin();it!=_batchmap.end();it++)
82 //=============================================================================
84 * shutdown all the containers, then the ContainerManager servant
86 //=============================================================================
// Shuts the launcher down. In order: removes the launcher's name from the
// naming service, calls Shutdown() on the container manager and then on the
// resources manager, and deactivates this servant in the POA.
88 void SALOME_Launcher::Shutdown()
91 _NS->Destroy_Name(_LauncherNameInNS);
92 _ContManager->Shutdown();
93 _ResManager->Shutdown();
// Look up this servant's object id so that the POA can deactivate it.
94 PortableServer::ObjectId_var oid = _poa->servant_to_id(this);
95 _poa->deactivate_object(oid);
// NOTE(review): the statement guarded by this nil check is not visible in this
// excerpt; presumably it shuts the ORB down — confirm.
97 if(!CORBA::is_nil(_orb))
101 //=============================================================================
103 * Returns the PID of the process
105 //=============================================================================
106 CORBA::Long SALOME_Launcher::getPID()
108 return (CORBA::Long)getpid();
111 //=============================================================================
113 * Submit a batch job on a cluster and returns the JobId
114 * \param fileToExecute : .py/.exe/.sh/... to execute on the batch cluster
115 * \param filesToExport : to export on the batch cluster
116 * \param NumberOfProcessors : Number of processors needed on the batch cluster
117 * \param params : Constraints for the choice of the batch cluster
119 //=============================================================================
120 CORBA::Long SALOME_Launcher::submitSalomeJob( const char * fileToExecute ,
121 const Engines::FilesList& filesToExport ,
122 const Engines::FilesList& filesToImport ,
123 const Engines::BatchParameters& batch_params,
124 const Engines::MachineParameters& params)
126 MESSAGE("BEGIN OF SALOME_Launcher::submitSalomeJob");
129 // find a cluster matching the structure params
130 Engines::CompoList aCompoList ;
131 Engines::MachineList *aMachineList = _ResManager->GetFittingResources(params, aCompoList);
132 if (aMachineList->length() == 0)
133 throw SALOME_Exception("No resources have been found with your parameters");
135 const Engines::MachineParameters* p = _ResManager->GetMachineParameters((*aMachineList)[0]);
136 string clustername(p->alias);
137 INFOS("Choose cluster" << clustername);
139 // search batch manager for that cluster in map or instanciate one
140 std::map < string, BatchLight::BatchManager * >::const_iterator it = _batchmap.find(clustername);
141 if(it == _batchmap.end())
143 _batchmap[clustername] = FactoryBatchManager(p);
144 // TODO: Add a test for the cluster !
147 // create and submit job on cluster
148 BatchLight::Job* job = new BatchLight::Job(fileToExecute, filesToExport, filesToImport, batch_params);
149 bool res = job->check();
152 throw SALOME_Exception("Job parameters are bad (see informations above)");
154 jobId = _batchmap[clustername]->submitJob(job);
156 catch(const SALOME_Exception &ex){
158 THROW_SALOME_CORBA_EXCEPTION(ex.what(),SALOME::INTERNAL_ERROR);
163 //=============================================================================
165 * Query a batch job on a cluster and returns the status of job
166 * \param jobId : identification of Salome job
167 * \param params : Constraints for the choice of the batch cluster
169 //=============================================================================
170 char* SALOME_Launcher::querySalomeJob( const CORBA::Long jobId,
171 const Engines::MachineParameters& params)
175 // find a cluster matching params structure
176 Engines::CompoList aCompoList ;
177 Engines::MachineList * aMachineList = _ResManager->GetFittingResources( params , aCompoList ) ;
178 const Engines::MachineParameters* p = _ResManager->GetMachineParameters((*aMachineList)[0]);
179 string clustername(p->alias);
181 // search batch manager for that cluster in map
182 std::map < string, BatchLight::BatchManager * >::const_iterator it = _batchmap.find(clustername);
183 if(it == _batchmap.end())
184 throw SALOME_Exception("no batchmanager for that cluster");
186 status = _batchmap[clustername]->queryJob(jobId);
188 catch(const SALOME_Exception &ex){
189 INFOS("Caught exception.");
190 THROW_SALOME_CORBA_EXCEPTION(ex.what(),SALOME::BAD_PARAM);
192 return CORBA::string_dup(status.c_str());
195 //=============================================================================
197 * Delete a batch job on a cluster
198 * \param jobId : identification of Salome job
199 * \param params : Constraints for the choice of the batch cluster
201 //=============================================================================
202 void SALOME_Launcher::deleteSalomeJob( const CORBA::Long jobId,
203 const Engines::MachineParameters& params)
206 // find a cluster matching params structure
207 Engines::CompoList aCompoList ;
208 Engines::MachineList *aMachineList = _ResManager->GetFittingResources( params , aCompoList ) ;
209 const Engines::MachineParameters* p = _ResManager->GetMachineParameters((*aMachineList)[0]);
210 string clustername(p->alias);
212 // search batch manager for that cluster in map
213 std::map < string, BatchLight::BatchManager * >::const_iterator it = _batchmap.find(clustername);
214 if(it == _batchmap.end())
215 throw SALOME_Exception("no batchmanager for that cluster");
217 _batchmap[clustername]->deleteJob(jobId);
219 catch(const SALOME_Exception &ex){
220 INFOS("Caught exception.");
221 THROW_SALOME_CORBA_EXCEPTION(ex.what(),SALOME::BAD_PARAM);
225 //=============================================================================
227 * Get result files of job on a cluster
228 * \param jobId : identification of Salome job
229 * \param params : Constraints for the choice of the batch cluster
231 //=============================================================================
232 void SALOME_Launcher::getResultSalomeJob( const char *directory,
233 const CORBA::Long jobId,
234 const Engines::MachineParameters& params)
237 // find a cluster matching params structure
238 Engines::CompoList aCompoList ;
239 Engines::MachineList *aMachineList = _ResManager->GetFittingResources( params , aCompoList ) ;
240 const Engines::MachineParameters* p = _ResManager->GetMachineParameters((*aMachineList)[0]);
241 string clustername(p->alias);
243 // search batch manager for that cluster in map
244 std::map < string, BatchLight::BatchManager * >::const_iterator it = _batchmap.find(clustername);
245 if(it == _batchmap.end())
246 throw SALOME_Exception("no batchmanager for that cluster");
248 _batchmap[clustername]->importOutputFiles( directory, jobId );
250 catch(const SALOME_Exception &ex){
251 INFOS("Caught exception.");
252 THROW_SALOME_CORBA_EXCEPTION(ex.what(),SALOME::BAD_PARAM);
256 //=============================================================================
258 * Factory to instantiate the appropriate batch manager for the chosen cluster.
260 //=============================================================================
// Builds a new batch manager for the cluster described by `params`.
// Copies alias, protocol, username, applipath, the module list, the node and
// processor-per-node counts and the MPI implementation from `params` into a
// BatchLight::batchParams, then picks the concrete manager class from the
// `batch` field. The manager is allocated with `new` and returned as a raw
// pointer (submitSalomeJob stores it in _batchmap).
// Throws SALOME_Exception("no batchmanager for that cluster") when no manager
// is returned by the branches below.
// NOTE(review): dynamic exception specifications such as throw(SALOME_Exception)
// were removed in C++17; dropping it means changing the header declaration too.
262 BatchLight::BatchManager *SALOME_Launcher::FactoryBatchManager( const Engines::MachineParameters* params ) throw(SALOME_Exception)
264 // Fill structure for batch manager
265 BatchLight::batchParams p;
// The cluster alias is used as the host name.
266 p.hostname = params->alias;
267 p.protocol = params->protocol;
268 p.username = params->username;
269 p.applipath = params->applipath;
// NOTE(review): `i` is a signed int compared against length(), which is
// presumably unsigned for a CORBA sequence — confirm and match the type.
270 for(int i=0;i<params->modList.length();i++)
271 p.modulesList.push_back((const char*)params->modList[i]);
272 p.nbnodes = params->nb_node;
273 p.nbprocpernode = params->nb_proc_per_node;
274 p.mpiImpl = params->mpiImpl;
// Batch system name, used to select the manager implementation.
276 string sb = (const char*)params->batch;
// NOTE(review): the condition guarding this first return is not visible in this excerpt.
278 return new BatchLight::BatchManager_PBS(p);
279 else if(sb == "slurm")
280 return new BatchLight::BatchManager_SLURM(p);
// Fallback: log the batch system name and report that no manager exists for it.
282 MESSAGE("BATCH = " << params->batch);
283 throw SALOME_Exception("no batchmanager for that cluster");