# commands/jobs.py — part of the SALOME tools repository (tools/sat.git),
# formatted with black.
1 #!/usr/bin/env python
2 # -*- coding:utf-8 -*-
3 #  Copyright (C) 2010-2013  CEA/DEN
4 #
5 #  This library is free software; you can redistribute it and/or
6 #  modify it under the terms of the GNU Lesser General Public
7 #  License as published by the Free Software Foundation; either
8 #  version 2.1 of the License.
9 #
10 #  This library is distributed in the hope that it will be useful,
11 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 #  Lesser General Public License for more details.
14 #
15 #  You should have received a copy of the GNU Lesser General Public
16 #  License along with this library; if not, write to the Free Software
17 #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
18
19 import os
20 import sys
21 import tempfile
22 import traceback
23 import datetime
24 import time
25 import csv
26 import shutil
27 import itertools
28 import re
29
# paramiko is optional at import time: if it is missing, keep an explanatory
# message in its place so that later uses fail with a clear error instead of
# aborting the whole module import.
try:
    import paramiko
except ImportError:
    paramiko = "import paramiko impossible"
36
37 import src
38
39
40 import src.ElementTree as etree
41
# Stylesheets used to render the generated xml status reports in a browser
STYLESHEET_GLOBAL = "jobs_global_report.xsl"
STYLESHEET_BOARD = "jobs_board_report.xsl"

# Separators used when reading the input boards csv file
DAYS_SEPARATOR = ","
CSV_DELIMITER = ";"

# Command line options of the "sat jobs" command
parser = src.options.Options()

parser.add_option(
    "n",
    "name",
    "list2",
    "jobs_cfg",
    _(
        "Mandatory: The name of the config file that contains"
        " the jobs configuration. Can be a list."
    ),
)
parser.add_option(
    "o",
    "only_jobs",
    "list2",
    "only_jobs",
    _("Optional: the list of jobs to launch, by their name. "),
)
parser.add_option(
    "l", "list", "boolean", "list", _("Optional: list all available config files.")
)
parser.add_option(
    "t",
    "test_connection",
    "boolean",
    "test_connection",
    _("Optional: try to connect to the machines. " "Not executing the jobs."),
    False,
)
parser.add_option(
    "p",
    "publish",
    "boolean",
    "publish",
    _(
        "Optional: generate an xml file that can be read in a "
        "browser to display the jobs status."
    ),
    False,
)
parser.add_option(
    "i",
    "input_boards",
    "string",
    "input_boards",
    _("Optional: " "the path to csv file that contain " "the expected boards."),
    "",
)
parser.add_option(
    "",
    "completion",
    "boolean",
    "no_label",
    _("Optional (internal use): do not print labels, Works only " "with --list."),
    False,
)
105
106
class Machine(object):
    """Manage an ssh connection to a remote machine and file transfers to it.

    :param name str: symbolic name of the machine (from the jobs config)
    :param host str: host name or IP address
    :param user str: ssh user name
    :param port int: ssh port
    :param passwd str: ssh password, or None to rely on keys/agent
    :param sat_path str: remote path where salomeTools is copied
    """

    def __init__(self, name, host, user, port=22, passwd=None, sat_path="salomeTools"):
        self.name = name
        self.host = host
        self.port = port
        self.distribution = None  # Will be filled after copying SAT on the machine
        self.user = user
        self.password = passwd
        self.sat_path = sat_path
        self.ssh = paramiko.SSHClient()
        # None: no connection attempted yet; True/False after connect()
        self._connection_successful = None

    def connect(self, logger):
        """Initiate the ssh connection to the remote machine

        :param logger src.logger.Logger: The logger instance
        :return: an empty string on success, an error message otherwise
        :rtype: str
        """
        self._connection_successful = False
        self.ssh.load_system_host_keys()
        self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        try:
            self.ssh.connect(
                self.host, port=self.port, username=self.user, password=self.password
            )
        except paramiko.AuthenticationException:
            message = src.KO_STATUS + _("Authentication failed")
        except paramiko.BadHostKeyException:
            message = src.KO_STATUS + _("The server's host key could not be verified")
        except paramiko.SSHException:
            message = _(
                "SSHException error connecting or " "establishing an SSH session"
            )
        except Exception:
            # deliberate catch-all: any other failure is reported as a
            # generic connection error instead of aborting the jobs run
            message = _("Error connecting or establishing an SSH session")
        else:
            self._connection_successful = True
            message = ""
        return message

    def successfully_connected(self, logger):
        """Verify if the connection to the remote machine has succeed

        :param logger src.logger.Logger: The logger instance
        :return: True if the connection has succeed, False if not
        :rtype: bool
        """
        if self._connection_successful is None:
            # connect() was never called: warn, then fall through and
            # return None (falsy) like an unsuccessful connection
            message = _(
                "Warning : trying to ask if the connection to "
                "(name: %s host: %s, port: %s, user: %s) is OK whereas there were"
                " no connection request" % (self.name, self.host, self.port, self.user)
            )
            logger.write(src.printcolors.printcWarning(message))
        return self._connection_successful

    def copy_sat(self, sat_local_path, job_file):
        """Copy salomeTools to the remote machine in self.sat_path

        :param sat_local_path str: local path of the salomeTools sources
        :param job_file str: local path of the jobs configuration file
        :return: 0 on success, the error message (str) on failure
        """
        res = 0
        try:
            # open a sftp connection
            self.sftp = self.ssh.open_sftp()
            # Create the sat directory on remote machine if it is not existing
            self.mkdir(self.sat_path, ignore_existing=True)
            # Put sat
            self.put_dir(sat_local_path, self.sat_path, filters=[".git"])
            # put the job configuration file in order to make it reachable
            # on the remote machine (hidden file: leading dot)
            remote_job_file_name = ".%s" % os.path.basename(job_file)
            self.sftp.put(job_file, os.path.join(self.sat_path, remote_job_file_name))
        except Exception as e:
            res = str(e)
            self._connection_successful = False

        return res

    def put_dir(self, source, target, filters=None):
        """Uploads the contents of the source directory to the target path.

        The target directory needs to exists. All sub-directories in source
        are created under target.

        :param source str: local directory to upload
        :param target str: remote destination directory (must exist)
        :param filters list: entry names to skip (applied at every level)
        """
        # None instead of a mutable default list to avoid the shared
        # default-argument pitfall; behavior is unchanged for callers.
        if filters is None:
            filters = []
        for item in os.listdir(source):
            if item in filters:
                continue
            source_path = os.path.join(source, item)
            destination_path = os.path.join(target, item)
            if os.path.islink(source_path):
                linkto = os.readlink(source_path)
                try:
                    self.sftp.symlink(linkto, destination_path)
                    self.sftp.chmod(destination_path, os.stat(source_path).st_mode)
                except IOError:
                    # the link may already exist remotely: best effort
                    pass
            else:
                if os.path.isfile(source_path):
                    self.sftp.put(source_path, destination_path)
                    self.sftp.chmod(destination_path, os.stat(source_path).st_mode)
                else:
                    self.mkdir(destination_path, ignore_existing=True)
                    # propagate the filters so that filtered entries (e.g.
                    # ".git") are also skipped inside sub-directories; the
                    # previous implementation only filtered the top level
                    self.put_dir(source_path, destination_path, filters=filters)

    def mkdir(self, path, mode=511, ignore_existing=False):
        """Augments mkdir by adding an option to not fail
        if the folder exists

        :param path str: remote directory to create
        :param mode int: permission bits (511 == 0o777)
        :param ignore_existing bool: if True, swallow the IOError raised
                                     when the directory already exists
        """
        try:
            self.sftp.mkdir(path, mode)
        except IOError:
            if ignore_existing:
                pass
            else:
                raise

    def exec_command(self, command, logger):
        """Execute the command on the remote machine

        :param command str: The command to be run
        :param logger src.logger.Logger: The logger instance
        :return: the stdin, stdout, and stderr of the executing command,
                 as a 3-tuple
        :rtype: (paramiko.channel.ChannelFile, paramiko.channel.ChannelFile,
                paramiko.channel.ChannelFile)
        """
        try:
            # Does not wait the end of the command
            (stdin, stdout, stderr) = self.ssh.exec_command(command)
        except paramiko.SSHException:
            message = src.KO_STATUS + _(": the server failed to execute the command\n")
            logger.write(src.printcolors.printcError(message))
            return (None, None, None)
        except Exception:
            # any other failure: log a generic KO and return empty streams
            logger.write(src.printcolors.printcError(src.KO_STATUS + "\n"))
            return (None, None, None)
        else:
            return (stdin, stdout, stderr)

    def close(self):
        """Close the ssh connection

        :rtype: None
        """
        self.ssh.close()

    def write_info(self, logger):
        """Prints the informations relative to the machine in the logger
           (terminal traces and log file)

        :param logger src.logger.Logger: The logger instance
        :return: Nothing
        :rtype: None
        """
        logger.write("host : " + self.host + "\n")
        logger.write("port : " + str(self.port) + "\n")
        logger.write("user : " + str(self.user) + "\n")
        if self.successfully_connected(logger):
            status = src.OK_STATUS
        else:
            status = src.KO_STATUS
        logger.write("Connection : " + status + "\n\n")
270
271
class Job(object):
    """Manage a single remote job: build the "sat job" command line, launch
    it on the remote machine over ssh, follow its state (begun / running /
    finished / timeout / cancelled) and retrieve its log files.
    """

    def __init__(
        self,
        name,
        machine,
        application,
        board,
        commands,
        timeout,
        config,
        job_file_path,
        logger,
        after=None,
        prefix=None,
    ):

        self.name = name
        self.machine = machine
        self.after = after
        self.timeout = timeout
        self.application = application
        self.board = board
        self.config = config
        self.logger = logger
        # The list of log files to download from the remote machine
        self.remote_log_files = []

        # The remote command status
        # -1 means that it has not been launched,
        # 0 means success and 1 means fail
        self.res_job = "-1"
        self.cancelled = False

        # launch/end timestamps: -1 until the corresponding event happens
        self._T0 = -1
        self._Tf = -1
        self._has_begun = False
        self._has_finished = False
        self._has_timouted = False
        self._stdin = None  # Store the command inputs field
        self._stdout = None  # Store the command outputs field
        self._stderr = None  # Store the command errors field

        self.out = ""
        self.err = ""

        # the jobs pyconf is copied on the remote side as a hidden file
        self.name_remote_jobs_pyconf = ".%s" % os.path.basename(job_file_path)
        self.commands = commands
        # full remote command: run "sat job" with the copied jobs pyconf,
        # logging the produced files into list_log_files.txt
        self.command = (
            os.path.join(self.machine.sat_path, "sat")
            + " -l "
            + os.path.join(self.machine.sat_path, "list_log_files.txt")
            + " job --jobs_config "
            + os.path.join(self.machine.sat_path, self.name_remote_jobs_pyconf)
            + " --name "
            + self.name
        )
        if prefix:
            self.command = prefix + ' "' + self.command + '"'

    def get_pids(self):
        """Get the pid(s) corresponding to the command that have been launched
            On the remote machine

        :return: The list of integers corresponding to the found pids
        :rtype: List
        """
        pids = []
        # grep the exact command line in the remote process list
        # NOTE(review): the grep process itself may also match — verify
        cmd_pid = 'ps aux | grep "' + self.command + "\" | awk '{print $2}'"
        (_, out_pid, _) = self.machine.exec_command(cmd_pid, self.logger)
        pids_cmd = out_pid.readlines()
        pids_cmd = [str(src.only_numbers(pid)) for pid in pids_cmd]
        pids += pids_cmd
        return pids

    def kill_remote_process(self, wait=1):
        """Kills the process on the remote machine.

        :param wait int: seconds to sleep after sending the signal
        :return: (the output of the kill, the error of the kill)
        :rtype: (str, str)
        """
        try:
            pids = self.get_pids()
        except:
            return ("Unable to get the pid of the command.", "")

        # "kill -2" sends SIGINT to every pid found
        cmd_kill = " ; ".join([("kill -2 " + pid) for pid in pids])
        (_, out_kill, err_kill) = self.machine.exec_command(cmd_kill, self.logger)
        time.sleep(wait)
        return (out_kill.read().decode(), err_kill.read().decode())

    def has_begun(self):
        """Returns True if the job has already begun

        :return: True if the job has already begun
        :rtype: bool
        """
        return self._has_begun

    def has_finished(self):
        """Returns True if the job has already finished
           (i.e. all the commands have been executed)
           If it is finished, the outputs are stored in the fields out and err.

        :return: True if the job has already finished
        :rtype: bool
        """

        # If the method has already been called and returned True
        if self._has_finished:
            return True

        # If the job has not begun yet
        if not self.has_begun():
            return False

        # the ssh channel is closed once the remote command has exited
        if self._stdout.channel.closed:
            self._has_finished = True
            # Store the result outputs
            self.out += self._stdout.read().decode()
            self.err += self._stderr.read().decode()
            # Put end time
            self._Tf = time.time()
            # And get the remote command status and log files
            try:
                self.get_log_files()
            except Exception as e:
                self.err += _("Unable to get remote log files: %s" % e)

        return self._has_finished

    def get_log_files(self):
        """Get the log files produced by the command launched
        on the remote machine, and put it in the log directory of the user,
        so they can be accessible from
        """
        # Do not get the files if the command is not finished
        if not self.has_finished():
            msg = _("Trying to get log files whereas the job is not finished.")
            self.logger.write(src.printcolors.printcWarning(msg))
            return

        # First get the file that contains the list of log files to get
        tmp_file_path = src.get_tmp_filename(self.config, "list_log_files.txt")
        remote_path = os.path.join(self.machine.sat_path, "list_log_files.txt")
        self.machine.sftp.get(remote_path, tmp_file_path)

        # Read the file and get the result of the command and all the log files
        # to get
        fstream_tmp = open(tmp_file_path, "r")
        file_lines = fstream_tmp.readlines()
        file_lines = [line.replace("\n", "") for line in file_lines]
        fstream_tmp.close()
        os.remove(tmp_file_path)

        try:
            # The first line is the result of the command (0 success or 1 fail)
            self.res_job = file_lines[0]
        except Exception as e:
            self.err += _(
                "Unable to get status from remote file %s: %s" % (remote_path, str(e))
            )

        for i, job_path_remote in enumerate(file_lines[1:]):
            try:
                # For each command, there is two files to get :
                # 1- The xml file describing the command and giving the
                # internal traces.
                # 2- The txt file containing the system command traces (like
                # traces produced by the "make" command)
                # 3- In case of the test command, there is another file to get :
                # the xml board that contain the test results
                dirname = os.path.basename(os.path.dirname(job_path_remote))
                if dirname != "OUT" and dirname != "TEST":
                    # Case 1-
                    local_path = os.path.join(
                        os.path.dirname(self.logger.logFilePath),
                        os.path.basename(job_path_remote),
                    )
                    if i == 0:  # The first is the job command
                        self.logger.add_link(
                            os.path.basename(job_path_remote),
                            "job",
                            self.res_job,
                            self.command,
                        )
                elif dirname == "OUT":
                    # Case 2-
                    local_path = os.path.join(
                        os.path.dirname(self.logger.logFilePath),
                        "OUT",
                        os.path.basename(job_path_remote),
                    )
                elif dirname == "TEST":
                    # Case 3-
                    local_path = os.path.join(
                        os.path.dirname(self.logger.logFilePath),
                        "TEST",
                        os.path.basename(job_path_remote),
                    )

                # Get the file (skip files already downloaded)
                if not os.path.exists(local_path):
                    self.machine.sftp.get(job_path_remote, local_path)
                self.remote_log_files.append(local_path)
            except Exception as e:
                self.err += _(
                    "Unable to get %s log file from remote: %s"
                    % (str(job_path_remote), str(e))
                )

    def has_failed(self):
        """Returns True if the job has failed.
           A job is considered as failed if the machine could not be reached,
           if the remote command failed,
           or if the job finished with a time out.

        :return: True if the job has failed
        :rtype: bool
        """
        if not self.has_finished():
            return False
        if not self.machine.successfully_connected(self.logger):
            return True
        if self.is_timeout():
            return True
        if self.res_job == "1":
            return True
        return False

    def cancel(self):
        """In case of a failing job, one has to cancel every job that depend
        on it. This method put the job as failed and will not be executed.
        """
        if self.cancelled:
            return
        # mark begun+finished so the scheduler never launches this job
        self._has_begun = True
        self._has_finished = True
        self.cancelled = True
        self.out += _("This job was not launched because its father has failed.")
        self.err += _("This job was not launched because its father has failed.")

    def is_running(self):
        """Returns True if the job commands are running

        :return: True if the job is running
        :rtype: bool
        """
        return self.has_begun() and not self.has_finished()

    def is_timeout(self):
        """Returns True if the job commands has finished with timeout

        :return: True if the job has finished with timeout
        :rtype: bool
        """
        return self._has_timouted

    def time_elapsed(self):
        """Get the time elapsed since the job launching

        :return: The number of seconds (-1 if the job has not begun)
        :rtype: int
        """
        if not self.has_begun():
            return -1
        T_now = time.time()
        return T_now - self._T0

    def check_time(self):
        """Verify that the job has not exceeded its timeout.
        If it has, kill the remote command and consider the job as finished.
        """
        if not self.has_begun():
            return
        if self.time_elapsed() > self.timeout:
            self._has_finished = True
            self._has_timouted = True
            self._Tf = time.time()
            (out_kill, __) = self.kill_remote_process()
            self.out += "TIMEOUT \n" + out_kill
            self.err += "TIMEOUT : %s seconds elapsed\n" % str(self.timeout)
            # best effort: logs may be incomplete after a kill
            try:
                self.get_log_files()
            except Exception as e:
                self.err += _("Unable to get remote log files!\n%s\n" % str(e))

    def total_duration(self):
        """Give the total duration of the job

        :return: the total duration of the job in seconds
        :rtype: int
        """
        return self._Tf - self._T0

    def run(self):
        """Launch the job by executing the remote command."""

        # Prevent multiple run
        if self.has_begun():
            msg = _("Warning: A job can only be launched one time")
            msg2 = _(
                'Trying to launch the job "%s" whereas it has '
                "already been launched." % self.name
            )
            self.logger.write(src.printcolors.printcWarning("%s\n%s\n" % (msg, msg2)))
            return

        # Do not execute the command if the machine could not be reached
        if not self.machine.successfully_connected(self.logger):
            self._has_finished = True
            self.out = "N\A"
            self.err += (
                "Connection to machine (name : %s, host: %s, port:"
                " %s, user: %s) has failed\nUse the log command "
                "to get more information."
                % (
                    self.machine.name,
                    self.machine.host,
                    self.machine.port,
                    self.machine.user,
                )
            )
        else:
            # Usual case : Launch the command on remote machine
            self._T0 = time.time()
            self._stdin, self._stdout, self._stderr = self.machine.exec_command(
                self.command, self.logger
            )
            # If the results are not initialized, finish the job
            if (self._stdin, self._stdout, self._stderr) == (None, None, None):
                self._has_finished = True
                self._Tf = time.time()
                self.out += "N\A"
                self.err += "The server failed to execute the command"

        # Put the beginning flag to true.
        self._has_begun = True

    def write_results(self):
        """Display on the terminal all the job's information"""
        self.logger.write("name : " + self.name + "\n")
        if self.after:
            self.logger.write("after : %s\n" % self.after)
        self.logger.write(
            "Time elapsed : %4imin %2is \n"
            % (self.total_duration() // 60, self.total_duration() % 60)
        )
        if self._T0 != -1:
            self.logger.write(
                "Begin time : %s\n"
                % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(self._T0))
            )
        if self._Tf != -1:
            self.logger.write(
                "End time   : %s\n\n"
                % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(self._Tf))
            )

        machine_head = "Informations about connection :\n"
        underline = (len(machine_head) - 2) * "-"
        self.logger.write(src.printcolors.printcInfo(machine_head + underline + "\n"))
        self.machine.write_info(self.logger)

        self.logger.write(src.printcolors.printcInfo("out : \n"))
        if self.out == "":
            self.logger.write("Unable to get output\n")
        else:
            self.logger.write(self.out + "\n")
        self.logger.write(src.printcolors.printcInfo("err : \n"))
        self.logger.write(self.err + "\n")

    def get_status(self):
        """Get the status of the job (used by the Gui for xml display)

        :return: The current status of the job
        :rtype: String
        """
        if not self.machine.successfully_connected(self.logger):
            return "SSH connection KO"
        if not self.has_begun():
            return "Not launched"
        if self.cancelled:
            return "Cancelled"
        if self.is_running():
            return "running since " + time.strftime(
                "%Y-%m-%d %H:%M:%S", time.localtime(self._T0)
            )
        if self.has_finished():
            if self.is_timeout():
                return "Timeout since " + time.strftime(
                    "%Y-%m-%d %H:%M:%S", time.localtime(self._Tf)
                )
            return "Finished since " + time.strftime(
                "%Y-%m-%d %H:%M:%S", time.localtime(self._Tf)
            )
669
670
671 class Jobs(object):
672     """Class to manage the jobs to be run"""
673
    def __init__(self, runner, logger, job_file_path, config_jobs, lenght_columns=20):
        """Initialize the jobs manager from a jobs configuration.

        :param runner: object giving access to the global sat configuration
                       (runner.cfg)
        :param logger src.logger.Logger: The logger instance
        :param job_file_path str: path to the jobs pyconf file (it is later
                                  copied to every remote machine)
        :param config_jobs src.pyconf.Config: the jobs configuration
        :param lenght_columns int: display width of the status columns
        """
        # The jobs configuration
        self.cfg_jobs = config_jobs
        self.job_file_path = job_file_path
        # The machine that will be used today
        self.lmachines = []
        # The list of machine (hosts, port) that will be used today
        # (a same host can have several machine instances since there
        # can be several ssh parameters)
        self.lhosts = []
        # The jobs to be launched today
        self.ljobs = []
        # The jobs that will not be launched today
        self.ljobs_not_today = []
        self.runner = runner
        self.logger = logger
        self.len_columns = lenght_columns

        # the list of jobs that have not been run yet
        self._l_jobs_not_started = []
        # the list of jobs that have already ran
        self._l_jobs_finished = []
        # the list of jobs that are running
        self._l_jobs_running = []

        # parse the configuration right away: fills lmachines, lhosts,
        # ljobs and ljobs_not_today
        self.determine_jobs_and_machines()
700
701     def define_job(self, job_def, machine):
702         """Takes a pyconf job definition and a machine (from class machine)
703            and returns the job instance corresponding to the definition.
704
705         :param job_def src.config.Mapping: a job definition
706         :param machine machine: the machine on which the job will run
707         :return: The corresponding job in a job class instance
708         :rtype: job
709         """
710         name = job_def.name
711         cmmnds = job_def.commands
712         if not "timeout" in job_def:
713             timeout = 4 * 60 * 60  # default timeout = 4h
714         else:
715             timeout = job_def.timeout
716         after = None
717         if "after" in job_def:
718             after = job_def.after
719         application = None
720         if "application" in job_def:
721             application = job_def.application
722         board = None
723         if "board" in job_def:
724             board = job_def.board
725         prefix = None
726         if "prefix" in job_def:
727             prefix = job_def.prefix
728
729         return Job(
730             name,
731             machine,
732             application,
733             board,
734             cmmnds,
735             timeout,
736             self.runner.cfg,
737             self.job_file_path,
738             self.logger,
739             after=after,
740             prefix=prefix,
741         )
742
743     def determine_jobs_and_machines(self):
744         """Function that reads the pyconf jobs definition and instantiates all
745            the machines and jobs to be done today.
746
747         :return: Nothing
748         :rtype: N\A
749         """
750         today = datetime.date.weekday(datetime.date.today())
751         host_list = []
752
753         for job_def in self.cfg_jobs.jobs:
754
755             if not "machine" in job_def:
756                 msg = _(
757                     'WARNING: The job "%s" do not have the key '
758                     '"machine", this job is ignored.\n\n' % job_def.name
759                 )
760                 self.logger.write(src.printcolors.printcWarning(msg))
761                 continue
762             name_machine = job_def.machine
763
764             a_machine = None
765             for mach in self.lmachines:
766                 if mach.name == name_machine:
767                     a_machine = mach
768                     break
769
770             if a_machine == None:
771                 for machine_def in self.cfg_jobs.machines:
772                     if machine_def.name == name_machine:
773                         if "host" not in machine_def:
774                             host = self.runner.cfg.VARS.hostname
775                         else:
776                             host = machine_def.host
777
778                         if "user" not in machine_def:
779                             user = self.runner.cfg.VARS.user
780                         else:
781                             user = machine_def.user
782
783                         if "port" not in machine_def:
784                             port = 22
785                         else:
786                             port = machine_def.port
787
788                         if "password" not in machine_def:
789                             passwd = None
790                         else:
791                             passwd = machine_def.password
792
793                         if "sat_path" not in machine_def:
794                             sat_path = "salomeTools"
795                         else:
796                             sat_path = machine_def.sat_path
797
798                         a_machine = Machine(
799                             machine_def.name,
800                             host,
801                             user,
802                             port=port,
803                             passwd=passwd,
804                             sat_path=sat_path,
805                         )
806
807                         self.lmachines.append(a_machine)
808                         if (host, port) not in host_list:
809                             host_list.append((host, port))
810
811                 if a_machine == None:
812                     msg = _(
813                         'WARNING: The job "%(job_name)s" requires the '
814                         'machine "%(machine_name)s" but this machine '
815                         "is not defined in the configuration file.\n"
816                         "The job will not be launched\n"
817                     )
818                     self.logger.write(
819                         src.printcolors.printcWarning(
820                             msg
821                             % {"job_name": job_def.name, "machine_name": name_machine}
822                         )
823                     )
824                     continue
825
826             a_job = self.define_job(job_def, a_machine)
827
828             if today in job_def.when:
829                 self.ljobs.append(a_job)
830             else:  # today in job_def.when
831                 self.ljobs_not_today.append(a_job)
832
833         self.lhosts = host_list
834
    def ssh_connection_all_machines(self, pad=50):
        """Open an ssh connection to every machine used today and deploy SAT.

        For each machine in self.lmachines: connect over ssh, then, on a
        successful connection, remove any previous remote SAT install,
        copy the local SAT over, run "sat init" to set the remote local
        settings, and query the remote distribution with "sat config".
        An aligned progress line and a final OK/KO status are written to
        the logger for each machine.

        :param pad int: the column at which the step/status text is aligned
        :return: Nothing
        :rtype: N\A
        """
        self.logger.write(
            src.printcolors.printcInfo(
                ("Establishing connection with all the machines :\n")
            )
        )
        for machine in self.lmachines:
            # little algorithm in order to display traces: pad the line of
            # dots so every status column lines up at column `pad`
            begin_line = _("Connection to %s: " % machine.name)
            if pad - len(begin_line) < 0:
                endline = " "
            else:
                endline = (pad - len(begin_line)) * "." + " "

            step = "SSH connection"
            self.logger.write(begin_line + endline + step)
            self.logger.flush()
            # the call to the method that initiates the ssh connection;
            # msg holds the error text shown if the connection failed
            msg = machine.connect(self.logger)

            # Copy salomeTools to the remote machine
            if machine.successfully_connected(self.logger):
                step = _("Remove SAT")
                # "\r" rewrites the current console line in place; the
                # trailing integer is the logger verbosity level
                self.logger.write("\r%s%s%s" % (begin_line, endline, 20 * " "), 3)
                self.logger.write("\r%s%s%s" % (begin_line, endline, step), 3)
                # wipe any previous SAT installation on the remote side
                (__, out_dist, __) = machine.exec_command(
                    "rm -rf %s" % machine.sat_path, self.logger
                )
                # drain the remote stdout so the command completes
                out_dist.read()

                self.logger.flush()
                step = _("Copy SAT")
                self.logger.write("\r%s%s%s" % (begin_line, endline, 20 * " "), 3)
                self.logger.write("\r%s%s%s" % (begin_line, endline, step), 3)
                self.logger.flush()
                res_copy = machine.copy_sat(
                    self.runner.cfg.VARS.salometoolsway, self.job_file_path
                )

                # set the local settings of sat on the remote machine using
                # the init command
                (__, out_dist, __) = machine.exec_command(
                    os.path.join(
                        machine.sat_path,
                        "sat init --base default --workdir"
                        " default --log_dir default",
                    ),
                    self.logger,
                )
                out_dist.read()

                # get the remote machine distribution using a sat command
                (__, out_dist, __) = machine.exec_command(
                    os.path.join(
                        machine.sat_path, "sat config --value VARS.dist --no_label"
                    ),
                    self.logger,
                )
                machine.distribution = out_dist.read().decode().replace("\n", "")

                # Print the status of the copy (0 means success —
                # presumably copy_sat returns a shell-like exit code)
                if res_copy == 0:
                    self.logger.write(
                        "\r%s" % ((len(begin_line) + len(endline) + 20) * " "), 3
                    )
                    self.logger.write(
                        "\r%s%s%s"
                        % (begin_line, endline, src.printcolors.printc(src.OK_STATUS)),
                        3,
                    )
                else:
                    self.logger.write(
                        "\r%s" % ((len(begin_line) + len(endline) + 20) * " "), 3
                    )
                    self.logger.write(
                        "\r%s%s%s %s"
                        % (
                            begin_line,
                            endline,
                            src.printcolors.printc(src.KO_STATUS),
                            _("Copy of SAT failed: %s" % res_copy),
                        ),
                        3,
                    )
            else:
                # connection failed: show KO plus the message from connect()
                self.logger.write(
                    "\r%s" % ((len(begin_line) + len(endline) + 20) * " "), 3
                )
                self.logger.write(
                    "\r%s%s%s %s"
                    % (begin_line, endline, src.printcolors.printc(src.KO_STATUS), msg),
                    3,
                )
            self.logger.write("\n", 3)

        self.logger.write("\n")
937
938     def is_occupied(self, hostname):
939         """Function that returns True if a job is running on
940            the machine defined by its host and its port.
941
942         :param hostname (str, int): the pair (host, port)
943         :return: the job that is running on the host,
944                 or false if there is no job running on the host.
945         :rtype: job / bool
946         """
947         host = hostname[0]
948         port = hostname[1]
949         for jb in self.ljobs:
950             if jb.machine.host == host and jb.machine.port == port:
951                 if jb.is_running():
952                     return jb
953         return False
954
955     def update_jobs_states_list(self):
956         """Function that updates the lists that store the currently
957            running jobs and the jobs that have already finished.
958
959         :return: Nothing.
960         :rtype: N\A
961         """
962         jobs_finished_list = []
963         jobs_running_list = []
964         for jb in self.ljobs:
965             if jb.is_running():
966                 jobs_running_list.append(jb)
967                 jb.check_time()
968             if jb.has_finished():
969                 jobs_finished_list.append(jb)
970
971         nb_job_finished_before = len(self._l_jobs_finished)
972         self._l_jobs_finished = jobs_finished_list
973         self._l_jobs_running = jobs_running_list
974
975         nb_job_finished_now = len(self._l_jobs_finished)
976
977         return nb_job_finished_now > nb_job_finished_before
978
979     def cancel_dependencies_of_failing_jobs(self):
980         """Function that cancels all the jobs that depend on a failing one.
981
982         :return: Nothing.
983         :rtype: N\A
984         """
985
986         for job in self.ljobs:
987             if job.after is None:
988                 continue
989             father_job = self.find_job_that_has_name(job.after)
990             if father_job is not None and father_job.has_failed():
991                 job.cancel()
992
993     def find_job_that_has_name(self, name):
994         """Returns the job by its name.
995
996         :param name str: a job name
997         :return: the job that has the name.
998         :rtype: job
999         """
1000         for jb in self.ljobs:
1001             if jb.name == name:
1002                 return jb
1003         # the following is executed only if the job was not found
1004         return None
1005
1006     def str_of_length(self, text, length):
1007         """Takes a string text of any length and returns
1008            the most close string of length "length".
1009
1010         :param text str: any string
1011         :param length int: a length for the returned string
1012         :return: the most close string of length "length"
1013         :rtype: str
1014         """
1015         if len(text) > length:
1016             text_out = text[: length - 3] + "..."
1017         else:
1018             diff = length - len(text)
1019             before = " " * (diff // 2)
1020             after = " " * (diff // 2 + diff % 2)
1021             text_out = before + text + after
1022
1023         return text_out
1024
1025     def display_status(self, len_col):
1026         """Takes a lenght and construct the display of the current status
1027            of the jobs in an array that has a column for each host.
1028            It displays the job that is currently running on the host
1029            of the column.
1030
1031         :param len_col int: the size of the column
1032         :return: Nothing
1033         :rtype: N\A
1034         """
1035
1036         display_line = ""
1037         for host_port in self.lhosts:
1038             jb = self.is_occupied(host_port)
1039             if not jb:  # nothing running on the host
1040                 empty = self.str_of_length("empty", len_col)
1041                 display_line += "|" + empty
1042             else:
1043                 display_line += "|" + src.printcolors.printcInfo(
1044                     self.str_of_length(jb.name, len_col)
1045                 )
1046
1047         self.logger.write("\r" + display_line + "|")
1048         self.logger.flush()
1049
    def run_jobs(self):
        """The main method. Runs all the jobs on every host.
           For each host, at a given time, only one job can be running.
           The jobs that have the field after (that contain the job that has
           to be run before it) are run after the previous job.
           This method stops when all the jobs are finished.

        :return: Nothing
        :rtype: N\A
        """

        # Print header: one column per (host, port); the port is only
        # shown when it differs from the ssh default (22)
        self.logger.write(src.printcolors.printcInfo(_("Executing the jobs :\n")))
        text_line = ""
        for host_port in self.lhosts:
            host = host_port[0]
            port = host_port[1]
            if port == 22:  # default value
                text_line += "|" + self.str_of_length(host, self.len_columns)
            else:
                text_line += "|" + self.str_of_length(
                    "(" + host + ", " + str(port) + ")", self.len_columns
                )

        tiret_line = " " + "-" * (len(text_line) - 1) + "\n"
        self.logger.write(tiret_line)
        self.logger.write(text_line + "|\n")
        self.logger.write(tiret_line)
        self.logger.flush()

        # The infinite loop that runs the jobs: poll every host, start a
        # pending job on each idle one, until every job has finished
        l_jobs_not_started = src.deepcopy_list(self.ljobs)
        while len(self._l_jobs_finished) != len(self.ljobs):
            new_job_start = False
            for host_port in self.lhosts:

                # a host runs at most one job at a time
                if self.is_occupied(host_port):
                    continue

                for jb in l_jobs_not_started:
                    if (jb.machine.host, jb.machine.port) != host_port:
                        continue
                    if jb.after == None:
                        # no dependency: the job can start right away
                        jb.run()
                        l_jobs_not_started.remove(jb)
                        new_job_start = True
                        break
                    else:
                        # the job depends on another job ("father")
                        jb_before = self.find_job_that_has_name(jb.after)
                        if jb_before is None:
                            # the father is unknown: cancel this job
                            jb.cancel()
                            msg = _(
                                "This job was not launched because its "
                                "father is not in the jobs list."
                            )
                            jb.out = msg
                            jb.err = msg
                            break
                        if jb_before.has_finished():
                            jb.run()
                            l_jobs_not_started.remove(jb)
                            new_job_start = True
                            break
            self.cancel_dependencies_of_failing_jobs()
            new_job_finished = self.update_jobs_states_list()

            # Only refresh the display (and gui files) when something changed
            if new_job_start or new_job_finished:
                if self.gui:
                    self.gui.update_xml_files(self.ljobs)
                # Display the current status
                self.display_status(self.len_columns)

            # Make sure that the proc is not entirely busy
            time.sleep(0.001)

        self.logger.write("\n")
        self.logger.write(tiret_line)
        self.logger.write("\n\n")

        # final gui refresh once every job is done
        if self.gui:
            self.gui.update_xml_files(self.ljobs)
            self.gui.last_update()
1132
1133     def write_all_results(self):
1134         """Display all the jobs outputs.
1135
1136         :return: Nothing
1137         :rtype: N\A
1138         """
1139
1140         for jb in self.ljobs:
1141             self.logger.write(
1142                 src.printcolors.printcLabel(
1143                     "#------- Results for job %s -------#\n" % jb.name
1144                 )
1145             )
1146             jb.write_results()
1147             self.logger.write("\n\n")
1148
1149
1150 class Gui(object):
1151     """Class to manage the the xml data that can be displayed in a browser to
1152     see the jobs states
1153     """
1154
    def __init__(
        self, xml_dir_path, l_jobs, l_jobs_not_today, prefix, logger, file_boards=""
    ):
        """Initialization

        :param xml_dir_path str: The path to the directory where to put
                                 the xml resulting files
        :param l_jobs List: the list of jobs that run today
        :param l_jobs_not_today List: the list of jobs that do not run today
        :param prefix str: the date_hour prefix added to the xml file names
        :param logger Logger: the logging instance used for traces
        :param file_boards str: the file path from which to read the
                                   expected boards
        """
        # The logging instance
        self.logger = logger

        # The prefix to add to the xml files : date_hour
        self.prefix = prefix

        # The path of the csv files to read to fill the expected boards
        self.file_boards = file_boards

        # Parse the expected boards only when a csv file was provided;
        # otherwise no board is expected
        if file_boards != "":
            # weekday(): 0 = Monday ... 6 = Sunday
            today = datetime.date.weekday(datetime.date.today())
            self.parse_csv_boards(today)
        else:
            self.d_input_boards = {}

        # The path of the global xml file
        self.xml_dir_path = xml_dir_path
        # Initialize the xml files
        self.global_name = "global_report"
        xml_global_path = os.path.join(self.xml_dir_path, self.global_name + ".xml")
        self.xml_global_file = src.xmlManager.XmlLogFile(xml_global_path, "JobsReport")

        # Find history for each job (read from previous xml reports on disk)
        self.history = {}
        self.find_history(l_jobs, l_jobs_not_today)

        # The xml files that corresponds to the boards.
        # {name_board : xml_object}
        self.d_xml_board_files = {}

        # Create the lines and columns
        self.initialize_boards(l_jobs, l_jobs_not_today)

        # Write the xml file
        self.update_xml_files(l_jobs)
1202
1203     def add_xml_board(self, name):
1204         """Add a board to the board list
1205         :param name str: the board name
1206         """
1207         xml_board_path = os.path.join(self.xml_dir_path, name + ".xml")
1208         self.d_xml_board_files[name] = src.xmlManager.XmlLogFile(
1209             xml_board_path, "JobsReport"
1210         )
1211         self.d_xml_board_files[name].add_simple_node("distributions")
1212         self.d_xml_board_files[name].add_simple_node("applications")
1213         self.d_xml_board_files[name].add_simple_node("board", text=name)
1214
1215     def initialize_boards(self, l_jobs, l_jobs_not_today):
1216         """Get all the first information needed for each file and write the
1217            first version of the files
1218         :param l_jobs List: the list of jobs that run today
1219         :param l_jobs_not_today List: the list of jobs that do not run today
1220         """
1221         # Get the boards to fill and put it in a dictionary
1222         # {board_name : xml instance corresponding to the board}
1223         for job in l_jobs + l_jobs_not_today:
1224             board = job.board
1225             if board is not None and board not in self.d_xml_board_files.keys():
1226                 self.add_xml_board(board)
1227
1228         # Verify that the boards given as input are done
1229         for board in list(self.d_input_boards.keys()):
1230             if board not in self.d_xml_board_files:
1231                 self.add_xml_board(board)
1232             root_node = self.d_xml_board_files[board].xmlroot
1233             src.xmlManager.append_node_attrib(
1234                 root_node, {"input_file": self.file_boards}
1235             )
1236
1237         # Loop over all jobs in order to get the lines and columns for each
1238         # xml file
1239         d_dist = {}
1240         d_application = {}
1241         for board in self.d_xml_board_files:
1242             d_dist[board] = []
1243             d_application[board] = []
1244
1245         l_hosts_ports = []
1246
1247         for job in l_jobs + l_jobs_not_today:
1248
1249             if (job.machine.host, job.machine.port) not in l_hosts_ports:
1250                 l_hosts_ports.append((job.machine.host, job.machine.port))
1251
1252             distrib = job.machine.distribution
1253             application = job.application
1254
1255             board_job = job.board
1256             if board is None:
1257                 continue
1258             for board in self.d_xml_board_files:
1259                 if board_job == board:
1260                     if distrib not in [None, ""] and distrib not in d_dist[board]:
1261                         d_dist[board].append(distrib)
1262                         src.xmlManager.add_simple_node(
1263                             self.d_xml_board_files[board].xmlroot.find("distributions"),
1264                             "dist",
1265                             attrib={"name": distrib},
1266                         )
1267
1268                 if board_job == board:
1269                     if (
1270                         application not in [None, ""]
1271                         and application not in d_application[board]
1272                     ):
1273                         d_application[board].append(application)
1274                         src.xmlManager.add_simple_node(
1275                             self.d_xml_board_files[board].xmlroot.find("applications"),
1276                             "application",
1277                             attrib={"name": application},
1278                         )
1279
1280         # Verify that there are no missing application or distribution in the
1281         # xml board files (regarding the input boards)
1282         for board in self.d_xml_board_files:
1283             l_dist = d_dist[board]
1284             if board not in self.d_input_boards.keys():
1285                 continue
1286             for dist in self.d_input_boards[board]["rows"]:
1287                 if dist not in l_dist:
1288                     src.xmlManager.add_simple_node(
1289                         self.d_xml_board_files[board].xmlroot.find("distributions"),
1290                         "dist",
1291                         attrib={"name": dist},
1292                     )
1293             l_appli = d_application[board]
1294             for appli in self.d_input_boards[board]["columns"]:
1295                 if appli not in l_appli:
1296                     src.xmlManager.add_simple_node(
1297                         self.d_xml_board_files[board].xmlroot.find("applications"),
1298                         "application",
1299                         attrib={"name": appli},
1300                     )
1301
1302         # Initialize the hosts_ports node for the global file
1303         self.xmlhosts_ports = self.xml_global_file.add_simple_node("hosts_ports")
1304         for host, port in l_hosts_ports:
1305             host_port = "%s:%i" % (host, port)
1306             src.xmlManager.add_simple_node(
1307                 self.xmlhosts_ports, "host_port", attrib={"name": host_port}
1308             )
1309
1310         # Initialize the jobs node in all files
1311         for xml_file in [self.xml_global_file] + list(self.d_xml_board_files.values()):
1312             xml_jobs = xml_file.add_simple_node("jobs")
1313             # Get the jobs present in the config file but
1314             # that will not be launched today
1315             self.put_jobs_not_today(l_jobs_not_today, xml_jobs)
1316
1317             # add also the infos node
1318             xml_file.add_simple_node(
1319                 "infos", attrib={"name": "last update", "JobsCommandStatus": "running"}
1320             )
1321
1322             # and put the history node
1323             history_node = xml_file.add_simple_node("history")
1324             name_board = os.path.basename(xml_file.logFile)[: -len(".xml")]
1325             # serach for board files
1326             expression = "^[0-9]{8}_+[0-9]{6}_" + name_board + ".xml$"
1327             oExpr = re.compile(expression)
1328             # Get the list of xml borad files that are in the log directory
1329             for file_name in os.listdir(self.xml_dir_path):
1330                 if oExpr.search(file_name):
1331                     date = os.path.basename(file_name).split("_")[0]
1332                     file_path = os.path.join(self.xml_dir_path, file_name)
1333                     src.xmlManager.add_simple_node(
1334                         history_node, "link", text=file_path, attrib={"date": date}
1335                     )
1336
1337         # Find in each board the squares that needs to be filled regarding the
1338         # input csv files but that are not covered by a today job
1339         for board in self.d_input_boards.keys():
1340             xml_root_board = self.d_xml_board_files[board].xmlroot
1341             # Find the missing jobs for today
1342             xml_missing = src.xmlManager.add_simple_node(xml_root_board, "missing_jobs")
1343             for row, column in self.d_input_boards[board]["jobs"]:
1344                 found = False
1345                 for job in l_jobs:
1346                     if job.application == column and job.machine.distribution == row:
1347                         found = True
1348                         break
1349                 if not found:
1350                     src.xmlManager.add_simple_node(
1351                         xml_missing,
1352                         "job",
1353                         attrib={"distribution": row, "application": column},
1354                     )
1355             # Find the missing jobs not today
1356             xml_missing_not_today = src.xmlManager.add_simple_node(
1357                 xml_root_board, "missing_jobs_not_today"
1358             )
1359             for row, column in self.d_input_boards[board]["jobs_not_today"]:
1360                 found = False
1361                 for job in l_jobs_not_today:
1362                     if job.application == column and job.machine.distribution == row:
1363                         found = True
1364                         break
1365                 if not found:
1366                     src.xmlManager.add_simple_node(
1367                         xml_missing_not_today,
1368                         "job",
1369                         attrib={"distribution": row, "application": column},
1370                     )
1371
1372     def find_history(self, l_jobs, l_jobs_not_today):
1373         """find, for each job, in the existent xml boards the results for the
1374            job. Store the results in the dictionnary self.history = {name_job :
1375            list of (date, status, list links)}
1376
1377         :param l_jobs List: the list of jobs to run today
1378         :param l_jobs_not_today List: the list of jobs that do not run today
1379         """
1380         # load the all the history
1381         expression = "^[0-9]{8}_+[0-9]{6}_" + self.global_name + ".xml$"
1382         oExpr = re.compile(expression)
1383         # Get the list of global xml that are in the log directory
1384         l_globalxml = []
1385         for file_name in os.listdir(self.xml_dir_path):
1386             if oExpr.search(file_name):
1387                 file_path = os.path.join(self.xml_dir_path, file_name)
1388                 try:
1389                     global_xml = src.xmlManager.ReadXmlFile(file_path)
1390                     l_globalxml.append(global_xml)
1391                 except Exception as e:
1392                     msg = _(
1393                         "\nWARNING: the file %s can not be read, it will be "
1394                         "ignored\n%s" % (file_path, e)
1395                     )
1396                     self.logger.write("%s\n" % src.printcolors.printcWarning(msg), 5)
1397
1398         # Construct the dictionnary self.history
1399         for job in l_jobs + l_jobs_not_today:
1400             l_links = []
1401             for global_xml in l_globalxml:
1402                 date = os.path.basename(global_xml.filePath).split("_")[0]
1403                 global_root_node = global_xml.xmlroot.find("jobs")
1404                 job_node = src.xmlManager.find_node_by_attrib(
1405                     global_root_node, "job", "name", job.name
1406                 )
1407                 if job_node:
1408                     if job_node.find("remote_log_file_path") is not None:
1409                         link = job_node.find("remote_log_file_path").text
1410                         res_job = job_node.find("res").text
1411                         if link != "nothing":
1412                             l_links.append((date, res_job, link))
1413             l_links = sorted(l_links, reverse=True)
1414             self.history[job.name] = l_links
1415
1416     def put_jobs_not_today(self, l_jobs_not_today, xml_node_jobs):
1417         """Get all the first information needed for each file and write the
1418            first version of the files
1419
1420         :param xml_node_jobs etree.Element: the node corresponding to a job
1421         :param l_jobs_not_today List: the list of jobs that do not run today
1422         """
1423         for job in l_jobs_not_today:
1424             xmlj = src.xmlManager.add_simple_node(
1425                 xml_node_jobs, "job", attrib={"name": job.name}
1426             )
1427             src.xmlManager.add_simple_node(xmlj, "application", job.application)
1428             src.xmlManager.add_simple_node(
1429                 xmlj, "distribution", job.machine.distribution
1430             )
1431             src.xmlManager.add_simple_node(xmlj, "board", job.board)
1432             src.xmlManager.add_simple_node(xmlj, "commands", " ; ".join(job.commands))
1433             src.xmlManager.add_simple_node(xmlj, "state", "Not today")
1434             src.xmlManager.add_simple_node(xmlj, "machine", job.machine.name)
1435             src.xmlManager.add_simple_node(xmlj, "host", job.machine.host)
1436             src.xmlManager.add_simple_node(xmlj, "port", str(job.machine.port))
1437             src.xmlManager.add_simple_node(xmlj, "user", job.machine.user)
1438             src.xmlManager.add_simple_node(xmlj, "sat_path", job.machine.sat_path)
1439             xml_history = src.xmlManager.add_simple_node(xmlj, "history")
1440             for i, (date, res_job, link) in enumerate(self.history[job.name]):
1441                 if i == 0:
1442                     # tag the first one (the last one)
1443                     src.xmlManager.add_simple_node(
1444                         xml_history,
1445                         "link",
1446                         text=link,
1447                         attrib={"date": date, "res": res_job, "last": "yes"},
1448                     )
1449                 else:
1450                     src.xmlManager.add_simple_node(
1451                         xml_history,
1452                         "link",
1453                         text=link,
1454                         attrib={"date": date, "res": res_job, "last": "no"},
1455                     )
1456
1457     def parse_csv_boards(self, today):
1458         """Parse the csv file that describes the boards to produce and fill
1459             the dict d_input_boards that contain the csv file contain
1460
1461         :param today int: the current day of the week
1462         """
1463         # open the csv file and read its content
1464         l_read = []
1465         with open(self.file_boards, "r") as f:
1466             reader = csv.reader(f, delimiter=CSV_DELIMITER)
1467             for row in reader:
1468                 l_read.append(row)
1469         # get the delimiter for the boards (empty line)
1470         boards_delimiter = [""] * len(l_read[0])
1471         # Make the list of boards, by splitting with the delimiter
1472         l_boards = [
1473             list(y)
1474             for x, y in itertools.groupby(l_read, lambda z: z == boards_delimiter)
1475             if not x
1476         ]
1477
1478         # loop over the csv lists of lines and get the rows, columns and jobs
1479         d_boards = {}
1480         for input_board in l_boards:
1481             # get board name
1482             board_name = input_board[0][0]
1483
1484             # Get columns list
1485             columns = input_board[0][1:]
1486
1487             rows = []
1488             jobs = []
1489             jobs_not_today = []
1490             for line in input_board[1:]:
1491                 row = line[0]
1492                 rows.append(row)
1493                 for i, square in enumerate(line[1:]):
1494                     if square == "":
1495                         continue
1496                     days = square.split(DAYS_SEPARATOR)
1497                     days = [int(day) for day in days]
1498                     job = (row, columns[i])
1499                     if today in days:
1500                         jobs.append(job)
1501                     else:
1502                         jobs_not_today.append(job)
1503
1504             d_boards[board_name] = {
1505                 "rows": rows,
1506                 "columns": columns,
1507                 "jobs": jobs,
1508                 "jobs_not_today": jobs_not_today,
1509             }
1510
1511         self.d_input_boards = d_boards
1512
1513     def update_xml_files(self, l_jobs):
1514         """Write all the xml files with updated information about the jobs
1515
1516         :param l_jobs List: the list of jobs that run today
1517         """
1518         for xml_file in [self.xml_global_file] + list(self.d_xml_board_files.values()):
1519             self.update_xml_file(l_jobs, xml_file)
1520
1521         # Write the file
1522         self.write_xml_files()
1523
    def update_xml_file(self, l_jobs, xml_file):
        """update information about the jobs for the file xml_file

        Each job of l_jobs gets its <job> node removed and rebuilt from
        scratch with its current state (machine, timings, outputs, history,
        test logs, father job link, extra_job flag). Finally the "infos"
        node is stamped with the current date.

        :param l_jobs List: the list of jobs that run today
        :param xml_file xmlManager.XmlLogFile: the xml instance to update
        """

        xml_node_jobs = xml_file.xmlroot.find("jobs")
        # Update the job names and status node
        for job in l_jobs:
            # Find the node corresponding to the job and delete it
            # in order to recreate it
            for xmljob in xml_node_jobs.findall("job"):
                if xmljob.attrib["name"] == job.name:
                    xml_node_jobs.remove(xmljob)

            # "-1" is the sentinel for "not started"/"not finished";
            # otherwise _T0/_Tf are epoch times rendered in local time
            T0 = str(job._T0)
            if T0 != "-1":
                T0 = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(job._T0))
            Tf = str(job._Tf)
            if Tf != "-1":
                Tf = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(job._Tf))

            # recreate the job node
            xmlj = src.xmlManager.add_simple_node(
                xml_node_jobs, "job", attrib={"name": job.name}
            )
            src.xmlManager.add_simple_node(xmlj, "machine", job.machine.name)
            src.xmlManager.add_simple_node(xmlj, "host", job.machine.host)
            src.xmlManager.add_simple_node(xmlj, "port", str(job.machine.port))
            src.xmlManager.add_simple_node(xmlj, "user", job.machine.user)
            # History of previous runs of this job: (date, result, log link)
            # NOTE(review): assumes self.history has an entry for every job
            # name -- a missing name would raise KeyError; confirm with the
            # code that fills self.history
            xml_history = src.xmlManager.add_simple_node(xmlj, "history")
            for date, res_job, link in self.history[job.name]:
                src.xmlManager.add_simple_node(
                    xml_history,
                    "link",
                    text=link,
                    attrib={"date": date, "res": res_job},
                )

            src.xmlManager.add_simple_node(xmlj, "sat_path", job.machine.sat_path)
            src.xmlManager.add_simple_node(xmlj, "application", job.application)
            src.xmlManager.add_simple_node(
                xmlj, "distribution", job.machine.distribution
            )
            src.xmlManager.add_simple_node(xmlj, "board", job.board)
            src.xmlManager.add_simple_node(xmlj, "timeout", str(job.timeout))
            src.xmlManager.add_simple_node(xmlj, "commands", " ; ".join(job.commands))
            src.xmlManager.add_simple_node(xmlj, "state", job.get_status())
            src.xmlManager.add_simple_node(xmlj, "begin", T0)
            src.xmlManager.add_simple_node(xmlj, "end", Tf)
            # terminal color escape sequences are stripped before publication
            src.xmlManager.add_simple_node(
                xmlj, "out", src.printcolors.cleancolor(job.out)
            )
            src.xmlManager.add_simple_node(
                xmlj, "err", src.printcolors.cleancolor(job.err)
            )
            src.xmlManager.add_simple_node(xmlj, "res", str(job.res_job))
            # Only the first remote log file is published as the job's log
            if len(job.remote_log_files) > 0:
                src.xmlManager.add_simple_node(
                    xmlj, "remote_log_file_path", job.remote_log_files[0]
                )
            else:
                src.xmlManager.add_simple_node(xmlj, "remote_log_file_path", "nothing")
            # Search for the test log if there is any
            l_test_log_files = self.find_test_log(job.remote_log_files)
            xml_test = src.xmlManager.add_simple_node(xmlj, "test_log_file_path")
            for test_log_path, res_test, nb_fails in l_test_log_files:
                test_path_node = src.xmlManager.add_simple_node(
                    xml_test, "path", test_log_path
                )
                test_path_node.attrib["res"] = res_test
                test_path_node.attrib["nb_fails"] = nb_fails

            xmlafter = src.xmlManager.add_simple_node(xmlj, "after", job.after)
            # get the job father
            if job.after is not None:
                job_father = None
                for jb in l_jobs:
                    if jb.name == job.after:
                        job_father = jb

                # link to the father's remote log when one is available
                if job_father is not None and len(job_father.remote_log_files) > 0:
                    link = job_father.remote_log_files[0]
                else:
                    link = "nothing"
                src.xmlManager.append_node_attrib(xmlafter, {"link": link})

            # Verify that the job is to be done today regarding the input csv
            # files
            if job.board and job.board in self.d_input_boards.keys():
                found = False
                for dist, appli in self.d_input_boards[job.board]["jobs"]:
                    if job.machine.distribution == dist and job.application == appli:
                        found = True
                        src.xmlManager.add_simple_node(xmlj, "extra_job", "no")
                        break
                if not found:
                    src.xmlManager.add_simple_node(xmlj, "extra_job", "yes")

        # Update the date
        xml_node_infos = xml_file.xmlroot.find("infos")
        src.xmlManager.append_node_attrib(
            xml_node_infos,
            attrib={"value": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
        )
1630
1631     def find_test_log(self, l_remote_log_files):
1632         """Find if there is a test log (board) in the remote log files and
1633            the path to it. There can be several test command, so the result is
1634            a list.
1635
1636         :param l_remote_log_files List: the list of all remote log files
1637         :return: the list of (test log files path, res of the command)
1638         :rtype: List
1639         """
1640         res = []
1641         for file_path in l_remote_log_files:
1642             dirname = os.path.basename(os.path.dirname(file_path))
1643             file_name = os.path.basename(file_path)
1644             regex = src.logger.log_all_command_file_expression
1645             oExpr = re.compile(regex)
1646             if dirname == "TEST" and oExpr.search(file_name):
1647                 # find the res of the command
1648                 prod_node = etree.parse(file_path).getroot().find("product")
1649                 res_test = prod_node.attrib["global_res"]
1650                 # find the number of fails
1651                 testbase_node = prod_node.find("tests").find("testbase")
1652                 nb_fails = int(testbase_node.attrib["failed"])
1653                 # put the file path, the res of the test command and the number
1654                 # of fails in the output
1655                 res.append((file_path, res_test, nb_fails))
1656
1657         return res
1658
1659     def last_update(self, finish_status="finished"):
1660         """update information about the jobs for the file xml_file
1661
1662         :param l_jobs List: the list of jobs that run today
1663         :param xml_file xmlManager.XmlLogFile: the xml instance to update
1664         """
1665         for xml_file in [self.xml_global_file] + list(self.d_xml_board_files.values()):
1666             xml_node_infos = xml_file.xmlroot.find("infos")
1667             src.xmlManager.append_node_attrib(
1668                 xml_node_infos, attrib={"JobsCommandStatus": finish_status}
1669             )
1670         # Write the file
1671         self.write_xml_files()
1672
1673     def write_xml_file(self, xml_file, stylesheet):
1674         """Write one xml file and the same file with prefix"""
1675         xml_file.write_tree(stylesheet)
1676         file_path = xml_file.logFile
1677         file_dir = os.path.dirname(file_path)
1678         file_name = os.path.basename(file_path)
1679         file_name_with_prefix = self.prefix + "_" + file_name
1680         xml_file.write_tree(stylesheet, os.path.join(file_dir, file_name_with_prefix))
1681
1682     def write_xml_files(self):
1683         """Write the xml files"""
1684         self.write_xml_file(self.xml_global_file, STYLESHEET_GLOBAL)
1685         for xml_file in self.d_xml_board_files.values():
1686             self.write_xml_file(xml_file, STYLESHEET_BOARD)
1687
1688
def get_config_file_path(job_config_name, l_cfg_dir):
    """Resolve the path of a jobs configuration file.

    If *job_config_name* is itself an existing ".pyconf" path, it is used
    as is. Otherwise each directory of *l_cfg_dir* is tried in order, the
    ".pyconf" extension being appended when missing.

    :param job_config_name str: a file path or a bare configuration name
    :param l_cfg_dir List: the directories where to look the name up
    :return: (True, resolved path) on success;
             (False, last candidate tried or None) otherwise
    :rtype: (bool, str)
    """
    # Direct hit: the argument is already an existing .pyconf path
    if os.path.exists(job_config_name) and job_config_name.endswith(".pyconf"):
        return True, job_config_name

    # Otherwise look the name up in each configuration directory
    candidate = None
    for cfg_dir in l_cfg_dir:
        candidate = os.path.join(cfg_dir, job_config_name)
        if not candidate.endswith(".pyconf"):
            candidate = candidate + ".pyconf"
        if os.path.exists(candidate):
            return True, candidate

    # Not found: keep the last candidate (or None) for error reporting,
    # exactly as callers expect
    return False, candidate
1707
1708
def develop_factorized_jobs(config_jobs):
    """Expand the factorized jobs of the configuration, in place.

    A job whose "machine" key is a list is duplicated into one job per
    machine. A list item is either a machine name, or a list
    [machine_name, day1, day2, ...] that additionally overrides the
    "when" days for that copy.
    Example:
    machine : ["CO7.2 physique", ["CO6.4 physique", $MONDAY, $TUESDAY ], "FD22"]

    :param config_jobs Config: the config corresponding to the jobs
                               description; its "jobs" list is replaced by
                               the developed list
    """
    developed_jobs_list = []
    for jb in config_jobs.jobs:
        # case where the job is not factorized: keep it unchanged
        if isinstance(jb.machine, str):
            developed_jobs_list.append(jb)
            continue
        # Case where the job must be developed: one copy per machine entry
        name_job = jb.name
        for machine in jb.machine:
            new_job = src.pyconf.deepCopyMapping(jb)
            if isinstance(machine, str):
                # simple machine name: the copy keeps the original "when"
                # days of the job
                new_job.machine = machine
                new_job.name = name_job + " / " + machine
            else:
                # [machine, day, ...]: the days are redefined for this copy
                new_job.machine = machine[0]
                new_job.name = name_job + " / " + machine[0]
                new_job.when = machine[1:]
            developed_jobs_list.append(new_job)

    config_jobs.jobs = developed_jobs_list
1739
1740
##
# Describes the command
def description():
    """Return the localized help text of the jobs command."""
    msg = (
        "The jobs command launches maintenances that are described"
        " in the dedicated jobs configuration file.\n\nexample:\nsat "
        "jobs --name my_jobs --publish"
    )
    return _(msg)
1749
1750
##
# Runs the command.
def run(args, runner, logger):
    """Execute the jobs command: launch the jobs described in the given
       configuration files and, with --publish, produce the xml boards.

    :param args List: the command line arguments
    :param runner Sat: the sat instance
    :param logger Logger: the logger instance to use
    :return: 0 on success, 1 otherwise
    :rtype: int
    """
    (options, args) = parser.parse_args(args)

    l_cfg_dir = runner.cfg.PATHS.JOBPATH

    # list option : display all the available config files
    if options.list:
        for cfg_dir in l_cfg_dir:
            if not options.no_label:
                logger.write("------ %s\n" % src.printcolors.printcHeader(cfg_dir))
            if not os.path.exists(cfg_dir):
                continue
            for f in sorted(os.listdir(cfg_dir)):
                if not f.endswith(".pyconf"):
                    continue
                cfilename = f[:-7]
                logger.write("%s\n" % cfilename)
        return 0

    # Make sure the jobs_config option has been called
    if not options.jobs_cfg:
        message = _("The option --jobs_config is required\n")
        src.printcolors.printcError(message)
        return 1

    # Find the file in the directories, unless it is a full path
    # merge all in a config
    merger = src.pyconf.ConfigMerger()
    config_jobs = src.pyconf.Config()
    l_conf_files_path = []
    for config_file in options.jobs_cfg:
        found, file_jobs_cfg = get_config_file_path(config_file, l_cfg_dir)
        if not found:
            msg = _(
                "The file configuration %s was not found."
                "\nUse the --list option to get the "
                "possible files." % config_file
            )
            logger.write("%s\n" % src.printcolors.printcError(msg), 1)
            return 1
        l_conf_files_path.append(file_jobs_cfg)
        # Read the config that is in the file
        one_config_jobs = src.read_config_from_a_file(file_jobs_cfg)
        merger.merge(config_jobs, one_config_jobs)

    info = [
        (_("Platform"), runner.cfg.VARS.dist),
        (_("Files containing the jobs configuration"), l_conf_files_path),
    ]
    src.print_info(logger, info)

    # Restrict to the jobs given with --only_jobs, if any
    if options.only_jobs:
        l_jb = src.pyconf.Sequence()
        for jb in config_jobs.jobs:
            if jb.name in options.only_jobs:
                l_jb.append(jb, "Job that was given in only_jobs option parameters\n")
        config_jobs.jobs = l_jb

    # Parse the config jobs in order to develop all the factorized jobs
    develop_factorized_jobs(config_jobs)

    # Make a unique file that contain all the jobs in order to use it
    # on every machine
    name_pyconf = (
        "_".join(
            [os.path.basename(path)[: -len(".pyconf")] for path in l_conf_files_path]
        )
        + ".pyconf"
    )
    path_pyconf = src.get_tmp_filename(runner.cfg, name_pyconf)
    # Save config. Use open() (the "file" builtin is Python 2 only) and a
    # context manager so the descriptor is closed deterministically.
    with open(path_pyconf, "w") as f:
        config_jobs.__save__(f)

    # log the paramiko problems
    log_dir = src.get_log_path(runner.cfg)
    paramiko_log_dir_path = os.path.join(log_dir, "JOBS")
    src.ensure_path_exists(paramiko_log_dir_path)
    paramiko.util.log_to_file(os.path.join(paramiko_log_dir_path, logger.txtFileName))

    # Initialization
    today_jobs = Jobs(runner, logger, path_pyconf, config_jobs)

    # SSH connection to all machines
    today_jobs.ssh_connection_all_machines()
    if options.test_connection:
        return 0

    gui = None
    if options.publish:
        logger.write(src.printcolors.printcInfo(_("Initialize the xml boards : ")), 5)
        logger.flush()

        # Copy the stylesheets in the log directory
        xsl_dir = os.path.join(runner.cfg.VARS.srcDir, "xsl")
        files_to_copy = []
        files_to_copy.append(os.path.join(xsl_dir, STYLESHEET_GLOBAL))
        files_to_copy.append(os.path.join(xsl_dir, STYLESHEET_BOARD))
        files_to_copy.append(os.path.join(xsl_dir, "command.xsl"))
        files_to_copy.append(os.path.join(xsl_dir, "running.gif"))
        for file_path in files_to_copy:
            # OP We use copy instead of copy2 to update the creation date
            #    So we can clean the LOGS directories easily
            shutil.copy(file_path, log_dir)

        # Instanciate the Gui in order to produce the xml files that contain all
        # the boards
        gui = Gui(
            log_dir,
            today_jobs.ljobs,
            today_jobs.ljobs_not_today,
            runner.cfg.VARS.datehour,
            logger,
            file_boards=options.input_boards,
        )

        logger.write(src.printcolors.printcSuccess("OK"), 5)
        logger.write("\n\n", 5)
        logger.flush()

        # Display the list of the xml files
        logger.write(
            src.printcolors.printcInfo(("Here is the list of published" " files :\n")),
            4,
        )
        logger.write("%s\n" % gui.xml_global_file.logFile, 4)
        for board in gui.d_xml_board_files.keys():
            file_path = gui.d_xml_board_files[board].logFile
            file_name = os.path.basename(file_path)
            logger.write("%s\n" % file_path, 4)
            logger.add_link(file_name, "board", 0, board)

        logger.write("\n", 4)

    today_jobs.gui = gui

    interrupted = False
    try:
        # Run all the jobs contained in config_jobs
        today_jobs.run_jobs()
    except KeyboardInterrupt:
        interrupted = True
        logger.write(
            "\n\n%s\n\n" % (src.printcolors.printcWarning(_("Forced interruption"))), 1
        )
    except Exception as e:
        msg = _("CRITICAL ERROR: The jobs loop has been interrupted\n")
        logger.write("\n\n%s\n" % src.printcolors.printcError(msg))
        logger.write("%s\n" % str(e))
        # get the stack; format_tb avoids the temporary-file dance (which
        # broke on Python 3: TemporaryFile defaults to binary mode)
        __, __, exc_traceback = sys.exc_info()
        stack = "".join(traceback.format_tb(exc_traceback))
        logger.write("\nTRACEBACK: %s\n" % stack.replace('"', "'"), 1)

    finally:
        res = 0
        if interrupted:
            res = 1
            msg = _(
                "Killing the running jobs and trying" " to get the corresponding logs\n"
            )
            logger.write(src.printcolors.printcWarning(msg))

        # find the potential not finished jobs and kill them
        for jb in today_jobs.ljobs:
            if not jb.has_finished():
                res = 1
                try:
                    jb.kill_remote_process()
                except Exception as e:
                    msg = _("Failed to kill job %s: %s\n" % (jb.name, e))
                    logger.write(src.printcolors.printcWarning(msg))
            if jb.res_job != "0":
                res = 1
        # Tell the gui how the command ended
        if interrupted:
            if today_jobs.gui:
                today_jobs.gui.last_update(_("Forced interruption"))
        else:
            if today_jobs.gui:
                today_jobs.gui.last_update()
        # Output the results
        today_jobs.write_all_results()
        # Remove the temporary pyconf file
        if os.path.exists(path_pyconf):
            os.remove(path_pyconf)
        return res