3 # Copyright (C) 2010-2013 CEA/DEN
5 # This library is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU Lesser General Public
7 # License as published by the Free Software Foundation; either
8 # version 2.1 of the License.
10 # This library is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public
16 # License along with this library; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 # create a parser for command line options
24 parser = src.options.Options()
25 parser.add_option("s",
29 _("Search the duplicate files in the SOURCES directory."))
30 parser.add_option("p",
34 _("Optional: Search the duplicate files in the given "
40 _("Optional: Override the default list of filtered files."))
45 _("Optional: Override the default list of filtered "
51 _("Optional: Override the default list of filtered paths."))
# Default filters of the find_duplicates command. Each list is used as-is
# unless the matching "exclude" command line option overrides it.

# File extensions (the text after the last '.' of the file name) to skip.
default_extension_ignored = [
    'html', 'png', 'txt', 'js', 'xml', 'cmake', 'gif',
    'm4', 'in', 'pyo', 'pyc', 'doctree', 'css',
]

# Exact file names to skip.
default_files_ignored = [
    '__init__.py', 'Makefile.am', 'VERSION',
    'README', 'AUTHORS', 'NEWS', 'COPYING', 'ChangeLog',
]

# Directory paths to skip (none by default).
default_directories_ignored = []
def list_directory(path, extension_ignored, files_ignored, directories_ignored):
    '''Make the list of all files and paths that are not filtered

    The directory tree rooted at path is walked with os.walk and every file
    that passes the three filters is recorded.

    :param path Str: The path of the directory where to search for duplicates
    :param extension_ignored List: The list of extensions to ignore
    :param files_ignored List: The list of files to ignore
    :param directories_ignored List: The list of directory paths to ignore
    :return: files_arb_out is the list of [file, path]
             and files_out is the list of [file]
    :rtype: (List, List)
    '''
    files_out = []
    files_arb_out = []
    for root, dirs, files in os.walk(path):
        # NOTE(review): the exact rule used to match an ignored directory was
        # not visible in the reviewed source; a substring match of the ignored
        # path against the walked directory path is assumed -- confirm.
        # The test only depends on root, so it is done once per directory
        # instead of once per file.
        if any(rep in root for rep in directories_ignored):
            # Every sub-directory path contains root, hence also matches:
            # there is no need to walk further down this branch.
            dirs[:] = []
            continue
        for fic in files:
            # For a name without any '.', the "extension" is the whole name.
            extension = fic.split('.')[-1]
            if extension in extension_ignored or fic in files_ignored:
                continue
            # Both lists are filled in the same order, so that the entries
            # at a given index describe the same file.
            files_out.append([fic])
            files_arb_out.append([fic, root])
    return files_arb_out, files_out
def format_list_of_str(l_str):
    '''Make a comma separated string from a list

    :param l_str List or Str: The variable to format
    :return: l_str unchanged when it is not a list, otherwise its items
             joined with "," (an empty list gives an empty string)
    '''
    if not isinstance(l_str, list):
        # NOTE(review): the body of this branch was not visible in the
        # reviewed source; returning the value untouched is assumed -- confirm.
        return l_str
    # str() lets a list holding non-string items be displayed instead of
    # raising a TypeError in join.
    return ",".join(str(item) for item in l_str)
97 def print_info(logger, info, level=2):
100 :param logger Logger: The logger instance
101 :param info List: the list of tuple to display
102 :param valMax float: the maximum value of the variable
103 :param level int: the verbose level that will be used
105 smax = max(map(lambda l: len(l[0]), info))
107 sp = " " * (smax - len(i[0]))
108 src.printcolors.print_value(logger,
110 format_list_of_str(i[1]),
112 logger.write("\n", level)
class Progress_bar:
    "Create a progress bar in the terminal"

    def __init__(self, name, valMin, valMax, logger, length = 50):
        '''Initialization of the progress bar.

        :param name str: The name of the progress bar
        :param valMin float: the minimum value of the variable
        :param valMax float: the maximum value of the variable
        :param logger Logger: the logger instance
        :param length int: the length of the progress bar, that is the
                           number of characters between the brackets
        :raises src.SatException: if valMax is not strictly greater than
                                  valMin or if length is not strictly positive
        '''
        # NOTE(review): these assignments were not visible in the reviewed
        # source; they are restored from the attributes that the methods
        # of the class read -- confirm.
        self.name = name
        self.valMin = valMin
        self.valMax = valMax
        self.logger = logger
        self.length = length
        if (self.valMax - self.valMin) <= 0 or length <= 0:
            out_err = _('ERROR: Wrong init values for the progress bar\n')
            raise src.SatException(out_err)

    def display_value_progression(self,val):
        '''Display the progress bar.

        The bar is written through the logger with the verbose level 3. It
        starts with a carriage return and has no end of line.

        :param val float: val must be between valMin and valMax.
                          Otherwise a warning is written and no bar is
                          displayed.
        '''
        if val < self.valMin or val > self.valMax:
            self.logger.write(src.printcolors.printcWarning(_(
                           'WARNING : wrong value for the progress bar.\n')), 3)
            return

        # Progression in percent, then the number of '=' to draw.
        perc = (float(val-self.valMin) / (self.valMax - self.valMin)) * 100.
        nb_equals = int(perc * self.length / 100)
        out = '\r %s : %3d %% [%s%s]' % (self.name, perc, nb_equals*'=',
                                         (self.length - nb_equals)*' ' )
        self.logger.write(out, 3)
152 '''method that is called when salomeTools is called with --help option.
154 :return: The text to display for the find_duplicates command description.
157 return _("The find_duplicates command search recursively for all duplicates"
158 " files in a the INSTALL directory (or the optionally given "
159 "directory) and prints the found files to the terminal.\n\n"
160 "example:\nsat find_duplicates --path /tmp")
162 def run(args, runner, logger):
163 '''method that is called when salomeTools is called with find_duplicates
166 # parse the arguments
167 (options, args) = parser.parse_args(args)
169 # Determine the directory path where to search
170 # for duplicate files, depending on the options
172 dir_path = options.path
174 src.check_config_has_application(runner.cfg)
176 dir_path = os.path.join(runner.cfg.APPLICATION.workdir, "SOURCES")
178 dir_path = os.path.join(runner.cfg.APPLICATION.workdir, "INSTALL")
180 # Get the files to ignore during the searching
181 files_ignored = default_files_ignored
182 if options.exclude_file:
183 files_ignored = options.exclude_file
185 # Get the extension to ignore during the searching
186 extension_ignored = default_extension_ignored
187 if options.exclude_extension:
188 extension_ignored = options.exclude_extension
190 # Get the directory paths to ignore during the searching
191 directories_ignored = default_directories_ignored
192 if options.exclude_path:
193 directories_ignored = options.exclude_path
195 # Check the directory
196 if not(os.path.isdir(dir_path)):
197 msg = _("%s has to be a valid repository path." % dir_path)
198 logger.write(src.printcolors.printcError(msg), 1)
201 # Display some information
202 info = [(_("Directory"), dir_path),
203 (_("Ignored files"), files_ignored),
204 (_("Ignored extensions"), extension_ignored),
205 (_("Ignored directories"), directories_ignored)
207 print_info(logger, info)
209 # Get all the files and paths
210 logger.write(_("Store all file paths ... "), 3)
212 dic, fic = list_directory(dir_path,
216 logger.write(src.printcolors.printcSuccess('OK\n'), 3)
218 # Eliminate all the singletons
220 range_fic = range(0,len_fic)
222 my_bar = Progress_bar(_('Eliminate the files that are not duplicated'),
228 my_bar.display_value_progression(len_fic - i)
229 if fic.count(fic[i])==1:
233 # Format the resulting variable to get a dictionary
234 logger.write(_("\n\nCompute the dict {files : [list of pathes]} ... "), 3)
237 rg_fic = range(0,len_fic)
240 if fic[i-1] != fic[i]:
245 the_file = fichier[0]
248 if fic_path[0] == the_file:
249 l_path.append(fic_path[1])
250 dic_fic_paths[the_file] = l_path
252 logger.write(src.printcolors.printcSuccess('OK\n'), 3)
254 # End the execution if no duplicates were found
255 if len(dic_fic_paths) == 0:
256 logger.write(_("No duplicate files found.\n"), 3)
259 # Check that there are no singletons in the result (it would be a bug)
260 for elem in dic_fic_paths:
261 if len(dic_fic_paths[elem])<2:
262 logger.write(_("Warning : element %s has not more than"
263 " two paths.\n") % elem, 3)
266 # Display the results
267 logger.write(src.printcolors.printcInfo(_('\nResults:\n\n')), 3)
268 max_file_name_lenght = max(map(lambda l: len(l), dic_fic_paths.keys()))
269 for fich in dic_fic_paths:
270 logger.write(src.printcolors.printcLabel(fich), 1)
271 sp = " " * (max_file_name_lenght - len(fich))
273 for rep in dic_fic_paths[fich]:
276 logger.write("\n", 1)