# HG changeset patch
# User Mikael Berthe
# Date 1404056072 -7200
# Node ID cc0ee80cf216dd509dd3f767e93772fd0705b6ae
# Parent  4e3a67dc70a077bcb86e11cfd31cabb590ad65b2
Sort results

Sort groups by the size of their files, and files within the same group
by path.

diff -r 4e3a67dc70a0 -r cc0ee80cf216 goduf.go
--- a/goduf.go	Sun Jun 29 15:47:04 2014 +0200
+++ b/goduf.go	Sun Jun 29 17:34:32 2014 +0200
@@ -57,6 +57,7 @@
 
 // FileObjList is only exported so that we can have a sort interface on inodes.
 type FileObjList []*fileObj
+type foListList []FileObjList
 
 type dataT struct {
 	totalSize   uint64
@@ -265,7 +266,7 @@
 		}
 	}
 }
-func computeSheduledChecksums(fileLists ...[]FileObjList) {
+func computeSheduledChecksums(fileLists ...foListList) {
 	var bigFileList FileObjList
 	// Merge the lists of FileObjList lists and create a unique list
 	// of file objects.
@@ -292,9 +293,9 @@
 	}
 }
 
-func (fileList FileObjList) findDupesChecksums(sType sumType) []FileObjList {
-	var dupeList []FileObjList
-	var scheduleFull []FileObjList
+func (fileList FileObjList) findDupesChecksums(sType sumType) foListList {
+	var dupeList foListList
+	var scheduleFull foListList
 	hashes := make(map[string]FileObjList)
 
 	// Sort the list for better efficiency
@@ -334,10 +335,10 @@
 }
 
 // findDupes() uses checksums to find file duplicates
-func (data *dataT) findDupes(skipPartial bool) []FileObjList {
-	var dupeList []FileObjList
-	var schedulePartial []FileObjList
-	var scheduleFull []FileObjList
+func (data *dataT) findDupes(skipPartial bool) foListList {
+	var dupeList foListList
+	var schedulePartial foListList
+	var scheduleFull foListList
 
 	for size, sgListP := range data.sizeGroups {
 		// We skip partial checksums for small files or if requested
@@ -545,7 +546,7 @@
 
 	// Get list of dupes
 	myLog.Println(1, "* Computing checksums...")
-	var result []FileObjList
+	var result foListList
 	if len(data.emptyFiles) > 0 {
 		result = append(result, data.emptyFiles)
 	}
@@ -557,7 +558,9 @@
 	if len(result) > 0 && !summary {
 		myLog.Println(1, "* Dupes:")
 	}
-	// TODO: sort by increasing size
+	// Sort by increasing size (of the files, not groups)
+	sort.Sort(byGroupFileSize(result))
+
 	var dupeSize uint64
 	data.cmpt = 0
 	for i, l := range result {
@@ -569,6 +572,7 @@
 			fmt.Printf("\nGroup #%d (%d files * %v):\n", i+1,
 				len(l), formatSize(size, true))
 		}
+		sort.Sort(byFilePathName(l))
 		for _, f := range l {
 			if !summary {
 				fmt.Println(f.FilePath)
@@ -592,3 +596,23 @@
 	myLog.Println(summaryLevel, "Redundant data size:",
 		formatSize(dupeSize, false))
 }
+
+// Implement a sort interface for the list of duplicate groups
+type byGroupFileSize foListList
+
+func (a byGroupFileSize) Len() int      { return len(a) }
+func (a byGroupFileSize) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
+func (a byGroupFileSize) Less(i, j int) bool {
+	// Since this is supposed to be used for duplicate lists,
+	// we use the size of the first file of the group.
+	return a[i][0].Size() < a[j][0].Size()
+}
+
+// Implement a sort interface for a slice of files
+type byFilePathName FileObjList
+
+func (a byFilePathName) Len() int      { return len(a) }
+func (a byFilePathName) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
+func (a byFilePathName) Less(i, j int) bool {
+	return a[i].Name() < a[j].Name()
+}
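Side note (not part of the changeset): the patch sorts by defining named slice
types that satisfy sort.Interface and passing them to sort.Sort. Below is a
minimal, self-contained Go sketch of that same pattern; demoFile, its fields,
and the sample data are hypothetical stand-ins for goduf's fileObj (which
embeds os.FileInfo), not the project's actual code.

	package main

	import (
		"fmt"
		"sort"
	)

	// demoFile is a hypothetical stand-in for goduf's fileObj.
	type demoFile struct {
		path string
		size int64
	}

	func (f demoFile) Name() string { return f.path }
	func (f demoFile) Size() int64  { return f.size }

	type fileList []demoFile
	type groupList []fileList

	// byGroupSize mirrors byGroupFileSize: all files in a duplicate group
	// share one size, so comparing each group's first element suffices.
	type byGroupSize groupList

	func (a byGroupSize) Len() int           { return len(a) }
	func (a byGroupSize) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
	func (a byGroupSize) Less(i, j int) bool { return a[i][0].Size() < a[j][0].Size() }

	// byName mirrors byFilePathName: order the files of one group by name.
	type byName fileList

	func (a byName) Len() int           { return len(a) }
	func (a byName) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
	func (a byName) Less(i, j int) bool { return a[i].Name() < a[j].Name() }

	func main() {
		groups := groupList{
			{{"big/b", 100}, {"big/a", 100}},
			{{"small/z", 10}, {"small/y", 10}},
		}
		sort.Sort(byGroupSize(groups)) // smallest duplicates first
		for i, g := range groups {
			sort.Sort(byName(g)) // then each group by file name
			for _, f := range g {
				fmt.Printf("group %d: %s (%d bytes)\n", i+1, f.Name(), f.Size())
			}
		}
	}

Conversions such as byGroupSize(groups) are free at runtime: they reinterpret
the same underlying slice, so sort.Sort reorders the data in place with no
copying, which is presumably why the patch takes this approach.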