golang/mikael/goduf: comparison goduf.go

equal deleted inserted replaced

-:4e3a67dc70a0
+:cc0ee80cf216
 	needHash    sumType
 }
 // FileObjList is only exported so that we can have a sort interface on inodes.
 type FileObjList []*fileObj
+type foListList []FileObjList // a list of file lists, e.g. the groups of duplicates returned by findDupes
 type dataT struct {
 	totalSize   uint64
 	cmpt        uint
 	sizeGroups  map[int64]*FileObjList
 		if err := fo.Sum(fo.needHash); err != nil {
 			myLog.Println(0, "Error:", err)
 		}
 	}
 }
-func computeSheduledChecksums(fileLists ...[]FileObjList) {
+func computeSheduledChecksums(fileLists ...foListList) {
 	var bigFileList FileObjList
 	// Merge the lists of FileObjList lists and create a unique list
 	// of file objects.
 	for _, foll := range fileLists {
 		for _, fol := range foll {
 	for _, fo := range fileList {
 		fo.needHash = sType
 	}
 }
-func (fileList FileObjList) findDupesChecksums(sType sumType) []FileObjList {
+func (fileList FileObjList) findDupesChecksums(sType sumType) foListList {
-	var dupeList []FileObjList
+	var dupeList foListList
-	var scheduleFull []FileObjList
+	var scheduleFull foListList
 	hashes := make(map[string]FileObjList)
 	// Sort the list for better efficiency
 	sort.Sort(ByInode(fileList))
 	return dupeList
 }
 // findDupes() uses checksums to find file duplicates
-func (data *dataT) findDupes(skipPartial bool) []FileObjList {
+func (data *dataT) findDupes(skipPartial bool) foListList {
-	var dupeList []FileObjList
+	var dupeList foListList
-	var schedulePartial []FileObjList
+	var schedulePartial foListList
-	var scheduleFull []FileObjList
+	var scheduleFull foListList
 	for size, sgListP := range data.sizeGroups {
 		// We skip partial checksums for small files or if requested
 		if size > minSizePartialChecksum && !skipPartial {
 			sgListP.scheduleChecksum(partialChecksum)
 		data.dispCount() // XXX
 	}
 	// Get list of dupes
 	myLog.Println(1, "* Computing checksums...")
-	var result []FileObjList
+	var result foListList
 	if len(data.emptyFiles) > 0 {
 		result = append(result, data.emptyFiles)
 	}
 	result = append(result, data.findDupes(skipPartial)...)
 	// Done!  Dump dupes
 	if len(result) > 0 && !summary {
 		myLog.Println(1, "* Dupes:")
 	}
-	// TODO: sort by increasing size
+	// Sort by increasing size (of the files, not groups)
+	sort.Sort(byGroupFileSize(result))
 	var dupeSize uint64
 	data.cmpt = 0
 	for i, l := range result {
 		size := uint64(l[0].Size())
 		// We do not count the size of the 1st item
 		dupeSize += size * uint64(len(l)-1)
 		if !summary {
 			fmt.Printf("\nGroup #%d (%d files * %v):\n", i+1,
 				len(l), formatSize(size, true))
 		}
+		sort.Sort(byFilePathName(l))
 		for _, f := range l {
 			if !summary {
 				fmt.Println(f.FilePath)
 			}
 			data.cmpt++
 	myLog.Println(summaryLevel, "Final count:", data.cmpt,
 		"duplicate files in", len(result), "sets")
 	myLog.Println(summaryLevel, "Redundant data size:",
 		formatSize(dupeSize, false))
 }
+// byGroupFileSize adapts a list of duplicate groups to sort.Interface,
+// ordering the groups by the size of the files they hold.
+type byGroupFileSize foListList
+// Len reports the number of groups in the list.
+func (groups byGroupFileSize) Len() int {
+	return len(groups)
+}
+// Swap exchanges the groups at positions i and j.
+func (groups byGroupFileSize) Swap(i, j int) {
+	groups[i], groups[j] = groups[j], groups[i]
+}
+// Less reports whether group i should be sorted before group j.
+func (groups byGroupFileSize) Less(i, j int) bool {
+	// This is meant to be used on lists of duplicates, so the size of the
+	// first file stands in for the whole group.
+	sizeI := groups[i][0].Size()
+	sizeJ := groups[j][0].Size()
+	return sizeI < sizeJ
+}
+// byFilePathName adapts a slice of files to sort.Interface, ordering the
+// files by their FilePath.
+type byFilePathName FileObjList
+func (a byFilePathName) Len() int      { return len(a) }
+func (a byFilePathName) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
+func (a byFilePathName) Less(i, j int) bool {
+	// Compare the FilePath field, which is what gets printed for each
+	// member of a group.  Name() was compared here before; presumably it
+	// only yields the last path element (TODO confirm), which would leave
+	// files sharing a name in different directories without a defined
+	// relative order.
+	return a[i].FilePath < a[j].FilePath
+}

changeset 16	cc0ee80cf216
parent 15	4e3a67dc70a0
child 19	3389a17fc0d2