# HG changeset patch # User Mikael Berthe # Date 1487530624 -3600 # Node ID 46681d21157ab0e497f8e90b0a92845c704b0b71 # Parent dee0e0c1ad10cf5de198863dd3273124cd9b9d28 Experimental optimization This is actually an old patch; it tries to batch checksum computation, which is slightly helpful with a high number of files and mechanical drives. diff -r dee0e0c1ad10 -r 46681d21157a goduf.go --- a/goduf.go Sun Feb 19 18:21:44 2017 +0100 +++ b/goduf.go Sun Feb 19 19:57:04 2017 +0100 @@ -267,6 +267,7 @@ if err := fo.Sum(fo.needHash); err != nil { myLog.Println(0, "Error:", err) } + fo.needHash = noChecksum } } @@ -278,7 +279,7 @@ // findDupesChecksums splits the fileObj list into several lists with the // same sType hash. -func (fileList FileObjList) findDupesChecksums(sType sumType) foListList { +func (fileList FileObjList) findDupesChecksums(sType sumType, dryRun bool) foListList { var dupeList foListList var scheduleFull foListList hashes := make(map[string]FileObjList) @@ -286,6 +287,10 @@ // Sort the list for better efficiency sort.Sort(ByInode(fileList)) + if sType == fullChecksum && dryRun { + fileList.scheduleChecksum(fullChecksum) + return append(dupeList, fileList) + } // Compute checksums for _, fo := range fileList { hash, err := fo.checksum(sType) @@ -309,11 +314,14 @@ } } if sType == partialChecksum && len(scheduleFull) > 0 { - computeSheduledChecksums(scheduleFull) + //computeSheduledChecksums(scheduleFull) for _, l := range scheduleFull { - r := l.findDupesChecksums(fullChecksum) + r := l.findDupesChecksums(fullChecksum, dryRun) dupeList = append(dupeList, r...) 
} + if dryRun { + return scheduleFull + } } return dupeList @@ -323,6 +331,7 @@ func (data *dataT) findDupes(skipPartial bool) foListList { var dupeList foListList var schedulePartial foListList + var schedulePartial2 foListList var scheduleFull foListList for size, sgListP := range data.sizeGroups { @@ -339,11 +348,16 @@ computeSheduledChecksums(schedulePartial, scheduleFull) for _, l := range schedulePartial { - r := l.findDupesChecksums(partialChecksum) + r := l.findDupesChecksums(partialChecksum, true) // dry-run + schedulePartial2 = append(schedulePartial2, r...) + } + computeSheduledChecksums(schedulePartial2) + for _, l := range schedulePartial { + r := l.findDupesChecksums(partialChecksum, false) dupeList = append(dupeList, r...) } for _, l := range scheduleFull { - r := l.findDupesChecksums(fullChecksum) + r := l.findDupesChecksums(fullChecksum, false) dupeList = append(dupeList, r...) } return dupeList