Experimental optimization
author Mikael Berthe <mikael@lilotux.net>
Sun, 19 Feb 2017 19:57:04 +0100
changeset 22 46681d21157a
parent 21 dee0e0c1ad10
child 23 9ce0f2e2a33f
Experimental optimization. This is actually an old patch; it tries to batch checksum computation, which is slightly helpful with a high number of files and mechanical drives.
goduf.go
--- a/goduf.go	Sun Feb 19 18:21:44 2017 +0100
+++ b/goduf.go	Sun Feb 19 19:57:04 2017 +0100
@@ -267,6 +267,7 @@
 		if err := fo.Sum(fo.needHash); err != nil {
 			myLog.Println(0, "Error:", err)
 		}
+		fo.needHash = noChecksum
 	}
 }
 
@@ -278,7 +279,7 @@
 
 // findDupesChecksums splits the fileObj list into several lists with the
 // same sType hash.
-func (fileList FileObjList) findDupesChecksums(sType sumType) foListList {
+func (fileList FileObjList) findDupesChecksums(sType sumType, dryRun bool) foListList {
 	var dupeList foListList
 	var scheduleFull foListList
 	hashes := make(map[string]FileObjList)
@@ -286,6 +287,10 @@
 	// Sort the list for better efficiency
 	sort.Sort(ByInode(fileList))
 
+	if sType == fullChecksum && dryRun {
+		fileList.scheduleChecksum(fullChecksum)
+		return append(dupeList, fileList)
+	}
 	// Compute checksums
 	for _, fo := range fileList {
 		hash, err := fo.checksum(sType)
@@ -309,11 +314,14 @@
 		}
 	}
 	if sType == partialChecksum && len(scheduleFull) > 0 {
-		computeSheduledChecksums(scheduleFull)
+		//computeSheduledChecksums(scheduleFull)
 		for _, l := range scheduleFull {
-			r := l.findDupesChecksums(fullChecksum)
+			r := l.findDupesChecksums(fullChecksum, dryRun)
 			dupeList = append(dupeList, r...)
 		}
+		if dryRun {
+			return scheduleFull
+		}
 	}
 
 	return dupeList
@@ -323,6 +331,7 @@
 func (data *dataT) findDupes(skipPartial bool) foListList {
 	var dupeList foListList
 	var schedulePartial foListList
+	var schedulePartial2 foListList
 	var scheduleFull foListList
 
 	for size, sgListP := range data.sizeGroups {
@@ -339,11 +348,16 @@
 	computeSheduledChecksums(schedulePartial, scheduleFull)
 
 	for _, l := range schedulePartial {
-		r := l.findDupesChecksums(partialChecksum)
+		r := l.findDupesChecksums(partialChecksum, true) // dry-run
+		schedulePartial2 = append(schedulePartial2, r...)
+	}
+	computeSheduledChecksums(schedulePartial2)
+	for _, l := range schedulePartial {
+		r := l.findDupesChecksums(partialChecksum, false)
 		dupeList = append(dupeList, r...)
 	}
 	for _, l := range scheduleFull {
-		r := l.findDupesChecksums(fullChecksum)
+		r := l.findDupesChecksums(fullChecksum, false)
 		dupeList = append(dupeList, r...)
 	}
 	return dupeList