# HG changeset patch
# User Mikael Berthe
# Date 1403987008 -7200
# Node ID 68375cc98f98e23266c604b1445bde1f2a720748
# Parent 6740350569d3e4796f679023f3e47a5df6dce595
Refactor checksum functions to reduce code duplication

diff -r 6740350569d3 -r 68375cc98f98 goduf.go
--- a/goduf.go	Sat Jun 28 21:57:58 2014 +0200
+++ b/goduf.go	Sat Jun 28 22:23:28 2014 +0200
@@ -56,7 +56,7 @@
 // FileObjList is only exported so that we can have a sort interface on inodes.
 type FileObjList []*fileObj
 
-type sizeClass struct {
+type sizeClass struct { // XXX still useful?
 	files    FileObjList
 	medsums  map[string]FileObjList
 	fullsums map[string]FileObjList
@@ -240,21 +240,31 @@
 	return
 }
 
-func findDupesFullChecksums(fileList FileObjList) []FileObjList {
+func (fileList FileObjList) findDupesChecksums(sType sumType) []FileObjList {
 	var dupeList []FileObjList
 	hashes := make(map[string]FileObjList)
 
 	// Sort the list for better efficiency
 	sort.Sort(ByInode(fileList))
 
-	// Compute full checksums
+	// Compute checksums
 	for _, fo := range fileList {
-		if err := fo.Sum(fullChecksum); err != nil {
+		if err := fo.Sum(sType); err != nil {
 			myLog.Println(0, "Error:", err)
 			continue
 		}
-		hash := hex.EncodeToString(fo.Hash)
-		hashes[hash] = append(hashes[hash], fo)
+		var hbytes []byte
+		if sType == partialChecksum {
+			hbytes = fo.PartialHash
+		} else if sType == fullChecksum {
+			hbytes = fo.Hash
+		} else {
+			panic("Internal error: Invalid sType")
+		}
+		if hbytes != nil {
+			hash := hex.EncodeToString(hbytes)
+			hashes[hash] = append(hashes[hash], fo)
+		}
 	}
 
 	// Let's de-dupe now...
@@ -262,39 +272,12 @@
 		if len(l) < 2 {
 			continue
 		}
-		dupeList = append(dupeList, l)
-		// TODO sort by increasing size
-		myLog.Printf(5, " . found %d new duplicates\n", len(l))
-	}
-
-	return dupeList
-}
-
-// TODO: refactor to avoid code duplication
-func findDupesPartialChecksums(fileList FileObjList) []FileObjList {
-	var dupeList []FileObjList
-	hashes := make(map[string]FileObjList)
-
-	// Sort the list for better efficiency
-	sort.Sort(ByInode(fileList))
-
-	// Compute partial checksums
-	for _, fo := range fileList {
-		if err := fo.Sum(partialChecksum); err != nil {
-			myLog.Println(0, "Error:", err)
-			continue
+		if sType == partialChecksum {
+			dupeList = append(dupeList, l.findDupesChecksums(fullChecksum)...)
+		} else { // full checksums -> we’re done
+			dupeList = append(dupeList, l)
 		}
-		hash := hex.EncodeToString(fo.PartialHash)
-		hashes[hash] = append(hashes[hash], fo)
-	}
-
-	// Let's de-dupe now...
-	for _, l := range hashes {
-		if len(l) < 2 {
-			continue
-		}
-		dupeList = append(dupeList, findDupesFullChecksums(l)...)
-		// TODO sort by increasing size
+		// TODO: sort by increasing size
 	}
 
 	return dupeList
@@ -308,9 +291,9 @@
 		var r []FileObjList
 		// We skip partial checksums for small files or if requested
 		if size > minSizePartialChecksum && !skipPartial {
-			r = findDupesPartialChecksums(sizeGroup.files)
+			r = sizeGroup.files.findDupesChecksums(partialChecksum)
 		} else {
-			r = findDupesFullChecksums(sizeGroup.files)
+			r = sizeGroup.files.findDupesChecksums(fullChecksum)
 		}
 		dupeList = append(dupeList, r...)
 	}