265 // Compute checksums |
265 // Compute checksums |
266 for _, fo := range bigFileList { |
266 for _, fo := range bigFileList { |
267 if err := fo.Sum(fo.needHash); err != nil { |
267 if err := fo.Sum(fo.needHash); err != nil { |
268 myLog.Println(0, "Error:", err) |
268 myLog.Println(0, "Error:", err) |
269 } |
269 } |
|
270 fo.needHash = noChecksum |
270 } |
271 } |
271 } |
272 } |
272 |
273 |
273 func (fileList FileObjList) scheduleChecksum(sType sumType) { |
274 func (fileList FileObjList) scheduleChecksum(sType sumType) { |
274 for _, fo := range fileList { |
275 for _, fo := range fileList { |
276 } |
277 } |
277 } |
278 } |
278 |
279 |
279 // findDupesChecksums splits the fileObj list into several lists with the |
280 // findDupesChecksums splits the fileObj list into several lists with the |
280 // same sType hash. |
281 // same sType hash. |
281 func (fileList FileObjList) findDupesChecksums(sType sumType) foListList { |
282 func (fileList FileObjList) findDupesChecksums(sType sumType, dryRun bool) foListList { |
282 var dupeList foListList |
283 var dupeList foListList |
283 var scheduleFull foListList |
284 var scheduleFull foListList |
284 hashes := make(map[string]FileObjList) |
285 hashes := make(map[string]FileObjList) |
285 |
286 |
286 // Sort the list for better efficiency |
287 // Sort the list for better efficiency |
287 sort.Sort(ByInode(fileList)) |
288 sort.Sort(ByInode(fileList)) |
288 |
289 |
|
290 if sType == fullChecksum && dryRun { |
|
291 fileList.scheduleChecksum(fullChecksum) |
|
292 return append(dupeList, fileList) |
|
293 } |
289 // Compute checksums |
294 // Compute checksums |
290 for _, fo := range fileList { |
295 for _, fo := range fileList { |
291 hash, err := fo.checksum(sType) |
296 hash, err := fo.checksum(sType) |
292 if err != nil { |
297 if err != nil { |
293 myLog.Println(0, "Error:", err) |
298 myLog.Println(0, "Error:", err) |
307 dupeList = append(dupeList, l) |
312 dupeList = append(dupeList, l) |
308 myLog.Printf(5, " . found %d new duplicates\n", len(l)) |
313 myLog.Printf(5, " . found %d new duplicates\n", len(l)) |
309 } |
314 } |
310 } |
315 } |
311 if sType == partialChecksum && len(scheduleFull) > 0 { |
316 if sType == partialChecksum && len(scheduleFull) > 0 { |
312 computeSheduledChecksums(scheduleFull) |
317 //computeSheduledChecksums(scheduleFull) |
313 for _, l := range scheduleFull { |
318 for _, l := range scheduleFull { |
314 r := l.findDupesChecksums(fullChecksum) |
319 r := l.findDupesChecksums(fullChecksum, dryRun) |
315 dupeList = append(dupeList, r...) |
320 dupeList = append(dupeList, r...) |
|
321 } |
|
322 if dryRun { |
|
323 return scheduleFull |
316 } |
324 } |
317 } |
325 } |
318 |
326 |
319 return dupeList |
327 return dupeList |
320 } |
328 } |
321 |
329 |
322 // findDupes() uses checksums to find file duplicates |
330 // findDupes() uses checksums to find file duplicates |
323 func (data *dataT) findDupes(skipPartial bool) foListList { |
331 func (data *dataT) findDupes(skipPartial bool) foListList { |
324 var dupeList foListList |
332 var dupeList foListList |
325 var schedulePartial foListList |
333 var schedulePartial foListList |
|
334 var schedulePartial2 foListList |
326 var scheduleFull foListList |
335 var scheduleFull foListList |
327 |
336 |
328 for size, sgListP := range data.sizeGroups { |
337 for size, sgListP := range data.sizeGroups { |
329 // We skip partial checksums for small files or if requested |
338 // We skip partial checksums for small files or if requested |
330 if size > minSizePartialChecksum && !skipPartial { |
339 if size > minSizePartialChecksum && !skipPartial { |
337 } |
346 } |
338 |
347 |
339 computeSheduledChecksums(schedulePartial, scheduleFull) |
348 computeSheduledChecksums(schedulePartial, scheduleFull) |
340 |
349 |
341 for _, l := range schedulePartial { |
350 for _, l := range schedulePartial { |
342 r := l.findDupesChecksums(partialChecksum) |
351 r := l.findDupesChecksums(partialChecksum, true) // dry-run |
|
352 schedulePartial2 = append(schedulePartial2, r...) |
|
353 } |
|
354 computeSheduledChecksums(schedulePartial2) |
|
355 for _, l := range schedulePartial { |
|
356 r := l.findDupesChecksums(partialChecksum, false) |
343 dupeList = append(dupeList, r...) |
357 dupeList = append(dupeList, r...) |
344 } |
358 } |
345 for _, l := range scheduleFull { |
359 for _, l := range scheduleFull { |
346 r := l.findDupesChecksums(fullChecksum) |
360 r := l.findDupesChecksums(fullChecksum, false) |
347 dupeList = append(dupeList, r...) |
361 dupeList = append(dupeList, r...) |
348 } |
362 } |
349 return dupeList |
363 return dupeList |
350 } |
364 } |
351 |
365 |