133 |
129 |
134 data.cmpt++ |
130 data.cmpt++ |
135 data.totalSize += uint64(f.Size()) |
131 data.totalSize += uint64(f.Size()) |
136 fo := &fileObj{FilePath: path, FileInfo: f} |
132 fo := &fileObj{FilePath: path, FileInfo: f} |
137 if _, ok := data.sizeGroups[f.Size()]; !ok { |
133 if _, ok := data.sizeGroups[f.Size()]; !ok { |
138 data.sizeGroups[f.Size()] = &sizeClass{} |
134 data.sizeGroups[f.Size()] = new(FileObjList) |
139 } |
135 } |
140 data.sizeGroups[f.Size()].files = |
136 *data.sizeGroups[f.Size()] = append(*data.sizeGroups[f.Size()], fo) |
141 append(data.sizeGroups[f.Size()].files, fo) |
|
142 return nil |
137 return nil |
143 } |
138 } |
144 |
139 |
145 func (fo *fileObj) CheckSum() error { |
140 func (fo *fileObj) CheckSum() error { |
146 file, err := os.Open(fo.FilePath) |
141 file, err := os.Open(fo.FilePath) |
344 func (data *dataT) findDupes(skipPartial bool) []FileObjList { |
339 func (data *dataT) findDupes(skipPartial bool) []FileObjList { |
345 var dupeList []FileObjList |
340 var dupeList []FileObjList |
346 var schedulePartial []FileObjList |
341 var schedulePartial []FileObjList |
347 var scheduleFull []FileObjList |
342 var scheduleFull []FileObjList |
348 |
343 |
349 for size, sizeGroup := range data.sizeGroups { |
344 for size, sgListP := range data.sizeGroups { |
350 // We skip partial checksums for small files or if requested |
345 // We skip partial checksums for small files or if requested |
351 if size > minSizePartialChecksum && !skipPartial { |
346 if size > minSizePartialChecksum && !skipPartial { |
352 sizeGroup.files.scheduleChecksum(partialChecksum) |
347 sgListP.scheduleChecksum(partialChecksum) |
353 schedulePartial = append(schedulePartial, sizeGroup.files) |
348 schedulePartial = append(schedulePartial, *sgListP) |
354 } else { |
349 } else { |
355 sizeGroup.files.scheduleChecksum(fullChecksum) |
350 sgListP.scheduleChecksum(fullChecksum) |
356 scheduleFull = append(scheduleFull, sizeGroup.files) |
351 scheduleFull = append(scheduleFull, *sgListP) |
357 } |
352 } |
358 } |
353 } |
359 |
354 |
360 computeSheduledChecksums(schedulePartial, scheduleFull) |
355 computeSheduledChecksums(schedulePartial, scheduleFull) |
361 |
356 |
370 // TODO: sort by increasing size |
365 // TODO: sort by increasing size |
371 return dupeList |
366 return dupeList |
372 } |
367 } |
373 |
368 |
374 func (data *dataT) dropEmptyFiles(ignoreEmpty bool) (emptyCount int) { |
369 func (data *dataT) dropEmptyFiles(ignoreEmpty bool) (emptyCount int) { |
375 sc, ok := data.sizeGroups[0] |
370 sgListP, ok := data.sizeGroups[0] |
376 if ok == false { |
371 if ok == false { |
377 return // no empty files |
372 return // no empty files |
378 } |
373 } |
379 if !ignoreEmpty { |
374 if !ignoreEmpty { |
380 if len(sc.files) > 1 { |
375 if len(*sgListP) > 1 { |
381 data.emptyFiles = sc.files |
376 data.emptyFiles = *sgListP |
382 } |
377 } |
383 delete(data.sizeGroups, 0) |
378 delete(data.sizeGroups, 0) |
384 return |
379 return |
385 } |
380 } |
386 emptyCount = len(sc.files) |
381 emptyCount = len(*sgListP) |
387 delete(data.sizeGroups, 0) |
382 delete(data.sizeGroups, 0) |
388 return |
383 return |
389 } |
384 } |
390 |
385 |
391 // initialCleanup() removes files with unique size as well as hard links |
386 // initialCleanup() removes files with unique size as well as hard links |
392 func (data *dataT) initialCleanup() (hardLinkCount, uniqueSizeCount int) { |
387 func (data *dataT) initialCleanup() (hardLinkCount, uniqueSizeCount int) { |
393 for s, sizeGroup := range data.sizeGroups { |
388 for s, sgListP := range data.sizeGroups { |
394 if len(sizeGroup.files) < 2 { |
389 if len(*sgListP) < 2 { |
395 delete(data.sizeGroups, s) |
390 delete(data.sizeGroups, s) |
396 uniqueSizeCount++ |
391 uniqueSizeCount++ |
397 continue |
392 continue |
398 } |
393 } |
399 |
394 |
413 for { |
408 for { |
414 type devinode struct { dev, ino uint64 } |
409 type devinode struct { dev, ino uint64 } |
415 devinodes := make(map[devinode]bool) |
410 devinodes := make(map[devinode]bool) |
416 var hardLinkIndex int |
411 var hardLinkIndex int |
417 |
412 |
418 for i, fo := range sizeGroup.files { |
413 for i, fo := range *sgListP { |
419 dev, ino := GetDevIno(fo) |
414 dev, ino := GetDevIno(fo) |
420 di := devinode{ dev, ino} |
415 di := devinode{ dev, ino} |
421 if _, hlink := devinodes[di]; hlink { |
416 if _, hlink := devinodes[di]; hlink { |
422 hardLinkIndex = i |
417 hardLinkIndex = i |
423 hardLinkCount++ |
418 hardLinkCount++ |
431 if hardLinkIndex == 0 { |
426 if hardLinkIndex == 0 { |
432 break |
427 break |
433 } |
428 } |
434 i := hardLinkIndex |
429 i := hardLinkIndex |
435 // Remove hardink |
430 // Remove hardink |
436 copy(sizeGroup.files[i:], sizeGroup.files[i+1:]) |
431 copy((*sgListP)[i:], (*sgListP)[i+1:]) |
437 sizeGroup.files[len(sizeGroup.files)-1] = nil |
432 (*sgListP)[len(*sgListP)-1] = nil |
438 sizeGroup.files = sizeGroup.files[:len(sizeGroup.files)-1] |
433 *sgListP = (*sgListP)[:len(*sgListP)-1] |
439 } |
434 } |
440 // We have found hard links in this size group, |
435 // We have found hard links in this size group, |
441 // maybe we can remove it |
436 // maybe we can remove it |
442 if hardlinksFound { |
437 if hardlinksFound { |
443 if len(sizeGroup.files) < 2 { |
438 if len(*sgListP) < 2 { |
444 delete(data.sizeGroups, s) |
439 delete(data.sizeGroups, s) |
445 uniqueSizeCount++ |
440 uniqueSizeCount++ |
446 continue |
441 continue |
447 } |
442 } |
448 } |
443 } |
511 |
506 |
512 if *timings { |
507 if *timings { |
513 log.SetFlags(log.LstdFlags | log.Lmicroseconds) |
508 log.SetFlags(log.LstdFlags | log.Lmicroseconds) |
514 } |
509 } |
515 |
510 |
516 data.sizeGroups = make(map[int64]*sizeClass) |
511 data.sizeGroups = make(map[int64]*FileObjList) |
517 myLog.Println(1, "* Reading file metadata") |
512 myLog.Println(1, "* Reading file metadata") |
518 |
513 |
519 for _, root := range flag.Args() { |
514 for _, root := range flag.Args() { |
520 if err := filepath.Walk(root, visit); err != nil { |
515 if err := filepath.Walk(root, visit); err != nil { |
521 myLog.Printf(-1, "* Error: could not read file tree:\n") |
516 myLog.Printf(-1, "* Error: could not read file tree:\n") |