97 } |
98 } |
98 // Error message without timestamp |
99 // Error message without timestamp |
99 fmt.Fprintln(os.Stderr, args...) |
100 fmt.Fprintln(os.Stderr, args...) |
100 } |
101 } |
101 |
102 |
|
103 // visit is called for every file and directory. |
|
104 // We check the file object is correct (regular, readable...) and add |
|
105 // it to the data.sizeGroups hash. |
102 func visit(path string, f os.FileInfo, err error) error { |
106 func visit(path string, f os.FileInfo, err error) error { |
103 if err != nil { |
107 if err != nil { |
104 if f == nil { |
108 if f == nil { |
105 return err |
109 return err |
106 } |
110 } |
183 fo.PartialHash = hash.Sum(nil) |
189 fo.PartialHash = hash.Sum(nil) |
184 |
190 |
185 return nil |
191 return nil |
186 } |
192 } |
187 |
193 |
|
194 // Sum computes the file's SHA1 hash, partial or full according to sType. |
188 func (fo *fileObj) Sum(sType sumType) error { |
195 func (fo *fileObj) Sum(sType sumType) error { |
189 if sType == partialChecksum { |
196 if sType == partialChecksum { |
190 return fo.MedSum() |
197 return fo.partialChecksum() |
191 } else if sType == fullChecksum { |
198 } else if sType == fullChecksum { |
192 return fo.CheckSum() |
199 return fo.Checksum() |
193 } else if sType == noChecksum { |
200 } else if sType == noChecksum { |
194 return nil |
201 return nil |
195 } |
202 } |
196 panic("Internal error: Invalid sType") |
203 panic("Internal error: Invalid sType") |
197 } |
204 } |
198 |
205 |
199 func (data *dataT) dispCount() { // FIXME rather useless |
206 // dispCount displays statistics to the user. |
|
207 func (data *dataT) dispCount() { // Is this still useful? |
200 if myLog.verbosity < 4 { |
208 if myLog.verbosity < 4 { |
201 return |
209 return |
202 } |
210 } |
203 var c1, c1b, c2 int |
211 var c1, c1b, c2 int |
204 var s1 string |
212 var s1 string |
236 } |
244 } |
237 } |
245 } |
238 return hex.EncodeToString(hbytes), nil |
246 return hex.EncodeToString(hbytes), nil |
239 } |
247 } |
240 |
248 |
241 func (fileList FileObjList) computeSheduledChecksums() { |
249 // computeSheduledChecksums calculates the checksums for all the files |
242 // Sort the list for better efficiency |
250 // from the fileLists slice items (the kind of hash is taken from the |
243 sort.Sort(ByInode(fileList)) |
251 // needHash field). |
244 |
|
245 //myLog.Printf(6, " . will compute %d checksums\n", len(fileList)) |
|
246 |
|
247 // Compute checksums |
|
248 for _, fo := range fileList { |
|
249 if err := fo.Sum(fo.needHash); err != nil { |
|
250 myLog.Println(0, "Error:", err) |
|
251 } |
|
252 } |
|
253 } |
|
254 func computeSheduledChecksums(fileLists ...foListList) { |
252 func computeSheduledChecksums(fileLists ...foListList) { |
255 var bigFileList FileObjList |
253 var bigFileList FileObjList |
256 // Merge the lists of FileObjList lists and create a unique list |
254 // Merge the lists of FileObjList lists and create a unique list |
257 // of file objects. |
255 // of file objects. |
258 for _, foll := range fileLists { |
256 for _, foll := range fileLists { |
276 for _, fo := range fileList { |
274 for _, fo := range fileList { |
277 fo.needHash = sType |
275 fo.needHash = sType |
278 } |
276 } |
279 } |
277 } |
280 |
278 |
|
279 // findDupesChecksums splits the fileObj list into several lists with the |
|
280 // same sType hash. |
281 func (fileList FileObjList) findDupesChecksums(sType sumType) foListList { |
281 func (fileList FileObjList) findDupesChecksums(sType sumType) foListList { |
282 var dupeList foListList |
282 var dupeList foListList |
283 var scheduleFull foListList |
283 var scheduleFull foListList |
284 hashes := make(map[string]FileObjList) |
284 hashes := make(map[string]FileObjList) |
285 |
285 |
347 dupeList = append(dupeList, r...) |
347 dupeList = append(dupeList, r...) |
348 } |
348 } |
349 return dupeList |
349 return dupeList |
350 } |
350 } |
351 |
351 |
|
352 // dropEmptyFiles removes the empty files from the main map, since we don't |
|
353 // have to do any processing about them. |
|
354 // If ignoreEmpty is false, the empty file list is saved in data.emptyFiles. |
352 func (data *dataT) dropEmptyFiles(ignoreEmpty bool) (emptyCount int) { |
355 func (data *dataT) dropEmptyFiles(ignoreEmpty bool) (emptyCount int) { |
353 sgListP, ok := data.sizeGroups[0] |
356 sgListP, ok := data.sizeGroups[0] |
354 if ok == false { |
357 if ok == false { |
355 return // no empty files |
358 return // no empty files |
356 } |
359 } |
386 // Check for hard links |
389 // Check for hard links |
387 // Remove unique dev/inodes |
390 // Remove unique dev/inodes |
388 // Instead of this loop, another way would be to use the field |
391 // Instead of this loop, another way would be to use the field |
389 // "Unique" of the fileObj to mark them to be discarded |
392 // "Unique" of the fileObj to mark them to be discarded |
390 // and remove them all at the end. |
393 // and remove them all at the end. |
|
394 // TODO: Should we also check for duplicate paths? |
391 for { |
395 for { |
392 type devinode struct{ dev, ino uint64 } |
396 type devinode struct{ dev, ino uint64 } |
393 devinodes := make(map[devinode]bool) |
397 devinodes := make(map[devinode]bool) |
394 var hardLinkIndex int |
398 var hardLinkIndex int |
395 |
399 |
453 return fmt.Sprintf("%d %s", humanSize, units[n]) |
458 return fmt.Sprintf("%d %s", humanSize, units[n]) |
454 } |
459 } |
455 return fmt.Sprintf("%d bytes (%d %s)", sizeBytes, humanSize, units[n]) |
460 return fmt.Sprintf("%d bytes (%d %s)", sizeBytes, humanSize, units[n]) |
456 } |
461 } |
457 |
462 |
|
463 // It all starts here. |
458 func main() { |
464 func main() { |
459 var verbose bool |
465 var verbose bool |
460 var summary bool |
466 var summary bool |
461 var skipPartial bool |
467 var skipPartial bool |
462 var ignoreEmpty bool |
468 var ignoreEmpty bool |
463 |
469 |
|
470 // Command line parameters parsing |
464 flag.BoolVar(&verbose, "verbose", false, "Be verbose (verbosity=1)") |
471 flag.BoolVar(&verbose, "verbose", false, "Be verbose (verbosity=1)") |
465 flag.BoolVar(&verbose, "v", false, "See --verbose") |
472 flag.BoolVar(&verbose, "v", false, "See --verbose") |
466 flag.BoolVar(&summary, "summary", false, "Do not display the duplicate list") |
473 flag.BoolVar(&summary, "summary", false, "Do not display the duplicate list") |
467 flag.BoolVar(&summary, "s", false, "See --summary") |
474 flag.BoolVar(&summary, "s", false, "See --summary") |
468 flag.BoolVar(&skipPartial, "skip-partial", false, "Skip partial checksums") |
475 flag.BoolVar(&skipPartial, "skip-partial", false, "Skip partial checksums") |
472 timings := flag.Bool("timings", false, "Set detailed log timings") |
479 timings := flag.Bool("timings", false, "Set detailed log timings") |
473 flag.BoolVar(&ignoreEmpty, "no-empty", false, "Ignore empty files") |
480 flag.BoolVar(&ignoreEmpty, "no-empty", false, "Ignore empty files") |
474 |
481 |
475 flag.Parse() |
482 flag.Parse() |
476 |
483 |
|
484 // Set verbosity: --verbose=true == --verbosity=1 |
477 if myLog.verbosity > 0 { |
485 if myLog.verbosity > 0 { |
478 verbose = true |
486 verbose = true |
479 } else if verbose == true { |
487 } else if verbose == true { |
480 myLog.verbosity = 1 |
488 myLog.verbosity = 1 |
481 } |
489 } |
482 |
490 |
483 if len(flag.Args()) == 0 { |
491 if len(flag.Args()) == 0 { |
484 // TODO: more helpful usage statement |
492 // TODO: more helpful usage statement |
485 myLog.Println(-1, "Usage:", os.Args[0], |
493 myLog.Println(-1, "Usage:", os.Args[0], |
486 "[options] base_directory") |
494 "[options] base_directory|file...") |
487 os.Exit(0) |
495 os.Exit(0) |
488 } |
496 } |
489 |
497 |
|
498 // Change log format for benchmarking |
490 if *timings { |
499 if *timings { |
491 log.SetFlags(log.LstdFlags | log.Lmicroseconds) |
500 log.SetFlags(log.LstdFlags | log.Lmicroseconds) |
492 } |
501 } |
493 |
502 |
494 data.sizeGroups = make(map[int64]*FileObjList) |
503 data.sizeGroups = make(map[int64]*FileObjList) |
499 myLog.Printf(-1, "* Error: could not read file tree:\n") |
508 myLog.Printf(-1, "* Error: could not read file tree:\n") |
500 myLog.Printf(-1, "> %v\n", err) |
509 myLog.Printf(-1, "> %v\n", err) |
501 os.Exit(1) |
510 os.Exit(1) |
502 } |
511 } |
503 } |
512 } |
|
513 |
|
514 // Count empty files and drop them if they should be ignored |
504 emptyCount := data.dropEmptyFiles(ignoreEmpty) |
515 emptyCount := data.dropEmptyFiles(ignoreEmpty) |
|
516 |
|
517 // Display a small report |
505 if verbose { |
518 if verbose { |
506 if data.ignoreCount > 0 { |
519 if data.ignoreCount > 0 { |
507 myLog.Printf(1, " %d special files were ignored\n", |
520 myLog.Printf(1, " %d special files were ignored\n", |
508 data.ignoreCount) |
521 data.ignoreCount) |
509 } |
522 } |
512 false)) |
525 false)) |
513 if emptyCount > 0 { |
526 if emptyCount > 0 { |
514 myLog.Printf(1, " %d empty files were ignored\n", |
527 myLog.Printf(1, " %d empty files were ignored\n", |
515 emptyCount) |
528 emptyCount) |
516 } |
529 } |
517 data.dispCount() // XXX |
530 data.dispCount() |
518 myLog.Println(3, "* Number of size groups:", len(data.sizeGroups)) |
531 myLog.Println(3, "* Number of size groups:", len(data.sizeGroups)) |
519 } |
532 } |
520 |
533 |
521 // Remove unique sizes |
534 // Remove unique sizes and hard links |
522 myLog.Println(1, "* Removing files with unique size and hard links...") |
535 myLog.Println(1, "* Removing files with unique size and hard links...") |
523 hardLinkCount, uniqueSizeCount := data.initialCleanup() |
536 hardLinkCount, uniqueSizeCount := data.initialCleanup() |
524 if verbose { |
537 if verbose { |
525 myLog.Printf(2, " Dropped %d files with unique size\n", |
538 myLog.Printf(2, " Dropped %d files with unique size\n", |
526 uniqueSizeCount) |
539 uniqueSizeCount) |
527 myLog.Printf(2, " Dropped %d hard links\n", hardLinkCount) |
540 myLog.Printf(2, " Dropped %d hard links\n", hardLinkCount) |
528 myLog.Println(3, "* Number of size groups:", len(data.sizeGroups)) |
541 myLog.Println(3, "* Number of size groups:", len(data.sizeGroups)) |
529 data.dispCount() // XXX |
542 data.dispCount() |
530 } |
543 } |
531 |
544 |
532 // Get list of dupes |
545 // Get the final list of dupes, using checksums |
533 myLog.Println(1, "* Computing checksums...") |
546 myLog.Println(1, "* Computing checksums...") |
534 var result foListList |
547 var result foListList |
535 if len(data.emptyFiles) > 0 { |
548 if len(data.emptyFiles) > 0 { |
536 result = append(result, data.emptyFiles) |
549 result = append(result, data.emptyFiles) |
537 } |
550 } |