goduf.go
changeset 37 a7662fbfbe02
parent 36 e918b7e63748
child 38 25db238bf03f
equal deleted inserted replaced
36:e918b7e63748 37:a7662fbfbe02
    49 	noChecksum sumType = iota
    49 	noChecksum sumType = iota
    50 	fullChecksum
    50 	fullChecksum
    51 	partialChecksum
    51 	partialChecksum
    52 )
    52 )
    53 
    53 
       
    54 type Options struct {
       
    55 	Summary     bool
       
    56 	OutToJSON   bool
       
    57 	SkipPartial bool
       
    58 	IgnoreEmpty bool
       
    59 }
       
    60 
    54 // Results contains the results of the duplicates search
    61 // Results contains the results of the duplicates search
    55 type Results struct {
    62 type Results struct {
    56 	Groups             []ResultSet `json:"groups"`
    63 	Groups             []ResultSet `json:"groups"`
    57 	Duplicates         uint        `json:"duplicates"`
    64 	Duplicates         uint        `json:"duplicates"`
    58 	NumberOfSets       uint        `json:"number_of_sets"`
    65 	NumberOfSets       uint        `json:"number_of_sets"`
   465 		return fmt.Sprintf("%d %s", humanSize, units[n])
   472 		return fmt.Sprintf("%d %s", humanSize, units[n])
   466 	}
   473 	}
   467 	return fmt.Sprintf("%d bytes (%d %s)", sizeBytes, humanSize, units[n])
   474 	return fmt.Sprintf("%d bytes (%d %s)", sizeBytes, humanSize, units[n])
   468 }
   475 }
   469 
   476 
   470 // It all starts here.
   477 func duf(dirs []string, options Options) (Results, error) {
   471 func main() {
       
   472 	var verbose bool
   478 	var verbose bool
   473 	var summary bool
       
   474 	var outToJSON bool
       
   475 	var skipPartial bool
       
   476 	var ignoreEmpty bool
       
   477 
       
   478 	// Assertion on constant values
       
   479 	if minSizePartialChecksum <= 2*medsumBytes {
       
   480 		myLog.Fatal("Internal error: assert minSizePartialChecksum > 2*medsumBytes")
       
   481 	}
       
   482 
       
   483 	// Command line parameters parsing
       
   484 	flag.BoolVar(&verbose, "verbose", false, "Be verbose (verbosity=1)")
       
   485 	flag.BoolVar(&verbose, "v", false, "See --verbose")
       
   486 	flag.BoolVar(&outToJSON, "json", false, "Use JSON format for output")
       
   487 	flag.BoolVar(&summary, "summary", false, "Do not display the duplicate list")
       
   488 	flag.BoolVar(&summary, "s", false, "See --summary")
       
   489 	flag.BoolVar(&skipPartial, "skip-partial", false, "Skip partial checksums")
       
   490 	flag.IntVar(&myLog.verbosity, "verbosity", 0,
       
   491 		"Set verbosity level (1-6)")
       
   492 	flag.IntVar(&myLog.verbosity, "vl", 0, "See verbosity")
       
   493 	timings := flag.Bool("timings", false, "Set detailed log timings")
       
   494 	flag.BoolVar(&ignoreEmpty, "no-empty", false, "Ignore empty files")
       
   495 
       
   496 	flag.Parse()
       
   497 
       
   498 	// Set verbosity: --verbose=true == --verbosity=1
       
   499 	if myLog.verbosity > 0 {
   479 	if myLog.verbosity > 0 {
   500 		verbose = true
   480 		verbose = true
   501 	} else if verbose == true {
   481 	}
   502 		myLog.verbosity = 1
   482 
   503 	}
   483 	var results Results
   504 
       
   505 	if len(flag.Args()) == 0 {
       
   506 		// TODO: more helpful usage statement
       
   507 		myLog.Println(-1, "Usage:", os.Args[0],
       
   508 			"[options] base_directory|file...")
       
   509 		os.Exit(0)
       
   510 	}
       
   511 
       
   512 	// Change log format for benchmarking
       
   513 	if *timings {
       
   514 		myLog.SetBenchFlags()
       
   515 	}
       
   516 
       
   517 	data.sizeGroups = make(map[int64]*FileObjList)
   484 	data.sizeGroups = make(map[int64]*FileObjList)
       
   485 
   518 	myLog.Println(1, "* Reading file metadata")
   486 	myLog.Println(1, "* Reading file metadata")
   519 
   487 
   520 	for _, root := range flag.Args() {
   488 	for _, root := range dirs {
   521 		if err := filepath.Walk(root, visit); err != nil {
   489 		if err := filepath.Walk(root, visit); err != nil {
   522 			myLog.Printf(-1, "* Error: could not read file tree:\n")
   490 			return results, fmt.Errorf("could not read file tree: %v", err)
   523 			myLog.Printf(-1, "> %v\n", err)
       
   524 			os.Exit(1)
       
   525 		}
   491 		}
   526 	}
   492 	}
   527 
   493 
   528 	// Count empty files and drop them if they should be ignored
   494 	// Count empty files and drop them if they should be ignored
   529 	emptyCount := data.dropEmptyFiles(ignoreEmpty)
   495 	emptyCount := data.dropEmptyFiles(options.IgnoreEmpty)
   530 
   496 
   531 	// Display a small report
   497 	// Display a small report
   532 	if verbose {
   498 	if verbose {
   533 		if data.ignoreCount > 0 {
   499 		if data.ignoreCount > 0 {
   534 			myLog.Printf(1, "  %d special files were ignored\n",
   500 			myLog.Printf(1, "  %d special files were ignored\n",
   560 	myLog.Println(1, "* Computing checksums...")
   526 	myLog.Println(1, "* Computing checksums...")
   561 	var result foListList
   527 	var result foListList
   562 	if len(data.emptyFiles) > 0 {
   528 	if len(data.emptyFiles) > 0 {
   563 		result = append(result, data.emptyFiles)
   529 		result = append(result, data.emptyFiles)
   564 	}
   530 	}
   565 	result = append(result, data.findDupes(skipPartial)...)
   531 	result = append(result, data.findDupes(options.SkipPartial)...)
   566 
   532 
   567 	myLog.Println(3, "* Number of match groups:", len(result))
   533 	myLog.Println(3, "* Number of match groups:", len(result))
   568 
   534 
   569 	// Done!  Prepare results data
   535 	// Done!  Prepare results data
   570 	if len(result) > 0 && !summary {
   536 	if len(result) > 0 && !options.Summary {
   571 		myLog.Println(1, "* Dupes:")
   537 		myLog.Println(1, "* Dupes:")
   572 	}
   538 	}
   573 
   539 
   574 	// Sort files by path inside each group
   540 	// Sort files by path inside each group
   575 	for _, l := range result {
   541 	for _, l := range result {
   576 		sort.Sort(byFilePathName(l))
   542 		sort.Sort(byFilePathName(l))
   577 	}
   543 	}
   578 	// Sort groups by increasing size (of the duplicated files)
   544 	// Sort groups by increasing size (of the duplicated files)
   579 	sort.Sort(byGroupFileSize(result))
   545 	sort.Sort(byGroupFileSize(result))
   580 
       
   581 	var results Results
       
   582 
   546 
   583 	for _, l := range result {
   547 	for _, l := range result {
   584 		size := uint64(l[0].Size())
   548 		size := uint64(l[0].Size())
   585 		// We do not count the size of the 1st item
   549 		// We do not count the size of the 1st item
   586 		// so we get only duplicate size.
   550 		// so we get only duplicate size.
   593 		results.Groups = append(results.Groups, newSet)
   557 		results.Groups = append(results.Groups, newSet)
   594 	}
   558 	}
   595 	results.RedundantDataSizeH = formatSize(results.RedundantDataSize, true)
   559 	results.RedundantDataSizeH = formatSize(results.RedundantDataSize, true)
   596 	results.TotalFileCount = data.cmpt
   560 	results.TotalFileCount = data.cmpt
   597 
   561 
       
   562 	return results, nil
       
   563 }
       
   564 
       
   565 // It all starts here.
       
   566 func main() {
       
   567 	var verbose bool
       
   568 	var options Options
       
   569 
       
   570 	// Assertion on constant values
       
   571 	if minSizePartialChecksum <= 2*medsumBytes {
       
   572 		myLog.Fatal("Internal error: assert minSizePartialChecksum > 2*medsumBytes")
       
   573 	}
       
   574 
       
   575 	// Command line parameters parsing
       
   576 	flag.BoolVar(&verbose, "verbose", false, "Be verbose (verbosity=1)")
       
   577 	flag.BoolVar(&verbose, "v", false, "See --verbose")
       
   578 	flag.BoolVar(&options.OutToJSON, "json", false, "Use JSON format for output")
       
   579 	flag.BoolVar(&options.Summary, "summary", false, "Do not display the duplicate list")
       
   580 	flag.BoolVar(&options.Summary, "s", false, "See --summary")
       
   581 	flag.BoolVar(&options.SkipPartial, "skip-partial", false, "Skip partial checksums")
       
   582 	flag.IntVar(&myLog.verbosity, "verbosity", 0,
       
   583 		"Set verbosity level (1-6)")
       
   584 	flag.IntVar(&myLog.verbosity, "vl", 0, "See verbosity")
       
   585 	timings := flag.Bool("timings", false, "Set detailed log timings")
       
   586 	flag.BoolVar(&options.IgnoreEmpty, "no-empty", false, "Ignore empty files")
       
   587 
       
   588 	flag.Parse()
       
   589 
       
   590 	// Set verbosity: --verbose=true == --verbosity=1
       
   591 	if myLog.verbosity > 0 {
       
   592 		verbose = true
       
   593 	} else if verbose == true {
       
   594 		myLog.verbosity = 1
       
   595 	}
       
   596 
       
   597 	if len(flag.Args()) == 0 {
       
   598 		// TODO: more helpful usage statement
       
   599 		myLog.Println(-1, "Usage:", os.Args[0],
       
   600 			"[options] base_directory|file...")
       
   601 		os.Exit(0)
       
   602 	}
       
   603 
       
   604 	// Change log format for benchmarking
       
   605 	if *timings {
       
   606 		myLog.SetBenchFlags()
       
   607 	}
       
   608 
       
   609 	results, err := duf(flag.Args(), options)
       
   610 	if err != nil {
       
   611 		myLog.Fatal("ERROR: " + err.Error())
       
   612 	}
       
   613 
   598 	// Output the results
   614 	// Output the results
   599 	displayResults(results, outToJSON, summary)
   615 	displayResults(results, options.OutToJSON, options.Summary)
   600 }
   616 }