goduf.go
changeset 36 e918b7e63748
parent 35 730377b4449f
child 37 a7662fbfbe02
equal deleted inserted replaced
35:730377b4449f 36:e918b7e63748
    49 	noChecksum sumType = iota
    49 	noChecksum sumType = iota
    50 	fullChecksum
    50 	fullChecksum
    51 	partialChecksum
    51 	partialChecksum
    52 )
    52 )
    53 
    53 
       
    54 // Results contains the results of the duplicates search
       
    55 type Results struct {
       
    56 	Groups             []ResultSet `json:"groups"`
       
    57 	Duplicates         uint        `json:"duplicates"`
       
    58 	NumberOfSets       uint        `json:"number_of_sets"`
       
    59 	RedundantDataSize  uint64      `json:"redundant_data_size"`
       
    60 	RedundantDataSizeH string      `json:"redundant_data_size_h"`
       
    61 	TotalFileCount     uint        `json:"total_file_count"`
       
    62 }
       
    63 
       
    64 // ResultSet contains a group of identical duplicate files
       
    65 type ResultSet struct {
       
    66 	Size  uint64   `json:"size"`  // Size of each item
       
    67 	Paths []string `json:"paths"` // List of file paths
       
    68 }
       
    69 
    54 type fileObj struct {
    70 type fileObj struct {
    55 	//Unique   bool
    71 	//Unique   bool
    56 	FilePath string
    72 	FilePath string
    57 	os.FileInfo
    73 	os.FileInfo
    58 	PartialHash []byte
    74 	PartialHash []byte
   453 
   469 
   454 // It all starts here.
   470 // It all starts here.
   455 func main() {
   471 func main() {
   456 	var verbose bool
   472 	var verbose bool
   457 	var summary bool
   473 	var summary bool
       
   474 	var outToJSON bool
   458 	var skipPartial bool
   475 	var skipPartial bool
   459 	var ignoreEmpty bool
   476 	var ignoreEmpty bool
   460 
   477 
   461 	// Assertion on constant values
   478 	// Assertion on constant values
   462 	if minSizePartialChecksum <= 2*medsumBytes {
   479 	if minSizePartialChecksum <= 2*medsumBytes {
   464 	}
   481 	}
   465 
   482 
   466 	// Command line parameters parsingg
   483 	// Command line parameters parsingg
   467 	flag.BoolVar(&verbose, "verbose", false, "Be verbose (verbosity=1)")
   484 	flag.BoolVar(&verbose, "verbose", false, "Be verbose (verbosity=1)")
   468 	flag.BoolVar(&verbose, "v", false, "See --verbose")
   485 	flag.BoolVar(&verbose, "v", false, "See --verbose")
       
   486 	flag.BoolVar(&outToJSON, "json", false, "Use JSON format for output")
   469 	flag.BoolVar(&summary, "summary", false, "Do not display the duplicate list")
   487 	flag.BoolVar(&summary, "summary", false, "Do not display the duplicate list")
   470 	flag.BoolVar(&summary, "s", false, "See --summary")
   488 	flag.BoolVar(&summary, "s", false, "See --summary")
   471 	flag.BoolVar(&skipPartial, "skip-partial", false, "Skip partial checksums")
   489 	flag.BoolVar(&skipPartial, "skip-partial", false, "Skip partial checksums")
   472 	flag.IntVar(&myLog.verbosity, "verbosity", 0,
   490 	flag.IntVar(&myLog.verbosity, "verbosity", 0,
   473 		"Set verbosity level (1-6)")
   491 		"Set verbosity level (1-6)")
   546 	}
   564 	}
   547 	result = append(result, data.findDupes(skipPartial)...)
   565 	result = append(result, data.findDupes(skipPartial)...)
   548 
   566 
   549 	myLog.Println(3, "* Number of match groups:", len(result))
   567 	myLog.Println(3, "* Number of match groups:", len(result))
   550 
   568 
   551 	// Done!  Dump dupes
   569 	// Done!  Prepare results data
   552 	if len(result) > 0 && !summary {
   570 	if len(result) > 0 && !summary {
   553 		myLog.Println(1, "* Dupes:")
   571 		myLog.Println(1, "* Dupes:")
   554 	}
   572 	}
   555 
   573 
   556 	// Sort files by path inside each group
   574 	// Sort files by path inside each group
   558 		sort.Sort(byFilePathName(l))
   576 		sort.Sort(byFilePathName(l))
   559 	}
   577 	}
   560 	// Sort groups by increasing size (of the duplicated files)
   578 	// Sort groups by increasing size (of the duplicated files)
   561 	sort.Sort(byGroupFileSize(result))
   579 	sort.Sort(byGroupFileSize(result))
   562 
   580 
   563 	var dupeSize uint64
   581 	var results Results
   564 	data.cmpt = 0
   582 
   565 	for i, l := range result {
   583 	for _, l := range result {
   566 		size := uint64(l[0].Size())
   584 		size := uint64(l[0].Size())
   567 		// We do not count the size of the 1st item
   585 		// We do not count the size of the 1st item
   568 		// so we get only duplicate size.
   586 		// so we get only duplicate size.
   569 		dupeSize += size * uint64(len(l)-1)
   587 		results.RedundantDataSize += size * uint64(len(l)-1)
   570 		if !summary {
   588 		newSet := ResultSet{Size: size}
   571 			fmt.Printf("\nGroup #%d (%d files * %v):\n", i+1,
       
   572 				len(l), formatSize(size, true))
       
   573 		}
       
   574 		for _, f := range l {
   589 		for _, f := range l {
   575 			if !summary {
   590 			newSet.Paths = append(newSet.Paths, f.FilePath)
   576 				fmt.Println(f.FilePath)
   591 			results.Duplicates++
   577 			}
   592 		}
   578 			data.cmpt++
   593 		results.Groups = append(results.Groups, newSet)
   579 		}
   594 	}
   580 	}
   595 	results.RedundantDataSizeH = formatSize(results.RedundantDataSize, true)
   581 	summaryLevel := 1 // Default verbosity for the summary line
   596 	results.TotalFileCount = data.cmpt
   582 	if summary == false {
   597 
   583 		// Add a trailing newline
   598 	// Output the results
   584 		if len(result) > 0 {
   599 	displayResults(results, outToJSON, summary)
   585 			fmt.Println("")
   600 }
   586 		}
       
   587 	} else {
       
   588 		// The summary is requested so we lower the verbosity level
       
   589 		summaryLevel = 0
       
   590 	}
       
   591 
       
   592 	myLog.Println(summaryLevel, "Final count:", data.cmpt,
       
   593 		"duplicate files in", len(result), "sets")
       
   594 	myLog.Println(summaryLevel, "Redundant data size:",
       
   595 		formatSize(dupeSize, false))
       
   596 }