# HG changeset patch # User Mikael Berthe # Date 1404059402 -7200 # Node ID f7ce9d750e8323b92b6aae5df625e291aa90fa39 # Parent 3389a17fc0d25b6ea279999215226c3eb10c2bc4 Better documentation diff -r 3389a17fc0d2 -r f7ce9d750e83 TODO.md --- a/TODO.md Sun Jun 29 18:00:13 2014 +0200 +++ b/TODO.md Sun Jun 29 18:30:02 2014 +0200 @@ -1,7 +1,5 @@ # TODO: -* Clean up, fix variable names * Add version number -* Sort results? * Possibility to accept file list from stdin * Check Windows portability diff -r 3389a17fc0d2 -r f7ce9d750e83 goduf.go --- a/goduf.go Sun Jun 29 18:00:13 2014 +0200 +++ b/goduf.go Sun Jun 29 18:30:02 2014 +0200 @@ -73,6 +73,7 @@ verbosity int } +// Implement my own logger var myLog myLogT func (l *myLogT) Printf(level int, format string, args ...interface{}) { @@ -99,6 +100,9 @@ fmt.Fprintln(os.Stderr, args...) } +// visit is called for every file and directory. +// We check the file object is correct (regular, readable...) and add +// it to the data.sizeGroups hash. func visit(path string, f os.FileInfo, err error) error { if err != nil { if f == nil { @@ -138,7 +142,8 @@ return nil } -func (fo *fileObj) CheckSum() error { +// Checksum computes the file's complete SHA1 hash. +func (fo *fileObj) Checksum() error { file, err := os.Open(fo.FilePath) if err != nil { return err @@ -158,7 +163,8 @@ return nil } -func (fo *fileObj) MedSum() error { +// partialChecksum computes the file's partial SHA1 hash (first and last bytes). +func (fo *fileObj) partialChecksum() error { file, err := os.Open(fo.FilePath) if err != nil { return err @@ -185,18 +191,20 @@ return nil } +// Sum computes the file's SHA1 hash, partial or full according to sType. 
func (fo *fileObj) Sum(sType sumType) error { if sType == partialChecksum { - return fo.MedSum() + return fo.partialChecksum() } else if sType == fullChecksum { - return fo.CheckSum() + return fo.Checksum() } else if sType == noChecksum { return nil } panic("Internal error: Invalid sType") } -func (data *dataT) dispCount() { // FIXME rather useless +// dispCount displays statistics to the user. +func (data *dataT) dispCount() { // Is this still useful? if myLog.verbosity < 4 { return } @@ -238,19 +246,9 @@ return hex.EncodeToString(hbytes), nil } -func (fileList FileObjList) computeSheduledChecksums() { - // Sort the list for better efficiency - sort.Sort(ByInode(fileList)) - - //myLog.Printf(6, " . will compute %d checksums\n", len(fileList)) - - // Compute checksums - for _, fo := range fileList { - if err := fo.Sum(fo.needHash); err != nil { - myLog.Println(0, "Error:", err) - } - } -} +// computeSheduledChecksums calculates the checksums for all the files +// from the fileLists slice items (the kind of hash is taken from the +// needHash field). func computeSheduledChecksums(fileLists ...foListList) { var bigFileList FileObjList // Merge the lists of FileObjList lists and create a unique list @@ -278,6 +276,8 @@ } } +// findDupesChecksums splits the fileObj list into several lists with the +// same sType hash. func (fileList FileObjList) findDupesChecksums(sType sumType) foListList { var dupeList foListList var scheduleFull foListList @@ -349,6 +349,9 @@ return dupeList } +// dropEmptyFiles removes the empty files from the main map, since we don't +// have to do any processing about them. +// If ignoreEmpty is false, the empty file list is saved in data.emptyFiles. func (data *dataT) dropEmptyFiles(ignoreEmpty bool) (emptyCount int) { sgListP, ok := data.sizeGroups[0] if ok == false { @@ -388,6 +391,7 @@ // Instead of this loop, another way would be to use the field // "Unique" of the fileObj to mark them to be discarded // and remove them all at the end. 
+ // TODO: Should we also check for duplicate paths? for { type devinode struct{ dev, ino uint64 } devinodes := make(map[devinode]bool) @@ -428,6 +432,7 @@ return } +// formatSize returns the size in a string with a human-readable format. func formatSize(sizeBytes uint64, short bool) string { var units = map[int]string{ 0: "B", @@ -455,12 +460,14 @@ return fmt.Sprintf("%d bytes (%d %s)", sizeBytes, humanSize, units[n]) } +// It all starts here. func main() { var verbose bool var summary bool var skipPartial bool var ignoreEmpty bool + // Command line parameters parsing flag.BoolVar(&verbose, "verbose", false, "Be verbose (verbosity=1)") flag.BoolVar(&verbose, "v", false, "See --verbose") flag.BoolVar(&summary, "summary", false, "Do not display the duplicate list") @@ -474,6 +481,7 @@ flag.Parse() + // Set verbosity: --verbose=true == --verbosity=1 if myLog.verbosity > 0 { verbose = true } else if verbose == true { @@ -483,10 +491,11 @@ if len(flag.Args()) == 0 { // TODO: more helpful usage statement myLog.Println(-1, "Usage:", os.Args[0], - "[options] base_directory") + "[options] base_directory|file...") os.Exit(0) } + // Change log format for benchmarking if *timings { log.SetFlags(log.LstdFlags | log.Lmicroseconds) } @@ -501,7 +510,11 @@ os.Exit(1) } } + + // Count empty files and drop them if they should be ignored emptyCount := data.dropEmptyFiles(ignoreEmpty) + + // Display a small report if verbose { if data.ignoreCount > 0 { myLog.Printf(1, " %d special files were ignored\n", @@ -514,11 +527,11 @@ myLog.Printf(1, " %d empty files were ignored\n", emptyCount) } - data.dispCount() // XXX + data.dispCount() myLog.Println(3, "* Number of size groups:", len(data.sizeGroups)) } - // Remove unique sizes + // Remove unique sizes and hard links myLog.Println(1, "* Removing files with unique size and hard links...") hardLinkCount, uniqueSizeCount := data.initialCleanup() if verbose { @@ -526,10 +539,10 @@ uniqueSizeCount) myLog.Printf(2, " Dropped %d hard links\n", 
hardLinkCount) myLog.Println(3, "* Number of size groups:", len(data.sizeGroups)) - data.dispCount() // XXX + data.dispCount() } - // Get list of dupes + // Get the final list of dupes, using checksums myLog.Println(1, "* Computing checksums...") var result foListList if len(data.emptyFiles) > 0 {