Display existing hard links in result sets default tip
authorMikael Berthe <mikael@lilotux.net>
Wed, 23 Feb 2022 22:56:53 +0100
changeset 45 ea6a9ba7a3c8
parent 44 af90d9396ef1
Display existing hard links in result sets. This is a breaking change in the plain text output, but the list previously displayed in case of existing hard links was somewhat arbitrary, since not all the hard-linked filenames were displayed. Here's a sample JSON result with this patch: { "file_size": 9216, "paths": [ "test_tree/f09-1_5.raw", "test_tree/f09-4_5.raw" ], "links": { "test_tree/f09-1_5.raw": [ "test_tree/f09-2_5.raw", "test_tree/f09-3_5.raw" ], "test_tree/f09-4_5.raw": [ "test_tree/f09-5_5.raw" ] } } Here the 5 files have the same contents, but there are two hard-link groups: "test_tree/f09-1_5.raw", "test_tree/f09-2_5.raw" and "test_tree/f09-3_5.raw" are hard-linked, and "test_tree/f09-4_5.raw" and "test_tree/f09-5_5.raw" are hard-linked. Here's the same set displayed with the regular text output: Group #5 (2 files * 9216 bytes): test_tree/f09-1_5.raw test_tree/f09-2_5.raw test_tree/f09-3_5.raw test_tree/f09-4_5.raw test_tree/f09-5_5.raw (The link file names are indented using 1 space character.)
goduf.go
output.go
--- a/goduf.go	Sat Dec 01 22:46:09 2018 +0100
+++ b/goduf.go	Wed Feb 23 22:56:53 2022 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014-2018 Mikael Berthe <mikael@lilotux.net>
+ * Copyright (C) 2014-2022 Mikael Berthe <mikael@lilotux.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -73,8 +73,9 @@
 
 // ResultSet contains a group of identical duplicate files
 type ResultSet struct {
-	FileSize uint64   `json:"file_size"` // Size of each item
-	Paths    []string `json:"paths"`     // List of file paths
+	FileSize uint64              `json:"file_size"`       // Size of each item
+	Paths    []string            `json:"paths"`           // List of file paths
+	Links    map[string][]string `json:"links,omitempty"` // Existing hard links
 }
 
 type fileObj struct {
@@ -96,6 +97,7 @@
 	sizeGroups  map[int64]*FileObjList
 	emptyFiles  FileObjList
 	ignoreCount int
+	hardLinks   map[string][]string
 }
 
 var data dataT
@@ -411,19 +413,20 @@
 		// TODO: Should we also check for duplicate paths?
 		for {
 			type devinode struct{ dev, ino uint64 }
-			devinodes := make(map[devinode]bool)
+			devinodes := make(map[devinode]string)
 			var hardLinkIndex int
 
 			for i, fo := range *sgListP {
 				dev, ino := GetDevIno(fo)
 				di := devinode{dev, ino}
-				if _, hlink := devinodes[di]; hlink {
+				if primaryPath, ok := devinodes[di]; ok {
 					hardLinkIndex = i
 					hardLinkCount++
 					hardlinksFound = true
+					data.hardLinks[primaryPath] = append(data.hardLinks[primaryPath], fo.FilePath)
 					break
 				} else {
-					devinodes[di] = true
+					devinodes[di] = fo.FilePath
 				}
 			}
 
@@ -457,6 +460,7 @@
 
 	var results Results
 	data.sizeGroups = make(map[int64]*FileObjList)
+	data.hardLinks = make(map[string][]string)
 
 	myLog.Println(1, "* Reading file metadata")
 
@@ -523,6 +527,7 @@
 	// Sort groups by increasing size (of the duplicated files)
 	sort.Sort(byGroupFileSize(result))
 
+	// Build the result duplicate sets
 	for _, l := range result {
 		size := uint64(l[0].Size())
 		// We do not count the size of the 1st item
@@ -532,6 +537,12 @@
 		for _, f := range l {
 			newSet.Paths = append(newSet.Paths, f.FilePath)
 			results.Duplicates++
+			if len(data.hardLinks[f.FilePath]) > 0 {
+				if newSet.Links == nil {
+					newSet.Links = make(map[string][]string)
+				}
+				newSet.Links[f.FilePath] = data.hardLinks[f.FilePath]
+			}
 		}
 		results.Groups = append(results.Groups, newSet)
 	}
--- a/output.go	Sat Dec 01 22:46:09 2018 +0100
+++ b/output.go	Wed Feb 23 22:56:53 2022 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014-2018 Mikael Berthe <mikael@lilotux.net>
+ * Copyright (C) 2014-2022 Mikael Berthe <mikael@lilotux.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -65,6 +65,11 @@
 				len(g.Paths), formatSize(g.FileSize, true))
 			for _, f := range g.Paths {
 				fmt.Println(f)
+				if g.Links != nil { // Display linked files
+					for _, lf := range g.Links[f] {
+						fmt.Printf(" %s\n", lf)
+					}
+				}
 			}
 		}
 	}