Display existing hard links in result sets
This is a breaking change in the plain text output:
previously the list displayed for sets containing hard links
was arbitrary, since not all of the hard-linked filenames were displayed.
Here's a sample JSON result with this patch:
{
"file_size": 9216,
"paths": [
"test_tree/f09-1_5.raw",
"test_tree/f09-4_5.raw"
],
"links": {
"test_tree/f09-1_5.raw": [
"test_tree/f09-2_5.raw",
"test_tree/f09-3_5.raw"
],
"test_tree/f09-4_5.raw": [
"test_tree/f09-5_5.raw"
]
}
}
Here the 5 files have the same contents, but there are two hardlink groups:
"test_tree/f09-1_5.raw"
"test_tree/f09-2_5.raw"
"test_tree/f09-3_5.raw"
are hard-linked, and
"test_tree/f09-4_5.raw"
"test_tree/f09-5_5.raw"
are hard-linked.
Here's the same set displayed with the regular text output:
Group #5 (2 files * 9216 bytes):
test_tree/f09-1_5.raw
test_tree/f09-2_5.raw
test_tree/f09-3_5.raw
test_tree/f09-4_5.raw
test_tree/f09-5_5.raw
(The hard-linked file names are indented with one space character.)
--- a/goduf.go Sat Dec 01 22:46:09 2018 +0100
+++ b/goduf.go Wed Feb 23 22:56:53 2022 +0100
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2014-2018 Mikael Berthe <mikael@lilotux.net>
+ * Copyright (C) 2014-2022 Mikael Berthe <mikael@lilotux.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -73,8 +73,9 @@
// ResultSet contains a group of identical duplicate files
type ResultSet struct {
- FileSize uint64 `json:"file_size"` // Size of each item
- Paths []string `json:"paths"` // List of file paths
+ FileSize uint64 `json:"file_size"` // Size of each item
+ Paths []string `json:"paths"` // List of file paths
+ Links map[string][]string `json:"links,omitempty"` // Existing hard links
}
type fileObj struct {
@@ -96,6 +97,7 @@
sizeGroups map[int64]*FileObjList
emptyFiles FileObjList
ignoreCount int
+ hardLinks map[string][]string
}
var data dataT
@@ -411,19 +413,20 @@
// TODO: Should we also check for duplicate paths?
for {
type devinode struct{ dev, ino uint64 }
- devinodes := make(map[devinode]bool)
+ devinodes := make(map[devinode]string)
var hardLinkIndex int
for i, fo := range *sgListP {
dev, ino := GetDevIno(fo)
di := devinode{dev, ino}
- if _, hlink := devinodes[di]; hlink {
+ if primaryPath, ok := devinodes[di]; ok {
hardLinkIndex = i
hardLinkCount++
hardlinksFound = true
+ data.hardLinks[primaryPath] = append(data.hardLinks[primaryPath], fo.FilePath)
break
} else {
- devinodes[di] = true
+ devinodes[di] = fo.FilePath
}
}
@@ -457,6 +460,7 @@
var results Results
data.sizeGroups = make(map[int64]*FileObjList)
+ data.hardLinks = make(map[string][]string)
myLog.Println(1, "* Reading file metadata")
@@ -523,6 +527,7 @@
// Sort groups by increasing size (of the duplicated files)
sort.Sort(byGroupFileSize(result))
+ // Build the result duplicate sets
for _, l := range result {
size := uint64(l[0].Size())
// We do not count the size of the 1st item
@@ -532,6 +537,12 @@
for _, f := range l {
newSet.Paths = append(newSet.Paths, f.FilePath)
results.Duplicates++
+ if len(data.hardLinks[f.FilePath]) > 0 {
+ if newSet.Links == nil {
+ newSet.Links = make(map[string][]string)
+ }
+ newSet.Links[f.FilePath] = data.hardLinks[f.FilePath]
+ }
}
results.Groups = append(results.Groups, newSet)
}
--- a/output.go Sat Dec 01 22:46:09 2018 +0100
+++ b/output.go Wed Feb 23 22:56:53 2022 +0100
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2014-2018 Mikael Berthe <mikael@lilotux.net>
+ * Copyright (C) 2014-2022 Mikael Berthe <mikael@lilotux.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -65,6 +65,11 @@
len(g.Paths), formatSize(g.FileSize, true))
for _, f := range g.Paths {
fmt.Println(f)
+ if g.Links != nil { // Display linked files
+ for _, lf := range g.Links[f] {
+ fmt.Printf(" %s\n", lf)
+ }
+ }
}
}
}