author | Mikael Berthe <mikael@lilotux.net> |
Sun, 14 Oct 2018 17:27:37 +0200 | |
changeset 42 | 3fa13770e970 |
parent 41 | bce80b708ddb |
child 43 | 95d940f9598e |
permissions | -rw-r--r-- |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
1 |
/* |
34
b70346ff153d
Split goduf.go into several files
Mikael Berthe <mikael@lilotux.net>
parents:
25
diff
changeset
|
2 |
* Copyright (C) 2014-2018 Mikael Berthe <mikael@lilotux.net> |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
3 |
* |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
4 |
* This program is free software; you can redistribute it and/or modify |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
5 |
* it under the terms of the GNU General Public License as published by |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
6 |
* the Free Software Foundation; either version 2 of the License, or (at |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
7 |
* your option) any later version. |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
8 |
* |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
9 |
* This program is distributed in the hope that it will be useful, but |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
10 |
* WITHOUT ANY WARRANTY; without even the implied warranty of |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
11 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
12 |
* General Public License for more details. |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
13 |
* |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
14 |
* You should have received a copy of the GNU General Public License |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
15 |
* along with this program; if not, write to the Free Software |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
16 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
17 |
* USA |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
18 |
*/ |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
19 |
|
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
20 |
// This program (Goduf) is a fast duplicate file finder. |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
21 |
// Use goduf --help to get the list of available options. |
25 | 22 |
// |
24
40074e33c579
Add installation command line example
Mikael Berthe <mikael@lilotux.net>
parents:
23
diff
changeset
|
23 |
// Installation: |
40074e33c579
Add installation command line example
Mikael Berthe <mikael@lilotux.net>
parents:
23
diff
changeset
|
24 |
// |
40074e33c579
Add installation command line example
Mikael Berthe <mikael@lilotux.net>
parents:
23
diff
changeset
|
25 |
// % go get hg.lilotux.net/golang/mikael/goduf |
35
730377b4449f
Mention github mirror in the comments
Mikael Berthe <mikael@lilotux.net>
parents:
34
diff
changeset
|
26 |
// or |
730377b4449f
Mention github mirror in the comments
Mikael Berthe <mikael@lilotux.net>
parents:
34
diff
changeset
|
27 |
// % go get github.com/McKael/goduf |
24
40074e33c579
Add installation command line example
Mikael Berthe <mikael@lilotux.net>
parents:
23
diff
changeset
|
28 |
|
25 | 29 |
package main |
30 |
||
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
31 |
import ( |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
32 |
"crypto/sha1" |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
33 |
"encoding/hex" |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
34 |
"errors" |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
35 |
"flag" |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
36 |
"fmt" |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
37 |
"io" |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
38 |
"os" |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
39 |
"path/filepath" |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
40 |
"sort" |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
41 |
) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
42 |
|
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
43 |
const (
	// medsumBytes is the number of bytes that partialChecksum hashes
	// from the beginning of a file and again from its end.
	medsumBytes = 128

	// minSizePartialChecksum should be > 3*medsumBytes.
	// NOTE(review): presumably the minimum file size for which a partial
	// checksum is attempted; the code using it is outside this excerpt.
	minSizePartialChecksum = 49152
)
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
45 |
|
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
46 |
// sumType identifies which kind of checksum is requested for a file.
type sumType int

// Possible sumType values.  The zero value means "no checksum".
const (
	// noChecksum requests no hashing at all; fileObj.Sum returns nil for it.
	noChecksum sumType = iota
	// fullChecksum requests a hash of the whole file content.
	fullChecksum
	// partialChecksum requests a hash of the first and last bytes only.
	partialChecksum
)
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
53 |
|
38 | 54 |
// Options holds the boolean settings taken from the command-line flags.
//
// NOTE(review): the flag bound to each field is declared outside this
// excerpt; the per-field remarks below are inferred from the names only
// and should be confirmed against the flag definitions.
type Options struct {
	Summary     bool // presumably: display a summary only
	OutToJSON   bool // presumably: produce JSON output
	SkipPartial bool // presumably: skip the partial checksum stage
	IgnoreEmpty bool // presumably: do not report empty files
}
|
61 |
||
36 | 62 |
// Results contains the results of the duplicates search |
63 |
type Results struct { |
|
42 | 64 |
Groups []ResultSet `json:"groups"` // List of duplicate sets |
65 |
Duplicates uint `json:"duplicates"` // Number of duplicates |
|
66 |
NumberOfSets uint `json:"number_of_sets"` // Number of duplicate sets |
|
67 |
RedundantDataSizeBytes uint64 `json:"redundant_data_size_bytes"` // Redundant data size |
|
68 |
RedundantDataSizeHuman string `json:"redundant_data_size_human"` // Same, human-readable |
|
69 |
TotalFileCount uint `json:"total_file_count"` // Total number of checked files |
|
70 |
TotalSizeBytes uint64 `json:"total_size_bytes"` // Total size for checked files |
|
71 |
TotalSizeHuman string `json:"total_size_human"` // Same, human-readable |
|
36 | 72 |
} |
73 |
||
74 |
// ResultSet describes one group of identical (duplicate) files.
type ResultSet struct {
	// FileSize is the size of each file of the group.
	FileSize uint64 `json:"file_size"`
	// Paths lists the paths of the files of the group.
	Paths []string `json:"paths"`
}
79 |
||
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
80 |
type fileObj struct { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
81 |
//Unique bool |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
82 |
FilePath string |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
83 |
os.FileInfo |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
84 |
PartialHash []byte |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
85 |
Hash []byte |
8
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
86 |
needHash sumType |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
87 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
88 |
|
5
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
89 |
// FileObjList is only exported so that we can have a sort interface on inodes. |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
90 |
type FileObjList []*fileObj |
16 | 91 |
type foListList []FileObjList |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
92 |
|
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
93 |
type dataT struct { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
94 |
totalSize uint64 |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
95 |
cmpt uint |
14
ea4286b6c4b1
Get rid of the now useless sizeClass structure
Mikael Berthe <mikael@lilotux.net>
parents:
13
diff
changeset
|
96 |
sizeGroups map[int64]*FileObjList |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
97 |
emptyFiles FileObjList |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
98 |
ignoreCount int |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
99 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
100 |
|
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
101 |
var data dataT |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
102 |
|
20 | 103 |
// Implement my own logger |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
104 |
var myLog myLogT |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
105 |
|
20 | 106 |
// visit is called for every file and directory. |
107 |
// We check the file object is correct (regular, readable...) and add |
|
108 |
// it to the data.sizeGroups hash. |
|
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
109 |
func visit(path string, f os.FileInfo, err error) error { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
110 |
if err != nil { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
111 |
if f == nil { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
112 |
return err |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
113 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
114 |
if f.IsDir() { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
115 |
myLog.Println(-1, "Warning: cannot process directory:", |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
116 |
path) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
117 |
return filepath.SkipDir |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
118 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
119 |
|
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
120 |
myLog.Println(-1, "Ignoring ", path, " - ", err) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
121 |
data.ignoreCount++ |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
122 |
return nil |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
123 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
124 |
if f.IsDir() { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
125 |
return nil |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
126 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
127 |
|
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
128 |
if mode := f.Mode(); mode&os.ModeType != 0 { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
129 |
if mode&os.ModeSymlink != 0 { |
5
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
130 |
myLog.Println(6, "Ignoring symbolic link", path) |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
131 |
} else { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
132 |
myLog.Println(0, "Ignoring special file", path) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
133 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
134 |
data.ignoreCount++ |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
135 |
return nil |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
136 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
137 |
|
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
138 |
data.cmpt++ |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
139 |
data.totalSize += uint64(f.Size()) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
140 |
fo := &fileObj{FilePath: path, FileInfo: f} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
141 |
if _, ok := data.sizeGroups[f.Size()]; !ok { |
14
ea4286b6c4b1
Get rid of the now useless sizeClass structure
Mikael Berthe <mikael@lilotux.net>
parents:
13
diff
changeset
|
142 |
data.sizeGroups[f.Size()] = new(FileObjList) |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
143 |
} |
14
ea4286b6c4b1
Get rid of the now useless sizeClass structure
Mikael Berthe <mikael@lilotux.net>
parents:
13
diff
changeset
|
144 |
*data.sizeGroups[f.Size()] = append(*data.sizeGroups[f.Size()], fo) |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
145 |
return nil |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
146 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
147 |
|
20 | 148 |
// Checksum computes the file's complete SHA1 hash. |
149 |
func (fo *fileObj) Checksum() error { |
|
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
150 |
file, err := os.Open(fo.FilePath) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
151 |
if err != nil { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
152 |
return err |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
153 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
154 |
defer file.Close() |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
155 |
hash := sha1.New() |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
156 |
if size, err := io.Copy(hash, file); size != fo.Size() || err != nil { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
157 |
if err == nil { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
158 |
return errors.New("failed to read the whole file: " + |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
159 |
fo.FilePath) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
160 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
161 |
return err |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
162 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
163 |
|
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
164 |
fo.Hash = hash.Sum(nil) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
165 |
|
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
166 |
return nil |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
167 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
168 |
|
20 | 169 |
// partialChecksum computes the file's partial SHA1 hash (first and last bytes). |
170 |
func (fo *fileObj) partialChecksum() error { |
|
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
171 |
file, err := os.Open(fo.FilePath) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
172 |
if err != nil { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
173 |
return err |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
174 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
175 |
defer file.Close() |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
176 |
hash := sha1.New() |
19 | 177 |
|
178 |
// Read first bytes and last bytes from file |
|
179 |
for i := 0; i < 2; i++ { |
|
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
180 |
if _, err := io.CopyN(hash, file, medsumBytes); err != nil { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
181 |
if err == nil { |
19 | 182 |
const errmsg = "failed to read bytes from file: " |
183 |
return errors.New(errmsg + fo.FilePath) |
|
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
184 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
185 |
return err |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
186 |
} |
19 | 187 |
if i == 0 { // Seek to end of file |
188 |
file.Seek(0-medsumBytes, 2) |
|
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
189 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
190 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
191 |
|
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
192 |
fo.PartialHash = hash.Sum(nil) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
193 |
|
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
194 |
return nil |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
195 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
196 |
|
20 | 197 |
// Sum computes the file's SHA1 hash, partial or full according to sType. |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
198 |
func (fo *fileObj) Sum(sType sumType) error { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
199 |
if sType == partialChecksum { |
20 | 200 |
return fo.partialChecksum() |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
201 |
} else if sType == fullChecksum { |
20 | 202 |
return fo.Checksum() |
8
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
203 |
} else if sType == noChecksum { |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
204 |
return nil |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
205 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
206 |
panic("Internal error: Invalid sType") |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
207 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
208 |
|
20 | 209 |
// dispCount display statistics to the user. |
210 |
func (data *dataT) dispCount() { // It this still useful? |
|
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
211 |
if myLog.verbosity < 4 { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
212 |
return |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
213 |
} |
5
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
214 |
var c1, c1b, c2 int |
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
215 |
var s1 string |
14
ea4286b6c4b1
Get rid of the now useless sizeClass structure
Mikael Berthe <mikael@lilotux.net>
parents:
13
diff
changeset
|
216 |
for _, scListP := range data.sizeGroups { |
ea4286b6c4b1
Get rid of the now useless sizeClass structure
Mikael Berthe <mikael@lilotux.net>
parents:
13
diff
changeset
|
217 |
c1 += len(*scListP) |
5
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
218 |
c2++ |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
219 |
} |
5
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
220 |
c1b = len(data.emptyFiles) |
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
221 |
if c1b > 0 { |
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
222 |
s1 = fmt.Sprintf("+%d", c1b) |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
223 |
} |
5
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
224 |
myLog.Printf(4, " Current countdown: %d [%d%s/%d]\n", |
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
225 |
c1+c1b, c1, s1, c2) |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
226 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
227 |
|
8
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
228 |
// checksum returns the requested checksum as a string. |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
229 |
// If the checksum has not been pre-computed, it is calculated now. |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
230 |
func (fo fileObj) checksum(sType sumType) (string, error) { |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
231 |
var hbytes []byte |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
232 |
if sType == partialChecksum { |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
233 |
hbytes = fo.PartialHash |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
234 |
} else if sType == fullChecksum { |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
235 |
hbytes = fo.Hash |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
236 |
} else { |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
237 |
panic("Internal error: Invalid sType") |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
238 |
} |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
239 |
if hbytes == nil { |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
240 |
if err := fo.Sum(sType); err != nil { |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
241 |
return "", err |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
242 |
} |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
243 |
if sType == partialChecksum { |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
244 |
hbytes = fo.PartialHash |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
245 |
} else if sType == fullChecksum { |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
246 |
hbytes = fo.Hash |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
247 |
} |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
248 |
} |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
249 |
return hex.EncodeToString(hbytes), nil |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
250 |
} |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
251 |
|
20 | 252 |
// computeSheduledChecksums calculates the checksums for all the files |
253 |
// from the fileLists slice items (the kind of hash is taken from the |
|
254 |
// needHash field). |
|
16 | 255 |
func computeSheduledChecksums(fileLists ...foListList) { |
10
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
256 |
var bigFileList FileObjList |
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
257 |
// Merge the lists of FileObjList lists and create a unique list |
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
258 |
// of file objects. |
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
259 |
for _, foll := range fileLists { |
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
260 |
for _, fol := range foll { |
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
261 |
bigFileList = append(bigFileList, fol...) |
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
262 |
} |
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
263 |
} |
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
264 |
|
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
265 |
// Sort the list for better efficiency |
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
266 |
sort.Sort(ByInode(bigFileList)) |
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
267 |
|
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
268 |
// Compute checksums |
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
269 |
for _, fo := range bigFileList { |
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
270 |
if err := fo.Sum(fo.needHash); err != nil { |
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
271 |
myLog.Println(0, "Error:", err) |
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
272 |
} |
22
46681d21157a
Experimental optimization
Mikael Berthe <mikael@lilotux.net>
parents:
21
diff
changeset
|
273 |
fo.needHash = noChecksum |
10
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
274 |
} |
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
275 |
} |
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
276 |
|
8
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
277 |
func (fileList FileObjList) scheduleChecksum(sType sumType) { |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
278 |
for _, fo := range fileList { |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
279 |
fo.needHash = sType |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
280 |
} |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
281 |
} |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
282 |
|
20 | 283 |
// findDupesChecksums splits the fileObj list into several lists with the |
284 |
// same sType hash. |
|
22
46681d21157a
Experimental optimization
Mikael Berthe <mikael@lilotux.net>
parents:
21
diff
changeset
|
285 |
func (fileList FileObjList) findDupesChecksums(sType sumType, dryRun bool) foListList { |
16 | 286 |
var dupeList foListList |
287 |
var scheduleFull foListList |
|
5
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
288 |
hashes := make(map[string]FileObjList) |
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
289 |
|
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
290 |
// Sort the list for better efficiency |
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
291 |
sort.Sort(ByInode(fileList)) |
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
292 |
|
22
46681d21157a
Experimental optimization
Mikael Berthe <mikael@lilotux.net>
parents:
21
diff
changeset
|
293 |
if sType == fullChecksum && dryRun { |
46681d21157a
Experimental optimization
Mikael Berthe <mikael@lilotux.net>
parents:
21
diff
changeset
|
294 |
fileList.scheduleChecksum(fullChecksum) |
46681d21157a
Experimental optimization
Mikael Berthe <mikael@lilotux.net>
parents:
21
diff
changeset
|
295 |
return append(dupeList, fileList) |
46681d21157a
Experimental optimization
Mikael Berthe <mikael@lilotux.net>
parents:
21
diff
changeset
|
296 |
} |
7
68375cc98f98
Refactor checksum functions to reduce code duplication
Mikael Berthe <mikael@lilotux.net>
parents:
6
diff
changeset
|
297 |
// Compute checksums |
5
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
298 |
for _, fo := range fileList { |
8
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
299 |
hash, err := fo.checksum(sType) |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
300 |
if err != nil { |
5
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
301 |
myLog.Println(0, "Error:", err) |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
302 |
continue |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
303 |
} |
8
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
304 |
hashes[hash] = append(hashes[hash], fo) |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
305 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
306 |
|
5
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
307 |
// Let's de-dupe now... |
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
308 |
for _, l := range hashes { |
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
309 |
if len(l) < 2 { |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
310 |
continue |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
311 |
} |
7
68375cc98f98
Refactor checksum functions to reduce code duplication
Mikael Berthe <mikael@lilotux.net>
parents:
6
diff
changeset
|
312 |
if sType == partialChecksum { |
8
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
313 |
scheduleFull = append(scheduleFull, l) |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
314 |
} else { // full checksums -> we're done |
7
68375cc98f98
Refactor checksum functions to reduce code duplication
Mikael Berthe <mikael@lilotux.net>
parents:
6
diff
changeset
|
315 |
dupeList = append(dupeList, l) |
8
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
316 |
myLog.Printf(5, " . found %d new duplicates\n", len(l)) |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
317 |
} |
8
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
318 |
} |
9 | 319 |
if sType == partialChecksum && len(scheduleFull) > 0 { |
22
46681d21157a
Experimental optimization
Mikael Berthe <mikael@lilotux.net>
parents:
21
diff
changeset
|
320 |
//computeSheduledChecksums(scheduleFull) |
8
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
321 |
for _, l := range scheduleFull { |
22
46681d21157a
Experimental optimization
Mikael Berthe <mikael@lilotux.net>
parents:
21
diff
changeset
|
322 |
r := l.findDupesChecksums(fullChecksum, dryRun) |
8
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
323 |
dupeList = append(dupeList, r...) |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
324 |
} |
22
46681d21157a
Experimental optimization
Mikael Berthe <mikael@lilotux.net>
parents:
21
diff
changeset
|
325 |
if dryRun { |
46681d21157a
Experimental optimization
Mikael Berthe <mikael@lilotux.net>
parents:
21
diff
changeset
|
326 |
return scheduleFull |
46681d21157a
Experimental optimization
Mikael Berthe <mikael@lilotux.net>
parents:
21
diff
changeset
|
327 |
} |
5
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
328 |
} |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
329 |
|
5
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
330 |
return dupeList |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
331 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
332 |
|
6 | 333 |
// findDupes() uses checksums to find file duplicates |
16 | 334 |
func (data *dataT) findDupes(skipPartial bool) foListList { |
335 |
var dupeList foListList |
|
336 |
var schedulePartial foListList |
|
22
46681d21157a
Experimental optimization
Mikael Berthe <mikael@lilotux.net>
parents:
21
diff
changeset
|
337 |
var schedulePartial2 foListList |
16 | 338 |
var scheduleFull foListList |
6 | 339 |
|
14
ea4286b6c4b1
Get rid of the now useless sizeClass structure
Mikael Berthe <mikael@lilotux.net>
parents:
13
diff
changeset
|
340 |
for size, sgListP := range data.sizeGroups { |
6 | 341 |
// We skip partial checksums for small files or if requested |
342 |
if size > minSizePartialChecksum && !skipPartial { |
|
14
ea4286b6c4b1
Get rid of the now useless sizeClass structure
Mikael Berthe <mikael@lilotux.net>
parents:
13
diff
changeset
|
343 |
sgListP.scheduleChecksum(partialChecksum) |
ea4286b6c4b1
Get rid of the now useless sizeClass structure
Mikael Berthe <mikael@lilotux.net>
parents:
13
diff
changeset
|
344 |
schedulePartial = append(schedulePartial, *sgListP) |
6 | 345 |
} else { |
14
ea4286b6c4b1
Get rid of the now useless sizeClass structure
Mikael Berthe <mikael@lilotux.net>
parents:
13
diff
changeset
|
346 |
sgListP.scheduleChecksum(fullChecksum) |
ea4286b6c4b1
Get rid of the now useless sizeClass structure
Mikael Berthe <mikael@lilotux.net>
parents:
13
diff
changeset
|
347 |
scheduleFull = append(scheduleFull, *sgListP) |
6 | 348 |
} |
8
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
349 |
} |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
350 |
|
10
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
351 |
computeSheduledChecksums(schedulePartial, scheduleFull) |
8
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
352 |
|
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
353 |
for _, l := range schedulePartial { |
22
46681d21157a
Experimental optimization
Mikael Berthe <mikael@lilotux.net>
parents:
21
diff
changeset
|
354 |
r := l.findDupesChecksums(partialChecksum, true) // dry-run |
46681d21157a
Experimental optimization
Mikael Berthe <mikael@lilotux.net>
parents:
21
diff
changeset
|
355 |
schedulePartial2 = append(schedulePartial2, r...) |
46681d21157a
Experimental optimization
Mikael Berthe <mikael@lilotux.net>
parents:
21
diff
changeset
|
356 |
} |
46681d21157a
Experimental optimization
Mikael Berthe <mikael@lilotux.net>
parents:
21
diff
changeset
|
357 |
computeSheduledChecksums(schedulePartial2) |
46681d21157a
Experimental optimization
Mikael Berthe <mikael@lilotux.net>
parents:
21
diff
changeset
|
358 |
for _, l := range schedulePartial { |
46681d21157a
Experimental optimization
Mikael Berthe <mikael@lilotux.net>
parents:
21
diff
changeset
|
359 |
r := l.findDupesChecksums(partialChecksum, false) |
6 | 360 |
dupeList = append(dupeList, r...) |
361 |
} |
|
8
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
362 |
for _, l := range scheduleFull { |
22
46681d21157a
Experimental optimization
Mikael Berthe <mikael@lilotux.net>
parents:
21
diff
changeset
|
363 |
r := l.findDupesChecksums(fullChecksum, false) |
8
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
364 |
dupeList = append(dupeList, r...) |
25ad96511395
Schedule checksum computations so that we reduce hard drive seeks
Mikael Berthe <mikael@lilotux.net>
parents:
7
diff
changeset
|
365 |
} |
6 | 366 |
return dupeList |
367 |
} |
|
368 |
||
20 | 369 |
// dropEmptyFiles removes the empty files from the main map, since we don't |
370 |
// have to do any processing about them. |
|
371 |
// If ignoreEmpty is false, the empty file list is saved in data.emptyFiles. |
|
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
372 |
func (data *dataT) dropEmptyFiles(ignoreEmpty bool) (emptyCount int) { |
14
ea4286b6c4b1
Get rid of the now useless sizeClass structure
Mikael Berthe <mikael@lilotux.net>
parents:
13
diff
changeset
|
373 |
sgListP, ok := data.sizeGroups[0] |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
374 |
if ok == false { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
375 |
return // no empty files |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
376 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
377 |
if !ignoreEmpty { |
14
ea4286b6c4b1
Get rid of the now useless sizeClass structure
Mikael Berthe <mikael@lilotux.net>
parents:
13
diff
changeset
|
378 |
if len(*sgListP) > 1 { |
ea4286b6c4b1
Get rid of the now useless sizeClass structure
Mikael Berthe <mikael@lilotux.net>
parents:
13
diff
changeset
|
379 |
data.emptyFiles = *sgListP |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
380 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
381 |
delete(data.sizeGroups, 0) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
382 |
return |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
383 |
} |
14
ea4286b6c4b1
Get rid of the now useless sizeClass structure
Mikael Berthe <mikael@lilotux.net>
parents:
13
diff
changeset
|
384 |
emptyCount = len(*sgListP) |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
385 |
delete(data.sizeGroups, 0) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
386 |
return |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
387 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
388 |
|
2
55098d552ae2
s/createSizeHash/initialCleanup/
Mikael Berthe <mikael@lilotux.net>
parents:
1
diff
changeset
|
389 |
// initialCleanup() removes files with unique size as well as hard links |
55098d552ae2
s/createSizeHash/initialCleanup/
Mikael Berthe <mikael@lilotux.net>
parents:
1
diff
changeset
|
390 |
func (data *dataT) initialCleanup() (hardLinkCount, uniqueSizeCount int) { |
14
ea4286b6c4b1
Get rid of the now useless sizeClass structure
Mikael Berthe <mikael@lilotux.net>
parents:
13
diff
changeset
|
391 |
for s, sgListP := range data.sizeGroups { |
ea4286b6c4b1
Get rid of the now useless sizeClass structure
Mikael Berthe <mikael@lilotux.net>
parents:
13
diff
changeset
|
392 |
if len(*sgListP) < 2 { |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
393 |
delete(data.sizeGroups, s) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
394 |
uniqueSizeCount++ |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
395 |
continue |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
396 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
397 |
|
10
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
398 |
// We can't look for hard links if the O.S. does not support |
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
399 |
// them... |
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
400 |
if !OSHasInodes() { |
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
401 |
continue |
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
402 |
} |
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
403 |
|
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
404 |
var hardlinksFound bool |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
405 |
|
10
1ee01b135e0e
Reduce code duplication related to computeSheduledChecksums()
Mikael Berthe <mikael@lilotux.net>
parents:
9
diff
changeset
|
406 |
// Check for hard links |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
407 |
// Remove unique dev/inodes |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
408 |
// Instead of this loop, another way would be to use the field |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
409 |
// "Unique" of the fileObj to mark them to be discarded |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
410 |
// and remove them all at the end. |
20 | 411 |
// TODO: Should we also check for duplicate paths? |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
412 |
for { |
15
4e3a67dc70a0
Cosmetics (go fmt and comments...)
Mikael Berthe <mikael@lilotux.net>
parents:
14
diff
changeset
|
413 |
type devinode struct{ dev, ino uint64 } |
12 | 414 |
devinodes := make(map[devinode]bool) |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
415 |
var hardLinkIndex int |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
416 |
|
14
ea4286b6c4b1
Get rid of the now useless sizeClass structure
Mikael Berthe <mikael@lilotux.net>
parents:
13
diff
changeset
|
417 |
for i, fo := range *sgListP { |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
418 |
dev, ino := GetDevIno(fo) |
15
4e3a67dc70a0
Cosmetics (go fmt and comments...)
Mikael Berthe <mikael@lilotux.net>
parents:
14
diff
changeset
|
419 |
di := devinode{dev, ino} |
12 | 420 |
if _, hlink := devinodes[di]; hlink { |
421 |
hardLinkIndex = i |
|
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
422 |
hardLinkCount++ |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
423 |
hardlinksFound = true |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
424 |
break |
12 | 425 |
} else { |
426 |
devinodes[di] = true |
|
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
427 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
428 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
429 |
|
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
430 |
if hardLinkIndex == 0 { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
431 |
break |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
432 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
433 |
i := hardLinkIndex |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
434 |
// Remove hardink |
14
ea4286b6c4b1
Get rid of the now useless sizeClass structure
Mikael Berthe <mikael@lilotux.net>
parents:
13
diff
changeset
|
435 |
copy((*sgListP)[i:], (*sgListP)[i+1:]) |
ea4286b6c4b1
Get rid of the now useless sizeClass structure
Mikael Berthe <mikael@lilotux.net>
parents:
13
diff
changeset
|
436 |
(*sgListP)[len(*sgListP)-1] = nil |
ea4286b6c4b1
Get rid of the now useless sizeClass structure
Mikael Berthe <mikael@lilotux.net>
parents:
13
diff
changeset
|
437 |
*sgListP = (*sgListP)[:len(*sgListP)-1] |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
438 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
439 |
// We have found hard links in this size group, |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
440 |
// maybe we can remove it |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
441 |
if hardlinksFound { |
14
ea4286b6c4b1
Get rid of the now useless sizeClass structure
Mikael Berthe <mikael@lilotux.net>
parents:
13
diff
changeset
|
442 |
if len(*sgListP) < 2 { |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
443 |
delete(data.sizeGroups, s) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
444 |
uniqueSizeCount++ |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
445 |
continue |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
446 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
447 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
448 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
449 |
return |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
450 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
451 |
|
37 | 452 |
func duf(dirs []string, options Options) (Results, error) { |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
453 |
var verbose bool |
37 | 454 |
if myLog.verbosity > 0 { |
455 |
verbose = true |
|
23
9ce0f2e2a33f
Add an assertion on constant variables
Mikael Berthe <mikael@lilotux.net>
parents:
22
diff
changeset
|
456 |
} |
9ce0f2e2a33f
Add an assertion on constant variables
Mikael Berthe <mikael@lilotux.net>
parents:
22
diff
changeset
|
457 |
|
37 | 458 |
var results Results |
459 |
data.sizeGroups = make(map[int64]*FileObjList) |
|
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
460 |
|
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
461 |
myLog.Println(1, "* Reading file metadata") |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
462 |
|
37 | 463 |
for _, root := range dirs { |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
464 |
if err := filepath.Walk(root, visit); err != nil { |
37 | 465 |
return results, fmt.Errorf("could not read file tree: %v", err) |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
466 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
467 |
} |
20 | 468 |
|
469 |
// Count empty files and drop them if they should be ignored |
|
37 | 470 |
emptyCount := data.dropEmptyFiles(options.IgnoreEmpty) |
20 | 471 |
|
472 |
// Display a small report |
|
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
473 |
if verbose { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
474 |
if data.ignoreCount > 0 { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
475 |
myLog.Printf(1, " %d special files were ignored\n", |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
476 |
data.ignoreCount) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
477 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
478 |
myLog.Println(2, " Initial counter:", data.cmpt, "files") |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
479 |
myLog.Println(2, " Total size:", formatSize(data.totalSize, |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
480 |
false)) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
481 |
if emptyCount > 0 { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
482 |
myLog.Printf(1, " %d empty files were ignored\n", |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
483 |
emptyCount) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
484 |
} |
20 | 485 |
data.dispCount() |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
486 |
myLog.Println(3, "* Number of size groups:", len(data.sizeGroups)) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
487 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
488 |
|
20 | 489 |
// Remove unique sizes and hard links |
5
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
490 |
myLog.Println(1, "* Removing files with unique size and hard links...") |
2
55098d552ae2
s/createSizeHash/initialCleanup/
Mikael Berthe <mikael@lilotux.net>
parents:
1
diff
changeset
|
491 |
hardLinkCount, uniqueSizeCount := data.initialCleanup() |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
492 |
if verbose { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
493 |
myLog.Printf(2, " Dropped %d files with unique size\n", |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
494 |
uniqueSizeCount) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
495 |
myLog.Printf(2, " Dropped %d hard links\n", hardLinkCount) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
496 |
myLog.Println(3, "* Number of size groups:", len(data.sizeGroups)) |
20 | 497 |
data.dispCount() |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
498 |
} |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
499 |
|
20 | 500 |
// Get the final list of dupes, using checksums |
5
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
501 |
myLog.Println(1, "* Computing checksums...") |
16 | 502 |
var result foListList |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
503 |
if len(data.emptyFiles) > 0 { |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
504 |
result = append(result, data.emptyFiles) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
505 |
} |
37 | 506 |
result = append(result, data.findDupes(options.SkipPartial)...) |
5
887c21c26cc8
Refactor the checksum part
Mikael Berthe <mikael@lilotux.net>
parents:
2
diff
changeset
|
507 |
|
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
508 |
myLog.Println(3, "* Number of match groups:", len(result)) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
509 |
|
36 | 510 |
// Done! Prepare results data |
37 | 511 |
if len(result) > 0 && !options.Summary { |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
512 |
myLog.Println(1, "* Dupes:") |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
513 |
} |
21 | 514 |
|
515 |
// Sort files by path inside each group |
|
516 |
for _, l := range result { |
|
517 |
sort.Sort(byFilePathName(l)) |
|
518 |
} |
|
519 |
// Sort groups by increasing size (of the duplicated files) |
|
16 | 520 |
sort.Sort(byGroupFileSize(result)) |
521 |
||
36 | 522 |
for _, l := range result { |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
523 |
size := uint64(l[0].Size()) |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
524 |
// We do not count the size of the 1st item |
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
525 |
// so we get only duplicate size. |
42 | 526 |
results.RedundantDataSizeBytes += size * uint64(len(l)-1) |
527 |
newSet := ResultSet{FileSize: size} |
|
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
528 |
for _, f := range l { |
36 | 529 |
newSet.Paths = append(newSet.Paths, f.FilePath) |
530 |
results.Duplicates++ |
|
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
531 |
} |
36 | 532 |
results.Groups = append(results.Groups, newSet) |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
533 |
} |
40
7f9cdb9d166d
JSON output: NumberOfSets was not set
Mikael Berthe <mikael@lilotux.net>
parents:
39
diff
changeset
|
534 |
results.NumberOfSets = uint(len(results.Groups)) |
42 | 535 |
results.RedundantDataSizeHuman = formatSize(results.RedundantDataSizeBytes, true) |
36 | 536 |
results.TotalFileCount = data.cmpt |
42 | 537 |
results.TotalSizeBytes = data.totalSize |
538 |
results.TotalSizeHuman = formatSize(data.totalSize, true) |
|
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
539 |
|
37 | 540 |
return results, nil |
541 |
} |
|
542 |
||
543 |
// It all starts here. |
|
544 |
func main() { |
|
545 |
var verbose bool |
|
546 |
var options Options |
|
547 |
||
548 |
// Assertion on constant values |
|
549 |
if minSizePartialChecksum <= 2*medsumBytes { |
|
550 |
myLog.Fatal("Internal error: assert minSizePartialChecksum > 2*medsumBytes") |
|
551 |
} |
|
552 |
||
553 |
// Command line parameters parsingg |
|
554 |
flag.BoolVar(&verbose, "verbose", false, "Be verbose (verbosity=1)") |
|
555 |
flag.BoolVar(&verbose, "v", false, "See --verbose") |
|
556 |
flag.BoolVar(&options.OutToJSON, "json", false, "Use JSON format for output") |
|
557 |
flag.BoolVar(&options.Summary, "summary", false, "Do not display the duplicate list") |
|
558 |
flag.BoolVar(&options.Summary, "s", false, "See --summary") |
|
559 |
flag.BoolVar(&options.SkipPartial, "skip-partial", false, "Skip partial checksums") |
|
38 | 560 |
flag.BoolVar(&options.IgnoreEmpty, "no-empty", false, "Ignore empty files") |
561 |
flag.IntVar(&myLog.verbosity, "verbosity", 0, "Set verbosity level (1-6)") |
|
37 | 562 |
flag.IntVar(&myLog.verbosity, "vl", 0, "See verbosity") |
38 | 563 |
timings := flag.Bool("timings", false, "Show detailed log timings") |
37 | 564 |
|
565 |
flag.Parse() |
|
566 |
||
567 |
// Set verbosity: --verbose=true == --verbosity=1 |
|
568 |
if myLog.verbosity > 0 { |
|
569 |
verbose = true |
|
570 |
} else if verbose == true { |
|
571 |
myLog.verbosity = 1 |
|
572 |
} |
|
573 |
||
574 |
if len(flag.Args()) == 0 { |
|
575 |
// TODO: more helpful usage statement |
|
576 |
myLog.Println(-1, "Usage:", os.Args[0], |
|
577 |
"[options] base_directory|file...") |
|
578 |
os.Exit(0) |
|
579 |
} |
|
580 |
||
581 |
// Change log format for benchmarking |
|
582 |
if *timings { |
|
583 |
myLog.SetBenchFlags() |
|
584 |
} |
|
585 |
||
586 |
results, err := duf(flag.Args(), options) |
|
587 |
if err != nil { |
|
588 |
myLog.Fatal("ERROR: " + err.Error()) |
|
589 |
} |
|
590 |
||
36 | 591 |
// Output the results |
37 | 592 |
displayResults(results, options.OutToJSON, options.Summary) |
0
a5642cd03cef
Goduf - initial version-controlled revision
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
593 |
} |