1 /* |
1 /* |
2 * Copyright (C) 2014-2018 Mikael Berthe <mikael@lilotux.net> |
2 * Copyright (C) 2014-2022 Mikael Berthe <mikael@lilotux.net> |
3 * |
3 * |
4 * This program is free software; you can redistribute it and/or modify |
4 * This program is free software; you can redistribute it and/or modify |
5 * it under the terms of the GNU General Public License as published by |
5 * it under the terms of the GNU General Public License as published by |
6 * the Free Software Foundation; either version 2 of the License, or (at |
6 * the Free Software Foundation; either version 2 of the License, or (at |
7 * your option) any later version. |
7 * your option) any later version. |
71 TotalSizeHuman string `json:"total_size_human"` // Same, human-readable |
71 TotalSizeHuman string `json:"total_size_human"` // Same, human-readable |
72 } |
72 } |
73 |
73 |
// ResultSet contains a group of identical duplicate files.
//
// FileSize is the size shared by every file of the group and Paths lists
// the files themselves. Links is filled in only for the paths of the group
// that have known hard links: the key is a path that is also present in
// Paths and the value is the list of other paths that were found to share
// its device/inode pair. The field is omitted from the JSON output when no
// hard link was recorded for the group.
type ResultSet struct {
	FileSize uint64              `json:"file_size"`       // Size of each item
	Paths    []string            `json:"paths"`           // List of file paths
	Links    map[string][]string `json:"links,omitempty"` // Existing hard links
}
79 |
80 |
80 type fileObj struct { |
81 type fileObj struct { |
81 //Unique bool |
82 //Unique bool |
82 FilePath string |
83 FilePath string |
409 // "Unique" of the fileObj to mark them to be discarded |
411 // "Unique" of the fileObj to mark them to be discarded |
410 // and remove them all at the end. |
412 // and remove them all at the end. |
411 // TODO: Should we also check for duplicate paths? |
413 // TODO: Should we also check for duplicate paths? |
412 for { |
414 for { |
413 type devinode struct{ dev, ino uint64 } |
415 type devinode struct{ dev, ino uint64 } |
414 devinodes := make(map[devinode]bool) |
416 devinodes := make(map[devinode]string) |
415 var hardLinkIndex int |
417 var hardLinkIndex int |
416 |
418 |
417 for i, fo := range *sgListP { |
419 for i, fo := range *sgListP { |
418 dev, ino := GetDevIno(fo) |
420 dev, ino := GetDevIno(fo) |
419 di := devinode{dev, ino} |
421 di := devinode{dev, ino} |
420 if _, hlink := devinodes[di]; hlink { |
422 if primaryPath, ok := devinodes[di]; ok { |
421 hardLinkIndex = i |
423 hardLinkIndex = i |
422 hardLinkCount++ |
424 hardLinkCount++ |
423 hardlinksFound = true |
425 hardlinksFound = true |
|
426 data.hardLinks[primaryPath] = append(data.hardLinks[primaryPath], fo.FilePath) |
424 break |
427 break |
425 } else { |
428 } else { |
426 devinodes[di] = true |
429 devinodes[di] = fo.FilePath |
427 } |
430 } |
428 } |
431 } |
429 |
432 |
430 if hardLinkIndex == 0 { |
433 if hardLinkIndex == 0 { |
431 break |
434 break |
455 verbose = true |
458 verbose = true |
456 } |
459 } |
457 |
460 |
458 var results Results |
461 var results Results |
459 data.sizeGroups = make(map[int64]*FileObjList) |
462 data.sizeGroups = make(map[int64]*FileObjList) |
|
463 data.hardLinks = make(map[string][]string) |
460 |
464 |
461 myLog.Println(1, "* Reading file metadata") |
465 myLog.Println(1, "* Reading file metadata") |
462 |
466 |
463 for _, root := range dirs { |
467 for _, root := range dirs { |
464 if err := filepath.Walk(root, visit); err != nil { |
468 if err := filepath.Walk(root, visit); err != nil { |
521 sort.Sort(byFilePathName(l)) |
525 sort.Sort(byFilePathName(l)) |
522 } |
526 } |
523 // Sort groups by increasing size (of the duplicated files) |
527 // Sort groups by increasing size (of the duplicated files) |
524 sort.Sort(byGroupFileSize(result)) |
528 sort.Sort(byGroupFileSize(result)) |
525 |
529 |
|
530 // Build the result duplicate sets |
526 for _, l := range result { |
531 for _, l := range result { |
527 size := uint64(l[0].Size()) |
532 size := uint64(l[0].Size()) |
528 // We do not count the size of the 1st item |
533 // We do not count the size of the 1st item |
529 // so we get only duplicate size. |
534 // so we get only duplicate size. |
530 results.RedundantDataSizeBytes += size * uint64(len(l)-1) |
535 results.RedundantDataSizeBytes += size * uint64(len(l)-1) |
531 newSet := ResultSet{FileSize: size} |
536 newSet := ResultSet{FileSize: size} |
532 for _, f := range l { |
537 for _, f := range l { |
533 newSet.Paths = append(newSet.Paths, f.FilePath) |
538 newSet.Paths = append(newSet.Paths, f.FilePath) |
534 results.Duplicates++ |
539 results.Duplicates++ |
|
540 if len(data.hardLinks[f.FilePath]) > 0 { |
|
541 if newSet.Links == nil { |
|
542 newSet.Links = make(map[string][]string) |
|
543 } |
|
544 newSet.Links[f.FilePath] = data.hardLinks[f.FilePath] |
|
545 } |
535 } |
546 } |
536 results.Groups = append(results.Groups, newSet) |
547 results.Groups = append(results.Groups, newSet) |
537 } |
548 } |
538 results.NumberOfSets = uint(len(results.Groups)) |
549 results.NumberOfSets = uint(len(results.Groups)) |
539 results.RedundantDataSizeHuman = formatSize(results.RedundantDataSizeBytes, true) |
550 results.RedundantDataSizeHuman = formatSize(results.RedundantDataSizeBytes, true) |