goduf.go
changeset 45 ea6a9ba7a3c8
parent 44 af90d9396ef1
equal deleted inserted replaced
44:af90d9396ef1 45:ea6a9ba7a3c8
     1 /*
     1 /*
     2  * Copyright (C) 2014-2018 Mikael Berthe <mikael@lilotux.net>
     2  * Copyright (C) 2014-2022 Mikael Berthe <mikael@lilotux.net>
     3  *
     3  *
     4  * This program is free software; you can redistribute it and/or modify
     4  * This program is free software; you can redistribute it and/or modify
     5  * it under the terms of the GNU General Public License as published by
     5  * it under the terms of the GNU General Public License as published by
     6  * the Free Software Foundation; either version 2 of the License, or (at
     6  * the Free Software Foundation; either version 2 of the License, or (at
     7  * your option) any later version.
     7  * your option) any later version.
    71 	TotalSizeHuman         string      `json:"total_size_human"`          // Same, human-readable
    71 	TotalSizeHuman         string      `json:"total_size_human"`          // Same, human-readable
    72 }
    72 }
    73 
    73 
    74 // ResultSet contains a group of identical duplicate files
    74 // ResultSet contains a group of identical duplicate files
    75 type ResultSet struct {
    75 type ResultSet struct {
    76 	FileSize uint64   `json:"file_size"` // Size of each item
    76 	FileSize uint64              `json:"file_size"`       // Size of each item
    77 	Paths    []string `json:"paths"`     // List of file paths
    77 	Paths    []string            `json:"paths"`           // List of file paths
       
    78 	Links    map[string][]string `json:"links,omitempty"` // Existing hard links
    78 }
    79 }
    79 
    80 
    80 type fileObj struct {
    81 type fileObj struct {
    81 	//Unique   bool
    82 	//Unique   bool
    82 	FilePath string
    83 	FilePath string
    94 	totalSize   uint64
    95 	totalSize   uint64
    95 	cmpt        uint
    96 	cmpt        uint
    96 	sizeGroups  map[int64]*FileObjList
    97 	sizeGroups  map[int64]*FileObjList
    97 	emptyFiles  FileObjList
    98 	emptyFiles  FileObjList
    98 	ignoreCount int
    99 	ignoreCount int
       
   100 	hardLinks   map[string][]string
    99 }
   101 }
   100 
   102 
   101 var data dataT
   103 var data dataT
   102 
   104 
   103 // Implement my own logger
   105 // Implement my own logger
   409 		// "Unique" of the fileObj to mark them to be discarded
   411 		// "Unique" of the fileObj to mark them to be discarded
   410 		// and remove them all at the end.
   412 		// and remove them all at the end.
   411 		// TODO: Should we also check for duplicate paths?
   413 		// TODO: Should we also check for duplicate paths?
   412 		for {
   414 		for {
   413 			type devinode struct{ dev, ino uint64 }
   415 			type devinode struct{ dev, ino uint64 }
   414 			devinodes := make(map[devinode]bool)
   416 			devinodes := make(map[devinode]string)
   415 			var hardLinkIndex int
   417 			var hardLinkIndex int
   416 
   418 
   417 			for i, fo := range *sgListP {
   419 			for i, fo := range *sgListP {
   418 				dev, ino := GetDevIno(fo)
   420 				dev, ino := GetDevIno(fo)
   419 				di := devinode{dev, ino}
   421 				di := devinode{dev, ino}
   420 				if _, hlink := devinodes[di]; hlink {
   422 				if primaryPath, ok := devinodes[di]; ok {
   421 					hardLinkIndex = i
   423 					hardLinkIndex = i
   422 					hardLinkCount++
   424 					hardLinkCount++
   423 					hardlinksFound = true
   425 					hardlinksFound = true
       
   426 					data.hardLinks[primaryPath] = append(data.hardLinks[primaryPath], fo.FilePath)
   424 					break
   427 					break
   425 				} else {
   428 				} else {
   426 					devinodes[di] = true
   429 					devinodes[di] = fo.FilePath
   427 				}
   430 				}
   428 			}
   431 			}
   429 
   432 
   430 			if hardLinkIndex == 0 {
   433 			if hardLinkIndex == 0 {
   431 				break
   434 				break
   455 		verbose = true
   458 		verbose = true
   456 	}
   459 	}
   457 
   460 
   458 	var results Results
   461 	var results Results
   459 	data.sizeGroups = make(map[int64]*FileObjList)
   462 	data.sizeGroups = make(map[int64]*FileObjList)
       
   463 	data.hardLinks = make(map[string][]string)
   460 
   464 
   461 	myLog.Println(1, "* Reading file metadata")
   465 	myLog.Println(1, "* Reading file metadata")
   462 
   466 
   463 	for _, root := range dirs {
   467 	for _, root := range dirs {
   464 		if err := filepath.Walk(root, visit); err != nil {
   468 		if err := filepath.Walk(root, visit); err != nil {
   521 		sort.Sort(byFilePathName(l))
   525 		sort.Sort(byFilePathName(l))
   522 	}
   526 	}
   523 	// Sort groups by increasing size (of the duplicated files)
   527 	// Sort groups by increasing size (of the duplicated files)
   524 	sort.Sort(byGroupFileSize(result))
   528 	sort.Sort(byGroupFileSize(result))
   525 
   529 
       
   530 	// Build the result duplicate sets
   526 	for _, l := range result {
   531 	for _, l := range result {
   527 		size := uint64(l[0].Size())
   532 		size := uint64(l[0].Size())
   528 		// We do not count the size of the 1st item
   533 		// We do not count the size of the 1st item
   529 		// so we get only duplicate size.
   534 		// so we get only duplicate size.
   530 		results.RedundantDataSizeBytes += size * uint64(len(l)-1)
   535 		results.RedundantDataSizeBytes += size * uint64(len(l)-1)
   531 		newSet := ResultSet{FileSize: size}
   536 		newSet := ResultSet{FileSize: size}
   532 		for _, f := range l {
   537 		for _, f := range l {
   533 			newSet.Paths = append(newSet.Paths, f.FilePath)
   538 			newSet.Paths = append(newSet.Paths, f.FilePath)
   534 			results.Duplicates++
   539 			results.Duplicates++
       
   540 			if len(data.hardLinks[f.FilePath]) > 0 {
       
   541 				if newSet.Links == nil {
       
   542 					newSet.Links = make(map[string][]string)
       
   543 				}
       
   544 				newSet.Links[f.FilePath] = data.hardLinks[f.FilePath]
       
   545 			}
   535 		}
   546 		}
   536 		results.Groups = append(results.Groups, newSet)
   547 		results.Groups = append(results.Groups, newSet)
   537 	}
   548 	}
   538 	results.NumberOfSets = uint(len(results.Groups))
   549 	results.NumberOfSets = uint(len(results.Groups))
   539 	results.RedundantDataSizeHuman = formatSize(results.RedundantDataSizeBytes, true)
   550 	results.RedundantDataSizeHuman = formatSize(results.RedundantDataSizeBytes, true)