goduf.go
changeset 14 ea4286b6c4b1
parent 13 4102e5551e1b
child 15 4e3a67dc70a0
equal deleted inserted replaced
13:4102e5551e1b 14:ea4286b6c4b1
    56 }
    56 }
    57 
    57 
    58 // FileObjList is only exported so that we can have a sort interface on inodes.
    58 // FileObjList is only exported so that we can have a sort interface on inodes.
    59 type FileObjList []*fileObj
    59 type FileObjList []*fileObj
    60 
    60 
    61 type sizeClass struct { // XXX still useful?
       
    62 	files    FileObjList
       
    63 }
       
    64 
       
    65 type dataT struct {
    61 type dataT struct {
    66 	totalSize   uint64
    62 	totalSize   uint64
    67 	cmpt        uint
    63 	cmpt        uint
    68 	sizeGroups  map[int64]*sizeClass
    64 	sizeGroups  map[int64]*FileObjList
    69 	emptyFiles  FileObjList
    65 	emptyFiles  FileObjList
    70 	ignoreCount int
    66 	ignoreCount int
    71 }
    67 }
    72 
    68 
    73 var data dataT
    69 var data dataT
   133 
   129 
   134 	data.cmpt++
   130 	data.cmpt++
   135 	data.totalSize += uint64(f.Size())
   131 	data.totalSize += uint64(f.Size())
   136 	fo := &fileObj{FilePath: path, FileInfo: f}
   132 	fo := &fileObj{FilePath: path, FileInfo: f}
   137 	if _, ok := data.sizeGroups[f.Size()]; !ok {
   133 	if _, ok := data.sizeGroups[f.Size()]; !ok {
   138 		data.sizeGroups[f.Size()] = &sizeClass{}
   134 		data.sizeGroups[f.Size()] = new(FileObjList)
   139 	}
   135 	}
   140 	data.sizeGroups[f.Size()].files =
   136 	*data.sizeGroups[f.Size()] = append(*data.sizeGroups[f.Size()], fo)
   141 		append(data.sizeGroups[f.Size()].files, fo)
       
   142 	return nil
   137 	return nil
   143 }
   138 }
   144 
   139 
   145 func (fo *fileObj) CheckSum() error {
   140 func (fo *fileObj) CheckSum() error {
   146 	file, err := os.Open(fo.FilePath)
   141 	file, err := os.Open(fo.FilePath)
   219 	if myLog.verbosity < 4 {
   214 	if myLog.verbosity < 4 {
   220 		return
   215 		return
   221 	}
   216 	}
   222 	var c1, c1b, c2 int
   217 	var c1, c1b, c2 int
   223 	var s1 string
   218 	var s1 string
   224 	for _, sc := range data.sizeGroups {
   219 	for _, scListP := range data.sizeGroups {
   225 		c1 += len(sc.files)
   220 		c1 += len(*scListP)
   226 		c2++
   221 		c2++
   227 	}
   222 	}
   228 	c1b = len(data.emptyFiles)
   223 	c1b = len(data.emptyFiles)
   229 	if c1b > 0 {
   224 	if c1b > 0 {
   230 		s1 = fmt.Sprintf("+%d", c1b)
   225 		s1 = fmt.Sprintf("+%d", c1b)
   344 func (data *dataT) findDupes(skipPartial bool) []FileObjList {
   339 func (data *dataT) findDupes(skipPartial bool) []FileObjList {
   345 	var dupeList []FileObjList
   340 	var dupeList []FileObjList
   346 	var schedulePartial []FileObjList
   341 	var schedulePartial []FileObjList
   347 	var scheduleFull []FileObjList
   342 	var scheduleFull []FileObjList
   348 
   343 
   349 	for size, sizeGroup := range data.sizeGroups {
   344 	for size, sgListP := range data.sizeGroups {
   350 		// We skip partial checksums for small files or if requested
   345 		// We skip partial checksums for small files or if requested
   351 		if size > minSizePartialChecksum && !skipPartial {
   346 		if size > minSizePartialChecksum && !skipPartial {
   352 			sizeGroup.files.scheduleChecksum(partialChecksum)
   347 			sgListP.scheduleChecksum(partialChecksum)
   353 			schedulePartial = append(schedulePartial, sizeGroup.files)
   348 			schedulePartial = append(schedulePartial, *sgListP)
   354 		} else {
   349 		} else {
   355 			sizeGroup.files.scheduleChecksum(fullChecksum)
   350 			sgListP.scheduleChecksum(fullChecksum)
   356 			scheduleFull = append(scheduleFull, sizeGroup.files)
   351 			scheduleFull = append(scheduleFull, *sgListP)
   357 		}
   352 		}
   358 	}
   353 	}
   359 
   354 
   360 	computeSheduledChecksums(schedulePartial, scheduleFull)
   355 	computeSheduledChecksums(schedulePartial, scheduleFull)
   361 
   356 
   370 	// TODO: sort by increasing size
   365 	// TODO: sort by increasing size
   371 	return dupeList
   366 	return dupeList
   372 }
   367 }
   373 
   368 
   374 func (data *dataT) dropEmptyFiles(ignoreEmpty bool) (emptyCount int) {
   369 func (data *dataT) dropEmptyFiles(ignoreEmpty bool) (emptyCount int) {
   375 	sc, ok := data.sizeGroups[0]
   370 	sgListP, ok := data.sizeGroups[0]
   376 	if ok == false {
   371 	if ok == false {
   377 		return // no empty files
   372 		return // no empty files
   378 	}
   373 	}
   379 	if !ignoreEmpty {
   374 	if !ignoreEmpty {
   380 		if len(sc.files) > 1 {
   375 		if len(*sgListP) > 1 {
   381 			data.emptyFiles = sc.files
   376 			data.emptyFiles = *sgListP
   382 		}
   377 		}
   383 		delete(data.sizeGroups, 0)
   378 		delete(data.sizeGroups, 0)
   384 		return
   379 		return
   385 	}
   380 	}
   386 	emptyCount = len(sc.files)
   381 	emptyCount = len(*sgListP)
   387 	delete(data.sizeGroups, 0)
   382 	delete(data.sizeGroups, 0)
   388 	return
   383 	return
   389 }
   384 }
   390 
   385 
   391 // initialCleanup() removes files with unique size as well as hard links
   386 // initialCleanup() removes files with unique size as well as hard links
   392 func (data *dataT) initialCleanup() (hardLinkCount, uniqueSizeCount int) {
   387 func (data *dataT) initialCleanup() (hardLinkCount, uniqueSizeCount int) {
   393 	for s, sizeGroup := range data.sizeGroups {
   388 	for s, sgListP := range data.sizeGroups {
   394 		if len(sizeGroup.files) < 2 {
   389 		if len(*sgListP) < 2 {
   395 			delete(data.sizeGroups, s)
   390 			delete(data.sizeGroups, s)
   396 			uniqueSizeCount++
   391 			uniqueSizeCount++
   397 			continue
   392 			continue
   398 		}
   393 		}
   399 
   394 
   413 		for {
   408 		for {
   414 			type devinode struct { dev, ino uint64 }
   409 			type devinode struct { dev, ino uint64 }
   415 			devinodes := make(map[devinode]bool)
   410 			devinodes := make(map[devinode]bool)
   416 			var hardLinkIndex int
   411 			var hardLinkIndex int
   417 
   412 
   418 			for i, fo := range sizeGroup.files {
   413 			for i, fo := range *sgListP {
   419 				dev, ino := GetDevIno(fo)
   414 				dev, ino := GetDevIno(fo)
   420 				di := devinode{ dev, ino}
   415 				di := devinode{ dev, ino}
   421 				if _, hlink := devinodes[di]; hlink {
   416 				if _, hlink := devinodes[di]; hlink {
   422 					hardLinkIndex = i
   417 					hardLinkIndex = i
   423 					hardLinkCount++
   418 					hardLinkCount++
   431 			if hardLinkIndex == 0 {
   426 			if hardLinkIndex == 0 {
   432 				break
   427 				break
   433 			}
   428 			}
   434 			i := hardLinkIndex
   429 			i := hardLinkIndex
   435 			// Remove hardlink
   430 			// Remove hardlink
   436 			copy(sizeGroup.files[i:], sizeGroup.files[i+1:])
   431 			copy((*sgListP)[i:], (*sgListP)[i+1:])
   437 			sizeGroup.files[len(sizeGroup.files)-1] = nil
   432 			(*sgListP)[len(*sgListP)-1] = nil
   438 			sizeGroup.files = sizeGroup.files[:len(sizeGroup.files)-1]
   433 			*sgListP = (*sgListP)[:len(*sgListP)-1]
   439 		}
   434 		}
   440 		// We have found hard links in this size group,
   435 		// We have found hard links in this size group,
   441 		// maybe we can remove it
   436 		// maybe we can remove it
   442 		if hardlinksFound {
   437 		if hardlinksFound {
   443 			if len(sizeGroup.files) < 2 {
   438 			if len(*sgListP) < 2 {
   444 				delete(data.sizeGroups, s)
   439 				delete(data.sizeGroups, s)
   445 				uniqueSizeCount++
   440 				uniqueSizeCount++
   446 				continue
   441 				continue
   447 			}
   442 			}
   448 		}
   443 		}
   511 
   506 
   512 	if *timings {
   507 	if *timings {
   513 		log.SetFlags(log.LstdFlags | log.Lmicroseconds)
   508 		log.SetFlags(log.LstdFlags | log.Lmicroseconds)
   514 	}
   509 	}
   515 
   510 
   516 	data.sizeGroups = make(map[int64]*sizeClass)
   511 	data.sizeGroups = make(map[int64]*FileObjList)
   517 	myLog.Println(1, "* Reading file metadata")
   512 	myLog.Println(1, "* Reading file metadata")
   518 
   513 
   519 	for _, root := range flag.Args() {
   514 	for _, root := range flag.Args() {
   520 		if err := filepath.Walk(root, visit); err != nil {
   515 		if err := filepath.Walk(root, visit); err != nil {
   521 			myLog.Printf(-1, "* Error: could not read file tree:\n")
   516 			myLog.Printf(-1, "* Error: could not read file tree:\n")