goduf.go
changeset 10 1ee01b135e0e
parent 9 5b58342459eb
child 12 15e3580cfb8d
equal deleted inserted replaced
9:5b58342459eb 10:1ee01b135e0e
   270 
   270 
   271 func (fileList FileObjList) computeSheduledChecksums() {
   271 func (fileList FileObjList) computeSheduledChecksums() {
   272 	// Sort the list for better efficiency
   272 	// Sort the list for better efficiency
   273 	sort.Sort(ByInode(fileList))
   273 	sort.Sort(ByInode(fileList))
   274 
   274 
   275 	myLog.Printf(6, "  . will compute %d checksums\n", len(fileList))
   275 	//myLog.Printf(6, "  . will compute %d checksums\n", len(fileList))
   276 
   276 
   277 	// Compute checksums
   277 	// Compute checksums
   278 	for _, fo := range fileList {
   278 	for _, fo := range fileList {
   279 		if err := fo.Sum(fo.needHash); err != nil {
   279 		if err := fo.Sum(fo.needHash); err != nil {
   280 			myLog.Println(0, "Error:", err)
   280 			myLog.Println(0, "Error:", err)
   281 		}
   281 		}
   282 	}
   282 	}
   283 }
   283 }
       
   284 func computeSheduledChecksums(fileLists ...[]FileObjList) {
       
   285 	var bigFileList FileObjList
       
   286 	// Merge the lists of FileObjList lists and create a unique list
       
   287 	// of file objects.
       
   288 	for _, foll := range fileLists {
       
   289 		for _, fol := range foll {
       
   290 			bigFileList = append(bigFileList, fol...)
       
   291 		}
       
   292 	}
       
   293 
       
   294 	// Sort the list for better efficiency
       
   295 	sort.Sort(ByInode(bigFileList))
       
   296 
       
   297 	// Compute checksums
       
   298 	for _, fo := range bigFileList {
       
   299 		if err := fo.Sum(fo.needHash); err != nil {
       
   300 			myLog.Println(0, "Error:", err)
       
   301 		}
       
   302 	}
       
   303 }
       
   304 
   284 
   305 
   285 func (fileList FileObjList) scheduleChecksum(sType sumType) {
   306 func (fileList FileObjList) scheduleChecksum(sType sumType) {
   286 	for _, fo := range fileList {
   307 	for _, fo := range fileList {
   287 		fo.needHash = sType
   308 		fo.needHash = sType
   288 	}
   309 	}
   318 			// TODO: sort by increasing size
   339 			// TODO: sort by increasing size
   319 			myLog.Printf(5, "  . found %d new duplicates\n", len(l))
   340 			myLog.Printf(5, "  . found %d new duplicates\n", len(l))
   320 		}
   341 		}
   321 	}
   342 	}
   322 	if sType == partialChecksum && len(scheduleFull) > 0 {
   343 	if sType == partialChecksum && len(scheduleFull) > 0 {
   323 		var csList FileObjList
   344 		computeSheduledChecksums(scheduleFull)
   324 		for _, fol := range scheduleFull {
       
   325 			csList = append(csList, fol...)
       
   326 		}
       
   327 		myLog.Printf(6, "  .. findDupesChecksums: computing %d "+
       
   328 			"full checksums\n", len(csList)) // DBG
       
   329 		csList.computeSheduledChecksums()
       
   330 		for _, l := range scheduleFull {
   345 		for _, l := range scheduleFull {
   331 			r := l.findDupesChecksums(fullChecksum)
   346 			r := l.findDupesChecksums(fullChecksum)
   332 			dupeList = append(dupeList, r...)
   347 			dupeList = append(dupeList, r...)
   333 		}
   348 		}
   334 	}
   349 	}
   351 			sizeGroup.files.scheduleChecksum(fullChecksum)
   366 			sizeGroup.files.scheduleChecksum(fullChecksum)
   352 			scheduleFull = append(scheduleFull, sizeGroup.files)
   367 			scheduleFull = append(scheduleFull, sizeGroup.files)
   353 		}
   368 		}
   354 	}
   369 	}
   355 
   370 
   356 	var csList FileObjList
   371 	computeSheduledChecksums(schedulePartial, scheduleFull)
   357 	for _, fol := range schedulePartial {
       
   358 		csList = append(csList, fol...)
       
   359 	}
       
   360 	for _, fol := range scheduleFull {
       
   361 		csList = append(csList, fol...)
       
   362 	}
       
   363 	myLog.Printf(6, "  .. findDupes: computing %d misc checksums\n",
       
   364 		len(csList)) // DBG
       
   365 	csList.computeSheduledChecksums()
       
   366 
   372 
   367 	for _, l := range schedulePartial {
   373 	for _, l := range schedulePartial {
   368 		r := l.findDupesChecksums(partialChecksum)
   374 		r := l.findDupesChecksums(partialChecksum)
   369 		dupeList = append(dupeList, r...)
   375 		dupeList = append(dupeList, r...)
   370 	}
   376 	}
   400 			delete(data.sizeGroups, s)
   406 			delete(data.sizeGroups, s)
   401 			uniqueSizeCount++
   407 			uniqueSizeCount++
   402 			continue
   408 			continue
   403 		}
   409 		}
   404 
   410 
       
   411 		// We can't look for hard links if the O.S. does not support
       
   412 		// them...
       
   413 		if !OSHasInodes() {
       
   414 			continue
       
   415 		}
       
   416 
   405 		var hardlinksFound bool
   417 		var hardlinksFound bool
   406 
   418 
   407 		// Check for hardlinks
   419 		// Check for hard links
   408 		// Remove unique dev/inodes
   420 		// Remove unique dev/inodes
   409 		// Instead of this loop, another way would be to use the field
   421 		// Instead of this loop, another way would be to use the field
   410 		// "Unique" of the fileObj to mark them to be discarded
   422 		// "Unique" of the fileObj to mark them to be discarded
   411 		// and remove them all at the end.
   423 		// and remove them all at the end.
   412 		for {
   424 		for {
   413 			if !OSHasInodes() {
       
   414 				break
       
   415 			}
       
   416 			var hardLinkIndex int
   425 			var hardLinkIndex int
   417 			fo := sizeGroup.files[0]
   426 			fo := sizeGroup.files[0]
   418 			prevDev, prevIno := GetDevIno(fo)
   427 			prevDev, prevIno := GetDevIno(fo)
   419 
   428 
   420 			for i, fo := range sizeGroup.files[1:] {
   429 			for i, fo := range sizeGroup.files[1:] {