goduf.go
changeset 22 46681d21157a
parent 21 dee0e0c1ad10
child 23 9ce0f2e2a33f
equal deleted inserted replaced
21:dee0e0c1ad10 22:46681d21157a
   265 	// Compute checksums
   265 	// Compute checksums
   266 	for _, fo := range bigFileList {
   266 	for _, fo := range bigFileList {
   267 		if err := fo.Sum(fo.needHash); err != nil {
   267 		if err := fo.Sum(fo.needHash); err != nil {
   268 			myLog.Println(0, "Error:", err)
   268 			myLog.Println(0, "Error:", err)
   269 		}
   269 		}
       
   270 		fo.needHash = noChecksum
   270 	}
   271 	}
   271 }
   272 }
   272 
   273 
   273 func (fileList FileObjList) scheduleChecksum(sType sumType) {
   274 func (fileList FileObjList) scheduleChecksum(sType sumType) {
   274 	for _, fo := range fileList {
   275 	for _, fo := range fileList {
   276 	}
   277 	}
   277 }
   278 }
   278 
   279 
   279 // findDupesChecksums splits the fileObj list into several lists with the
   280 // findDupesChecksums splits the fileObj list into several lists with the
   280 // same sType hash.
   281 // same sType hash.
   281 func (fileList FileObjList) findDupesChecksums(sType sumType) foListList {
   282 func (fileList FileObjList) findDupesChecksums(sType sumType, dryRun bool) foListList {
   282 	var dupeList foListList
   283 	var dupeList foListList
   283 	var scheduleFull foListList
   284 	var scheduleFull foListList
   284 	hashes := make(map[string]FileObjList)
   285 	hashes := make(map[string]FileObjList)
   285 
   286 
   286 	// Sort the list for better efficiency
   287 	// Sort the list for better efficiency
   287 	sort.Sort(ByInode(fileList))
   288 	sort.Sort(ByInode(fileList))
   288 
   289 
       
   290 	if sType == fullChecksum && dryRun {
       
   291 		fileList.scheduleChecksum(fullChecksum)
       
   292 		return append(dupeList, fileList)
       
   293 	}
   289 	// Compute checksums
   294 	// Compute checksums
   290 	for _, fo := range fileList {
   295 	for _, fo := range fileList {
   291 		hash, err := fo.checksum(sType)
   296 		hash, err := fo.checksum(sType)
   292 		if err != nil {
   297 		if err != nil {
   293 			myLog.Println(0, "Error:", err)
   298 			myLog.Println(0, "Error:", err)
   307 			dupeList = append(dupeList, l)
   312 			dupeList = append(dupeList, l)
   308 			myLog.Printf(5, "  . found %d new duplicates\n", len(l))
   313 			myLog.Printf(5, "  . found %d new duplicates\n", len(l))
   309 		}
   314 		}
   310 	}
   315 	}
   311 	if sType == partialChecksum && len(scheduleFull) > 0 {
   316 	if sType == partialChecksum && len(scheduleFull) > 0 {
   312 		computeSheduledChecksums(scheduleFull)
   317 		//computeSheduledChecksums(scheduleFull)
   313 		for _, l := range scheduleFull {
   318 		for _, l := range scheduleFull {
   314 			r := l.findDupesChecksums(fullChecksum)
   319 			r := l.findDupesChecksums(fullChecksum, dryRun)
   315 			dupeList = append(dupeList, r...)
   320 			dupeList = append(dupeList, r...)
       
   321 		}
       
   322 		if dryRun {
       
   323 			return scheduleFull
   316 		}
   324 		}
   317 	}
   325 	}
   318 
   326 
   319 	return dupeList
   327 	return dupeList
   320 }
   328 }
   321 
   329 
   322 // findDupes() uses checksums to find file duplicates
   330 // findDupes() uses checksums to find file duplicates
   323 func (data *dataT) findDupes(skipPartial bool) foListList {
   331 func (data *dataT) findDupes(skipPartial bool) foListList {
   324 	var dupeList foListList
   332 	var dupeList foListList
   325 	var schedulePartial foListList
   333 	var schedulePartial foListList
       
   334 	var schedulePartial2 foListList
   326 	var scheduleFull foListList
   335 	var scheduleFull foListList
   327 
   336 
   328 	for size, sgListP := range data.sizeGroups {
   337 	for size, sgListP := range data.sizeGroups {
   329 		// We skip partial checksums for small files or if requested
   338 		// We skip partial checksums for small files or if requested
   330 		if size > minSizePartialChecksum && !skipPartial {
   339 		if size > minSizePartialChecksum && !skipPartial {
   337 	}
   346 	}
   338 
   347 
   339 	computeSheduledChecksums(schedulePartial, scheduleFull)
   348 	computeSheduledChecksums(schedulePartial, scheduleFull)
   340 
   349 
   341 	for _, l := range schedulePartial {
   350 	for _, l := range schedulePartial {
   342 		r := l.findDupesChecksums(partialChecksum)
   351 		r := l.findDupesChecksums(partialChecksum, true) // dry-run
       
   352 		schedulePartial2 = append(schedulePartial2, r...)
       
   353 	}
       
   354 	computeSheduledChecksums(schedulePartial2)
       
   355 	for _, l := range schedulePartial {
       
   356 		r := l.findDupesChecksums(partialChecksum, false)
   343 		dupeList = append(dupeList, r...)
   357 		dupeList = append(dupeList, r...)
   344 	}
   358 	}
   345 	for _, l := range scheduleFull {
   359 	for _, l := range scheduleFull {
   346 		r := l.findDupesChecksums(fullChecksum)
   360 		r := l.findDupesChecksums(fullChecksum, false)
   347 		dupeList = append(dupeList, r...)
   361 		dupeList = append(dupeList, r...)
   348 	}
   362 	}
   349 	return dupeList
   363 	return dupeList
   350 }
   364 }
   351 
   365