mirror of
https://github.com/JaCoB1123/dupe-finder.git
synced 2025-05-18 06:01:56 +02:00
Parallel
This commit is contained in:
parent
8007b5686d
commit
a3fa3d4e7c
124
filesmap.go
124
filesmap.go
@ -1,65 +1,91 @@
|
||||
package main
|
||||
|
||||
import "os"
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// FilesMap is a struct for listing files by Size and Hash to search for duplicates
|
||||
type FilesMap struct {
|
||||
FilesBySize map[int64]map[string][]string
|
||||
}
|
||||
FilesBySize map[int64][]string
|
||||
|
||||
// Add a file to the Map and calculate hash on demand
|
||||
func (fm *FilesMap) Add(path string, info os.FileInfo) error {
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
FilesByHash map[string][]string
|
||||
|
||||
filesByHash := fm.FilesBySize[info.Size()]
|
||||
FilesHashing chan fileEntry
|
||||
|
||||
// first file with same size
|
||||
// => create new map for size
|
||||
if filesByHash == nil {
|
||||
filesByHash = map[string][]string{}
|
||||
fm.FilesBySize[info.Size()] = filesByHash
|
||||
filesByHash[""] = []string{path}
|
||||
return nil
|
||||
}
|
||||
FilesIncoming chan fileEntry
|
||||
|
||||
// second file with same size
|
||||
// => calculate hashes for all entries
|
||||
if _, hasEmptyHash := filesByHash[""]; hasEmptyHash {
|
||||
err := appendByFileHash(filesByHash, fileInfo)
|
||||
err2 := appendByFileHash(filesByHash, filesByHash[""][0])
|
||||
FilesHashed chan fileEntry
|
||||
|
||||
delete(filesByHash, "")
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return err2
|
||||
}
|
||||
|
||||
// for later files always append by hash
|
||||
return appendByFileHash(filesByHash, fileInfo)
|
||||
}
|
||||
|
||||
func appendByFileHash(filesByHash map[string][]string, path string) error {
|
||||
hash, err := calculateHash(path)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if _, ok := filesByHash[hash]; ok {
|
||||
filesByHash[hash] = append(filesByHash[hash], path)
|
||||
} else {
|
||||
filesByHash[hash] = []string{path}
|
||||
}
|
||||
return nil
|
||||
lock sync.Mutex
|
||||
}
|
||||
|
||||
func newFilesMap() *FilesMap {
|
||||
return &FilesMap{
|
||||
FilesBySize: map[int64]map[string][]string{},
|
||||
FilesBySize: map[int64][]string{},
|
||||
FilesByHash: map[string][]string{},
|
||||
FilesHashed: make(chan fileEntry),
|
||||
FilesIncoming: make(chan fileEntry),
|
||||
FilesHashing: make(chan fileEntry),
|
||||
}
|
||||
}
|
||||
|
||||
func (fm *FilesMap) IncomingWorker() {
|
||||
for file := range fm.FilesIncoming {
|
||||
if *verbose {
|
||||
fmt.Println("Incoming", file.path)
|
||||
}
|
||||
|
||||
files, ok := fm.FilesBySize[file.size]
|
||||
if !ok {
|
||||
files = []string{file.path}
|
||||
fm.FilesBySize[file.size] = files
|
||||
continue
|
||||
}
|
||||
|
||||
if len(files) == 1 {
|
||||
fm.FilesHashing <- fileEntry{files[0], file.size, ""}
|
||||
}
|
||||
|
||||
fm.FilesHashing <- file
|
||||
}
|
||||
close(fm.FilesHashing)
|
||||
}
|
||||
|
||||
func (fm *FilesMap) HashingWorker() {
|
||||
for file := range fm.FilesHashing {
|
||||
if *verbose {
|
||||
fmt.Println("Hashing", file.path)
|
||||
}
|
||||
|
||||
hash, err := calculateHash(file.path)
|
||||
|
||||
if err != nil {
|
||||
log.Printf("Error calculating Hash for %s: %v\n", file, err)
|
||||
continue
|
||||
}
|
||||
|
||||
file.hash = hash
|
||||
fm.FilesHashed <- file
|
||||
}
|
||||
close(fm.FilesHashed)
|
||||
}
|
||||
|
||||
func (fm *FilesMap) HashedWorker(done chan bool) {
|
||||
for file := range fm.FilesHashed {
|
||||
if *verbose {
|
||||
fmt.Println("Finishing", file.path)
|
||||
}
|
||||
|
||||
fm.lock.Lock()
|
||||
if _, ok := fm.FilesByHash[file.hash]; ok {
|
||||
fm.FilesByHash[file.hash] = append(fm.FilesByHash[file.hash], file.path)
|
||||
} else {
|
||||
fm.FilesByHash[file.hash] = []string{file.path}
|
||||
}
|
||||
fm.lock.Unlock()
|
||||
}
|
||||
|
||||
done <- true
|
||||
}
|
||||
|
48
main.go
48
main.go
@ -29,32 +29,45 @@ func main() {
|
||||
|
||||
filesMap := newFilesMap()
|
||||
if *fromFile != "" {
|
||||
fmt.Println("Loading file", *fromFile)
|
||||
|
||||
byteValue, _ := ioutil.ReadFile(*fromFile)
|
||||
err := json.Unmarshal(byteValue, &filesMap.FilesBySize)
|
||||
err := json.Unmarshal(byteValue, &filesMap.FilesByHash)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
} else {
|
||||
done := make(chan bool)
|
||||
//for i := 0; i < runtime.GOMAXPROCS(0); i++ {
|
||||
go filesMap.HashingWorker()
|
||||
//}
|
||||
|
||||
go filesMap.IncomingWorker()
|
||||
|
||||
go filesMap.HashedWorker(done)
|
||||
|
||||
for _, path := range flag.Args() {
|
||||
filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
|
||||
filesMap.Add(path, info)
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
filesMap.FilesIncoming <- fileEntry{path, info.Size(), ""}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
close(filesMap.FilesIncoming)
|
||||
<-done
|
||||
}
|
||||
|
||||
if *toFile != "" && *fromFile == "" {
|
||||
json, _ := json.MarshalIndent(filesMap.FilesBySize, "", " ")
|
||||
json, _ := json.MarshalIndent(filesMap.FilesByHash, "", " ")
|
||||
ioutil.WriteFile(*toFile, json, 644)
|
||||
}
|
||||
|
||||
if *deleteDupesIn != "" {
|
||||
deleteIn := filepath.Clean(*deleteDupesIn)
|
||||
for size := range filesMap.FilesBySize {
|
||||
for hash := range filesMap.FilesBySize[size] {
|
||||
duplicateFiles := filesMap.FilesBySize[size][hash]
|
||||
for hash := range filesMap.FilesByHash {
|
||||
duplicateFiles := filesMap.FilesByHash[hash]
|
||||
if len(duplicateFiles) <= 1 {
|
||||
continue
|
||||
}
|
||||
@ -68,12 +81,10 @@ func main() {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if *promptForDelete {
|
||||
reader := bufio.NewReader(os.Stdin)
|
||||
for size := range filesMap.FilesBySize {
|
||||
for hash := range filesMap.FilesBySize[size] {
|
||||
duplicateFiles := filesMap.FilesBySize[size][hash]
|
||||
for hash := range filesMap.FilesByHash {
|
||||
duplicateFiles := filesMap.FilesByHash[hash]
|
||||
if len(duplicateFiles) <= 1 {
|
||||
continue
|
||||
}
|
||||
@ -105,14 +116,12 @@ func main() {
|
||||
if *force {
|
||||
remove(file)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for size := range filesMap.FilesBySize {
|
||||
for hash := range filesMap.FilesBySize[size] {
|
||||
duplicateFiles := filesMap.FilesBySize[size][hash]
|
||||
for hash := range filesMap.FilesByHash {
|
||||
duplicateFiles := filesMap.FilesByHash[hash]
|
||||
if len(duplicateFiles) <= 1 {
|
||||
continue
|
||||
}
|
||||
@ -123,7 +132,6 @@ func main() {
|
||||
fmt.Println()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func printConfiguration() {
|
||||
@ -139,3 +147,9 @@ func printConfiguration() {
|
||||
fmt.Println()
|
||||
fmt.Println()
|
||||
}
|
||||
|
||||
// fileEntry is the unit of work passed between the FilesMap worker channels.
// Callers build it with unkeyed literals (path, size, hash), so the field
// order must not change.
type fileEntry struct {
|
||||
// path is the file's path; it is what gets printed, hashed and stored.
path string
|
||||
// size is the file size, filled from os.FileInfo.Size() by the directory walk
// in main and used as the key of FilesBySize.
size int64
|
||||
// hash is empty when the entry is created and is set by HashingWorker.
hash string
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user