diff --git a/filesmap.go b/filesmap.go index 386fe00..13d0359 100644 --- a/filesmap.go +++ b/filesmap.go @@ -1,65 +1,91 @@ package main -import "os" +import ( + "fmt" + "log" + "sync" +) // FilesMap is a struct for listing files by Size and Hash to search for duplicates type FilesMap struct { - FilesBySize map[int64]map[string][]string -} + FilesBySize map[int64][]string -// Add a file to the Map and calculate hash on demand -func (fm *FilesMap) Add(path string, info os.FileInfo) error { - if info.IsDir() { - return nil - } + FilesByHash map[string][]string - filesByHash := fm.FilesBySize[info.Size()] + FilesHashing chan fileEntry - // first file with same size - // => create new map for size - if filesByHash == nil { - filesByHash = map[string][]string{} - fm.FilesBySize[info.Size()] = filesByHash - filesByHash[""] = []string{path} - return nil - } + FilesIncoming chan fileEntry - // second file with same size - // => calculate hashes for all entries - if _, hasEmptyHash := filesByHash[""]; hasEmptyHash { - err := appendByFileHash(filesByHash, fileInfo) - err2 := appendByFileHash(filesByHash, filesByHash[""][0]) + FilesHashed chan fileEntry - delete(filesByHash, "") - - if err != nil { - return err - } - - return err2 - } - - // for later files always append by hash - return appendByFileHash(filesByHash, fileInfo) -} - -func appendByFileHash(filesByHash map[string][]string, path string) error { - hash, err := calculateHash(path) - - if err != nil { - return err - } - - if _, ok := filesByHash[hash]; ok { - filesByHash[hash] = append(filesByHash[hash], path) - } else { - filesByHash[hash] = []string{path} - } - return nil + lock sync.Mutex } func newFilesMap() *FilesMap { return &FilesMap{ - FilesBySize: map[int64]map[string][]string{}, + FilesBySize: map[int64][]string{}, + FilesByHash: map[string][]string{}, + FilesHashed: make(chan fileEntry), + FilesIncoming: make(chan fileEntry), + FilesHashing: make(chan fileEntry), } } + +func (fm *FilesMap) IncomingWorker() { + for file := range fm.FilesIncoming { + if *verbose { + fmt.Println("Incoming", file.path) + } + + files, ok := fm.FilesBySize[file.size] + if !ok { + files = []string{file.path} + fm.FilesBySize[file.size] = files + continue + } + + if len(files) == 1 { + fm.FilesHashing <- fileEntry{files[0], file.size, ""} + } + + fm.FilesHashing <- file + } + close(fm.FilesHashing) +} + +func (fm *FilesMap) HashingWorker() { + for file := range fm.FilesHashing { + if *verbose { + fmt.Println("Hashing", file.path) + } + + hash, err := calculateHash(file.path) + + if err != nil { + log.Printf("Error calculating Hash for %s: %v\n", file, err) + continue + } + + file.hash = hash + fm.FilesHashed <- file + } + close(fm.FilesHashed) +} + +func (fm *FilesMap) HashedWorker(done chan bool) { + for file := range fm.FilesHashed { + if *verbose { + fmt.Println("Finishing", file.path) + } + + fm.lock.Lock() + if _, ok := fm.FilesByHash[file.hash]; ok { + fm.FilesByHash[file.hash] = append(fm.FilesByHash[file.hash], file.path) + } else { + fm.FilesByHash[file.hash] = []string{file.path} + } + fm.lock.Unlock() + } + + done <- true +} diff --git a/main.go b/main.go index 558365e..bef6647 100644 --- a/main.go +++ b/main.go @@ -29,99 +29,107 @@ func main() { filesMap := newFilesMap() if *fromFile != "" { - fmt.Println("Loading file", *fromFile) - byteValue, _ := ioutil.ReadFile(*fromFile) - err := json.Unmarshal(byteValue, &filesMap.FilesBySize) + err := json.Unmarshal(byteValue, &filesMap.FilesByHash) if err != nil { panic(err) } } else { + done := make(chan bool) + //for i := 0; i < runtime.GOMAXPROCS(0); i++ { + go filesMap.HashingWorker() + //} + + go filesMap.IncomingWorker() + + go filesMap.HashedWorker(done) + for _, path := range flag.Args() { filepath.Walk(path, func(path string, info os.FileInfo, err error) error { - filesMap.Add(path, info) + if info.IsDir() { + return nil + } + + filesMap.FilesIncoming <- fileEntry{path, info.Size(), ""} return nil }) } + + close(filesMap.FilesIncoming) + <-done } if *toFile != "" && *fromFile == "" { - json, _ := json.MarshalIndent(filesMap.FilesBySize, "", " ") + json, _ := json.MarshalIndent(filesMap.FilesByHash, "", " ") ioutil.WriteFile(*toFile, json, 644) } if *deleteDupesIn != "" { deleteIn := filepath.Clean(*deleteDupesIn) - for size := range filesMap.FilesBySize { - for hash := range filesMap.FilesBySize[size] { - duplicateFiles := filesMap.FilesBySize[size][hash] - if len(duplicateFiles) <= 1 { - continue - } + for hash := range filesMap.FilesByHash { + duplicateFiles := filesMap.FilesByHash[hash] + if len(duplicateFiles) <= 1 { + continue + } - for _, file := range duplicateFiles { - if strings.HasPrefix(filepath.Clean(file), deleteIn) { - fmt.Println("Would delete ", file) - if *force { - remove(file) - } + for _, file := range duplicateFiles { + if strings.HasPrefix(filepath.Clean(file), deleteIn) { + fmt.Println("Would delete ", file) + if *force { + remove(file) } } } } } else if *promptForDelete { reader := bufio.NewReader(os.Stdin) - for size := range filesMap.FilesBySize { - for hash := range filesMap.FilesBySize[size] { - duplicateFiles := filesMap.FilesBySize[size][hash] - if len(duplicateFiles) <= 1 { + for hash := range filesMap.FilesByHash { + duplicateFiles := filesMap.FilesByHash[hash] + if len(duplicateFiles) <= 1 { + continue + } + + fmt.Print("\033[H\033[2J") + for i, file := range duplicateFiles { + fmt.Println(i+1, file) + } + + fmt.Printf("Which file to keep? ") + input, err := reader.ReadString('\n') + if err != nil { + fmt.Println("Invalid input") + continue + } + + input = strings.TrimRight(input, "\n\r") + intInput, err := strconv.Atoi(input) + if err != nil || intInput > len(duplicateFiles) || intInput < 1 { + fmt.Println("Invalid input") + continue + } + + for i, file := range duplicateFiles { + if i+1 == intInput { continue } - fmt.Print("\033[H\033[2J") - for i, file := range duplicateFiles { - fmt.Println(i+1, file) - } - - fmt.Printf("Which file to keep? ") - input, err := reader.ReadString('\n') - if err != nil { - fmt.Println("Invalid input") - continue - } - - input = strings.TrimRight(input, "\n\r") - intInput, err := strconv.Atoi(input) - if err != nil || intInput > len(duplicateFiles) || intInput < 1 { - fmt.Println("Invalid input") - continue - } - - for i, file := range duplicateFiles { - if i+1 == intInput { - continue - } - - if *force { - remove(file) - } + if *force { + remove(file) } } } } else { - for size := range filesMap.FilesBySize { - for hash := range filesMap.FilesBySize[size] { - duplicateFiles := filesMap.FilesBySize[size][hash] - if len(duplicateFiles) <= 1 { - continue - } - - for _, file := range duplicateFiles { - fmt.Println(file) - } - fmt.Println() + for hash := range filesMap.FilesByHash { + duplicateFiles := filesMap.FilesByHash[hash] + if len(duplicateFiles) <= 1 { + continue } + + for _, file := range duplicateFiles { + fmt.Println(file) + } + fmt.Println() } } } @@ -139,3 +147,9 @@ func printConfiguration() { fmt.Println() fmt.Println() } + +type fileEntry struct { + path string + size int64 + hash string +}