This commit is contained in:
Jan Bader 2020-11-22 00:55:12 +01:00
parent 8007b5686d
commit a3fa3d4e7c
2 changed files with 148 additions and 108 deletions

View File

@ -1,65 +1,91 @@
package main
import "os"
import (
"fmt"
"log"
"sync"
)
// FilesMap is a struct for listing files by Size and Hash to search for duplicates
type FilesMap struct {
FilesBySize map[int64]map[string][]string
}
FilesBySize map[int64][]string
// Add a file to the Map and calculate hash on demand
func (fm *FilesMap) Add(path string, info os.FileInfo) error {
if info.IsDir() {
return nil
}
FilesByHash map[string][]string
filesByHash := fm.FilesBySize[info.Size()]
FilesHashing chan fileEntry
// first file with same size
// => create new map for size
if filesByHash == nil {
filesByHash = map[string][]string{}
fm.FilesBySize[info.Size()] = filesByHash
filesByHash[""] = []string{path}
return nil
}
FilesIncoming chan fileEntry
// second file with same size
// => calculate hashes for all entries
if _, hasEmptyHash := filesByHash[""]; hasEmptyHash {
err := appendByFileHash(filesByHash, fileInfo)
err2 := appendByFileHash(filesByHash, filesByHash[""][0])
FilesHashed chan fileEntry
delete(filesByHash, "")
if err != nil {
return err
}
return err2
}
// for later files always append by hash
return appendByFileHash(filesByHash, fileInfo)
}
func appendByFileHash(filesByHash map[string][]string, path string) error {
hash, err := calculateHash(path)
if err != nil {
return err
}
if _, ok := filesByHash[hash]; ok {
filesByHash[hash] = append(filesByHash[hash], path)
} else {
filesByHash[hash] = []string{path}
}
return nil
lock sync.Mutex
}
// newFilesMap returns a pointer to a FilesMap with its maps initialised to
// empty maps and its three channels created without a buffer size.
func newFilesMap() *FilesMap {
return &FilesMap{
// NOTE(review): FilesBySize is listed twice with different value types;
// presumably these are the before/after lines of the diff. Confirm which
// one applies to the version being built.
FilesBySize: map[int64]map[string][]string{},
FilesBySize: map[int64][]string{},
FilesByHash: map[string][]string{},
FilesHashed: make(chan fileEntry),
FilesIncoming: make(chan fileEntry),
FilesHashing: make(chan fileEntry),
}
}
// IncomingWorker consumes fm.FilesIncoming and forwards to fm.FilesHashing
// every file whose size has been seen more than once.
//
// The first file of a given size is only recorded in fm.FilesBySize. When a
// second file of that size arrives, the recorded first file is forwarded
// followed by the new one; later files of that size are forwarded directly.
// Every path is appended to its size bucket, so the first file is forwarded
// exactly once no matter how many same-sized files follow it.
//
// Once fm.FilesIncoming is closed and drained, fm.FilesHashing is closed.
func (fm *FilesMap) IncomingWorker() {
	for file := range fm.FilesIncoming {
		if *verbose {
			fmt.Println("Incoming", file.path)
		}

		// Record the path before deciding what to forward; without this the
		// bucket length would stay at 1 and the first file would be re-sent
		// for every later file of the same size.
		files := fm.FilesBySize[file.size]
		fm.FilesBySize[file.size] = append(files, file.path)

		switch len(files) {
		case 0:
			// First file of this size: nothing to compare against yet.
			continue
		case 1:
			// Second file of this size: the first one now needs hashing too.
			fm.FilesHashing <- fileEntry{files[0], file.size, ""}
		}
		fm.FilesHashing <- file
	}
	close(fm.FilesHashing)
}
// HashingWorker consumes fm.FilesHashing, computes the hash of each file via
// calculateHash, stores it in the entry's hash field and forwards the entry
// to fm.FilesHashed.
//
// Files whose hash cannot be calculated are logged and dropped; they are not
// forwarded. Once fm.FilesHashing is closed and drained, fm.FilesHashed is
// closed. Because closing an already closed channel panics, at most one
// HashingWorker may run per FilesMap as written.
func (fm *FilesMap) HashingWorker() {
	for file := range fm.FilesHashing {
		if *verbose {
			fmt.Println("Hashing", file.path)
		}

		hash, err := calculateHash(file.path)
		if err != nil {
			// Log the path only: passing the whole fileEntry to %s prints
			// the struct with a bad-verb marker for its int64 size field.
			log.Printf("Error calculating Hash for %s: %v\n", file.path, err)
			continue
		}

		file.hash = hash
		fm.FilesHashed <- file
	}
	close(fm.FilesHashed)
}
// HashedWorker drains fm.FilesHashed and groups the received paths by hash in
// fm.FilesByHash, holding fm.lock around each map update. After the channel
// has been closed and drained it sends true on done.
func (fm *FilesMap) HashedWorker(done chan bool) {
	for entry := range fm.FilesHashed {
		if *verbose {
			fmt.Println("Finishing", entry.path)
		}

		fm.lock.Lock()
		// append on a missing key starts from a nil slice, so a new hash
		// gets a one-element slice and a known hash grows by one.
		fm.FilesByHash[entry.hash] = append(fm.FilesByHash[entry.hash], entry.path)
		fm.lock.Unlock()
	}
	done <- true
}

48
main.go
View File

@ -29,32 +29,45 @@ func main() {
filesMap := newFilesMap()
if *fromFile != "" {
fmt.Println("Loading file", *fromFile)
byteValue, _ := ioutil.ReadFile(*fromFile)
err := json.Unmarshal(byteValue, &filesMap.FilesBySize)
err := json.Unmarshal(byteValue, &filesMap.FilesByHash)
if err != nil {
panic(err)
}
} else {
done := make(chan bool)
//for i := 0; i < runtime.GOMAXPROCS(0); i++ {
go filesMap.HashingWorker()
//}
go filesMap.IncomingWorker()
go filesMap.HashedWorker(done)
for _, path := range flag.Args() {
filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
filesMap.Add(path, info)
if info.IsDir() {
return nil
}
filesMap.FilesIncoming <- fileEntry{path, info.Size(), ""}
return nil
})
}
close(filesMap.FilesIncoming)
<-done
}
if *toFile != "" && *fromFile == "" {
json, _ := json.MarshalIndent(filesMap.FilesBySize, "", " ")
json, _ := json.MarshalIndent(filesMap.FilesByHash, "", " ")
ioutil.WriteFile(*toFile, json, 644)
}
if *deleteDupesIn != "" {
deleteIn := filepath.Clean(*deleteDupesIn)
for size := range filesMap.FilesBySize {
for hash := range filesMap.FilesBySize[size] {
duplicateFiles := filesMap.FilesBySize[size][hash]
for hash := range filesMap.FilesByHash {
duplicateFiles := filesMap.FilesByHash[hash]
if len(duplicateFiles) <= 1 {
continue
}
@ -68,12 +81,10 @@ func main() {
}
}
}
}
} else if *promptForDelete {
reader := bufio.NewReader(os.Stdin)
for size := range filesMap.FilesBySize {
for hash := range filesMap.FilesBySize[size] {
duplicateFiles := filesMap.FilesBySize[size][hash]
for hash := range filesMap.FilesByHash {
duplicateFiles := filesMap.FilesByHash[hash]
if len(duplicateFiles) <= 1 {
continue
}
@ -105,14 +116,12 @@ func main() {
if *force {
remove(file)
}
}
}
}
} else {
for size := range filesMap.FilesBySize {
for hash := range filesMap.FilesBySize[size] {
duplicateFiles := filesMap.FilesBySize[size][hash]
for hash := range filesMap.FilesByHash {
duplicateFiles := filesMap.FilesByHash[hash]
if len(duplicateFiles) <= 1 {
continue
}
@ -124,7 +133,6 @@ func main() {
}
}
}
}
func printConfiguration() {
fmt.Printf("fromFile: \"%v\"\n", *fromFile)
@ -139,3 +147,9 @@ func printConfiguration() {
fmt.Println()
fmt.Println()
}
// fileEntry describes one file as it is passed between the worker channels.
// Callers build it with positional literals, so the field order must not
// change.
type fileEntry struct {
// path is the file's path as reported by the directory walk.
path string
// size is the file size in bytes (os.FileInfo.Size()).
size int64
// hash is empty until HashingWorker has filled it in.
hash string
}