// FilesMap indexes file paths by size and then by content hash so that
// duplicate files can be found without hashing files whose size is unique.
type FilesMap struct {
	// FilesBySize maps a file size to a map from content hash to the paths
	// that have this size and hash. While only one file of a given size has
	// been added, its path is stored under the empty-string key "" and its
	// hash has not been calculated yet.
	FilesBySize map[int64]map[string][]string
}

// Add registers the file at path, whose metadata is info, in the map.
//
// Directories are ignored. The first file of a given size is stored under
// the placeholder key "" without being hashed. When a second file of that
// size is added, the new file and the file(s) parked under the placeholder
// are hashed and the placeholder is removed; every later file of that size is
// hashed right away.
//
// Add returns the error from hashing the new file if there is one, otherwise
// the first error from hashing a previously parked file, otherwise nil.
func (fm *FilesMap) Add(path string, info os.FileInfo) error {
	if info.IsDir() {
		return nil
	}

	// Keep the zero value of FilesMap usable: writing to a nil map panics.
	if fm.FilesBySize == nil {
		fm.FilesBySize = map[int64]map[string][]string{}
	}

	size := info.Size()
	filesByHash := fm.FilesBySize[size]

	// First file with this size: park it unhashed under the placeholder key.
	if filesByHash == nil {
		fm.FilesBySize[size] = map[string][]string{"": {path}}
		return nil
	}

	parked, hasParked := filesByHash[""]
	if !hasParked {
		// Third or later file with this size: everything is already hashed.
		return appendByFileHash(filesByHash, path)
	}

	// Second file with this size: hash the new file first, then the parked
	// one(s), so that the resulting path order is "new file, parked file".
	// A real hash is never the empty string, so removing the placeholder
	// before hashing cannot drop a hashed entry.
	delete(filesByHash, "")

	err := appendByFileHash(filesByHash, path)

	var parkedErr error
	for _, p := range parked {
		if e := appendByFileHash(filesByHash, p); e != nil && parkedErr == nil {
			parkedErr = e
		}
	}

	if err != nil {
		return err
	}
	return parkedErr
}

// appendByFileHash hashes the file at fileInfo (a path) and appends that path
// to the list stored under its hash in filesByHash. If hashing fails the map
// is left untouched and the error is returned.
func appendByFileHash(filesByHash map[string][]string, fileInfo string) error {
	hash, err := calculateHash(fileInfo)
	if err != nil {
		return err
	}

	// append on a missing (nil) entry allocates the slice, so no separate
	// existence check is needed.
	filesByHash[hash] = append(filesByHash[hash], fileInfo)
	return nil
}

// newFilesMap returns an empty FilesMap that is ready for use.
func newFilesMap() *FilesMap {
	return &FilesMap{
		FilesBySize: map[int64]map[string][]string{},
	}
}

// calculateHash streams the file at path through SHA-256 and returns the
// digest encoded as unpadded standard base64. It returns an error if the file
// cannot be opened or read.
func calculateHash(path string) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		return "", err
	}

	return base64.RawStdEncoding.EncodeToString(h.Sum(nil)), nil
}