Extract filesmap to own file

This commit is contained in:
Jan Bader 2020-11-21 22:14:42 +01:00
parent 8a9bcbf62e
commit 87c8a6e817
3 changed files with 85 additions and 82 deletions

18
file.go
View File

@ -1,6 +1,9 @@
package main
import (
"crypto/sha256"
"encoding/base64"
"io"
"os"
"path/filepath"
"strconv"
@ -37,3 +40,18 @@ func moveButDontOvewrite(path string, targetPath string) {
num++
}
}
// calculateHash streams the file at path through SHA-256 and returns the
// digest encoded as unpadded standard base64.
//
// If the file cannot be opened or read, the underlying error is returned
// unchanged together with an empty string.
func calculateHash(path string) (string, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return "", openErr
	}
	defer file.Close()

	// io.Copy feeds the file into the hasher without loading it all at once.
	hasher := sha256.New()
	_, copyErr := io.Copy(hasher, file)
	if copyErr != nil {
		return "", copyErr
	}

	digest := hasher.Sum(nil)
	return base64.RawStdEncoding.EncodeToString(digest), nil
}

67
filesmap.go Normal file
View File

@ -0,0 +1,67 @@
package main
import "os"
// FilesMap is a struct for listing files by Size and Hash to search for duplicates.
type FilesMap struct {
// FilesBySize is keyed by file size; each value maps a content hash to the
// paths recorded under it. While a size has only been seen once, Add stores
// that single path under the empty-string key "" instead of a real hash.
FilesBySize map[int64]map[string][]string
}
// Add registers the file at path in the map, grouped first by its size and
// then by its content hash. Hashes are calculated on demand: nothing is
// hashed until a second file with the same size shows up.
//
// Directories are ignored and yield a nil error.
//
// The zero value of FilesMap is usable: FilesBySize is allocated on first use
// rather than panicking on a write to a nil map.
//
// The returned error comes from hashing. When a second file of a given size
// is added, both the new file and the previously parked file are hashed; if
// hashing the new file fails its error is returned, otherwise the error (if
// any) from hashing the parked file is returned.
func (fm *FilesMap) Add(path string, info os.FileInfo) error {
	if info.IsDir() {
		return nil
	}

	// Make the zero value safe: assigning into a nil map would panic.
	if fm.FilesBySize == nil {
		fm.FilesBySize = map[int64]map[string][]string{}
	}

	size := info.Size()
	filesByHash := fm.FilesBySize[size]

	// First file with this size: park it under the "" key without hashing.
	if filesByHash == nil {
		fm.FilesBySize[size] = map[string][]string{"": {path}}
		return nil
	}

	// Second file with this size: hash the new file and the parked one, then
	// drop the placeholder entry. The new file is hashed first, so when both
	// hashes match the new path precedes the parked path in the slice.
	if _, hasEmptyHash := filesByHash[""]; hasEmptyHash {
		err := appendByFileHash(filesByHash, path)
		err2 := appendByFileHash(filesByHash, filesByHash[""][0])
		// The placeholder is removed even if hashing failed.
		delete(filesByHash, "")
		if err != nil {
			return err
		}
		return err2
	}

	// Any later file with this size is hashed and appended straight away.
	return appendByFileHash(filesByHash, path)
}
// appendByFileHash hashes the file at fileInfo with calculateHash and appends
// the path to the slice stored in filesByHash under that hash.
//
// If hashing fails, the error is returned and filesByHash is left untouched.
func appendByFileHash(filesByHash map[string][]string, fileInfo string) error {
	digest, hashErr := calculateHash(fileInfo)
	if hashErr != nil {
		return hashErr
	}

	// A missing key yields a nil slice, and append on nil creates a fresh
	// one-element slice, so no existence check is needed.
	filesByHash[digest] = append(filesByHash[digest], fileInfo)
	return nil
}
// newFilesMap returns a pointer to a FilesMap whose FilesBySize map is
// allocated and empty.
func newFilesMap() *FilesMap {
	fm := new(FilesMap)
	fm.FilesBySize = make(map[int64]map[string][]string)
	return fm
}

82
main.go
View File

@ -2,12 +2,9 @@ package main
import (
"bufio"
"crypto/sha256"
"encoding/base64"
"encoding/json"
"flag"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
@ -142,82 +139,3 @@ func printConfiguration() {
fmt.Println()
fmt.Println()
}
// FilesMap is a struct for listing files by Size and Hash to search for duplicates.
type FilesMap struct {
// FilesBySize is keyed by file size; each value maps a content hash to the
// paths recorded under it. While a size has only been seen once, Add stores
// that single path under the empty-string key "" instead of a real hash.
FilesBySize map[int64]map[string][]string
}
// Add registers the file at path in the map, grouped first by its size and
// then by its content hash. Hashes are calculated on demand: nothing is
// hashed until a second file with the same size shows up.
//
// Directories are ignored and yield a nil error.
//
// The zero value of FilesMap is usable: FilesBySize is allocated on first use
// rather than panicking on a write to a nil map.
//
// The returned error comes from hashing. When a second file of a given size
// is added, both the new file and the previously parked file are hashed; if
// hashing the new file fails its error is returned, otherwise the error (if
// any) from hashing the parked file is returned.
func (fm *FilesMap) Add(path string, info os.FileInfo) error {
	if info.IsDir() {
		return nil
	}

	// Make the zero value safe: assigning into a nil map would panic.
	if fm.FilesBySize == nil {
		fm.FilesBySize = map[int64]map[string][]string{}
	}

	size := info.Size()
	filesByHash := fm.FilesBySize[size]

	// First file with this size: park it under the "" key without hashing.
	if filesByHash == nil {
		fm.FilesBySize[size] = map[string][]string{"": {path}}
		return nil
	}

	// Second file with this size: hash the new file and the parked one, then
	// drop the placeholder entry. The new file is hashed first, so when both
	// hashes match the new path precedes the parked path in the slice.
	if _, hasEmptyHash := filesByHash[""]; hasEmptyHash {
		err := appendByFileHash(filesByHash, path)
		err2 := appendByFileHash(filesByHash, filesByHash[""][0])
		// The placeholder is removed even if hashing failed.
		delete(filesByHash, "")
		if err != nil {
			return err
		}
		return err2
	}

	// Any later file with this size is hashed and appended straight away.
	return appendByFileHash(filesByHash, path)
}
// appendByFileHash hashes the file at fileInfo with calculateHash and appends
// the path to the slice stored in filesByHash under that hash.
//
// If hashing fails, the error is returned and filesByHash is left untouched.
func appendByFileHash(filesByHash map[string][]string, fileInfo string) error {
	digest, hashErr := calculateHash(fileInfo)
	if hashErr != nil {
		return hashErr
	}

	// A missing key yields a nil slice, and append on nil creates a fresh
	// one-element slice, so no existence check is needed.
	filesByHash[digest] = append(filesByHash[digest], fileInfo)
	return nil
}
// newFilesMap returns a pointer to a FilesMap whose FilesBySize map is
// allocated and empty.
func newFilesMap() *FilesMap {
	fm := new(FilesMap)
	fm.FilesBySize = make(map[int64]map[string][]string)
	return fm
}
// calculateHash streams the file at path through SHA-256 and returns the
// digest encoded as unpadded standard base64.
//
// If the file cannot be opened or read, the underlying error is returned
// unchanged together with an empty string.
func calculateHash(path string) (string, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return "", openErr
	}
	defer file.Close()

	// io.Copy feeds the file into the hasher without loading it all at once.
	hasher := sha256.New()
	_, copyErr := io.Copy(hasher, file)
	if copyErr != nil {
		return "", copyErr
	}

	digest := hasher.Sum(nil)
	return base64.RawStdEncoding.EncodeToString(digest), nil
}