Only save path instead of adding size again

Jan Bader 2020-11-21 20:47:43 +01:00
parent d8040e8451
commit ed15632238
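
For context, a minimal sketch (not part of the commit) of the data layout this change moves to: `FilesBySize` is already keyed by file size, so each bucket can now hold plain path strings instead of `*fileEntry` values that repeated the size. The sizes, hashes, and paths below are made up for illustration.

// Sketch only: illustrates the FilesMap layout after this commit.
package main

import "fmt"

// FilesMap indexes files by size, then by content hash; each entry is just a path.
type FilesMap struct {
	FilesBySize map[int64]map[string][]string
}

func newFilesMap() *FilesMap {
	return &FilesMap{FilesBySize: map[int64]map[string][]string{}}
}

func main() {
	fm := newFilesMap()
	// Hypothetical 1024-byte files sharing the made-up hash "abc123".
	fm.FilesBySize[1024] = map[string][]string{
		"abc123": {"/tmp/a.txt", "/tmp/copy-of-a.txt"},
	}
	for size, byHash := range fm.FilesBySize {
		for hash, paths := range byHash {
			// The size is carried by the outer key, so entries no longer store it.
			fmt.Println(size, hash, paths)
		}
	}
}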

main.go

@@ -5,6 +5,7 @@ import (
 	"encoding/base64"
 	"encoding/json"
 	"flag"
+	"fmt"
 	"io"
 	"io/ioutil"
 	"os"
@@ -14,19 +15,25 @@ import (
 func main() {
 	fromFile := flag.String("from-file", "", "Load results file from <path>")
 	toFile := flag.String("to-file", "", "Save results to <path>")
+	deleteDupesIn := flag.String("delete-dupes-in", "", "Delete duplicates if they are contained in <path>")
+	force := flag.Bool("force", false, "Actually delete files. Without this option, the files to be deleted are only printed")
 	flag.Parse()
 
-	var filesMap filesMap
+	fmt.Printf("fromFile: \"%v\"\n", *fromFile)
+	fmt.Printf("toFile: \"%v\"\n", *toFile)
+	fmt.Printf("deleteDupesIn: \"%v\"\n", *deleteDupesIn)
+	fmt.Printf("force: \"%v\"\n", *force)
+
+	filesMap := newFilesMap()
 	if *fromFile != "" {
+		fmt.Println("Loading file", *fromFile)
 		byteValue, _ := ioutil.ReadFile(*fromFile)
-		// we unmarshal our byteArray which contains our
-		// jsonFile's content into 'users' which we defined above
-		json.Unmarshal(byteValue, &filesMap)
+		err := json.Unmarshal(byteValue, filesMap)
+		if err != nil {
+			panic(err)
+		}
 	} else {
-		filesMap = newFilesMap()
 		for _, path := range flag.Args() {
 			filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
 				filesMap.Add(path, info)
@@ -39,30 +46,49 @@ func main() {
 		json, _ := json.MarshalIndent(filesMap.FilesBySize, "", " ")
 		ioutil.WriteFile(*toFile, json, 644)
 	}
+
+	if *deleteDupesIn != "" {
+		fmt.Println("Deleting", filesMap)
+		for size := range filesMap.FilesBySize {
+			fmt.Println("Deleting")
+			for hash := range filesMap.FilesBySize[size] {
+				duplicateFiles := filesMap.FilesBySize[size][hash]
+				if len(duplicateFiles) <= 1 {
+					continue
+				}
+
+				fmt.Println("Would delete")
+				for _, file := range duplicateFiles {
+					if !*force {
+						fmt.Println(file)
+					}
+				}
+			}
+		}
+	}
 }
 
-type filesMap struct {
-	FilesBySize map[int64]map[string][]*fileEntry
+// FilesMap is a struct for listing files by Size and Hash to search for duplicates
+type FilesMap struct {
+	FilesBySize map[int64]map[string][]string
 }
 
-func (fm *filesMap) Add(path string, info os.FileInfo) error {
+// Add a file to the Map and calculate hash on demand
+func (fm *FilesMap) Add(path string, info os.FileInfo) error {
 	if info.IsDir() {
 		return nil
 	}
 
-	fileInfo := &fileEntry{
-		Path: path,
-		Size: info.Size(),
-	}
+	fileInfo := path
 
-	filesByHash := fm.FilesBySize[fileInfo.Size]
+	filesByHash := fm.FilesBySize[info.Size()]
 
 	// first file with same size
 	// => create new map for size
 	if filesByHash == nil {
-		filesByHash = map[string][]*fileEntry{}
-		fm.FilesBySize[fileInfo.Size] = filesByHash
-		filesByHash[""] = []*fileEntry{fileInfo}
+		filesByHash = map[string][]string{}
+		fm.FilesBySize[info.Size()] = filesByHash
+		filesByHash[""] = []string{fileInfo}
 		return nil
 	}
@@ -85,8 +111,8 @@ func (fm *filesMap) Add(path string, info os.FileInfo) error {
 	return appendByFileHash(filesByHash, fileInfo)
 }
 
-func appendByFileHash(filesByHash map[string][]*fileEntry, fileInfo *fileEntry) error {
-	hash, err := calculateHash(fileInfo.Path)
+func appendByFileHash(filesByHash map[string][]string, fileInfo string) error {
+	hash, err := calculateHash(fileInfo)
 	if err != nil {
 		return err
@@ -95,22 +121,17 @@ func appendByFileHash(filesByHash map[string][]*fileEntry, fileInfo *fileEntry)
 	if _, ok := filesByHash[hash]; ok {
 		filesByHash[hash] = append(filesByHash[hash], fileInfo)
 	} else {
-		filesByHash[hash] = []*fileEntry{fileInfo}
+		filesByHash[hash] = []string{fileInfo}
 	}
 	return nil
 }
 
-func newFilesMap() filesMap {
-	return filesMap{
-		FilesBySize: map[int64]map[string][]*fileEntry{},
+func newFilesMap() *FilesMap {
+	return &FilesMap{
+		FilesBySize: map[int64]map[string][]string{},
 	}
 }
 
-type fileEntry struct {
-	Path string
-	Size int64
-}
-
 func calculateHash(path string) (string, error) {
 	f, err := os.Open(path)
 	if err != nil {