mirror of
				https://github.com/JaCoB1123/dupe-finder.git
				synced 2025-10-31 02:48:28 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			226 lines
		
	
	
		
			5.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			226 lines
		
	
	
		
			5.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package main
 | |
| 
 | |
| import (
 | |
| 	"bufio"
 | |
| 	"crypto/sha256"
 | |
| 	"encoding/base64"
 | |
| 	"encoding/json"
 | |
| 	"flag"
 | |
| 	"fmt"
 | |
| 	"io"
 | |
| 	"io/ioutil"
 | |
| 	"os"
 | |
| 	"path/filepath"
 | |
| 	"strconv"
 | |
| 	"strings"
 | |
| )
 | |
| 
 | |
| var fromFile = flag.String("from-file", "", "Load results file from <path>")
 | |
| var toFile = flag.String("to-file", "", "Save results to <path>")
 | |
| var deleteDupesIn = flag.String("delete-dupes-in", "", "Delete duplicates if they are contained in <path>")
 | |
| var promptForDelete = flag.Bool("delete-prompt", false, "Ask which file to keep for each dupe-set")
 | |
| var force = flag.Bool("force", false, "Actually delete files. Without this options, the files to be deleted are only printed")
 | |
| var verbose = flag.Bool("verbose", false, "Output additional information")
 | |
| 
 | |
| func main() {
 | |
| 	flag.Parse()
 | |
| 
 | |
| 	if *verbose {
 | |
| 		printConfiguration()
 | |
| 	}
 | |
| 
 | |
| 	filesMap := newFilesMap()
 | |
| 	if *fromFile != "" {
 | |
| 		fmt.Println("Loading file", *fromFile)
 | |
| 
 | |
| 		byteValue, _ := ioutil.ReadFile(*fromFile)
 | |
| 		err := json.Unmarshal(byteValue, &filesMap.FilesBySize)
 | |
| 		if err != nil {
 | |
| 			panic(err)
 | |
| 		}
 | |
| 	} else {
 | |
| 		for _, path := range flag.Args() {
 | |
| 			filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
 | |
| 				filesMap.Add(path, info)
 | |
| 				return nil
 | |
| 			})
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if *toFile != "" && *fromFile == "" {
 | |
| 		json, _ := json.MarshalIndent(filesMap.FilesBySize, "", "  ")
 | |
| 		ioutil.WriteFile(*toFile, json, 644)
 | |
| 	}
 | |
| 
 | |
| 	if *deleteDupesIn != "" {
 | |
| 		deleteIn := filepath.Clean(*deleteDupesIn)
 | |
| 		for size := range filesMap.FilesBySize {
 | |
| 			for hash := range filesMap.FilesBySize[size] {
 | |
| 				duplicateFiles := filesMap.FilesBySize[size][hash]
 | |
| 				if len(duplicateFiles) <= 1 {
 | |
| 					continue
 | |
| 				}
 | |
| 
 | |
| 				for _, file := range duplicateFiles {
 | |
| 					if strings.HasPrefix(filepath.Clean(file), deleteIn) {
 | |
| 						fmt.Println("Would delete ", file)
 | |
| 						if *force {
 | |
| 							os.Remove(file)
 | |
| 						}
 | |
| 					}
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 	} else if *promptForDelete {
 | |
| 		reader := bufio.NewReader(os.Stdin)
 | |
| 		for size := range filesMap.FilesBySize {
 | |
| 			for hash := range filesMap.FilesBySize[size] {
 | |
| 				duplicateFiles := filesMap.FilesBySize[size][hash]
 | |
| 				if len(duplicateFiles) <= 1 {
 | |
| 					continue
 | |
| 				}
 | |
| 
 | |
| 				fmt.Print("\033[H\033[2J")
 | |
| 				for i, file := range duplicateFiles {
 | |
| 					fmt.Println(i+1, file)
 | |
| 					if *force {
 | |
| 						os.Remove(file)
 | |
| 					}
 | |
| 				}
 | |
| 
 | |
| 				fmt.Printf("Which file to keep? ")
 | |
| 				input, err := reader.ReadString('\n')
 | |
| 				if err != nil {
 | |
| 					fmt.Println("Invalid input")
 | |
| 					continue
 | |
| 				}
 | |
| 
 | |
| 				input = strings.TrimRight(input, "\n\r")
 | |
| 				intInput, err := strconv.Atoi(input)
 | |
| 				if err != nil || intInput > len(duplicateFiles) || intInput < 1 {
 | |
| 					fmt.Println("Invalid input")
 | |
| 					continue
 | |
| 				}
 | |
| 
 | |
| 				for i, file := range duplicateFiles {
 | |
| 					if i+1 == intInput {
 | |
| 						continue
 | |
| 					}
 | |
| 
 | |
| 					if *force {
 | |
| 						os.Remove(file)
 | |
| 					}
 | |
| 				}
 | |
| 
 | |
| 			}
 | |
| 		}
 | |
| 	} else {
 | |
| 		for size := range filesMap.FilesBySize {
 | |
| 			for hash := range filesMap.FilesBySize[size] {
 | |
| 				duplicateFiles := filesMap.FilesBySize[size][hash]
 | |
| 				if len(duplicateFiles) <= 1 {
 | |
| 					continue
 | |
| 				}
 | |
| 
 | |
| 				for _, file := range duplicateFiles {
 | |
| 					fmt.Println(file)
 | |
| 				}
 | |
| 				fmt.Println()
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func printConfiguration() {
 | |
| 	fmt.Printf("fromFile: \"%v\"\n", *fromFile)
 | |
| 	fmt.Printf("toFile: \"%v\"\n", *toFile)
 | |
| 	fmt.Printf("deleteDupesIn: \"%v\"\n", *deleteDupesIn)
 | |
| 	fmt.Printf("force: \"%v\"\n", *force)
 | |
| 	fmt.Println("Searching paths:")
 | |
| 	for _, path := range flag.Args() {
 | |
| 		fmt.Println("- ", path)
 | |
| 	}
 | |
| 
 | |
| 	fmt.Println()
 | |
| 	fmt.Println()
 | |
| }
 | |
| 
 | |
// FilesMap indexes file paths for duplicate detection.
//
// FilesBySize maps a file size to a second map that groups the paths of that
// size by content hash. Paths sharing both size and hash are duplicates.
type FilesMap struct {
	FilesBySize map[int64]map[string][]string
}
 | |
| 
 | |
| // Add a file to the Map and calculate hash on demand
 | |
| func (fm *FilesMap) Add(path string, info os.FileInfo) error {
 | |
| 	if info.IsDir() {
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	fileInfo := path
 | |
| 
 | |
| 	filesByHash := fm.FilesBySize[info.Size()]
 | |
| 
 | |
| 	// first file with same size
 | |
| 	// => create new map for size
 | |
| 	if filesByHash == nil {
 | |
| 		filesByHash = map[string][]string{}
 | |
| 		fm.FilesBySize[info.Size()] = filesByHash
 | |
| 		filesByHash[""] = []string{fileInfo}
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	// second file with same size
 | |
| 	// => calculate hashes for all entries
 | |
| 	if _, hasEmptyHash := filesByHash[""]; hasEmptyHash {
 | |
| 		err := appendByFileHash(filesByHash, fileInfo)
 | |
| 		err2 := appendByFileHash(filesByHash, filesByHash[""][0])
 | |
| 
 | |
| 		delete(filesByHash, "")
 | |
| 
 | |
| 		if err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 
 | |
| 		return err2
 | |
| 	}
 | |
| 
 | |
| 	// for later files always append by hash
 | |
| 	return appendByFileHash(filesByHash, fileInfo)
 | |
| }
 | |
| 
 | |
| func appendByFileHash(filesByHash map[string][]string, fileInfo string) error {
 | |
| 	hash, err := calculateHash(fileInfo)
 | |
| 
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	if _, ok := filesByHash[hash]; ok {
 | |
| 		filesByHash[hash] = append(filesByHash[hash], fileInfo)
 | |
| 	} else {
 | |
| 		filesByHash[hash] = []string{fileInfo}
 | |
| 	}
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| func newFilesMap() *FilesMap {
 | |
| 	return &FilesMap{
 | |
| 		FilesBySize: map[int64]map[string][]string{},
 | |
| 	}
 | |
| }
 | |
| 
 | |
// calculateHash streams the file at path through SHA-256 and returns the
// digest encoded as unpadded standard base64. If the file cannot be opened or
// read, it returns an empty string and the error.
func calculateHash(path string) (string, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return "", openErr
	}
	defer file.Close()

	hasher := sha256.New()
	_, copyErr := io.Copy(hasher, file)
	if copyErr != nil {
		return "", copyErr
	}

	digest := hasher.Sum(nil)
	return base64.RawStdEncoding.EncodeToString(digest), nil
}
 |