mirror of
https://github.com/JaCoB1123/dupe-finder.git
synced 2025-07-04 09:38:54 +02:00
Compare commits
11 Commits
Author | SHA1 | Date | |
---|---|---|---|
594a88c3ec | |||
29fa093184 | |||
ff2d4daeda | |||
e33d7e2ca0 | |||
a3fa3d4e7c | |||
8007b5686d | |||
87c8a6e817 | |||
8a9bcbf62e | |||
f16a143125 | |||
903909de77 | |||
09a4dc8660 |
12
README.md
12
README.md
@ -3,12 +3,14 @@ Because I couldn't find a good program for my usecase, I wrote this simple Go pr
|
|||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
If you have go installed, the easiest way to install is `go get`:
|
If you have go installed, the easiest way to install and update is `go get`:
|
||||||
|
|
||||||
```
|
```
|
||||||
go get "github.com/JaCoB1123/dupe-finder"
|
go get -u "github.com/JaCoB1123/dupe-finder"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Otherwise you can download the latest binary from the [releases](https://github.com/JaCoB1123/dupe-finder/releases) page.
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
dupe-finder supports the following options:
|
dupe-finder supports the following options:
|
||||||
@ -34,15 +36,15 @@ dupe-finder supports the following options:
|
|||||||
Find all duplicate files in `~/` and save the results to `dupes.json`
|
Find all duplicate files in `~/` and save the results to `dupes.json`
|
||||||
```
|
```
|
||||||
> dupe-finder --to-file dupes.json ~/
|
> dupe-finder --to-file dupes.json ~/
|
||||||
``̀`
|
```
|
||||||
|
|
||||||
Load previous results from `dupes.json` and delete all duplicates located in ~/.cache
|
Load previous results from `dupes.json` and delete all duplicates located in ~/.cache
|
||||||
```
|
```
|
||||||
> dupe-finder --from-file dupes.json --delete-dupes-in ~/.cache
|
> dupe-finder --from-file dupes.json --delete-dupes-in ~/.cache
|
||||||
``̀`
|
```
|
||||||
|
|
||||||
Find all duplicate files in `~/` and `/mnt/EXT`. Prompt which file to keep for each set of duplicates and move the others to /dupes/.
|
Find all duplicate files in `~/` and `/mnt/EXT`. Prompt which file to keep for each set of duplicates and move the others to /dupes/.
|
||||||
```
|
```
|
||||||
> dupe-finder --delete-prompt --move-files /dupes/ ~/ /mnt/EXT
|
> dupe-finder --delete-prompt --move-files /dupes/ ~/ /mnt/EXT
|
||||||
``̀`
|
```
|
||||||
|
|
||||||
|
57
file.go
Normal file
57
file.go
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
	"crypto/sha1"
	"encoding/base64"
	"io"
	"log"
	"os"
	"path/filepath"
	"strconv"
)
|
||||||
|
|
||||||
|
func remove(path string) {
|
||||||
|
if !*force {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if *moveToFolder == "" {
|
||||||
|
os.Remove(path)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
moveButDontOvewrite(path, *moveToFolder)
|
||||||
|
}
|
||||||
|
|
||||||
|
// moveButDontOvewrite moves the file at path into the directory targetPath,
// keeping its base name.
//
// If that name is already taken it tries name.0, name.1, ... until it finds a
// name that does not exist, so files already in targetPath are never
// overwritten. Errors are logged and leave the source file where it was.
func moveButDontOvewrite(path string, targetPath string) {
	filename := filepath.Base(path)
	target := filepath.Join(targetPath, filename)

	for num := 0; ; num++ {
		_, err := os.Stat(target)
		if os.IsNotExist(err) {
			if err := os.Rename(path, target); err != nil {
				log.Printf("Error moving %s to %s: %v", path, target, err)
			}
			return
		}
		if err != nil {
			// Any other stat failure (permission denied, targetPath is
			// not a directory, ...) would repeat for every candidate
			// name, so give up instead of looping forever.
			log.Printf("Error checking %s: %v", target, err)
			return
		}

		target = filepath.Join(targetPath, filename+"."+strconv.Itoa(num))
	}
}
|
||||||
|
|
||||||
|
// calculateHash streams the file at path through SHA-1 and returns the digest
// encoded as unpadded standard base64. If the file cannot be opened or read it
// returns an empty string together with that error.
func calculateHash(path string) (string, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return "", openErr
	}
	defer file.Close()

	hasher := sha1.New()
	_, copyErr := io.Copy(hasher, file)
	if copyErr != nil {
		return "", copyErr
	}

	digest := hasher.Sum(nil)
	return base64.RawStdEncoding.EncodeToString(digest), nil
}
|
110
filesmap.go
Normal file
110
filesmap.go
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FilesMap is a struct for listing files by Size and Hash to search for duplicates
|
||||||
|
type FilesMap struct {
|
||||||
|
FilesBySize map[int64]string
|
||||||
|
|
||||||
|
FilesByHash map[string][]string
|
||||||
|
|
||||||
|
FilesHashing chan fileEntry
|
||||||
|
|
||||||
|
FilesIncoming chan fileEntry
|
||||||
|
|
||||||
|
FilesHashed chan fileEntry
|
||||||
|
|
||||||
|
lock sync.Mutex
|
||||||
|
}
|
||||||
|
|
||||||
|
func newFilesMap() *FilesMap {
|
||||||
|
return &FilesMap{
|
||||||
|
FilesBySize: map[int64]string{},
|
||||||
|
FilesByHash: map[string][]string{},
|
||||||
|
FilesHashed: make(chan fileEntry),
|
||||||
|
FilesIncoming: make(chan fileEntry),
|
||||||
|
FilesHashing: make(chan fileEntry),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fm *FilesMap) IncomingWorker() {
|
||||||
|
for file := range fm.FilesIncoming {
|
||||||
|
if *verbose {
|
||||||
|
fmt.Println("Incoming", file.path)
|
||||||
|
}
|
||||||
|
|
||||||
|
prevFile, ok := fm.FilesBySize[file.size]
|
||||||
|
if !ok {
|
||||||
|
fm.FilesBySize[file.size] = file.path
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if prevFile != "" {
|
||||||
|
fm.FilesHashing <- fileEntry{prevFile, file.size, ""}
|
||||||
|
}
|
||||||
|
|
||||||
|
fm.FilesBySize[file.size] = ""
|
||||||
|
|
||||||
|
fm.FilesHashing <- file
|
||||||
|
}
|
||||||
|
close(fm.FilesHashing)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fm *FilesMap) HashingWorker(wg *sync.WaitGroup) {
|
||||||
|
for file := range fm.FilesHashing {
|
||||||
|
if *verbose {
|
||||||
|
fmt.Println("Hashing", file.path)
|
||||||
|
}
|
||||||
|
|
||||||
|
hash, err := calculateHash(file.path)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("Error calculating Hash for %s: %v\n", file, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
file.hash = hash
|
||||||
|
fm.FilesHashed <- file
|
||||||
|
}
|
||||||
|
wg.Done()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fm *FilesMap) HashedWorker(done chan bool) {
|
||||||
|
for file := range fm.FilesHashed {
|
||||||
|
if *verbose {
|
||||||
|
fmt.Println("Finishing", file.path)
|
||||||
|
}
|
||||||
|
|
||||||
|
fm.lock.Lock()
|
||||||
|
if _, ok := fm.FilesByHash[file.hash]; ok {
|
||||||
|
fm.FilesByHash[file.hash] = append(fm.FilesByHash[file.hash], file.path)
|
||||||
|
} else {
|
||||||
|
fm.FilesByHash[file.hash] = []string{file.path}
|
||||||
|
}
|
||||||
|
fm.lock.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
done <- true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fm *FilesMap) WalkDirectories() {
|
||||||
|
for _, path := range flag.Args() {
|
||||||
|
filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
|
||||||
|
if info.IsDir() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
fm.FilesIncoming <- fileEntry{path, info.Size(), ""}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
close(fm.FilesIncoming)
|
||||||
|
}
|
178
main.go
178
main.go
@ -2,17 +2,18 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"crypto/sha256"
|
|
||||||
"encoding/base64"
|
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
|
"log"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"runtime"
|
||||||
|
"runtime/pprof"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
)
|
)
|
||||||
|
|
||||||
var fromFile = flag.String("from-file", "", "Load results file from <path>")
|
var fromFile = flag.String("from-file", "", "Load results file from <path>")
|
||||||
@ -22,74 +23,59 @@ var promptForDelete = flag.Bool("delete-prompt", false, "Ask which file to keep
|
|||||||
var moveToFolder = flag.String("move-files", "", "Move files to <path> instead of deleting them")
|
var moveToFolder = flag.String("move-files", "", "Move files to <path> instead of deleting them")
|
||||||
var force = flag.Bool("force", false, "Actually delete files. Without this options, the files to be deleted are only printed")
|
var force = flag.Bool("force", false, "Actually delete files. Without this options, the files to be deleted are only printed")
|
||||||
var verbose = flag.Bool("verbose", false, "Output additional information")
|
var verbose = flag.Bool("verbose", false, "Output additional information")
|
||||||
|
var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file")
|
||||||
func Delete(path string) {
|
|
||||||
if !*force {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if *moveToFolder == "" {
|
|
||||||
os.Remove(path)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
MoveButDontOvewrite(path, *moveToFolder)
|
|
||||||
}
|
|
||||||
|
|
||||||
func MoveButDontOvewrite(path string, targetPath string) {
|
|
||||||
num := 0
|
|
||||||
|
|
||||||
filename := filepath.Base(path)
|
|
||||||
|
|
||||||
target := filepath.Join(targetPath, filename)
|
|
||||||
|
|
||||||
for {
|
|
||||||
_, err := os.Stat(target)
|
|
||||||
if os.IsNotExist(err) {
|
|
||||||
os.Rename(path, target)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
target = filepath.Join(targetPath, filename+"."+strconv.Itoa(num))
|
|
||||||
num++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
|
if *cpuprofile != "" {
|
||||||
|
f, err := os.Create(*cpuprofile)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
pprof.StartCPUProfile(f)
|
||||||
|
defer pprof.StopCPUProfile()
|
||||||
|
}
|
||||||
|
|
||||||
if *verbose {
|
if *verbose {
|
||||||
printConfiguration()
|
printConfiguration()
|
||||||
}
|
}
|
||||||
|
|
||||||
filesMap := newFilesMap()
|
filesMap := newFilesMap()
|
||||||
if *fromFile != "" {
|
if *fromFile != "" {
|
||||||
fmt.Println("Loading file", *fromFile)
|
|
||||||
|
|
||||||
byteValue, _ := ioutil.ReadFile(*fromFile)
|
byteValue, _ := ioutil.ReadFile(*fromFile)
|
||||||
err := json.Unmarshal(byteValue, &filesMap.FilesBySize)
|
err := json.Unmarshal(byteValue, &filesMap.FilesByHash)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for _, path := range flag.Args() {
|
done := make(chan bool)
|
||||||
filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
|
wg := sync.WaitGroup{}
|
||||||
filesMap.Add(path, info)
|
for i := 0; i < runtime.GOMAXPROCS(0); i++ {
|
||||||
return nil
|
wg.Add(1)
|
||||||
})
|
go filesMap.HashingWorker(&wg)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
go filesMap.IncomingWorker()
|
||||||
|
|
||||||
|
go filesMap.HashedWorker(done)
|
||||||
|
|
||||||
|
filesMap.WalkDirectories()
|
||||||
|
|
||||||
|
wg.Wait()
|
||||||
|
close(filesMap.FilesHashed)
|
||||||
|
<-done
|
||||||
}
|
}
|
||||||
|
|
||||||
if *toFile != "" && *fromFile == "" {
|
if *toFile != "" && *fromFile == "" {
|
||||||
json, _ := json.MarshalIndent(filesMap.FilesBySize, "", " ")
|
json, _ := json.MarshalIndent(filesMap.FilesByHash, "", " ")
|
||||||
ioutil.WriteFile(*toFile, json, 644)
|
ioutil.WriteFile(*toFile, json, 644)
|
||||||
}
|
}
|
||||||
|
|
||||||
if *deleteDupesIn != "" {
|
if *deleteDupesIn != "" {
|
||||||
deleteIn := filepath.Clean(*deleteDupesIn)
|
deleteIn := filepath.Clean(*deleteDupesIn)
|
||||||
for size := range filesMap.FilesBySize {
|
for hash := range filesMap.FilesByHash {
|
||||||
for hash := range filesMap.FilesBySize[size] {
|
duplicateFiles := filesMap.FilesByHash[hash]
|
||||||
duplicateFiles := filesMap.FilesBySize[size][hash]
|
|
||||||
if len(duplicateFiles) <= 1 {
|
if len(duplicateFiles) <= 1 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@ -98,17 +84,15 @@ func main() {
|
|||||||
if strings.HasPrefix(filepath.Clean(file), deleteIn) {
|
if strings.HasPrefix(filepath.Clean(file), deleteIn) {
|
||||||
fmt.Println("Would delete ", file)
|
fmt.Println("Would delete ", file)
|
||||||
if *force {
|
if *force {
|
||||||
Delete(file)
|
remove(file)
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if *promptForDelete {
|
} else if *promptForDelete {
|
||||||
reader := bufio.NewReader(os.Stdin)
|
reader := bufio.NewReader(os.Stdin)
|
||||||
for size := range filesMap.FilesBySize {
|
for hash := range filesMap.FilesByHash {
|
||||||
for hash := range filesMap.FilesBySize[size] {
|
duplicateFiles := filesMap.FilesByHash[hash]
|
||||||
duplicateFiles := filesMap.FilesBySize[size][hash]
|
|
||||||
if len(duplicateFiles) <= 1 {
|
if len(duplicateFiles) <= 1 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@ -138,16 +122,14 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if *force {
|
if *force {
|
||||||
Delete(file)
|
remove(file)
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for size := range filesMap.FilesBySize {
|
for hash := range filesMap.FilesByHash {
|
||||||
for hash := range filesMap.FilesBySize[size] {
|
duplicateFiles := filesMap.FilesByHash[hash]
|
||||||
duplicateFiles := filesMap.FilesBySize[size][hash]
|
|
||||||
if len(duplicateFiles) <= 1 {
|
if len(duplicateFiles) <= 1 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@ -158,7 +140,6 @@ func main() {
|
|||||||
fmt.Println()
|
fmt.Println()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func printConfiguration() {
|
func printConfiguration() {
|
||||||
@ -175,81 +156,8 @@ func printConfiguration() {
|
|||||||
fmt.Println()
|
fmt.Println()
|
||||||
}
|
}
|
||||||
|
|
||||||
// FilesMap is a struct for listing files by Size and Hash to search for duplicates
|
type fileEntry struct {
|
||||||
type FilesMap struct {
|
path string
|
||||||
FilesBySize map[int64]map[string][]string
|
size int64
|
||||||
}
|
hash string
|
||||||
|
|
||||||
// Add a file to the Map and calculate hash on demand
|
|
||||||
func (fm *FilesMap) Add(path string, info os.FileInfo) error {
|
|
||||||
if info.IsDir() {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
fileInfo := path
|
|
||||||
|
|
||||||
filesByHash := fm.FilesBySize[info.Size()]
|
|
||||||
|
|
||||||
// first file with same size
|
|
||||||
// => create new map for size
|
|
||||||
if filesByHash == nil {
|
|
||||||
filesByHash = map[string][]string{}
|
|
||||||
fm.FilesBySize[info.Size()] = filesByHash
|
|
||||||
filesByHash[""] = []string{fileInfo}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// second file with same size
|
|
||||||
// => calculate hashes for all entries
|
|
||||||
if _, hasEmptyHash := filesByHash[""]; hasEmptyHash {
|
|
||||||
err := appendByFileHash(filesByHash, fileInfo)
|
|
||||||
err2 := appendByFileHash(filesByHash, filesByHash[""][0])
|
|
||||||
|
|
||||||
delete(filesByHash, "")
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return err2
|
|
||||||
}
|
|
||||||
|
|
||||||
// for later files always append by hash
|
|
||||||
return appendByFileHash(filesByHash, fileInfo)
|
|
||||||
}
|
|
||||||
|
|
||||||
func appendByFileHash(filesByHash map[string][]string, fileInfo string) error {
|
|
||||||
hash, err := calculateHash(fileInfo)
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if _, ok := filesByHash[hash]; ok {
|
|
||||||
filesByHash[hash] = append(filesByHash[hash], fileInfo)
|
|
||||||
} else {
|
|
||||||
filesByHash[hash] = []string{fileInfo}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func newFilesMap() *FilesMap {
|
|
||||||
return &FilesMap{
|
|
||||||
FilesBySize: map[int64]map[string][]string{},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func calculateHash(path string) (string, error) {
|
|
||||||
f, err := os.Open(path)
|
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
h := sha256.New()
|
|
||||||
if _, err := io.Copy(h, f); err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
|
|
||||||
return base64.RawStdEncoding.EncodeToString(h.Sum(nil)), nil
|
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user