mirror of
https://github.com/JaCoB1123/dupe-finder.git
synced 2025-07-04 09:38:54 +02:00
Compare commits
47 Commits
Author | SHA1 | Date | |
---|---|---|---|
7c95ec57d7 | |||
c72fbdd086 | |||
ce574a0b4a | |||
ceab693f34 | |||
eb6e492251 | |||
43d2ced820 | |||
c667707704 | |||
e314e89657 | |||
66da8393a4 | |||
9ce2e8a927 | |||
c6f4c8bd89 | |||
1a9e17de10 | |||
a6c978eaee | |||
c618bc88fc | |||
c57d6cea68 | |||
303833d06f | |||
587f904ebc | |||
f8564f20ac | |||
65ce046585 | |||
6f5eb6a1ca | |||
a2b5d2e224 | |||
ecaddb7f73 | |||
bfbd6de40b | |||
ccd2ea8fcd | |||
f4872c95d1 | |||
a66b84a545 | |||
f6c33a3b5d | |||
c5186d6ae2 | |||
fc2d1c0cb5 | |||
f4f827b3e4 | |||
6059baeeeb | |||
eb25a625cb | |||
bbdc296cbd | |||
b58151efb7 | |||
c535c3d050 | |||
c090b6645e | |||
1144e97045 | |||
c885c03130 | |||
4ea8dfd7ee | |||
425a87071d | |||
dda06924f1 | |||
fda00ec0b8 | |||
31383ad118 | |||
275b63cfe8 | |||
8f0f32d5ee | |||
3c3f1d747b | |||
66a9ae73e5 |
2
.gitignore
vendored
2
.gitignore
vendored
@ -1 +1,3 @@
|
|||||||
*.exe
|
*.exe
|
||||||
|
test_data/
|
||||||
|
tmp/
|
||||||
|
@ -43,7 +43,7 @@ Load previous results from `dupes.json` and delete all duplicates located in ~/.
|
|||||||
> dupe-finder --from-file dupes.json --delete-dupes-in ~/.cache
|
> dupe-finder --from-file dupes.json --delete-dupes-in ~/.cache
|
||||||
```
|
```
|
||||||
|
|
||||||
Find all duplicate files in `~/' and `/mnt/EXT`. Prompt which file to keep for each set of duplicates and move the others to /dupes/.
|
Find all duplicate files in `~/` and `/mnt/EXT`. Prompt which file to keep for each set of duplicates and move the others to /dupes/.
|
||||||
```
|
```
|
||||||
> dupe-finder --delete-prompt --move-files /dupes/ ~/ /mnt/EXT
|
> dupe-finder --delete-prompt --move-files /dupes/ ~/ /mnt/EXT
|
||||||
```
|
```
|
||||||
|
46
clustering.go
Normal file
46
clustering.go
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"slices"
|
||||||
|
|
||||||
|
"github.com/steakknife/hamming"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Types describing groups of similar images as produced by getImageClusters.
type (
	// imageCluster is one group of images that were found to be similar.
	imageCluster struct {
		// images lists the members of the cluster.
		images []similarImage
	}

	// similarImage is a single member of an imageCluster.
	similarImage struct {
		// path is the location of the image file.
		path string
		// distance is the Hamming distance between this image's hash and
		// the hash of the image the cluster was started from; it is left
		// at zero for that first image.
		distance int
	}
)
|
||||||
|
|
||||||
|
func (fm *FilesMap) getImageClusters() []imageCluster {
|
||||||
|
var clusters []imageCluster
|
||||||
|
|
||||||
|
for len(fm.Images) > 0 {
|
||||||
|
file := fm.Images[0]
|
||||||
|
fm.Images = slices.Delete(fm.Images, 0, 1)
|
||||||
|
|
||||||
|
var currentCluster []similarImage
|
||||||
|
currentCluster = append(currentCluster, similarImage{path: file.path})
|
||||||
|
for otherIndex := len(fm.Images) - 1; otherIndex >= 0; otherIndex-- {
|
||||||
|
otherFile := fm.Images[otherIndex]
|
||||||
|
var distance = hamming.Uint64(file.imageHash, otherFile.imageHash)
|
||||||
|
if distance > 5 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
fm.Images = slices.Delete(fm.Images, otherIndex, otherIndex+1)
|
||||||
|
currentCluster = append(currentCluster, similarImage{path: otherFile.path, distance: distance})
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(currentCluster) <= 1 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
clusters = append(clusters, imageCluster{images: currentCluster})
|
||||||
|
}
|
||||||
|
|
||||||
|
return clusters
|
||||||
|
}
|
29
file.go
29
file.go
@ -3,10 +3,15 @@ package main
|
|||||||
import (
|
import (
|
||||||
"crypto/sha1"
|
"crypto/sha1"
|
||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
|
"image"
|
||||||
|
_ "image/jpeg"
|
||||||
|
_ "image/png"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
|
||||||
|
"github.com/corona10/goimagehash"
|
||||||
)
|
)
|
||||||
|
|
||||||
func remove(path string) {
|
func remove(path string) {
|
||||||
@ -41,7 +46,26 @@ func moveButDontOvewrite(path string, targetPath string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func calculateHash(path string) (string, error) {
|
func calculateImageHash(path string) (uint64, error) {
|
||||||
|
f, err := os.Open(path)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
img, _, err := image.Decode(f)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
hash, err := goimagehash.DifferenceHash(img)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return hash.GetHash(), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func calculateFileHash(path string) (string, error) {
|
||||||
f, err := os.Open(path)
|
f, err := os.Open(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
@ -53,5 +77,6 @@ func calculateHash(path string) (string, error) {
|
|||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
return base64.RawStdEncoding.EncodeToString(h.Sum(nil)), nil
|
stringHash := base64.RawStdEncoding.EncodeToString(h.Sum(nil))
|
||||||
|
return stringHash, nil
|
||||||
}
|
}
|
||||||
|
154
filesmap.go
154
filesmap.go
@ -1,26 +1,30 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"image"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
"github.com/vbauerster/mpb/v8"
|
||||||
)
|
)
|
||||||
|
|
||||||
// FilesMap is a struct for listing files by Size and Hash to search for duplicates
|
// FilesMap is a struct for listing files by Size and Hash to search for duplicates
|
||||||
type FilesMap struct {
|
type FilesMap struct {
|
||||||
|
Images []imageEntry
|
||||||
FilesBySize map[int64]string
|
FilesBySize map[int64]string
|
||||||
|
|
||||||
FilesByHash map[string][]string
|
FilesByHash map[string][]string
|
||||||
|
|
||||||
FilesHashing chan fileEntry
|
FilesHashing chan fileEntry
|
||||||
|
|
||||||
FilesIncoming chan fileEntry
|
|
||||||
|
|
||||||
FilesHashed chan fileEntry
|
FilesHashed chan fileEntry
|
||||||
|
ImagesHashing chan imageEntry
|
||||||
|
ImagesHashed chan imageEntry
|
||||||
|
progress *mpb.Progress
|
||||||
|
incomingBar *mpb.Bar
|
||||||
|
fileHashingBar *mpb.Bar
|
||||||
|
imageHashingBar *mpb.Bar
|
||||||
lock sync.Mutex
|
lock sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -28,50 +32,52 @@ func newFilesMap() *FilesMap {
|
|||||||
return &FilesMap{
|
return &FilesMap{
|
||||||
FilesBySize: map[int64]string{},
|
FilesBySize: map[int64]string{},
|
||||||
FilesByHash: map[string][]string{},
|
FilesByHash: map[string][]string{},
|
||||||
FilesHashed: make(chan fileEntry),
|
FilesHashed: make(chan fileEntry, 100000),
|
||||||
FilesIncoming: make(chan fileEntry),
|
|
||||||
FilesHashing: make(chan fileEntry),
|
FilesHashing: make(chan fileEntry),
|
||||||
|
ImagesHashed: make(chan imageEntry, 100000),
|
||||||
|
ImagesHashing: make(chan imageEntry),
|
||||||
|
progress: mpb.New(mpb.WithWidth(64)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (fm *FilesMap) IncomingWorker() {
|
func (fm *FilesMap) FileHashingWorker(wg *sync.WaitGroup) {
|
||||||
for file := range fm.FilesIncoming {
|
|
||||||
if *verbose {
|
|
||||||
fmt.Println("Incoming", file.path)
|
|
||||||
}
|
|
||||||
|
|
||||||
prevFile, ok := fm.FilesBySize[file.size]
|
|
||||||
if !ok {
|
|
||||||
fm.FilesBySize[file.size] = file.path
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if prevFile != "" {
|
|
||||||
fm.FilesHashing <- fileEntry{prevFile, file.size, ""}
|
|
||||||
}
|
|
||||||
|
|
||||||
fm.FilesBySize[file.size] = ""
|
|
||||||
|
|
||||||
fm.FilesHashing <- file
|
|
||||||
}
|
|
||||||
close(fm.FilesHashing)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fm *FilesMap) HashingWorker(wg *sync.WaitGroup) {
|
|
||||||
for file := range fm.FilesHashing {
|
for file := range fm.FilesHashing {
|
||||||
if *verbose {
|
if *verbose {
|
||||||
fmt.Println("Hashing", file.path)
|
fmt.Fprintf(fm.progress, "Hashing file %s\n", file.path)
|
||||||
}
|
}
|
||||||
|
|
||||||
hash, err := calculateHash(file.path)
|
hash, err := calculateFileHash(file.path)
|
||||||
|
fm.fileHashingBar.IncrInt64(file.size)
|
||||||
|
fm.FilesHashed <- file
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("Error calculating Hash for %s: %v\n", file, err)
|
fmt.Fprintf(fm.progress, "Error calculating Hash for file %s: %v\n", file.path, err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
file.hash = hash
|
file.hash = hash
|
||||||
fm.FilesHashed <- file
|
}
|
||||||
|
wg.Done()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fm *FilesMap) ImageHashingWorker(wg *sync.WaitGroup) {
|
||||||
|
for file := range fm.ImagesHashing {
|
||||||
|
if *verbose {
|
||||||
|
fmt.Fprintf(fm.progress, "Hashing image %s\n", file.path)
|
||||||
|
}
|
||||||
|
|
||||||
|
hash, err := calculateImageHash(file.path)
|
||||||
|
fm.imageHashingBar.IncrInt64(file.size)
|
||||||
|
|
||||||
|
if errors.Is(err, image.ErrFormat) {
|
||||||
|
continue
|
||||||
|
} else if err != nil {
|
||||||
|
fmt.Fprintf(fm.progress, "Error calculating Hash for image %s: %v\n", file.path, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
file.imageHash = hash
|
||||||
|
fm.ImagesHashed <- file
|
||||||
}
|
}
|
||||||
wg.Done()
|
wg.Done()
|
||||||
}
|
}
|
||||||
@ -83,28 +89,90 @@ func (fm *FilesMap) HashedWorker(done chan bool) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fm.lock.Lock()
|
fm.lock.Lock()
|
||||||
if _, ok := fm.FilesByHash[file.hash]; ok {
|
|
||||||
fm.FilesByHash[file.hash] = append(fm.FilesByHash[file.hash], file.path)
|
fm.FilesByHash[file.hash] = append(fm.FilesByHash[file.hash], file.path)
|
||||||
} else {
|
fm.lock.Unlock()
|
||||||
fm.FilesByHash[file.hash] = []string{file.path}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for file := range fm.ImagesHashed {
|
||||||
|
if *verbose {
|
||||||
|
fmt.Println("Finishing", file.path)
|
||||||
|
}
|
||||||
|
|
||||||
|
fm.lock.Lock()
|
||||||
|
fm.Images = append(fm.Images, file)
|
||||||
fm.lock.Unlock()
|
fm.lock.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
done <- true
|
done <- true
|
||||||
}
|
}
|
||||||
|
|
||||||
func (fm *FilesMap) WalkDirectories() {
|
func (fm *FilesMap) WalkDirectories() int64 {
|
||||||
|
var countFiles int64 = 0
|
||||||
|
sumSize := int64(0)
|
||||||
for _, path := range flag.Args() {
|
for _, path := range flag.Args() {
|
||||||
filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
|
filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
|
||||||
if info.IsDir() {
|
if info.IsDir() {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
fm.FilesIncoming <- fileEntry{path, info.Size(), ""}
|
size := info.Size()
|
||||||
|
if *minSize > size {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
countFiles++
|
||||||
|
fm.incomingBar.SetTotal(int64(countFiles), false)
|
||||||
|
|
||||||
|
fm.hashImage(path, size)
|
||||||
|
count := fm.hashFile(path, size)
|
||||||
|
if count > 0 {
|
||||||
|
sumSize += size * count
|
||||||
|
fm.fileHashingBar.SetTotal(int64(sumSize), false)
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
close(fm.FilesIncoming)
|
fm.incomingBar.SetTotal(int64(countFiles), true)
|
||||||
|
close(fm.FilesHashing)
|
||||||
|
close(fm.ImagesHashing)
|
||||||
|
return countFiles
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fm *FilesMap) hashFile(path string, size int64) int64 {
|
||||||
|
prevFile, ok := fm.FilesBySize[size]
|
||||||
|
if !ok {
|
||||||
|
fm.FilesBySize[size] = path
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
fm.FilesBySize[size] = ""
|
||||||
|
fm.incomingBar.Increment()
|
||||||
|
if *verbose {
|
||||||
|
fmt.Println("Incoming", path)
|
||||||
|
}
|
||||||
|
|
||||||
|
fm.FilesHashing <- fileEntry{path, size, ""}
|
||||||
|
if prevFile != "" {
|
||||||
|
fm.FilesHashing <- fileEntry{prevFile, size, ""}
|
||||||
|
return 2
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fm *FilesMap) hashImage(path string, size int64) {
|
||||||
|
fm.ImagesHashing <- imageEntry{path, size, 0}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Entries that travel through the hashing channels.
type (
	// imageEntry is a file queued for, or returned from, image hashing.
	imageEntry struct {
		path      string // location of the file
		size      int64  // file size in bytes
		imageHash uint64 // image hash; zero until it has been computed
	}

	// fileEntry is a file queued for, or returned from, content hashing.
	fileEntry struct {
		path string // location of the file
		size int64  // file size in bytes
		hash string // content hash; empty until it has been computed
	}
)
|
||||||
|
18
go.mod
Normal file
18
go.mod
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
module github.com/JaCoB1123/dupe-finder
|
||||||
|
|
||||||
|
go 1.21
|
||||||
|
|
||||||
|
require (
|
||||||
|
github.com/corona10/goimagehash v1.1.0
|
||||||
|
github.com/steakknife/hamming v0.0.0-20180906055917-c99c65617cd3
|
||||||
|
github.com/vbauerster/mpb/v8 v8.7.0
|
||||||
|
)
|
||||||
|
|
||||||
|
require (
|
||||||
|
github.com/VividCortex/ewma v1.2.0 // indirect
|
||||||
|
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d // indirect
|
||||||
|
github.com/mattn/go-runewidth v0.0.15 // indirect
|
||||||
|
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 // indirect
|
||||||
|
github.com/rivo/uniseg v0.4.4 // indirect
|
||||||
|
golang.org/x/sys v0.15.0 // indirect
|
||||||
|
)
|
19
go.sum
Normal file
19
go.sum
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
|
||||||
|
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=
|
||||||
|
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8=
|
||||||
|
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo=
|
||||||
|
github.com/corona10/goimagehash v1.1.0 h1:teNMX/1e+Wn/AYSbLHX8mj+mF9r60R1kBeqE9MkoYwI=
|
||||||
|
github.com/corona10/goimagehash v1.1.0/go.mod h1:VkvE0mLn84L4aF8vCb6mafVajEb6QYMHl2ZJLn0mOGI=
|
||||||
|
github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
|
||||||
|
github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
|
||||||
|
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 h1:zYyBkD/k9seD2A7fsi6Oo2LfFZAehjjQMERAvZLEDnQ=
|
||||||
|
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8=
|
||||||
|
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
|
||||||
|
github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis=
|
||||||
|
github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
|
||||||
|
github.com/steakknife/hamming v0.0.0-20180906055917-c99c65617cd3 h1:njlZPzLwU639dk2kqnCPPv+wNjq7Xb6EfUxe/oX0/NM=
|
||||||
|
github.com/steakknife/hamming v0.0.0-20180906055917-c99c65617cd3/go.mod h1:hpGUWaI9xL8pRQCTXQgocU38Qw1g0Us7n5PxxTwTCYU=
|
||||||
|
github.com/vbauerster/mpb/v8 v8.7.0 h1:n2LTGyol7qqNBcLQn8FL5Bga2O8CGF75OOYsJVFsfMg=
|
||||||
|
github.com/vbauerster/mpb/v8 v8.7.0/go.mod h1:0RgdqeTpu6cDbdWeSaDvEvfgm9O598rBnRZ09HKaV0k=
|
||||||
|
golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc=
|
||||||
|
golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
150
main.go
150
main.go
@ -11,9 +11,11 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
"runtime/pprof"
|
"runtime/pprof"
|
||||||
"strconv"
|
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
"github.com/vbauerster/mpb/v8"
|
||||||
|
"github.com/vbauerster/mpb/v8/decor"
|
||||||
)
|
)
|
||||||
|
|
||||||
var fromFile = flag.String("from-file", "", "Load results file from <path>")
|
var fromFile = flag.String("from-file", "", "Load results file from <path>")
|
||||||
@ -21,6 +23,7 @@ var toFile = flag.String("to-file", "", "Save results to <path>")
|
|||||||
var deleteDupesIn = flag.String("delete-dupes-in", "", "Delete duplicates if they are contained in <path>")
|
var deleteDupesIn = flag.String("delete-dupes-in", "", "Delete duplicates if they are contained in <path>")
|
||||||
var promptForDelete = flag.Bool("delete-prompt", false, "Ask which file to keep for each dupe-set")
|
var promptForDelete = flag.Bool("delete-prompt", false, "Ask which file to keep for each dupe-set")
|
||||||
var moveToFolder = flag.String("move-files", "", "Move files to <path> instead of deleting them")
|
var moveToFolder = flag.String("move-files", "", "Move files to <path> instead of deleting them")
|
||||||
|
var minSize = flag.Int64("min-size", -1, "Ignore all files smaller than <size> in Bytes")
|
||||||
var force = flag.Bool("force", false, "Actually delete files. Without this options, the files to be deleted are only printed")
|
var force = flag.Bool("force", false, "Actually delete files. Without this options, the files to be deleted are only printed")
|
||||||
var verbose = flag.Bool("verbose", false, "Output additional information")
|
var verbose = flag.Bool("verbose", false, "Output additional information")
|
||||||
var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file")
|
var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file")
|
||||||
@ -41,6 +44,7 @@ func main() {
|
|||||||
printConfiguration()
|
printConfiguration()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var countFiles int64 = 0
|
||||||
filesMap := newFilesMap()
|
filesMap := newFilesMap()
|
||||||
if *fromFile != "" {
|
if *fromFile != "" {
|
||||||
byteValue, _ := ioutil.ReadFile(*fromFile)
|
byteValue, _ := ioutil.ReadFile(*fromFile)
|
||||||
@ -49,34 +53,94 @@ func main() {
|
|||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
filesMap.incomingBar = filesMap.progress.AddSpinner(0,
|
||||||
|
mpb.PrependDecorators(
|
||||||
|
decor.Name("Finding files "),
|
||||||
|
decor.Elapsed(decor.ET_STYLE_HHMMSS),
|
||||||
|
),
|
||||||
|
mpb.AppendDecorators(
|
||||||
|
decor.AverageSpeed(0, "%8.2f"),
|
||||||
|
decor.Name(" "),
|
||||||
|
decor.CurrentNoUnit("%5d"),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
filesMap.fileHashingBar = filesMap.progress.AddBar(0,
|
||||||
|
mpb.PrependDecorators(
|
||||||
|
decor.Name("Hashing files "),
|
||||||
|
decor.Elapsed(decor.ET_STYLE_HHMMSS),
|
||||||
|
),
|
||||||
|
mpb.AppendDecorators(
|
||||||
|
decor.AverageSpeed(decor.SizeB1024(0), "%23.2f"),
|
||||||
|
decor.Name(" "),
|
||||||
|
decor.CurrentKibiByte("%5d"),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
filesMap.imageHashingBar = filesMap.progress.AddBar(0,
|
||||||
|
mpb.PrependDecorators(
|
||||||
|
decor.Name("Hashing images "),
|
||||||
|
decor.Elapsed(decor.ET_STYLE_HHMMSS),
|
||||||
|
),
|
||||||
|
mpb.AppendDecorators(
|
||||||
|
decor.AverageSpeed(decor.SizeB1024(0), "%23.2f"),
|
||||||
|
decor.Name(" "),
|
||||||
|
decor.CurrentKibiByte("%5d"),
|
||||||
|
),
|
||||||
|
)
|
||||||
done := make(chan bool)
|
done := make(chan bool)
|
||||||
wg := sync.WaitGroup{}
|
wg := sync.WaitGroup{}
|
||||||
for i := 0; i < runtime.GOMAXPROCS(0); i++ {
|
for i := 0; i < runtime.GOMAXPROCS(0); i++ {
|
||||||
wg.Add(1)
|
wg.Add(2)
|
||||||
go filesMap.HashingWorker(&wg)
|
go filesMap.ImageHashingWorker(&wg)
|
||||||
|
go filesMap.FileHashingWorker(&wg)
|
||||||
}
|
}
|
||||||
|
|
||||||
go filesMap.IncomingWorker()
|
|
||||||
|
|
||||||
go filesMap.HashedWorker(done)
|
go filesMap.HashedWorker(done)
|
||||||
|
|
||||||
filesMap.WalkDirectories()
|
countFiles = filesMap.WalkDirectories()
|
||||||
|
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
close(filesMap.FilesHashed)
|
close(filesMap.FilesHashed)
|
||||||
|
close(filesMap.ImagesHashed)
|
||||||
<-done
|
<-done
|
||||||
}
|
}
|
||||||
|
|
||||||
if *toFile != "" && *fromFile == "" {
|
if *toFile != "" && *fromFile == "" {
|
||||||
json, _ := json.MarshalIndent(filesMap.FilesByHash, "", " ")
|
json, _ := json.MarshalIndent(filesMap.FilesByHash, "", " ")
|
||||||
ioutil.WriteFile(*toFile, json, 644)
|
ioutil.WriteFile(*toFile, json, 0644)
|
||||||
|
}
|
||||||
|
|
||||||
|
for hash, duplicateFiles := range filesMap.FilesByHash {
|
||||||
|
if len(duplicateFiles) > 1 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
delete(filesMap.FilesByHash, hash)
|
||||||
}
|
}
|
||||||
|
|
||||||
if *deleteDupesIn != "" {
|
if *deleteDupesIn != "" {
|
||||||
deleteIn := filepath.Clean(*deleteDupesIn)
|
deleteIn := filepath.Clean(*deleteDupesIn)
|
||||||
for hash := range filesMap.FilesByHash {
|
for hash := range filesMap.FilesByHash {
|
||||||
duplicateFiles := filesMap.FilesByHash[hash]
|
duplicateFiles := filesMap.FilesByHash[hash]
|
||||||
if len(duplicateFiles) <= 1 {
|
hasDupesInFolder := false
|
||||||
|
hasDupesOutsideFolder := false
|
||||||
|
for _, file := range duplicateFiles {
|
||||||
|
fileIsInFolder := strings.HasPrefix(filepath.Clean(file), deleteIn)
|
||||||
|
hasDupesOutsideFolder = hasDupesOutsideFolder || !fileIsInFolder
|
||||||
|
hasDupesInFolder = hasDupesInFolder || fileIsInFolder
|
||||||
|
}
|
||||||
|
|
||||||
|
if !hasDupesInFolder || !hasDupesOutsideFolder {
|
||||||
|
if !hasDupesOutsideFolder {
|
||||||
|
fmt.Println("Not deleting one of the following files, since all would be deleted")
|
||||||
|
}
|
||||||
|
if !hasDupesInFolder {
|
||||||
|
fmt.Println("Not deleting one of the following files, since none are in the selected directory")
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, file := range duplicateFiles {
|
||||||
|
fmt.Println("-", file)
|
||||||
|
}
|
||||||
|
fmt.Println()
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -93,52 +157,40 @@ func main() {
|
|||||||
reader := bufio.NewReader(os.Stdin)
|
reader := bufio.NewReader(os.Stdin)
|
||||||
for hash := range filesMap.FilesByHash {
|
for hash := range filesMap.FilesByHash {
|
||||||
duplicateFiles := filesMap.FilesByHash[hash]
|
duplicateFiles := filesMap.FilesByHash[hash]
|
||||||
if len(duplicateFiles) <= 1 {
|
promptForDeletion(reader, duplicateFiles)
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
fmt.Print("\033[H\033[2J")
|
|
||||||
for i, file := range duplicateFiles {
|
|
||||||
fmt.Println(i+1, file)
|
|
||||||
}
|
|
||||||
|
|
||||||
fmt.Printf("Which file to keep? ")
|
|
||||||
input, err := reader.ReadString('\n')
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println("Invalid input")
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
input = strings.TrimRight(input, "\n\r")
|
|
||||||
intInput, err := strconv.Atoi(input)
|
|
||||||
if err != nil || intInput > len(duplicateFiles) || intInput < 1 {
|
|
||||||
fmt.Println("Invalid input")
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
for i, file := range duplicateFiles {
|
|
||||||
if i+1 == intInput {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if *force {
|
|
||||||
remove(file)
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for hash := range filesMap.FilesByHash {
|
countInstances := 0
|
||||||
duplicateFiles := filesMap.FilesByHash[hash]
|
countDupeSets := 0
|
||||||
if len(duplicateFiles) <= 1 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
|
fmt.Println("Files that are binary identical:")
|
||||||
|
for _, duplicateFiles := range filesMap.FilesByHash {
|
||||||
|
countDupeSets++
|
||||||
for _, file := range duplicateFiles {
|
for _, file := range duplicateFiles {
|
||||||
|
countInstances++
|
||||||
fmt.Println(file)
|
fmt.Println(file)
|
||||||
}
|
}
|
||||||
fmt.Println()
|
fmt.Println()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fmt.Println("Images that are similar:")
|
||||||
|
imageClusters := filesMap.getImageClusters()
|
||||||
|
for _, cluster := range imageClusters {
|
||||||
|
countDupeSets++
|
||||||
|
for _, image := range cluster.images {
|
||||||
|
countInstances++
|
||||||
|
fmt.Println(image.path, image.distance)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Println()
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Println("Statistics:")
|
||||||
|
fmt.Println(countFiles, "Files")
|
||||||
|
fmt.Println(len(filesMap.FilesBySize), "Unique Sizes")
|
||||||
|
fmt.Println(len(filesMap.FilesByHash), "Unique Hashes")
|
||||||
|
fmt.Println(countInstances, "Duplicate Files")
|
||||||
|
fmt.Println(countDupeSets, "Duplicate Sets")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -155,9 +207,3 @@ func printConfiguration() {
|
|||||||
fmt.Println()
|
fmt.Println()
|
||||||
fmt.Println()
|
fmt.Println()
|
||||||
}
|
}
|
||||||
|
|
||||||
type fileEntry struct {
|
|
||||||
path string
|
|
||||||
size int64
|
|
||||||
hash string
|
|
||||||
}
|
|
||||||
|
49
ui.go
Normal file
49
ui.go
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
func promptForDeletion(reader *bufio.Reader, files []string) {
|
||||||
|
fmt.Print("\033[H\033[2J")
|
||||||
|
for i, file := range files {
|
||||||
|
fmt.Println(i+1, file)
|
||||||
|
}
|
||||||
|
fmt.Println(0, "Keep all")
|
||||||
|
|
||||||
|
fmt.Printf("Which file to keep? ")
|
||||||
|
input, err := reader.ReadString('\n')
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("Invalid input")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
input = strings.TrimRight(input, "\n\r")
|
||||||
|
intInput, err := strconv.Atoi(input)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("Invalid input")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if intInput == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if intInput > len(files) || intInput < 1 {
|
||||||
|
fmt.Println("Invalid input")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, file := range files {
|
||||||
|
if i+1 == intInput {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if *force {
|
||||||
|
remove(file)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Reference in New Issue
Block a user