mirror of
https://github.com/JaCoB1123/dupe-finder.git
synced 2025-07-04 01:28:54 +02:00
Extract getImageClusters
This commit is contained in:
41
filesmap.go
41
filesmap.go
@ -7,8 +7,10 @@ import (
|
|||||||
"image"
|
"image"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"slices"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
"github.com/steakknife/hamming"
|
||||||
"github.com/vbauerster/mpb/v8"
|
"github.com/vbauerster/mpb/v8"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -172,6 +174,45 @@ func (fm *FilesMap) hashFile(path string, size int64) int64 {
|
|||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type imageCluster struct {
|
||||||
|
images []similarImage
|
||||||
|
}
|
||||||
|
|
||||||
|
type similarImage struct {
|
||||||
|
path string
|
||||||
|
distance int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fm *FilesMap) getImageClusters() []imageCluster {
|
||||||
|
var clusters []imageCluster
|
||||||
|
|
||||||
|
for len(fm.Images) > 0 {
|
||||||
|
file := fm.Images[0]
|
||||||
|
fm.Images = slices.Delete(fm.Images, 0, 1)
|
||||||
|
|
||||||
|
var currentCluster []similarImage
|
||||||
|
currentCluster = append(currentCluster, similarImage{path: file.path})
|
||||||
|
for otherIndex := len(fm.Images) - 1; otherIndex >= 0; otherIndex-- {
|
||||||
|
otherFile := fm.Images[otherIndex]
|
||||||
|
var distance = hamming.Uint64(file.imageHash, otherFile.imageHash)
|
||||||
|
if distance > 5 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
fm.Images = slices.Delete(fm.Images, otherIndex, otherIndex+1)
|
||||||
|
currentCluster = append(currentCluster, similarImage{path: otherFile.path, distance: distance})
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(currentCluster) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
clusters = append(clusters, imageCluster{images: currentCluster})
|
||||||
|
}
|
||||||
|
|
||||||
|
return clusters
|
||||||
|
}
|
||||||
|
|
||||||
func (fm *FilesMap) hashImage(path string, size int64) {
|
func (fm *FilesMap) hashImage(path string, size int64) {
|
||||||
fm.ImagesHashing <- imageEntry{path, size, 0}
|
fm.ImagesHashing <- imageEntry{path, size, 0}
|
||||||
}
|
}
|
||||||
|
34
main.go
34
main.go
@ -15,9 +15,6 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"slices"
|
|
||||||
|
|
||||||
"github.com/steakknife/hamming"
|
|
||||||
"github.com/vbauerster/mpb/v8"
|
"github.com/vbauerster/mpb/v8"
|
||||||
"github.com/vbauerster/mpb/v8/decor"
|
"github.com/vbauerster/mpb/v8/decor"
|
||||||
)
|
)
|
||||||
@ -195,8 +192,7 @@ func main() {
|
|||||||
countDupeSets := 0
|
countDupeSets := 0
|
||||||
|
|
||||||
fmt.Println("Files that are binary identical:")
|
fmt.Println("Files that are binary identical:")
|
||||||
for hash := range filesMap.FilesByHash {
|
for _, duplicateFiles := range filesMap.FilesByHash {
|
||||||
duplicateFiles := filesMap.FilesByHash[hash]
|
|
||||||
if len(duplicateFiles) <= 1 {
|
if len(duplicateFiles) <= 1 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@ -210,32 +206,12 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fmt.Println("Images that are similar:")
|
fmt.Println("Images that are similar:")
|
||||||
for len(filesMap.Images) > 0 {
|
imageClusters := filesMap.getImageClusters()
|
||||||
file := filesMap.Images[0]
|
for _, cluster := range imageClusters {
|
||||||
filesMap.Images = slices.Delete(filesMap.Images, 0, 1)
|
|
||||||
var currentCluster []imageEntry
|
|
||||||
|
|
||||||
currentCluster = append(currentCluster, file)
|
|
||||||
for otherIndex := len(filesMap.Images) - 1; otherIndex >= 0; otherIndex-- {
|
|
||||||
otherFile := filesMap.Images[otherIndex]
|
|
||||||
var distance = hamming.Uint64(file.imageHash, otherFile.imageHash)
|
|
||||||
if distance > 5 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
filesMap.Images = slices.Delete(filesMap.Images, otherIndex, otherIndex+1)
|
|
||||||
if len(currentCluster) == 1 {
|
|
||||||
fmt.Println(currentCluster[0].path)
|
|
||||||
countDupeSets++
|
countDupeSets++
|
||||||
|
for _, image := range cluster.images {
|
||||||
countInstances++
|
countInstances++
|
||||||
}
|
fmt.Println(image.path, image.distance)
|
||||||
currentCluster = append(currentCluster, otherFile)
|
|
||||||
fmt.Println(otherFile.path, distance)
|
|
||||||
countInstances++
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(currentCluster) <= 1 {
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Println()
|
fmt.Println()
|
||||||
|
Reference in New Issue
Block a user