mirror of
https://github.com/JaCoB1123/dupe-finder.git
synced 2025-05-18 06:01:56 +02:00
Extract clustering.go
This commit is contained in:
parent
66da8393a4
commit
e314e89657
41
filesmap.go
41
filesmap.go
@ -7,10 +7,8 @@ import (
|
||||
"image"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"sync"
|
||||
|
||||
"github.com/steakknife/hamming"
|
||||
"github.com/vbauerster/mpb/v8"
|
||||
)
|
||||
|
||||
@ -174,45 +172,6 @@ func (fm *FilesMap) hashFile(path string, size int64) int64 {
|
||||
return 1
|
||||
}
|
||||
|
||||
type imageCluster struct {
|
||||
images []similarImage
|
||||
}
|
||||
|
||||
// similarImage is a single member of an imageCluster.
type similarImage struct {
	// path identifies the image file.
	path string
	// distance is the Hamming distance between this image's hash and the
	// hash of the image the cluster was built around. It is left at its
	// zero value for that first image itself.
	distance int
}
|
||||
|
||||
func (fm *FilesMap) getImageClusters() []imageCluster {
|
||||
var clusters []imageCluster
|
||||
|
||||
for len(fm.Images) > 0 {
|
||||
file := fm.Images[0]
|
||||
fm.Images = slices.Delete(fm.Images, 0, 1)
|
||||
|
||||
var currentCluster []similarImage
|
||||
currentCluster = append(currentCluster, similarImage{path: file.path})
|
||||
for otherIndex := len(fm.Images) - 1; otherIndex >= 0; otherIndex-- {
|
||||
otherFile := fm.Images[otherIndex]
|
||||
var distance = hamming.Uint64(file.imageHash, otherFile.imageHash)
|
||||
if distance > 5 {
|
||||
continue
|
||||
}
|
||||
|
||||
fm.Images = slices.Delete(fm.Images, otherIndex, otherIndex+1)
|
||||
currentCluster = append(currentCluster, similarImage{path: otherFile.path, distance: distance})
|
||||
}
|
||||
|
||||
if len(currentCluster) == 1 {
|
||||
continue
|
||||
}
|
||||
|
||||
clusters = append(clusters, imageCluster{images: currentCluster})
|
||||
}
|
||||
|
||||
return clusters
|
||||
}
|
||||
|
||||
func (fm *FilesMap) hashImage(path string, size int64) {
|
||||
fm.ImagesHashing <- imageEntry{path, size, 0}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user