mirror of
https://github.com/JaCoB1123/dupe-finder.git
synced 2025-05-18 14:11:55 +02:00
Extract clustering.go
This commit is contained in:
parent
66da8393a4
commit
e314e89657
41
filesmap.go
41
filesmap.go
@ -7,10 +7,8 @@ import (
|
|||||||
"image"
|
"image"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"slices"
|
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/steakknife/hamming"
|
|
||||||
"github.com/vbauerster/mpb/v8"
|
"github.com/vbauerster/mpb/v8"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -174,45 +172,6 @@ func (fm *FilesMap) hashFile(path string, size int64) int64 {
|
|||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
type imageCluster struct {
|
|
||||||
images []similarImage
|
|
||||||
}
|
|
||||||
|
|
||||||
// similarImage is a single member of an imageCluster.
type similarImage struct {
	// path is the location of the image file.
	path string
	// distance is the Hamming distance between this image's hash and the hash
	// of the image the cluster was seeded with. It is left at zero for the
	// seed image itself.
	distance int
}
|
|
||||||
|
|
||||||
func (fm *FilesMap) getImageClusters() []imageCluster {
|
|
||||||
var clusters []imageCluster
|
|
||||||
|
|
||||||
for len(fm.Images) > 0 {
|
|
||||||
file := fm.Images[0]
|
|
||||||
fm.Images = slices.Delete(fm.Images, 0, 1)
|
|
||||||
|
|
||||||
var currentCluster []similarImage
|
|
||||||
currentCluster = append(currentCluster, similarImage{path: file.path})
|
|
||||||
for otherIndex := len(fm.Images) - 1; otherIndex >= 0; otherIndex-- {
|
|
||||||
otherFile := fm.Images[otherIndex]
|
|
||||||
var distance = hamming.Uint64(file.imageHash, otherFile.imageHash)
|
|
||||||
if distance > 5 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
fm.Images = slices.Delete(fm.Images, otherIndex, otherIndex+1)
|
|
||||||
currentCluster = append(currentCluster, similarImage{path: otherFile.path, distance: distance})
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(currentCluster) == 1 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
clusters = append(clusters, imageCluster{images: currentCluster})
|
|
||||||
}
|
|
||||||
|
|
||||||
return clusters
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fm *FilesMap) hashImage(path string, size int64) {
|
func (fm *FilesMap) hashImage(path string, size int64) {
|
||||||
fm.ImagesHashing <- imageEntry{path, size, 0}
|
fm.ImagesHashing <- imageEntry{path, size, 0}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user