Extract clustering.go

This commit is contained in:
Jan Bader 2023-12-09 15:05:55 +01:00
parent 66da8393a4
commit e314e89657

View File

@@ -7,10 +7,8 @@ import (
"image"
"os"
"path/filepath"
"slices"
"sync"
"github.com/steakknife/hamming"
"github.com/vbauerster/mpb/v8"
)
@@ -174,45 +172,6 @@ func (fm *FilesMap) hashFile(path string, size int64) int64 {
return 1
}
// imageCluster is one group of images that getImageClusters considered
// similar to each other.
type imageCluster struct {
// images holds the members of the cluster. getImageClusters stores the
// image the cluster was started from first, followed by every image that
// matched it.
images []similarImage
}
// similarImage is a single member of an imageCluster.
type similarImage struct {
// path is the path of the image, copied from the corresponding fm.Images entry.
path string
// distance is the hamming.Uint64 result between this image's hash and the
// hash of the image the cluster was started from. The starting image
// itself is stored with the zero value.
distance int
}
// getImageClusters partitions fm.Images into clusters of similar images.
//
// The first remaining image is taken as the seed of a new cluster. Every other
// remaining image whose imageHash is within a hamming.Uint64 distance of 5 from
// the seed's imageHash joins that cluster. Distances are only measured against
// the seed, never between the other members. Seeds that attract no other image
// are dropped rather than reported as single-element clusters.
//
// The method consumes fm.Images: every image is removed from the slice as it
// is processed, so the slice is empty once the method returns.
func (fm *FilesMap) getImageClusters() []imageCluster {
	var result []imageCluster

	for len(fm.Images) != 0 {
		// Pop the seed off the front so it is never compared with itself.
		seed := fm.Images[0]
		fm.Images = slices.Delete(fm.Images, 0, 1)

		// The seed is always the first member; its distance stays at zero.
		members := []similarImage{{path: seed.path}}

		// Walk backwards so that deleting index i does not shift the
		// indices that are still to be visited.
		for i := len(fm.Images) - 1; i >= 0; i-- {
			candidate := fm.Images[i]
			dist := hamming.Uint64(seed.imageHash, candidate.imageHash)
			if dist <= 5 {
				fm.Images = slices.Delete(fm.Images, i, i+1)
				members = append(members, similarImage{path: candidate.path, distance: dist})
			}
		}

		// Only keep the cluster when at least one other image matched the seed.
		if len(members) > 1 {
			result = append(result, imageCluster{images: members})
		}
	}

	return result
}
// hashImage queues an image by sending an imageEntry on the fm.ImagesHashing
// channel. The entry is built from path and size with its third field set to 0.
// NOTE(review): whether the send can block depends on how ImagesHashing is
// created and drained, which is not visible here.
func (fm *FilesMap) hashImage(path string, size int64) {
	entry := imageEntry{path, size, 0}
	fm.ImagesHashing <- entry
}