Extract getImageClusters

This commit is contained in:
Jan Bader
2023-12-09 14:50:29 +01:00
parent a6c978eaee
commit 1a9e17de10
2 changed files with 47 additions and 30 deletions

View File

@@ -7,8 +7,10 @@ import (
"image"
"os"
"path/filepath"
"slices"
"sync"
"github.com/steakknife/hamming"
"github.com/vbauerster/mpb/v8"
)
@@ -172,6 +174,45 @@ func (fm *FilesMap) hashFile(path string, size int64) int64 {
return 1
}
type (
	// imageCluster is a group of images that were judged similar to one
	// another.
	imageCluster struct {
		// images lists the members of the cluster.
		images []similarImage
	}

	// similarImage is a single member of an imageCluster.
	similarImage struct {
		// path is the location of the image file.
		path string
		// distance is the Hamming distance between this image's hash and the
		// hash of the image the cluster was built around.
		distance int
	}
)
// getImageClusters partitions the entries of fm.Images into clusters of
// similar images, using the Hamming distance between their image hashes.
//
// The first remaining entry seeds a cluster and is stored with distance 0.
// Every other remaining entry whose hash differs from the seed's hash by at
// most maxClusterDistance bits is moved into that cluster together with its
// distance to the seed. This repeats until no entries are left.
//
// Each entry lands in exactly one cluster, so an image without any similar
// counterpart is returned as a single-element cluster; callers interested
// only in duplicates must filter on the cluster size themselves.
//
// Side effect: fm.Images is consumed and is empty when the method returns.
func (fm *FilesMap) getImageClusters() []imageCluster {
	// Largest Hamming distance to the seed image at which another image is
	// still placed into the seed's cluster.
	const maxClusterDistance = 5

	var clusters []imageCluster
	for len(fm.Images) > 0 {
		seed := fm.Images[0]
		fm.Images = slices.Delete(fm.Images, 0, 1)

		// The seed is always a member, so a cluster is never empty.
		members := []similarImage{{path: seed.path}}

		// Walk backwards so that deleting an element does not shift the
		// indices that are still to be visited.
		for i := len(fm.Images) - 1; i >= 0; i-- {
			candidate := fm.Images[i]
			distance := hamming.Uint64(seed.imageHash, candidate.imageHash)
			if distance > maxClusterDistance {
				continue
			}

			fm.Images = slices.Delete(fm.Images, i, i+1)
			members = append(members, similarImage{path: candidate.path, distance: distance})
		}

		clusters = append(clusters, imageCluster{images: members})
	}

	return clusters
}
// hashImage hands the image at path, together with its size, to the
// fm.ImagesHashing channel as an imageEntry whose third field is zero.
// The send blocks until the channel accepts the entry.
func (fm *FilesMap) hashImage(path string, size int64) {
	// NOTE(review): the trailing 0 is presumably the not-yet-computed image
	// hash — confirm against the imageEntry definition.
	entry := imageEntry{path, size, 0}
	fm.ImagesHashing <- entry
}