node build fixed
This commit is contained in:
230
seanime-2.9.10/internal/util/comparison/matching.go
Normal file
230
seanime-2.9.10/internal/util/comparison/matching.go
Normal file
@@ -0,0 +1,230 @@
|
||||
// Package comparison contains helpers for comparing, matching and filtering media titles.
|
||||
package comparison
|
||||
|
||||
import (
|
||||
"github.com/adrg/strutil/metrics"
|
||||
)
|
||||
|
||||
// LevenshteinResult pairs a candidate string with the Levenshtein distance
// measured between it and a reference string.
type LevenshteinResult struct {
	// OriginalValue is the reference string; Value is the candidate it was
	// compared against.
	OriginalValue, Value *string
	// Distance is the Levenshtein distance computed for the pair.
	Distance int
}
|
||||
|
||||
// CompareWithLevenshtein compares a string to a slice of strings and returns a slice of LevenshteinResult containing the Levenshtein distance for each string.
|
||||
func CompareWithLevenshtein(v *string, vals []*string) []*LevenshteinResult {
|
||||
return CompareWithLevenshteinCleanFunc(v, vals, func(val string) string {
|
||||
return val
|
||||
})
|
||||
}
|
||||
func CompareWithLevenshteinCleanFunc(v *string, vals []*string, cleanFunc func(val string) string) []*LevenshteinResult {
|
||||
|
||||
lev := metrics.NewLevenshtein()
|
||||
lev.CaseSensitive = false
|
||||
//lev.DeleteCost = 1
|
||||
|
||||
res := make([]*LevenshteinResult, len(vals))
|
||||
|
||||
for _, val := range vals {
|
||||
res = append(res, &LevenshteinResult{
|
||||
OriginalValue: v,
|
||||
Value: val,
|
||||
Distance: lev.Distance(cleanFunc(*v), cleanFunc(*val)),
|
||||
})
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
// FindBestMatchWithLevenshtein returns the best match from a slice of strings as a reference to a LevenshteinResult.
|
||||
// It also returns a boolean indicating whether the best match was found.
|
||||
func FindBestMatchWithLevenshtein(v *string, vals []*string) (*LevenshteinResult, bool) {
|
||||
res := CompareWithLevenshtein(v, vals)
|
||||
|
||||
if len(res) == 0 {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
var bestResult *LevenshteinResult
|
||||
for _, result := range res {
|
||||
if bestResult == nil || result.Distance < bestResult.Distance {
|
||||
bestResult = result
|
||||
}
|
||||
}
|
||||
|
||||
return bestResult, true
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
// JaroWinklerResult pairs a candidate string with the Jaro-Winkler rating
// computed between it and a reference string.
type JaroWinklerResult struct {
	// OriginalValue is the reference string; Value is the candidate it was
	// compared against.
	OriginalValue, Value *string
	// Rating is the Jaro-Winkler score computed for the pair; the best-match
	// helper treats a higher rating as a better match.
	Rating float64
}
|
||||
|
||||
// CompareWithJaroWinkler compares a string to a slice of strings and returns a slice of JaroWinklerResult containing the JaroWinkler distance for each string.
|
||||
func CompareWithJaroWinkler(v *string, vals []*string) []*JaroWinklerResult {
|
||||
|
||||
jw := metrics.NewJaroWinkler()
|
||||
jw.CaseSensitive = false
|
||||
|
||||
res := make([]*JaroWinklerResult, len(vals))
|
||||
|
||||
for _, val := range vals {
|
||||
res = append(res, &JaroWinklerResult{
|
||||
OriginalValue: v,
|
||||
Value: val,
|
||||
Rating: jw.Compare(*v, *val),
|
||||
})
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
// FindBestMatchWithJaroWinkler returns the best match from a slice of strings as a reference to a JaroWinklerResult.
|
||||
// It also returns a boolean indicating whether the best match was found.
|
||||
func FindBestMatchWithJaroWinkler(v *string, vals []*string) (*JaroWinklerResult, bool) {
|
||||
res := CompareWithJaroWinkler(v, vals)
|
||||
|
||||
if len(res) == 0 {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
var bestResult *JaroWinklerResult
|
||||
for _, result := range res {
|
||||
if bestResult == nil || result.Rating > bestResult.Rating {
|
||||
bestResult = result
|
||||
}
|
||||
}
|
||||
|
||||
return bestResult, true
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
// JaccardResult pairs a candidate string with the Jaccard rating computed
// between it and a reference string.
type JaccardResult struct {
	// OriginalValue is the reference string; Value is the candidate it was
	// compared against.
	OriginalValue, Value *string
	// Rating is the Jaccard score computed for the pair; the best-match helper
	// treats a higher rating as a better match.
	Rating float64
}
|
||||
|
||||
// CompareWithJaccard compares a string to a slice of strings and returns a slice of JaccardResult containing the Jaccard distance for each string.
|
||||
func CompareWithJaccard(v *string, vals []*string) []*JaccardResult {
|
||||
|
||||
jw := metrics.NewJaccard()
|
||||
jw.CaseSensitive = false
|
||||
jw.NgramSize = 1
|
||||
|
||||
res := make([]*JaccardResult, len(vals))
|
||||
|
||||
for _, val := range vals {
|
||||
res = append(res, &JaccardResult{
|
||||
OriginalValue: v,
|
||||
Value: val,
|
||||
Rating: jw.Compare(*v, *val),
|
||||
})
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
// FindBestMatchWithJaccard returns the best match from a slice of strings as a reference to a JaccardResult.
|
||||
// It also returns a boolean indicating whether the best match was found.
|
||||
func FindBestMatchWithJaccard(v *string, vals []*string) (*JaccardResult, bool) {
|
||||
res := CompareWithJaccard(v, vals)
|
||||
|
||||
if len(res) == 0 {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
var bestResult *JaccardResult
|
||||
for _, result := range res {
|
||||
if bestResult == nil || result.Rating > bestResult.Rating {
|
||||
bestResult = result
|
||||
}
|
||||
}
|
||||
|
||||
return bestResult, true
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
// SorensenDiceResult pairs a candidate string with the Sorensen-Dice rating
// computed between it and a reference string.
type SorensenDiceResult struct {
	// OriginalValue is the reference string; Value is the candidate it was
	// compared against.
	OriginalValue, Value *string
	// Rating is the Sorensen-Dice score computed for the pair; the best-match
	// helper treats a higher rating as a better match.
	Rating float64
}
|
||||
|
||||
func CompareWithSorensenDice(v *string, vals []*string) []*SorensenDiceResult {
|
||||
|
||||
dice := metrics.NewSorensenDice()
|
||||
dice.CaseSensitive = false
|
||||
|
||||
res := make([]*SorensenDiceResult, len(vals))
|
||||
|
||||
for _, val := range vals {
|
||||
res = append(res, &SorensenDiceResult{
|
||||
OriginalValue: v,
|
||||
Value: val,
|
||||
Rating: dice.Compare(*v, *val),
|
||||
})
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
func FindBestMatchWithSorensenDice(v *string, vals []*string) (*SorensenDiceResult, bool) {
|
||||
res := CompareWithSorensenDice(v, vals)
|
||||
|
||||
if len(res) == 0 {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
var bestResult *SorensenDiceResult
|
||||
for _, result := range res {
|
||||
if bestResult == nil || result.Rating > bestResult.Rating {
|
||||
bestResult = result
|
||||
}
|
||||
}
|
||||
|
||||
return bestResult, true
|
||||
}
|
||||
|
||||
func EliminateLeastSimilarValue(arr []string) []string {
|
||||
if len(arr) < 3 {
|
||||
return arr
|
||||
}
|
||||
|
||||
sd := metrics.NewSorensenDice()
|
||||
sd.CaseSensitive = false
|
||||
|
||||
leastSimilarIndex := -1
|
||||
leastSimilarScore := 2.0
|
||||
|
||||
for i := 0; i < len(arr); i++ {
|
||||
totalSimilarity := 0.0
|
||||
|
||||
for j := 0; j < len(arr); j++ {
|
||||
if i != j {
|
||||
score := sd.Compare(arr[i], arr[j])
|
||||
totalSimilarity += score
|
||||
}
|
||||
}
|
||||
|
||||
if totalSimilarity < leastSimilarScore {
|
||||
leastSimilarScore = totalSimilarity
|
||||
leastSimilarIndex = i
|
||||
}
|
||||
}
|
||||
|
||||
if leastSimilarIndex != -1 {
|
||||
arr = append(arr[:leastSimilarIndex], arr[leastSimilarIndex+1:]...)
|
||||
}
|
||||
|
||||
return arr
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user