node build fixed
This commit is contained in:
157
seanime-2.9.10/internal/util/comparison/filtering.go
Normal file
157
seanime-2.9.10/internal/util/comparison/filtering.go
Normal file
@@ -0,0 +1,157 @@
|
||||
package comparison
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func ValueContainsSeason(val string) bool {
|
||||
val = strings.ToLower(val)
|
||||
|
||||
if strings.IndexRune(val, '第') != -1 {
|
||||
return false
|
||||
}
|
||||
if ValueContainsSpecial(val) {
|
||||
return false
|
||||
}
|
||||
|
||||
if strings.Contains(val, "season") {
|
||||
return true
|
||||
}
|
||||
|
||||
re := regexp.MustCompile(`\d(st|nd|rd|th) [Ss].*`)
|
||||
if re.MatchString(val) {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// seasonNumberRegexes are tried in order against the lowercased value; the
// first capture group of each holds the season number. They are compiled once
// at package scope instead of on every call.
var seasonNumberRegexes = []*regexp.Regexp{
	// The word "season" followed by a number.
	regexp.MustCompile(`season (\d+)`),
	// A number followed by "st", "nd", "rd", or "th", then a space and "s".
	regexp.MustCompile(`(\d+)(st|nd|rd|th) [sS]`),
}

// ExtractSeasonNumber extracts a season number from val.
//
// The value is lowercased, then matched against "season <n>" and
// "<n>st|nd|rd|th s..." in that order. The first pattern whose number parses
// as an int wins. It returns -1 when no season number is found.
func ExtractSeasonNumber(val string) int {
	val = strings.ToLower(val)

	for _, re := range seasonNumberRegexes {
		matches := re.FindStringSubmatch(val)
		if len(matches) < 2 {
			continue
		}
		// A number that does not fit in an int falls through to the next pattern.
		if season, err := strconv.Atoi(matches[1]); err == nil {
			return season
		}
	}

	// No season number found
	return -1
}
|
||||
|
||||
// resolutionHeightRe matches a lowercased value that consists solely of a 3 or
// 4 digit height followed by "p" (e.g. "360p"); the digits are captured.
var resolutionHeightRe = regexp.MustCompile(`^(\d{3,4})p$`)

// knownResolutions lists the substrings checked first, in priority order, and
// the height each one maps to.
var knownResolutions = []struct {
	marker string
	height int
}{
	{"4k", 2160},
	{"2160", 2160},
	{"1080", 1080},
	{"720", 720},
	{"540", 540},
	{"480", 480},
}

// ExtractResolutionInt extracts the resolution from a string and returns it as an integer.
// This is used for comparing resolutions.
//
// The value is lowercased. Well-known resolutions ("4k", "2160", "1080",
// "720", "540", "480") are detected anywhere in the value. Otherwise a value of
// the exact form "<3-4 digits>p" yields its numeric height.
// If the resolution is not found, it returns 0.
func ExtractResolutionInt(val string) int {
	val = strings.ToLower(val)

	for _, known := range knownResolutions {
		if strings.Contains(val, known.marker) {
			return known.height
		}
	}

	// The capture group must hold the digits: capturing the "p" suffix made
	// strconv.Atoi fail on every input, so this fallback always returned 0.
	matches := resolutionHeightRe.FindStringSubmatch(val)
	if len(matches) > 1 {
		if res, err := strconv.Atoi(matches[1]); err == nil {
			return res
		}
	}

	return 0
}
|
||||
|
||||
// specialValueRegexes are the patterns that mark a value as a special
// (OVA/ONA/OAD/SP, season 0 episodes, ...). They are compiled once at package
// scope instead of on every call. The last pattern is case-sensitive; the
// others are not.
var specialValueRegexes = []*regexp.Regexp{
	regexp.MustCompile(`(?i)(^|(?P<show>.*?)[ _.\-(]+)(SP|OAV|OVA|OAD|ONA) ?(?P<ep>\d{1,2})(-(?P<ep2>[0-9]{1,3}))? ?(?P<title>.*)$`),
	regexp.MustCompile(`(?i)[-._( ](OVA|ONA)[-._) ]`),
	regexp.MustCompile(`(?i)[-._ ](S|SP)(?P<season>(0|00))([Ee]\d)`),
	regexp.MustCompile(`[({\[]?(OVA|ONA|OAV|OAD|SP|SPECIAL)[])}]?`),
}

// ValueContainsSpecial reports whether val matches any of the special-content
// patterns (e.g. "OVA", "SP 1", "S00E01").
func ValueContainsSpecial(val string) bool {
	for _, re := range specialValueRegexes {
		if re.MatchString(val) {
			return true
		}
	}

	return false
}
|
||||
|
||||
// ignoredKeywordRe matches a value that consists only of an ignored keyword,
// optionally wrapped in brackets and single whitespace characters. It is
// compiled once at package scope instead of on every call.
var ignoredKeywordRe = regexp.MustCompile(`(?i)^\s?[({\[]?\s?(EXTRAS?|OVAS?|OTHERS?|SPECIALS|MOVIES|SEASONS|NC)\s?[])}]?\s?$`)

// ValueContainsIgnoredKeywords reports whether the whole of val is one of the
// ignored keywords (EXTRA(S), OVA(S), OTHER(S), SPECIALS, MOVIES, SEASONS, NC),
// matched case-insensitively.
func ValueContainsIgnoredKeywords(val string) bool {
	return ignoredKeywordRe.MatchString(val)
}
|
||||
// batchKeywordRe matches a batch keyword anywhere in a value, optionally
// wrapped in brackets. It is compiled once at package scope instead of on
// every call.
var batchKeywordRe = regexp.MustCompile(`(?i)[({\[]?\s?(EXTRAS|OVAS|OTHERS|SPECIALS|MOVIES|SEASONS|BATCH|COMPLETE|COMPLETE SERIES)\s?[])}]?\s?`)

// ValueContainsBatchKeywords reports whether val contains one of the batch
// keywords (EXTRAS, OVAS, OTHERS, SPECIALS, MOVIES, SEASONS, BATCH, COMPLETE),
// matched case-insensitively and without anchoring.
func ValueContainsBatchKeywords(val string) bool {
	return batchKeywordRe.MatchString(val)
}
|
||||
|
||||
// ncValueRegexes are the patterns that mark a value as non-content material
// (openings, endings, trailers, commercials, creditless versions, ...). All of
// them are case-insensitive and are compiled once at package scope instead of
// on every call.
var ncValueRegexes = []*regexp.Regexp{
	regexp.MustCompile(`(?i)(^|(?P<show>.*?)[ _.\-(]+)\b(OP|NCOP|OPED)\b ?(?P<ep>\d{1,2}[a-z]?)? ?([ _.\-)]+(?P<title>.*))?`),
	regexp.MustCompile(`(?i)(^|(?P<show>.*?)[ _.\-(]+)\b(ED|NCED)\b ?(?P<ep>\d{1,2}[a-z]?)? ?([ _.\-)]+(?P<title>.*))?`),
	regexp.MustCompile(`(?i)(^|(?P<show>.*?)[ _.\-(]+)\b(TRAILER|PROMO|PV)\b ?(?P<ep>\d{1,2}) ?([ _.\-)]+(?P<title>.*))?`),
	regexp.MustCompile(`(?i)(^|(?P<show>.*?)[ _.\-(]+)\b(OTHERS?)\b(?P<ep>\d{1,2}) ?[ _.\-)]+(?P<title>.*)`),
	regexp.MustCompile(`(?i)(^|(?P<show>.*?)[ _.\-(]+)\b(CM|COMMERCIAL|AD)\b ?(?P<ep>\d{1,2}) ?([ _.\-)]+(?P<title>.*))?`),
	regexp.MustCompile(`(?i)(^|(?P<show>.*?)[ _.\-(]+)\b(CREDITLESS|NCOP|NCED|OP|ED)\b ?(?P<ep>\d{1,2}[a-z]?)? ?([ _.\-)]+(?P<title>.*))?`),
}

// ValueContainsNC reports whether val matches any of the non-content patterns
// (e.g. "NCOP", "ED 2").
func ValueContainsNC(val string) bool {
	for _, re := range ncValueRegexes {
		if re.MatchString(val) {
			return true
		}
	}

	return false
}
|
||||
344
seanime-2.9.10/internal/util/comparison/filtering_test.go
Normal file
344
seanime-2.9.10/internal/util/comparison/filtering_test.go
Normal file
@@ -0,0 +1,344 @@
|
||||
package comparison
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestValueContainsSeason(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
name: "Contains 'season' in lowercase",
|
||||
input: "JJK season 2",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Contains 'season' in uppercase",
|
||||
input: "JJK SEASON 2",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Contains '2nd S' in lowercase",
|
||||
input: "Spy x Family 2nd Season",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Contains '2nd S' in uppercase",
|
||||
input: "Spy x Family 2ND SEASON",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Does not contain 'season' or '1st S'",
|
||||
input: "This is a test",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "Contains special characters",
|
||||
input: "JJK season 2 (OVA)",
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
result := ValueContainsSeason(test.input)
|
||||
if result != test.expected {
|
||||
t.Errorf("ValueContainsSeason() with args %v, expected %v, but got %v.", test.input, test.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractSeasonNumber(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected int
|
||||
}{
|
||||
{
|
||||
name: "Contains 'season' followed by a number",
|
||||
input: "JJK season 2",
|
||||
expected: 2,
|
||||
},
|
||||
{
|
||||
name: "Contains a number followed by 'st', 'nd', 'rd', or 'th', followed by 's' or 'S'",
|
||||
input: "Spy x Family 2nd S",
|
||||
expected: 2,
|
||||
},
|
||||
{
|
||||
name: "Does not contain 'season' or '1st S'",
|
||||
input: "This is a test",
|
||||
expected: -1,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
result := ExtractSeasonNumber(test.input)
|
||||
if result != test.expected {
|
||||
t.Errorf("ExtractSeasonNumber() with args %v, expected %v, but got %v.", test.input, test.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractResolutionInt(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected int
|
||||
}{
|
||||
{
|
||||
name: "Contains '4K' in uppercase",
|
||||
input: "4K",
|
||||
expected: 2160,
|
||||
},
|
||||
{
|
||||
name: "Contains '4k' in lowercase",
|
||||
input: "4k",
|
||||
expected: 2160,
|
||||
},
|
||||
{
|
||||
name: "Contains '2160'",
|
||||
input: "2160",
|
||||
expected: 2160,
|
||||
},
|
||||
{
|
||||
name: "Contains '1080'",
|
||||
input: "1080",
|
||||
expected: 1080,
|
||||
},
|
||||
{
|
||||
name: "Contains '720'",
|
||||
input: "720",
|
||||
expected: 720,
|
||||
},
|
||||
{
|
||||
name: "Contains '480'",
|
||||
input: "480",
|
||||
expected: 480,
|
||||
},
|
||||
{
|
||||
name: "Does not contain a resolution",
|
||||
input: "This is a test",
|
||||
expected: 0,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
result := ExtractResolutionInt(test.input)
|
||||
if result != test.expected {
|
||||
t.Errorf("ExtractResolutionInt() with args %v, expected %v, but got %v.", test.input, test.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestValueContainsSpecial(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
name: "Contains 'OVA' in uppercase",
|
||||
input: "JJK OVA",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Contains 'ova' in lowercase",
|
||||
input: "JJK ova",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "Does not contain special keywords",
|
||||
input: "This is a test",
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
result := ValueContainsSpecial(test.input)
|
||||
if result != test.expected {
|
||||
t.Errorf("ValueContainsSpecial() with args %v, expected %v, but got %v.", test.input, test.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestValueContainsIgnoredKeywords(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
name: "Contains 'EXTRAS' in uppercase",
|
||||
input: "EXTRAS",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Contains 'extras' in lowercase",
|
||||
input: "extras",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Does not contain ignored keywords",
|
||||
input: "This is a test",
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
result := ValueContainsIgnoredKeywords(test.input)
|
||||
if result != test.expected {
|
||||
t.Errorf("ValueContainsIgnoredKeywords() with args %v, expected %v, but got %v.", test.input, test.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestValueContainsBatchKeywords(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
name: "Contains 'BATCH' in uppercase",
|
||||
input: "BATCH",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Contains 'batch' in lowercase",
|
||||
input: "batch",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Does not contain batch keywords",
|
||||
input: "This is a test",
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
result := ValueContainsBatchKeywords(test.input)
|
||||
if result != test.expected {
|
||||
t.Errorf("ValueContainsBatchKeywords() with args %v, expected %v, but got %v.", test.input, test.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestValueContainsNC(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
input: "NCOP",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
input: "ncop",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
input: "One Piece - 1000 - NCOP",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
input: "One Piece ED 2",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
input: "This is a test",
|
||||
expected: false,
|
||||
}, {
|
||||
input: "This is a test",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
input: "Himouto.Umaru.chan.S01E02.1080p.BluRay.Opus2.0.x265-smol",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
input: "Himouto.Umaru.chan.S01E02.1080p.BluRay.x265-smol",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
input: "One Piece - 1000 - Operation something something",
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.input, func(t *testing.T) {
|
||||
result := ValueContainsNC(test.input)
|
||||
if result != test.expected {
|
||||
t.Errorf("ValueContainsNC() with args %v, expected %v, but got %v.", test.input, test.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
//func TestLikelyNC(t *testing.T) {
|
||||
// tests := []struct {
|
||||
// name string
|
||||
// input string
|
||||
// expected bool
|
||||
// }{
|
||||
// {
|
||||
// name: "Does not contain NC keywords 1",
|
||||
// input: "Himouto.Umaru.chan.S01E02.1080p.BluRay.Opus2.0.x265-smol",
|
||||
// expected: false,
|
||||
// },
|
||||
// {
|
||||
// name: "Does not contain NC keywords 2",
|
||||
// input: "Himouto.Umaru.chan.S01E02.1080p.BluRay.x265-smol",
|
||||
// expected: false,
|
||||
// },
|
||||
// {
|
||||
// name: "Contains NC keywords 1",
|
||||
// input: "Himouto.Umaru.chan.S00E02.1080p.BluRay.x265-smol",
|
||||
// expected: true,
|
||||
// },
|
||||
// {
|
||||
// name: "Contains NC keywords 2",
|
||||
// input: "Himouto.Umaru.chan.OP02.1080p.BluRay.x265-smol",
|
||||
// expected: true,
|
||||
// },
|
||||
// }
|
||||
//
|
||||
// for _, test := range tests {
|
||||
// t.Run(test.name, func(t *testing.T) {
|
||||
// metadata := habari.Parse(test.input)
|
||||
// var episode string
|
||||
// var season string
|
||||
//
|
||||
// if len(metadata.SeasonNumber) > 0 {
|
||||
// if len(metadata.SeasonNumber) == 1 {
|
||||
// season = metadata.SeasonNumber[0]
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if len(metadata.EpisodeNumber) > 0 {
|
||||
// if len(metadata.EpisodeNumber) == 1 {
|
||||
// episode = metadata.EpisodeNumber[0]
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// result := LikelyNC(test.input, season, episode)
|
||||
// if result != test.expected {
|
||||
// t.Errorf("ValueContainsNC() with args %v, expected %v, but got %v.", test.input, test.expected, result)
|
||||
// }
|
||||
// })
|
||||
// }
|
||||
//}
|
||||
230
seanime-2.9.10/internal/util/comparison/matching.go
Normal file
230
seanime-2.9.10/internal/util/comparison/matching.go
Normal file
@@ -0,0 +1,230 @@
|
||||
// Package comparison contains helpers related to comparison, matching and filtering of media titles.
|
||||
package comparison
|
||||
|
||||
import (
|
||||
"github.com/adrg/strutil/metrics"
|
||||
)
|
||||
|
||||
// LevenshteinResult pairs a candidate string with its Levenshtein distance to
// the string it was compared against.
type LevenshteinResult struct {
	// OriginalValue is the string the candidate was compared against;
	// Value is the candidate itself.
	OriginalValue, Value *string
	// Distance is the Levenshtein distance between the two strings.
	Distance int
}
|
||||
|
||||
// CompareWithLevenshtein compares a string to a slice of strings and returns a slice of LevenshteinResult containing the Levenshtein distance for each string.
|
||||
func CompareWithLevenshtein(v *string, vals []*string) []*LevenshteinResult {
|
||||
return CompareWithLevenshteinCleanFunc(v, vals, func(val string) string {
|
||||
return val
|
||||
})
|
||||
}
|
||||
func CompareWithLevenshteinCleanFunc(v *string, vals []*string, cleanFunc func(val string) string) []*LevenshteinResult {
|
||||
|
||||
lev := metrics.NewLevenshtein()
|
||||
lev.CaseSensitive = false
|
||||
//lev.DeleteCost = 1
|
||||
|
||||
res := make([]*LevenshteinResult, len(vals))
|
||||
|
||||
for _, val := range vals {
|
||||
res = append(res, &LevenshteinResult{
|
||||
OriginalValue: v,
|
||||
Value: val,
|
||||
Distance: lev.Distance(cleanFunc(*v), cleanFunc(*val)),
|
||||
})
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
// FindBestMatchWithLevenshtein returns the best match from a slice of strings as a reference to a LevenshteinResult.
|
||||
// It also returns a boolean indicating whether the best match was found.
|
||||
func FindBestMatchWithLevenshtein(v *string, vals []*string) (*LevenshteinResult, bool) {
|
||||
res := CompareWithLevenshtein(v, vals)
|
||||
|
||||
if len(res) == 0 {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
var bestResult *LevenshteinResult
|
||||
for _, result := range res {
|
||||
if bestResult == nil || result.Distance < bestResult.Distance {
|
||||
bestResult = result
|
||||
}
|
||||
}
|
||||
|
||||
return bestResult, true
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
// JaroWinklerResult pairs a candidate string with its Jaro-Winkler rating
// against the string it was compared to.
type JaroWinklerResult struct {
	// OriginalValue is the string the candidate was compared against;
	// Value is the candidate itself.
	OriginalValue, Value *string
	// Rating is the value returned by the Jaro-Winkler metric's Compare.
	Rating float64
}
|
||||
|
||||
// CompareWithJaroWinkler compares a string to a slice of strings and returns a slice of JaroWinklerResult containing the JaroWinkler distance for each string.
|
||||
func CompareWithJaroWinkler(v *string, vals []*string) []*JaroWinklerResult {
|
||||
|
||||
jw := metrics.NewJaroWinkler()
|
||||
jw.CaseSensitive = false
|
||||
|
||||
res := make([]*JaroWinklerResult, len(vals))
|
||||
|
||||
for _, val := range vals {
|
||||
res = append(res, &JaroWinklerResult{
|
||||
OriginalValue: v,
|
||||
Value: val,
|
||||
Rating: jw.Compare(*v, *val),
|
||||
})
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
// FindBestMatchWithJaroWinkler returns the best match from a slice of strings as a reference to a JaroWinklerResult.
|
||||
// It also returns a boolean indicating whether the best match was found.
|
||||
func FindBestMatchWithJaroWinkler(v *string, vals []*string) (*JaroWinklerResult, bool) {
|
||||
res := CompareWithJaroWinkler(v, vals)
|
||||
|
||||
if len(res) == 0 {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
var bestResult *JaroWinklerResult
|
||||
for _, result := range res {
|
||||
if bestResult == nil || result.Rating > bestResult.Rating {
|
||||
bestResult = result
|
||||
}
|
||||
}
|
||||
|
||||
return bestResult, true
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
// JaccardResult pairs a candidate string with its Jaccard rating against the
// string it was compared to.
type JaccardResult struct {
	// OriginalValue is the string the candidate was compared against;
	// Value is the candidate itself.
	OriginalValue, Value *string
	// Rating is the value returned by the Jaccard metric's Compare.
	Rating float64
}
|
||||
|
||||
// CompareWithJaccard compares a string to a slice of strings and returns a slice of JaccardResult containing the Jaccard distance for each string.
|
||||
func CompareWithJaccard(v *string, vals []*string) []*JaccardResult {
|
||||
|
||||
jw := metrics.NewJaccard()
|
||||
jw.CaseSensitive = false
|
||||
jw.NgramSize = 1
|
||||
|
||||
res := make([]*JaccardResult, len(vals))
|
||||
|
||||
for _, val := range vals {
|
||||
res = append(res, &JaccardResult{
|
||||
OriginalValue: v,
|
||||
Value: val,
|
||||
Rating: jw.Compare(*v, *val),
|
||||
})
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
// FindBestMatchWithJaccard returns the best match from a slice of strings as a reference to a JaccardResult.
|
||||
// It also returns a boolean indicating whether the best match was found.
|
||||
func FindBestMatchWithJaccard(v *string, vals []*string) (*JaccardResult, bool) {
|
||||
res := CompareWithJaccard(v, vals)
|
||||
|
||||
if len(res) == 0 {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
var bestResult *JaccardResult
|
||||
for _, result := range res {
|
||||
if bestResult == nil || result.Rating > bestResult.Rating {
|
||||
bestResult = result
|
||||
}
|
||||
}
|
||||
|
||||
return bestResult, true
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
// SorensenDiceResult pairs a candidate string with its Sorensen-Dice rating
// against the string it was compared to.
type SorensenDiceResult struct {
	// OriginalValue is the string the candidate was compared against;
	// Value is the candidate itself.
	OriginalValue, Value *string
	// Rating is the value returned by the Sorensen-Dice metric's Compare.
	Rating float64
}
|
||||
|
||||
func CompareWithSorensenDice(v *string, vals []*string) []*SorensenDiceResult {
|
||||
|
||||
dice := metrics.NewSorensenDice()
|
||||
dice.CaseSensitive = false
|
||||
|
||||
res := make([]*SorensenDiceResult, len(vals))
|
||||
|
||||
for _, val := range vals {
|
||||
res = append(res, &SorensenDiceResult{
|
||||
OriginalValue: v,
|
||||
Value: val,
|
||||
Rating: dice.Compare(*v, *val),
|
||||
})
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
func FindBestMatchWithSorensenDice(v *string, vals []*string) (*SorensenDiceResult, bool) {
|
||||
res := CompareWithSorensenDice(v, vals)
|
||||
|
||||
if len(res) == 0 {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
var bestResult *SorensenDiceResult
|
||||
for _, result := range res {
|
||||
if bestResult == nil || result.Rating > bestResult.Rating {
|
||||
bestResult = result
|
||||
}
|
||||
}
|
||||
|
||||
return bestResult, true
|
||||
}
|
||||
|
||||
func EliminateLeastSimilarValue(arr []string) []string {
|
||||
if len(arr) < 3 {
|
||||
return arr
|
||||
}
|
||||
|
||||
sd := metrics.NewSorensenDice()
|
||||
sd.CaseSensitive = false
|
||||
|
||||
leastSimilarIndex := -1
|
||||
leastSimilarScore := 2.0
|
||||
|
||||
for i := 0; i < len(arr); i++ {
|
||||
totalSimilarity := 0.0
|
||||
|
||||
for j := 0; j < len(arr); j++ {
|
||||
if i != j {
|
||||
score := sd.Compare(arr[i], arr[j])
|
||||
totalSimilarity += score
|
||||
}
|
||||
}
|
||||
|
||||
if totalSimilarity < leastSimilarScore {
|
||||
leastSimilarScore = totalSimilarity
|
||||
leastSimilarIndex = i
|
||||
}
|
||||
}
|
||||
|
||||
if leastSimilarIndex != -1 {
|
||||
arr = append(arr[:leastSimilarIndex], arr[leastSimilarIndex+1:]...)
|
||||
}
|
||||
|
||||
return arr
|
||||
|
||||
}
|
||||
114
seanime-2.9.10/internal/util/comparison/matching_test.go
Normal file
114
seanime-2.9.10/internal/util/comparison/matching_test.go
Normal file
@@ -0,0 +1,114 @@
|
||||
package comparison
|
||||
|
||||
import (
|
||||
"github.com/samber/lo"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestFindBestMatchWithLevenstein(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
title string
|
||||
comparisonTitles []string
|
||||
expectedResult string
|
||||
expectedDistance int
|
||||
}{
|
||||
{
|
||||
title: "jujutsu kaisen 2",
|
||||
comparisonTitles: []string{"JJK", "Jujutsu Kaisen", "Jujutsu Kaisen 2"},
|
||||
expectedResult: "Jujutsu Kaisen 2",
|
||||
expectedDistance: 0,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
|
||||
t.Run(test.title, func(t *testing.T) {
|
||||
res, ok := FindBestMatchWithLevenshtein(&test.title, lo.ToSlicePtr(test.comparisonTitles))
|
||||
|
||||
if assert.True(t, ok) {
|
||||
assert.Equal(t, test.expectedResult, *res.Value, "expected result does not match")
|
||||
assert.Equal(t, test.expectedDistance, res.Distance, "expected distance does not match")
|
||||
t.Logf("value: %s, distance: %d", *res.Value, res.Distance)
|
||||
}
|
||||
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
func TestFindBestMatchWithDice(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
title string
|
||||
comparisonTitles []string
|
||||
expectedResult string
|
||||
expectedRating float64
|
||||
}{
|
||||
{
|
||||
title: "jujutsu kaisen 2",
|
||||
comparisonTitles: []string{"JJK", "Jujutsu Kaisen", "Jujutsu Kaisen 2"},
|
||||
expectedResult: "Jujutsu Kaisen 2",
|
||||
expectedRating: 1,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
|
||||
t.Run(test.title, func(t *testing.T) {
|
||||
res, ok := FindBestMatchWithSorensenDice(&test.title, lo.ToSlicePtr(test.comparisonTitles))
|
||||
|
||||
if assert.True(t, ok, "expected result, got nil") {
|
||||
assert.Equal(t, test.expectedResult, *res.Value, "expected result does not match")
|
||||
assert.Equal(t, test.expectedRating, res.Rating, "expected rating does not match")
|
||||
t.Logf("value: %s, rating: %f", *res.Value, res.Rating)
|
||||
}
|
||||
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestEliminateLestSimilarValue(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
title string
|
||||
comparisonTitles []string
|
||||
expectedEliminated string
|
||||
}{
|
||||
{
|
||||
title: "jujutsu kaisen 2",
|
||||
comparisonTitles: []string{"JJK", "Jujutsu Kaisen", "Jujutsu Kaisen 2"},
|
||||
expectedEliminated: "JJK",
|
||||
},
|
||||
{
|
||||
title: "One Piece - Film Z",
|
||||
comparisonTitles: []string{"One Piece - Film Z", "One Piece Film Z", "One Piece Gold"},
|
||||
expectedEliminated: "One Piece Gold",
|
||||
},
|
||||
{
|
||||
title: "One Piece - Film Z",
|
||||
comparisonTitles: []string{"One Piece - Film Z", "One Piece Film Z", "One Piece Z"},
|
||||
expectedEliminated: "One Piece Z",
|
||||
},
|
||||
{
|
||||
title: "Mononogatari",
|
||||
comparisonTitles: []string{"Mononogatari", "Mononogatari Cour 2", "Nekomonogatari"},
|
||||
expectedEliminated: "Nekomonogatari",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.title, func(t *testing.T) {
|
||||
res := EliminateLeastSimilarValue(test.comparisonTitles)
|
||||
for _, n := range res {
|
||||
if n == test.expectedEliminated {
|
||||
t.Fatalf("expected \"%s\" to be eliminated from %v", n, res)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user