node build fixed

This commit is contained in:
ra_ma
2025-09-20 14:08:38 +01:00
parent c6ebbe069d
commit 3d298fa434
1516 changed files with 535727 additions and 2 deletions

View File

@@ -0,0 +1,157 @@
package comparison
import (
"regexp"
"strconv"
"strings"
)
// ordinalSeasonRe matches a digit with an ordinal suffix followed by a space
// and a word starting with "s" (e.g. "2nd season", "3rd s").
// It is compiled once at package scope instead of on every call.
var ordinalSeasonRe = regexp.MustCompile(`\d(st|nd|rd|th) [Ss].*`)

// ValueContainsSeason reports whether val looks like it designates a season.
//
// The value is lower-cased first. It is rejected when it contains the rune
// '第' or when ValueContainsSpecial flags it; otherwise it is accepted when
// it contains "season" or an ordinal followed by " s" (e.g. "2nd s").
func ValueContainsSeason(val string) bool {
	val = strings.ToLower(val)
	if strings.ContainsRune(val, '第') {
		return false
	}
	// The lower-cased value is what gets checked here, so only the
	// case-insensitive patterns of ValueContainsSpecial can match.
	if ValueContainsSpecial(val) {
		return false
	}
	if strings.Contains(val, "season") {
		return true
	}
	return ordinalSeasonRe.MatchString(val)
}
// seasonNumberRes lists, in priority order, the patterns whose first capture
// group holds a season number. They are compiled once at package scope.
var seasonNumberRes = []*regexp.Regexp{
	// The word "season" followed by a space and a number, e.g. "season 2".
	regexp.MustCompile(`season (\d+)`),
	// A number with an ordinal suffix followed by " s", e.g. "2nd s".
	regexp.MustCompile(`(\d+)(st|nd|rd|th) [sS]`),
}

// ExtractSeasonNumber returns the season number found in val, or -1 when
// none is found.
//
// The value is lower-cased, then each pattern is tried in order. A pattern
// whose number cannot be parsed as an int (e.g. it overflows) is skipped and
// the next pattern is tried.
func ExtractSeasonNumber(val string) int {
	val = strings.ToLower(val)
	for _, re := range seasonNumberRes {
		matches := re.FindStringSubmatch(val)
		if len(matches) < 2 {
			continue
		}
		if season, err := strconv.Atoi(matches[1]); err == nil {
			return season
		}
	}
	// No season number found
	return -1
}
// bareResolutionRe matches a value that consists solely of a 3-4 digit number
// followed by "p" (the value is lower-cased before matching). The digits are
// captured so they can be converted to an integer.
var bareResolutionRe = regexp.MustCompile(`^(\d{3,4})p$`)

// ExtractResolutionInt extracts the resolution from a string and returns it as an integer.
// This is used for comparing resolutions.
//
// Matching is case-insensitive. Well-known markers are looked up anywhere in
// the value ("4k" and "2160" map to 2160, then "1080", "720", "540", "480").
// Otherwise a value of the exact form "<3-4 digits>p" (e.g. "360p") yields
// that number.
// If the resolution is not found, it returns 0.
func ExtractResolutionInt(val string) int {
	val = strings.ToLower(val)
	switch {
	case strings.Contains(val, "4k"), strings.Contains(val, "2160"):
		return 2160
	case strings.Contains(val, "1080"):
		return 1080
	case strings.Contains(val, "720"):
		return 720
	case strings.Contains(val, "540"):
		return 540
	case strings.Contains(val, "480"):
		return 480
	}
	// Fallback for less common heights. The capture group must hold the
	// digits, not the trailing "p", otherwise the conversion always fails.
	if matches := bareResolutionRe.FindStringSubmatch(val); len(matches) > 1 {
		if res, err := strconv.Atoi(matches[1]); err == nil {
			return res
		}
	}
	return 0
}
// specialValueRes holds the patterns that mark a value as a special
// (OVA, ONA, SP, ...). They are compiled once at package scope.
var specialValueRes = []*regexp.Regexp{
	// Keyword followed by a 1-2 digit number, at the start of the value or
	// after a delimiter (case-insensitive), e.g. "Show - OVA 1".
	regexp.MustCompile(`(?i)(^|(?P<show>.*?)[ _.\-(]+)(SP|OAV|OVA|OAD|ONA) ?(?P<ep>\d{1,2})(-(?P<ep2>[0-9]{1,3}))? ?(?P<title>.*)$`),
	// OVA/ONA enclosed by delimiters on both sides (case-insensitive).
	regexp.MustCompile(`(?i)[-._( ](OVA|ONA)[-._) ]`),
	// "S" or "SP" followed by 0/00 and an episode marker, e.g. ".S00E1".
	regexp.MustCompile(`(?i)[-._ ](S|SP)(?P<season>(0|00))([Ee]\d)`),
	// Upper-case keyword anywhere in the value; this one is case-sensitive.
	regexp.MustCompile(`[({\[]?(OVA|ONA|OAV|OAD|SP|SPECIAL)[])}]?`),
}

// ValueContainsSpecial reports whether val matches any of the special
// patterns in specialValueRes.
func ValueContainsSpecial(val string) bool {
	for _, re := range specialValueRes {
		if re.MatchString(val) {
			return true
		}
	}
	return false
}
// ignoredKeywordsRe matches a value that consists only of one ignored keyword,
// optionally wrapped in brackets and single whitespace characters
// (case-insensitive). It is compiled once at package scope.
var ignoredKeywordsRe = regexp.MustCompile(`(?i)^\s?[({\[]?\s?(EXTRAS?|OVAS?|OTHERS?|SPECIALS|MOVIES|SEASONS|NC)\s?[])}]?\s?$`)

// ValueContainsIgnoredKeywords reports whether the whole of val is one of the
// ignored keywords (e.g. "Extras", "[Specials]", "NC").
func ValueContainsIgnoredKeywords(val string) bool {
	return ignoredKeywordsRe.MatchString(val)
}
// batchKeywordsRe matches a batch keyword anywhere in a value, optionally
// wrapped in brackets (case-insensitive, not anchored). It is compiled once at
// package scope.
var batchKeywordsRe = regexp.MustCompile(`(?i)[({\[]?\s?(EXTRAS|OVAS|OTHERS|SPECIALS|MOVIES|SEASONS|BATCH|COMPLETE|COMPLETE SERIES)\s?[])}]?\s?`)

// ValueContainsBatchKeywords reports whether val contains one of the batch
// keywords (e.g. "Batch", "Complete", "Specials") anywhere in the string.
func ValueContainsBatchKeywords(val string) bool {
	return batchKeywordsRe.MatchString(val)
}
// ncValueRes holds the patterns for non-episode content markers (openings,
// endings, trailers, commercials, ...). Every keyword must appear as a
// whole word, at the start of the value or after a delimiter. They are
// compiled once at package scope.
var ncValueRes = []*regexp.Regexp{
	// Openings, optionally numbered.
	regexp.MustCompile(`(?i)(^|(?P<show>.*?)[ _.\-(]+)\b(OP|NCOP|OPED)\b ?(?P<ep>\d{1,2}[a-z]?)? ?([ _.\-)]+(?P<title>.*))?`),
	// Endings, optionally numbered.
	regexp.MustCompile(`(?i)(^|(?P<show>.*?)[ _.\-(]+)\b(ED|NCED)\b ?(?P<ep>\d{1,2}[a-z]?)? ?([ _.\-)]+(?P<title>.*))?`),
	// Trailers / promos; a number is required.
	regexp.MustCompile(`(?i)(^|(?P<show>.*?)[ _.\-(]+)\b(TRAILER|PROMO|PV)\b ?(?P<ep>\d{1,2}) ?([ _.\-)]+(?P<title>.*))?`),
	// "Other(s)" followed by a number and a delimiter.
	regexp.MustCompile(`(?i)(^|(?P<show>.*?)[ _.\-(]+)\b(OTHERS?)\b(?P<ep>\d{1,2}) ?[ _.\-)]+(?P<title>.*)`),
	// Commercials; a number is required.
	regexp.MustCompile(`(?i)(^|(?P<show>.*?)[ _.\-(]+)\b(CM|COMMERCIAL|AD)\b ?(?P<ep>\d{1,2}) ?([ _.\-)]+(?P<title>.*))?`),
	// Creditless markers, optionally numbered.
	regexp.MustCompile(`(?i)(^|(?P<show>.*?)[ _.\-(]+)\b(CREDITLESS|NCOP|NCED|OP|ED)\b ?(?P<ep>\d{1,2}[a-z]?)? ?([ _.\-)]+(?P<title>.*))?`),
}

// ValueContainsNC reports whether val matches any of the patterns in
// ncValueRes (case-insensitive), e.g. "NCOP" or "One Piece ED 2".
func ValueContainsNC(val string) bool {
	for _, re := range ncValueRes {
		if re.MatchString(val) {
			return true
		}
	}
	return false
}

View File

@@ -0,0 +1,344 @@
package comparison
import (
"testing"
)
// TestValueContainsSeason is a table-driven test covering the "season"
// keyword, ordinal forms, a plain title and a title carrying a special marker.
func TestValueContainsSeason(t *testing.T) {
	type seasonCase struct {
		name     string
		input    string
		expected bool
	}
	cases := []seasonCase{
		{name: "Contains 'season' in lowercase", input: "JJK season 2", expected: true},
		{name: "Contains 'season' in uppercase", input: "JJK SEASON 2", expected: true},
		{name: "Contains '2nd S' in lowercase", input: "Spy x Family 2nd Season", expected: true},
		{name: "Contains '2nd S' in uppercase", input: "Spy x Family 2ND SEASON", expected: true},
		{name: "Does not contain 'season' or '1st S'", input: "This is a test", expected: false},
		{name: "Contains special characters", input: "JJK season 2 (OVA)", expected: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := ValueContainsSeason(tc.input); got != tc.expected {
				t.Errorf("ValueContainsSeason() with args %v, expected %v, but got %v.", tc.input, tc.expected, got)
			}
		})
	}
}
// TestExtractSeasonNumber is a table-driven test for both supported season
// notations and for the -1 result when no season number is present.
func TestExtractSeasonNumber(t *testing.T) {
	type seasonNumberCase struct {
		name     string
		input    string
		expected int
	}
	cases := []seasonNumberCase{
		{name: "Contains 'season' followed by a number", input: "JJK season 2", expected: 2},
		{name: "Contains a number followed by 'st', 'nd', 'rd', or 'th', followed by 's' or 'S'", input: "Spy x Family 2nd S", expected: 2},
		{name: "Does not contain 'season' or '1st S'", input: "This is a test", expected: -1},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := ExtractSeasonNumber(tc.input); got != tc.expected {
				t.Errorf("ExtractSeasonNumber() with args %v, expected %v, but got %v.", tc.input, tc.expected, got)
			}
		})
	}
}
// TestExtractResolutionInt is a table-driven test for the well-known
// resolution markers and for the 0 result when no resolution is present.
func TestExtractResolutionInt(t *testing.T) {
	type resolutionCase struct {
		name     string
		input    string
		expected int
	}
	cases := []resolutionCase{
		{name: "Contains '4K' in uppercase", input: "4K", expected: 2160},
		{name: "Contains '4k' in lowercase", input: "4k", expected: 2160},
		{name: "Contains '2160'", input: "2160", expected: 2160},
		{name: "Contains '1080'", input: "1080", expected: 1080},
		{name: "Contains '720'", input: "720", expected: 720},
		{name: "Contains '480'", input: "480", expected: 480},
		{name: "Does not contain a resolution", input: "This is a test", expected: 0},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := ExtractResolutionInt(tc.input); got != tc.expected {
				t.Errorf("ExtractResolutionInt() with args %v, expected %v, but got %v.", tc.input, tc.expected, got)
			}
		})
	}
}
// TestValueContainsSpecial is a table-driven test showing that a trailing
// upper-case "OVA" is detected while the same keyword in lower case is not.
func TestValueContainsSpecial(t *testing.T) {
	type specialCase struct {
		name     string
		input    string
		expected bool
	}
	cases := []specialCase{
		{name: "Contains 'OVA' in uppercase", input: "JJK OVA", expected: true},
		{name: "Contains 'ova' in lowercase", input: "JJK ova", expected: false},
		{name: "Does not contain special keywords", input: "This is a test", expected: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := ValueContainsSpecial(tc.input); got != tc.expected {
				t.Errorf("ValueContainsSpecial() with args %v, expected %v, but got %v.", tc.input, tc.expected, got)
			}
		})
	}
}
// TestValueContainsIgnoredKeywords is a table-driven test checking that an
// ignored keyword is detected regardless of case and that a plain sentence is
// not flagged.
func TestValueContainsIgnoredKeywords(t *testing.T) {
	type ignoredCase struct {
		name     string
		input    string
		expected bool
	}
	cases := []ignoredCase{
		{name: "Contains 'EXTRAS' in uppercase", input: "EXTRAS", expected: true},
		{name: "Contains 'extras' in lowercase", input: "extras", expected: true},
		{name: "Does not contain ignored keywords", input: "This is a test", expected: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := ValueContainsIgnoredKeywords(tc.input); got != tc.expected {
				t.Errorf("ValueContainsIgnoredKeywords() with args %v, expected %v, but got %v.", tc.input, tc.expected, got)
			}
		})
	}
}
// TestValueContainsBatchKeywords is a table-driven test checking that a batch
// keyword is detected regardless of case and that a plain sentence is not
// flagged.
func TestValueContainsBatchKeywords(t *testing.T) {
	type batchCase struct {
		name     string
		input    string
		expected bool
	}
	cases := []batchCase{
		{name: "Contains 'BATCH' in uppercase", input: "BATCH", expected: true},
		{name: "Contains 'batch' in lowercase", input: "batch", expected: true},
		{name: "Does not contain batch keywords", input: "This is a test", expected: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := ValueContainsBatchKeywords(tc.input); got != tc.expected {
				t.Errorf("ValueContainsBatchKeywords() with args %v, expected %v, but got %v.", tc.input, tc.expected, got)
			}
		})
	}
}
// TestValueContainsNC is a table-driven test; each sub-test is named after its
// input. It covers stand-alone and embedded NC markers plus release names and
// titles that merely contain similar letter sequences ("Opus", "Operation").
func TestValueContainsNC(t *testing.T) {
	type ncCase struct {
		input    string
		expected bool
	}
	cases := []ncCase{
		{input: "NCOP", expected: true},
		{input: "ncop", expected: true},
		{input: "One Piece - 1000 - NCOP", expected: true},
		{input: "One Piece ED 2", expected: true},
		{input: "This is a test", expected: false},
		{input: "This is a test", expected: false},
		{input: "Himouto.Umaru.chan.S01E02.1080p.BluRay.Opus2.0.x265-smol", expected: false},
		{input: "Himouto.Umaru.chan.S01E02.1080p.BluRay.x265-smol", expected: false},
		{input: "One Piece - 1000 - Operation something something", expected: false},
	}

	for _, tc := range cases {
		t.Run(tc.input, func(t *testing.T) {
			if got := ValueContainsNC(tc.input); got != tc.expected {
				t.Errorf("ValueContainsNC() with args %v, expected %v, but got %v.", tc.input, tc.expected, got)
			}
		})
	}
}
//func TestLikelyNC(t *testing.T) {
// tests := []struct {
// name string
// input string
// expected bool
// }{
// {
// name: "Does not contain NC keywords 1",
// input: "Himouto.Umaru.chan.S01E02.1080p.BluRay.Opus2.0.x265-smol",
// expected: false,
// },
// {
// name: "Does not contain NC keywords 2",
// input: "Himouto.Umaru.chan.S01E02.1080p.BluRay.x265-smol",
// expected: false,
// },
// {
// name: "Contains NC keywords 1",
// input: "Himouto.Umaru.chan.S00E02.1080p.BluRay.x265-smol",
// expected: true,
// },
// {
// name: "Contains NC keywords 2",
// input: "Himouto.Umaru.chan.OP02.1080p.BluRay.x265-smol",
// expected: true,
// },
// }
//
// for _, test := range tests {
// t.Run(test.name, func(t *testing.T) {
// metadata := habari.Parse(test.input)
// var episode string
// var season string
//
// if len(metadata.SeasonNumber) > 0 {
// if len(metadata.SeasonNumber) == 1 {
// season = metadata.SeasonNumber[0]
// }
// }
//
// if len(metadata.EpisodeNumber) > 0 {
// if len(metadata.EpisodeNumber) == 1 {
// episode = metadata.EpisodeNumber[0]
// }
// }
//
// result := LikelyNC(test.input, season, episode)
// if result != test.expected {
// t.Errorf("ValueContainsNC() with args %v, expected %v, but got %v.", test.input, test.expected, result)
// }
// })
// }
//}

View File

@@ -0,0 +1,230 @@
// Package comparison contains helpers for comparing and filtering media titles.
package comparison
import (
"github.com/adrg/strutil/metrics"
)
// LevenshteinResult is a struct that holds a string and its Levenshtein distance compared to another string.
// FindBestMatchWithLevenshtein treats the lowest Distance as the best match.
type LevenshteinResult struct {
// OriginalValue is the reference string the candidate was compared against.
OriginalValue *string
// Value is the candidate string.
Value *string
// Distance is the Levenshtein distance computed between the two strings.
Distance int
}
// CompareWithLevenshtein computes the Levenshtein distance between v and each
// entry of vals. It delegates to CompareWithLevenshteinCleanFunc with an
// identity clean function, so the strings are compared exactly as given.
func CompareWithLevenshtein(v *string, vals []*string) []*LevenshteinResult {
	identity := func(s string) string { return s }
	return CompareWithLevenshteinCleanFunc(v, vals, identity)
}
// CompareWithLevenshteinCleanFunc compares v to every entry of vals using a
// case-insensitive Levenshtein metric and returns one result per entry, in
// the same order as vals.
//
// cleanFunc is applied to both strings before the distance is computed; the
// returned results still point at the original, uncleaned strings.
// v and every entry of vals must be non-nil, as they are dereferenced.
func CompareWithLevenshteinCleanFunc(v *string, vals []*string, cleanFunc func(val string) string) []*LevenshteinResult {
	lev := metrics.NewLevenshtein()
	lev.CaseSensitive = false
	//lev.DeleteCost = 1

	// Reserve capacity only (length 0): combining a non-zero length with
	// append would leave len(vals) nil entries in front of the real results.
	res := make([]*LevenshteinResult, 0, len(vals))
	for _, val := range vals {
		res = append(res, &LevenshteinResult{
			OriginalValue: v,
			Value:         val,
			Distance:      lev.Distance(cleanFunc(*v), cleanFunc(*val)),
		})
	}
	return res
}
// FindBestMatchWithLevenshtein picks, among the results of
// CompareWithLevenshtein(v, vals), the one with the smallest distance.
// On equal distances the earliest result is kept.
// The boolean is false only when the comparison produced no results at all.
func FindBestMatchWithLevenshtein(v *string, vals []*string) (*LevenshteinResult, bool) {
	candidates := CompareWithLevenshtein(v, vals)
	if len(candidates) == 0 {
		return nil, false
	}

	var closest *LevenshteinResult
	for _, candidate := range candidates {
		// Keep the current pick unless there is none yet or this one is strictly closer.
		if closest != nil && candidate.Distance >= closest.Distance {
			continue
		}
		closest = candidate
	}
	return closest, true
}
//----------------------------------------------------------------------------------------------------------------------
// JaroWinklerResult is a struct that holds a string and its Jaro-Winkler rating compared to another string.
// FindBestMatchWithJaroWinkler treats the highest Rating as the best match.
type JaroWinklerResult struct {
// OriginalValue is the reference string the candidate was compared against.
OriginalValue *string
// Value is the candidate string.
Value *string
// Rating is the value returned by the Jaro-Winkler metric's Compare for the two strings.
Rating float64
}
// CompareWithJaroWinkler compares v to every entry of vals using a
// case-insensitive Jaro-Winkler metric and returns one result per entry, in
// the same order as vals.
// v and every entry of vals must be non-nil, as they are dereferenced.
func CompareWithJaroWinkler(v *string, vals []*string) []*JaroWinklerResult {
	jw := metrics.NewJaroWinkler()
	jw.CaseSensitive = false

	// Reserve capacity only (length 0): combining a non-zero length with
	// append would leave len(vals) nil entries in front of the real results.
	res := make([]*JaroWinklerResult, 0, len(vals))
	for _, val := range vals {
		res = append(res, &JaroWinklerResult{
			OriginalValue: v,
			Value:         val,
			Rating:        jw.Compare(*v, *val),
		})
	}
	return res
}
// FindBestMatchWithJaroWinkler picks, among the results of
// CompareWithJaroWinkler(v, vals), the one with the highest rating.
// On equal ratings the earliest result is kept.
// The boolean is false only when the comparison produced no results at all.
func FindBestMatchWithJaroWinkler(v *string, vals []*string) (*JaroWinklerResult, bool) {
	candidates := CompareWithJaroWinkler(v, vals)
	if len(candidates) == 0 {
		return nil, false
	}

	var top *JaroWinklerResult
	for _, candidate := range candidates {
		// Cases are evaluated left to right, so the rating is only read
		// once a current pick exists.
		switch {
		case top == nil, candidate.Rating > top.Rating:
			top = candidate
		}
	}
	return top, true
}
//----------------------------------------------------------------------------------------------------------------------
// JaccardResult is a struct that holds a string and its Jaccard rating compared to another string.
// FindBestMatchWithJaccard treats the highest Rating as the best match.
type JaccardResult struct {
// OriginalValue is the reference string the candidate was compared against.
OriginalValue *string
// Value is the candidate string.
Value *string
// Rating is the value returned by the Jaccard metric's Compare for the two strings.
Rating float64
}
// CompareWithJaccard compares v to every entry of vals using a
// case-insensitive Jaccard metric with an n-gram size of 1 and returns one
// result per entry, in the same order as vals.
// v and every entry of vals must be non-nil, as they are dereferenced.
func CompareWithJaccard(v *string, vals []*string) []*JaccardResult {
	jaccard := metrics.NewJaccard()
	jaccard.CaseSensitive = false
	jaccard.NgramSize = 1

	// Reserve capacity only (length 0): combining a non-zero length with
	// append would leave len(vals) nil entries in front of the real results.
	res := make([]*JaccardResult, 0, len(vals))
	for _, val := range vals {
		res = append(res, &JaccardResult{
			OriginalValue: v,
			Value:         val,
			Rating:        jaccard.Compare(*v, *val),
		})
	}
	return res
}
// FindBestMatchWithJaccard picks, among the results of
// CompareWithJaccard(v, vals), the one with the highest rating.
// On equal ratings the earliest result is kept.
// The boolean is false only when the comparison produced no results at all.
func FindBestMatchWithJaccard(v *string, vals []*string) (*JaccardResult, bool) {
	candidates := CompareWithJaccard(v, vals)
	if len(candidates) == 0 {
		return nil, false
	}

	var top *JaccardResult
	for idx := 0; idx < len(candidates); idx++ {
		candidate := candidates[idx]
		// Cases are evaluated left to right, so the rating is only read
		// once a current pick exists.
		switch {
		case top == nil, candidate.Rating > top.Rating:
			top = candidate
		}
	}
	return top, true
}
//----------------------------------------------------------------------------------------------------------------------
// SorensenDiceResult is a struct that holds a string and its Sorensen-Dice rating compared to another string.
// FindBestMatchWithSorensenDice treats the highest Rating as the best match.
type SorensenDiceResult struct {
// OriginalValue is the reference string the candidate was compared against.
OriginalValue *string
// Value is the candidate string.
Value *string
// Rating is the value returned by the Sorensen-Dice metric's Compare for the two strings.
Rating float64
}
// CompareWithSorensenDice compares v to every entry of vals using a
// case-insensitive Sorensen-Dice metric and returns one result per entry, in
// the same order as vals.
// v and every entry of vals must be non-nil, as they are dereferenced.
func CompareWithSorensenDice(v *string, vals []*string) []*SorensenDiceResult {
	dice := metrics.NewSorensenDice()
	dice.CaseSensitive = false

	// Reserve capacity only (length 0): combining a non-zero length with
	// append would leave len(vals) nil entries in front of the real results.
	res := make([]*SorensenDiceResult, 0, len(vals))
	for _, val := range vals {
		res = append(res, &SorensenDiceResult{
			OriginalValue: v,
			Value:         val,
			Rating:        dice.Compare(*v, *val),
		})
	}
	return res
}
// FindBestMatchWithSorensenDice picks, among the results of
// CompareWithSorensenDice(v, vals), the one with the highest rating.
// On equal ratings the earliest result is kept.
// The boolean is false only when the comparison produced no results at all.
func FindBestMatchWithSorensenDice(v *string, vals []*string) (*SorensenDiceResult, bool) {
	candidates := CompareWithSorensenDice(v, vals)
	if len(candidates) == 0 {
		return nil, false
	}

	var top *SorensenDiceResult
	for _, candidate := range candidates {
		// Cases are evaluated left to right, so the rating is only read
		// once a current pick exists.
		switch {
		case top == nil, candidate.Rating > top.Rating:
			top = candidate
		}
	}
	return top, true
}
// EliminateLeastSimilarValue returns the values of arr without the one that
// is least similar to the others.
//
// For each value the case-insensitive Sorensen-Dice ratings against every
// other value are summed; the value with the lowest sum is dropped (the
// first one on ties). Slices with fewer than three values are returned
// unchanged. For longer slices a new slice is returned and arr itself is
// left untouched.
func EliminateLeastSimilarValue(arr []string) []string {
	if len(arr) < 3 {
		return arr
	}

	sd := metrics.NewSorensenDice()
	sd.CaseSensitive = false

	leastSimilarIndex := -1
	leastSimilarScore := 0.0
	for i := range arr {
		totalSimilarity := 0.0
		for j := range arr {
			if i != j {
				totalSimilarity += sd.Compare(arr[i], arr[j])
			}
		}
		// Seed the minimum from the first element rather than a fixed
		// constant: a sum over n-1 ratings is not bounded by a constant
		// when the slice has more than three values.
		if leastSimilarIndex == -1 || totalSimilarity < leastSimilarScore {
			leastSimilarScore = totalSimilarity
			leastSimilarIndex = i
		}
	}

	// Build a fresh slice; appending onto arr[:i] would shift elements inside
	// the caller's backing array.
	out := make([]string, 0, len(arr)-1)
	out = append(out, arr[:leastSimilarIndex]...)
	out = append(out, arr[leastSimilarIndex+1:]...)
	return out
}

View File

@@ -0,0 +1,114 @@
package comparison
import (
"github.com/samber/lo"
"github.com/stretchr/testify/assert"
"testing"
)
func TestFindBestMatchWithLevenstein(t *testing.T) {
tests := []struct {
title string
comparisonTitles []string
expectedResult string
expectedDistance int
}{
{
title: "jujutsu kaisen 2",
comparisonTitles: []string{"JJK", "Jujutsu Kaisen", "Jujutsu Kaisen 2"},
expectedResult: "Jujutsu Kaisen 2",
expectedDistance: 0,
},
}
for _, test := range tests {
t.Run(test.title, func(t *testing.T) {
res, ok := FindBestMatchWithLevenshtein(&test.title, lo.ToSlicePtr(test.comparisonTitles))
if assert.True(t, ok) {
assert.Equal(t, test.expectedResult, *res.Value, "expected result does not match")
assert.Equal(t, test.expectedDistance, res.Distance, "expected distance does not match")
t.Logf("value: %s, distance: %d", *res.Value, res.Distance)
}
})
}
}
func TestFindBestMatchWithDice(t *testing.T) {
tests := []struct {
title string
comparisonTitles []string
expectedResult string
expectedRating float64
}{
{
title: "jujutsu kaisen 2",
comparisonTitles: []string{"JJK", "Jujutsu Kaisen", "Jujutsu Kaisen 2"},
expectedResult: "Jujutsu Kaisen 2",
expectedRating: 1,
},
}
for _, test := range tests {
t.Run(test.title, func(t *testing.T) {
res, ok := FindBestMatchWithSorensenDice(&test.title, lo.ToSlicePtr(test.comparisonTitles))
if assert.True(t, ok, "expected result, got nil") {
assert.Equal(t, test.expectedResult, *res.Value, "expected result does not match")
assert.Equal(t, test.expectedRating, res.Rating, "expected rating does not match")
t.Logf("value: %s, rating: %f", *res.Value, res.Rating)
}
})
}
}
// TestEliminateLestSimilarValue checks, for several title groups, that the
// expected outlier is no longer present in the returned slice.
func TestEliminateLestSimilarValue(t *testing.T) {
	type eliminationCase struct {
		title              string
		comparisonTitles   []string
		expectedEliminated string
	}
	cases := []eliminationCase{
		{
			title:              "jujutsu kaisen 2",
			comparisonTitles:   []string{"JJK", "Jujutsu Kaisen", "Jujutsu Kaisen 2"},
			expectedEliminated: "JJK",
		},
		{
			title:              "One Piece - Film Z",
			comparisonTitles:   []string{"One Piece - Film Z", "One Piece Film Z", "One Piece Gold"},
			expectedEliminated: "One Piece Gold",
		},
		{
			title:              "One Piece - Film Z",
			comparisonTitles:   []string{"One Piece - Film Z", "One Piece Film Z", "One Piece Z"},
			expectedEliminated: "One Piece Z",
		},
		{
			title:              "Mononogatari",
			comparisonTitles:   []string{"Mononogatari", "Mononogatari Cour 2", "Nekomonogatari"},
			expectedEliminated: "Nekomonogatari",
		},
	}

	for _, tc := range cases {
		t.Run(tc.title, func(t *testing.T) {
			remaining := EliminateLeastSimilarValue(tc.comparisonTitles)
			for idx := range remaining {
				if remaining[idx] != tc.expectedEliminated {
					continue
				}
				t.Fatalf("expected \"%s\" to be eliminated from %v", remaining[idx], remaining)
			}
		})
	}
}