node build fixed
This commit is contained in:
445
seanime-2.9.10/internal/manga/downloader/chapter_downloader.go
Normal file
445
seanime-2.9.10/internal/manga/downloader/chapter_downloader.go
Normal file
@@ -0,0 +1,445 @@
|
||||
package chapter_downloader
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"image"
|
||||
_ "image/gif"
|
||||
_ "image/jpeg"
|
||||
_ "image/png"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"seanime/internal/database/db"
|
||||
"seanime/internal/events"
|
||||
hibikemanga "seanime/internal/extension/hibike/manga"
|
||||
manga_providers "seanime/internal/manga/providers"
|
||||
"seanime/internal/util"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/goccy/go-json"
|
||||
"github.com/rs/zerolog"
|
||||
_ "golang.org/x/image/bmp" // Register BMP format
|
||||
_ "golang.org/x/image/tiff" // Register Tiff format
|
||||
)
|
||||
|
||||
// 📁 cache/manga
|
||||
// └── 📁 {provider}_{mediaId}_{chapterId}_{chapterNumber} <- Downloader generates
|
||||
// ├── 📄 registry.json <- Contains Registry
|
||||
// ├── 📄 1.jpg
|
||||
// ├── 📄 2.jpg
|
||||
// └── 📄 ...
|
||||
//
|
||||
|
||||
type (
|
||||
// Downloader is used to download chapters from various manga providers.
|
||||
Downloader struct {
|
||||
logger *zerolog.Logger
|
||||
wsEventManager events.WSEventManagerInterface
|
||||
database *db.Database
|
||||
downloadDir string
|
||||
mu sync.Mutex
|
||||
downloadMu sync.Mutex
|
||||
// cancelChannel is used to cancel some or all downloads.
|
||||
cancelChannels map[DownloadID]chan struct{}
|
||||
queue *Queue
|
||||
cancelCh chan struct{} // Close to cancel the download process
|
||||
runCh chan *QueueInfo // Receives a signal to download the next item
|
||||
chapterDownloadedCh chan DownloadID // Sends a signal when a chapter has been downloaded
|
||||
}
|
||||
|
||||
//+-------------------------------------------------------------------------------------------------------------------+
|
||||
|
||||
DownloadID struct {
|
||||
Provider string `json:"provider"`
|
||||
MediaId int `json:"mediaId"`
|
||||
ChapterId string `json:"chapterId"`
|
||||
ChapterNumber string `json:"chapterNumber"`
|
||||
}
|
||||
|
||||
//+-------------------------------------------------------------------------------------------------------------------+
|
||||
|
||||
// Registry stored in 📄 registry.json for each chapter download.
|
||||
Registry map[int]PageInfo
|
||||
|
||||
PageInfo struct {
|
||||
Index int `json:"index"`
|
||||
Filename string `json:"filename"`
|
||||
OriginalURL string `json:"original_url"`
|
||||
Size int64 `json:"size"`
|
||||
Width int `json:"width"`
|
||||
Height int `json:"height"`
|
||||
}
|
||||
)
|
||||
|
||||
type (
|
||||
NewDownloaderOptions struct {
|
||||
Logger *zerolog.Logger
|
||||
WSEventManager events.WSEventManagerInterface
|
||||
DownloadDir string
|
||||
Database *db.Database
|
||||
}
|
||||
|
||||
DownloadOptions struct {
|
||||
DownloadID
|
||||
Pages []*hibikemanga.ChapterPage
|
||||
StartNow bool
|
||||
}
|
||||
)
|
||||
|
||||
func NewDownloader(opts *NewDownloaderOptions) *Downloader {
|
||||
runCh := make(chan *QueueInfo, 1)
|
||||
|
||||
d := &Downloader{
|
||||
logger: opts.Logger,
|
||||
wsEventManager: opts.WSEventManager,
|
||||
downloadDir: opts.DownloadDir,
|
||||
cancelChannels: make(map[DownloadID]chan struct{}),
|
||||
runCh: runCh,
|
||||
queue: NewQueue(opts.Database, opts.Logger, opts.WSEventManager, runCh),
|
||||
chapterDownloadedCh: make(chan DownloadID, 100),
|
||||
}
|
||||
|
||||
return d
|
||||
}
|
||||
|
||||
// Start spins up a goroutine that will listen to queue events.
|
||||
func (cd *Downloader) Start() {
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
// Listen for new queue items
|
||||
case queueInfo := <-cd.runCh:
|
||||
cd.logger.Debug().Msgf("chapter downloader: Received queue item to download: %s", queueInfo.ChapterId)
|
||||
cd.run(queueInfo)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
func (cd *Downloader) ChapterDownloaded() <-chan DownloadID {
|
||||
return cd.chapterDownloadedCh
|
||||
}
|
||||
|
||||
// AddToQueue adds a chapter to the download queue.
|
||||
// If the chapter is already downloaded (i.e. a folder already exists), it will delete the previous data and re-download it.
|
||||
func (cd *Downloader) AddToQueue(opts DownloadOptions) error {
|
||||
cd.mu.Lock()
|
||||
defer cd.mu.Unlock()
|
||||
|
||||
downloadId := opts.DownloadID
|
||||
|
||||
// Check if chapter is already downloaded
|
||||
registryPath := cd.getChapterRegistryPath(downloadId)
|
||||
if _, err := os.Stat(registryPath); err == nil {
|
||||
cd.logger.Warn().Msg("chapter downloader: directory already exists, deleting")
|
||||
// Delete folder
|
||||
_ = os.RemoveAll(cd.getChapterDownloadDir(downloadId))
|
||||
}
|
||||
|
||||
// Start download
|
||||
cd.logger.Debug().Msgf("chapter downloader: Adding chapter to download queue: %s", opts.ChapterId)
|
||||
// Add to queue
|
||||
return cd.queue.Add(downloadId, opts.Pages, opts.StartNow)
|
||||
}
|
||||
|
||||
// DeleteChapter deletes a chapter directory from the download directory.
|
||||
func (cd *Downloader) DeleteChapter(id DownloadID) error {
|
||||
cd.mu.Lock()
|
||||
defer cd.mu.Unlock()
|
||||
|
||||
cd.logger.Debug().Msgf("chapter downloader: Deleting chapter %s", id.ChapterId)
|
||||
|
||||
_ = os.RemoveAll(cd.getChapterDownloadDir(id))
|
||||
cd.logger.Debug().Msgf("chapter downloader: Removed chapter %s", id.ChapterId)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Run starts the downloader if it's not already running.
|
||||
func (cd *Downloader) Run() {
|
||||
cd.mu.Lock()
|
||||
defer cd.mu.Unlock()
|
||||
|
||||
cd.logger.Debug().Msg("chapter downloader: Starting queue")
|
||||
|
||||
cd.cancelCh = make(chan struct{})
|
||||
|
||||
cd.queue.Run()
|
||||
}
|
||||
|
||||
// Stop cancels the download process and stops the queue from running.
|
||||
func (cd *Downloader) Stop() {
|
||||
cd.mu.Lock()
|
||||
defer cd.mu.Unlock()
|
||||
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
cd.logger.Error().Msgf("chapter downloader: cancelCh is already closed")
|
||||
}
|
||||
}()
|
||||
|
||||
cd.cancelCh = make(chan struct{})
|
||||
|
||||
close(cd.cancelCh) // Cancel download process
|
||||
|
||||
cd.queue.Stop()
|
||||
}
|
||||
|
||||
// run downloads the chapter based on the QueueInfo provided.
|
||||
// This is called successively for each current item being processed.
|
||||
// It invokes downloadChapterImages to download the chapter pages.
|
||||
func (cd *Downloader) run(queueInfo *QueueInfo) {
|
||||
|
||||
defer util.HandlePanicInModuleThen("internal/manga/downloader/runNext", func() {
|
||||
cd.logger.Error().Msg("chapter downloader: Panic in 'run'")
|
||||
})
|
||||
|
||||
// Download chapter images
|
||||
if err := cd.downloadChapterImages(queueInfo); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
cd.chapterDownloadedCh <- queueInfo.DownloadID
|
||||
}
|
||||
|
||||
// downloadChapterImages creates a directory for the chapter and downloads each image to that directory.
|
||||
// It also creates a Registry file that contains information about each image.
|
||||
//
|
||||
// e.g.,
|
||||
// 📁 {provider}_{mediaId}_{chapterId}_{chapterNumber}
|
||||
// ├── 📄 registry.json
|
||||
// ├── 📄 1.jpg
|
||||
// ├── 📄 2.jpg
|
||||
// └── 📄 ...
|
||||
func (cd *Downloader) downloadChapterImages(queueInfo *QueueInfo) (err error) {
|
||||
|
||||
// Create download directory
|
||||
// 📁 {provider}_{mediaId}_{chapterId}
|
||||
destination := cd.getChapterDownloadDir(queueInfo.DownloadID)
|
||||
if err = os.MkdirAll(destination, os.ModePerm); err != nil {
|
||||
cd.logger.Error().Err(err).Msgf("chapter downloader: Failed to create download directory for chapter %s", queueInfo.ChapterId)
|
||||
return err
|
||||
}
|
||||
|
||||
cd.logger.Debug().Msgf("chapter downloader: Downloading chapter %s images to %s", queueInfo.ChapterId, destination)
|
||||
|
||||
registry := make(Registry)
|
||||
|
||||
// calculateBatchSize calculates the batch size based on the number of URLs.
|
||||
calculateBatchSize := func(numURLs int) int {
|
||||
maxBatchSize := 5
|
||||
batchSize := numURLs / 10
|
||||
if batchSize < 1 {
|
||||
return 1
|
||||
} else if batchSize > maxBatchSize {
|
||||
return maxBatchSize
|
||||
}
|
||||
return batchSize
|
||||
}
|
||||
|
||||
// Download images
|
||||
batchSize := calculateBatchSize(len(queueInfo.Pages))
|
||||
|
||||
var wg sync.WaitGroup
|
||||
semaphore := make(chan struct{}, batchSize) // Semaphore to control concurrency
|
||||
for _, page := range queueInfo.Pages {
|
||||
semaphore <- struct{}{} // Acquire semaphore
|
||||
wg.Add(1)
|
||||
go func(page *hibikemanga.ChapterPage, registry *Registry) {
|
||||
defer func() {
|
||||
<-semaphore // Release semaphore
|
||||
wg.Done()
|
||||
}()
|
||||
select {
|
||||
case <-cd.cancelCh:
|
||||
//cd.logger.Warn().Msg("chapter downloader: Download goroutine canceled")
|
||||
return
|
||||
default:
|
||||
cd.downloadPage(page, destination, registry)
|
||||
}
|
||||
}(page, ®istry)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
// Write the registry
|
||||
_ = registry.save(queueInfo, destination, cd.logger)
|
||||
|
||||
cd.queue.HasCompleted(queueInfo)
|
||||
|
||||
if queueInfo.Status != QueueStatusErrored {
|
||||
cd.logger.Info().Msgf("chapter downloader: Finished downloading chapter %s", queueInfo.ChapterId)
|
||||
}
|
||||
|
||||
if queueInfo.Status == QueueStatusErrored {
|
||||
return fmt.Errorf("chapter downloader: Failed to download chapter %s", queueInfo.ChapterId)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// downloadPage downloads a single page from the URL and saves it to the destination directory.
|
||||
// It also updates the Registry with the page information.
|
||||
func (cd *Downloader) downloadPage(page *hibikemanga.ChapterPage, destination string, registry *Registry) {
|
||||
|
||||
defer util.HandlePanicInModuleThen("manga/downloader/downloadImage", func() {
|
||||
})
|
||||
|
||||
// Download image from URL
|
||||
|
||||
imgID := fmt.Sprintf("%02d", page.Index+1)
|
||||
|
||||
buf, err := manga_providers.GetImageByProxy(page.URL, page.Headers)
|
||||
if err != nil {
|
||||
cd.logger.Error().Err(err).Msgf("chapter downloader: Failed to get image from URL %s", page.URL)
|
||||
return
|
||||
}
|
||||
|
||||
// Get the image format
|
||||
config, format, err := image.DecodeConfig(bytes.NewReader(buf))
|
||||
if err != nil {
|
||||
cd.logger.Error().Err(err).Msgf("chapter downloader: Failed to decode image format from URL %s", page.URL)
|
||||
return
|
||||
}
|
||||
|
||||
filename := imgID + "." + format
|
||||
|
||||
// Create the file
|
||||
filePath := filepath.Join(destination, filename)
|
||||
file, err := os.Create(filePath)
|
||||
if err != nil {
|
||||
cd.logger.Error().Err(err).Msgf("chapter downloader: Failed to create file for image %s", imgID)
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Copy the image data to the file
|
||||
_, err = io.Copy(file, bytes.NewReader(buf))
|
||||
if err != nil {
|
||||
cd.logger.Error().Err(err).Msgf("image downloader: Failed to write image data to file for image from %s", page.URL)
|
||||
return
|
||||
}
|
||||
|
||||
// Update registry
|
||||
cd.downloadMu.Lock()
|
||||
(*registry)[page.Index] = PageInfo{
|
||||
Index: page.Index,
|
||||
Width: config.Width,
|
||||
Height: config.Height,
|
||||
Filename: filename,
|
||||
OriginalURL: page.URL,
|
||||
Size: int64(len(buf)),
|
||||
}
|
||||
cd.downloadMu.Unlock()
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
////////////////////////
|
||||
|
||||
// save saves the Registry content to a file in the chapter directory.
|
||||
func (r *Registry) save(queueInfo *QueueInfo, destination string, logger *zerolog.Logger) (err error) {
|
||||
|
||||
defer util.HandlePanicInModuleThen("manga/downloader/save", func() {
|
||||
err = fmt.Errorf("chapter downloader: Failed to save registry content")
|
||||
})
|
||||
|
||||
// Verify all images have been downloaded
|
||||
allDownloaded := true
|
||||
for _, page := range queueInfo.Pages {
|
||||
if _, ok := (*r)[page.Index]; !ok {
|
||||
allDownloaded = false
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !allDownloaded {
|
||||
// Clean up downloaded images
|
||||
logger.Error().Msg("chapter downloader: Not all images have been downloaded, aborting")
|
||||
queueInfo.Status = QueueStatusErrored
|
||||
// Delete directory
|
||||
go os.RemoveAll(destination)
|
||||
return fmt.Errorf("chapter downloader: Not all images have been downloaded, operation aborted")
|
||||
}
|
||||
|
||||
// Create registry file
|
||||
var data []byte
|
||||
data, err = json.Marshal(*r)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
registryFilePath := filepath.Join(destination, "registry.json")
|
||||
err = os.WriteFile(registryFilePath, data, 0644)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (cd *Downloader) getChapterDownloadDir(downloadId DownloadID) string {
|
||||
return filepath.Join(cd.downloadDir, FormatChapterDirName(downloadId.Provider, downloadId.MediaId, downloadId.ChapterId, downloadId.ChapterNumber))
|
||||
}
|
||||
|
||||
func FormatChapterDirName(provider string, mediaId int, chapterId string, chapterNumber string) string {
|
||||
return fmt.Sprintf("%s_%d_%s_%s", provider, mediaId, EscapeChapterID(chapterId), chapterNumber)
|
||||
}
|
||||
|
||||
// ParseChapterDirName parses a chapter directory name and returns the DownloadID.
|
||||
// e.g. comick_1234_chapter$UNDERSCORE$id_13.5 -> {Provider: "comick", MediaId: 1234, ChapterId: "chapter_id", ChapterNumber: "13.5"}
|
||||
func ParseChapterDirName(dirName string) (id DownloadID, ok bool) {
|
||||
parts := strings.Split(dirName, "_")
|
||||
if len(parts) != 4 {
|
||||
return id, false
|
||||
}
|
||||
|
||||
id.Provider = parts[0]
|
||||
var err error
|
||||
id.MediaId, err = strconv.Atoi(parts[1])
|
||||
if err != nil {
|
||||
return id, false
|
||||
}
|
||||
id.ChapterId = UnescapeChapterID(parts[2])
|
||||
id.ChapterNumber = parts[3]
|
||||
|
||||
ok = true
|
||||
return
|
||||
}
|
||||
|
||||
// chapterIDEscaper maps every character that EscapeChapterID rewrites to its
// "$NAME$" token. It is built once and is safe for concurrent use.
var chapterIDEscaper = strings.NewReplacer(
	"/", "$SLASH$",
	"\\", "$BSLASH$",
	":", "$COLON$",
	"*", "$ASTERISK$",
	"?", "$QUESTION$",
	"\"", "$QUOTE$",
	"<", "$LT$",
	">", "$GT$",
	"|", "$PIPE$",
	".", "$DOT$",
	" ", "$SPACE$",
	"_", "$UNDERSCORE$",
)

// EscapeChapterID rewrites a chapter ID so it can be embedded in a chapter
// directory name: each of / \ : * ? " < > | . space and _ is replaced by a
// "$NAME$" token (e.g. "_" becomes "$UNDERSCORE$"). The underscore must be
// escaped because it is the field separator of the directory name.
//
// The "$" character itself is not escaped, so an ID that already contains
// token-like text (such as "$SLASH$") cannot be told apart from an escaped
// one by UnescapeChapterID.
func EscapeChapterID(id string) string {
	// Single pass over the input; the tokens contain none of the escaped
	// characters, so this matches replacing them one after the other.
	return chapterIDEscaper.Replace(id)
}
|
||||
|
||||
// chapterIDUnescaper maps every "$NAME$" token produced by EscapeChapterID
// back to its character. It is built once and is safe for concurrent use.
var chapterIDUnescaper = strings.NewReplacer(
	"$SLASH$", "/",
	"$BSLASH$", "\\",
	"$COLON$", ":",
	"$ASTERISK$", "*",
	"$QUESTION$", "?",
	"$QUOTE$", "\"",
	"$LT$", "<",
	"$GT$", ">",
	"$PIPE$", "|",
	"$DOT$", ".",
	"$SPACE$", " ",
	"$UNDERSCORE$", "_",
)

// UnescapeChapterID reverses EscapeChapterID: every "$NAME$" token is turned
// back into the character it stands for.
//
// Tokens are matched in a single left-to-right pass without overlap. Replacing
// the tokens one kind at a time mis-parses inputs where the closing "$" of one
// token and the opening "$" of the next enclose a token name, e.g.
// "$DOT$SLASH$SLASH$" (the escaped form of ".SLASH/") would be read as
// "$DOT" + "/" + "SLASH$". With a single pass, every ID that contains no "$"
// round-trips through EscapeChapterID and back.
func UnescapeChapterID(id string) string {
	return chapterIDUnescaper.Replace(id)
}
|
||||
|
||||
func (cd *Downloader) getChapterRegistryPath(downloadId DownloadID) string {
|
||||
return filepath.Join(cd.getChapterDownloadDir(downloadId), "registry.json")
|
||||
}
|
||||
Reference in New Issue
Block a user