batch episodes together, default 10

This commit is contained in:
Leo Robinovitch 2024-01-01 11:50:39 -04:00
parent 8bb695de04
commit 48b57c74d6
3 changed files with 94 additions and 35 deletions

115
main.go
View file

@ -18,6 +18,12 @@ import (
"time"
)
type EpisodeBatch struct {
imgLinks []string
minEp int
maxEp int
}
func getImgLinksForEpisode(url string) []string {
resp, err := soup.Get(url)
time.Sleep(200 * time.Millisecond)
@ -54,12 +60,16 @@ func getEpisodeLinksForPage(url string) ([]string, error) {
return links, nil
}
func getImgLinks(url string, minEp, maxEp int) ([]string, int, int) {
func getEpisodeBatches(url string, minEp, maxEp, epsPerBatch int) []EpisodeBatch {
if strings.Contains(url, "/viewer") {
// assume viewing single episode
return getImgLinksForEpisode(url), episodeNo(url), episodeNo(url)
return []EpisodeBatch{{
imgLinks: getImgLinksForEpisode(url),
minEp: episodeNo(url),
maxEp: episodeNo(url),
}}
} else {
// assume viewing list of episodes
// assume viewing set of episodes
println("scanning all pages to get all episode links")
allEpisodeLinks := getAllEpisodeLinks(url)
println(fmt.Sprintf("found %d total episodes", len(allEpisodeLinks)))
@ -71,8 +81,29 @@ func getImgLinks(url string, minEp, maxEp int) ([]string, int, int) {
desiredEpisodeLinks = append(desiredEpisodeLinks, episodeLink)
}
}
actualMinEp := episodeNo(desiredEpisodeLinks[0])
if minEp > actualMinEp {
actualMinEp = minEp
}
actualMaxEp := episodeNo(desiredEpisodeLinks[len(desiredEpisodeLinks)-1])
if maxEp < actualMaxEp {
actualMaxEp = maxEp
}
println(fmt.Sprintf("fetching image links for episodes %d through %d", actualMinEp, actualMaxEp))
return getImgLinksForEpisodes(desiredEpisodeLinks), episodeNo(desiredEpisodeLinks[0]), episodeNo(desiredEpisodeLinks[len(desiredEpisodeLinks)-1])
var episodeBatches []EpisodeBatch
for start := 0; start < len(desiredEpisodeLinks); start += epsPerBatch {
end := start + epsPerBatch
if end > len(desiredEpisodeLinks) {
end = len(desiredEpisodeLinks)
}
episodeBatches = append(episodeBatches, EpisodeBatch{
imgLinks: getImgLinksForEpisodes(desiredEpisodeLinks[start:end], actualMaxEp),
minEp: episodeNo(desiredEpisodeLinks[start]),
maxEp: episodeNo(desiredEpisodeLinks[end-1]),
})
}
return episodeBatches
}
}
@ -124,10 +155,10 @@ func episodeNo(episodeLink string) int {
return episodeNo
}
func getImgLinksForEpisodes(episodeLinks []string) []string {
func getImgLinksForEpisodes(episodeLinks []string, actualMaxEp int) []string {
var allImgLinks []string
for _, episodeLink := range episodeLinks {
println(fmt.Sprintf("fetching images for episode %d (last episode %d)", episodeNo(episodeLink), episodeNo(episodeLinks[len(episodeLinks)-1])))
println(fmt.Sprintf("fetching image links for episode %d/%d", episodeNo(episodeLink), actualMaxEp))
allImgLinks = append(allImgLinks, getImgLinksForEpisode(episodeLink)...)
}
return allImgLinks
@ -193,44 +224,62 @@ func main() {
}
minEp := flag.Int("min-ep", 0, "Minimum episode number to download (inclusive)")
maxEp := flag.Int("max-ep", math.MaxInt, "Maximum episode number to download (inclusive)")
epsPerFile := flag.Int("eps-per-file", 10, "Number of episodes to put in each PDF file")
flag.Parse()
if *minEp > *maxEp {
fmt.Println("min-ep must be less than or equal to max-ep")
os.Exit(1)
}
if *epsPerFile < 1 {
fmt.Println("eps-per-file must be greater than or equal to 1")
os.Exit(1)
}
if *minEp < 0 {
fmt.Println("min-ep must be greater than or equal to 0")
os.Exit(1)
}
url := os.Args[len(os.Args)-1]
imgLinks, actualMinEp, actualMaxEp := getImgLinks(url, *minEp, *maxEp)
fmt.Println(fmt.Sprintf("found %d pages", len(imgLinks)))
episodeBatches := getEpisodeBatches(url, *minEp, *maxEp, *epsPerFile)
pdf := gopdf.GoPdf{}
pdf.Start(gopdf.Config{Unit: gopdf.UnitPT, PageSize: *gopdf.PageSizeA4})
for idx, imgLink := range imgLinks {
err := addImgToPdf(&pdf, imgLink)
totalPages := 0
for _, episodeBatch := range episodeBatches {
totalPages += len(episodeBatch.imgLinks)
}
totalEpisodes := episodeBatches[len(episodeBatches)-1].maxEp - episodeBatches[0].minEp + 1
fmt.Println(fmt.Sprintf("found %d total image links across %d episodes", totalPages, totalEpisodes))
fmt.Println(fmt.Sprintf("saving into %d files with max of %d episodes per file", len(episodeBatches), *epsPerFile))
for _, episodeBatch := range episodeBatches {
pdf := gopdf.GoPdf{}
pdf.Start(gopdf.Config{Unit: gopdf.UnitPT, PageSize: *gopdf.PageSizeA4})
for idx, imgLink := range episodeBatch.imgLinks {
err := addImgToPdf(&pdf, imgLink)
if err != nil {
fmt.Println(err.Error())
os.Exit(1)
}
fmt.Println(fmt.Sprintf("saving episodes %d through %d: added page %d/%d", episodeBatch.minEp, episodeBatch.maxEp, idx+1, len(episodeBatch.imgLinks)))
}
outURL := strings.ReplaceAll(url, "http://", "")
outURL = strings.ReplaceAll(outURL, "https://", "")
outURL = strings.ReplaceAll(outURL, "www.", "")
outURL = strings.ReplaceAll(outURL, "webtoons.com/", "")
outURL = strings.Split(outURL, "?")[0]
outURL = strings.ReplaceAll(outURL, "/viewer", "")
outURL = strings.ReplaceAll(outURL, "/", "-")
if episodeBatch.minEp != episodeBatch.maxEp {
outURL = fmt.Sprintf("%s-epNo%d-epNo%d", outURL, episodeBatch.minEp, episodeBatch.maxEp)
} else {
outURL = fmt.Sprintf("%s-epNo%d", outURL, episodeBatch.minEp)
}
outPath := outURL + ".pdf"
err := pdf.WritePdf(outPath)
if err != nil {
fmt.Println(err.Error())
os.Exit(1)
}
fmt.Println(fmt.Sprintf("added page %d/%d", idx+1, len(imgLinks)))
fmt.Println(fmt.Sprintf("saved to %s", outPath))
}
outURL := strings.ReplaceAll(url, "http://", "")
outURL = strings.ReplaceAll(outURL, "https://", "")
outURL = strings.ReplaceAll(outURL, "www.", "")
outURL = strings.ReplaceAll(outURL, "webtoons.com/", "")
outURL = strings.Split(outURL, "?")[0]
outURL = strings.ReplaceAll(outURL, "/viewer", "")
outURL = strings.ReplaceAll(outURL, "/", "-")
if actualMinEp != actualMaxEp {
outURL = fmt.Sprintf("%s-ep%d-%d", outURL, actualMinEp, actualMaxEp)
} else {
outURL = fmt.Sprintf("%s-ep%d", outURL, actualMinEp)
}
outPath := outURL + ".pdf"
err := pdf.WritePdf(outPath)
if err != nil {
fmt.Println(err.Error())
os.Exit(1)
}
fmt.Println(fmt.Sprintf("saved to %s", outPath))
}