diff --git a/.gitignore b/.gitignore index e05051a..941d585 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ *.jpg *.jpeg *.pdf +.DS_Store diff --git a/README.md b/README.md index 276880d..719c206 100644 --- a/README.md +++ b/README.md @@ -8,13 +8,22 @@ Download [webtoon](https://www.webtoons.com/en/) comics as PDFs using a terminal # download single episodes webtoon-dl "" -# download entire series +# download entire series, default 10 episodes per pdf webtoon-dl "" -# create single pdfs from a range of episodes (inclusive) +# specify a range of episodes (inclusive on both ends) webtoon-dl --min-ep=10 --max-ep=20 "" + +# change the number of episodes per file, e.g. this would create 11 files +webtoon-dl --min-ep=10 --max-ep=20 --eps-per-file=1 "" + +# download entire series into a single file (GENERALLY NOT RECOMMENDED) +webtoon-dl --eps-per-file=1000000 "" ``` +> [!IMPORTANT] +> The episode numbers specified in `--min-ep` and `--max-ep` will correspond to the URL parameter `&episode_no=`, which may be different from the episode number in the title. + > [!IMPORTANT] > Some terminal settings (e.g. [Oh My Zsh](https://ohmyz.sh)) make it so pasted URLs will be [automatically escaped](https://github.com/ohmyzsh/ohmyzsh/issues/7632). > You want to EITHER surround your unescaped webtoon URL with double quotes (otherwise you'll get something like a "no matches found" error) OR leave the double quotes off escaped URLs. 
diff --git a/main.go b/main.go index 3ef119c..9d602f6 100644 --- a/main.go +++ b/main.go @@ -18,6 +18,12 @@ import ( "time" ) +type EpisodeBatch struct { + imgLinks []string + minEp int + maxEp int +} + func getImgLinksForEpisode(url string) []string { resp, err := soup.Get(url) time.Sleep(200 * time.Millisecond) @@ -54,12 +60,16 @@ func getEpisodeLinksForPage(url string) ([]string, error) { return links, nil } -func getImgLinks(url string, minEp, maxEp int) ([]string, int, int) { +func getEpisodeBatches(url string, minEp, maxEp, epsPerBatch int) []EpisodeBatch { if strings.Contains(url, "/viewer") { // assume viewing single episode - return getImgLinksForEpisode(url), episodeNo(url), episodeNo(url) + return []EpisodeBatch{{ + imgLinks: getImgLinksForEpisode(url), + minEp: episodeNo(url), + maxEp: episodeNo(url), + }} } else { - // assume viewing list of episodes + // assume viewing set of episodes println("scanning all pages to get all episode links") allEpisodeLinks := getAllEpisodeLinks(url) println(fmt.Sprintf("found %d total episodes", len(allEpisodeLinks))) @@ -71,8 +81,29 @@ func getImgLinks(url string, minEp, maxEp int) ([]string, int, int) { desiredEpisodeLinks = append(desiredEpisodeLinks, episodeLink) } } + actualMinEp := episodeNo(desiredEpisodeLinks[0]) + if minEp > actualMinEp { + actualMinEp = minEp + } + actualMaxEp := episodeNo(desiredEpisodeLinks[len(desiredEpisodeLinks)-1]) + if maxEp < actualMaxEp { + actualMaxEp = maxEp + } + println(fmt.Sprintf("fetching image links for episodes %d through %d", actualMinEp, actualMaxEp)) - return getImgLinksForEpisodes(desiredEpisodeLinks), episodeNo(desiredEpisodeLinks[0]), episodeNo(desiredEpisodeLinks[len(desiredEpisodeLinks)-1]) + var episodeBatches []EpisodeBatch + for start := 0; start < len(desiredEpisodeLinks); start += epsPerBatch { + end := start + epsPerBatch + if end > len(desiredEpisodeLinks) { + end = len(desiredEpisodeLinks) + } + episodeBatches = append(episodeBatches, EpisodeBatch{ + imgLinks: 
getImgLinksForEpisodes(desiredEpisodeLinks[start:end], actualMaxEp), + minEp: episodeNo(desiredEpisodeLinks[start]), + maxEp: episodeNo(desiredEpisodeLinks[end-1]), + }) + } + return episodeBatches } } @@ -124,10 +155,10 @@ func episodeNo(episodeLink string) int { return episodeNo } -func getImgLinksForEpisodes(episodeLinks []string) []string { +func getImgLinksForEpisodes(episodeLinks []string, actualMaxEp int) []string { var allImgLinks []string for _, episodeLink := range episodeLinks { - println(fmt.Sprintf("fetching images for episode %d (last episode %d)", episodeNo(episodeLink), episodeNo(episodeLinks[len(episodeLinks)-1]))) + println(fmt.Sprintf("fetching image links for episode %d/%d", episodeNo(episodeLink), actualMaxEp)) allImgLinks = append(allImgLinks, getImgLinksForEpisode(episodeLink)...) } return allImgLinks @@ -193,44 +224,62 @@ func main() { } minEp := flag.Int("min-ep", 0, "Minimum episode number to download (inclusive)") maxEp := flag.Int("max-ep", math.MaxInt, "Maximum episode number to download (inclusive)") + epsPerFile := flag.Int("eps-per-file", 10, "Number of episodes to put in each PDF file") flag.Parse() if *minEp > *maxEp { fmt.Println("min-ep must be less than or equal to max-ep") os.Exit(1) } + if *epsPerFile < 1 { + fmt.Println("eps-per-file must be greater than or equal to 1") + os.Exit(1) + } + if *minEp < 0 { + fmt.Println("min-ep must be greater than or equal to 0") + os.Exit(1) + } url := os.Args[len(os.Args)-1] - imgLinks, actualMinEp, actualMaxEp := getImgLinks(url, *minEp, *maxEp) - fmt.Println(fmt.Sprintf("found %d pages", len(imgLinks))) + episodeBatches := getEpisodeBatches(url, *minEp, *maxEp, *epsPerFile) - pdf := gopdf.GoPdf{} - pdf.Start(gopdf.Config{Unit: gopdf.UnitPT, PageSize: *gopdf.PageSizeA4}) - for idx, imgLink := range imgLinks { - err := addImgToPdf(&pdf, imgLink) + totalPages := 0 + for _, episodeBatch := range episodeBatches { + totalPages += len(episodeBatch.imgLinks) + } + totalEpisodes := 
episodeBatches[len(episodeBatches)-1].maxEp - episodeBatches[0].minEp + 1 + fmt.Println(fmt.Sprintf("found %d total image links across %d episodes", totalPages, totalEpisodes)) + fmt.Println(fmt.Sprintf("saving into %d files with max of %d episodes per file", len(episodeBatches), *epsPerFile)) + + for _, episodeBatch := range episodeBatches { + pdf := gopdf.GoPdf{} + pdf.Start(gopdf.Config{Unit: gopdf.UnitPT, PageSize: *gopdf.PageSizeA4}) + for idx, imgLink := range episodeBatch.imgLinks { + err := addImgToPdf(&pdf, imgLink) + if err != nil { + fmt.Println(err.Error()) + os.Exit(1) + } + fmt.Println(fmt.Sprintf("saving episodes %d through %d: added page %d/%d", episodeBatch.minEp, episodeBatch.maxEp, idx+1, len(episodeBatch.imgLinks))) + } + + outURL := strings.ReplaceAll(url, "http://", "") + outURL = strings.ReplaceAll(outURL, "https://", "") + outURL = strings.ReplaceAll(outURL, "www.", "") + outURL = strings.ReplaceAll(outURL, "webtoons.com/", "") + outURL = strings.Split(outURL, "?")[0] + outURL = strings.ReplaceAll(outURL, "/viewer", "") + outURL = strings.ReplaceAll(outURL, "/", "-") + if episodeBatch.minEp != episodeBatch.maxEp { + outURL = fmt.Sprintf("%s-epNo%d-epNo%d", outURL, episodeBatch.minEp, episodeBatch.maxEp) + } else { + outURL = fmt.Sprintf("%s-epNo%d", outURL, episodeBatch.minEp) + } + outPath := outURL + ".pdf" + err := pdf.WritePdf(outPath) if err != nil { fmt.Println(err.Error()) os.Exit(1) } - fmt.Println(fmt.Sprintf("added page %d/%d", idx+1, len(imgLinks))) + fmt.Println(fmt.Sprintf("saved to %s", outPath)) } - - outURL := strings.ReplaceAll(url, "http://", "") - outURL = strings.ReplaceAll(outURL, "https://", "") - outURL = strings.ReplaceAll(outURL, "www.", "") - outURL = strings.ReplaceAll(outURL, "webtoons.com/", "") - outURL = strings.Split(outURL, "?")[0] - outURL = strings.ReplaceAll(outURL, "/viewer", "") - outURL = strings.ReplaceAll(outURL, "/", "-") - if actualMinEp != actualMaxEp { - outURL = fmt.Sprintf("%s-ep%d-%d", outURL, 
actualMinEp, actualMaxEp) - } else { - outURL = fmt.Sprintf("%s-ep%d", outURL, actualMinEp) - } - outPath := outURL + ".pdf" - err := pdf.WritePdf(outPath) - if err != nil { - fmt.Println(err.Error()) - os.Exit(1) - } - fmt.Println(fmt.Sprintf("saved to %s", outPath)) }