batch episodes together, default 10
parent 8bb695de04
commit 48b57c74d6

3 changed files with 94 additions and 35 deletions

.gitignore (vendored): 1 line changed

@@ -3,3 +3,4 @@
 *.jpg
 *.jpeg
 *.pdf
+.DS_Store

README.md: 13 lines changed

@@ -8,13 +8,22 @@ Download [webtoon](https://www.webtoons.com/en/) comics as PDFs using a terminal
 # download single episodes
 webtoon-dl "<your-webtoon-episode-url>"
 
-# download entire series
+# download entire series, default 10 episodes per pdf
 webtoon-dl "<your-webtoon-series-url>"
 
-# create single pdfs from a range of episodes (inclusive)
+# specify a range of episodes (inclusive on both ends)
 webtoon-dl --min-ep=10 --max-ep=20 "<your-webtoon-series-url>"
 
+# change the number of episodes per file, e.g. this would create 11 files
+webtoon-dl --min-ep=10 --max-ep=20 --eps-per-file=1 "<your-webtoon-series-url>"
+
+# download entire series into a single file (GENERALLY NOT RECOMMENDED)
+webtoon-dl --eps-per-file=1000000 "<your-webtoon-series-url>"
 ```
 
+> [!IMPORTANT]
+> The episode numbers specified in `--min-ep` and `--max-ep` will correspond to the URL parameter `&episode_no=`, which may be different from the episode number in the title
+
 > [!IMPORTANT]
 > Some terminal settings (e.g. [Oh My Zsh](https://ohmyz.sh)) make it so pasted URLs will be [automatically escaped](https://github.com/ohmyzsh/ohmyzsh/issues/7632).
 > You want to EITHER surround your unescaped webtoon URL with double quotes (otherwise you'll get something like a "no matches found" error) OR leave the double quotes off escaped URLs.
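For reference, the number of PDFs produced by a range download is the episode count divided by `--eps-per-file`, rounded up, assuming every episode in the range exists. A minimal sketch of that arithmetic (the `fileCount` helper is illustrative, not part of webtoon-dl):

```go
package main

import "fmt"

// fileCount returns how many PDFs a range download produces:
// ceil((maxEp - minEp + 1) / epsPerFile).
func fileCount(minEp, maxEp, epsPerFile int) int {
	episodes := maxEp - minEp + 1
	return (episodes + epsPerFile - 1) / epsPerFile
}

func main() {
	fmt.Println(fileCount(10, 20, 1))  // 11 files, one per episode
	fmt.Println(fileCount(10, 20, 10)) // 2 files with the default batch size
}
```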

main.go: 115 lines changed

@@ -18,6 +18,12 @@ import (
     "time"
 )
 
+type EpisodeBatch struct {
+    imgLinks []string
+    minEp    int
+    maxEp    int
+}
+
 func getImgLinksForEpisode(url string) []string {
     resp, err := soup.Get(url)
     time.Sleep(200 * time.Millisecond)
@@ -54,12 +60,16 @@ func getEpisodeLinksForPage(url string) ([]string, error) {
     return links, nil
 }
 
-func getImgLinks(url string, minEp, maxEp int) ([]string, int, int) {
+func getEpisodeBatches(url string, minEp, maxEp, epsPerBatch int) []EpisodeBatch {
     if strings.Contains(url, "/viewer") {
         // assume viewing single episode
-        return getImgLinksForEpisode(url), episodeNo(url), episodeNo(url)
+        return []EpisodeBatch{{
+            imgLinks: getImgLinksForEpisode(url),
+            minEp:    episodeNo(url),
+            maxEp:    episodeNo(url),
+        }}
     } else {
-        // assume viewing list of episodes
+        // assume viewing set of episodes
         println("scanning all pages to get all episode links")
         allEpisodeLinks := getAllEpisodeLinks(url)
         println(fmt.Sprintf("found %d total episodes", len(allEpisodeLinks)))
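In the single-episode branch above, both `minEp` and `maxEp` come from `episodeNo(url)`. The README note added in this commit says these numbers track the `&episode_no=` URL parameter, so a rough sketch of that kind of lookup with the standard library might look like the following (an assumption about what `episodeNo` does, not its actual implementation; the sample link is hypothetical):

```go
package main

import (
	"fmt"
	"net/url"
	"strconv"
)

// episodeNoFromURL reads the episode_no query parameter from an episode link.
// Illustrative only; the real episodeNo helper in main.go may differ.
func episodeNoFromURL(episodeLink string) (int, error) {
	u, err := url.Parse(episodeLink)
	if err != nil {
		return 0, err
	}
	return strconv.Atoi(u.Query().Get("episode_no"))
}

func main() {
	n, err := episodeNoFromURL("https://www.webtoons.com/en/fantasy/example/ep-12/viewer?title_no=1&episode_no=12")
	if err != nil {
		panic(err)
	}
	fmt.Println(n) // 12
}
```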
@@ -71,8 +81,29 @@ func getImgLinks(url string, minEp, maxEp int) ([]string, int, int) {
                 desiredEpisodeLinks = append(desiredEpisodeLinks, episodeLink)
             }
         }
-        return getImgLinksForEpisodes(desiredEpisodeLinks), episodeNo(desiredEpisodeLinks[0]), episodeNo(desiredEpisodeLinks[len(desiredEpisodeLinks)-1])
+        actualMinEp := episodeNo(desiredEpisodeLinks[0])
+        if minEp > actualMinEp {
+            actualMinEp = minEp
+        }
+        actualMaxEp := episodeNo(desiredEpisodeLinks[len(desiredEpisodeLinks)-1])
+        if maxEp < actualMaxEp {
+            actualMaxEp = maxEp
+        }
+        println(fmt.Sprintf("fetching image links for episodes %d through %d", actualMinEp, actualMaxEp))
+
+        var episodeBatches []EpisodeBatch
+        for start := 0; start < len(desiredEpisodeLinks); start += epsPerBatch {
+            end := start + epsPerBatch
+            if end > len(desiredEpisodeLinks) {
+                end = len(desiredEpisodeLinks)
+            }
+            episodeBatches = append(episodeBatches, EpisodeBatch{
+                imgLinks: getImgLinksForEpisodes(desiredEpisodeLinks[start:end], actualMaxEp),
+                minEp:    episodeNo(desiredEpisodeLinks[start]),
+                maxEp:    episodeNo(desiredEpisodeLinks[end-1]),
+            })
+        }
+        return episodeBatches
     }
 }
 
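The batching loop added in the hunk above is a plain slice-chunking pattern: step through the episode links `epsPerBatch` at a time and clamp the final batch. A standalone sketch of the same pattern with placeholder link names (not the tool's actual data):

```go
package main

import "fmt"

func main() {
	// Placeholder episode links standing in for desiredEpisodeLinks.
	links := []string{"ep1", "ep2", "ep3", "ep4", "ep5", "ep6", "ep7"}
	epsPerBatch := 3

	for start := 0; start < len(links); start += epsPerBatch {
		end := start + epsPerBatch
		if end > len(links) {
			end = len(links) // clamp the final, possibly short, batch
		}
		fmt.Println(links[start:end]) // [ep1 ep2 ep3], [ep4 ep5 ep6], [ep7]
	}
}
```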
@@ -124,10 +155,10 @@ func episodeNo(episodeLink string) int {
     return episodeNo
 }
 
-func getImgLinksForEpisodes(episodeLinks []string) []string {
+func getImgLinksForEpisodes(episodeLinks []string, actualMaxEp int) []string {
     var allImgLinks []string
     for _, episodeLink := range episodeLinks {
-        println(fmt.Sprintf("fetching images for episode %d (last episode %d)", episodeNo(episodeLink), episodeNo(episodeLinks[len(episodeLinks)-1])))
+        println(fmt.Sprintf("fetching image links for episode %d/%d", episodeNo(episodeLink), actualMaxEp))
         allImgLinks = append(allImgLinks, getImgLinksForEpisode(episodeLink)...)
     }
     return allImgLinks
@@ -193,44 +224,62 @@ func main() {
     }
     minEp := flag.Int("min-ep", 0, "Minimum episode number to download (inclusive)")
     maxEp := flag.Int("max-ep", math.MaxInt, "Maximum episode number to download (inclusive)")
+    epsPerFile := flag.Int("eps-per-file", 10, "Number of episodes to put in each PDF file")
     flag.Parse()
     if *minEp > *maxEp {
         fmt.Println("min-ep must be less than or equal to max-ep")
         os.Exit(1)
     }
+    if *epsPerFile < 1 {
+        fmt.Println("eps-per-file must be greater than or equal to 1")
+        os.Exit(1)
+    }
+    if *minEp < 0 {
+        fmt.Println("min-ep must be greater than or equal to 0")
+        os.Exit(1)
+    }
 
     url := os.Args[len(os.Args)-1]
-    imgLinks, actualMinEp, actualMaxEp := getImgLinks(url, *minEp, *maxEp)
-    fmt.Println(fmt.Sprintf("found %d pages", len(imgLinks)))
+    episodeBatches := getEpisodeBatches(url, *minEp, *maxEp, *epsPerFile)
 
-    pdf := gopdf.GoPdf{}
-    pdf.Start(gopdf.Config{Unit: gopdf.UnitPT, PageSize: *gopdf.PageSizeA4})
-    for idx, imgLink := range imgLinks {
-        err := addImgToPdf(&pdf, imgLink)
+    totalPages := 0
+    for _, episodeBatch := range episodeBatches {
+        totalPages += len(episodeBatch.imgLinks)
+    }
+    totalEpisodes := episodeBatches[len(episodeBatches)-1].maxEp - episodeBatches[0].minEp + 1
+    fmt.Println(fmt.Sprintf("found %d total image links across %d episodes", totalPages, totalEpisodes))
+    fmt.Println(fmt.Sprintf("saving into %d files with max of %d episodes per file", len(episodeBatches), *epsPerFile))
+
+    for _, episodeBatch := range episodeBatches {
+        pdf := gopdf.GoPdf{}
+        pdf.Start(gopdf.Config{Unit: gopdf.UnitPT, PageSize: *gopdf.PageSizeA4})
+        for idx, imgLink := range episodeBatch.imgLinks {
+            err := addImgToPdf(&pdf, imgLink)
+            if err != nil {
+                fmt.Println(err.Error())
+                os.Exit(1)
+            }
+            fmt.Println(fmt.Sprintf("saving episodes %d through %d: added page %d/%d", episodeBatch.minEp, episodeBatch.maxEp, idx+1, len(episodeBatch.imgLinks)))
+        }
+
+        outURL := strings.ReplaceAll(url, "http://", "")
+        outURL = strings.ReplaceAll(outURL, "https://", "")
+        outURL = strings.ReplaceAll(outURL, "www.", "")
+        outURL = strings.ReplaceAll(outURL, "webtoons.com/", "")
+        outURL = strings.Split(outURL, "?")[0]
+        outURL = strings.ReplaceAll(outURL, "/viewer", "")
+        outURL = strings.ReplaceAll(outURL, "/", "-")
+        if episodeBatch.minEp != episodeBatch.maxEp {
+            outURL = fmt.Sprintf("%s-epNo%d-epNo%d", outURL, episodeBatch.minEp, episodeBatch.maxEp)
+        } else {
+            outURL = fmt.Sprintf("%s-epNo%d", outURL, episodeBatch.minEp)
+        }
+        outPath := outURL + ".pdf"
+        err := pdf.WritePdf(outPath)
         if err != nil {
             fmt.Println(err.Error())
             os.Exit(1)
         }
-        fmt.Println(fmt.Sprintf("added page %d/%d", idx+1, len(imgLinks)))
+        fmt.Println(fmt.Sprintf("saved to %s", outPath))
     }
-
-    outURL := strings.ReplaceAll(url, "http://", "")
-    outURL = strings.ReplaceAll(outURL, "https://", "")
-    outURL = strings.ReplaceAll(outURL, "www.", "")
-    outURL = strings.ReplaceAll(outURL, "webtoons.com/", "")
-    outURL = strings.Split(outURL, "?")[0]
-    outURL = strings.ReplaceAll(outURL, "/viewer", "")
-    outURL = strings.ReplaceAll(outURL, "/", "-")
-    if actualMinEp != actualMaxEp {
-        outURL = fmt.Sprintf("%s-ep%d-%d", outURL, actualMinEp, actualMaxEp)
-    } else {
-        outURL = fmt.Sprintf("%s-ep%d", outURL, actualMinEp)
-    }
-    outPath := outURL + ".pdf"
-    err := pdf.WritePdf(outPath)
-    if err != nil {
-        fmt.Println(err.Error())
-        os.Exit(1)
-    }
-    fmt.Println(fmt.Sprintf("saved to %s", outPath))
 }
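With this change each batch gets its own output name derived from the series URL plus an `epNo` range. A sketch of what that naming produces for one batch, reusing the same string operations as the loop above (the series URL is hypothetical):

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// Hypothetical series URL; in webtoon-dl the real value comes from os.Args.
	url := "https://www.webtoons.com/en/fantasy/example-series/list?title_no=1234"

	outURL := strings.ReplaceAll(url, "http://", "")
	outURL = strings.ReplaceAll(outURL, "https://", "")
	outURL = strings.ReplaceAll(outURL, "www.", "")
	outURL = strings.ReplaceAll(outURL, "webtoons.com/", "")
	outURL = strings.Split(outURL, "?")[0]
	outURL = strings.ReplaceAll(outURL, "/viewer", "")
	outURL = strings.ReplaceAll(outURL, "/", "-")

	// One batch spanning episodes 1 through 10.
	fmt.Println(fmt.Sprintf("%s-epNo%d-epNo%d.pdf", outURL, 1, 10))
	// Output: en-fantasy-example-series-list-epNo1-epNo10.pdf
}
```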