episode ranges

parent 4412eed9c7
commit 6ac152f3f5

2 changed files with 121 additions and 75 deletions
README.md

@@ -5,7 +5,14 @@ Download [webtoon](https://www.webtoons.com/en/) comics as PDFs using a terminal
 ## Usage
 
 ```shell
-webtoon-dl <your-webtoon-url>
+# download a single episode
+webtoon-dl <your-webtoon-episode-url>
+
+# download an entire series
+webtoon-dl <your-webtoon-series-url>
+
+# create a single PDF from a range of episodes (inclusive)
+webtoon-dl --min-ep=10 --max-ep=20 <your-webtoon-series-url>
 ```
 
 ## Installation
main.go
@@ -2,11 +2,13 @@ package main
 
 import (
 	"bytes"
+	"flag"
 	"fmt"
 	"github.com/anaskhan96/soup"
 	"github.com/signintech/gopdf"
 	"image"
 	"io"
+	"math"
 	"net/http"
 	"os"
 	"regexp"
@@ -18,7 +20,7 @@ import (
 
 func getImgLinksForEpisode(url string) []string {
 	resp, err := soup.Get(url)
-	time.Sleep(500 * time.Millisecond)
+	time.Sleep(200 * time.Millisecond)
 	if err != nil {
 		fmt.Println(fmt.Sprintf("Error fetching page: %v", err))
 		os.Exit(1)
@@ -37,7 +39,7 @@ func getImgLinksForEpisode(url string) []string {
 
 func getEpisodeLinksForPage(url string) ([]string, error) {
 	resp, err := soup.Get(url)
-	time.Sleep(500 * time.Millisecond)
+	time.Sleep(200 * time.Millisecond)
 	if err != nil {
 		return []string{}, fmt.Errorf("error fetching page: %v", err)
 	}
@@ -52,14 +54,31 @@ func getEpisodeLinksForPage(url string) ([]string, error) {
 	return links, nil
 }
 
-func getImgLinks(url string) []string {
+func getImgLinks(url string, minEp, maxEp int) ([]string, int, int) {
 	if strings.Contains(url, "/viewer") {
 		// assume viewing single episode
-		return getImgLinksForEpisode(url)
+		return getImgLinksForEpisode(url), episodeNo(url), episodeNo(url)
 	} else {
 		// assume viewing list of episodes
+		println("scanning all pages to get all episode links")
+		allEpisodeLinks := getAllEpisodeLinks(url)
+		println(fmt.Sprintf("found %d total episodes", len(allEpisodeLinks)))
+
+		var desiredEpisodeLinks []string
+		for _, episodeLink := range allEpisodeLinks {
+			epNo := episodeNo(episodeLink)
+			if epNo >= minEp && epNo <= maxEp {
+				desiredEpisodeLinks = append(desiredEpisodeLinks, episodeLink)
+			}
+		}
+
+		return getImgLinksForEpisodes(desiredEpisodeLinks), episodeNo(desiredEpisodeLinks[0]), episodeNo(desiredEpisodeLinks[len(desiredEpisodeLinks)-1])
+	}
+}
+
+func getAllEpisodeLinks(url string) []string {
 	re := regexp.MustCompile("&page=[0-9]+")
-	allEpisodeLinks := make(map[string]struct{})
+	episodeLinkSet := make(map[string]struct{})
 	foundLastPage := false
 	for page := 1; !foundLastPage; page++ {
 		url = re.ReplaceAllString(url, "") + fmt.Sprintf("&page=%d", page)
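The new else branch above filters the full episode list down to the requested range before any images are fetched; both bounds are inclusive. A minimal, runnable sketch of that filter (the links here are hypothetical, and episodeNo is reproduced from the helper this commit adds further down):

```go
package main

import (
	"fmt"
	"regexp"
	"strconv"
)

// episodeNo mirrors the helper this commit introduces: it pulls the
// episode_no query parameter out of an episode URL, returning 0 if absent.
func episodeNo(episodeLink string) int {
	re := regexp.MustCompile("episode_no=([0-9]+)")
	matches := re.FindStringSubmatch(episodeLink)
	if len(matches) != 2 {
		return 0
	}
	n, err := strconv.Atoi(matches[1])
	if err != nil {
		return 0
	}
	return n
}

func main() {
	// hypothetical links; the real ones come from getAllEpisodeLinks
	links := []string{
		"https://www.webtoons.com/en/x/y/viewer?title_no=1&episode_no=9",
		"https://www.webtoons.com/en/x/y/viewer?title_no=1&episode_no=10",
		"https://www.webtoons.com/en/x/y/viewer?title_no=1&episode_no=20",
		"https://www.webtoons.com/en/x/y/viewer?title_no=1&episode_no=21",
	}
	minEp, maxEp := 10, 20
	var desired []string
	for _, link := range links {
		if n := episodeNo(link); n >= minEp && n <= maxEp { // inclusive on both ends
			desired = append(desired, link)
		}
	}
	fmt.Println(len(desired)) // 2: episodes 10 and 20 survive, 9 and 21 do not
}
```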
@@ -69,23 +88,31 @@ func getImgLinks(url string) []string {
 		}
 		for _, episodeLink := range episodeLinks {
 			// when you go past the last page, it just rerenders the last page
-			if _, ok := allEpisodeLinks[episodeLink]; ok {
+			if _, ok := episodeLinkSet[episodeLink]; ok {
 				foundLastPage = true
 				break
 			}
-			allEpisodeLinks[episodeLink] = struct{}{}
+			episodeLinkSet[episodeLink] = struct{}{}
 		}
 		if !foundLastPage {
 			println(url)
 		}
 	}
-	keys := make([]string, 0, len(allEpisodeLinks))
-	for k := range allEpisodeLinks {
-		keys = append(keys, k)
+	allEpisodeLinks := make([]string, 0, len(episodeLinkSet))
+	for episodeLink := range episodeLinkSet {
+		allEpisodeLinks = append(allEpisodeLinks, episodeLink)
 	}
 
 	// extract episode_no from url and sort by it
-	re = regexp.MustCompile("episode_no=([0-9]+)")
-	episodeNo := func(episodeLink string) int {
+	sort.Slice(allEpisodeLinks, func(i, j int) bool {
+		return episodeNo(allEpisodeLinks[i]) < episodeNo(allEpisodeLinks[j])
+	})
+	return allEpisodeLinks
+}
+
+func episodeNo(episodeLink string) int {
+	re := regexp.MustCompile("episode_no=([0-9]+)")
 	matches := re.FindStringSubmatch(episodeLink)
 	if len(matches) != 2 {
 		return 0
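getAllEpisodeLinks never knows the page count up front: as the comment above notes, the site simply re-renders the last page for any &page= value past the end, so the loop stops the first time a page yields a link it has already seen. A self-contained sketch of that termination trick, with a fake fetcher standing in for getEpisodeLinksForPage:

```go
package main

import "fmt"

// fakeFetch stands in for getEpisodeLinksForPage; pages past the end
// re-render the last real page, as the live site does.
func fakeFetch(page int) []string {
	pages := [][]string{
		{"ep1", "ep2"},
		{"ep3", "ep4"},
		{"ep5"},
	}
	if page > len(pages) {
		return pages[len(pages)-1] // repeat the last page
	}
	return pages[page-1]
}

func main() {
	seen := make(map[string]struct{})
	var all []string
	foundLastPage := false
	for page := 1; !foundLastPage; page++ {
		for _, link := range fakeFetch(page) {
			if _, ok := seen[link]; ok {
				foundLastPage = true // a repeated link means we ran off the end
				break
			}
			seen[link] = struct{}{}
			all = append(all, link)
		}
	}
	fmt.Println(all) // [ep1 ep2 ep3 ep4 ep5]
}
```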
@@ -95,18 +122,15 @@ func getImgLinks(url string) []string {
 		return 0
 	}
 	return episodeNo
 }
-	sort.Slice(keys, func(i, j int) bool {
-		return episodeNo(keys[i]) < episodeNo(keys[j])
-	})
 
+func getImgLinksForEpisodes(episodeLinks []string) []string {
 	var allImgLinks []string
-	for _, episodeLink := range keys {
-		println(episodeLink)
+	for _, episodeLink := range episodeLinks {
+		println(fmt.Sprintf("fetching images for episode %d (last episode %d)", episodeNo(episodeLink), episodeNo(episodeLinks[len(episodeLinks)-1])))
 		allImgLinks = append(allImgLinks, getImgLinksForEpisode(episodeLink)...)
 	}
 	return allImgLinks
-}
 }
 
 func fetchImage(imgLink string) []byte {
@@ -139,30 +163,16 @@ func fetchImage(imgLink string) []byte {
 	return buff.Bytes()
 }
 
-func main() {
-	if len(os.Args) < 2 {
-		fmt.Println("Usage: webtoon-dl <url>")
-		os.Exit(1)
-	}
-	url := os.Args[1]
-	imgLinks := getImgLinks(url)
-	fmt.Println(fmt.Sprintf("found %d pages", len(imgLinks)))
-
-	pdf := gopdf.GoPdf{}
-	pdf.Start(gopdf.Config{Unit: gopdf.UnitPT, PageSize: *gopdf.PageSizeA4})
-	for _, imgLink := range imgLinks {
-		fmt.Println(imgLink)
+func addImgToPdf(pdf *gopdf.GoPdf, imgLink string) error {
 	img := fetchImage(imgLink)
 	holder, err := gopdf.ImageHolderByBytes(img)
 	if err != nil {
-		fmt.Println(err.Error())
-		os.Exit(1)
+		return err
 	}
 
 	d, _, err := image.DecodeConfig(bytes.NewReader(img))
 	if err != nil {
-		fmt.Println(err.Error())
-		os.Exit(1)
+		return err
 	}
 
 	// gopdf assumes dpi 128 https://github.com/signintech/gopdf/issues/168
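The dpi comment above is what the W/H arithmetic in the next hunk implements: gopdf interprets pixel dimensions at 128 dpi while page geometry is in points (72 per inch), so each dimension is scaled by 72/128, with a 1 pt fudge subtracted. A quick check of that arithmetic for an 800x1280 px strip:

```go
package main

import "fmt"

func main() {
	// gopdf treats image pixels as 128 dpi; PDF points are 72 per inch,
	// so pixels scale by 72/128 (the -1 matches the commit's fudge factor).
	widthPx, heightPx := 800.0, 1280.0
	fmt.Println(widthPx*72/128-1, heightPx*72/128-1) // 449 719
}
```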
@@ -173,11 +183,35 @@ func main() {
 		W: float64(d.Width)*72/128 - 1,
 		H: float64(d.Height)*72/128 - 1,
 	}})
-	err = pdf.ImageByHolder(holder, 0, 0, nil)
+	return pdf.ImageByHolder(holder, 0, 0, nil)
+}
+
+func main() {
+	if len(os.Args) < 2 {
+		fmt.Println("Usage: webtoon-dl <url>")
+		os.Exit(1)
+	}
+	minEp := flag.Int("min-ep", 0, "Minimum episode number to download (inclusive)")
+	maxEp := flag.Int("max-ep", math.MaxInt, "Maximum episode number to download (inclusive)")
+	flag.Parse()
+	if *minEp > *maxEp {
+		fmt.Println("min-ep must be less than or equal to max-ep")
+		os.Exit(1)
+	}
+
+	url := os.Args[len(os.Args)-1]
+	imgLinks, actualMinEp, actualMaxEp := getImgLinks(url, *minEp, *maxEp)
+	fmt.Println(fmt.Sprintf("found %d pages", len(imgLinks)))
+
+	pdf := gopdf.GoPdf{}
+	pdf.Start(gopdf.Config{Unit: gopdf.UnitPT, PageSize: *gopdf.PageSizeA4})
+	for idx, imgLink := range imgLinks {
+		err := addImgToPdf(&pdf, imgLink)
 		if err != nil {
 			fmt.Println(err.Error())
 			os.Exit(1)
 		}
+		fmt.Println(fmt.Sprintf("added page %d/%d", idx+1, len(imgLinks)))
 	}
 
 	outURL := strings.ReplaceAll(url, "http://", "")
@@ -187,6 +221,11 @@ func main() {
 	outURL = strings.Split(outURL, "?")[0]
 	outURL = strings.ReplaceAll(outURL, "/viewer", "")
 	outURL = strings.ReplaceAll(outURL, "/", "-")
+	if actualMinEp != actualMaxEp {
+		outURL = fmt.Sprintf("%s-ep%d-%d", outURL, actualMinEp, actualMaxEp)
+	} else {
+		outURL = fmt.Sprintf("%s-ep%d", outURL, actualMinEp)
+	}
 	outPath := outURL + ".pdf"
 	err := pdf.WritePdf(outPath)
 	if err != nil {
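The output path is the URL with the scheme and query stripped, /viewer removed, slashes turned into dashes, and now an episode suffix: -ep<min>-<max> for a range, or -ep<n> when only a single episode was fetched. A sketch of the full transformation on a made-up series URL (the https:// strip is assumed from context the diff elides):

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// hypothetical input; mirrors the cleanup chain in main()
	url := "https://www.webtoons.com/en/fantasy/example-series/list?title_no=123"
	actualMinEp, actualMaxEp := 10, 20

	outURL := strings.ReplaceAll(url, "http://", "")
	outURL = strings.ReplaceAll(outURL, "https://", "") // assumed from elided context
	outURL = strings.Split(outURL, "?")[0]
	outURL = strings.ReplaceAll(outURL, "/viewer", "")
	outURL = strings.ReplaceAll(outURL, "/", "-")
	if actualMinEp != actualMaxEp {
		outURL = fmt.Sprintf("%s-ep%d-%d", outURL, actualMinEp, actualMaxEp)
	} else {
		outURL = fmt.Sprintf("%s-ep%d", outURL, actualMinEp)
	}
	fmt.Println(outURL + ".pdf")
	// www.webtoons.com-en-fantasy-example-series-list-ep10-20.pdf
}
```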