webtoon-dl-gif-support/main.go

package main

import (
	"archive/zip"
	"bytes"
	"encoding/json"
	"flag"
	"fmt"
	"github.com/anaskhan96/soup"
	"github.com/signintech/gopdf"
	"image"
	"io"
	"math"
	"net/http"
	"os"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"time"
)

type MotiontoonJson struct {
	Assets struct {
		Image map[string]string `json:"image"`
	} `json:"assets"`
}

type EpisodeBatch struct {
	imgLinks []string
	minEp    int
	maxEp    int
}

type Comic interface {
	addImage([]byte) error
	save(outputPath string) error
}

type PDFComic struct {
	pdf *gopdf.GoPdf
}

// validate PDFComic implements Comic
var _ Comic = &PDFComic{}

func newPDFComic() *PDFComic {
	pdf := gopdf.GoPdf{}
	pdf.Start(gopdf.Config{Unit: gopdf.UnitPT, PageSize: *gopdf.PageSizeA4})
	return &PDFComic{pdf: &pdf}
}

func (c *PDFComic) addImage(img []byte) error {
	holder, err := gopdf.ImageHolderByBytes(img)
	if err != nil {
		return err
	}

	d, _, err := image.DecodeConfig(bytes.NewReader(img))
	if err != nil {
		return err
	}

	// gopdf assumes dpi 128 https://github.com/signintech/gopdf/issues/168
	// W and H are in points, 1 point = 1/72 inch
	// convert pixels (Width and Height) to points
	// subtract 1 point to account for margins
	c.pdf.AddPageWithOption(gopdf.PageOption{PageSize: &gopdf.Rect{
		W: float64(d.Width)*72/128 - 1,
		H: float64(d.Height)*72/128 - 1,
	}})
	return c.pdf.ImageByHolder(holder, 0, 0, nil)
}

func (c *PDFComic) save(outputPath string) error {
	return c.pdf.WritePdf(outputPath)
}

type CBZComic struct {
	zipWriter *zip.Writer
	outFile   *os.File
	numFiles  int
}

// validate CBZComic implements Comic
var _ Comic = &CBZComic{}

func newCBZComic() (*CBZComic, error) {
	out, err := os.CreateTemp("", "output.tmp.cbz")
	if err != nil {
		return nil, err
	}
	zipWriter := zip.NewWriter(out)
	return &CBZComic{zipWriter: zipWriter, outFile: out, numFiles: 0}, nil
}

func (c *CBZComic) addImage(img []byte) error {
	f, err := c.zipWriter.Create(fmt.Sprintf("%d.jpg", c.numFiles))
	if err != nil {
		return err
	}
	_, err = f.Write(img)
	if err != nil {
		return err
	}
	c.numFiles++
	return nil
}

func (c *CBZComic) save(outputPath string) error {
	err := c.zipWriter.Close()
	if err != nil {
		return err
	}
	err = c.outFile.Close()
	if err != nil {
		return err
	}
	err = os.Rename(c.outFile.Name(), outputPath)
	return err
}

func getOzPageImgLinks(doc soup.Root) []string {
	// regex find the documentURL, e.g:
	// viewerOptions: {
	//        // 필수항목
	//        containerId: '#ozViewer',
	//        documentURL: 'https://global.apis.naver.com/lineWebtoon/webtoon/motiontoonJson.json?seq=2830&hashValue=2e0b924676bdc38241bd8fd452191fe3',
	re := regexp.MustCompile("viewerOptions: \\{\n.*// 필수항목\n.*containerId: '#ozViewer',\n.*documentURL: '(.+)'")
	matches := re.FindStringSubmatch(doc.HTML())
	if len(matches) != 2 {
		fmt.Println("could not find documentURL")
		os.Exit(1)
	}

	// fetch json at documentURL and deserialize to MotiontoonJson
	resp, err := soup.Get(matches[1])
	if err != nil {
		fmt.Println(fmt.Sprintf("Error fetching page: %v", err))
		os.Exit(1)
	}
	var motionToon MotiontoonJson
	if err := json.Unmarshal([]byte(resp), &motionToon); err != nil {
		fmt.Println(fmt.Sprintf("Error unmarshalling json: %v", err))
		os.Exit(1)
	}

	// get sorted keys
	var sortedKeys []string
	for k := range motionToon.Assets.Image {
		sortedKeys = append(sortedKeys, k)
	}
	sort.Strings(sortedKeys)

	// get path rule, e.g:
	// motiontoonParam: {
	//   pathRuleParam: {
	//     stillcut: 'https://ewebtoon-phinf.pstatic.net/motiontoon/3536_2e0b924676bdc38241bd8fd452191fe3/{=filename}?type=q70',
	re = regexp.MustCompile("motiontoonParam: \\{\n.*pathRuleParam: \\{\n.*stillcut: '(.+)'")
	matches = re.FindStringSubmatch(doc.HTML())
	if len(matches) != 2 {
		fmt.Println("could not find pathRule")
		os.Exit(1)
	}
	var imgs []string
	for _, k := range sortedKeys {
		imgs = append(imgs, strings.ReplaceAll(matches[1], "{=filename}", motionToon.Assets.Image[k]))
	}
	return imgs
}

func getImgLinksForEpisode(url string) []string {
	resp, err := soup.Get(url)
	time.Sleep(200 * time.Millisecond)
	if err != nil {
		fmt.Println(fmt.Sprintf("Error fetching page: %v", err))
		os.Exit(1)
	}
	doc := soup.HTMLParse(resp)
	imgs := doc.Find("div", "class", "viewer_lst").FindAll("img")
	if len(imgs) == 0 {
		// some comics seem to serve images from a different backend, something about oz
		return getOzPageImgLinks(doc)
	}
	var imgLinks []string
	for _, img := range imgs {
		if dataURL, ok := img.Attrs()["data-url"]; ok {
			imgLinks = append(imgLinks, dataURL)
		}
	}
	return imgLinks
}

func getEpisodeLinksForPage(url string) ([]string, error) {
	resp, err := soup.Get(url)
	time.Sleep(200 * time.Millisecond)
	if err != nil {
		return []string{}, fmt.Errorf("error fetching page: %v", err)
	}
	doc := soup.HTMLParse(resp)
	episodeURLs := doc.Find("div", "class", "detail_lst").FindAll("a")
	var links []string
	for _, episodeURL := range episodeURLs {
		if href := episodeURL.Attrs()["href"]; strings.Contains(href, "/viewer") {
			links = append(links, href)
		}
	}
	return links, nil
}

func getEpisodeBatches(url string, minEp, maxEp, epsPerBatch int) []EpisodeBatch {
	if strings.Contains(url, "/viewer") {
		// assume viewing single episode
		return []EpisodeBatch{{
			imgLinks: getImgLinksForEpisode(url),
			minEp:    episodeNo(url),
			maxEp:    episodeNo(url),
		}}
	} else {
		// assume viewing set of episodes
		println("scanning all pages to get all episode links")
		allEpisodeLinks := getAllEpisodeLinks(url)
		println(fmt.Sprintf("found %d total episodes", len(allEpisodeLinks)))

		var desiredEpisodeLinks []string
		for _, episodeLink := range allEpisodeLinks {
			epNo := episodeNo(episodeLink)
			if epNo >= minEp && epNo <= maxEp {
				desiredEpisodeLinks = append(desiredEpisodeLinks, episodeLink)
			}
		}
		actualMinEp := episodeNo(desiredEpisodeLinks[0])
		if minEp > actualMinEp {
			actualMinEp = minEp
		}
		actualMaxEp := episodeNo(desiredEpisodeLinks[len(desiredEpisodeLinks)-1])
		if maxEp < actualMaxEp {
			actualMaxEp = maxEp
		}
		println(fmt.Sprintf("fetching image links for episodes %d through %d", actualMinEp, actualMaxEp))

		var episodeBatches []EpisodeBatch
		for start := 0; start < len(desiredEpisodeLinks); start += epsPerBatch {
			end := start + epsPerBatch
			if end > len(desiredEpisodeLinks) {
				end = len(desiredEpisodeLinks)
			}
			episodeBatches = append(episodeBatches, EpisodeBatch{
				imgLinks: getImgLinksForEpisodes(desiredEpisodeLinks[start:end], actualMaxEp),
				minEp:    episodeNo(desiredEpisodeLinks[start]),
				maxEp:    episodeNo(desiredEpisodeLinks[end-1]),
			})
		}
		return episodeBatches
	}
}

func getAllEpisodeLinks(url string) []string {
	re := regexp.MustCompile("&page=[0-9]+")
	episodeLinkSet := make(map[string]struct{})
	foundLastPage := false
	for page := 1; !foundLastPage; page++ {
		url = re.ReplaceAllString(url, "") + fmt.Sprintf("&page=%d", page)
		episodeLinks, err := getEpisodeLinksForPage(url)
		if err != nil {
			break
		}
		for _, episodeLink := range episodeLinks {
			// when you go past the last page, it just rerenders the last page
			if _, ok := episodeLinkSet[episodeLink]; ok {
				foundLastPage = true
				break
			}
			episodeLinkSet[episodeLink] = struct{}{}
		}
		if !foundLastPage {
			println(url)
		}
	}

	allEpisodeLinks := make([]string, 0, len(episodeLinkSet))
	for episodeLink := range episodeLinkSet {
		allEpisodeLinks = append(allEpisodeLinks, episodeLink)
	}

	// extract episode_no from url and sort by it
	sort.Slice(allEpisodeLinks, func(i, j int) bool {
		return episodeNo(allEpisodeLinks[i]) < episodeNo(allEpisodeLinks[j])
	})
	return allEpisodeLinks
}

func episodeNo(episodeLink string) int {
	re := regexp.MustCompile("episode_no=([0-9]+)")
	matches := re.FindStringSubmatch(episodeLink)
	if len(matches) != 2 {
		return 0
	}
	episodeNo, err := strconv.Atoi(matches[1])
	if err != nil {
		return 0
	}
	return episodeNo
}

func getImgLinksForEpisodes(episodeLinks []string, actualMaxEp int) []string {
	var allImgLinks []string
	for _, episodeLink := range episodeLinks {
		println(fmt.Sprintf("fetching image links for episode %d/%d", episodeNo(episodeLink), actualMaxEp))
		allImgLinks = append(allImgLinks, getImgLinksForEpisode(episodeLink)...)
	}
	return allImgLinks
}

func fetchImage(imgLink string) []byte {
	req, e := http.NewRequest("GET", imgLink, nil)
	if e != nil {
		fmt.Println(e)
		os.Exit(1)
	}
	req.Header.Set("Referer", "http://www.webtoons.com")

	response, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println(err.Error())
		os.Exit(1)
	}
	defer func(Body io.ReadCloser) {
		err := Body.Close()
		if err != nil {
			fmt.Println(err.Error())
			os.Exit(1)
		}
	}(response.Body)

	buff := new(bytes.Buffer)
	_, err = buff.ReadFrom(response.Body)
	if err != nil {
		fmt.Println(err.Error())
		os.Exit(1)
	}
	return buff.Bytes()
}

func addImgToPdf(pdf *gopdf.GoPdf, imgLink string) error {
	img := fetchImage(imgLink)
	holder, err := gopdf.ImageHolderByBytes(img)
	if err != nil {
		return err
	}

	d, _, err := image.DecodeConfig(bytes.NewReader(img))
	if err != nil {
		return err
	}

	// gopdf assumes dpi 128 https://github.com/signintech/gopdf/issues/168
	// W and H are in points, 1 point = 1/72 inch
	// convert pixels (Width and Height) to points
	// subtract 1 point to account for margins
	pdf.AddPageWithOption(gopdf.PageOption{PageSize: &gopdf.Rect{
		W: float64(d.Width)*72/128 - 1,
		H: float64(d.Height)*72/128 - 1,
	}})
	return pdf.ImageByHolder(holder, 0, 0, nil)
}

func main() {
	if len(os.Args) < 2 {
		fmt.Println("Usage: webtoon-dl <url>")
		os.Exit(1)
	}
	minEp := flag.Int("min-ep", 0, "Minimum episode number to download (inclusive)")
	maxEp := flag.Int("max-ep", math.MaxInt, "Maximum episode number to download (inclusive)")
	epsPerFile := flag.Int("eps-per-file", 10, "Number of episodes to put in each PDF file")
	flag.Parse()
	if *minEp > *maxEp {
		fmt.Println("min-ep must be less than or equal to max-ep")
		os.Exit(1)
	}
	if *epsPerFile < 1 {
		fmt.Println("eps-per-file must be greater than or equal to 1")
		os.Exit(1)
	}
	if *minEp < 0 {
		fmt.Println("min-ep must be greater than or equal to 0")
		os.Exit(1)
	}

	url := os.Args[len(os.Args)-1]
	episodeBatches := getEpisodeBatches(url, *minEp, *maxEp, *epsPerFile)

	totalPages := 0
	for _, episodeBatch := range episodeBatches {
		totalPages += len(episodeBatch.imgLinks)
	}
	totalEpisodes := episodeBatches[len(episodeBatches)-1].maxEp - episodeBatches[0].minEp + 1
	fmt.Println(fmt.Sprintf("found %d total image links across %d episodes", totalPages, totalEpisodes))
	fmt.Println(fmt.Sprintf("saving into %d files with max of %d episodes per file", len(episodeBatches), *epsPerFile))

	for _, episodeBatch := range episodeBatches {
		var err error
		outURL := strings.ReplaceAll(url, "http://", "")
		outURL = strings.ReplaceAll(outURL, "https://", "")
		outURL = strings.ReplaceAll(outURL, "www.", "")
		outURL = strings.ReplaceAll(outURL, "webtoons.com/", "")
		outURL = strings.Split(outURL, "?")[0]
		outURL = strings.ReplaceAll(outURL, "/viewer", "")
		outURL = strings.ReplaceAll(outURL, "/", "-")
		if episodeBatch.minEp != episodeBatch.maxEp {
			outURL = fmt.Sprintf("%s-epNo%d-epNo%d", outURL, episodeBatch.minEp, episodeBatch.maxEp)
		} else {
			outURL = fmt.Sprintf("%s-epNo%d", outURL, episodeBatch.minEp)
		}
		//comic := newPDFComic()
		//outPath := outURL + ".pdf"
		comic, err := newCBZComic()
		if err != nil {
			fmt.Println(err.Error())
			os.Exit(1)
		}
		outPath := outURL + ".cbz"
		for idx, imgLink := range episodeBatch.imgLinks {
			if strings.Contains(imgLink, ".gif") {
				fmt.Println(fmt.Sprintf("WARNING: skipping gif %s", imgLink))
				continue
			}
			err := comic.addImage(fetchImage(imgLink))
			if err != nil {
				fmt.Println(err.Error())
				os.Exit(1)
			}
			fmt.Println(fmt.Sprintf("saving episodes %d through %d: added page %d/%d", episodeBatch.minEp, episodeBatch.maxEp, idx+1, len(episodeBatch.imgLinks)))
		}

		err = comic.save(outPath)
		if err != nil {
			fmt.Println(err.Error())
			os.Exit(1)
		}
		fmt.Println(fmt.Sprintf("saved to %s", outPath))
	}

	//createCbz := func(episodeBatches []EpisodeBatch, outPath string) error {
	//	out, err := os.Create(outPath)
	//	if err != nil {
	//		return err
	//	}
	//	defer func(out *os.File) {
	//		err := out.Close()
	//		if err != nil {
	//			fmt.Println(err.Error())
	//			os.Exit(1)
	//		}
	//	}(out)
	//
	//	zipWriter := zip.NewWriter(out)
	//	for _, episodeBatch := range episodeBatches {
	//		for idx, imgLink := range episodeBatch.imgLinks {
	//			if strings.Contains(imgLink, ".gif") {
	//				fmt.Println(fmt.Sprintf("WARNING: skipping gif %s", imgLink))
	//				continue
	//			}
	//			img := fetchImage(imgLink)
	//			f, err := zipWriter.Create(fmt.Sprintf("%d.jpg", idx))
	//			if err != nil {
	//				return err
	//			}
	//			_, err = f.Write(img)
	//			if err != nil {
	//				return err
	//			}
	//			fmt.Println(fmt.Sprintf("saving episodes %d through %d: added page %d/%d", episodeBatch.minEp, episodeBatch.maxEp, idx+1, len(episodeBatch.imgLinks)))
	//		}
	//	}
	//	err = zipWriter.Close()
	//	if err != nil {
	//		return err
	//	}
	//	return nil
	//}
	//// create cbz output from image links
	//outPath := "output.cbz"
	//err := createCbz(episodeBatches, outPath)
	//if err != nil {
	//	fmt.Println(err.Error())
	//	os.Exit(1)
	//}
	//fmt.Println(fmt.Sprintf("saved to %s", outPath))
}
initial commit 2023-12-20 10:33:51 -08:00			`package main`

			`import (`
start cbz 2024-04-13 19:54:50 -07:00			`"archive/zip"`
working 2023-12-21 08:38:48 -08:00			`"bytes"`
Support "OZ Page" image links 2024-01-14 13:45:25 -08:00			`"encoding/json"`
episode ranges 2023-12-24 20:12:52 -08:00			`"flag"`
initial commit 2023-12-20 10:33:51 -08:00			`"fmt"`
			`"github.com/anaskhan96/soup"`
working 2023-12-21 08:38:48 -08:00			`"github.com/signintech/gopdf"`
			`"image"`
			`"io"`
episode ranges 2023-12-24 20:12:52 -08:00			`"math"`
working 2023-12-21 08:38:48 -08:00			`"net/http"`
initial commit 2023-12-20 10:33:51 -08:00			`"os"`
allow downloading entire comics 2023-12-23 16:42:43 -08:00			`"regexp"`
			`"sort"`
			`"strconv"`
finish 2023-12-21 09:20:50 -08:00			`"strings"`
allow downloading entire comics 2023-12-23 16:42:43 -08:00			`"time"`
initial commit 2023-12-20 10:33:51 -08:00			`)`

Support "OZ Page" image links 2024-01-14 13:45:25 -08:00			`type MotiontoonJson struct {`
			`Assets struct {`
			Image map[string]string `json:"image"`
			} `json:"assets"`
			`}`

batch episodes together, default 10 2024-01-01 11:50:39 -04:00			`type EpisodeBatch struct {`
			`imgLinks []string`
			`minEp int`
			`maxEp int`
			`}`

start cbz 2024-04-13 19:54:50 -07:00			`type Comic interface {`
			`addImage([]byte) error`
			`save(outputPath string) error`
			`}`

			`type PDFComic struct {`
			`pdf *gopdf.GoPdf`
			`}`

			`// validate PDFComic implements Comic`
			`var _ Comic = &PDFComic{}`

			`func newPDFComic() *PDFComic {`
			`pdf := gopdf.GoPdf{}`
			`pdf.Start(gopdf.Config{Unit: gopdf.UnitPT, PageSize: *gopdf.PageSizeA4})`
			`return &PDFComic{pdf: &pdf}`
			`}`

			`func (c *PDFComic) addImage(img []byte) error {`
			`holder, err := gopdf.ImageHolderByBytes(img)`
			`if err != nil {`
			`return err`
			`}`

			`d, _, err := image.DecodeConfig(bytes.NewReader(img))`
			`if err != nil {`
			`return err`
			`}`

			`// gopdf assumes dpi 128 https://github.com/signintech/gopdf/issues/168`
			`// W and H are in points, 1 point = 1/72 inch`
			`// convert pixels (Width and Height) to points`
			`// subtract 1 point to account for margins`
			`c.pdf.AddPageWithOption(gopdf.PageOption{PageSize: &gopdf.Rect{`
			`W: float64(d.Width)*72/128 - 1,`
			`H: float64(d.Height)*72/128 - 1,`
			`}})`
			`return c.pdf.ImageByHolder(holder, 0, 0, nil)`
			`}`

			`func (c *PDFComic) save(outputPath string) error {`
			`return c.pdf.WritePdf(outputPath)`
			`}`

			`type CBZComic struct {`
			`zipWriter *zip.Writer`
			`outFile *os.File`
			`numFiles int`
			`}`

			`// validate CBZComic implements Comic`
			`var _ Comic = &CBZComic{}`

			`func newCBZComic() (*CBZComic, error) {`
			`out, err := os.CreateTemp("", "output.tmp.cbz")`
			`if err != nil {`
			`return nil, err`
			`}`
			`zipWriter := zip.NewWriter(out)`
			`return &CBZComic{zipWriter: zipWriter, outFile: out, numFiles: 0}, nil`
			`}`

			`func (c *CBZComic) addImage(img []byte) error {`
			`f, err := c.zipWriter.Create(fmt.Sprintf("%d.jpg", c.numFiles))`
			`if err != nil {`
			`return err`
			`}`
			`_, err = f.Write(img)`
			`if err != nil {`
			`return err`
			`}`
			`c.numFiles++`
			`return nil`
			`}`

			`func (c *CBZComic) save(outputPath string) error {`
			`err := c.zipWriter.Close()`
			`if err != nil {`
			`return err`
			`}`
			`err = c.outFile.Close()`
			`if err != nil {`
			`return err`
			`}`
			`err = os.Rename(c.outFile.Name(), outputPath)`
			`return err`
			`}`

Support "OZ Page" image links 2024-01-14 13:45:25 -08:00			`func getOzPageImgLinks(doc soup.Root) []string {`
			`// regex find the documentURL, e.g:`
			`// viewerOptions: {`
			`// // 필수항목`
			`// containerId: '#ozViewer',`
			`// documentURL: 'https://global.apis.naver.com/lineWebtoon/webtoon/motiontoonJson.json?seq=2830&hashValue=2e0b924676bdc38241bd8fd452191fe3',`
			`re := regexp.MustCompile("viewerOptions: \\{\n.// 필수항목\n.containerId: '#ozViewer',\n.*documentURL: '(.+)'")`
			`matches := re.FindStringSubmatch(doc.HTML())`
			`if len(matches) != 2 {`
			`fmt.Println("could not find documentURL")`
			`os.Exit(1)`
			`}`

			`// fetch json at documentURL and deserialize to MotiontoonJson`
			`resp, err := soup.Get(matches[1])`
			`if err != nil {`
			`fmt.Println(fmt.Sprintf("Error fetching page: %v", err))`
			`os.Exit(1)`
			`}`
			`var motionToon MotiontoonJson`
			`if err := json.Unmarshal([]byte(resp), &motionToon); err != nil {`
			`fmt.Println(fmt.Sprintf("Error unmarshalling json: %v", err))`
			`os.Exit(1)`
			`}`

			`// get sorted keys`
			`var sortedKeys []string`
			`for k := range motionToon.Assets.Image {`
			`sortedKeys = append(sortedKeys, k)`
			`}`
			`sort.Strings(sortedKeys)`

			`// get path rule, e.g:`
			`// motiontoonParam: {`
			`// pathRuleParam: {`
			`// stillcut: 'https://ewebtoon-phinf.pstatic.net/motiontoon/3536_2e0b924676bdc38241bd8fd452191fe3/{=filename}?type=q70',`
			`re = regexp.MustCompile("motiontoonParam: \\{\n.pathRuleParam: \\{\n.stillcut: '(.+)'")`
			`matches = re.FindStringSubmatch(doc.HTML())`
			`if len(matches) != 2 {`
			`fmt.Println("could not find pathRule")`
			`os.Exit(1)`
			`}`
			`var imgs []string`
			`for _, k := range sortedKeys {`
			`imgs = append(imgs, strings.ReplaceAll(matches[1], "{=filename}", motionToon.Assets.Image[k]))`
			`}`
			`return imgs`
			`}`

allow downloading entire comics 2023-12-23 16:42:43 -08:00			`func getImgLinksForEpisode(url string) []string {`
finish 2023-12-21 09:20:50 -08:00			`resp, err := soup.Get(url)`
episode ranges 2023-12-24 20:12:52 -08:00			`time.Sleep(200 * time.Millisecond)`
finish 2023-12-21 09:20:50 -08:00			`if err != nil {`
			`fmt.Println(fmt.Sprintf("Error fetching page: %v", err))`
			`os.Exit(1)`
			`}`
			`doc := soup.HTMLParse(resp)`
			`imgs := doc.Find("div", "class", "viewer_lst").FindAll("img")`
Support "OZ Page" image links 2024-01-14 13:45:25 -08:00			`if len(imgs) == 0 {`
			`// some comics seem to serve images from a different backend, something about oz`
			`return getOzPageImgLinks(doc)`
			`}`
finish 2023-12-21 09:20:50 -08:00			`var imgLinks []string`
			`for _, img := range imgs {`
allow downloading entire comics 2023-12-23 16:42:43 -08:00			`if dataURL, ok := img.Attrs()["data-url"]; ok {`
			`imgLinks = append(imgLinks, dataURL)`
			`}`
finish 2023-12-21 09:20:50 -08:00			`}`
			`return imgLinks`
			`}`

allow downloading entire comics 2023-12-23 16:42:43 -08:00			`func getEpisodeLinksForPage(url string) ([]string, error) {`
			`resp, err := soup.Get(url)`
episode ranges 2023-12-24 20:12:52 -08:00			`time.Sleep(200 * time.Millisecond)`
allow downloading entire comics 2023-12-23 16:42:43 -08:00			`if err != nil {`
			`return []string{}, fmt.Errorf("error fetching page: %v", err)`
			`}`
			`doc := soup.HTMLParse(resp)`
			`episodeURLs := doc.Find("div", "class", "detail_lst").FindAll("a")`
			`var links []string`
			`for _, episodeURL := range episodeURLs {`
			`if href := episodeURL.Attrs()["href"]; strings.Contains(href, "/viewer") {`
			`links = append(links, href)`
			`}`
			`}`
			`return links, nil`
			`}`

batch episodes together, default 10 2024-01-01 11:50:39 -04:00			`func getEpisodeBatches(url string, minEp, maxEp, epsPerBatch int) []EpisodeBatch {`
allow downloading entire comics 2023-12-23 16:42:43 -08:00			`if strings.Contains(url, "/viewer") {`
			`// assume viewing single episode`
batch episodes together, default 10 2024-01-01 11:50:39 -04:00			`return []EpisodeBatch{{`
			`imgLinks: getImgLinksForEpisode(url),`
			`minEp: episodeNo(url),`
			`maxEp: episodeNo(url),`
			`}}`
allow downloading entire comics 2023-12-23 16:42:43 -08:00			`} else {`
batch episodes together, default 10 2024-01-01 11:50:39 -04:00			`// assume viewing set of episodes`
episode ranges 2023-12-24 20:12:52 -08:00			`println("scanning all pages to get all episode links")`
			`allEpisodeLinks := getAllEpisodeLinks(url)`
			`println(fmt.Sprintf("found %d total episodes", len(allEpisodeLinks)))`

			`var desiredEpisodeLinks []string`
			`for _, episodeLink := range allEpisodeLinks {`
			`epNo := episodeNo(episodeLink)`
			`if epNo >= minEp && epNo <= maxEp {`
			`desiredEpisodeLinks = append(desiredEpisodeLinks, episodeLink)`
allow downloading entire comics 2023-12-23 16:42:43 -08:00			`}`
			`}`
batch episodes together, default 10 2024-01-01 11:50:39 -04:00			`actualMinEp := episodeNo(desiredEpisodeLinks[0])`
			`if minEp > actualMinEp {`
			`actualMinEp = minEp`
			`}`
			`actualMaxEp := episodeNo(desiredEpisodeLinks[len(desiredEpisodeLinks)-1])`
			`if maxEp < actualMaxEp {`
			`actualMaxEp = maxEp`
			`}`
			`println(fmt.Sprintf("fetching image links for episodes %d through %d", actualMinEp, actualMaxEp))`
episode ranges 2023-12-24 20:12:52 -08:00
batch episodes together, default 10 2024-01-01 11:50:39 -04:00			`var episodeBatches []EpisodeBatch`
			`for start := 0; start < len(desiredEpisodeLinks); start += epsPerBatch {`
			`end := start + epsPerBatch`
			`if end > len(desiredEpisodeLinks) {`
			`end = len(desiredEpisodeLinks)`
			`}`
			`episodeBatches = append(episodeBatches, EpisodeBatch{`
			`imgLinks: getImgLinksForEpisodes(desiredEpisodeLinks[start:end], actualMaxEp),`
			`minEp: episodeNo(desiredEpisodeLinks[start]),`
			`maxEp: episodeNo(desiredEpisodeLinks[end-1]),`
			`})`
			`}`
			`return episodeBatches`
episode ranges 2023-12-24 20:12:52 -08:00			`}`
			`}`

			`func getAllEpisodeLinks(url string) []string {`
			`re := regexp.MustCompile("&page=[0-9]+")`
			`episodeLinkSet := make(map[string]struct{})`
			`foundLastPage := false`
			`for page := 1; !foundLastPage; page++ {`
			`url = re.ReplaceAllString(url, "") + fmt.Sprintf("&page=%d", page)`
			`episodeLinks, err := getEpisodeLinksForPage(url)`
			`if err != nil {`
			`break`
allow downloading entire comics 2023-12-23 16:42:43 -08:00			`}`
episode ranges 2023-12-24 20:12:52 -08:00			`for _, episodeLink := range episodeLinks {`
			`// when you go past the last page, it just rerenders the last page`
			`if _, ok := episodeLinkSet[episodeLink]; ok {`
			`foundLastPage = true`
			`break`
allow downloading entire comics 2023-12-23 16:42:43 -08:00			`}`
episode ranges 2023-12-24 20:12:52 -08:00			`episodeLinkSet[episodeLink] = struct{}{}`
allow downloading entire comics 2023-12-23 16:42:43 -08:00			`}`
episode ranges 2023-12-24 20:12:52 -08:00			`if !foundLastPage {`
			`println(url)`
allow downloading entire comics 2023-12-23 16:42:43 -08:00			`}`
			`}`
episode ranges 2023-12-24 20:12:52 -08:00
			`allEpisodeLinks := make([]string, 0, len(episodeLinkSet))`
			`for episodeLink := range episodeLinkSet {`
			`allEpisodeLinks = append(allEpisodeLinks, episodeLink)`
			`}`

			`// extract episode_no from url and sort by it`
			`sort.Slice(allEpisodeLinks, func(i, j int) bool {`
			`return episodeNo(allEpisodeLinks[i]) < episodeNo(allEpisodeLinks[j])`
			`})`
			`return allEpisodeLinks`
			`}`

			`func episodeNo(episodeLink string) int {`
			`re := regexp.MustCompile("episode_no=([0-9]+)")`
			`matches := re.FindStringSubmatch(episodeLink)`
			`if len(matches) != 2 {`
			`return 0`
			`}`
			`episodeNo, err := strconv.Atoi(matches[1])`
			`if err != nil {`
			`return 0`
			`}`
			`return episodeNo`
			`}`

batch episodes together, default 10 2024-01-01 11:50:39 -04:00			`func getImgLinksForEpisodes(episodeLinks []string, actualMaxEp int) []string {`
episode ranges 2023-12-24 20:12:52 -08:00			`var allImgLinks []string`
			`for _, episodeLink := range episodeLinks {`
batch episodes together, default 10 2024-01-01 11:50:39 -04:00			`println(fmt.Sprintf("fetching image links for episode %d/%d", episodeNo(episodeLink), actualMaxEp))`
episode ranges 2023-12-24 20:12:52 -08:00			`allImgLinks = append(allImgLinks, getImgLinksForEpisode(episodeLink)...)`
			`}`
			`return allImgLinks`
allow downloading entire comics 2023-12-23 16:42:43 -08:00			`}`

working 2023-12-21 08:38:48 -08:00			`func fetchImage(imgLink string) []byte {`
			`req, e := http.NewRequest("GET", imgLink, nil)`
			`if e != nil {`
			`fmt.Println(e)`
			`os.Exit(1)`
			`}`
			`req.Header.Set("Referer", "http://www.webtoons.com")`

			`response, err := http.DefaultClient.Do(req)`
			`if err != nil {`
finish 2023-12-21 09:20:50 -08:00			`fmt.Println(err.Error())`
			`os.Exit(1)`
working 2023-12-21 08:38:48 -08:00			`}`
			`defer func(Body io.ReadCloser) {`
			`err := Body.Close()`
			`if err != nil {`
finish 2023-12-21 09:20:50 -08:00			`fmt.Println(err.Error())`
			`os.Exit(1)`
working 2023-12-21 08:38:48 -08:00			`}`
			`}(response.Body)`

			`buff := new(bytes.Buffer)`
			`_, err = buff.ReadFrom(response.Body)`
			`if err != nil {`
finish 2023-12-21 09:20:50 -08:00			`fmt.Println(err.Error())`
			`os.Exit(1)`
working 2023-12-21 08:38:48 -08:00			`}`
			`return buff.Bytes()`
			`}`

episode ranges 2023-12-24 20:12:52 -08:00			`func addImgToPdf(pdf *gopdf.GoPdf, imgLink string) error {`
			`img := fetchImage(imgLink)`
			`holder, err := gopdf.ImageHolderByBytes(img)`
			`if err != nil {`
			`return err`
			`}`

			`d, _, err := image.DecodeConfig(bytes.NewReader(img))`
			`if err != nil {`
			`return err`
			`}`

			`// gopdf assumes dpi 128 https://github.com/signintech/gopdf/issues/168`
			`// W and H are in points, 1 point = 1/72 inch`
			`// convert pixels (Width and Height) to points`
			`// subtract 1 point to account for margins`
			`pdf.AddPageWithOption(gopdf.PageOption{PageSize: &gopdf.Rect{`
			`W: float64(d.Width)*72/128 - 1,`
			`H: float64(d.Height)*72/128 - 1,`
			`}})`
			`return pdf.ImageByHolder(holder, 0, 0, nil)`
			`}`

initial commit 2023-12-20 10:33:51 -08:00			`func main() {`
finish 2023-12-21 09:20:50 -08:00			`if len(os.Args) < 2 {`
			`fmt.Println("Usage: webtoon-dl <url>")`
initial commit 2023-12-20 10:33:51 -08:00			`os.Exit(1)`
			`}`
episode ranges 2023-12-24 20:12:52 -08:00			`minEp := flag.Int("min-ep", 0, "Minimum episode number to download (inclusive)")`
			`maxEp := flag.Int("max-ep", math.MaxInt, "Maximum episode number to download (inclusive)")`
batch episodes together, default 10 2024-01-01 11:50:39 -04:00			`epsPerFile := flag.Int("eps-per-file", 10, "Number of episodes to put in each PDF file")`
episode ranges 2023-12-24 20:12:52 -08:00			`flag.Parse()`
			`if minEp > maxEp {`
			`fmt.Println("min-ep must be less than or equal to max-ep")`
			`os.Exit(1)`
			`}`
batch episodes together, default 10 2024-01-01 11:50:39 -04:00			`if *epsPerFile < 1 {`
			`fmt.Println("eps-per-file must be greater than or equal to 1")`
			`os.Exit(1)`
			`}`
			`if *minEp < 0 {`
			`fmt.Println("min-ep must be greater than or equal to 0")`
			`os.Exit(1)`
			`}`
episode ranges 2023-12-24 20:12:52 -08:00
			`url := os.Args[len(os.Args)-1]`
batch episodes together, default 10 2024-01-01 11:50:39 -04:00			`episodeBatches := getEpisodeBatches(url, minEp, maxEp, *epsPerFile)`
working 2023-12-21 08:38:48 -08:00
batch episodes together, default 10 2024-01-01 11:50:39 -04:00			`totalPages := 0`
			`for _, episodeBatch := range episodeBatches {`
			`totalPages += len(episodeBatch.imgLinks)`
			`}`
			`totalEpisodes := episodeBatches[len(episodeBatches)-1].maxEp - episodeBatches[0].minEp + 1`
			`fmt.Println(fmt.Sprintf("found %d total image links across %d episodes", totalPages, totalEpisodes))`
			`fmt.Println(fmt.Sprintf("saving into %d files with max of %d episodes per file", len(episodeBatches), *epsPerFile))`

			`for _, episodeBatch := range episodeBatches {`
start cbz 2024-04-13 19:54:50 -07:00			`var err error`
batch episodes together, default 10 2024-01-01 11:50:39 -04:00			`outURL := strings.ReplaceAll(url, "http://", "")`
			`outURL = strings.ReplaceAll(outURL, "https://", "")`
			`outURL = strings.ReplaceAll(outURL, "www.", "")`
			`outURL = strings.ReplaceAll(outURL, "webtoons.com/", "")`
			`outURL = strings.Split(outURL, "?")[0]`
			`outURL = strings.ReplaceAll(outURL, "/viewer", "")`
			`outURL = strings.ReplaceAll(outURL, "/", "-")`
			`if episodeBatch.minEp != episodeBatch.maxEp {`
			`outURL = fmt.Sprintf("%s-epNo%d-epNo%d", outURL, episodeBatch.minEp, episodeBatch.maxEp)`
			`} else {`
			`outURL = fmt.Sprintf("%s-epNo%d", outURL, episodeBatch.minEp)`
			`}`
start cbz 2024-04-13 19:54:50 -07:00			`//comic := newPDFComic()`
			`//outPath := outURL + ".pdf"`
			`comic, err := newCBZComic()`
			`if err != nil {`
			`fmt.Println(err.Error())`
			`os.Exit(1)`
			`}`
			`outPath := outURL + ".cbz"`
			`for idx, imgLink := range episodeBatch.imgLinks {`
			`if strings.Contains(imgLink, ".gif") {`
			`fmt.Println(fmt.Sprintf("WARNING: skipping gif %s", imgLink))`
			`continue`
			`}`
			`err := comic.addImage(fetchImage(imgLink))`
			`if err != nil {`
			`fmt.Println(err.Error())`
			`os.Exit(1)`
			`}`
			`fmt.Println(fmt.Sprintf("saving episodes %d through %d: added page %d/%d", episodeBatch.minEp, episodeBatch.maxEp, idx+1, len(episodeBatch.imgLinks)))`
			`}`

			`err = comic.save(outPath)`
working 2023-12-21 08:38:48 -08:00			`if err != nil {`
finish 2023-12-21 09:20:50 -08:00			`fmt.Println(err.Error())`
			`os.Exit(1)`
working 2023-12-21 08:38:48 -08:00			`}`
batch episodes together, default 10 2024-01-01 11:50:39 -04:00			`fmt.Println(fmt.Sprintf("saved to %s", outPath))`
working 2023-12-21 08:38:48 -08:00			`}`
start cbz 2024-04-13 19:54:50 -07:00
			`//createCbz := func(episodeBatches []EpisodeBatch, outPath string) error {`
			`// out, err := os.Create(outPath)`
			`// if err != nil {`
			`// return err`
			`// }`
			`// defer func(out *os.File) {`
			`// err := out.Close()`
			`// if err != nil {`
			`// fmt.Println(err.Error())`
			`// os.Exit(1)`
			`// }`
			`// }(out)`
			`//`
			`// zipWriter := zip.NewWriter(out)`
			`// for _, episodeBatch := range episodeBatches {`
			`// for idx, imgLink := range episodeBatch.imgLinks {`
			`// if strings.Contains(imgLink, ".gif") {`
			`// fmt.Println(fmt.Sprintf("WARNING: skipping gif %s", imgLink))`
			`// continue`
			`// }`
			`// img := fetchImage(imgLink)`
			`// f, err := zipWriter.Create(fmt.Sprintf("%d.jpg", idx))`
			`// if err != nil {`
			`// return err`
			`// }`
			`// _, err = f.Write(img)`
			`// if err != nil {`
			`// return err`
			`// }`
			`// fmt.Println(fmt.Sprintf("saving episodes %d through %d: added page %d/%d", episodeBatch.minEp, episodeBatch.maxEp, idx+1, len(episodeBatch.imgLinks)))`
			`// }`
			`// }`
			`// err = zipWriter.Close()`
			`// if err != nil {`
			`// return err`
			`// }`
			`// return nil`
			`//}`
			`//// create cbz output from image links`
			`//outPath := "output.cbz"`
			`//err := createCbz(episodeBatches, outPath)`
			`//if err != nil {`
			`// fmt.Println(err.Error())`
			`// os.Exit(1)`
			`//}`
			`//fmt.Println(fmt.Sprintf("saved to %s", outPath))`
initial commit 2023-12-20 10:33:51 -08:00			`}`