Support "OZ Page" image links
This commit is contained in:
parent
9b81cc5412
commit
2d2c8eae0a
1 changed file with 60 additions and 1 deletion
61
main.go
61
main.go
|
|
@ -2,6 +2,7 @@ package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/anaskhan96/soup"
|
"github.com/anaskhan96/soup"
|
||||||
|
|
@ -18,12 +19,67 @@ import (
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// MotiontoonJson models the part of a motiontoon JSON document that this
// program decodes: the top-level "assets" object and, inside it, the "image"
// object, which is read as a string-to-string map.
type MotiontoonJson struct {
	Assets struct {
		// Image holds the entries of the "image" object; getOzPageImgLinks
		// substitutes each value into the page's stillcut path rule.
		Image map[string]string `json:"image"`
	} `json:"assets"`
}
|
||||||
|
|
||||||
// EpisodeBatch groups a list of image links with a pair of episode bounds.
// NOTE(review): minEp and maxEp are presumably the first and last episode
// numbers the links belong to; confirm against the code that fills them in.
type EpisodeBatch struct {
	imgLinks     []string // image link strings collected for the batch
	minEp, maxEp int      // lower and upper episode bound, in that field order
}
|
||||||
|
|
||||||
|
func getOzPageImgLinks(doc soup.Root) []string {
|
||||||
|
// regex find the documentURL, e.g:
|
||||||
|
// viewerOptions: {
|
||||||
|
// // 필수항목
|
||||||
|
// containerId: '#ozViewer',
|
||||||
|
// documentURL: 'https://global.apis.naver.com/lineWebtoon/webtoon/motiontoonJson.json?seq=2830&hashValue=2e0b924676bdc38241bd8fd452191fe3',
|
||||||
|
re := regexp.MustCompile("viewerOptions: \\{\n.*// 필수항목\n.*containerId: '#ozViewer',\n.*documentURL: '(.+)'")
|
||||||
|
matches := re.FindStringSubmatch(doc.HTML())
|
||||||
|
if len(matches) != 2 {
|
||||||
|
fmt.Println("could not find documentURL")
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// fetch json at documentURL and deserialize to MotiontoonJson
|
||||||
|
resp, err := soup.Get(matches[1])
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(fmt.Sprintf("Error fetching page: %v", err))
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
var motionToon MotiontoonJson
|
||||||
|
if err := json.Unmarshal([]byte(resp), &motionToon); err != nil {
|
||||||
|
fmt.Println(fmt.Sprintf("Error unmarshalling json: %v", err))
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// get sorted keys
|
||||||
|
var sortedKeys []string
|
||||||
|
for k := range motionToon.Assets.Image {
|
||||||
|
sortedKeys = append(sortedKeys, k)
|
||||||
|
}
|
||||||
|
sort.Strings(sortedKeys)
|
||||||
|
|
||||||
|
// get path rule, e.g:
|
||||||
|
// motiontoonParam: {
|
||||||
|
// pathRuleParam: {
|
||||||
|
// stillcut: 'https://ewebtoon-phinf.pstatic.net/motiontoon/3536_2e0b924676bdc38241bd8fd452191fe3/{=filename}?type=q70',
|
||||||
|
re = regexp.MustCompile("motiontoonParam: \\{\n.*pathRuleParam: \\{\n.*stillcut: '(.+)'")
|
||||||
|
matches = re.FindStringSubmatch(doc.HTML())
|
||||||
|
if len(matches) != 2 {
|
||||||
|
fmt.Println("could not find pathRule")
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
var imgs []string
|
||||||
|
for _, k := range sortedKeys {
|
||||||
|
imgs = append(imgs, strings.ReplaceAll(matches[1], "{=filename}", motionToon.Assets.Image[k]))
|
||||||
|
}
|
||||||
|
return imgs
|
||||||
|
}
|
||||||
|
|
||||||
func getImgLinksForEpisode(url string) []string {
|
func getImgLinksForEpisode(url string) []string {
|
||||||
resp, err := soup.Get(url)
|
resp, err := soup.Get(url)
|
||||||
time.Sleep(200 * time.Millisecond)
|
time.Sleep(200 * time.Millisecond)
|
||||||
|
|
@ -33,7 +89,10 @@ func getImgLinksForEpisode(url string) []string {
|
||||||
}
|
}
|
||||||
doc := soup.HTMLParse(resp)
|
doc := soup.HTMLParse(resp)
|
||||||
imgs := doc.Find("div", "class", "viewer_lst").FindAll("img")
|
imgs := doc.Find("div", "class", "viewer_lst").FindAll("img")
|
||||||
|
if len(imgs) == 0 {
|
||||||
|
// some comics seem to serve images from a different backend, something about oz
|
||||||
|
return getOzPageImgLinks(doc)
|
||||||
|
}
|
||||||
var imgLinks []string
|
var imgLinks []string
|
||||||
for _, img := range imgs {
|
for _, img := range imgs {
|
||||||
if dataURL, ok := img.Attrs()["data-url"]; ok {
|
if dataURL, ok := img.Attrs()["data-url"]; ok {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue