fix(novelpia): some bugs and logic

Ryu juheon 2022-04-09 01:28:05 +09:00 committed by GitHub
parent f3c3c6053d
commit ace7f3d07c
1 changed file with 38 additions and 15 deletions

@@ -1,19 +1,24 @@
-from io import BytesIO
-from urllib.parse import urlparse
-from typing import List, Tuple, cast
-from requests.sessions import session
 import re
-from errors import LoginRequired
-from utils import Downloader, Soup, Session, clean_title, urljoin
+from io import BytesIO
+from typing import List, Tuple, cast
+from urllib.parse import urlparse
+
+from bs4.element import Tag
 import requests
-from bs4.element import Tag
+
+from errors import LoginRequired
+from requests.sessions import session
+from utils import Downloader, Session, Soup, clean_title, tr_, urljoin
+
+
+class SoupInfo:
+    def __init__(self, soup: Soup, number: int) -> None:
+        self.soup: Soup = soup
+        self.number: int = number


 @Downloader.register
 class Downloader_novelpia(Downloader):
-    type = "novelpia"
+    type = "test_novelpia"
     URLS = ["novelpia.com"]

     def init(self) -> None:
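
The import block is regrouped (stdlib, third-party, local) and gains tr_, and the new SoupInfo class simply pairs a parsed viewer page with the number it was fetched for, so the collect and process passes in read() below can stay in sync. A dataclass would express the same holder more tersely; a sketch, not part of this commit, with utils.Soup stubbed as BeautifulSoup:

    from dataclasses import dataclass

    from bs4 import BeautifulSoup as Soup  # stand-in for utils.Soup


    @dataclass
    class SoupInfo:
        soup: Soup   # parsed /viewer/{number} page
        number: int  # viewer number the page was fetched for
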
@@ -21,7 +26,7 @@ class Downloader_novelpia(Downloader):

     @property
     def is_novel(self):
-        return "novel" in self.url
+        return "novel" in self.parsed_url[2]

     @property
     def number(self) -> str:
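
The old check tested the whole URL, and since the host novelpia.com itself contains "novel", is_novel was true for every supported URL. Testing index 2 of the parsed URL (the path component, assuming parsed_url is the urlparse result of self.url) restricts the check to actual /novel/ pages:

    from urllib.parse import urlparse

    url = "https://novelpia.com/viewer/1234"
    print("novel" in url)               # True - the host name contains "novel"
    print("novel" in urlparse(url)[2])  # False - the path "/viewer/1234" does not
    print("novel" in urlparse("https://novelpia.com/novel/5678")[2])  # True
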
@@ -62,6 +67,7 @@ class Downloader_novelpia(Downloader):
         matched = regex.match(last_episode)
         assert matched
         total_episode_page = matched.group(1)
+        self.title = tr_("{} 개 찾음".format(total_episode_page))
         return int(total_episode_page), html

     def __get_all_viewer_numbers(self):
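
This hunk reports the episode-page count through the newly imported tr_ helper. Note the call order: here the count is formatted into the Korean template ("{} 개 찾음", "{} found") before tr_ sees it, while the next hunk translates the template first and formats afterwards; only the latter can match a lookup-table translator. A sketch with a hypothetical table-based tr_ (not the real utils.tr_):

    def tr_(s: str) -> str:
        # hypothetical lookup-table translator, for illustration only
        table = {"{} 개 찾음": "{} found"}
        return table.get(s, s)

    n = 42
    print(tr_("{} 개 찾음".format(n)))  # "42 개 찾음" - formatted string misses the table
    print(tr_("{} 개 찾음").format(n))  # "42 found"  - template translated, then formatted
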
@@ -73,13 +79,15 @@ class Downloader_novelpia(Downloader):
         for i in range(1, total_episode_page - 1):
             html = self.__proc_episoe_list_url_request(session, i)
+            self.title = f"{tr_('페이지 읽는 중...')} {i + 1}/{total_episode_page}"
             htmls.append(html)

         for html in htmls:
             soup = Soup(html)
             for element in soup.find_all("i", {"class": "icon ion-bookmark"}):
-                novel_numbers.append(int(element["id"]))
+                novel_numbers.append(int(element["id"].replace("bookmark_", "")))
+        self.title = tr_("{} 개 찾음").format(len(novel_numbers))
         return novel_numbers

     def read(self):
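
The bookmark icons apparently carry ids of the form "bookmark_<number>", so calling int() on the raw attribute raised ValueError; stripping the prefix first fixes the parse. For example:

    element_id = "bookmark_12345"  # hypothetical id attribute from the episode list
    # int(element_id) raises ValueError: invalid literal for int() with base 10
    print(int(element_id.replace("bookmark_", "")))  # 12345
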
@@ -91,19 +99,31 @@ class Downloader_novelpia(Downloader):
         else:
             viewer_numbers.append(int(self.number))

         i = 0
+        soups: List[SoupInfo] = []
         for viewer_number in viewer_numbers:
             i += 1
+            self.title = f"{tr_('읽는 중...')} {i} / {len(viewer_numbers)}"
             r = session.get(urljoin(self.url, f"/viewer/{viewer_number}"))
             soup = Soup(r.text)
+            soups.append(SoupInfo(soup, viewer_number))
+        for soup_info in soups:
+            soup = soup_info.soup
             f = BytesIO()
             title_element = soup.find("b", {"class": "cut_line_one"})
             if not title_element:
+                if self.is_novel:
+                    self.print_(
+                        f"Ignored because the next item could not be fetched because there was no logged in cookie: {soup_info.number}"
+                    )
+                    continue
                 raise LoginRequired
-            self.title = title_element.text
+            # Maybe NavigableString?
+            assert isinstance(title_element, Tag)
+            self.title = title_element.text

             # css selecter is not working :(
             ep_num = soup.find(
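
Two changes here: fetching is split into a collect pass (building soups) and a process pass, and a page without the title element no longer always aborts the download. On a novel listing it is logged and skipped, since it marks an episode that cannot be read without a login cookie; for a direct viewer URL, LoginRequired is still raised. The new assert exists because BeautifulSoup's find() is typed as returning Tag, NavigableString, or None, so the type must be narrowed before attribute access; roughly:

    from bs4 import BeautifulSoup
    from bs4.element import Tag

    soup = BeautifulSoup('<b class="cut_line_one">ep title</b>', "html.parser")
    title_element = soup.find("b", {"class": "cut_line_one"})
    assert isinstance(title_element, Tag)  # narrows Tag | NavigableString | None
    print(title_element.text)  # "ep title"
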
@@ -132,7 +152,7 @@ class Downloader_novelpia(Downloader):
             # {"s": [{"text": ""}]}
             response = session.get(
-                f"https://novelpia.com/proc/viewer_data/{viewer_number}"
+                f"https://novelpia.com/proc/viewer_data/{soup_info.number}"
             )
             if response.text:
                 response = response.json()
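
The episode text comes from the viewer_data endpoint, now keyed by soup_info.number since the viewer_number loop variable is out of scope in the second pass; the if response.text guard keeps .json() from raising on an empty body. The payload is JSON shaped like {"s": [{"text": ...}]}; a minimal sketch of consuming it:

    import json

    body = '{"s": [{"text": "first paragraph"}, {"text": "second paragraph"}]}'
    if body:  # an empty body would make json.loads() (or response.json()) raise
        data = json.loads(body)
        for text_dict in data["s"]:
            print(text_dict["text"])
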
@@ -147,8 +167,11 @@ class Downloader_novelpia(Downloader):
                         filename = img.get("data-filename") or "cover.jpg"
                         f.write(f"[{filename}]".encode("UTF-8"))
                         self.urls.append(f"https:{src}")
+                        self.filenames[f"https:{src}"] = filename
                     else:
-                        f.write(text_dict["text"].encode("UTF-8"))
+                        f.write(
+                            text_dict["text"].replace(" ", "\n").encode("UTF-8")
+                        )
                 f.seek(0)
                 self.urls.append(f)
             else:
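
Two fixes in the chunk writer: image chunks now record their target name in self.filenames (assuming the Downloader base uses that mapping to name saved files), and text chunks get their separator character (shown here as a plain space, possibly a non-breaking space flattened by the page rendering) rewritten to a real newline before encoding. The assembled BytesIO is then rewound so it can be read like a file; a minimal sketch of that assembly:

    from io import BytesIO

    f = BytesIO()
    f.write("[cover.jpg]".encode("UTF-8"))        # marker where an image chunk sits
    f.write("\nfirst paragraph\n".encode("UTF-8"))  # text chunk, separators as newlines
    f.seek(0)                                     # rewind so a reader starts at byte 0
    print(f.read().decode("UTF-8"))
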