import re
import requests
import ujson
from collections import OrderedDict
from bs4 import BeautifulSoup as soup
from ...errors import serverError, serializationFailed
# Module-wide configuration shared by every request in this file.
settings = {
# Baka-Tsuki MediaWiki API endpoint used by all queries below.
'apiurl': "https://www.baka-tsuki.org/project/api.php",
'header': {
# Identify the library to the wiki, per API etiquette.
'User-Agent': 'Pymoe (github.com/ccubed/Pymoe)'
},
# Page id of the wiki category listing active projects (used by active()).
'active': 56132,
# Regexes compiled once at import time and reused by chapters().
'compiledRegex': {
# Matches hrefs that mention a volume or chapter (case-insensitive).
'chapter': re.compile("volume|chapter", re.I),
# Extracts a 1-2 digit chapter/volume number into the 'chapter' group.
'separate': re.compile("(volume|chapter) (?P<chapter>[0-9]{1,2})", re.I)
}
}
def cover(pageid: str):
    """
    Get a cover image given a page id.

    Performs two API calls: one to find the page's image name, a second to
    resolve that name to a concrete URL.

    :param str pageid: The pageid for the light novel you want a cover image for
    :return str: the image url or None
    :raises serializationFailed: if the first API response is not valid JSON
    """
    first = requests.get(
        settings['apiurl'],
        params={
            'action': 'query',
            'prop': 'pageimages',
            'pageids': pageid,
            'format': 'json'
        },
        headers=settings['header']
    )
    try:
        data = ujson.loads(first.text)
    except ValueError:
        raise serializationFailed(first.text, first.status_code)

    # pageid can come back as an int, so key the response dict with str.
    page = data['query']['pages'][str(pageid)]
    if 'pageimage' not in page:
        return None

    second = requests.get(
        settings['apiurl'],
        params={
            'action': 'query',
            'prop': 'imageinfo',
            'iiprop': 'url',
            'titles': "File:" + page['pageimage'],
            'format': 'json'
        },
        headers=settings['header']
    )
    try:
        info = ujson.loads(second.text)
    except ValueError:
        # Best-effort on the second hop: an unparsable reply yields no cover.
        return None
    pages = info['query']['pages']
    return pages[next(iter(pages))]['imageinfo'][0]['url']
def active():
    """
    Get a list of active projects.

    Follows MediaWiki ``query-continue`` pagination so that categories with
    more than 500 members are fully enumerated.

    :return list: A list of tuples containing a title and pageid in that order.
        Empty if the server does not answer with HTTP 200.
    """
    projects = []
    params = {
        'action': 'query',
        'list': 'categorymembers',
        'cmpageid': settings['active'],
        'cmtype': 'page',
        'cmlimit': '500',
        'format': 'json'
    }
    while True:
        r = requests.get(
            settings['apiurl'],
            params=params,
            headers=settings['header']
        )
        if r.status_code != 200:
            # Best-effort: keep whatever pages were already collected.
            break
        jsd = ujson.loads(r.text)
        # BUGFIX: the previous implementation appended each page as a nested
        # list and returned projects[0], discarding every continuation page
        # (and raising IndexError when the first request failed). Extend with
        # the tuples directly so the full, flat result is returned.
        projects.extend(
            (x['title'], x['pageid']) for x in jsd['query']['categorymembers']
        )
        if 'query-continue' not in jsd:
            break
        # Carry the continuation token into the next request.
        params['cmcontinue'] = jsd['query-continue']['categorymembers']['cmcontinue']
    return projects
def chapters(title: str):
    """
    Get a list of chapters for a light novel. Keep in mind, this can be slow.
    I've certainly tried to make it as fast as possible, but it's still pulling
    text out of a webpage.

    :param str title: The title of the novel you want chapters from
    :return OrderedDict: An OrderedDict which contains the chapters found for
        the novel supplied, keyed by chapter/volume number (leading zeros
        stripped)
    :raises serverError: if the wiki page does not return HTTP 200
    """
    resp = requests.get(
        "https://www.baka-tsuki.org/project/index.php?title={}".format(title.replace(' ', '_')),
        headers=settings['header']
    )
    if resp.status_code != 200:
        raise serverError(resp.text, resp.status_code)

    page = soup(resp.text, 'html.parser')

    # Collect anchors that look like chapter links: skip image links and
    # in-page fragment links, keep hrefs mentioning "volume" or "chapter".
    candidates = []
    for anchor in page.find_all('a'):
        classes = anchor.get('class')
        if classes is not None and 'image' in classes:
            continue
        href = anchor.get('href')
        if href is None or href.startswith("#"):
            continue
        if re.search(settings['compiledRegex']['chapter'], href) is not None:
            candidates.append(anchor)

    # Group links by the chapter/volume number extracted from their label.
    grouped = OrderedDict()
    for anchor in candidates:
        # Prefer the title attribute as the label; fall back to the link text.
        label = anchor.get('title') if 'title' in anchor.attrs else anchor.text
        match = re.search(settings['compiledRegex']['separate'], label.lower())
        if match and match.groups():
            key = match.group('chapter').lstrip('0')
            grouped.setdefault(key, []).append([anchor.get('href'), label])
    return grouped