import re
import requests
import ujson
from collections import OrderedDict
from bs4 import BeautifulSoup as soup
from ...errors import serverError, serializationFailed
# Module-wide configuration shared by every request in this file.
settings = {
# Baka-Tsuki MediaWiki API endpoint used by all queries below.
'apiurl': "https://www.baka-tsuki.org/project/api.php",
'header': {
# Identify the library to the wiki, per API etiquette.
'User-Agent': 'Pymoe (github.com/ccubed/Pymoe)'
},
# Page id of the wiki category listing active projects (used by active()).
'active': 56132,
# Regexes compiled once at import time and reused by chapters().
'compiledRegex': {
# Matches hrefs that mention a volume or chapter (case-insensitive).
'chapter': re.compile("volume|chapter", re.I),
# Extracts a 1-2 digit chapter/volume number into the 'chapter' group.
'separate': re.compile("(volume|chapter) (?P<chapter>[0-9]{1,2})", re.I)
}
}
def cover(pageid: str):
    """
    Get a cover image given a page id.

    Performs two API calls: one to find the page's image name, a second to
    resolve that name to a concrete URL.

    :param str pageid: The pageid for the light novel you want a cover image for
    :return str: the image url or None
    :raises serializationFailed: if the first API response is not valid JSON
    """
    first = requests.get(
        settings['apiurl'],
        params={
            'action': 'query',
            'prop': 'pageimages',
            'pageids': pageid,
            'format': 'json'
        },
        headers=settings['header']
    )
    try:
        data = ujson.loads(first.text)
    except ValueError:
        raise serializationFailed(first.text, first.status_code)

    # pageid can come back as an int, so key the response dict with str.
    page = data['query']['pages'][str(pageid)]
    if 'pageimage' not in page:
        return None

    second = requests.get(
        settings['apiurl'],
        params={
            'action': 'query',
            'prop': 'imageinfo',
            'iiprop': 'url',
            'titles': "File:" + page['pageimage'],
            'format': 'json'
        },
        headers=settings['header']
    )
    try:
        info = ujson.loads(second.text)
    except ValueError:
        # Best-effort on the second hop: an unparsable reply yields no cover.
        return None
    pages = info['query']['pages']
    return pages[next(iter(pages))]['imageinfo'][0]['url']
def active():
    """
    Get a list of active projects.

    Follows MediaWiki ``query-continue`` pagination so that categories with
    more than 500 members are fully enumerated.

    :return list: A list of tuples containing a title and pageid in that order.
        Empty if the server does not answer with HTTP 200.
    """
    projects = []
    params = {
        'action': 'query',
        'list': 'categorymembers',
        'cmpageid': settings['active'],
        'cmtype': 'page',
        'cmlimit': '500',
        'format': 'json'
    }
    while True:
        r = requests.get(
            settings['apiurl'],
            params=params,
            headers=settings['header']
        )
        if r.status_code != 200:
            # Best-effort: keep whatever pages were already collected.
            break
        jsd = ujson.loads(r.text)
        # BUGFIX: the previous implementation appended each page as a nested
        # list and returned projects[0], discarding every continuation page
        # (and raising IndexError when the first request failed). Extend with
        # the tuples directly so the full, flat result is returned.
        projects.extend(
            (x['title'], x['pageid']) for x in jsd['query']['categorymembers']
        )
        if 'query-continue' not in jsd:
            break
        # Carry the continuation token into the next request.
        params['cmcontinue'] = jsd['query-continue']['categorymembers']['cmcontinue']
    return projects
def chapters(title: str):
    """
    Get a list of chapters for a light novel. Keep in mind, this can be slow.
    I've certainly tried to make it as fast as possible, but it's still pulling
    text out of a webpage.

    :param str title: The title of the novel you want chapters from
    :return OrderedDict: An OrderedDict which contains the chapters found for
        the novel supplied, keyed by chapter/volume number (leading zeros
        stripped)
    :raises serverError: if the wiki page does not return HTTP 200
    """
    resp = requests.get(
        "https://www.baka-tsuki.org/project/index.php?title={}".format(title.replace(' ', '_')),
        headers=settings['header']
    )
    if resp.status_code != 200:
        raise serverError(resp.text, resp.status_code)

    page = soup(resp.text, 'html.parser')

    # Collect anchors that look like chapter links: skip image links and
    # in-page fragment links, keep hrefs mentioning "volume" or "chapter".
    candidates = []
    for anchor in page.find_all('a'):
        classes = anchor.get('class')
        if classes is not None and 'image' in classes:
            continue
        href = anchor.get('href')
        if href is None or href.startswith("#"):
            continue
        if re.search(settings['compiledRegex']['chapter'], href) is not None:
            candidates.append(anchor)

    # Group links by the chapter/volume number extracted from their label.
    grouped = OrderedDict()
    for anchor in candidates:
        # Prefer the title attribute as the label; fall back to the link text.
        label = anchor.get('title') if 'title' in anchor.attrs else anchor.text
        match = re.search(settings['compiledRegex']['separate'], label.lower())
        if match and match.groups():
            key = match.group('chapter').lstrip('0')
            grouped.setdefault(key, []).append([anchor.get('href'), label])
    return grouped