# Source code for qiuwenbot.utils

# qiuwenbot, a bot to contribute to qiuwen.wiki
# Copyright (C) 2022  Jinzhe Zeng
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
import re
from typing import Dict

import pywikibot
from pywikibot.textlib import ignore_case

from .bot import get_page



# [docs]
def archieve_page(page: pywikibot.Page, site: pywikibot.Site) -> pywikibot.Page:
    """Archive a page by moving it to the first unused archive subpage.

    Candidate titles have the form ``<original title>/存档<n>`` with ``n``
    starting at 1 and increasing until a title is found whose page does
    not exist yet.

    Parameters
    ----------
    page : pywikibot.Page
        Page to archive.
    site : pywikibot.Site
        Site handed to ``get_page`` when looking pages up by title.

    Returns
    -------
    pywikibot.Page
        The page obtained for the original title after the move, with its
        text set to ``{{Archives}}`` and saved.
    """
    original_title = page.title()
    index = 1
    while True:
        archive_title = original_title + "/存档%d" % index
        candidate = get_page(archive_title, site)
        if candidate.exists():
            # This archive slot is already taken; try the next number.
            index += 1
            continue

        # Drop the archive-index template from the content before it is
        # moved, so the archived copy does not carry it.
        page.text = page.text.replace("{{Archives}}", "")
        page.save("删除archieves模板")
        page.move(archive_title, "存档", movetalk=False, movesubpages=False)

        # Re-fetch the page at the original title and give it the template.
        fresh_page = get_page(original_title, site)
        fresh_page.text = "{{Archives}}"
        fresh_page.save("加入archieves模板")
        return fresh_page




# [docs]
def get_cat_regex(name: str = r"[^\[\]]+") -> re.Pattern:
    """Build a regex matching category links such as ``[[Category:Foo]]``.

    The namespace part accepts ``Category``, ``分類`` and ``分类``, each
    passed through ``ignore_case``.

    Parameters
    ----------
    name : str, optional
        Name or regex of the category, by default any text without
        square brackets (i.e. all categories).

    Returns
    -------
    re.Pattern
        Compiled pattern with the named groups ``namespace`` and ``name``.
    """
    namespace_alternatives = "|".join(
        ignore_case(label) for label in ("Category", "分類", "分类")
    )
    pattern = rf"\[\[ *(?P<namespace>{namespace_alternatives})\s*:(?P<name>{name})\]\]"
    return re.compile(pattern)




# [docs]
def get_template_regex(name: str = r"[^{\|#0-9][^{\|#]*?", end: str = "") -> re.Pattern:
    """Build a regex matching template calls such as ``{{Name|params}}``.

    Parameters
    ----------
    name : str, optional
        Name or regex of the template, by default all templates.
    end : str, optional
        Regex appended directly after the closing ``}}``, by default "".

    Returns
    -------
    re.Pattern
        Compiled pattern with the named groups ``name`` and ``params``;
        ``params`` is ``None`` when the template has no ``|`` part.

    Notes
    -----
    The pattern is compiled with ``re.VERBOSE``, so unescaped whitespace
    inside ``name`` or ``end`` is ignored by the regex engine.
    """
    flags = re.VERBOSE | re.DOTALL
    # Doubled braces are literal braces in the rf-string; the params group
    # tolerates nested {{{...}}}, {{...}} and {...} constructs one level deep.
    pattern = rf"""
        {{{{\s*(?:msg:\s*)?
        (?P<name>({name}))\s*
        (?:\|(?P<params> [^{{]*?
                (({{{{{{[^{{}}]+?}}}}}}
                    |{{{{[^{{}}]+?}}}}
                    |{{[^{{}}]*?}}
                ) [^{{]*?
                )*?
            )?
        )?
        }}}}{end}
        """
    return re.compile(pattern, flags)




# [docs]
def devide_parameters(params: str) -> Dict[str, str]:
    """Divide a template parameter string into a dict.

    Nested templates are removed first so that a ``|`` inside them is not
    mistaken for a parameter separator. The remaining text is split on
    ``|``; segments containing ``=`` become named parameters, all others
    become positional parameters.

    Parameters
    ----------
    params : str
        Parameter string (the text after the first ``|`` of a template).
        ``None`` is accepted and yields an empty dict.

    Returns
    -------
    Dict[str, str]
        Mapping of stripped parameter names to stripped values. Positional
        parameters are keyed ``"1"``, ``"2"``, ... counting only the
        unnamed segments, which is how MediaWiki numbers them.
    """
    if params is None:
        return {}
    # Remove sub-templates so their inner "|" does not split a parameter.
    params = get_template_regex().sub("", params)
    params_dict = {}
    positional_index = 0
    for param in params.split("|"):
        key, separator, value = param.partition("=")
        if separator:
            params_dict[key.strip()] = value.strip()
        else:
            # Named parameters must not consume a positional number:
            # in "a=1|foo" the value "foo" is parameter 1, not 2.
            positional_index += 1
            params_dict[str(positional_index)] = param.strip()
    return params_dict



# Names of countries and regions, written in Simplified Chinese.
# NOTE(review): the identifier is a misspelling of "countries"; it is left
# unchanged here because other modules may import it under this name.
coutries = [
    "阿富汗",
    "阿尔巴尼亚",
    "阿尔及利亚",
    "安道尔",
    "安哥拉",
    "安提瓜和巴布达",
    "阿根廷",
    "亚美尼亚",
    "澳大利亚",
    "奥地利",
    "阿塞拜疆",
    "巴哈马",
    "巴林",
    "孟加拉国",
    "巴巴多斯",
    "白俄罗斯",
    "比利时",
    "伯利兹",
    "贝宁",
    "不丹",
    "玻利维亚",
    "波斯尼亚和黑塞哥维那",
    "博茨瓦纳",
    "巴西",
    "文莱",
    "保加利亚",
    "布基纳法索",
    "布隆迪",
    "柬埔寨",
    "喀麦隆",
    "加拿大",
    "佛得角",
    "中非共和国",
    "乍得",
    "智利",
    "中国",
    "哥伦比亚",
    "科摩罗",
    "刚果（布）",
    "刚果（金）",
    "哥斯达黎加",
    "科特迪瓦",
    "克罗地亚",
    "古巴",
    "塞浦路斯",
    "捷克",
    "朝鲜",
    "丹麦",
    "吉布提",
    "多米尼加",
    "厄瓜多尔",
    "埃及",
    "萨尔瓦多",
    "赤道几内亚",
    "厄立特里亚",
    "爱沙尼亚",
    "埃塞俄比亚",
    "斐济",
    "芬兰",
    "法国",
    "加蓬",
    "冈比亚",
    "格鲁吉亚",
    "德国",
    "加纳",
    "希腊",
    "格林纳达",
    "危地马拉",
    "几内亚",
    "几内亚比绍",
    "圭亚那",
    "海地",
    "洪都拉斯",
    "匈牙利",
    "冰岛",
    "印度",
    "印度尼西亚",
    "伊朗",
    "伊拉克",
    "爱尔兰",
    "以色列",
    "意大利",
    "牙买加",
    "日本",
    "约旦",
    "哈萨克斯坦",
    "肯尼亚",
    "科索沃",
    "科威特",
    "吉尔吉斯斯坦",
    "老挝",
    "拉脱维亚",
    "黎巴嫩",
    "莱索托",
    "利比里亚",
    "利比亚",
    "立陶宛",
    "卢森堡",
    "马其顿",
    "马达加斯加",
    "马拉维",
    "马来西亚",
    "马尔代夫",
    "马里",
    "马耳他",
    "毛里塔尼亚",
    "毛里求斯",
    "墨西哥",
    "摩尔多瓦",
    "蒙古",
    "黑山",
    "摩洛哥",
    "莫桑比克",
    "缅甸",
    "纳米比亚",
    "尼泊尔",
    "荷兰",
    "新西兰",
    "尼加拉瓜",
    "尼日尔",
    "尼日利亚",
    "挪威",
    "阿曼",
    "巴基斯坦",
    "巴拿马",
    "巴布亚新几内亚",
    "巴拉圭",
    "秘鲁",
    "菲律宾",
    "波兰",
    "葡萄牙",
    "卡塔尔",
    "罗马尼亚",
    "俄罗斯",
    "卢旺达",
    "圣基茨和尼维斯",
    "圣卢西亚",
    "圣文森特和格林纳丁斯",
    "萨摩亚",
    "圣马力诺",
    "圣多美和普林西比",
    "沙特阿拉伯",
    "塞内加尔",
    "塞尔维亚",
    "塞舌尔",
    "塞拉利昂",
    "新加坡",
    "斯洛伐克",
    "斯洛文尼亚",
    "所罗门群岛",
    "索马里",
    "南非",
    "韩国",
    "南苏丹",
    "西班牙",
    "斯里兰卡",
    "苏丹",
    "苏里南",
    "斯威士兰",
    "瑞典",
    "瑞士",
    "叙利亚",
    "塔吉克斯坦",
    "坦桑尼亚",
    "泰国",
    "东帝汶",
    "多哥",
    "汤加",
    "特立尼达和多巴哥",
    "突尼斯",
    "土耳其",
    "土库曼斯坦",
    "乌干达",
    "乌克兰",
    "阿拉伯联合酋长国",
    "英国",
    "美国",
    "乌拉圭",
    "乌兹别克斯坦",
    "瓦努阿图",
    "委内瑞拉",
    "越南",
    "也门",
    "赞比亚",
    "津巴布韦",
    "中国台湾",
    "中国香港",
    "中国澳门",
]