# Source code for qiuwenbot.utils

# qiuwenbot, a bot to contribute to qiuwen.wiki
# Copyright (C) 2022  Jinzhe Zeng
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
import re
from typing import Dict

import pywikibot
from pywikibot.textlib import ignore_case

from .bot import get_page


def archieve_page(page: pywikibot.Page, site: pywikibot.Site) -> pywikibot.Page:
    """Archive a page under the first unused ``<title>/存档N`` subpage title.

    Candidate titles ``<title>/存档1``, ``<title>/存档2``, ... are probed in
    order until one is found whose page does not exist yet. The
    ``{{Archives}}`` marker is then stripped from the page text, the page is
    saved and moved to that title, and a new page containing only
    ``{{Archives}}`` is saved under the original title.

    Parameters
    ----------
    page : pywikibot.Page
        page to archive
    site : pywikibot.Site
        qiuwen site

    Returns
    -------
    pywikibot.Page
        page with the old title, as obtained from ``get_page`` after the move
    """
    oldtitle = page.title()

    # Find the first archive number whose page does not exist yet.
    ii = 1
    while get_page(f"{oldtitle}/存档{ii}", site).exists():
        ii += 1
    title = f"{oldtitle}/存档{ii}"

    # The archived copy must not carry the archive-index template itself.
    page.text = page.text.replace("{{Archives}}", "")
    page.save("删除archieves模板")
    # Neither the talk page nor subpages are moved along with the page.
    page.move(title, "存档", movetalk=False, movesubpages=False)

    # Re-fetch the original title and leave only the archive index there.
    oldpage = get_page(oldtitle, site)
    oldpage.text = "{{Archives}}"
    oldpage.save("加入archieves模板")
    return oldpage
def get_cat_regex(name: str = r"[^\[\]]+") -> re.Pattern:
    """Build a regex matching category links such as ``[[Category:Foo]]``.

    The namespace part accepts ``Category``, ``分類`` or ``分类``, each passed
    through pywikibot's ``ignore_case`` helper before being joined into one
    alternation.

    Parameters
    ----------
    name : str, optional
        Name or regex of the category, by default any run of characters
        other than ``[`` and ``]`` (i.e. all categories).

    Returns
    -------
    re.Pattern
        Compiled pattern with the named groups ``namespace`` and ``name``.
    """
    namespace = "|".join(
        ignore_case(prefix) for prefix in ("Category", "分類", "分类")
    )
    return re.compile(
        rf"\[\[ *(?P<namespace>{namespace})\s*:(?P<name>{name})\]\]"
    )
def get_template_regex(name: str = r"[^{\|#0-9][^{\|#]*?", end: str = "") -> re.Pattern:
    """Build a regex matching a template call such as ``{{name|params}}``.

    The parameter part may itself contain brace groups one level deep
    (``{{{...}}}``, ``{{...}}`` or ``{...}``).

    Parameters
    ----------
    name : str, optional
        Name or regex of the template, by default all templates.
    end : str, optional
        Regex appended right after the closing ``}}``, by default "".

    Returns
    -------
    re.Pattern
        Compiled pattern with the named groups ``name`` and ``params``;
        ``params`` is ``None`` when the template has no ``|`` part.

    Notes
    -----
    The pattern is compiled with ``re.VERBOSE``, so unescaped whitespace and
    ``#`` outside a character class in ``name`` or ``end`` follow verbose-mode
    rules (ignored / treated as a comment start).
    """
    flags = re.VERBOSE | re.DOTALL
    # Braces are doubled because this is an f-string; "{{{{" is a literal "{{".
    pattern = rf"""
    {{{{\s*(?:msg:\s*)?
    (?P<name>({name}))\s*
    (?:\|(?P<params> [^{{]*?
        (({{{{{{[^{{}}]+?}}}}}}
        |{{{{[^{{}}]+?}}}}
        |{{[^{{}}]*?}}
        ) [^{{]*?
        )*?
    )?
    )?
    }}}}{end}
    """
    return re.compile(pattern, flags)
def devide_parameters(params: str) -> Dict[str, str]:
    """Divide a template parameter string and remove subtemplates in it.

    Nested templates are removed from ``params`` first; the remainder is
    split on ``|``. A piece containing ``=`` becomes a named parameter
    (split at the first ``=``); any other piece becomes a positional
    parameter keyed ``"1"``, ``"2"``, ... Positional keys count only the
    unnamed pieces, so named parameters do not consume a number. Keys and
    values are stripped of surrounding whitespace.

    Parameters
    ----------
    params : str
        parameter string (the part after the first ``|`` of a template);
        ``None`` is accepted and yields an empty dict

    Returns
    -------
    Dict[str, str]
        dict of params
    """
    if params is None:
        return {}
    # Drop nested templates first so that a "|" inside them is not mistaken
    # for a separator between the outer template's parameters.
    params = get_template_regex().sub("", params)
    params_dict = {}
    positional = 0
    for param in params.split("|"):
        key, sep, value = param.partition("=")
        if sep:
            params_dict[key.strip()] = value.strip()
        else:
            # Number unnamed parameters independently of named ones.
            positional += 1
            params_dict[str(positional)] = param.strip()
    return params_dict
# Chinese short names of countries and regions, ten entries per row.
coutries = [
    "阿富汗", "阿尔巴尼亚", "阿尔及利亚", "安道尔", "安哥拉", "安提瓜和巴布达", "阿根廷", "亚美尼亚", "澳大利亚", "奥地利",
    "阿塞拜疆", "巴哈马", "巴林", "孟加拉国", "巴巴多斯", "白俄罗斯", "比利时", "伯利兹", "贝宁", "不丹",
    "玻利维亚", "波斯尼亚和黑塞哥维那", "博茨瓦纳", "巴西", "文莱", "保加利亚", "布基纳法索", "布隆迪", "柬埔寨", "喀麦隆",
    "加拿大", "佛得角", "中非共和国", "乍得", "智利", "中国", "哥伦比亚", "科摩罗", "刚果(布)", "刚果(金)",
    "哥斯达黎加", "科特迪瓦", "克罗地亚", "古巴", "塞浦路斯", "捷克", "朝鲜", "丹麦", "吉布提", "多米尼加",
    "厄瓜多尔", "埃及", "萨尔瓦多", "赤道几内亚", "厄立特里亚", "爱沙尼亚", "埃塞俄比亚", "斐济", "芬兰", "法国",
    "加蓬", "冈比亚", "格鲁吉亚", "德国", "加纳", "希腊", "格林纳达", "危地马拉", "几内亚", "几内亚比绍",
    "圭亚那", "海地", "洪都拉斯", "匈牙利", "冰岛", "印度", "印度尼西亚", "伊朗", "伊拉克", "爱尔兰",
    "以色列", "意大利", "牙买加", "日本", "约旦", "哈萨克斯坦", "肯尼亚", "科索沃", "科威特", "吉尔吉斯斯坦",
    "老挝", "拉脱维亚", "黎巴嫩", "莱索托", "利比里亚", "利比亚", "立陶宛", "卢森堡", "马其顿", "马达加斯加",
    "马拉维", "马来西亚", "马尔代夫", "马里", "马耳他", "毛里塔尼亚", "毛里求斯", "墨西哥", "摩尔多瓦", "蒙古",
    "黑山", "摩洛哥", "莫桑比克", "缅甸", "纳米比亚", "尼泊尔", "荷兰", "新西兰", "尼加拉瓜", "尼日尔",
    "尼日利亚", "挪威", "阿曼", "巴基斯坦", "巴拿马", "巴布亚新几内亚", "巴拉圭", "秘鲁", "菲律宾", "波兰",
    "葡萄牙", "卡塔尔", "罗马尼亚", "俄罗斯", "卢旺达", "圣基茨和尼维斯", "圣卢西亚", "圣文森特和格林纳丁斯", "萨摩亚", "圣马力诺",
    "圣多美和普林西比", "沙特阿拉伯", "塞内加尔", "塞尔维亚", "塞舌尔", "塞拉利昂", "新加坡", "斯洛伐克", "斯洛文尼亚", "所罗门群岛",
    "索马里", "南非", "韩国", "南苏丹", "西班牙", "斯里兰卡", "苏丹", "苏里南", "斯威士兰", "瑞典",
    "瑞士", "叙利亚", "塔吉克斯坦", "坦桑尼亚", "泰国", "东帝汶", "多哥", "汤加", "特立尼达和多巴哥", "突尼斯",
    "土耳其", "土库曼斯坦", "乌干达", "乌克兰", "阿拉伯联合酋长国", "英国", "美国", "乌拉圭", "乌兹别克斯坦", "瓦努阿图",
    "委内瑞拉", "越南", "也门", "赞比亚", "津巴布韦", "中国台湾", "中国香港", "中国澳门",
]