# Source code for qiuwenbot.task.duplicate

# qiuwenbot, a bot to contribute to qiuwen.wiki
# Copyright (C) 2022  Jinzhe Zeng
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
"""Check for duplicated pages whose titles differ only by Chinese variant,
such as zh-cn and zh-hk.
"""

import pywikibot
from pywikibot import Page, Site
from zhconv import convert_for_mw

from qiuwenbot.bot import get_page
from qiuwenbot.qwlogger import qwlogger
from qiuwenbot.task.task import Task

# Chinese variant codes that check_page converts each page title to.
# An earlier, region-specific list is kept below for reference:
# variants = ("zh-cn", "zh-tw", "zh-hk")
variants = ("zh-hans", "zh-hant")


def check_page(page: Page, site: Site):
    """Tag pages whose titles differ from ``page`` only by Chinese variant.

    The title of ``page`` is converted to every entry of ``variants``. When
    the converted title names a different, existing page, a speedy-deletion
    template is prepended to that page's text and the page is saved:
    ``A2`` for a regular page, ``R1`` for a redirect.

    Parameters
    ----------
    page : pywikibot.Page
        page to check
    site : pywikibot.Site
        qiuwen site
    """
    deletion_prefix = "<noinclude>{{delete|"
    title = page.title()

    for variant in variants:
        title_v = convert_for_mw(title, variant)
        candidate = get_page(title_v, site)
        if not (candidate != page and candidate.exists()):
            # Either the very same page, or nothing exists under that title.
            continue

        if candidate.isRedirectPage():
            # duplicated redirects R1
            if (
                variant in ("zh-cn", "zh-hans")
                and candidate.getRedirectTarget() == page
            ):
                # A simplified-variant redirect that already points at
                # ``page`` must not be tagged.
                continue
            if (
                convert_for_mw(title_v, "zh-cn") != convert_for_mw(title, "zh-cn")
                and candidate.getRedirectTarget() == page
            ):
                # The two titles do not convert to the same zh-cn form and
                # the redirect already targets ``page``; the original author
                # describes this case only as a "technical issue".
                continue
            notice = f"<noinclude>{{{{delete|R1|c1=[[User:Njzjzbot/task2|Njzjzbot]]发现-{{'''{title_v}'''}}-与-{{[[:{title}]]}}-仅有简繁差异[[Category:Njzjzbot/R1]]}}}}</noinclude>\n"
        else:
            # duplicated pages A2
            if any(
                each.text.startswith(deletion_prefix) for each in (page, candidate)
            ):
                # One of the two pages already carries a delete template.
                continue
            notice = f"<noinclude>{{{{delete|A2|c1=[[User:Njzjzbot/task2|Njzjzbot]]发现-{{'''{title_v}'''}}-与-{{[[{title}]]}}-仅有简繁差异;请管理员复查页面历史记录,合并差异[[Category:Njzjzbot/A2]]}}}}</noinclude>\n"

        candidate.text = notice + candidate.text
        summary = f"[[User:Njzjzbot/task2|标记速删模板]]:[[{title_v}]]与[[{title}]]仅有简繁差异"
        candidate.save(summary, asynchronous=True)
class CheckDuplicatedPageTask(Task):
    """Task that runs the duplicated-variant check on pages.

    Parameters
    ----------
    user : str
        Username.
    password : str
        Password.
    pages : dict
        Pages to operate.
    """

    def __init__(
        self,
        user: str,
        password: str,
        pages: dict,
    ):
        """Initialize the task through the ``Task`` base class."""
        # The last two positional arguments are forwarded unchanged to
        # ``Task.__init__``; presumably the log page title and the task
        # description -- confirm against ``Task``.
        log_page = r"User:%s/check_duplicated_log" % user
        description = "检查重复页面"
        super().__init__(user, password, pages, log_page, description)

    def do(self, page: Page) -> bool:
        """Run ``check_page`` on a single page.

        Parameters
        ----------
        page : pywikibot.Page
            page to check

        Returns
        -------
        bool
            ``False`` when ``page`` is a redirect or when a pywikibot error
            was raised during the check; ``True`` otherwise.
        """
        if page.isRedirectPage():
            return False

        try:
            check_page(page, self.site)
        except pywikibot.exceptions.Error:
            message = "Failed to save page %s" % page.title()
            qwlogger.exception(message)
            return False
        else:
            return True