Source code for qiuwenbot.filter.roc_year

# qiuwenbot, a bot to contribute to qiuwen.wiki
# Copyright (C) 2022  Jinzhe Zeng
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
import re

import cn2an

from .filter import Filter, register_filter


[docs] @register_filter class ReplaceROCyear(Filter): """Filter to replace ROC year from a string. Parameters ---------- pattern : str Pattern to replace. repl : str Replacement. """ def __init__(self): self.re_roc_year = re.compile( r"(((\[\[([^\[\]]*\|)?(中(华|華))?民(国|國)\]\])|((中(华|華))?民(国|國)))(\d+|[一二三四五六七八九十]+)年)" )
[docs] def filter(self, text: str) -> str: """Filter text. Parameters ---------- text : str Text to filter. Returns ------- str Filtered text. """ # group 0 is the entire string, group -1 is the year matched = self.re_roc_year.findall(text) for mm in matched: try: roc_year = int(mm[-1]) except ValueError: try: roc_year = cn2an.cn2an(mm[-1]) except ValueError: continue # 38 - 1949; prevent conversion of 民国19xx年 if roc_year > 38 and roc_year < 1000: ce_year = 1911 + roc_year entire_year_str = mm[0] ce_year_str = "%d年" % ce_year text = text.replace(entire_year_str, ce_year_str) # remove duplicate # first remove links text = text.replace("[[%s]]" % ce_year_str, ce_year_str) text = text.replace(f"{ce_year_str}{ce_year_str})", ce_year_str) text = text.replace(f"{ce_year_str}({ce_year_str})", ce_year_str) text = text.replace(f"{ce_year_str}{str(ce_year)})", ce_year_str) text = text.replace(f"{ce_year_str}({str(ce_year)})", ce_year_str) return text
@property def log(self) -> str: return "[[User:Njzjzbot/task4|替换非法纪年]]"