# Wiki page: Bikarhêner:Balyozxane/skrîpt/py/citeKurdifier.py
#
# From Wikipedia, the free encyclopedia.
"""
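Rewrite citation templates on wiki pages into their Kurdish equivalents.

Template names, parameter names, date values and a few selected parameter
values are converted using the mappings stored on the JSON page
"Bikarhêner:Balyozxane/skrîpt/json/citeKurdifier.json"; cosmetic changes
are then applied to the page text before it is saved.

To-do:
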
"""
import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import SingleSiteBot, ConfigParserBot, AutomaticTWSummaryBot
from kucosmetics import CANCEL, CosmeticChangesToolkit
from pywikibot.exceptions import InvalidPageError

import json
import re
import mwparserfromhell


class CiteKurdifierBot(
    SingleSiteBot,
    ConfigParserBot,
    AutomaticTWSummaryBot,
):
    """Bot that converts citation templates to their Kurdish counterparts.

    The conversion tables (template names, parameter names, month names,
    URL-status values, language values, ...) are read once, at construction
    time, from a JSON page on the wiki.
    """

    use_redirects = False
    update_options = {
        'async': False,
        'showdiff': False,
        'ignore': CANCEL.MATCH,
    }

    # "Day Month Year", e.g. "5 May 2020".
    _DAY_MONTH_YEAR = re.compile(
        r"(?P<day>\d+)\s+(?P<month>\w+)\s+(?P<year>\d+)")
    # "Month Day, Year", e.g. "May 5, 2020".
    _MONTH_DAY_YEAR = re.compile(
        r"(?P<month>\w+)\s+(?P<day>\d+),\s+(?P<year>\d+)")
    # Edition values such as "2 ed.", "2 ed", "2nd", "3rd", "4th", "1st".
    _EDITION_SUFFIX = re.compile(r"^(\d+)(?: ed\.?|th|rd|st|nd)$")

    def __init__(self, **kwargs):
        """Initialise the bot and load the conversion tables from the wiki.

        If the JSON page cannot be decoded (or does not contain a JSON
        object), an error is logged and every conversion table is left
        empty, so the bot runs without changing any template.
        """
        super().__init__(**kwargs)
        self.bot_name = "Bikarhêner:Balyozxane/skrîpt/py/citeKurdifier.py"
        # Fetch JSON content from the specified page
        json_title = "Bikarhêner:Balyozxane/skrîpt/json/citeKurdifier.json"
        json_page = pywikibot.Page(self.site, json_title)
        self.json_content = json_page.text

        # Load JSON content into a dictionary
        try:
            conversions = json.loads(self.json_content)
        except json.JSONDecodeError as e:
            pywikibot.error(f"Error decoding JSON content: {e}")
            conversions = {}

        if not isinstance(conversions, dict):
            # Anything but a JSON object has no tables to offer.
            pywikibot.error(
                f"Expected a JSON object on {json_title}, "
                f"got {type(conversions).__name__}")
            conversions = {}

        # Always define every table, even after a decoding failure, so the
        # other methods never hit a missing attribute.
        self.parameter_conversions = conversions
        self.cite_templates = conversions.get('templates', {})
        self.basic_params = conversions.get('basic_params', {})
        self.numbered_params = conversions.get('numbered_params', {})
        self.months = conversions.get('month_list', {})
        self.date_params = conversions.get('date_params', {})
        self.url_status = conversions.get('rewşa_urlyê', {})
        self.ziman = conversions.get('ziman', {})

    def do_kozmetik(self, old_text, kozmetik_cebu):
        """Apply cosmetic changes to *old_text*.

        :param old_text: page text to clean up
        :param kozmetik_cebu: summary suffix to return when nothing changed
        :return: ``(text, summary_suffix)``; *text* is always a string. If
            the toolkit reports ``False`` instead of text, *old_text* is
            returned unchanged.
        """
        cc_toolkit = CosmeticChangesToolkit(self.current_page,
                                            ignore=self.opt.ignore)
        new_text, summaries = cc_toolkit.change(old_text)

        if new_text is False:
            # The toolkit produced no text: keep what we had rather than
            # handing ``False`` to the caller as page content.
            return old_text, kozmetik_cebu

        if new_text != old_text:
            kozmetik_cebu = "; paqijiyên kozmetîk"
            applied_summaries = ', '.join(summaries.values())
            if applied_summaries:
                kozmetik_cebu += f' ({applied_summaries}.)'

        return new_text, kozmetik_cebu

    def treat_page(self) -> None:
        """Convert the citation templates of the current page and save it.

        Pages that do not exist are skipped, as are pages outside
        namespace 0 (except the test page
        'Bikarhêner:Balyozxane/ceribandin'). The page is saved only when
        the conversion changed something other than whitespace.
        """
        page = self.current_page
        if not page.exists():
            pywikibot.error(f"Skipping {page.title()} since it does not exist")
            return

        if page.namespace() != 0 and page.title() != 'Bikarhêner:Balyozxane/ceribandin':
            pywikibot.output(f"Skipping page '{page.title()}' because it is not in namespace 0")
            return

        text = page.text
        wikicode = mwparserfromhell.parse(text)

        # Iterate through each template in the page
        for template in wikicode.filter_templates():
            if not self.should_process_template(template):
                continue
            # Order matters: the value conversions below look for the
            # already-converted (Kurdish) parameter names.
            self.replace_parameters(template.params)
            self.replace_date_values(template.params)
            self.replace_param_values(template.params)
            self.remove_params(template)
            self.replace_template_name(template)

        new_text = str(wikicode)
        # Compare with all whitespace removed so that pure spacing
        # differences do not trigger an edit.
        if ''.join(text.split()) == ''.join(new_text.split()):
            return

        cleaned_new_text, kozmetik_cebu = self.do_kozmetik(new_text, "")
        summary = f'[[{self.bot_name}|Bot]]: Kurdîkirina çavkaniyan{kozmetik_cebu}'

        self.put_current(
            cleaned_new_text,
            summary=summary,
            asynchronous=self.opt['async'],
            show_diff=self.opt['showdiff']
        )

    def should_process_template(self, template):
        """Return True if *template* is a known citation template.

        The template name is compared case-insensitively against both the
        keys and the values of ``self.cite_templates``.
        """
        wanted = template.name.strip().lower()
        return any(
            wanted in (key.lower(), value.lower())
            for key, value in self.cite_templates.items()
        )

    def replace_template_name(self, template):
        """Rename *template* to the name mapped in ``self.cite_templates``.

        The lookup is case-insensitive on the key, matching
        :meth:`should_process_template`. Templates whose name is not a key
        of the mapping are left untouched.
        """
        wanted = template.name.strip().lower()

        # An exact (already lower-case) key wins; otherwise fall back to the
        # first key that matches case-insensitively.
        new_template_name = self.cite_templates.get(wanted)
        if new_template_name is None:
            new_template_name = next(
                (value for key, value in self.cite_templates.items()
                 if key.lower() == wanted),
                None,
            )

        if new_template_name is not None:
            template.name = f'{new_template_name} '

    def remove_params(self, template):
        """Drop the ``df`` parameter from *template* if it is present."""
        if template.has("df"):
            template.remove("df")

    def replace_parameters(self, params):
        """Rename parameters and normalise the whitespace of their values.

        Names found in ``self.basic_params`` (case-insensitively) are
        replaced directly. Each ``pattern -> replacement`` pair of
        ``self.numbered_params`` is then tried against the original name
        with ``re.match``/``re.sub``; a later match overrides an earlier
        one. Every value ends up stripped with a single trailing space.
        """
        for param in params:
            param_name = param.name.strip()
            lowered = param_name.lower()

            # Basic replacements for parameter names
            if lowered in self.basic_params:
                param.name = self.basic_params[lowered]

            # Numbered arguments replacements using regex
            for pattern, replacement in self.numbered_params.items():
                if re.match(pattern, param_name):
                    param.name = re.sub(pattern, replacement, param_name)

            # Add a single space after the parameter value
            param.value = f'{param.value.strip()} '

    def replace_date_values(self, params):
        """Rewrite date values as ``Day KurdishMonth Year``.

        Only parameters whose (lower-cased) name is in ``self.date_params``
        are considered. "Day Month Year" is tried first, then
        "Month Day, Year". Month names missing from ``self.months`` are
        kept as they are.

        NOTE: the patterns are searched anywhere in the value and the whole
        value is replaced by the reformatted date, so any text around the
        matched date is discarded.
        """
        for param in params:
            if param.name.strip().lower() not in self.date_params:
                continue

            raw_value = param.value.strip()
            match = (self._DAY_MONTH_YEAR.search(raw_value)
                     or self._MONTH_DAY_YEAR.search(raw_value))
            if not match:
                continue

            month = match.group("month")
            kurdish_month = self.months.get(month.lower(), month)
            param.value = (
                f"{match.group('day')} {kurdish_month} {match.group('year')} "
            )

    def replace_param_values(self, params):
        """Convert the values of a few specific (Kurdish-named) parameters.

        * ``çap``: "2 ed." / "2nd" style values are reduced to the number.
        * ``rewşa-urlyê`` and ``ziman``: non-empty values are translated
          through ``self.url_status`` / ``self.ziman``; unknown values are
          kept.
        * ``sernav``: the placeholder titles "archived copy" and
          "arşivlenmiş kopya" become "Kopîkirina arşîvê".
        """
        translation_tables = {
            "rewşa-urlyê": self.url_status,
            "ziman": self.ziman,
        }

        for param in params:
            param_name = param.name.strip().lower()
            param_val = str(param.value.strip())

            if param_name == "çap":
                match = self._EDITION_SUFFIX.search(param_val)
                if match:
                    param.value = match.group(1) + " "

            elif param_name in translation_tables:
                if param_val:
                    table = translation_tables[param_name]
                    # Keep the original value when there is no translation
                    new_value = table.get(param_val, param_val)
                    param.value = new_value.strip() + " "

            elif param_name == "sernav":
                if param_val.lower() in ("archived copy", "arşivlenmiş kopya"):
                    param.value = "Kopîkirina arşîvê "


def main(*args: str) -> None:
    """Parse command-line arguments and run the bot.

    Global pywikibot arguments and page-generator arguments are handled
    first. The remaining script options are:

    * ``-always``, ``-async``, ``-showdiff``: boolean bot options
    * ``-ignore:<mode>``: name of a ``CANCEL`` member (case-insensitive)

    :param args: command-line arguments
    :raises ValueError: if the ``-ignore`` mode is not a ``CANCEL`` member
    """
    local_args = pywikibot.handle_args(args)
    gen_factory = pagegenerators.GeneratorFactory()
    local_args = gen_factory.handle_args(local_args)

    options = {}
    unknown_args = []

    for arg in local_args:
        option, _, value = arg.partition(':')
        if option in ('-always', '-async', '-showdiff'):
            options[option[1:]] = True
        elif option == '-ignore':
            value = value.upper()
            try:
                options['ignore'] = getattr(CANCEL, value)
            except AttributeError:
                # The AttributeError itself carries no useful information.
                raise ValueError(f'Unknown ignore mode {value!r}!') from None
        else:
            # Report unrecognised arguments instead of silently dropping them.
            unknown_args.append(arg)

    gen = gen_factory.getCombinedGenerator(preload=True)

    if not pywikibot.bot.suggest_help(missing_generator=not gen,
                                      unknown_parameters=unknown_args):
        bot = CiteKurdifierBot(generator=gen, **options)
        bot.run()


# Run the bot only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()