Here naverokê

Bikarhêner:Balyozxane/skrîpt/py/replacesitil.py

Ji Wîkîpediya, ensîklopediya azad.
#!/usr/bin/env python3
"""
python pwb.py updatewin -f:"replacesitil.py" -s:"+change arg"


Eger ne şitil be, şablonê radike.
Şûnda Şablona Şitil şabloneke şitilan a din li gorî kategoriyên rûpelê zêde dike.

Bikaranîn:
Work on all pages in Kategorî:Şitil no recursive
python pwb.py replacesitil -always -norec

Work on all pages in Kategorî:Şitil recursively search parent categories
python pwb.py replacesitil  -showdiff

Work on all pages in mylist.txt file show differences in the console and don't ask confirmation before saving a page
python pwb.py replacesitil -file:mylist.txt -showdiff -always

Work on all pages in Kategorî:Şitil remove the stub tag if not sitil else skip. Skips the page if not in Hemû şitil cat:
python pwb.py replacesitil -always -remove
or use
python pwb.py replacesitil -cat:"Hemû şitil"  -always -remove


The following parameters are supported:

-change           Adds the given template, e.g. {{Hewlêr-erdnîgarî-şitil}}, to the page. Does not search the categories and removes all existing stub tags, so it should be used with a page generator like -cat: or -file:

-norec            If given, only checks the current categories. Does not
                  recursively search parent categories.

-remove           Sets the bot to remove the stub tag if not stub, else skips.

-always           The bot won't ask for confirmation when putting a page.

-showdiff         The bot will show the differences in the console.

-async            Edits will be performed asynchronously.

Use global -simulate option for test purposes. No changes to live wiki
will be done.

"""
import os
import re
import json
import random
import mytools
import pywikibot
import mwparserfromhell
from mytools import TagHelpers
from functools import lru_cache
from collections import OrderedDict
from kucosmetics import CANCEL, do_kozmetik
from pywikibot import pagegenerators, textlib
from pywikibot.bot import (
    ConfigParserBot,
    ExistingPageBot,
    SingleSiteBot,
)

# Module-wide switch for diagnostic prints. StubBot.__init__ overwrites it
# with the value of the 'showdiff' option.
VERBOSE = False

# This is required for the text that is shown when you run this script
# with the parameter -help.
docuReplacements = {'&params;': pagegenerators.parameterHelp}  # noqa: N816


class StubBot(
    SingleSiteBot,  # A bot only working on one site
    ConfigParserBot,  # A bot which reads options from scripts.ini setting file
    ExistingPageBot,  # CurrentPageBot which only treats existing pages
):
    """Bot that removes, changes or replaces stub templates on pages.

    Depending on the options, a page's stub tag is removed, replaced by one
    explicitly given template, or replaced by the templates that the stub
    configuration associates with the page's categories.
    """

    use_redirects = False  # treats non-redirects only

    # Default values of the options this bot accepts.
    update_options = {
        'async': False,  # forwarded to put_current(asynchronous=...)
        'showdiff': False,  # forwarded to put_current/do_kozmetik; also drives VERBOSE
        'remove': False,  # read into self.remove_only
        'norec': False,  # read into self.norec
        'ignore': CANCEL.MATCH,  # forwarded to do_kozmetik(ignore=...)
        'change': '',  # read into self.change_only
    }

    def __init__(self, *args, **kwargs):
        """Set up the bot: read options, load stub data and template redirects.

        All arguments are passed unchanged to the parent bot classes. The
        module-level ``VERBOSE`` flag is set from the ``showdiff`` option.
        """
        global VERBOSE

        super().__init__(*args, **kwargs)

        options = self.opt
        VERBOSE = options.get('showdiff', False)

        # Mode switches taken from the command line / config options.
        self.remove_only = options.get("remove", False)
        self.norec = options.get("norec", False)
        self.change_only = options.get("change", '')

        # Page title linked from every edit summary.
        self.bot_name = "User:Balyozxane/skrîpt/py/replacesitil.py"

        stub_cats, parent_map, raw_data = self.get_json_cats()
        self.kategoriyen_sitilan = stub_cats
        self.serkategoriyen_sitilan = parent_map
        self.sitil_data = raw_data

        self.sitil_redirects = mytools.get_template_redirects(self.site, "Şitil")
        self.added_categories = set()
        self.kalik_redirects = mytools.get_template_redirects(self.site, 'Kalika wîkîprojeyê')

        if VERBOSE:
            print("<<< serkategoriyen_sitilan >>>\n,", self.serkategoriyen_sitilan)
        print(f"Initial self.change_only: {self.change_only} (type: {type(self.change_only)})")
    @staticmethod
    def get_json_cats():
        with open('kategoriyen_sitilan.json', 'r', encoding='utf-8') as f:
            sitil_data = json.load(f)

        kategoriyen_sitilan = set()
        serkategoriyen_sitilan = {}

        for key in sitil_data:
            kategori = sitil_data[key].get("kategorî", "")
            kategoriyen_sitilan.add(kategori)

            serkategori = sitil_data[key].get("serkategorî")
            if serkategori:
                for subkey in serkategori:
                    serkategori_kategori = serkategori[subkey].get("kategorî", "")
                    if serkategori_kategori:
                        if serkategori_kategori not in serkategoriyen_sitilan:
                            serkategoriyen_sitilan[serkategori_kategori] = set()
                        serkategoriyen_sitilan[serkategori_kategori].add(kategori)

        return kategoriyen_sitilan, serkategoriyen_sitilan, sitil_data

    @staticmethod
    def count_sitil(added_categories):
        # Load existing data from the JSON file if it exists
        json_file = "sitilcount.json"
        if os.path.exists(json_file):
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        else:
            data = {}

        # Update the JSON data with the new categories
        for category in added_categories:
            if category in data:
                data[category]['number'] += 1
            else:
                data[category] = {'number': 1}

        sorted_data = OrderedDict(sorted(data.items(), key=lambda item: item[1]['number'], reverse=True))

        with open(json_file, 'w', encoding='utf-8') as f:
            json.dump(sorted_data, f, ensure_ascii=False, indent=4)

    @lru_cache(maxsize=None)
    def get_existing_cats(self, page):
        """Return the unhidden categories ``page`` is in.

        Results are memoised by ``lru_cache`` on the ``(self, page)`` pair, so
        each page is looked up only once per bot instance.
        """
        return mytools.get_unhidden_categories(page.site.code, page.title())

    def get_from_recursive_parents(self, categories, depth=0):
        """Search the parents of ``categories`` for stub categories.

        For every non-hidden category its parent categories are collected. As
        long as the collected parents contain no stub category, the search
        descends into the parents of that category, up to two further levels
        (``depth`` 0, 1 and 2).

        Returns:
            set: the stub categories found among all collected parents.
        """
        if VERBOSE:
            print(f"depth {depth}: fetching rec parents for:\n{categories}")

        collected = set()

        for name in categories:
            cat_page = pywikibot.Category(self.site, "Kategorî:" + name)

            if cat_page.isHiddenCategory():
                print(f"depth {depth}: {name} hidden. Skipped.")
                continue

            parents = self.get_existing_cats(cat_page)

            if VERBOSE:
                print(f"depth {depth}: rec cat_parents {name}:\n{parents}")

            if not parents:
                continue

            collected.update(parents)

            if self.kategoriyen_sitilan.intersection(collected):
                if VERBOSE:
                    print(f"depth {depth}: found match")
                continue

            print(f"depth {depth}: no match rec again")
            if depth < 2:
                deeper = self.get_from_recursive_parents(parents, depth + 1)
                collected.update(deeper)

        matches = self.kategoriyen_sitilan.intersection(collected)

        if VERBOSE:
            print("common_categories:\n", matches)

        return matches

    def get_from_parents(self, categories):
        if len(categories) == 0:
            return

        if VERBOSE:
            print("fetching parents for:\n")

        parent_cats = list()

        for category in categories:

            cat_page = pywikibot.Category(self.site, "Kategorî:" + category)

            if cat_page.isHiddenCategory():
                print(f"{category} hidden. Skipped.")
                continue

            cat_parents = self.get_existing_cats(cat_page)

            if VERBOSE:
                print(f"{category} parents:\n{cat_parents}")

            if cat_parents:
                parent_cats.extend(cat_parents)

        if VERBOSE:
            print("parent_cats:\n", parent_cats)

        common_categories = self.kategoriyen_sitilan.intersection(parent_cats)

        if len(common_categories) < 1:
            common_categories = self.get_from_recursive_parents(parent_cats)
        elif (len(common_categories) == 1) and ("Kes" in common_categories):
            common_categories = self.get_from_recursive_parents(parent_cats)

        return common_categories

    def remove_categories(self, common_categories):
        def has_subcategory_in_common(category):
            if category not in self.serkategoriyen_sitilan:
                return False
            for subcategory in self.serkategoriyen_sitilan[category]:
                if subcategory in common_categories or has_subcategory_in_common(subcategory):
                    return True
            return False

        to_remove = {category for category in common_categories if has_subcategory_in_common(category)}
        common_categories.difference_update(to_remove)

        if len(common_categories) > 1 and "Sal" in common_categories:
            common_categories.remove("Sal")

        cancellation_dict = {
            "Televîzyon": "Rojname",
            "Sal": "Kes",
            "Teknolojî": "Felsefe",
            "Zanist": "Felsefe",
            "Wêje": "Televîzyon"
        }
        for key, value in cancellation_dict.items():
            if key in common_categories and value in common_categories:
                print(f"{key} removed coz {value} exists")
                common_categories.remove(key)

        return common_categories

    def get_common_cats(self, categories):
        """
        Checks if the current page should be treated based on its categories.

        Returns:
        set: A set of common categories.
        """
        if len(categories) == 0:
            return

        common_categories = self.kategoriyen_sitilan.intersection(categories)
        if VERBOSE:
            print("categories: ", categories)
            print("common_categories: ", common_categories)

        if self.norec:
            return common_categories

        if len(common_categories) < 1:
            common_categories = self.get_from_parents(categories)

            if VERBOSE:
                print("common_categories: ", common_categories)

        is_mirov = mytools.get_pvalue(self.site, self.current_page.title(), pvalue="P31")
        if is_mirov == "Q5":
            if VERBOSE:
                print("mirov e")

            if "Kes" not in common_categories:
                common_categories.add("Kes")

        if len(common_categories) < 1:
            return

        return common_categories

    def find_category_data(self, category):
        def search(data):
            # Check if the current dictionary has the "kategorî" key matching the category
            if "kategorî" in data and data["kategorî"] == category:
                return data

            # Recursively search in the "serkategorî" if it exists
            if "serkategorî" in data:
                for subkey, subdata in data["serkategorî"].items():
                    serkategori_data = search(subdata)
                    if serkategori_data:
                        return serkategori_data

            return None

        # Start searching from the top level of sitil_data
        for key, value in self.sitil_data.items():
            result = search(value)
            if result:
                return result

        return None

    def add_templates(self, text, common_categories):
        """
        Adds the corresponding templates from sitil_data.
        """
        added_templates = []
        for category in common_categories:
            print("category: ", category)
            category_data = self.find_category_data(category)
            print("category_data: ", category_data)
            if category_data:
                template_value = category_data["şablon"]
                print("template_value: ", template_value)
                new_template = f'{{{{{template_value}}}}}'
                text = textlib.add_text(text, new_template, site=self.site)
                added_template = "{{[[Şablon:" + template_value + "|" + template_value + "]]}}"
                added_templates.append(added_template)
                self.added_categories.add(category_data["kategorî"])

        added_templates_summary = ', '.join(added_templates)
        return text, added_templates_summary

    def remove_stub_tag(self, text: str) -> str:
        """Return ``text`` with stub templates stripped.

        The templates listed in ``self.sitil_redirects`` are removed first,
        followed by anything matching the stub-name patterns below
        (``{{...-şitil}}``, ``{{Şitil}}``, ``{{Kurt}}``, ``{{Stub}}``,
        ``{{Şitlek}}``, ``{{...-şitil-...}}`` and ``{{Şitil-...}}``).
        """
        stub_patterns = (
            r'{{\s*([^\}]+\-şitil|[Şş]iti?l|[Kk]urt|[Ss]tub|[Şş]itlek|[^\}]+\-şitil\-[^\}]+)\s*}}',
            r'{{\s*([Şş]itil-[^\}]+)\s*}}',
        )

        text = mytools.remove_template(text, self.sitil_redirects)
        for pattern in stub_patterns:
            text = re.sub(pattern, '', text)

        return text

    def remove_sitil_class(self, page):
        """Blank the 'sinif' parameter on the talk page when it says 'şitil'.

        Only templates whose name is in ``self.kalik_redirects`` are touched.
        Nothing is saved when the talk page does not exist or is unchanged.
        """
        talk_page = page.toggleTalkPage()
        if not talk_page.exists():
            return

        original = talk_page.text
        wikicode = mwparserfromhell.parse(original)

        for template in wikicode.filter_templates():
            if mytools.ucfirst(template.name) not in self.kalik_redirects:
                continue
            if not template.has('sinif'):
                continue
            if mytools.lcfirst(template.get('sinif').value) == 'şitil':
                template.add('sinif', '')

        updated = str(wikicode)
        if updated == original:
            return

        talk_page.text = updated
        talk_page.save(summary=f'[[{self.bot_name}|Bot]]: Sinifa şitil hat rakirin')

    def match_from_title(self):
        title = self.current_page.title()
        common_categories = set()

        if title.startswith('Balafirgeha'):
            common_categories.add('Balafirgeh')
            return common_categories

        if '(albûm)' in title:
            common_categories.add('Albûm')
            return common_categories

        if '(eşîr)' in title:
            common_categories.add('Eşîr')
            return common_categories

        if '(fîlm)' in title:
            common_categories.add('Fîlm')
            return common_categories

        if '(pirtûk)' in title:
            common_categories.add('Pirtûk')
            return common_categories

        if '(rojname)' in title:
            common_categories.add('Rojname')
            return common_categories

        if '(kovar)' in title:
            common_categories.add('Kovar')
            return common_categories

        if '(mîtolojî)' in title:
            common_categories.add('Mîtolojî')
            return common_categories

    def treat_page(self) -> None:
        """Remove, change or replace the stub template on the current page.

        Pages are skipped when they are outside the main namespace, are not in
        "Hemû şitil" (unless ``-change`` is used), have no unhidden
        categories, or are recognised as lists.

        For the remaining pages one of three things happens:

        * the page is not a stub: every stub tag is removed and the 'şitil'
          class is cleared on the talk page after saving;
        * ``-change`` was given: the stub tags are replaced by that template
          (``-remove`` takes precedence and skips stub pages entirely);
        * otherwise: the stub tags are replaced by the templates configured
          for the page's categories. If no replacement template can be
          determined the page is left untouched, so a stub never ends up
          without any stub tag.
        """
        page = self.current_page

        if page.namespace() != 0:
            if VERBOSE:
                print("Skipping Namespace not 0.")
            return

        hemu_sitil = mytools.is_category_in_page(page, "Hemû şitil")

        if not hemu_sitil and not self.change_only:
            if VERBOSE:
                print("Skipping hemû şitil tine.")
            return

        # Use the page's own site code (as get_existing_cats does) instead of
        # a hard-coded wiki code.
        categories = mytools.get_unhidden_categories(page.site.code, page.title())

        if not categories:
            if VERBOSE:
                print("Skipped no cats")
            return

        is_list = mytools.is_liste(self.site, categories)

        if is_list:
            if VERBOSE:
                print("Skipped liste")
            return

        is_sitil = TagHelpers.is_sitil(page)

        if VERBOSE:
            print("is_sitil: ", is_sitil)
        if is_sitil == 'lîste':
            if VERBOSE:
                print("Skipped lîste")
            return

        text = page.text
        # Remembers the "not a stub" case so the talk page can be cleaned up.
        tag_removed = False

        if is_sitil is False:
            new_text = self.remove_stub_tag(text)
            append_sum = "hat rakirin"
            tag_removed = True
        elif self.remove_only:
            if VERBOSE:
                print('Skipped remove only')
            return
        elif self.change_only:
            new_text = self.remove_stub_tag(text)
            # Pass the bot's site explicitly, consistent with add_templates.
            new_text = textlib.add_text(new_text, add=self.change_only, site=self.site)
            append_sum = "bi " + self.change_only + " hat guhartin"
        else:
            if not mytools.is_category_in_page(page, "Şitil") and not self.norec:
                if VERBOSE:
                    print("Skipping şitil tine.")
                return

            if page.isDisambig():
                if VERBOSE:
                    print("Skipping disambig.")
                return

            # Only this branch needs the check, so it is evaluated here rather
            # than for every page.
            if mytools.zaravayen_din(page.categories()):
                if VERBOSE:
                    print("Skipping zarava_heye.")
                return

            # compare page cats with şitil cats and return found ones
            common_categories = self.get_common_cats(categories)
            if not common_categories:
                common_categories = self.match_from_title()
                if not common_categories:
                    self.count_sitil(categories)
                    return

            common_categories = self.remove_categories(common_categories)

            new_text = self.remove_stub_tag(text)

            new_text, added_templates = self.add_templates(new_text, common_categories)
            if not added_templates:
                # No configured template for any matched category: saving now
                # would strip the stub tag without putting anything in its
                # place, so leave the page as it is.
                if VERBOSE:
                    print("Skipping no replacement template for:", common_categories)
                return
            append_sum = f"bi {added_templates} hat guhartin"

        if text != new_text:
            new_text, kozmetik_cebu = do_kozmetik(self.current_page,
                                                  new_text,
                                                  show_diff=self.opt.showdiff,
                                                  ignore=self.opt.ignore)
            summary = f'[[{self.bot_name}|Bot]]: Şablona {{{{[[Şablon:Şitil|Şitil]]}}}}'
            summary += f' {append_sum}{kozmetik_cebu}'

            if VERBOSE:
                print(f"\n<<< {page.title()} >>>\n")
            self.put_current(
                new_text,
                summary=summary,
                asynchronous=self.opt['async'],
                show_diff=self.opt['showdiff']
            )
            if tag_removed:
                self.remove_sitil_class(page)


def main(*args: str) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    An existing ``sitilcount.json`` is reset to an empty object before the
    run. When no page generator is given on the command line, the members of
    category "Şitil" are processed in random order.

    :param args: command line arguments
    :raises ValueError: if ``-ignore:`` names an unknown ``CANCEL`` mode
    """
    if os.path.exists("sitilcount.json"):
        with open("sitilcount.json", 'w', encoding='utf-8') as file:
            file.write("{}")

    options = {}
    # Process global arguments to determine desired site
    local_args = pywikibot.handle_args(args)

    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    gen_factory = pagegenerators.GeneratorFactory()

    # Process pagegenerators arguments
    local_args = gen_factory.handle_args(local_args)

    # Parse your own command line arguments
    for arg in local_args:
        name, _, value = arg.partition(':')
        # Strip the leading dash once; every comparison below uses the bare
        # option name so that -ignore:<mode> is actually recognised.
        option = name[1:]
        if option in ('always', 'async', 'showdiff', 'remove', 'norec'):
            options[option] = True
        elif option == 'change':
            options['change'] = value
        elif option == 'ignore':
            value = value.upper()
            try:
                options['ignore'] = getattr(CANCEL, value)
            except AttributeError:
                raise ValueError(f'Unknown ignore mode {value!r}!') from None
        # take the remaining options as booleans.
        # You will get a hint if they aren't pre-defined in your bot class
        else:
            options[option] = True
    # The preloading option is responsible for downloading multiple
    # pages from the wiki simultaneously.
    gen = gen_factory.getCombinedGenerator(preload=True)

    if not gen:
        # random.shuffle needs a mutable sequence; materialise the members so
        # this also works if the helper hands back an iterator.
        gen = list(mytools.get_cat_members(pywikibot.Site(), "Şitil", 0, itr_page=True))
        random.shuffle(gen)

    # check if further help is needed
    if not pywikibot.bot.suggest_help(missing_generator=not gen):
        # pass generator and private options to the bot
        bot = StubBot(generator=gen, **options)
        bot.run()  # guess what it does


# Run the bot only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()