Bikarhêner:Balyozxane/skrîpt/py/category creator.py

Ji Wîkîpediya, ensîklopediya azad.
#!/usr/bin/env python3
"""
python pwb.py updatewin -f:"category_creator.py" -s:"fix"

This script provides two functions:

create_category() is used by [[Bikarhêner:Balyozxane/skrîpt/py/categorizeWithCreator.py]]: you pass it an English category title and it creates the Kurdish category according to the regexes.

create_ku_category() is used by [[Bikarhêner:Balyozxane/skrîpt/py/category creator run.py]]: you pass it a Kurdish category title and, if that category does not exist yet, it creates its page according to the regexes.

Both functions also link the newly created category to Wikidata.
"""
import re
import json
import pywikibot

# Verbosity switch read by CategoryCreator.print_sirove(): while this is True
# the progress messages are suppressed, while it is False they are printed.
# (The name presumably reads as Kurdish "şîrove tine", i.e. "no commentary".)
SIROVETINE = True
# While True, CategoryCreator.load_patterns() reads the category regexes from
# the "Bikarhêner:Balyozxane/test.json" page instead of the regular
# "category creator regex.json" page.
TESTING = False


class CategoryCreator:
    def __init__(self):
        """Open both wikis, load the patterns and set up the fixed lookups.

        The three pattern attributes start out as ``None``; they are filled
        by ``load_patterns()``, which is called from here.
        """
        self.country_mapping = self.main_cats = self.tracking_cats = None

        self.site_ku = pywikibot.Site("ku", "wikipedia")
        self.site_en = pywikibot.Site("en", "wikipedia")
        self.load_patterns()

        # Wiki page of this script; linked from the edit summaries.
        self.bot_name = "Bikarhêner:Balyozxane/skrîpt/py/category creator.py"

        # English month name -> Kurdish month name, in calendar order.
        english_months = (
            'January', 'February', 'March', 'April', 'May', 'June',
            'July', 'August', 'September', 'October', 'November', 'December',
        )
        kurdish_months = (
            'kanûna paşîn', 'sibat', 'adar', 'nîsan', 'gulan', 'hezîran',
            'tîrmeh', 'tebax', 'îlon', 'çiriya pêşîn', 'çiriya paşîn',
            'kanûna pêşîn',
        )
        self.ku_months = dict(zip(english_months, kurdish_months))

    @staticmethod
    def print_sirove(sirove):
        """Print the progress message *sirove* unless SIROVETINE is set.

        Always returns ``None``.
        """
        if SIROVETINE:
            return None
        return print(sirove)

    def load_patterns(self):
        """Read the regex and country tables from their JSON pages on kuwiki.

        Fills ``self.main_cats`` and ``self.tracking_cats`` from the regex
        page (the test page when TESTING is set) and ``self.country_mapping``
        from the country page. If anything goes wrong while reading or
        parsing, a warning is emitted and all three are set to empty dicts.
        """
        if TESTING:
            regex_page_title = "Bikarhêner:Balyozxane/test.json"
        else:
            regex_page_title = "Bikarhêner:Balyozxane/skrîpt/json/category creator regex.json"
        regex_page = pywikibot.Page(self.site_ku, regex_page_title)
        country_page = pywikibot.Page(
            self.site_ku,
            "Bikarhêner:Balyozxane/skrîpt/json/category creator welat.json",
        )

        try:
            # Fetch both page texts first, then parse them.
            regex_json = regex_page.text
            country_json = country_page.text

            parsed_patterns = json.loads(regex_json)

            self.main_cats = parsed_patterns["main_cats"]
            self.print_sirove("main_cats loaded")

            self.tracking_cats = parsed_patterns["tracking_cats"]
            self.print_sirove("tracking_cats loaded")

            self.country_mapping = json.loads(country_json)
            self.print_sirove("country_mapping loaded")
        except Exception as e:
            # Any failure (unreadable page, invalid JSON, missing key) leaves
            # the creator with empty tables instead of half-loaded ones.
            pywikibot.warning(f"Failed to load category patterns: {e}")
            self.main_cats, self.tracking_cats, self.country_mapping = {}, {}, {}

    def ku_exists(self, page_title):
        """Return whether *page_title* exists on the Kurdish Wikipedia."""
        return pywikibot.Page(self.site_ku, page_title).exists()

    def en_exists(self, page_title):
        """Return whether *page_title* exists on the English Wikipedia."""
        return pywikibot.Page(self.site_en, page_title).exists()

    def get_wikidata_id(self, interwiki_title):
        """Return the Wikidata item id of an English Wikipedia page.

        A redirect is followed one step before the item is looked up.

        :param interwiki_title: title of the page on enwiki.
        :return: the item id, or ``None`` (after logging an error) when the
            title is invalid or the lookup raises ``NoPageError``.
        """
        en_page = pywikibot.Page(self.site_en, interwiki_title)

        # Look the item up under the redirect target, not the redirect itself.
        if en_page.isRedirectPage():
            interwiki_title = en_page.getRedirectTarget().title()

        try:
            repo = self.site_en.data_repository()
            link = pywikibot.Link(f'en:{interwiki_title}', source=repo)
            return pywikibot.Page(link).data_item().id
        except pywikibot.exceptions.InvalidTitleError:
            pywikibot.error(f"Invalid title: {interwiki_title}")
        except pywikibot.exceptions.NoPageError:
            pywikibot.error(f"Page not found: {interwiki_title}")
        return None

    def add_sitelink(self, page, item_id):
        """Connect the kuwiki *page* to the Wikidata item *item_id*.

        If the item already has a kuwiki sitelink, nothing is changed and the
        conflict is written to the control page through ``log_cat``.
        Otherwise the sitelink is set and the Kurdish label and description
        are added when the item has none.

        Failures are reported with ``pywikibot.error`` and are not re-raised.

        :param page: the kuwiki page to link.
        :param item_id: id of the Wikidata item.
        :return: always ``None``.
        """
        repo = self.site_ku.data_repository()

        try:
            item = pywikibot.ItemPage(repo, title=item_id)

            # Never overwrite an existing kuwiki sitelink; record it instead.
            if "kuwiki" in item.sitelinks:
                existing_sitelink = item.sitelinks["kuwiki"]
                # "\n# " starts a new numbered entry, like the other entries
                # written to the control page; without it the text would be
                # glued to the end of the previous entry.
                self.log_cat(f"\n# {item_id} sernavê {existing_sitelink} bi kar tîne ji bo {page.title()}")

                self.print_sirove(f"Sitelink already exists for {page.title()} in {item_id}. Skipping modification.")

                return None

            item.setSitelink(page, summary=f"Added sitelink {page}")
            self.print_sirove(f"Sitelink added for {page.title()} to {item_id}")

            item_dict = item.get()

            labels = item_dict.get('labels', {})
            descriptions = item_dict.get('descriptions', {})

            existing_label = labels.get('ku', None)
            existing_description = descriptions.get('ku', None)

            if existing_label is None:
                item.editLabels({"ku": page.title()}, summary=f"Added [ku] label: {page.title()}")
                self.print_sirove(f"label '{page.title()}' added to {item_id}")
            else:
                self.print_sirove(f"Label already exists: {existing_label}")

            description = "Wîkîmediya:Kategorî"
            if existing_description is None:
                item.editDescriptions({"ku": description}, summary=f"Added [ku] description: {description}")
                self.print_sirove(f"Description '{description}' added to {item_id}")
            else:
                self.print_sirove(f"Description already exists: {existing_description}")

        # Errors go through pywikibot.error rather than print_sirove, because
        # print_sirove prints nothing while SIROVETINE is set and the failure
        # would otherwise pass unnoticed.
        except pywikibot.exceptions.OtherPageSaveError as e:
            pywikibot.error(f"Error adding sitelink for {page.title()}: {str(e)}")
        except Exception as e:
            pywikibot.error(f"Error in add_sitelink function: {str(e)}")
        return None

    def save_page(self, page_title, text, en_title, summary):
        """Save a kuwiki page and connect it to its English counterpart.

        The page is first saved with *text*. Then:

        * if *en_title* exists on enwiki and a Wikidata item id is found for
          it, the new page is attached to that item with ``add_sitelink``;
        * if *en_title* exists but no item id is found, an ``[[en:...]]``
          interwiki link is appended and the page is saved a second time;
        * if *en_title* does not exist on enwiki, nothing is linked and a
          warning is emitted.

        :param page_title: full kuwiki title, namespace prefix included.
        :param text: wikitext of the new page.
        :param en_title: full enwiki title of the counterpart page.
        :param summary: edit summary; when empty or ``None`` a default
            summary naming *en_title* is used.
        """
        page = pywikibot.Page(self.site_ku, page_title)
        page.text = text
        if not summary:
            summary = f"[[{self.bot_name}|Bot]]: Wekheva [[en:{en_title}]] hat çêkirin"

        page.save(summary=summary)

        # Without an English page there is nothing to link to: an interwiki
        # link would point at a missing page and its summary ("the Wikidata
        # item does not exist") would be misleading.
        if not self.en_exists(en_title):
            pywikibot.warning(
                f"{en_title} does not exist on en.wikipedia; "
                f"{page.title()} was saved without a link to it.")
            return

        item_id = self.get_wikidata_id(en_title)

        if item_id:
            try:
                self.add_sitelink(page, item_id)
            # Reported with pywikibot.error so the failure stays visible even
            # while print_sirove is silenced.
            except pywikibot.exceptions.OtherPageSaveError as e:
                pywikibot.error(
                    f"Error saving sitelink for page {page.title()}: {str(e)}.")
            except Exception as e:
                pywikibot.error(
                    f"Error processing page {page.title()}: {str(e)}.")
        else:
            # The English page has no Wikidata item: fall back to an
            # old-style interwiki link inside the page text.
            interwiki = f"\n[[en:{en_title}]]"
            page.text = page.text + interwiki
            interwiki_summary = f"[[{self.bot_name}|Bot]]: Înterwîkî [[en:{en_title}]] lê hat zêdekirin ji ber ku îtema Wîkîdata tine"
            page.save(summary=interwiki_summary)

    def create_category(self, en_title):

        for enwiki_regex, config in self.main_cats.items():

            # Replace {country} with the actual country names
            if "{country}" in enwiki_regex:
                for en_country_name, ku_country_name in self.country_mapping.items():
                    updated_enwiki_regex = enwiki_regex.replace("{country}", en_country_name)
                    enwiki_match = re.match(updated_enwiki_regex, en_title)
                    if enwiki_match:
                        self.print_sirove("enwiki_match matched")
                        kuwiki_pattern = config["ku_title"]

                        # Replace {country} with the actual ku_country_name

                        kuwiki_pattern = kuwiki_pattern.replace("{country}", self.get_cemandi(ku_country_name))

                        year_match = enwiki_match.group(
                            1) if enwiki_match and enwiki_match.lastindex and enwiki_match.group(1) else None
                        if year_match:
                            year = int(year_match) if year_match else None
                            self.print_sirove(f"Year: {year}")

                            if config.get("add_ku_suffix", False):
                                suffix = self.get_ku_suffix(year)
                                kurdish_title = kuwiki_pattern.format(year=str(year) + suffix)
                                self.print_sirove("add_ku_suffix is true")
                                self.print_sirove(f"Kurdish Title: {kurdish_title}")
                            else:
                                kurdish_title = kuwiki_pattern.format(year=year)
                                self.print_sirove("add_ku_suffix is false")
                                self.print_sirove(f"Kurdish Title: {kurdish_title}")
                        else:
                            kurdish_title = kuwiki_pattern

                        kuwiki_title = "Kategorî:" + kurdish_title
                        self.print_sirove(f"Kurdish Page Title: {kuwiki_title}")

                        if self.ku_exists(kuwiki_title):
                            pywikibot.output(f"The category page {kuwiki_title} already exists.")
                            return None

                        self.print_sirove(f"Creating category {kuwiki_title}.")

                        if config.get("otokat", False):
                            page_text = "{{Otokat}}"
                        else:
                            page_text = "{{Standard-kat"

                            if config.get("wext", False):
                                page_text += "|wext=1"

                            if config.get("sereke", False):
                                page_text += "|sereke=1"

                            page_text += "}}\n\n{{subst:bêkategorî}}"

                        en_title = f'Category:{en_title}'
                        self.save_page(kuwiki_title, page_text, en_title, None)

                        return kurdish_title

            else:
                enwiki_match = re.match(enwiki_regex, en_title)
                if enwiki_match:
                    self.print_sirove("enwiki_match matched")
                    year_match = enwiki_match.group(1) if enwiki_match.group(1) else None
                    year = int(year_match) if year_match else None
                    kuwiki_pattern = config["ku_title"]
                    self.print_sirove(f"Year: {year}")

                    if config.get("add_ku_suffix", False):
                        suffix = self.get_ku_suffix(year)
                        kurdish_title = kuwiki_pattern.format(year=str(year) + suffix)
                        self.print_sirove("add_ku_suffix is true")
                        self.print_sirove(f"Kurdish Title: {kurdish_title}")
                    else:
                        kurdish_title = kuwiki_pattern.format(year=str(year))
                        self.print_sirove("add_ku_suffix is false")
                        self.print_sirove(f"Kurdish Title: {kurdish_title}")

                    ku_title = "Kategorî:" + kurdish_title
                    self.print_sirove(f"Kurdish Page Title: {ku_title}")

                    if self.ku_exists(ku_title):
                        pywikibot.output(f"The category page {ku_title} already exists.")
                        return None

                    self.print_sirove(f"Creating category {ku_title}.")

                    if config.get("otokat", False):
                        page_text = "{{Otokat}}"
                    else:
                        page_text = "{{Standard-kat"

                        if config.get("wext", False):
                            page_text += "|wext=1"

                        if config.get("sereke", False):
                            page_text += "|sereke=1"

                        page_text += "}}\n\n{{subst:bêkategorî}}"

                    en_title = f'Category:{en_title}'

                    self.save_page(ku_title, page_text, en_title, None)

                    return kurdish_title

        self.print_sirove("No match found")
        return None

    @staticmethod
    def get_ku_suffix(year):
        # Check if the last digit of the year is 1 and it's not 11
        if year % 10 == 1 and year % 100 != 11:
            suffix = "ê"
        else:
            suffix = "an"
        return suffix

    @staticmethod
    def get_en_suffix(year):
        # Get the last two digits of the year
        last_two_digits = year % 100

        # Check for special cases
        if last_two_digits in [11, 12, 13]:
            return "th"

        # Get the last digit of the year
        last_digit = last_two_digits % 10

        # Return the corresponding suffix
        if last_digit == 1:
            return "st"
        elif last_digit == 2:
            return "nd"
        elif last_digit == 3:
            return "rd"
        else:
            return "th"

    @staticmethod
    def get_cemandi(welat):
        welat_lower = welat.lower()

        if ' ' in welat:
            # If there is a space in ku_country_name, return it as is
            return welat
        elif welat_lower.endswith(('a', 'e', 'ê', 'i', 'u', 'û', 'o')):
            # If ku_country_name ends with any of these characters in lowercase, append "yê"
            return welat + "yê"
        elif welat_lower.endswith('î'):
            # If ku_country_name ends with "î" in lowercase, remove it and append "iyê"
            return welat[:-1] + "iyê"
        else:
            # Otherwise, append "ê"
            return welat + "ê"

    def log_cat(self, new_text):
        """Append *new_text* to the bot's control page on kuwiki and save it."""
        log_page = pywikibot.Page(self.site_ku, "Bikarhêner:Balyozbot/kontrol/category creator")
        log_page.text += new_text
        log_page.save(summary=f"[[{self.bot_name}|Bot]]: Problemek hat qeydkirin")

    def create_ku_category(self, ku_category):
        self.print_sirove(f"ku_category: {ku_category}")
        if self.ku_exists(f"Kategorî:{ku_category}"):
            print(f"{ku_category} already exists. Derdê te çi ye?")
            return

        for tracking_regex, tracking_enwiki_regex in self.tracking_cats.items():
            # Replace {month} placeholder with each month and try to match the regex
            for en_month, ku_month in self.ku_months.items():
                replaced_tracking_regex = tracking_regex.replace("{month}", ku_month)
                replaced_enwiki_tracking_regex = tracking_enwiki_regex.replace("{month}", en_month)

                if re.match(replaced_tracking_regex, ku_category):
                    self.print_sirove("ku_category matches tracking regex")
                    # Extract the numeric value from ku_category using ku_regex
                    ku_numeric_value = re.search(replaced_tracking_regex, ku_category).group(1)

                    # Replace the numeric value in en_regex with the extracted value
                    en_title = re.sub(r'\{year}', f'{ku_numeric_value}', replaced_enwiki_tracking_regex)
                    en_category = "Category:" + en_title
                    ku_title = "Kategorî:" + ku_category
                    # Print or use en_title and ku_category as needed
                    self.print_sirove(f"en_category: {en_category}")
                    self.print_sirove(f"ku_category: {ku_title}")

                    text = "{{Kategoriya paqijkirinê ya mehane}}"
                    summary = f"[[{self.bot_name}|Bot]]: Kategoriya paqijkirinê ya mehane hat çêkirin"
                    self.save_page(ku_title, text, en_category, summary)
                    return

        for enwiki_regex, config in self.main_cats.items():
            if "ku_regex" in config:
                ku_regex = config["ku_regex"]

                # Replace {country} with the actual country names
                if "{country}" in ku_regex:
                    for en_country_name, ku_country_name in self.country_mapping.items():
                        updated_ku_regex = ku_regex.replace("{country}", self.get_cemandi(ku_country_name))
                        kuwiki_match = re.match(updated_ku_regex, ku_category)
                        if kuwiki_match:
                            self.print_sirove("kuwiki_match matched")
                            enwiki_pattern = config["en_title"]

                            # Replace {country} with the actual en_country_name

                            enwiki_pattern = enwiki_pattern.replace("{country}", en_country_name)

                            year_match = kuwiki_match.group(
                                1) if kuwiki_match and kuwiki_match.lastindex and kuwiki_match.group(1) else None
                            if year_match:
                                year = int(year_match) if year_match else None

                                if config.get("add_en_suffix", False):
                                    suffix = self.get_en_suffix(year)
                                    en_title = enwiki_pattern.format(year=str(year) + suffix)
                                    self.print_sirove("add_ku_suffix is true")
                                    self.print_sirove(f"en_title: {en_title}")
                                else:
                                    en_title = enwiki_pattern.format(year=year)
                                    self.print_sirove("add_en_suffix is false")
                                    self.print_sirove(f"en_title: {en_title}")
                            else:
                                en_title = enwiki_pattern
                                self.print_sirove(f"en_title: {en_title}")

                            en_page_title = "Category:" + en_title
                            self.print_sirove(f"enwiki Page Title: {en_page_title}")

                            if self.en_exists(en_page_title):
                                self.print_sirove(
                                    f"The category page {en_page_title} exists. Creating category Kategorî:{ku_category}.")

                                if config.get("otokat", False):
                                    page_text = "{{Otokat}}"
                                else:
                                    page_text = "{{Standard-kat"

                                    if config.get("wext", False):
                                        page_text += "|wext=1"

                                    if config.get("sereke", False):
                                        page_text += "|sereke=1"

                                    page_text += "}}\n\n{{subst:bêkategorî}}"

                                new_title = "Kategorî:" + ku_category

                                self.save_page(new_title, page_text, en_page_title, None)

                            else:
                                # Kategorî tê xwestin lê eynî kategorî li enwîkiyê nîne çima?
                                new_text = f"\n# [[:Kategorî:{ku_category}]] tê xwestin lê li en.wîkiyê tine"

                                self.log_cat(new_text)
                else:
                    kuwiki_match = re.match(ku_regex, ku_category)
                    if kuwiki_match:
                        self.print_sirove("kuwiki_match matched")
                        enwiki_pattern = config["en_title"]

                        year_match = kuwiki_match.group(
                            1) if kuwiki_match and kuwiki_match.lastindex and kuwiki_match.group(1) else None
                        if year_match:
                            year = int(year_match) if year_match else None

                            if config.get("add_en_suffix", False):
                                suffix = self.get_en_suffix(year)
                                en_title = enwiki_pattern.format(year=str(year) + suffix)
                                self.print_sirove("add_en_suffix is true")
                                self.print_sirove(f"en_title: {en_title}")
                            else:
                                en_title = enwiki_pattern.format(year=year)
                                self.print_sirove("add_en_suffix is false")
                                self.print_sirove(f"en_title: {en_title}")
                        else:
                            en_title = enwiki_pattern
                            self.print_sirove(f"en_title: {en_title}")

                        en_page_title = "Category:" + en_title
                        self.print_sirove(f"enwiki Page Title: {en_page_title}")

                        if self.en_exists(en_page_title):
                            self.print_sirove(
                                f"The category page {en_page_title} exists. Creating category {ku_category}.")

                            if config.get("otokat", False):
                                page_text = "{{Otokat}}"
                            else:
                                page_text = "{{Standard-kat"

                                if config.get("wext", False):
                                    page_text += "|wext=1"

                                if config.get("sereke", False):
                                    page_text += "|sereke=1"

                                page_text += "}}\n\n{{subst:bêkategorî}}"

                            new_title = "Kategorî:" + ku_category
                            self.save_page(new_title, page_text, en_page_title, None)
                        else:
                            new_text = f"\n# [[:Kategorî:{ku_category}]] tê xwestin lê li en.wîkiyê tine"

                            self.log_cat(new_text)
        self.print_sirove("No match found")
        return None