Bikarhêner:Balyozxane/skrîpt/py/category creator.py

Ji Wîkîpediya, ensîklopediya azad.
"""
Du fonksiya vê skrîptê hene. 
create_category() bi [[Bikarhêner:Balyozxane/skrîpt/py/categorizeWithCreator.py]] dixebite, jê re sernavekî kategoriya îngilîzî didî û li gorî regexan kategoriya kurdî çêdike.
create_ku_category() bi [[Bikarhêner:Balyozxane/skrîpt/py/category creator run.py]] dixebite, jê re sernavekî kategoriya kurdî didî û eger ev kategorî tinebe rûpela wê li gorî regexan çêdike. 

Herdu fonksiyon jî kategoriya çêkirî ya nû bi wîkîdaneyê girêdide.
"""
import re
import json
import pywikibot

# Quiet switch read by print_sirove(): while this is True, print_sirove() prints nothing.
# (The name presumably reads as Kurdish "şîrove tine", i.e. "no comments" - confirm with the author.)
sirovetine = True
# When True, CategoryCreator.load_patterns() reads the category regexes from the test JSON page
# instead of the regular "category creator regex.json" page.
TESTING = False


def print_sirove(sirove):
    """Print a progress message unless the module-level ``sirovetine`` switch is on.

    :param sirove: the text to print.
    :return: always ``None``.
    """
    if sirovetine:
        # Quiet mode: drop the message.
        return None
    print(sirove)
    return None


def get_wikidata_id(interwiki_title):
    """Return the ID of the Wikidata item connected to an English Wikipedia page.

    A redirect is followed once, so the item of the redirect target is used.

    :param interwiki_title: full title of the page on English Wikipedia
        (callers in this file pass titles starting with ``Category:``).
    :return: the item ID as returned by ``data_item().id``, or ``None`` when
        the title is invalid or no Wikidata item is connected to the page.
    """
    site = pywikibot.Site("en", "wikipedia")
    page = pywikibot.Page(site, interwiki_title)

    try:
        # The redirect check is inside the try block as well, so an invalid
        # title raised here is reported the same way instead of escaping.
        if page.isRedirectPage():
            interwiki_title = page.getRedirectTarget().title()

        # Get the Wikidata item ID based on the (possibly updated) Wikipedia title
        item_page = pywikibot.Page(pywikibot.Link(f'en:{interwiki_title}', source=site.data_repository()))
        return item_page.data_item().id
    except pywikibot.exceptions.InvalidTitleError:
        pywikibot.error(f"Invalid title: {interwiki_title}")
    except pywikibot.exceptions.NoPageError:
        # No item is connected to the page: nothing to link to, but callers
        # (which have usually just saved a page) must not crash on it.
        pywikibot.warning(f"No Wikidata item found for: {interwiki_title}")
    return None


def add_sitelink(page, item_id):
    """Connect a Kurdish Wikipedia page to a Wikidata item and fill in missing ``ku`` terms.

    After adding the sitelink, the item's ``ku`` label is set to the page
    title and its ``ku`` description to ``"Wîkîmediya:Kategorî"``, each only
    when the item does not have one yet.

    This is best-effort: no exception leaves this function. Failures are
    reported through ``pywikibot.error`` so they stay visible even when
    ``print_sirove`` output is switched off.

    :param page: the page to link (its ``title()`` is used as the label).
    :param item_id: ID of the Wikidata item to edit.
    :return: ``None``.
    """
    site = pywikibot.Site("ku", "wikipedia")
    repo = site.data_repository()

    try:
        item = pywikibot.ItemPage(repo, title=item_id)
        item.setSitelink(page, summary=f"Added sitelink {page}")
        title = page.title()
        print_sirove(f"Sitelink added for {title} to {item_id}")

        item_dict = item.get()
        existing_label = item_dict.get('labels', {}).get('ku', None)
        existing_description = item_dict.get('descriptions', {}).get('ku', None)

        if existing_label is None:
            item.editLabels({"ku": title}, summary=f"Added [ku] label: {title}")
            print_sirove(f"label '{title}' added to {item_id}")
        else:
            print_sirove(f"Label already exists: {existing_label}")

        description = "Wîkîmediya:Kategorî"
        if existing_description is None:
            item.editDescriptions({"ku": description}, summary=f"Added [ku] description: {description}")
            print_sirove(f"Description '{description}' added to {item_id}")
        else:
            print_sirove(f"Description already exists: {existing_description}")

    except pywikibot.exceptions.OtherPageSaveError as e:
        pywikibot.error(f"Error adding sitelink for {page.title()}: {e}")
    except Exception as e:
        # Deliberate catch-all: a Wikidata failure must not stop the bot run,
        # but it has to be reported rather than silently dropped.
        pywikibot.error(f"Error in add_sitelink function: {e}")


class CategoryCreator:
    def __init__(self):
        """Set up both wiki sites, load the regex/country tables and the month table."""
        self.bot_name = "Bikarhêner:Balyozxane/skrîpt/py/category creator.py"

        # English month name -> month name used on kuwiki; create_ku_category()
        # substitutes these for the "{month}" placeholder.
        self.ku_months = dict((
            ("January", "kanûna paşîn"),
            ("February", "sibat"),
            ("March", "adar"),
            ("April", "nîsan"),
            ("May", "gulan"),
            ("June", "hezîran"),
            ("July", "tîrmeh"),
            ("August", "tebax"),
            ("September", "îlon"),
            ("October", "çiriya pêşîn"),
            ("November", "çiriya paşîn"),
            ("December", "kanûna pêşîn"),
        ))

        # Placeholders; load_patterns() below replaces all three.
        self.country_mapping = self.main_cats = self.tracking_cats = None

        self.site_ku = pywikibot.Site("ku", 'wikipedia')
        self.site_en = pywikibot.Site("en", 'wikipedia')

        # Needs self.site_ku, so it has to run after the sites are created.
        self.load_patterns()

    def load_patterns(self):
        """Load the category regexes and the country table from JSON pages on kuwiki.

        Fills ``self.main_cats`` and ``self.tracking_cats`` from the regex
        page (the test page when ``TESTING`` is on) and ``self.country_mapping``
        from the country page. If anything fails while reading or parsing,
        a warning is emitted and all three are reset to empty dicts.
        """
        if TESTING:
            json_page_title = "Bikarhêner:Balyozxane/test.json"
        else:
            json_page_title = "Bikarhêner:Balyozxane/skrîpt/json/category creator regex.json"
        json_page_cats = pywikibot.Page(self.site_ku, json_page_title)
        json_page_welat = pywikibot.Page(
            self.site_ku, "Bikarhêner:Balyozxane/skrîpt/json/category creator welat.json")

        try:
            # Read both pages before parsing anything
            cats_json = json_page_cats.text
            welat_json = json_page_welat.text

            patterns_data = json.loads(cats_json)

            # Regexes for the main categories
            self.main_cats = patterns_data["main_cats"]
            print_sirove("main_cats loaded")

            # Regexes for the tracking categories
            self.tracking_cats = patterns_data["tracking_cats"]
            print_sirove("tracking_cats loaded")

            # Country titles
            self.country_mapping = json.loads(welat_json)
            print_sirove("country_mapping loaded")
        except Exception as e:
            # E.g. invalid JSON or a missing key: fall back to empty tables
            pywikibot.warning(f"Failed to load category patterns: {e}")
            self.main_cats, self.tracking_cats, self.country_mapping = {}, {}, {}

    def ku_exists(self, page_title):
        """Return whether *page_title* exists on the ``self.site_ku`` wiki."""
        return pywikibot.Page(self.site_ku, page_title).exists()

    def en_exists(self, page_title):
        """Return whether *page_title* exists on the ``self.site_en`` wiki."""
        return pywikibot.Page(self.site_en, page_title).exists()

    def save_page(self, page_title, text, en_title, summary):
        """Save a page on kuwiki and try to connect it to the Wikidata item of *en_title*.

        :param page_title: full title of the kuwiki page to write.
        :param text: wikitext to store on the page.
        :param en_title: full title of the matching English Wikipedia page;
            used in the default summary and for the Wikidata lookup.
        :param summary: edit summary; when empty or ``None`` a default one
            mentioning *en_title* is used.
        """
        target = pywikibot.Page(self.site_ku, page_title)
        target.text = text

        edit_summary = summary or f"[[{self.bot_name}|Bot]]: Wekheva [[en:{en_title}]] hat çêkirin"
        target.save(summary=edit_summary)

        wikidata_id = get_wikidata_id(en_title)
        if not wikidata_id:
            # Nothing to link the new page to.
            return

        try:
            add_sitelink(target, wikidata_id)
        except pywikibot.exceptions.OtherPageSaveError as e:
            print_sirove(
                f"Error saving sitelink for page {target.title()}: {str(e)}.")
        except Exception as e:
            print_sirove(
                f"Error processing page {target.title()}: {str(e)}.")

    def create_category(self, en_title):
        """Create the Kurdish category that corresponds to an English category title.

        The title is tried against every regex in ``self.main_cats``. A regex
        containing ``{country}`` is expanded once per entry of
        ``self.country_mapping``. The first regex that matches decides the
        outcome; later regexes are not tried.

        :param en_title: English category title without the ``Category:`` prefix.
        :return: the Kurdish title (without the ``Kategorî:`` prefix) when the
            page was created; ``None`` when the Kurdish page already exists or
            when no regex matches.
        """
        for enwiki_regex, config in self.main_cats.items():
            if "{country}" in enwiki_regex:
                # One candidate regex per known country.
                candidates = [
                    (enwiki_regex.replace("{country}", en_country_name), ku_country_name)
                    for en_country_name, ku_country_name in self.country_mapping.items()
                ]
            else:
                candidates = [(enwiki_regex, None)]

            for candidate_regex, ku_country_name in candidates:
                enwiki_match = re.match(candidate_regex, en_title)
                if not enwiki_match:
                    continue
                print_sirove("enwiki_match matched")

                kuwiki_pattern = config["ku_title"]
                if ku_country_name is not None:
                    # Replace {country} with the inflected Kurdish country name
                    kuwiki_pattern = kuwiki_pattern.replace("{country}", self.get_cemandi(ku_country_name))

                kurdish_title = self._ku_title_from_match(enwiki_match, kuwiki_pattern, config)
                print_sirove(f"Kurdish Title: {kurdish_title}")

                kuwiki_title = "Kategorî:" + kurdish_title
                print_sirove(f"Kurdish Page Title: {kuwiki_title}")

                if self.ku_exists(kuwiki_title):
                    pywikibot.output(f"The category page {kuwiki_title} already exists.")
                    return None

                print_sirove(f"Creating category {kuwiki_title}.")

                if config.get("otokat", False):
                    page_text = "{{Otokat}}"
                elif config.get("wext", False):
                    page_text = "{{Standard-kat|wext=1}}"
                else:
                    page_text = "{{Standard-kat}}"

                self.save_page(kuwiki_title, page_text, f'Category:{en_title}', None)
                return kurdish_title

        print_sirove("No match found")
        return None

    def _ku_title_from_match(self, enwiki_match, kuwiki_pattern, config):
        """Fill *kuwiki_pattern* with the year captured as group 1 of *enwiki_match*.

        When the regex has no group, or group 1 captured nothing, the pattern
        is returned unchanged instead of raising or inserting "None".
        """
        # lastindex is None when no group took part in the match, so group(1)
        # is only asked for when it is safe to do so.
        year_match = enwiki_match.group(1) if enwiki_match.lastindex else None
        if not year_match:
            return kuwiki_pattern

        year = int(year_match)
        print_sirove(f"Year: {year}")

        if config.get("add_ku_suffix", False):
            print_sirove("add_ku_suffix is true")
            return kuwiki_pattern.format(year=str(year) + self.get_ku_suffix(year))

        print_sirove("add_ku_suffix is false")
        return kuwiki_pattern.format(year=year)

    @staticmethod
    def get_ku_suffix(year):
        # Check if the last digit of the year is 1 and it's not 11
        if year % 10 == 1 and year % 100 != 11:
            suffix = "ê"
        else:
            suffix = "an"
        return suffix

    @staticmethod
    def get_en_suffix(year):
        # Get the last two digits of the year
        last_two_digits = year % 100

        # Check for special cases
        if last_two_digits in [11, 12, 13]:
            return "th"

        # Get the last digit of the year
        last_digit = last_two_digits % 10

        # Return the corresponding suffix
        if last_digit == 1:
            return "st"
        elif last_digit == 2:
            return "nd"
        elif last_digit == 3:
            return "rd"
        else:
            return "th"

    @staticmethod
    def get_cemandi(welat):
        welat_lower = welat.lower()

        if ' ' in welat:
            # If there is a space in ku_country_name, return it as is
            return welat
        elif welat_lower.endswith(('a', 'e', 'ê', 'i', 'u', 'û', 'o')):
            # If ku_country_name ends with any of these characters in lowercase, append "yê"
            return welat + "yê"
        elif welat_lower.endswith('î'):
            # If ku_country_name ends with "î" in lowercase, remove it and append "iyê"
            return welat[:-1] + "iyê"
        else:
            # Otherwise, append "ê"
            return welat + "ê"

    def log_cat(self, page_title):
        """Append a line to the bot's control page for a wanted category missing on enwiki.

        :param page_title: Kurdish category title without the ``Kategorî:`` prefix.
        """
        log_page = pywikibot.Page(self.site_ku, "Bikarhêner:Balyozbot/kontrol/category creator")
        log_page.text += f"\n# [[:Kategorî:{page_title}]] tê xwestin lê li en.wîkiyê tine"
        log_page.save(summary=f"[[{self.bot_name}|Bot]]: {page_title} tê xwestin lê li en.wîkiyê tine")

    def create_ku_category(self, ku_category):
        """Create a wanted Kurdish category page if a matching English category exists.

        The title is first tried against ``self.tracking_cats`` (monthly
        maintenance categories, with ``{month}`` expanded for every month) and
        then against the ``ku_regex`` of every entry in ``self.main_cats``.
        When the derived English category exists the Kurdish page is saved;
        otherwise the request is recorded with ``log_cat``.

        :param ku_category: Kurdish category title without the ``Kategorî:`` prefix.
        :return: always ``None``.
        """
        print_sirove(f"ku_category: {ku_category}")
        ku_title = "Kategorî:" + ku_category
        if self.ku_exists(ku_title):
            print(f"{ku_category} already exists. Derdê te çi ye?")
            return None

        for tracking_regex, tracking_enwiki_regex in self.tracking_cats.items():
            # Replace the {month} placeholder with each month and try to match
            for en_month, ku_month in self.ku_months.items():
                replaced_tracking_regex = tracking_regex.replace("{month}", ku_month)
                tracking_match = re.match(replaced_tracking_regex, ku_category)
                if not tracking_match:
                    continue
                print_sirove("ku_category matches tracking regex")

                # Group 1 of the Kurdish regex carries the value for {year}
                ku_numeric_value = tracking_match.group(1)
                en_title = (tracking_enwiki_regex
                            .replace("{month}", en_month)
                            .replace("{year}", f"{ku_numeric_value}"))
                en_category = "Category:" + en_title
                print_sirove(f"en_category: {en_category}")
                print_sirove(f"ku_category: {ku_title}")

                if not self.en_exists(en_category):
                    print_sirove(f"{en_category} li enwîkiyê tine ye loma {ku_title} nehat çêkirin")
                    self.log_cat(ku_category)
                else:
                    text = "{{Kategoriya paqijkirinê ya mehane}}"
                    summary = f"[[{self.bot_name}|Bot]]: Kategoriya paqijkirinê ya mehane hat çêkirin"
                    self.save_page(ku_title, text, en_category, summary)
                return None

        matched = False
        for config in self.main_cats.values():
            if "ku_regex" not in config:
                continue
            ku_regex = config["ku_regex"]

            if "{country}" in ku_regex:
                for en_country_name, ku_country_name in self.country_mapping.items():
                    # Replace {country} with the inflected Kurdish country name
                    updated_ku_regex = ku_regex.replace("{country}", self.get_cemandi(ku_country_name))
                    kuwiki_match = re.match(updated_ku_regex, ku_category)
                    if not kuwiki_match:
                        continue
                    matched = True
                    print_sirove("kuwiki_match matched")

                    enwiki_pattern = config["en_title"].replace("{country}", en_country_name)
                    en_page_title = "Category:" + self._en_title_from_match(kuwiki_match, enwiki_pattern, config)
                    print_sirove(f"enwiki Page Title: {en_page_title}")

                    # NOTE(review): scanning continues after a match, as before,
                    # so a later pattern may handle the same title again.
                    self._create_ku_from_en(ku_category, en_page_title, config)
            else:
                kuwiki_match = re.match(ku_regex, ku_category)
                if not kuwiki_match:
                    continue
                matched = True
                print_sirove("kuwiki_match matched")

                en_page_title = "Category:" + self._en_title_from_match(kuwiki_match, config["en_title"], config)
                print_sirove(f"enwiki Page Title: {en_page_title}")

                if self._create_ku_from_en(ku_category, en_page_title, config):
                    # NOTE(review): categorize() is not defined in the visible part
                    # of this class, and only this branch calls it - confirm both.
                    self.categorize(ku_title)

        if not matched:
            print_sirove("No match found")
        return None

    def _en_title_from_match(self, kuwiki_match, enwiki_pattern, config):
        """Fill *enwiki_pattern* with the year captured as group 1 of *kuwiki_match*."""
        # lastindex is None when no group took part in the match
        year_match = kuwiki_match.group(1) if kuwiki_match.lastindex else None
        if not year_match:
            en_title = enwiki_pattern
        else:
            year = int(year_match)
            if config.get("add_en_suffix", False):
                en_title = enwiki_pattern.format(year=str(year) + self.get_en_suffix(year))
                print_sirove("add_en_suffix is true")
            else:
                en_title = enwiki_pattern.format(year=year)
                print_sirove("add_en_suffix is false")
        print_sirove(f"en_title: {en_title}")
        return en_title

    def _create_ku_from_en(self, ku_category, en_page_title, config):
        """Save the Kurdish category if *en_page_title* exists, else log the request.

        :return: ``True`` when the page was saved, ``False`` when it was logged.
        """
        if not self.en_exists(en_page_title):
            # The category is wanted but has no counterpart on enwiki
            self.log_cat(ku_category)
            return False

        print_sirove(
            f"The category page {en_page_title} exists. Creating category Kategorî:{ku_category}.")

        if config.get("otokat", False):
            page_text = "{{Otokat}}"
        elif config.get("wext", False):
            page_text = "{{Standard-kat|wext=1}}"
        else:
            page_text = "{{Standard-kat}}"

        self.save_page("Kategorî:" + ku_category, page_text, en_page_title, None)
        return True