Here naverokê

Bikarhêner:Wikihez/skrîpt/py/category creator.py

Ji Wîkîpediya, ensîklopediya azad.
#!/usr/bin/env python3
"""
python pwb.py updatewin -f:"category_creator.py" -s:"+fix"

Du fonksiyonên vê skrîptê hene:

create_category() bi [[Bikarhêner:Wikihez/skrîpt/py/categorizeWithCreator.py]] dixebite, jê re sernavekî kategoriya îngilîzî didî û li gorî regexan kategoriya kurdî çêdike. bi categorizewithcreator dixebite.

create_ku_category() bi [[Bikarhêner:Wikihez/skrîpt/py/category creator run.py]] dixebite, jê re sernavekî kategoriya kurdî didî û eger ev kategorî tinebe rûpela wê li gorî regexan çêdike.

Herdu fonksiyon jî kategoriya çêkirî ya nû bi wîkîdaneyê girêdide.
"""
import re
import json
import mytools
import requests

import pywikibot

# Quiet-mode switch: CategoryCreator.print_sirove() prints only while this is
# false, so True silences the progress messages. (The name is presumably
# Kurdish for "no commentary" - confirm with the author.)
SIROVETINE = True
# When true, load_patterns() reads the category regexes from
# "Bikarhêner:Wikihez/test.json" instead of the regular regex JSON page.
TESTING = False


class CategoryCreator:
    def __init__(self):
        """Open the ku/en Wikipedia sites and load the pattern tables.

        The three pattern attributes start out as ``None`` and are filled in
        by :meth:`load_patterns`, which reads JSON pages through
        ``self.site_ku``.
        """
        # Placeholders; load_patterns() assigns the real values below.
        self.main_cats = None
        self.tracking_cats = None
        self.country_mapping = None

        self.site_ku = pywikibot.Site("ku", 'wikipedia')
        self.site_en = pywikibot.Site("en", 'wikipedia')
        self.load_patterns()

        # Page linked from every edit summary written by this class.
        self.bot_name = "Bikarhêner:Wikihez/skrîpt/py/category creator.py"

        # English month name -> Kurdish month name, in calendar order.
        month_pairs = (
            ('January', 'kanûna paşîn'),
            ('February', 'sibat'),
            ('March', 'adar'),
            ('April', 'nîsan'),
            ('May', 'gulan'),
            ('June', 'hezîran'),
            ('July', 'tîrmeh'),
            ('August', 'tebax'),
            ('September', 'îlon'),
            ('October', 'çiriya pêşîn'),
            ('November', 'çiriya paşîn'),
            ('December', 'kanûna pêşîn'),
        )
        self.ku_months = dict(month_pairs)

    @staticmethod
    def print_sirove(sirove):
        if not SIROVETINE:
            return print(sirove)

    def load_patterns(self):
        """Load the regex tables and the country mapping from wiki JSON pages.

        The regex page (or the test page when ``TESTING`` is true) must hold
        a JSON object with the keys ``main_cats`` and ``tracking_cats``; the
        country page is parsed as a whole into ``self.country_mapping``.
        If anything goes wrong while reading or parsing, a warning is emitted
        and all three tables are reset to empty dicts.
        """
        if TESTING:
            cats_title = "Bikarhêner:Wikihez/test.json"
        else:
            cats_title = "Bikarhêner:Wikihez/skrîpt/json/category creator regex.json"
        welat_title = "Bikarhêner:Wikihez/skrîpt/json/category creator welat.json"

        cats_page = pywikibot.Page(self.site_ku, cats_title)
        welat_page = pywikibot.Page(self.site_ku, welat_title)

        try:
            # Read the raw wikitext of both pages before parsing anything.
            cats_text = cats_page.text
            welat_text = welat_page.text

            parsed = json.loads(cats_text)

            self.main_cats = parsed["main_cats"]
            self.print_sirove("main_cats loaded")

            self.tracking_cats = parsed["tracking_cats"]
            self.print_sirove("tracking_cats loaded")

            self.country_mapping = json.loads(welat_text)
            self.print_sirove("country_mapping loaded")
        except Exception as err:
            # Best effort: continue with empty tables so later calls simply
            # find no matching pattern.
            pywikibot.warning(f"Failed to load category patterns: {err}")
            self.main_cats = {}
            self.tracking_cats = {}
            self.country_mapping = {}

    def ku_exists(self, page_title):
        """Return whether *page_title* exists on ``self.site_ku``."""
        return pywikibot.Page(self.site_ku, page_title).exists()

    def en_exists(self, page_title):
        """Return whether *page_title* exists on ``self.site_en``."""
        return pywikibot.Page(self.site_en, page_title).exists()

    def get_wikidata_id(self, interwiki_title):
        """Return the id of the Wikidata item connected to an en.wikipedia page.

        If the page is a redirect, the title of its target is used for the
        lookup. Returns ``None`` (after logging an error) when the title is
        invalid or no page/item is found; other exceptions propagate.
        """
        en_page = pywikibot.Page(self.site_en, interwiki_title)

        # Follow a redirect so the lookup is done with the target's title.
        lookup_title = interwiki_title
        if en_page.isRedirectPage():
            lookup_title = en_page.getRedirectTarget().title()

        try:
            repo = self.site_en.data_repository()
            link = pywikibot.Link(f'en:{lookup_title}', source=repo)
            return pywikibot.Page(link).data_item().id
        except pywikibot.exceptions.InvalidTitleError:
            pywikibot.error(f"Invalid title: {lookup_title}")
        except pywikibot.exceptions.NoPageError:
            pywikibot.error(f"Page not found: {lookup_title}")
        return None

    @staticmethod
    def get_sitelinks(page_titles):
        """
        Retrieve sitelink titles for multiple pages from one site to another site using Wikidata.
        """
        url = "https://www.wikidata.org/w/api.php"
        params = {
            "action": "wbgetentities",
            "sites": "enwiki",
            "titles": "|".join(page_titles),  # Send all titles in one request
            "props": "sitelinks",
            "format": "json"
        }
        to_site = "kuwiki"

        try:
            response = requests.get(url, params=params)
            response.raise_for_status()  # Raise an HTTPError for bad responses
            data = response.json()

            # Check if the response contains entities
            if "entities" not in data:
                raise ValueError("The response does not contain 'entities'.")

            # Extract sitelinks for the requested pages
            ku_sitelinks = {}
            for entity in data["entities"].values():
                if "sitelinks" in entity and to_site in entity["sitelinks"]:
                    ku_sitelinks[entity["sitelinks"][to_site]["title"]] = entity["sitelinks"][to_site]["title"]

            return list(ku_sitelinks.values())  # Return only the sitelink titles

        except requests.exceptions.RequestException as e:
            print(f"An error occurred while making the request: {e}")
            return []
        except ValueError as e:
            print(f"An error occurred with the response data: {e}")
            return []
        except KeyError as e:
            print(f"An expected key is missing in the response data: {e}")
            return []

    def add_sitelink(self, page, item_id):
        """Connect a ku.wikipedia page to a Wikidata item and fill in ku terms.

        If the item already has a ``kuwiki`` sitelink, nothing is modified;
        the conflict is appended to the log page through :meth:`log_cat`.
        Otherwise the sitelink is set and, where missing, the ``ku`` label
        (the page title) and the ``ku`` description are added.

        Args:
            page: the ku.wikipedia page to link.
            item_id: id of the Wikidata item (as returned by
                :meth:`get_wikidata_id`).

        Returns:
            Always ``None``. Exceptions raised while editing the item are
            reported with ``pywikibot.error`` and not re-raised, so a failed
            Wikidata edit does not abort the run.
        """
        repo = self.site_ku.data_repository()

        try:
            item = pywikibot.ItemPage(repo, title=item_id)
            ku_title = page.title()

            # Check if there is an existing sitelink to kuwiki
            if "kuwiki" in item.sitelinks:
                existing_sitelink = item.sitelinks["kuwiki"]
                # Start a new numbered list entry, like the other log_cat()
                # callers do; log_cat() appends the text verbatim.
                # NOTE(review): existing_sitelink is interpolated via str();
                # confirm it renders as a link and does not categorise the
                # log page.
                self.log_cat(f"\n# {item_id} sernavê {existing_sitelink} bi kar tîne ji bo {ku_title}")

                self.print_sirove(f"Sitelink already exists for {ku_title} in {item_id}. Skipping modification.")

                return None

            item.setSitelink(page, summary=f"Added sitelink {page}")
            self.print_sirove(f"Sitelink added for {ku_title} to {item_id}")

            item_dict = item.get()

            existing_label = item_dict.get('labels', {}).get('ku', None)
            existing_description = item_dict.get('descriptions', {}).get('ku', None)

            if existing_label is None:
                item.editLabels({"ku": ku_title}, summary=f"Added [ku] label: {ku_title}")
                self.print_sirove(f"label '{ku_title}' added to {item_id}")
            else:
                self.print_sirove(f"Label already exists: {existing_label}")

            description = "Wîkîmediya:Kategorî"
            if existing_description is None:
                item.editDescriptions({"ku": description}, summary=f"Added [ku] description: {description}")
                self.print_sirove(f"Description '{description}' added to {item_id}")
            else:
                self.print_sirove(f"Description already exists: {existing_description}")

        # Failures go through pywikibot.error rather than print_sirove so
        # that they stay visible when quiet mode (SIROVETINE) is on.
        except pywikibot.exceptions.OtherPageSaveError as e:
            pywikibot.error(f"Error adding sitelink for {page.title()}: {e}")
        except Exception as e:
            pywikibot.error(f"Error in add_sitelink function: {e}")
        return None

    def save_page(self, page_title, text, en_title, summary):
        """Save a ku.wikipedia page and connect it to its English counterpart.

        The page is saved with *text*. If *en_title* exists on en.wikipedia
        and a Wikidata item id is found for it, the page is linked through
        :meth:`add_sitelink`. Otherwise an ``[[en:...]]`` interwiki link is
        appended to the page, which is then saved a second time.

        Args:
            page_title: full title of the ku.wikipedia page.
            text: wikitext to store.
            en_title: full title of the corresponding en.wikipedia page.
            summary: edit summary; a default one is used when it is falsy.
        """
        ku_page = pywikibot.Page(self.site_ku, page_title)
        ku_page.text = text
        edit_summary = summary or f"[[{self.bot_name}|Bot]]: Wekheva [[en:{en_title}]] hat çêkirin"
        ku_page.save(summary=edit_summary)

        # Only look for a Wikidata item when the English page really exists.
        item_id = self.get_wikidata_id(en_title) if self.en_exists(en_title) else None

        if not item_id:
            # No Wikidata item available: fall back to an inline interwiki link.
            ku_page.text = ku_page.text + f"\n[[en:{en_title}]]"
            ku_page.save(
                summary=f"[[{self.bot_name}|Bot]]: Înterwîkî [[en:{en_title}]] lê hat zêdekirin ji ber ku îtema Wîkîdata tine")
            return

        try:
            self.add_sitelink(ku_page, item_id)
        except pywikibot.exceptions.OtherPageSaveError as err:
            self.print_sirove(
                f"Error saving sitelink for page {ku_page.title()}: {str(err)}.")
        except Exception as err:
            self.print_sirove(
                f"Error processing page {ku_page.title()}: {str(err)}.")

    def create_category(self, en_title):

        for enwiki_regex, config in self.main_cats.items():

            # Replace {country} with the actual country names
            if "{country}" in enwiki_regex:
                for en_country_name, ku_country_name in self.country_mapping.items():
                    updated_enwiki_regex = enwiki_regex.replace("{country}", en_country_name)
                    enwiki_match = re.match(updated_enwiki_regex, en_title)
                    if enwiki_match:
                        self.print_sirove("enwiki_match matched")
                        kuwiki_pattern = config["ku_title"]

                        # Replace {country} with the actual ku_country_name

                        kuwiki_pattern = kuwiki_pattern.replace("{country}", self.get_cemandi(ku_country_name))

                        year_match = enwiki_match.group(
                            1) if enwiki_match and enwiki_match.lastindex and enwiki_match.group(1) else None
                        if year_match:
                            year = int(year_match) if year_match else None
                            self.print_sirove(f"Year: {year}")

                            if config.get("add_ku_suffix", False):
                                suffix = self.get_ku_suffix(year)
                                kurdish_title = kuwiki_pattern.format(year=str(year) + suffix)
                                self.print_sirove("add_ku_suffix is true")
                                self.print_sirove(f"Kurdish Title: {kurdish_title}")
                            else:
                                kurdish_title = kuwiki_pattern.format(year=year)
                                self.print_sirove("add_ku_suffix is false")
                                self.print_sirove(f"Kurdish Title: {kurdish_title}")
                        else:
                            kurdish_title = kuwiki_pattern

                        kuwiki_title = "Kategorî:" + kurdish_title
                        self.print_sirove(f"Kurdish Page Title: {kuwiki_title}")

                        if self.ku_exists(kuwiki_title):
                            pywikibot.output(f"The category page {kuwiki_title} already exists.")
                            return None

                        self.print_sirove(f"Creating category {kuwiki_title}.")

                        if config.get("otokat", False):
                            page_text = "{{Otokat}}"
                        else:
                            page_text = "{{Standard-kat"

                            if config.get("wext", False):
                                page_text += "|wext=1"

                            if config.get("sereke", False):
                                page_text += "|sereke=1"

                            page_text += "}}\n\n"

                            parent_categories = mytools.get_unhidden_categories("en", f'Category:{en_title}', withNS=True)
                            self.print_sirove(f"parent_categories: {parent_categories}")

                            # Retrieve sitelinks in a single request
                            ku_parent_categories = self.get_sitelinks(parent_categories)
                            if ku_parent_categories:
                                self.print_sirove(f"ku_parent_categories: {ku_parent_categories}")
                                page_text += "\n".join(f"[[{cat}]]" for cat in ku_parent_categories) + "\n"
                            else:
                                page_text += "{{subst:bêkategorî}}"

                        en_title = f'Category:{en_title}'
                        self.save_page(kuwiki_title, page_text, en_title, None)

                        return kurdish_title

            else:
                enwiki_match = re.match(enwiki_regex, en_title)
                if enwiki_match:
                    self.print_sirove("enwiki_match matched")
                    year_match = enwiki_match.group(1) if enwiki_match.group(1) else None
                    year = int(year_match) if year_match else None
                    kuwiki_pattern = config["ku_title"]
                    self.print_sirove(f"Year: {year}")

                    if config.get("add_ku_suffix", False):
                        suffix = self.get_ku_suffix(year)
                        kurdish_title = kuwiki_pattern.format(year=str(year) + suffix)
                        self.print_sirove("add_ku_suffix is true")
                        self.print_sirove(f"Kurdish Title: {kurdish_title}")
                    else:
                        kurdish_title = kuwiki_pattern.format(year=str(year))
                        self.print_sirove("add_ku_suffix is false")
                        self.print_sirove(f"Kurdish Title: {kurdish_title}")

                    ku_title = "Kategorî:" + kurdish_title
                    self.print_sirove(f"Kurdish Page Title: {ku_title}")

                    if self.ku_exists(ku_title):
                        pywikibot.output(f"The category page {ku_title} already exists.")
                        return None

                    self.print_sirove(f"Creating category {ku_title}.")

                    if config.get("otokat", False):
                        page_text = "{{Otokat}}"
                    else:
                        page_text = "{{Standard-kat"

                        if config.get("wext", False):
                            page_text += "|wext=1"

                        if config.get("sereke", False):
                            page_text += "|sereke=1"

                        page_text += "}}\n\n"

                        parent_categories = mytools.get_unhidden_categories("en", f'Category:{en_title}', withNS=True)
                        self.print_sirove(f"parent_categories: {parent_categories}")

                        # Retrieve sitelinks in a single request
                        ku_parent_categories = self.get_sitelinks(parent_categories)
                        if ku_parent_categories:
                            self.print_sirove(f"ku_parent_categories: {ku_parent_categories}")
                            page_text += "\n".join(f"[[{cat}]]" for cat in ku_parent_categories) + "\n"
                        else:
                            page_text += "{{subst:bêkategorî}}"

                    en_title = f'Category:{en_title}'

                    self.save_page(ku_title, page_text, en_title, None)

                    return kurdish_title

        self.print_sirove("No match found")
        return None

    @staticmethod
    def get_ku_suffix(year):
        # Check if the last digit of the year is 1 and it's not 11
        if year % 10 == 1 and year % 100 != 11:
            suffix = "ê"
        else:
            suffix = "an"
        return suffix

    @staticmethod
    def get_en_suffix(year):
        # Get the last two digits of the year
        last_two_digits = year % 100

        # Check for special cases
        if last_two_digits in [11, 12, 13]:
            return "th"

        # Get the last digit of the year
        last_digit = last_two_digits % 10

        # Return the corresponding suffix
        if last_digit == 1:
            return "st"
        elif last_digit == 2:
            return "nd"
        elif last_digit == 3:
            return "rd"
        else:
            return "th"

    @staticmethod
    def get_cemandi(welat):
        welat_lower = welat.lower()

        if ' ' in welat:
            # If there is a space in ku_country_name, return it as is
            return welat
        elif welat_lower.endswith(('a', 'e', 'ê', 'i', 'u', 'û', 'o')):
            # If ku_country_name ends with any of these characters in lowercase, append "yê"
            return welat + "yê"
        elif welat_lower.endswith('î'):
            # If ku_country_name ends with "î" in lowercase, remove it and append "iyê"
            return welat[:-1] + "iyê"
        else:
            # Otherwise, append "ê"
            return welat + "ê"

    def log_cat(self, new_text):
        """Append *new_text* verbatim to the bot's control/log page and save it."""
        log_page = pywikibot.Page(self.site_ku, "Bikarhêner:Balyozbot/kontrol/category creator")
        log_page.text += new_text
        log_page.save(summary=f"[[{self.bot_name}|Bot]]: Problemek hat qeydkirin")

    def create_ku_category(self, ku_category):
        """Create a missing ku.wikipedia category from its Kurdish title.

        Steps:
        1. Do nothing if ``Kategorî:<ku_category>`` already exists.
        2. If the title matches a monthly tracking pattern
           (``self.tracking_cats``), create it as a monthly cleanup category.
        3. Otherwise try every ``self.main_cats`` entry that has a
           ``ku_regex``. The page is created for the first match whose
           English category exists on en.wikipedia, and the method returns
           right after saving.
        4. If patterns matched but none of the English categories exists, a
           single entry is appended to the log page; if nothing matched at
           all, "No match found" is reported.

        Args:
            ku_category: Kurdish category title without the ``Kategorî:``
                prefix.

        Returns:
            Always ``None``.
        """
        self.print_sirove(f"ku_category: {ku_category}")
        ku_title = "Kategorî:" + ku_category
        if self.ku_exists(ku_title):
            print(f"{ku_category} already exists. Derdê te çi ye?")
            return None

        tracking_en_title = self._tracking_en_title(ku_category)
        if tracking_en_title is not None:
            en_category = "Category:" + tracking_en_title
            self.print_sirove(f"en_category: {en_category}")
            self.print_sirove(f"ku_category: {ku_title}")

            text = "{{Kategoriya paqijkirinê ya mehane}}"
            summary = f"[[{self.bot_name}|Bot]]: Kategoriya paqijkirinê ya mehane hat çêkirin"
            self.save_page(ku_title, text, en_category, summary)
            return None

        matched = False
        for config, en_title in self._iter_en_titles(ku_category):
            matched = True
            en_page_title = "Category:" + en_title
            self.print_sirove(f"enwiki Page Title: {en_page_title}")

            if not self.en_exists(en_page_title):
                # Another pattern may still lead to an existing category.
                continue

            self.print_sirove(
                f"The category page {en_page_title} exists. Creating category {ku_title}.")

            if config.get("otokat", False):
                page_text = "{{Otokat}}"
            else:
                page_text = "{{Standard-kat"
                if config.get("wext", False):
                    page_text += "|wext=1"
                if config.get("sereke", False):
                    page_text += "|sereke=1"
                page_text += "}}\n\n"

                parent_categories = mytools.get_unhidden_categories("en", en_page_title, withNS=True)
                self.print_sirove(f"parent_categories: {parent_categories}")

                # Retrieve sitelinks in a single call
                ku_parent_categories = self.get_sitelinks(parent_categories)
                if ku_parent_categories:
                    self.print_sirove(f"ku_parent_categories: {ku_parent_categories}")
                    page_text += "\n".join(f"[[{cat}]]" for cat in ku_parent_categories) + "\n"
                else:
                    # No Kurdish parent category found.
                    page_text += "{{subst:bêkategorî}}"

            self.save_page(ku_title, page_text, en_page_title, None)
            # Stop here: the page exists now, so later patterns must not
            # save it again.
            return None

        if matched:
            # The category is wanted but has no equivalent on en.wikipedia;
            # record that once, however many patterns matched.
            self.log_cat(f"\n# [[:Kategorî:{ku_category}]] tê xwestin lê li en.wîkiyê tine")
        else:
            self.print_sirove("No match found")
        return None

    def _tracking_en_title(self, ku_category):
        """Return the English title for a monthly tracking category, or None."""
        for tracking_regex, tracking_enwiki_regex in self.tracking_cats.items():
            # Try every month: {month} is replaced by the Kurdish name on the
            # ku side and by the English name on the en side.
            for en_month, ku_month in self.ku_months.items():
                tracking_match = re.match(tracking_regex.replace("{month}", ku_month), ku_category)
                if not tracking_match:
                    continue
                self.print_sirove("ku_category matches tracking regex")

                en_title = tracking_enwiki_regex.replace("{month}", en_month)
                # Group 1 of the Kurdish regex carries the value for {year}.
                value = tracking_match.group(1) if tracking_match.lastindex else None
                if value is not None:
                    en_title = en_title.replace("{year}", value)
                return en_title
        return None

    def _iter_en_titles(self, ku_category):
        """Yield ``(config, en_title)`` for every main pattern matching *ku_category*."""
        for config in self.main_cats.values():
            if "ku_regex" not in config:
                continue
            ku_regex = config["ku_regex"]
            has_country = "{country}" in ku_regex
            # A pattern without {country} is tried exactly once.
            countries = self.country_mapping.items() if has_country else [(None, None)]

            for en_country_name, ku_country_name in countries:
                # NOTE(review): the country name is inserted into the regex
                # unescaped; names with regex metacharacters would need
                # escaping - confirm against the JSON.
                if has_country:
                    regex = ku_regex.replace("{country}", self.get_cemandi(ku_country_name))
                else:
                    regex = ku_regex

                kuwiki_match = re.match(regex, ku_category)
                if not kuwiki_match:
                    continue
                self.print_sirove("kuwiki_match matched")

                enwiki_pattern = config["en_title"]
                if has_country:
                    enwiki_pattern = enwiki_pattern.replace("{country}", en_country_name)

                yield config, self._en_title_from_match(config, enwiki_pattern, kuwiki_match)

    def _en_title_from_match(self, config, enwiki_pattern, kuwiki_match):
        """Fill the year captured by *kuwiki_match* into *enwiki_pattern*."""
        year_text = kuwiki_match.group(1) if kuwiki_match.lastindex else None
        if not year_text:
            en_title = enwiki_pattern
        else:
            year = int(year_text)
            if config.get("add_en_suffix", False):
                en_title = enwiki_pattern.format(year=str(year) + self.get_en_suffix(year))
                self.print_sirove("add_en_suffix is true")
            else:
                en_title = enwiki_pattern.format(year=year)
                self.print_sirove("add_en_suffix is false")
        self.print_sirove(f"en_title: {en_title}")
        return en_title