#!/usr/bin/env python3
"""
python pwb.py updatewin -file:"mytools.py" -s:"+referring_page_generator"
Tools:
ucfirst
lcfirst
without_comments
get_cur_month_year Returns current month and year as a string in Kurdish
get_cat_members Retrieve all members of a category that belong to a given namespace
get_unhidden_categories Fetches the unhidden categories for a page
get_sitelinks Retrieve sitelinks for the title and dbNames
get_sitelinks_qid Retrieve sitelinks for the QID and language codes.
get_template_redirects Return a list of redirects of the given template.
is_template_in_page Check if a given template or one of its redirects is included in the page text.
is_category_in_page Check if a given category is included in the categories of a page.
remove_template Remove template from wiki text.
zaravayen_din Kontrol bike eger sayfe di kategoriyên zaravayan de ye.
matrix_to_wikitable
"""
import datetime
import requests
import mwparserfromhell
import pywikibot
from pywikibot.tools import first_lower, first_upper
from typing import List, Union, Iterator
from functools import lru_cache
# Kurdish month names keyed by month number (1-12).
ku_months = {
    1: 'kanûna paşîn',
    2: 'sibat',
    3: 'adar',
    4: 'nîsan',
    5: 'gulan',
    6: 'hezîran',
    7: 'tîrmeh',
    8: 'tebax',
    9: 'îlon',
    10: 'çiriya pêşîn',
    11: 'çiriya paşîn',
    12: 'kanûna pêşîn'
}


def get_cur_month_year() -> str:
    """
    Return the current month (in Kurdish) and year as a string.

    :return: e.g. "gulan 2024"
    """
    # Take a single snapshot so month and year cannot disagree if the
    # clock rolls over between two separate now() calls.
    now = datetime.datetime.now()
    return f"{ku_months[now.month]} {now.year}"
@lru_cache(maxsize=None)
def get_cat_members(site: pywikibot.site.BaseSite, category_name: str, namespace: int) -> List[str]:
    """
    Retrieve all members of a category that belong to a given namespace.

    Results are cached per (site, category_name, namespace).

    :param site: The Pywikibot site object representing the target wiki.
    :param category_name: The name of the category from which to retrieve members.
    :param namespace: The namespace number to filter the category members by.
    :return: A list of titles (without namespace) of the category members
        that belong to the specified namespace.
    """
    category = pywikibot.Category(site, 'Category:' + category_name)
    # Let the API filter by namespace server-side instead of fetching every
    # member and discarding the mismatches client-side.
    return [member.title(with_ns=False)
            for member in category.members(namespaces=[namespace])]
@lru_cache(maxsize=None)
def get_template_redirects(site, template_title):
    """
    Return a list of redirects of the given template (plus the template itself).

    Results are cached per (site, template_title).

    :param site: pywikibot Site
    :param template_title: template name without the "Şablon:" prefix
    :return: list of redirect titles (without namespace), with the given
        template's own title appended last
    """
    template_page = pywikibot.Page(site, "Şablon:" + template_title)
    redirects = template_page.backlinks(filter_redirects=True, namespaces=[10])
    redirect_titles = [redirect.title(with_ns=False) for redirect in redirects]
    # Append the original title directly; the previous split(":")[-1] trick
    # truncated template names that themselves contain a colon.
    redirect_titles.append(template_title)
    return redirect_titles
def remove_template(text: str, template_redirects) -> str:
    """
    Remove specified template(s) from wiki text.

    :param text: Wiki text
    :param template_redirects: List of template names or a single template
        name as a string; compared with the first letter upper-cased
    :return: Wiki text with the matching templates removed
    """
    if isinstance(template_redirects, str):
        template_redirects = [template_redirects]
    targets = set(template_redirects)  # O(1) membership tests
    wikicode = mwparserfromhell.parse(text)
    for template in wikicode.filter_templates():
        # first_upper is safe on an empty name, unlike name[0].upper(),
        # which raised IndexError for templates like "{{ }}".
        template_name = first_upper(template.name.strip())
        if template_name in targets:
            wikicode.remove(template)
    return str(wikicode)
def is_template_in_page(text: str, template_redirects) -> bool:
    """
    Check if a given template (or one of its redirects) is used in the page text.

    :param text: wikitext
    :param template_redirects: list of template names, or a single name as a
        string; compared with the first letter upper-cased
    :return: True if any of the templates occurs in the text, False otherwise.
    """
    if isinstance(template_redirects, str):
        template_redirects = [template_redirects]
    targets = set(template_redirects)  # O(1) membership tests
    wikicode = mwparserfromhell.parse(text)
    # first_upper is safe on an empty name, unlike name[0].upper(),
    # which raised IndexError for templates like "{{ }}".
    return any(first_upper(template.name.strip()) in targets
               for template in wikicode.filter_templates())
def is_category_in_page(page: pywikibot.page.BasePage, category_title: str) -> bool:
    """
    Check if a given category is included in the categories of a page.

    :param page: A Pywikibot page object.
    :param category_title: The title of the category to check.
    :return: True if the category is included in the page categories, False otherwise.
    """
    if not page or not page.exists():
        return False
    target = pywikibot.Category(page.site, category_title.strip())
    # Compare namespace-less titles so prefix spelling differences don't matter.
    wanted = target.title(with_ns=False)
    return any(cat.title(with_ns=False) == wanted
               for cat in page.categories())
def zaravayen_din(categories: Iterator[pywikibot.Page]) -> bool:
    """
    Check whether the page belongs to one of the Kurdish-dialect categories.

    :param categories: iterable of pywikibot category pages
    :return: True if a dialect category is present, False otherwise.
    """
    dialect_categories = frozenset((
        "Gotara bi soranî",
        "Gotara bi kirmaşanî",
        "Gotara bi kurdiya başûr",
        "Gotarên bi kurmanciya behdînî",
        "Gotara bi zazakî",
    ))
    # True iff the page's category titles intersect the dialect set.
    return not dialect_categories.isdisjoint(
        cat.title(with_ns=False) for cat in categories)
def ucfirst(parsed) -> str:
    """
    Stringify a mwparserfromhell node, strip it, and upper-case the first char.

    :param parsed: text parsed by mwparserfromhell
    :return: stripped string with first char upper-cased; use first_upper
        directly for plain strings
    """
    text = str(parsed).strip()
    return first_upper(text)
def lcfirst(parsed) -> str:
    """
    Stringify a mwparserfromhell node, strip it, and lower-case the first char.

    :param parsed: text parsed by mwparserfromhell
    :return: stripped string with first char lower-cased; use first_lower
        directly for plain strings
    """
    text = str(parsed).strip()
    return first_lower(text)
def get_sitelink(from_site, to_site, page_title):
    """
    Retrieve the sitelink title for a page from one site to another site using Wikidata.

    Args:
        from_site (str): Source dbname (e.g. 'enwiki'); use self.site.dbName().
        to_site (str): Target dbname (e.g. 'kuwiki'); use self.site.dbName().
        page_title (str): The title of the page on the source site.

    Returns:
        str or None: The title of the page on the target site if found, otherwise None.

    Notes:
        Network, data and key errors are caught, reported to stdout and
        turned into a None return instead of propagating.
    """
    url = "https://www.wikidata.org/w/api.php"
    params = {
        "action": "wbgetentities",
        "sites": from_site,
        "titles": page_title,
        "props": "sitelinks",
        "format": "json",
    }
    try:
        # A timeout keeps the bot from hanging forever on a stalled connection.
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()  # Raise an HTTPError for bad responses
        data = response.json()
        # Check if the response contains the entities data
        if "entities" not in data:
            raise ValueError("The response does not contain 'entities'.")
        entity = next(iter(data["entities"].values()))
        # The target sitelink may be missing even when the entity exists.
        sitelink = entity.get("sitelinks", {}).get(to_site)
        return sitelink["title"] if sitelink else None
    except requests.exceptions.RequestException as e:
        print(f"An error occurred while making the request: {e}")
        return None
    except ValueError as e:
        print(f"An error occurred with the response data: {e}")
        return None
    except KeyError as e:
        print(f"An expected key is missing in the response data: {e}")
        return None
def get_sitelinks_qid(qid: str, lang_codes: Union[str, List[str]]) -> dict:
    """
    Retrieve sitelinks for the specified Wikidata QID and language codes.

    :param qid: Wikidata QID
    :param lang_codes: String or list of language codes (without 'wiki'
        suffix); the 'wiki' suffix is appended automatically.
    :return: e.g. for ['ku', 'en'] a dict with keys 'kuwiki' and 'enwiki';
        values are sitelink titles, or None when the sitelink is missing
        or the request failed.
    """
    # Plain string: the old f-prefix had no placeholders.
    url = "https://www.wikidata.org/w/api.php"
    params = {
        "action": "wbgetentities",
        "format": "json",
        "ids": qid,
        "props": "sitelinks",
    }
    # Convert lang_codes to a list if it's a string
    if isinstance(lang_codes, str):
        lang_codes = [lang_codes]
    # Pre-compute the dbname-style keys once.
    wiki_keys = [code + 'wiki' for code in lang_codes]
    try:
        # A timeout keeps the bot from hanging forever on a stalled connection.
        response = requests.get(url, params=params, timeout=30)
        data = response.json()
        if 'sitelinks' not in data['entities'][qid]:
            return {key: None for key in wiki_keys}
        sitelinks = data['entities'][qid]['sitelinks']
        return {key: sitelinks[key]['title'] if key in sitelinks else None
                for key in wiki_keys}
    except Exception as e:
        # Deliberately broad: any failure degrades to "no sitelinks found".
        print(f"An error occurred: {e}")
        return {key: None for key in wiki_keys}
# from #https://github.com/ashotjanibekyan/WikiPyScripts/blob/master/helpers.py
def without_comments(wiki_text):
    """Strip top-level HTML comments from *wiki_text*; None passes through."""
    if wiki_text is None:
        return None
    parsed = mwparserfromhell.parse(wiki_text)
    # Collect first, then remove: removal mutates the node list.
    comments = [node for node in parsed.nodes
                if isinstance(node, mwparserfromhell.nodes.Comment)]
    for comment in comments:
        parsed.remove(comment)
    return str(parsed).strip()
# from #https://github.com/ashotjanibekyan/WikiPyScripts/blob/master/helpers.py
def matrix_to_wikitable(matrix):
    """
    Render a matrix (list of rows) as a sortable wikitable.

    The first row is the header; subsequent rows are emitted only when they
    are lists with the same length as the header. Falsy cells other than 0
    render as a single space so the table cell is not empty.

    :param matrix: list of rows; matrix[0] is the header row
    :return: wikitext of the table
    """
    text = '{| class="wikitable sortable"\n'
    if not matrix:
        # An empty matrix used to raise IndexError; emit an empty table instead.
        return text + '|}'
    # map(str, ...) lets non-string header cells work too.
    text += '!' + '!!'.join(map(str, matrix[0])) + '\n'
    for row in matrix[1:]:
        # Skip malformed rows silently (same policy as before).
        if isinstance(row, list) and len(row) == len(matrix[0]):
            cells = (str(x) if x or x == 0 else ' ' for x in row)
            text += '|-\n|' + '||'.join(cells) + '\n'
    text += '|}'
    return text
def get_unhidden_categories(lang_code, page_title, withNS=False):
    """
    Fetch the unhidden categories for a given Wikipedia page via the API.

    :param lang_code: language code of the wiki, e.g. 'ku'
    :param page_title: The title of the Wikipedia page to retrieve categories for.
    :param withNS: if True keep the namespace prefix; otherwise strip a
        leading "Kategorî:" prefix
    :return: A list of unhidden category titles associated with the page.
    """
    url = f"https://{lang_code}.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "format": "json",
        "prop": "categories",
        "titles": page_title,
        "clshow": "!hidden",
        "cllimit": "max"  # Increase the limit to get more categories if available
    }
    # A timeout keeps the bot from hanging forever on a stalled connection.
    response = requests.get(url, params=params, timeout=30)
    data = response.json()
    pages = data.get("query", {}).get("pages", {})
    prefix = "Kategorî:"
    unhidden_categories = []
    for page_data in pages.values():
        for category in page_data.get('categories', []):
            cat_title = category['title']
            if not withNS and cat_title.startswith(prefix):
                # Strip only the leading namespace prefix; str.replace would
                # also mangle titles containing "Kategorî:" elsewhere.
                cat_title = cat_title[len(prefix):]
            unhidden_categories.append(cat_title)
    return unhidden_categories
#tweaked from https://gist.github.com/hesyifei/00f6ee0890ac3477b58e4d6b9c712fc2#file-deletepersondata-py-L29
def referring_page_generator(referred_page, follow_redirects=False,
                             with_template_inclusion=True,
                             only_template_inclusion=False,
                             total=None, content=False):
    """
    Return a list of all pages that refer to or embed the page.

    :param referred_page: the page (e.g. a template page, withNS=True) whose
        references are collected
    :param follow_redirects: if True, also iterate pages that link to a
        redirect pointing to the page (default False)
    :param with_template_inclusion: if True, also iterate pages where the
        page is used as a template (default True)
    :param only_template_inclusion: if True, only iterate pages where the
        page is used as a template (default False)
    :param total: iterate no more than this number of pages in total
    :param content: if True, retrieve the content of the current version of
        each referring page (default False)
    :return: a list of Pages
    """
    gen = referred_page.getReferences(
        follow_redirects=follow_redirects,
        with_template_inclusion=with_template_inclusion,
        only_template_inclusion=only_template_inclusion,
        total=total, content=content)
    # The old loop also called page.getReferences() on every result and
    # discarded the value — dead work; just materialize the generator.
    return list(gen)