#!/usr/bin/env python3
"""
python pwb.py updatewin -file:"mytools.py" -s:"+referring_page_generator"
Tools:
ucfirst
lcfirst
without_comments
get_cur_month_year Returns current month and year as a string in Kurdish
get_cat_members Retrieve all members of a category that belong to a given namespace
get_unhidden_categories Fetches the unhidden categories for a page
get_sitelinks Retrieve sitelinks for the title and dbNames
get_sitelinks_qid Retrieve sitelinks for the QID and language codes.
get_template_redirects Return a list of redirects of the given template.
is_template_in_page Check if a given template or one of its redirects is included in the page text.
is_category_in_page Check if a given category is included in the categories of a page.
remove_template Remove template from wiki text.
zaravayen_din Kontrol bike eger sayfe di kategoriyên zaravayan de ye.
matrix_to_wikitable
"""
import datetime
import requests
import mwparserfromhell
import pywikibot
from pywikibot.tools import first_lower, first_upper
from typing import List, Union, Iterator
from functools import lru_cache
# Kurdish month names keyed by month number (1-12).
ku_months = {
    1: 'kanûna paşîn',
    2: 'sibat',
    3: 'adar',
    4: 'nîsan',
    5: 'gulan',
    6: 'hezîran',
    7: 'tîrmeh',
    8: 'tebax',
    9: 'îlon',
    10: 'çiriya pêşîn',
    11: 'çiriya paşîn',
    12: 'kanûna pêşîn'
}


def get_cur_month_year() -> str:
    """
    Return the current month (in Kurdish) and year as a string.

    :return: e.g. "gulan 2024"
    """
    # Take a single snapshot so month and year cannot disagree if the
    # clock rolls over between two separate now() calls.
    now = datetime.datetime.now()
    return f"{ku_months[now.month]} {now.year}"
@lru_cache(maxsize=None)
def get_cat_members(site: pywikibot.site.BaseSite, category_name: str, namespace: int) -> List[str]:
    """
    Retrieve all members of a category that belong to a given namespace.

    Results are cached per (site, category_name, namespace).

    :param site: The Pywikibot site object representing the target wiki.
    :param category_name: The name of the category from which to retrieve members.
    :param namespace: The namespace number to filter the category members by.
    :return: A list of titles (without namespace) of the category members
        that belong to the specified namespace.
    """
    category = pywikibot.Category(site, 'Category:' + category_name)
    # Let the API filter by namespace server-side instead of fetching every
    # member and discarding the mismatches client-side.
    return [member.title(with_ns=False)
            for member in category.members(namespaces=[namespace])]
@lru_cache(maxsize=None)
def get_template_redirects(site, template_title):
    """
    Return a list of redirects of the given template (plus the template itself).

    Results are cached per (site, template_title).

    :param site: pywikibot Site
    :param template_title: template name without the "Şablon:" prefix
    :return: list of redirect titles (without namespace), with the given
        template's own title appended last
    """
    template_page = pywikibot.Page(site, "Şablon:" + template_title)
    redirects = template_page.backlinks(filter_redirects=True, namespaces=[10])
    redirect_titles = [redirect.title(with_ns=False) for redirect in redirects]
    # Append the original title directly; the previous split(":")[-1] trick
    # truncated template names that themselves contain a colon.
    redirect_titles.append(template_title)
    return redirect_titles
def remove_template(text: str, template_redirects) -> str:
    """
    Remove specified template(s) from wiki text.

    :param text: Wiki text
    :param template_redirects: List of template names or a single template
        name as a string; compared with the first letter upper-cased
    :return: Wiki text with the matching templates removed
    """
    if isinstance(template_redirects, str):
        template_redirects = [template_redirects]
    targets = set(template_redirects)  # O(1) membership tests
    wikicode = mwparserfromhell.parse(text)
    for template in wikicode.filter_templates():
        # first_upper is safe on an empty name, unlike name[0].upper(),
        # which raised IndexError for templates like "{{ }}".
        template_name = first_upper(template.name.strip())
        if template_name in targets:
            wikicode.remove(template)
    return str(wikicode)
def is_template_in_page(text: str, template_redirects) -> bool:
    """
    Check if a given template (or one of its redirects) is used in the page text.

    :param text: wikitext
    :param template_redirects: list of template names, or a single name as a
        string; compared with the first letter upper-cased
    :return: True if any of the templates occurs in the text, False otherwise.
    """
    if isinstance(template_redirects, str):
        template_redirects = [template_redirects]
    targets = set(template_redirects)  # O(1) membership tests
    wikicode = mwparserfromhell.parse(text)
    # first_upper is safe on an empty name, unlike name[0].upper(),
    # which raised IndexError for templates like "{{ }}".
    return any(first_upper(template.name.strip()) in targets
               for template in wikicode.filter_templates())
def is_category_in_page(page: pywikibot.page.BasePage, category_title: str) -> bool:
    """
    Check if a given category is included in the categories of a page.

    :param page: A Pywikibot page object.
    :param category_title: The title of the category to check.
    :return: True if the category is included in the page categories, False otherwise.
    """
    if not page or not page.exists():
        return False
    target = pywikibot.Category(page.site, category_title.strip())
    # Compare namespace-less titles so prefix spelling differences don't matter.
    wanted = target.title(with_ns=False)
    return any(cat.title(with_ns=False) == wanted
               for cat in page.categories())
def zaravayen_din(categories: Iterator[pywikibot.Page]) -> bool:
    """
    Check whether the page belongs to one of the Kurdish-dialect categories.

    :param categories: iterable of pywikibot category pages
    :return: True if a dialect category is present, False otherwise.
    """
    dialect_categories = frozenset((
        "Gotara bi soranî",
        "Gotara bi kirmaşanî",
        "Gotara bi kurdiya başûr",
        "Gotarên bi kurmanciya behdînî",
        "Gotara bi zazakî",
    ))
    # True iff the page's category titles intersect the dialect set.
    return not dialect_categories.isdisjoint(
        cat.title(with_ns=False) for cat in categories)
def ucfirst(parsed) -> str:
    """
    Stringify a mwparserfromhell node, strip it, and upper-case the first char.

    :param parsed: text parsed by mwparserfromhell
    :return: stripped string with first char upper-cased; use first_upper
        directly for plain strings
    """
    text = str(parsed).strip()
    return first_upper(text)
def lcfirst(parsed) -> str:
    """
    Stringify a mwparserfromhell node, strip it, and lower-case the first char.

    :param parsed: text parsed by mwparserfromhell
    :return: stripped string with first char lower-cased; use first_lower
        directly for plain strings
    """
    text = str(parsed).strip()
    return first_lower(text)
def get_sitelink(from_site, to_site, page_title):
    """
    Retrieve the sitelink title for a page from one site to another site using Wikidata.

    Args:
        from_site (str): Source dbname (e.g. 'enwiki'); use self.site.dbName().
        to_site (str): Target dbname (e.g. 'kuwiki'); use self.site.dbName().
        page_title (str): The title of the page on the source site.

    Returns:
        str or None: The title of the page on the target site if found, otherwise None.

    Notes:
        Network, data and key errors are caught, reported to stdout and
        turned into a None return instead of propagating.
    """
    url = "https://www.wikidata.org/w/api.php"
    params = {
        "action": "wbgetentities",
        "sites": from_site,
        "titles": page_title,
        "props": "sitelinks",
        "format": "json",
    }
    try:
        # A timeout keeps the bot from hanging forever on a stalled connection.
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()  # Raise an HTTPError for bad responses
        data = response.json()
        # Check if the response contains the entities data
        if "entities" not in data:
            raise ValueError("The response does not contain 'entities'.")
        entity = next(iter(data["entities"].values()))
        # The target sitelink may be missing even when the entity exists.
        sitelink = entity.get("sitelinks", {}).get(to_site)
        return sitelink["title"] if sitelink else None
    except requests.exceptions.RequestException as e:
        print(f"An error occurred while making the request: {e}")
        return None
    except ValueError as e:
        print(f"An error occurred with the response data: {e}")
        return None
    except KeyError as e:
        print(f"An expected key is missing in the response data: {e}")
        return None
def get_sitelinks_qid(qid: str, lang_codes: Union[str, List[str]]) -> dict:
    """
    Retrieve sitelinks for the specified Wikidata QID and language codes.

    :param qid: Wikidata QID
    :param lang_codes: String or list of language codes (without 'wiki'
        suffix); the 'wiki' suffix is appended automatically.
    :return: e.g. for ['ku', 'en'] a dict with keys 'kuwiki' and 'enwiki';
        values are sitelink titles, or None when the sitelink is missing
        or the request failed.
    """
    # Plain string: the old f-prefix had no placeholders.
    url = "https://www.wikidata.org/w/api.php"
    params = {
        "action": "wbgetentities",
        "format": "json",
        "ids": qid,
        "props": "sitelinks",
    }
    # Convert lang_codes to a list if it's a string
    if isinstance(lang_codes, str):
        lang_codes = [lang_codes]
    # Pre-compute the dbname-style keys once.
    wiki_keys = [code + 'wiki' for code in lang_codes]
    try:
        # A timeout keeps the bot from hanging forever on a stalled connection.
        response = requests.get(url, params=params, timeout=30)
        data = response.json()
        if 'sitelinks' not in data['entities'][qid]:
            return {key: None for key in wiki_keys}
        sitelinks = data['entities'][qid]['sitelinks']
        return {key: sitelinks[key]['title'] if key in sitelinks else None
                for key in wiki_keys}
    except Exception as e:
        # Deliberately broad: any failure degrades to "no sitelinks found".
        print(f"An error occurred: {e}")
        return {key: None for key in wiki_keys}
# from #https://github.com/ashotjanibekyan/WikiPyScripts/blob/master/helpers.py
def without_comments(wiki_text):
    """Strip top-level HTML comments from *wiki_text*; None passes through."""
    if wiki_text is None:
        return None
    parsed = mwparserfromhell.parse(wiki_text)
    # Collect first, then remove: removal mutates the node list.
    comments = [node for node in parsed.nodes
                if isinstance(node, mwparserfromhell.nodes.Comment)]
    for comment in comments:
        parsed.remove(comment)
    return str(parsed).strip()
# from #https://github.com/ashotjanibekyan/WikiPyScripts/blob/master/helpers.py
def matrix_to_wikitable(matrix):
    """
    Render a matrix (list of rows) as a sortable wikitable.

    The first row is the header; subsequent rows are emitted only when they
    are lists with the same length as the header. Falsy cells other than 0
    render as a single space so the table cell is not empty.

    :param matrix: list of rows; matrix[0] is the header row
    :return: wikitext of the table
    """
    text = '{| class="wikitable sortable"\n'
    if not matrix:
        # An empty matrix used to raise IndexError; emit an empty table instead.
        return text + '|}'
    # map(str, ...) lets non-string header cells work too.
    text += '!' + '!!'.join(map(str, matrix[0])) + '\n'
    for row in matrix[1:]:
        # Skip malformed rows silently (same policy as before).
        if isinstance(row, list) and len(row) == len(matrix[0]):
            cells = (str(x) if x or x == 0 else ' ' for x in row)
            text += '|-\n|' + '||'.join(cells) + '\n'
    text += '|}'
    return text
def get_unhidden_categories(lang_code, page_title, withNS=False):
    """
    Fetch the unhidden categories for a given Wikipedia page via the API.

    :param lang_code: language code of the wiki, e.g. 'ku'
    :param page_title: The title of the Wikipedia page to retrieve categories for.
    :param withNS: if True keep the namespace prefix; otherwise strip a
        leading "Kategorî:" prefix
    :return: A list of unhidden category titles associated with the page.
    """
    url = f"https://{lang_code}.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "format": "json",
        "prop": "categories",
        "titles": page_title,
        "clshow": "!hidden",
        "cllimit": "max"  # Increase the limit to get more categories if available
    }
    # A timeout keeps the bot from hanging forever on a stalled connection.
    response = requests.get(url, params=params, timeout=30)
    data = response.json()
    pages = data.get("query", {}).get("pages", {})
    prefix = "Kategorî:"
    unhidden_categories = []
    for page_data in pages.values():
        for category in page_data.get('categories', []):
            cat_title = category['title']
            if not withNS and cat_title.startswith(prefix):
                # Strip only the leading namespace prefix; str.replace would
                # also mangle titles containing "Kategorî:" elsewhere.
                cat_title = cat_title[len(prefix):]
            unhidden_categories.append(cat_title)
    return unhidden_categories
#tweaked from https://gist.github.com/hesyifei/00f6ee0890ac3477b58e4d6b9c712fc2#file-deletepersondata-py-L29
def referring_page_generator(referred_page, follow_redirects=False,
                             with_template_inclusion=True,
                             only_template_inclusion=False,
                             total=None, content=False):
    """
    Return a list of all pages that refer to or embed the page.

    :param referred_page: the page (e.g. a template page, withNS=True) whose
        references are collected
    :param follow_redirects: if True, also iterate pages that link to a
        redirect pointing to the page (default False)
    :param with_template_inclusion: if True, also iterate pages where the
        page is used as a template (default True)
    :param only_template_inclusion: if True, only iterate pages where the
        page is used as a template (default False)
    :param total: iterate no more than this number of pages in total
    :param content: if True, retrieve the content of the current version of
        each referring page (default False)
    :return: a list of Pages
    """
    gen = referred_page.getReferences(
        follow_redirects=follow_redirects,
        with_template_inclusion=with_template_inclusion,
        only_template_inclusion=only_template_inclusion,
        total=total, content=content)
    # The old loop also called page.getReferences() on every result and
    # discarded the value — dead work; just materialize the generator.
    return list(gen)