Bikarhêner:Wikihez/skrîpt/py/category creator.py
Xuyakirin
#!/usr/bin/env python3
"""
python pwb.py updatewin -f:"category_creator.py" -s:"+fix"
Du fonksiyonên vê skrîptê hene:
create_category() bi [[Bikarhêner:Wikihez/skrîpt/py/categorizeWithCreator.py]] dixebite, jê re sernavekî kategoriya îngilîzî didî û li gorî regexan kategoriya kurdî çêdike. bi categorizewithcreator dixebite.
create_ku_category() bi [[Bikarhêner:Wikihez/skrîpt/py/category creator run.py]] dixebite, jê re sernavekî kategoriya kurdî didî û eger ev kategorî tinebe rûpela wê li gorî regexan çêdike.
Herdu fonksiyon jî kategoriya çêkirî ya nû bi wîkîdaneyê girêdide.
"""
import re
import json
import mytools
import requests
import pywikibot
# Flag consulted by CategoryCreator.print_sirove to decide whether debug
# messages are written to stdout.
SIROVETINE = True
# When True, CategoryCreator.load_patterns reads the regex table from
# "Bikarhêner:Wikihez/test.json" instead of the production JSON page.
TESTING = False
class CategoryCreator:
    """Create Kurdish Wikipedia category pages that mirror English ones.

    The translation rules are read from two JSON pages on ku.wikipedia:

    * ``main_cats``: maps an enwiki title regex to a config dict.  Keys read
      by this class are ``ku_title`` and, optionally, ``ku_regex``,
      ``en_title``, ``add_ku_suffix``, ``add_en_suffix``, ``otokat``,
      ``wext`` and ``sereke``.
    * ``tracking_cats``: maps a kuwiki title regex to an enwiki title
      pattern; both may contain ``{month}`` and the English side ``{year}``.
    * ``country_mapping``: maps English country names to Kurdish ones and is
      substituted for the ``{country}`` placeholder.

    Every page that is created is also connected to the Wikidata item of
    its English counterpart when such an item can be found.
    """

    # wbgetentities accepts at most 50 titles per request for normal clients.
    _WIKIDATA_TITLE_BATCH = 50
    # Seconds to wait for the Wikidata API before giving up on a request.
    _REQUEST_TIMEOUT = 30

    def __init__(self):
        """Connect to both wikis, load the rule tables and set constants."""
        self.country_mapping = None
        self.main_cats = None
        self.tracking_cats = None
        self.site_ku = pywikibot.Site("ku", 'wikipedia')
        self.site_en = pywikibot.Site("en", 'wikipedia')
        self.load_patterns()
        self.bot_name = "Bikarhêner:Wikihez/skrîpt/py/category creator.py"
        # English month name -> Kurdish month name, used for {month}.
        self.ku_months = {
            'January': 'kanûna paşîn',
            'February': 'sibat',
            'March': 'adar',
            'April': 'nîsan',
            'May': 'gulan',
            'June': 'hezîran',
            'July': 'tîrmeh',
            'August': 'tebax',
            'September': 'îlon',
            'October': 'çiriya pêşîn',
            'November': 'çiriya paşîn',
            'December': 'kanûna pêşîn',
        }

    @staticmethod
    def print_sirove(sirove):
        """Print a debug message when the module flag SIROVETINE is set."""
        # The original condition was inverted and printed only when the
        # verbose flag was switched off.
        if SIROVETINE:
            print(sirove)

    def load_patterns(self):
        """Load the regex and country tables from their JSON wiki pages.

        On any failure a warning is emitted and all three tables are reset
        to empty dicts, so the create_* methods simply find no match.
        """
        if not TESTING:
            json_page_title = "Bikarhêner:Wikihez/skrîpt/json/category creator regex.json"
        else:
            json_page_title = "Bikarhêner:Wikihez/test.json"
        json_page_cats = pywikibot.Page(self.site_ku, json_page_title)
        json_page_welat_title = "Bikarhêner:Wikihez/skrîpt/json/category creator welat.json"
        json_page_welat = pywikibot.Page(self.site_ku, json_page_welat_title)
        try:
            cats_json = json_page_cats.text
            welat_json = json_page_welat.text
            patterns_data = json.loads(cats_json)
            self.main_cats = patterns_data["main_cats"]
            self.print_sirove("main_cats loaded")
            self.tracking_cats = patterns_data["tracking_cats"]
            self.print_sirove("tracking_cats loaded")
            self.country_mapping = json.loads(welat_json)
            self.print_sirove("country_mapping loaded")
        except Exception as e:
            # Deliberately broad: a missing page, malformed JSON or a missing
            # key must not stop the bot from starting.
            pywikibot.warning(f"Failed to load category patterns: {e}")
            self.main_cats = {}
            self.tracking_cats = {}
            self.country_mapping = {}

    def ku_exists(self, page_title):
        """Return True when ``page_title`` exists on ku.wikipedia."""
        return pywikibot.Page(self.site_ku, page_title).exists()

    def en_exists(self, page_title):
        """Return True when ``page_title`` exists on en.wikipedia."""
        return pywikibot.Page(self.site_en, page_title).exists()

    def get_wikidata_id(self, interwiki_title):
        """Return the Wikidata item id of an enwiki page, or None.

        Redirects are followed first.  None is returned (after logging an
        error) when the title is invalid or no item is found.
        """
        page = pywikibot.Page(self.site_en, interwiki_title)
        if page.isRedirectPage():
            interwiki_title = page.getRedirectTarget().title()
        try:
            link = pywikibot.Link(f'en:{interwiki_title}',
                                  source=self.site_en.data_repository())
            return pywikibot.Page(link).data_item().id
        except pywikibot.exceptions.InvalidTitleError:
            pywikibot.error(f"Invalid title: {interwiki_title}")
            return None
        except pywikibot.exceptions.NoPageError:
            pywikibot.error(f"Page not found: {interwiki_title}")
            return None

    @staticmethod
    def get_sitelinks(page_titles):
        """Return the kuwiki titles linked to the given enwiki titles.

        The lookup goes through the Wikidata ``wbgetentities`` API.  Titles
        are sent in batches because the API limits the number of titles per
        request.  Titles without a kuwiki sitelink are silently dropped and
        duplicates are removed.  An empty list is returned for empty input
        and whenever a request or its response is unusable.
        """
        if not page_titles:
            # Nothing to look up; avoid a pointless (and failing) API call.
            return []
        url = "https://www.wikidata.org/w/api.php"
        to_site = "kuwiki"
        titles = list(page_titles)
        batch_size = CategoryCreator._WIKIDATA_TITLE_BATCH
        ku_sitelinks = {}  # dict used as an insertion-ordered set
        try:
            for start in range(0, len(titles), batch_size):
                params = {
                    "action": "wbgetentities",
                    "sites": "enwiki",
                    "titles": "|".join(titles[start:start + batch_size]),
                    "props": "sitelinks",
                    "format": "json",
                }
                response = requests.get(
                    url, params=params,
                    timeout=CategoryCreator._REQUEST_TIMEOUT)
                response.raise_for_status()
                data = response.json()
                if "entities" not in data:
                    raise ValueError("The response does not contain 'entities'.")
                for entity in data["entities"].values():
                    if "sitelinks" in entity and to_site in entity["sitelinks"]:
                        ku_sitelinks[entity["sitelinks"][to_site]["title"]] = None
            return list(ku_sitelinks)
        except requests.exceptions.RequestException as e:
            print(f"An error occurred while making the request: {e}")
            return []
        except ValueError as e:
            print(f"An error occurred with the response data: {e}")
            return []
        except KeyError as e:
            print(f"An expected key is missing in the response data: {e}")
            return []

    def add_sitelink(self, page, item_id):
        """Attach ``page`` as the kuwiki sitelink of Wikidata item ``item_id``.

        If the item already has a kuwiki sitelink the conflict is logged and
        nothing is changed.  Otherwise the sitelink is set and a Kurdish
        label and description are added when they are missing.  Errors are
        reported through print_sirove and never propagated.
        """
        repo = self.site_ku.data_repository()
        try:
            item = pywikibot.ItemPage(repo, title=item_id)
            if "kuwiki" in item.sitelinks:
                existing_sitelink = item.sitelinks["kuwiki"]
                # Start a new numbered line like the other log entries do.
                self.log_cat(f"\n# {item_id} sernavê {existing_sitelink} bi kar tîne ji bo {page.title()}")
                self.print_sirove(f"Sitelink already exists for {page.title()} in {item_id}. Skipping modification.")
                return None
            item.setSitelink(page, summary=f"Added sitelink {page}")
            self.print_sirove(f"Sitelink added for {page.title()} to {item_id}")

            item_dict = item.get()
            existing_label = item_dict.get('labels', {}).get('ku', None)
            existing_description = item_dict.get('descriptions', {}).get('ku', None)

            if existing_label is None:
                item.editLabels({"ku": page.title()}, summary=f"Added [ku] label: {page.title()}")
                self.print_sirove(f"label '{page.title()}' added to {item_id}")
            else:
                self.print_sirove(f"Label already exists: {existing_label}")

            description = "Wîkîmediya:Kategorî"
            if existing_description is None:
                item.editDescriptions({"ku": description}, summary=f"Added [ku] description: {description}")
                self.print_sirove(f"Description '{description}' added to {item_id}")
            else:
                self.print_sirove(f"Description already exists: {existing_description}")
        except pywikibot.exceptions.OtherPageSaveError as e:
            self.print_sirove(f"Error adding sitelink for {page.title()}: {str(e)}")
        except Exception as e:
            # Best effort: a Wikidata failure must not abort category creation.
            self.print_sirove(f"Error in add_sitelink function: {str(e)}")

    def save_page(self, page_title, text, en_title, summary):
        """Save a kuwiki page and connect it to its English counterpart.

        :param page_title: full kuwiki title, including the namespace
        :param text: wikitext to store
        :param en_title: full enwiki title of the counterpart
        :param summary: edit summary; a default one is built when falsy

        When a Wikidata item is found for ``en_title`` the new page is added
        to it as a sitelink.  Otherwise an old-style ``[[en:...]]`` interwiki
        link is appended to the page in a second edit.
        """
        page = pywikibot.Page(self.site_ku, page_title)
        page.text = text
        if not summary:
            summary = f"[[{self.bot_name}|Bot]]: Wekheva [[en:{en_title}]] hat çêkirin"
        page.save(summary=summary)

        item_id = None
        if self.en_exists(en_title):
            item_id = self.get_wikidata_id(en_title)

        if item_id:
            try:
                self.add_sitelink(page, item_id)
            except pywikibot.exceptions.OtherPageSaveError as e:
                self.print_sirove(
                    f"Error saving sitelink for page {page.title()}: {str(e)}.")
            except Exception as e:
                self.print_sirove(
                    f"Error processing page {page.title()}: {str(e)}.")
        else:
            interwiki = f"\n[[en:{en_title}]]"
            page.text = page.text + interwiki
            interwiki_summary = f"[[{self.bot_name}|Bot]]: Înterwîkî [[en:{en_title}]] lê hat zêdekirin ji ber ku îtema Wîkîdata tine"
            page.save(summary=interwiki_summary)

    def _expand_country(self, pattern, substitute):
        """Yield ``(pattern, en_country, ku_country)`` candidates.

        A pattern without ``{country}`` is yielded once with both country
        names set to None.  Otherwise one candidate is produced per entry of
        ``country_mapping``, with the placeholder replaced by
        ``substitute(en_country, ku_country)``.
        """
        if "{country}" not in pattern:
            yield pattern, None, None
            return
        # NOTE(review): the country name is inserted into a regex verbatim.
        # A name containing regex metacharacters (e.g. parentheses) will not
        # match literally; confirm against the JSON data whether re.escape
        # should be applied here.
        for en_country, ku_country in self.country_mapping.items():
            expanded = pattern.replace("{country}", substitute(en_country, ku_country))
            yield expanded, en_country, ku_country

    @staticmethod
    def _extract_year(match):
        """Return capture group 1 of ``match`` as an int, or None.

        None is returned when the pattern has no capture group or the group
        did not capture anything.
        """
        if match.lastindex and match.group(1):
            return int(match.group(1))
        return None

    @staticmethod
    def _format_with_year(pattern, year, suffix):
        """Fill ``{year}`` in ``pattern``; return it untouched if no year."""
        if year is None:
            return pattern
        if suffix:
            return pattern.format(year=str(year) + suffix)
        return pattern.format(year=year)

    @staticmethod
    def _build_page_text(config, ku_parent_categories):
        """Return the wikitext for a new category page.

        The header template is chosen by the ``otokat``, ``wext`` and
        ``sereke`` config flags.  The given parent categories follow as
        links; when there are none the uncategorised template is
        substituted instead.
        """
        if config.get("otokat", False):
            page_text = "{{Otokat}}"
        else:
            page_text = "{{Standard-kat"
            if config.get("wext", False):
                page_text += "|wext=1"
            if config.get("sereke", False):
                page_text += "|sereke=1"
            page_text += "}}\n\n"
        if ku_parent_categories:
            page_text += "\n".join(f"[[{cat}]]" for cat in ku_parent_categories) + "\n"
        else:
            page_text += "{{subst:bêkategorî}}"
        return page_text

    def _create_from_enwiki(self, ku_page_title, en_page_title, config):
        """Build and save ``ku_page_title`` as the mirror of ``en_page_title``.

        Parent categories come from mytools.get_unhidden_categories (defined
        outside this file) and are translated through get_sitelinks.
        """
        parent_categories = mytools.get_unhidden_categories("en", en_page_title, withNS=True)
        self.print_sirove(f"parent_categories: {parent_categories}")
        ku_parent_categories = self.get_sitelinks(parent_categories)
        if ku_parent_categories:
            self.print_sirove(f"ku_parent_categories: {ku_parent_categories}")
        page_text = self._build_page_text(config, ku_parent_categories)
        self.save_page(ku_page_title, page_text, en_page_title, None)

    def create_category(self, en_title):
        """Create the Kurdish counterpart of an English category.

        :param en_title: enwiki category title without the namespace
        :return: the Kurdish title (without namespace) of the page that was
            created, or None when no rule matches or the page already exists
        """
        for enwiki_regex, config in self.main_cats.items():
            candidates = self._expand_country(enwiki_regex, lambda en, ku: en)
            for candidate_regex, _en_country, ku_country in candidates:
                enwiki_match = re.match(candidate_regex, en_title)
                if not enwiki_match:
                    continue
                self.print_sirove("enwiki_match matched")

                kuwiki_pattern = config["ku_title"]
                if ku_country is not None:
                    kuwiki_pattern = kuwiki_pattern.replace(
                        "{country}", self.get_cemandi(ku_country))

                # Patterns without a year group are valid in both the country
                # and the plain branch; the title is then used as written.
                year = self._extract_year(enwiki_match)
                suffix = ""
                if year is not None:
                    self.print_sirove(f"Year: {year}")
                    if config.get("add_ku_suffix", False):
                        suffix = self.get_ku_suffix(year)
                        self.print_sirove("add_ku_suffix is true")
                    else:
                        self.print_sirove("add_ku_suffix is false")
                kurdish_title = self._format_with_year(kuwiki_pattern, year, suffix)
                self.print_sirove(f"Kurdish Title: {kurdish_title}")

                kuwiki_title = "Kategorî:" + kurdish_title
                self.print_sirove(f"Kurdish Page Title: {kuwiki_title}")
                if self.ku_exists(kuwiki_title):
                    pywikibot.output(f"The category page {kuwiki_title} already exists.")
                    return None

                self.print_sirove(f"Creating category {kuwiki_title}.")
                self._create_from_enwiki(kuwiki_title, f'Category:{en_title}', config)
                return kurdish_title

        self.print_sirove("No match found")
        return None

    @staticmethod
    def get_ku_suffix(year):
        """Return the Kurdish suffix for ``year``.

        "ê" when the number ends in 1 but not in 11, otherwise "an".
        """
        if year % 10 == 1 and year % 100 != 11:
            return "ê"
        return "an"

    @staticmethod
    def get_en_suffix(year):
        """Return the English ordinal suffix ("st", "nd", "rd", "th")."""
        last_two_digits = year % 100
        # 11th, 12th and 13th are the exceptions to the last-digit rule.
        if last_two_digits in (11, 12, 13):
            return "th"
        return {1: "st", 2: "nd", 3: "rd"}.get(last_two_digits % 10, "th")

    @staticmethod
    def get_cemandi(welat):
        """Return the inflected form of a Kurdish country name.

        Names containing a space are returned unchanged; names ending in
        a, e, ê, i, u, û or o get "yê"; a final "î" is replaced by "iyê";
        every other name gets "ê".
        """
        if ' ' in welat:
            return welat
        welat_lower = welat.lower()
        if welat_lower.endswith(('a', 'e', 'ê', 'i', 'u', 'û', 'o')):
            return welat + "yê"
        if welat_lower.endswith('î'):
            return welat[:-1] + "iyê"
        return welat + "ê"

    def log_cat(self, new_text):
        """Append ``new_text`` to the bot's problem-log page on kuwiki."""
        page = pywikibot.Page(self.site_ku, "Bikarhêner:Balyozbot/kontrol/category creator")
        page.text = page.text + new_text
        page.save(summary=f"[[{self.bot_name}|Bot]]: Problemek hat qeydkirin")

    def create_ku_category(self, ku_category):
        """Create a missing Kurdish category from its Kurdish title.

        :param ku_category: kuwiki category title without the namespace

        Monthly tracking categories are tried first and are created with a
        fixed template.  Otherwise every ``main_cats`` rule that has a
        ``ku_regex`` is tried; on a match the English title is derived and,
        if that category exists on enwiki, the Kurdish page is created.  A
        match whose English category does not exist is written to the log
        page and the search continues.  Always returns None.
        """
        self.print_sirove(f"ku_category: {ku_category}")
        ku_title = f"Kategorî:{ku_category}"
        if self.ku_exists(ku_title):
            print(f"{ku_category} already exists. Derdê te çi ye?")
            return None

        for tracking_regex, tracking_enwiki_regex in self.tracking_cats.items():
            for en_month, ku_month in self.ku_months.items():
                tracking_match = re.match(tracking_regex.replace("{month}", ku_month), ku_category)
                if not tracking_match:
                    continue
                self.print_sirove("ku_category matches tracking regex")
                ku_numeric_value = tracking_match.group(1)
                en_title = tracking_enwiki_regex.replace("{month}", en_month)
                en_title = en_title.replace("{year}", f"{ku_numeric_value}")
                en_category = "Category:" + en_title
                self.print_sirove(f"en_category: {en_category}")
                self.print_sirove(f"ku_category: {ku_title}")
                text = "{{Kategoriya paqijkirinê ya mehane}}"
                summary = f"[[{self.bot_name}|Bot]]: Kategoriya paqijkirinê ya mehane hat çêkirin"
                self.save_page(ku_title, text, en_category, summary)
                return None

        matched = False
        for config in self.main_cats.values():
            if "ku_regex" not in config:
                continue
            candidates = self._expand_country(
                config["ku_regex"], lambda en, ku: self.get_cemandi(ku))
            for candidate_regex, en_country, _ku_country in candidates:
                kuwiki_match = re.match(candidate_regex, ku_category)
                if not kuwiki_match:
                    continue
                matched = True
                self.print_sirove("kuwiki_match matched")

                enwiki_pattern = config["en_title"]
                if en_country is not None:
                    enwiki_pattern = enwiki_pattern.replace("{country}", en_country)

                year = self._extract_year(kuwiki_match)
                suffix = ""
                if year is not None:
                    if config.get("add_en_suffix", False):
                        suffix = self.get_en_suffix(year)
                        self.print_sirove("add_en_suffix is true")
                    else:
                        self.print_sirove("add_en_suffix is false")
                en_title = self._format_with_year(enwiki_pattern, year, suffix)
                self.print_sirove(f"en_title: {en_title}")

                en_page_title = "Category:" + en_title
                self.print_sirove(f"enwiki Page Title: {en_page_title}")
                if self.en_exists(en_page_title):
                    self.print_sirove(
                        f"The category page {en_page_title} exists. Creating category Kategorî:{ku_category}.")
                    self._create_from_enwiki(ku_title, en_page_title, config)
                    # Stop here: continuing could let a later rule overwrite
                    # the page that was just created.
                    return None
                # The category is wanted locally but has no enwiki equivalent.
                new_text = f"\n# [[:Kategorî:{ku_category}]] tê xwestin lê li en.wîkiyê tine"
                self.log_cat(new_text)

        if not matched:
            self.print_sirove("No match found")
        return None