# Bikarhêner:Balyozxane/skrîpt/py/category creator.py
# Xuyakirin
# (Ji Bikarhêner:Balyozxane/category creator.py hat beralîkirin)
#!/usr/bin/env python3
"""
python pwb.py updatewin -f:"category_creator.py" -s:"fix"
Du fonksiyonên vê skrîptê hene:
create_category() bi [[Bikarhêner:Balyozxane/skrîpt/py/categorizeWithCreator.py]] dixebite, jê re sernavekî kategoriya îngilîzî didî û li gorî regexan kategoriya kurdî çêdike. bi categorizewithcreator dixebite.
create_ku_category() bi [[Bikarhêner:Balyozxane/skrîpt/py/category creator run.py]] dixebite, jê re sernavekî kategoriya kurdî didî û eger ev kategorî tinebe rûpela wê li gorî regexan çêdike.
Herdu fonksiyon jî kategoriya çêkirî ya nû bi wîkîdaneyê girêdide.
"""
import re
import json
import pywikibot
# Quiet switch read by CategoryCreator.print_sirove: while this is True the
# progress messages are suppressed; set it to False to have them printed.
# (The name appears to read "şîrove tine", i.e. "no commentary" - TODO confirm.)
SIROVETINE = True
# Read by CategoryCreator.load_patterns: when True the regex table is taken
# from "Bikarhêner:Balyozxane/test.json" instead of the regular regex JSON page.
TESTING = False
class CategoryCreator:
    """Create category pages on ku.wikipedia that mirror en.wikipedia categories.

    Two entry points are offered:

    * ``create_category(en_title)`` - given an English category title, derive
      the Kurdish title from the ``main_cats`` regex table and create the page.
    * ``create_ku_category(ku_category)`` - given a Kurdish category title,
      derive the English title (``tracking_cats`` first, then ``main_cats``)
      and create the Kurdish page if the English one exists.

    Pages are written through ``save_page``, which also tries to attach the
    new page to the Wikidata item of the English category.
    """

    def __init__(self):
        """Open both wiki sites and load the regex and country tables."""
        self.country_mapping = None
        self.main_cats = None
        self.tracking_cats = None
        self.site_ku = pywikibot.Site("ku", 'wikipedia')
        self.site_en = pywikibot.Site("en", 'wikipedia')
        self.load_patterns()
        self.bot_name = "Bikarhêner:Balyozxane/skrîpt/py/category creator.py"
        # English month name -> Kurdish month name, used for "{month}"
        # placeholders in the tracking-category regexes.
        self.ku_months = {
            'January': 'kanûna paşîn',
            'February': 'sibat',
            'March': 'adar',
            'April': 'nîsan',
            'May': 'gulan',
            'June': 'hezîran',
            'July': 'tîrmeh',
            'August': 'tebax',
            'September': 'îlon',
            'October': 'çiriya pêşîn',
            'November': 'çiriya paşîn',
            'December': 'kanûna pêşîn',
        }

    @staticmethod
    def print_sirove(sirove):
        """Print a progress message unless the module flag SIROVETINE is set."""
        if not SIROVETINE:
            print(sirove)

    def load_patterns(self):
        """Load ``main_cats``, ``tracking_cats`` and ``country_mapping``.

        The tables are read from JSON pages on ku.wikipedia. Any failure
        (missing page, invalid JSON, missing key) is reported as a warning and
        all three tables are reset to empty dicts, so the bot keeps running
        but matches nothing.
        """
        if not TESTING:
            json_page_title = "Bikarhêner:Balyozxane/skrîpt/json/category creator regex.json"
        else:
            json_page_title = "Bikarhêner:Balyozxane/test.json"
        json_page_cats = pywikibot.Page(self.site_ku, json_page_title)
        json_page_welat_title = "Bikarhêner:Balyozxane/skrîpt/json/category creator welat.json"
        json_page_welat = pywikibot.Page(self.site_ku, json_page_welat_title)
        try:
            # Fetch the raw wikitext of both JSON pages.
            cats_json = json_page_cats.text
            welat_json = json_page_welat.text
            patterns_data = json.loads(cats_json)
            self.main_cats = patterns_data["main_cats"]
            self.print_sirove("main_cats loaded")
            self.tracking_cats = patterns_data["tracking_cats"]
            self.print_sirove("tracking_cats loaded")
            self.country_mapping = json.loads(welat_json)
            self.print_sirove("country_mapping loaded")
        except Exception as e:
            # Deliberately broad: a broken configuration page must not crash
            # the caller, it only disables category creation.
            pywikibot.warning(f"Failed to load category patterns: {e}")
            self.main_cats = {}
            self.tracking_cats = {}
            self.country_mapping = {}

    def ku_exists(self, page_title):
        """Return True if *page_title* exists on ku.wikipedia."""
        return pywikibot.Page(self.site_ku, page_title).exists()

    def en_exists(self, page_title):
        """Return True if *page_title* exists on en.wikipedia."""
        return pywikibot.Page(self.site_en, page_title).exists()

    def get_wikidata_id(self, interwiki_title):
        """Return the Wikidata item id linked to an en.wikipedia page.

        Redirects are followed once before the lookup. Returns None (after
        logging an error) when the title is invalid or no item is found.
        """
        page = pywikibot.Page(self.site_en, interwiki_title)
        if page.isRedirectPage():
            # Look up the item of the redirect target, not of the redirect.
            interwiki_title = page.getRedirectTarget().title()
        try:
            link = pywikibot.Link(f'en:{interwiki_title}',
                                  source=self.site_en.data_repository())
            item_page = pywikibot.Page(link)
            return item_page.data_item().id
        except pywikibot.exceptions.InvalidTitleError:
            pywikibot.error(f"Invalid title: {interwiki_title}")
            return None
        except pywikibot.exceptions.NoPageError:
            pywikibot.error(f"Page not found: {interwiki_title}")
            return None

    def add_sitelink(self, page, item_id):
        """Attach *page* to Wikidata item *item_id* as its kuwiki sitelink.

        If the item already has a kuwiki sitelink the conflict is written to
        the bot's log page and nothing is modified. Otherwise the sitelink is
        set and a Kurdish label / description is added where missing.
        All errors are reported through ``print_sirove`` and swallowed
        (best effort); the method always returns None.
        """
        repo = self.site_ku.data_repository()
        try:
            item = pywikibot.ItemPage(repo, title=item_id)
            if "kuwiki" in item.sitelinks:
                existing_sitelink = item.sitelinks["kuwiki"]
                # "\n# " keeps this entry on its own numbered line, like the
                # other entries written to the log page.
                self.log_cat(f"\n# {item_id} sernavê {existing_sitelink} bi kar tîne ji bo {page.title()}")
                self.print_sirove(f"Sitelink already exists for {page.title()} in {item_id}. Skipping modification.")
                return None
            item.setSitelink(page, summary=f"Added sitelink {page}")
            self.print_sirove(f"Sitelink added for {page.title()} to {item_id}")

            item_dict = item.get()
            existing_label = item_dict.get('labels', {}).get('ku', None)
            existing_description = item_dict.get('descriptions', {}).get('ku', None)

            if existing_label is None:
                item.editLabels({"ku": page.title()}, summary=f"Added [ku] label: {page.title()}")
                self.print_sirove(f"label '{page.title()}' added to {item_id}")
            else:
                self.print_sirove(f"Label already exists: {existing_label}")

            description = "Wîkîmediya:Kategorî"
            if existing_description is None:
                item.editDescriptions({"ku": description}, summary=f"Added [ku] description: {description}")
                self.print_sirove(f"Description '{description}' added to {item_id}")
            else:
                self.print_sirove(f"Description already exists: {existing_description}")
        except pywikibot.exceptions.OtherPageSaveError as e:
            self.print_sirove(f"Error adding sitelink for {page.title()}: {str(e)}")
        except Exception as e:
            self.print_sirove(f"Error in add_sitelink function: {str(e)}")
        return None

    def save_page(self, page_title, text, en_title, summary):
        """Save *text* to *page_title* on ku.wikipedia and link it to *en_title*.

        *summary* falls back to a default edit summary naming *en_title* when
        empty. After saving, the page is attached to the Wikidata item of
        *en_title* if one is found; otherwise an ``[[en:...]]`` interwiki link
        is appended to the page and it is saved a second time.
        """
        page = pywikibot.Page(self.site_ku, page_title)
        page.text = text
        if not summary:
            summary = f"[[{self.bot_name}|Bot]]: Wekheva [[en:{en_title}]] hat çêkirin"
        page.save(summary=summary)

        item_id = None
        if self.en_exists(en_title):
            item_id = self.get_wikidata_id(en_title)

        if item_id:
            try:
                self.add_sitelink(page, item_id)
            except pywikibot.exceptions.OtherPageSaveError as e:
                self.print_sirove(
                    f"Error saving sitelink for page {page.title()}: {str(e)}.")
            except Exception as e:
                self.print_sirove(
                    f"Error processing page {page.title()}: {str(e)}.")
        else:
            # No Wikidata item to attach to: fall back to a classic interwiki.
            interwiki = f"\n[[en:{en_title}]]"
            page.text = page.text + interwiki
            interwiki_summary = f"[[{self.bot_name}|Bot]]: Înterwîkî [[en:{en_title}]] lê hat zêdekirin ji ber ku îtema Wîkîdata tine"
            page.save(summary=interwiki_summary)

    # ------------------------------------------------------------------
    # Small pure helpers shared by create_category / create_ku_category
    # ------------------------------------------------------------------
    @staticmethod
    def _with_country(regex, country_name):
        """Substitute *country_name* for "{country}" in *regex*, escaped.

        Escaping keeps names containing regex metacharacters (for example
        parentheses) from altering the pattern or its group numbering.
        """
        return regex.replace("{country}", re.escape(country_name))

    @staticmethod
    def _year_from_match(match):
        """Return capture group 1 of *match* as an int, or None.

        None is returned when the pattern has no group or group 1 is empty.
        """
        if match.lastindex and match.group(1):
            return int(match.group(1))
        return None

    @staticmethod
    def _fill_year(pattern, year, suffix=None):
        """Fill the "{year}" field of *pattern*.

        With ``year is None`` the pattern is returned untouched; with a
        *suffix* the field receives ``str(year) + suffix``.
        """
        if year is None:
            return pattern
        if suffix is None:
            return pattern.format(year=year)
        return pattern.format(year=str(year) + suffix)

    @staticmethod
    def _category_page_text(config):
        """Return the wikitext of a new category page for a ``main_cats`` entry."""
        if config.get("otokat", False):
            return "{{Otokat}}"
        page_text = "{{Standard-kat"
        if config.get("wext", False):
            page_text += "|wext=1"
        if config.get("sereke", False):
            page_text += "|sereke=1"
        return page_text + "}}\n\n{{subst:bêkategorî}}"

    # ------------------------------------------------------------------
    # English title -> Kurdish category
    # ------------------------------------------------------------------
    def create_category(self, en_title):
        """Create the Kurdish counterpart of the English category *en_title*.

        *en_title* is given without the "Category:" prefix. The first
        ``main_cats`` regex that matches decides the Kurdish title; regexes
        containing "{country}" are tried once per entry of
        ``country_mapping``.

        Returns the Kurdish title (without "Kategorî:") when a page was
        created, or None when nothing matched or the page already exists.
        """
        for enwiki_regex, config in self.main_cats.items():
            has_country = "{country}" in enwiki_regex
            countries = self.country_mapping.items() if has_country else [(None, None)]
            for en_country_name, ku_country_name in countries:
                if has_country:
                    regex = self._with_country(enwiki_regex, en_country_name)
                else:
                    regex = enwiki_regex
                enwiki_match = re.match(regex, en_title)
                if not enwiki_match:
                    continue
                self.print_sirove("enwiki_match matched")
                kuwiki_pattern = config["ku_title"]
                if has_country:
                    kuwiki_pattern = kuwiki_pattern.replace(
                        "{country}", self.get_cemandi(ku_country_name))
                return self._create_from_en_match(
                    en_title, enwiki_match, kuwiki_pattern, config)
        self.print_sirove("No match found")
        return None

    def _create_from_en_match(self, en_title, enwiki_match, kuwiki_pattern, config):
        """Build the Kurdish title for a successful match and create the page."""
        year = self._year_from_match(enwiki_match)
        if year is None:
            kurdish_title = kuwiki_pattern
        else:
            self.print_sirove(f"Year: {year}")
            if config.get("add_ku_suffix", False):
                kurdish_title = self._fill_year(kuwiki_pattern, year, self.get_ku_suffix(year))
                self.print_sirove("add_ku_suffix is true")
            else:
                kurdish_title = self._fill_year(kuwiki_pattern, year)
                self.print_sirove("add_ku_suffix is false")
            self.print_sirove(f"Kurdish Title: {kurdish_title}")

        kuwiki_title = "Kategorî:" + kurdish_title
        self.print_sirove(f"Kurdish Page Title: {kuwiki_title}")
        if self.ku_exists(kuwiki_title):
            pywikibot.output(f"The category page {kuwiki_title} already exists.")
            return None
        self.print_sirove(f"Creating category {kuwiki_title}.")
        self.save_page(kuwiki_title, self._category_page_text(config),
                       f'Category:{en_title}', None)
        return kurdish_title

    @staticmethod
    def get_ku_suffix(year):
        """Return the Kurdish suffix for *year*: "ê" for ...1 (not ...11), else "an"."""
        if year % 10 == 1 and year % 100 != 11:
            return "ê"
        return "an"

    @staticmethod
    def get_en_suffix(year):
        """Return the English ordinal suffix ("st", "nd", "rd", "th") for *year*."""
        last_two_digits = year % 100
        # 11th, 12th, 13th are the exceptions to the last-digit rule.
        if last_two_digits in (11, 12, 13):
            return "th"
        return {1: "st", 2: "nd", 3: "rd"}.get(last_two_digits % 10, "th")

    @staticmethod
    def get_cemandi(welat):
        """Return the country name *welat* with the ending used in category titles.

        Names containing a space are returned unchanged; names ending in a
        listed vowel get "yê"; a final "î" is replaced by "iyê"; any other
        name gets "ê".
        """
        if ' ' in welat:
            return welat
        welat_lower = welat.lower()
        if welat_lower.endswith(('a', 'e', 'ê', 'i', 'u', 'û', 'o')):
            return welat + "yê"
        if welat_lower.endswith('î'):
            return welat[:-1] + "iyê"
        return welat + "ê"

    def log_cat(self, new_text):
        """Append *new_text* to the bot's problem log page on ku.wikipedia."""
        page = pywikibot.Page(self.site_ku, "Bikarhêner:Balyozbot/kontrol/category creator")
        page.text = page.text + new_text
        page.save(summary=f"[[{self.bot_name}|Bot]]: Problemek hat qeydkirin")

    # ------------------------------------------------------------------
    # Kurdish title -> Kurdish category (English title derived)
    # ------------------------------------------------------------------
    def create_ku_category(self, ku_category):
        """Create the page for the Kurdish category *ku_category* if missing.

        *ku_category* is given without the "Kategorî:" prefix. Monthly
        tracking categories (``tracking_cats``) are tried first, then every
        ``main_cats`` entry that has a "ku_regex". For ``main_cats`` the page
        is only created when the derived English category exists; otherwise
        the request is written to the log page. Always returns None.
        """
        self.print_sirove(f"ku_category: {ku_category}")
        if self.ku_exists(f"Kategorî:{ku_category}"):
            print(f"{ku_category} already exists. Derdê te çi ye?")
            return None

        if self._create_tracking_category(ku_category):
            return None

        matched = False
        for config in self.main_cats.values():
            if "ku_regex" not in config:
                continue
            ku_regex = config["ku_regex"]
            has_country = "{country}" in ku_regex
            countries = self.country_mapping.items() if has_country else [(None, None)]
            for en_country_name, ku_country_name in countries:
                if has_country:
                    regex = self._with_country(ku_regex, self.get_cemandi(ku_country_name))
                else:
                    regex = ku_regex
                kuwiki_match = re.match(regex, ku_category)
                if not kuwiki_match:
                    continue
                matched = True
                self.print_sirove("kuwiki_match matched")
                enwiki_pattern = config["en_title"]
                if has_country:
                    enwiki_pattern = enwiki_pattern.replace("{country}", en_country_name)
                en_title = self._english_title(enwiki_pattern, kuwiki_match, config)
                if self._mirror_en_category(ku_category, en_title, config):
                    # The page now exists; trying further patterns would only
                    # overwrite it.
                    return None
        if not matched:
            self.print_sirove("No match found")
        return None

    def _create_tracking_category(self, ku_category):
        """Create *ku_category* as a monthly clean-up category if it matches.

        Returns True when a tracking regex matched and the page was saved,
        False otherwise.
        """
        for tracking_regex, tracking_enwiki_regex in self.tracking_cats.items():
            # Try the pattern once per month by filling the {month} placeholder.
            for en_month, ku_month in self.ku_months.items():
                tracking_match = re.match(
                    tracking_regex.replace("{month}", ku_month), ku_category)
                if not tracking_match:
                    continue
                self.print_sirove("ku_category matches tracking regex")
                # Group 1 of the Kurdish pattern carries the value that fills
                # {year} in the English title.
                ku_numeric_value = tracking_match.group(1)
                en_title = tracking_enwiki_regex.replace("{month}", en_month)
                en_title = en_title.replace("{year}", f"{ku_numeric_value}")
                en_category = "Category:" + en_title
                ku_title = "Kategorî:" + ku_category
                self.print_sirove(f"en_category: {en_category}")
                self.print_sirove(f"ku_category: {ku_title}")
                text = "{{Kategoriya paqijkirinê ya mehane}}"
                summary = f"[[{self.bot_name}|Bot]]: Kategoriya paqijkirinê ya mehane hat çêkirin"
                self.save_page(ku_title, text, en_category, summary)
                return True
        return False

    def _english_title(self, enwiki_pattern, kuwiki_match, config):
        """Return the English category title for a matched Kurdish title."""
        year = self._year_from_match(kuwiki_match)
        if year is None:
            en_title = enwiki_pattern
        elif config.get("add_en_suffix", False):
            en_title = self._fill_year(enwiki_pattern, year, self.get_en_suffix(year))
            self.print_sirove("add_en_suffix is true")
        else:
            en_title = self._fill_year(enwiki_pattern, year)
            self.print_sirove("add_en_suffix is false")
        self.print_sirove(f"en_title: {en_title}")
        return en_title

    def _mirror_en_category(self, ku_category, en_title, config):
        """Create Kategorî:<ku_category> when Category:<en_title> exists.

        Returns True when the page was saved. When the English category does
        not exist, the request is appended to the log page and False is
        returned.
        """
        en_page_title = "Category:" + en_title
        self.print_sirove(f"enwiki Page Title: {en_page_title}")
        if not self.en_exists(en_page_title):
            # The category is wanted on kuwiki but has no enwiki counterpart.
            new_text = f"\n# [[:Kategorî:{ku_category}]] tê xwestin lê li en.wîkiyê tine"
            self.log_cat(new_text)
            return False
        self.print_sirove(
            f"The category page {en_page_title} exists. Creating category Kategorî:{ku_category}.")
        new_title = "Kategorî:" + ku_category
        self.save_page(new_title, self._category_page_text(config), en_page_title, None)
        return True