Bikarhêner:Balyozxane/skrîpt/py/replacesitil.py
Xuyakirin
#!/usr/bin/env python3
"""
python pwb.py updatewin -f:"replacesitil.py" -s:"+change arg"
Eger ne şitil be, şablonê radike.
Şûnda Şablona Şitil şabloneke şitilan a din li gorî kategoriyên rûpelê zêde dike.
Bikaranîn:
Work on all pages in Kategorî:Şitil no recursive
python pwb.py replacesitil -always -norec
Work on all pages in Kategorî:Şitil recursively search parent categories
python pwb.py replacesitil -showdiff
Work on all pages in mylist.txt file show differences in the console and don't ask confirmation before saving a page
python pwb.py replacesitil -file:mylist.txt -showdiff -always
Work on all pages in Kategorî:Şitil: remove the stub tag if the page is not a stub, otherwise skip it. Pages that are not in the Hemû şitil category are skipped:
python pwb.py replacesitil -always -remove
or use
python pwb.py replacesitil -cat:"Hemû şitil" -always -remove
The following parameters are supported:
-change Adds the given template, e.g. {{Hewlêr-erdnîgarî-şitil}}, to the page. Does not search the categories and removes all existing stub tags, so it should be used with a page generator such as -cat: or -file:
-norec If given, only checks the current categories. Does not
recursively search parent categories.
-remove Sets the bot to remove the stub tag if not stub, else skips.
-always The bot won't ask for confirmation when putting a page.
-showdiff The bot will show the differences in the console.
-async Edits will be performed asynchronously.
Use global -simulate option for test purposes. No changes to live wiki
will be done.
"""
import os
import re
import json
import random
import mytools
import pywikibot
import mwparserfromhell
from mytools import TagHelpers
from functools import lru_cache
from collections import OrderedDict
from kucosmetics import CANCEL, do_kozmetik
from pywikibot import pagegenerators, textlib
from pywikibot.bot import (
ConfigParserBot,
ExistingPageBot,
SingleSiteBot,
)
# Debug-output switch read throughout this module. StubBot.__init__ overwrites
# it with the value of the 'showdiff' option.
VERBOSE = False

# This is required for the text that is shown when you run this script
# with the parameter -help.
# The key has to be the literal placeholder '&params;'. It had been mangled to
# '¶ms;' (an HTML-entity decode of '&para'), which can never match the
# placeholder pywikibot looks for in the module docstring.
docuReplacements = {'&params;': pagegenerators.parameterHelp}  # noqa: N816
class StubBot(
    SingleSiteBot,    # works on exactly one site
    ConfigParserBot,  # reads its options from the scripts.ini settings file
    ExistingPageBot,  # only treats pages that already exist
):
    """Bot that removes or replaces the generic stub tag on existing pages."""

    # Redirect pages are never treated.
    use_redirects = False

    # Script-specific options together with their default values.
    update_options = {
        "async": False,           # save pages asynchronously
        "showdiff": False,        # show the diff (also switches on debug output)
        "remove": False,          # only remove the tag from non-stubs
        "norec": False,           # do not climb into parent categories
        "ignore": CANCEL.MATCH,   # handed through to do_kozmetik()
        "change": "",             # template text to insert instead of searching
    }
# Set up the bot: copy the relevant options onto the instance, load the stub
# category configuration from disk and fetch the template redirects needed
# later on.
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# The module-wide debug switch follows the 'showdiff' option.
global VERBOSE
VERBOSE = self.opt.get('showdiff', False)
# Cache the options that treat_page() and its helpers consult.
self.remove_only = self.opt.get("remove", False)
self.norec = self.opt.get("norec", False)
self.change_only = self.opt.get("change", '')
# Page title that is linked from the edit summaries.
self.bot_name = "User:Balyozxane/skrîpt/py/replacesitil.py"
# Stub category names, super-category -> categories map and the raw JSON.
self.kategoriyen_sitilan, self.serkategoriyen_sitilan, self.sitil_data = self.get_json_cats()
# Redirect names of the generic stub template (used by remove_stub_tag).
self.sitil_redirects = mytools.get_template_redirects(self.site, "Şitil")
# Categories for which add_templates() has inserted a stub template.
self.added_categories = set()
# Redirect names of the talk-page banner (used by remove_sitil_class).
self.kalik_redirects = mytools.get_template_redirects(self.site, 'Kalika wîkîprojeyê')
# Debug output.
if VERBOSE:
print("<<< serkategoriyen_sitilan >>>\n,", self.serkategoriyen_sitilan)
print(f"Initial self.change_only: {self.change_only} (type: {type(self.change_only)})")
@staticmethod
def get_json_cats():
    """Load the stub configuration from ``kategoriyen_sitilan.json``.

    The file is opened relative to the current working directory.

    Returns:
        A 3-tuple ``(categories, by_super_category, raw)`` where
        ``categories`` is the set of every top-level entry's "kategorî"
        value ("" when an entry has none), ``by_super_category`` maps the
        "kategorî" of each direct "serkategorî" item to the set of entry
        categories that list it, and ``raw`` is the parsed JSON itself.
    """
    with open('kategoriyen_sitilan.json', 'r', encoding='utf-8') as handle:
        sitil_data = json.load(handle)

    stub_cats = set()
    children_by_super = {}
    for entry in sitil_data.values():
        own_cat = entry.get("kategorî", "")
        stub_cats.add(own_cat)
        # A missing or empty "serkategorî" contributes nothing.
        for super_entry in (entry.get("serkategorî") or {}).values():
            super_cat = super_entry.get("kategorî", "")
            if super_cat:
                children_by_super.setdefault(super_cat, set()).add(own_cat)
    return stub_cats, children_by_super, sitil_data
@staticmethod
def count_sitil(added_categories):
    """Increment the per-category counters stored in ``sitilcount.json``.

    The file lives in the current working directory and maps a category
    name to ``{"number": <count>}``. Every name in *added_categories* is
    counted once, then the file is rewritten with the highest counts first.
    """
    json_file = "sitilcount.json"

    # Start from the stored counters when the file is already there.
    counts = {}
    if os.path.exists(json_file):
        with open(json_file, 'r', encoding='utf-8') as src:
            counts = json.load(src)

    for name in added_categories:
        entry = counts.setdefault(name, {'number': 0})
        entry['number'] += 1

    ranked = sorted(counts.items(), key=lambda pair: pair[1]['number'], reverse=True)
    with open(json_file, 'w', encoding='utf-8') as dst:
        json.dump(OrderedDict(ranked), dst, ensure_ascii=False, indent=4)
def get_existing_cats(self, page):
    """Return the unhidden categories of *page*, memoised per bot instance.

    The lookup is delegated to ``mytools.get_unhidden_categories`` with the
    page's site code and title. Results are kept in a dictionary stored on
    the instance, not in a class-level ``functools.lru_cache``: a cache on
    the method would also key on ``self`` and keep every bot instance and
    page object alive for as long as the class exists.

    The cached object itself is returned, so callers must not mutate it.
    """
    cache = getattr(self, "_existing_cats_cache", None)
    if cache is None:
        # Created lazily so the method does not depend on __init__.
        cache = self._existing_cats_cache = {}

    key = (page.site.code, page.title())
    if key not in cache:
        cache[key] = mytools.get_unhidden_categories(*key)
    return cache[key]
# Search the parents of ``categories`` for stub categories, climbing further
# up when nothing is found.
#
# categories: iterable of category names without the "Kategorî:" prefix.
# depth: current recursion level; a further level is only started while
#        depth < 2.
# Returns the intersection of self.kategoriyen_sitilan with the collected
# parent categories (a set, possibly empty).
def get_from_recursive_parents(self, categories, depth=0):
if VERBOSE:
print(f"depth {depth}: fetching rec parents for:\n{categories}")
# Union of the parents of every non-hidden category visited.
parent_cats = set()
for category in categories:
cat_page = pywikibot.Category(self.site, "Kategorî:" + category)
# Hidden categories are not followed.
if cat_page.isHiddenCategory():
print(f"depth {depth}: {category} hidden. Skipped.")
continue
cat_parents = self.get_existing_cats(cat_page)
if VERBOSE:
print(f"depth {depth}: rec cat_parents {category}:\n{cat_parents}")
if cat_parents:
parent_cats.update(cat_parents)
# Stub categories among the parents collected so far.
common_categories = self.kategoriyen_sitilan.intersection(parent_cats)
if len(common_categories) < 1:
print(f"depth {depth}: no match rec again")
if depth < 2:
# NOTE(review): the recursion is seeded with cat_parents (the parents of the
# category handled last), not with the accumulated parent_cats - confirm
# that this is intended.
# The matches found higher up are merged into parent_cats so that the
# intersection below picks them up.
parent_cats.update(self.get_from_recursive_parents(cat_parents, depth + 1))
else:
if VERBOSE:
print(f"depth {depth}: found match")
# Recomputed because parent_cats may have grown through the recursion.
common_categories = self.kategoriyen_sitilan.intersection(parent_cats)
if VERBOSE:
print("common_categories:\n", common_categories)
return common_categories
def get_from_parents(self, categories):
    """Look for stub categories among the direct parents of *categories*.

    Args:
        categories: category names without the "Kategorî:" prefix.

    Returns:
        ``None`` when *categories* is empty. Otherwise the stub categories
        found among the parents of all non-hidden categories; when there is
        no match, or the only match is "Kes", the result of
        ``get_from_recursive_parents`` on those parents is returned instead.
    """
    if len(categories) == 0:
        return None

    if VERBOSE:
        print("fetching parents for:\n")

    collected = []
    for name in categories:
        cat_page = pywikibot.Category(self.site, "Kategorî:" + name)
        if cat_page.isHiddenCategory():
            print(f"{name} hidden. Skipped.")
            continue
        parents = self.get_existing_cats(cat_page)
        if VERBOSE:
            print(f"{name} parents:\n{parents}")
        if parents:
            collected += parents

    if VERBOSE:
        print("parent_cats:\n", collected)

    matches = self.kategoriyen_sitilan.intersection(collected)
    # Nothing found, or nothing more specific than "Kes": climb further up.
    if not matches or matches == {"Kes"}:
        matches = self.get_from_recursive_parents(collected)
    return matches
def remove_categories(self, common_categories):
def has_subcategory_in_common(category):
if category not in self.serkategoriyen_sitilan:
return False
for subcategory in self.serkategoriyen_sitilan[category]:
if subcategory in common_categories or has_subcategory_in_common(subcategory):
return True
return False
to_remove = {category for category in common_categories if has_subcategory_in_common(category)}
common_categories.difference_update(to_remove)
if len(common_categories) > 1 and "Sal" in common_categories:
common_categories.remove("Sal")
cancellation_dict = {
"Televîzyon": "Rojname",
"Sal": "Kes",
"Teknolojî": "Felsefe",
"Zanist": "Felsefe",
"Wêje": "Televîzyon"
}
for key, value in cancellation_dict.items():
if key in common_categories and value in common_categories:
print(f"{key} removed coz {value} exists")
common_categories.remove(key)
return common_categories
def get_common_cats(self, categories):
"""
Determine which stub categories apply to the current page.

``categories`` holds the page's category names. They are intersected with
``self.kategoriyen_sitilan``; the parent categories are consulted through
``get_from_parents`` when that yields nothing, and "Kes" is added when the
P31 value fetched for the page is "Q5".

Returns:
set: The matching stub categories. With the ``norec`` option the direct
intersection is returned as is (it may be empty).
None: When ``categories`` is empty or no stub category was found.
"""
if len(categories) == 0:
return
# Direct hits: page categories that are themselves stub categories.
common_categories = self.kategoriyen_sitilan.intersection(categories)
if VERBOSE:
print("categories: ", categories)
print("common_categories: ", common_categories)
# -norec: never look beyond the page's own categories.
if self.norec:
return common_categories
if len(common_categories) < 1:
common_categories = self.get_from_parents(categories)
if VERBOSE:
print("common_categories: ", common_categories)
# P31 value of the current page, fetched through mytools.get_pvalue.
is_mirov = mytools.get_pvalue(self.site, self.current_page.title(), pvalue="P31")
if is_mirov == "Q5":
if VERBOSE:
print("mirov e")
if "Kes" not in common_categories:
common_categories.add("Kes")
if len(common_categories) < 1:
return
return common_categories
def find_category_data(self, category):
    """Return the configuration entry whose "kategorî" equals *category*.

    ``self.sitil_data`` is searched depth-first in document order: every
    top-level entry is checked first, then the entries nested under its
    "serkategorî" key, before moving on to the next top-level entry.

    Returns:
        The first matching entry dict, or ``None`` when there is none.
    """
    # Explicit stack; children are pushed reversed so they pop in order.
    pending = list(reversed(list(self.sitil_data.values())))
    while pending:
        node = pending.pop()
        if "kategorî" in node and node["kategorî"] == category:
            return node
        if "serkategorî" in node:
            pending.extend(reversed(list(node["serkategorî"].values())))
    return None
def add_templates(self, text, common_categories):
    """Append the stub template of every matched category to *text*.

    Categories are processed in sorted order. *common_categories* is a set
    of strings, whose iteration order differs from run to run because of
    string hash randomisation, so without sorting the same page could get
    its templates (and its edit summary) in a different order each time.

    Categories without an entry in the configuration are skipped, and the
    category of each inserted template is recorded in
    ``self.added_categories``.

    Args:
        text: wikitext of the page.
        common_categories: matched stub category names.

    Returns:
        ``(new_text, summary)`` where ``summary`` is a comma-separated list
        of wiki links to the inserted templates ("" when none was added).
    """
    added_templates = []
    for category in sorted(common_categories):
        print("category: ", category)
        category_data = self.find_category_data(category)
        print("category_data: ", category_data)
        if not category_data:
            continue
        template_value = category_data["şablon"]
        print("template_value: ", template_value)
        # '{{' + name + '}}'
        new_template = f'{{{{{template_value}}}}}'
        text = textlib.add_text(text, new_template, site=self.site)
        added_templates.append("{{[[Şablon:" + template_value + "|" + template_value + "]]}}")
        self.added_categories.add(category_data["kategorî"])
    added_templates_summary = ', '.join(added_templates)
    return text, added_templates_summary
def remove_stub_tag(self, text: str) -> str:
    """Return *text* with every stub template stripped.

    The generic stub template and its redirects are removed through
    ``mytools.remove_template``; afterwards two regular expressions delete
    the remaining stub-like templates ("...-şitil", "Şitil-...", "Kurt",
    "Stub", "Şitlek", ...).
    """
    text = mytools.remove_template(text, self.sitil_redirects)

    leftover_patterns = (
        # "<x>-şitil", the bare stub names and "<x>-şitil-<y>"
        r'{{\s*([^\}]+\-şitil|[Şş]iti?l|[Kk]urt|[Ss]tub|[Şş]itlek|[^\}]+\-şitil\-[^\}]+)\s*}}',
        # "Şitil-<x>"
        r'{{\s*([Şş]itil-[^\}]+)\s*}}',
    )
    for pattern in leftover_patterns:
        text = re.sub(pattern, '', text)
    return text
def remove_sitil_class(self, page):
    """Blank the ``sinif`` parameter on *page*'s talk page if it says stub.

    Every template on the talk page whose name is one of
    ``self.kalik_redirects`` and whose ``sinif`` parameter equals 'şitil'
    gets that parameter emptied. The talk page is saved only when its text
    actually changed; nothing happens when the talk page does not exist.
    """
    talk_page = page.toggleTalkPage()
    if not talk_page.exists():
        return

    original = talk_page.text
    parsed = mwparserfromhell.parse(original)
    for tpl in parsed.filter_templates():
        if mytools.ucfirst(tpl.name) not in self.kalik_redirects:
            continue
        if not tpl.has('sinif'):
            continue
        if mytools.lcfirst(tpl.get('sinif').value) == 'şitil':
            tpl.add('sinif', '')

    updated = str(parsed)
    if updated == original:
        return
    talk_page.text = updated
    talk_page.save(summary=f'[[{self.bot_name}|Bot]]: Sinifa şitil hat rakirin')
def match_from_title(self):
    """Guess a stub category from the title of the current page.

    A title starting with 'Balafirgeha' wins; otherwise the first
    disambiguator from the table below that occurs in the title decides.

    Returns:
        A one-element set with the category name, or ``None`` when the
        title gives no hint.
    """
    title = self.current_page.title()
    if title.startswith('Balafirgeha'):
        return {'Balafirgeh'}

    # Checked in this order; the first hit is returned.
    disambiguators = (
        ('(albûm)', 'Albûm'),
        ('(eşîr)', 'Eşîr'),
        ('(fîlm)', 'Fîlm'),
        ('(pirtûk)', 'Pirtûk'),
        ('(rojname)', 'Rojname'),
        ('(kovar)', 'Kovar'),
        ('(mîtolojî)', 'Mîtolojî'),
    )
    for marker, category in disambiguators:
        if marker in title:
            return {category}
    return None
# Process the current page: decide whether its stub tag has to be removed,
# replaced by the template given with -change, or replaced by the templates
# that match the page's categories, then save the result.
def treat_page(self) -> None:
page = self.current_page
# Only pages in namespace 0 are treated.
if page.namespace() != 0:
if VERBOSE:
print("Skipping Namespace not 0.")
return
# Without -change the page must be in the "Hemû şitil" category.
hemu_sitil = mytools.is_category_in_page(page, "Hemû şitil")
if not hemu_sitil and not self.change_only:
if VERBOSE:
print("Skipping hemû şitil tine.")
return
# NOTE(review): the site code is hard-coded to "ku" here, whereas
# get_existing_cats() passes page.site.code - confirm which is intended.
categories = mytools.get_unhidden_categories("ku", page.title())
if not categories:
if VERBOSE:
print("Skipped no cats")
return
# Pages that mytools.is_liste() reports as lists are left alone.
is_list = mytools.is_liste(self.site, categories)
if is_list:
if VERBOSE:
print("Skipped liste")
return
# TagHelpers.is_sitil() result; the code below distinguishes False, the
# string 'lîste' and any other value.
is_sitil = TagHelpers.is_sitil(page)
if VERBOSE:
print("is_sitil: ", is_sitil)
if is_sitil == 'lîste':
if VERBOSE:
print("Skipped lîste")
return
zarava_heye = mytools.zaravayen_din(page.categories())
text = page.text
# Mode 1: is_sitil is False -> strip the stub tag.
if is_sitil is False:
new_text = self.remove_stub_tag(text)
append_sum = "hat rakirin"
# -remove: any other page is skipped.
elif self.remove_only:
if VERBOSE:
print('Skipped remove only')
return
# Mode 2 (-change): replace all stub tags by the given template text.
elif self.change_only:
new_text = self.remove_stub_tag(text)
new_text = textlib.add_text(new_text, add=self.change_only)
append_sum = "bi " + self.change_only + " hat guhartin"
# Mode 3: pick the replacement templates from the page's categories.
else:
if not mytools.is_category_in_page(page, "Şitil") and not self.norec:
if VERBOSE:
print("Skipping şitil tine.")
return
if page.isDisambig():
if VERBOSE:
print("Skipping disambig.")
return
if zarava_heye:
if VERBOSE:
print("Skipping zarava_heye.")
return
# compare page cats with şitil cats and return found ones
common_categories = self.get_common_cats(categories)
# Fall back to hints in the page title.
if not common_categories:
common_categories = self.match_from_title()
# Still nothing: count the page's categories in sitilcount.json and give up.
if not common_categories:
self.count_sitil(categories)
return
common_categories = self.remove_categories(common_categories)
new_text = self.remove_stub_tag(text)
# NOTE(review): add_templates() skips categories without a configuration
# entry; if that happens for all of them the stub tag is removed without
# a replacement and the summary has an empty template list - confirm.
new_text, added_templates = self.add_templates(new_text, common_categories)
append_sum = f"bi {added_templates} hat guhartin"
if text != new_text:
# Cosmetic clean-up; the second return value is appended to the summary.
new_text, kozmetik_cebu = do_kozmetik(self.current_page,
new_text,
show_diff=self.opt.showdiff,
ignore=self.opt.ignore)
summary = f'[[{self.bot_name}|Bot]]: Şablona {{{{[[Şablon:Şitil|Şitil]]}}}}'
summary += f' {append_sum}{kozmetik_cebu}'
if VERBOSE:
print(f"\n<<< {page.title()} >>>\n")
# NOTE(review): the return value of put_current() is not checked.
self.put_current(
new_text,
summary=summary,
asynchronous=self.opt['async'],
show_diff=self.opt['showdiff']
)
# After a plain removal the talk page's 'sinif' value is cleared as well.
if append_sum == 'hat rakirin':
self.remove_sitil_class(page)
def main(*args: str) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    :param args: command line arguments
    :raises ValueError: if ``-ignore:<mode>`` names a mode that ``CANCEL``
        does not define
    """
    # Start every run with empty counters (only if the file already exists).
    if os.path.exists("sitilcount.json"):
        with open("sitilcount.json", 'w', encoding='utf-8') as file:
            file.write("{}")

    options = {}
    # Process global arguments to determine desired site
    local_args = pywikibot.handle_args(args)

    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    gen_factory = pagegenerators.GeneratorFactory()

    # Process pagegenerators arguments
    local_args = gen_factory.handle_args(local_args)

    # Parse your own command line arguments
    for arg in local_args:
        arg, _, value = arg.partition(':')
        # The leading '-' is stripped here, so all comparisons below use the
        # bare option name. The old code compared against '-always',
        # '-ignore', ... which could never match; as a result
        # "-ignore:<mode>" ended up as ignore=True instead of a CANCEL mode.
        option = arg[1:]
        if option == 'change':
            options['change'] = value
        elif option == 'ignore':
            value = value.upper()
            try:
                options['ignore'] = getattr(CANCEL, value)
            except AttributeError:
                raise ValueError(f'Unknown ignore mode {value!r}!') from None
        # take the remaining options as booleans (always, async, showdiff,
        # remove, norec, ...).
        # You will get a hint if they aren't pre-defined in your bot class
        else:
            options[option] = True

    # The preloading option is responsible for downloading multiple
    # pages from the wiki simultaneously.
    gen = gen_factory.getCombinedGenerator(preload=True)
    if not gen:
        # Default work list: the members of Kategorî:Şitil in random order.
        # Shuffling happens only here because random.shuffle() needs a
        # mutable sequence - presumably get_cat_members() returns a list
        # (TODO confirm); a combined generator could not be shuffled.
        gen = mytools.get_cat_members(pywikibot.Site(), "Şitil", 0, itr_page=True)
        random.shuffle(gen)

    # check if further help is needed
    if not pywikibot.bot.suggest_help(missing_generator=not gen):
        # pass generator and private options to the bot
        bot = StubBot(generator=gen, **options)
        bot.run()


if __name__ == '__main__':
    main()