Bikarhêner:Balyozxane/skrîpt/py/removedubs.py
Xuyakirin
#!/usr/bin/env python3
"""
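Remove duplicate ``[[Kategorî:...]]`` links from wiki pages.

Usage

The following parameters are supported:

-always     The bot won't ask for confirmation when putting a page.

-showdiff   The bot will show the differences in the console.

-async      Edits will be performed asynchronously.

-ignore:X   Name of a ``kucosmetics.CANCEL`` member (case-insensitive) that
            is handed to the cosmetic-changes toolkit as its ignore mode.
            Defaults to MATCH.

Use global -simulate option for test purposes. No changes to live wiki
will be done.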
"""
#
# (C) w:ku:User:Balyozxane
#
# Distributed under the terms of the MIT license.
#
import pywikibot
from pywikibot.bot import SingleSiteBot, ConfigParserBot, AutomaticTWSummaryBot
from pywikibot import pagegenerators
import requests
import datetime
import mwparserfromhell
from kucosmetics import CANCEL, CosmeticChangesToolkit
import re
# Not read anywhere in the code visible in this file.
# NOTE(review): confirm it is unused elsewhere before removing.
VERBOSE = False
# When True, treat_page saves the de-duplicated text as-is and skips the
# cosmetic-changes pass (do_cosmetics is only called when this is False).
TESTING = True
class AddTarixBot(
    SingleSiteBot,
    ConfigParserBot,
    AutomaticTWSummaryBot,
):
    """Bot that removes repeated ``[[Kategorî:...]]`` links from a page.

    For every category link that occurs more than once in the wikitext,
    the first occurrence is kept and every later one is deleted. Unless
    the module-level ``TESTING`` flag is set, the result is additionally
    run through the cosmetic-changes toolkit before it is saved.
    """

    # pywikibot bot setting; False is expected to make the framework skip
    # redirect pages -- NOTE(review): confirm against the pywikibot docs.
    use_redirects = False

    # Extra options this bot understands, with their defaults.
    update_options = {
        'async': False,
        'showdiff': False,
        'ignore': CANCEL.MATCH,
    }

    # Matches one category link; group 1 is everything between the
    # namespace prefix and the closing brackets (including any sort key).
    _CATEGORY_LINK_RE = re.compile(r'\[\[Kategorî:(.*?)\]\]')

    def __init__(self, **kwargs):
        """Initialise the bot and remember the page linked in summaries."""
        super().__init__(**kwargs)
        self.bot_name = "Bikarhêner:Balyozxane/skrîpt/py/removedubs.py"

    def do_cosmetics(self, old_text, kozmetik_cebu):
        """Run the cosmetic-changes toolkit over *old_text*.

        :param old_text: wikitext to clean up
        :param kozmetik_cebu: summary suffix to return when nothing changed
        :return: ``(text, summary_suffix)``. The suffix is
            ``"; paqijiyên kozmetîk"`` when the toolkit changed the text,
            otherwise the *kozmetik_cebu* that was passed in.
        """
        cc_toolkit = CosmeticChangesToolkit(self.current_page,
                                            ignore=self.opt.ignore)
        cleaned_new_text = cc_toolkit.change(old_text)
        if cleaned_new_text is False:
            # The toolkit reported that it produced no usable text
            # (presumably an ignored error -- confirm in kucosmetics).
            # Hand back the input so callers never try to save ``False``.
            return old_text, kozmetik_cebu
        if cleaned_new_text != old_text:
            kozmetik_cebu = "; paqijiyên kozmetîk"
        return cleaned_new_text, kozmetik_cebu

    @classmethod
    def _drop_repeated_categories(cls, text):
        """Return *text* with all but the first copy of each category link removed.

        Two links count as the same only when the text after
        ``Kategorî:`` is identical, so ``[[Kategorî:A]]`` and
        ``[[Kategorî:A|key]]`` are both kept.
        NOTE(review): decide whether links differing only by sort key
        should be treated as duplicates too.
        """
        seen = set()

        def keep_first(match):
            name = match.group(1)
            if name in seen:
                return ''
            seen.add(name)
            return match.group(0)

        return cls._CATEGORY_LINK_RE.sub(keep_first, text)

    def treat_page(self) -> None:
        """Remove duplicate category links from the current page and save it."""
        page = self.current_page
        new_text = self._drop_repeated_categories(page.text)

        if new_text == page.text:
            pywikibot.output(
                "No duplicate categories found on page '{}'.".format(
                    page.title()))
            return

        pywikibot.output(
            "Duplicate categories found on page '{}'. "
            "Removing duplicates...".format(page.title()))

        kozmetik_cebu = ""
        if not TESTING:
            # Cosmetic clean-up is skipped entirely while testing.
            new_text, kozmetik_cebu = self.do_cosmetics(new_text,
                                                        kozmetik_cebu)

        # Edit summary: link to the script page plus the optional
        # cosmetic-changes suffix.
        summary = f'[[{self.bot_name}|Bot]]: Kategoriyên ducarî hat jêbirin'
        summary += f'{kozmetik_cebu}'

        self.put_current(
            new_text,
            summary=summary,
            # 'async' is a Python keyword, so attribute access
            # (self.opt.async) is not possible here.
            asynchronous=self.opt['async'],
            show_diff=self.opt['showdiff']
        )
def main(*args: str) -> None:
    """Parse command-line arguments and run the bot.

    Global pywikibot arguments and page-generator arguments are consumed
    first. Of what remains, ``-always``, ``-async`` and ``-showdiff`` are
    boolean switches and ``-ignore:NAME`` selects a ``CANCEL`` member by
    its (case-insensitive) name. Any other argument is silently skipped.

    :param args: command-line arguments
    :raises ValueError: if the ``-ignore`` value names no ``CANCEL`` member
    """
    remaining = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    remaining = factory.handle_args(remaining)

    switches = ('-always', '-async', '-showdiff')
    options = {}
    for raw in remaining:
        name, _, payload = raw.partition(':')
        if name in switches:
            # Strip the leading dash to get the option key.
            options[name[1:]] = True
            continue
        if name != '-ignore':
            continue
        mode = payload.upper()
        try:
            options['ignore'] = getattr(CANCEL, mode)
        except AttributeError:
            raise ValueError(f'Unknown ignore mode {mode!r}!')

    gen = factory.getCombinedGenerator(preload=True)
    # suggest_help returns a true value when it had to print a hint
    # (here: no page generator was given); in that case do not run.
    if pywikibot.bot.suggest_help(missing_generator=not gen):
        return
    AddTarixBot(generator=gen, **options).run()


if __name__ == '__main__':
    main()