Bikarhêner:Balyozxane/skrîpt/py/quickstatements.py
Xuyakirin
import pywikibot
def parse_template(page_text):
    """Extract the parameters of the first "Înterwîkî etîket û danasîn" call.

    The call is expected to look like
    ``{{Înterwîkî etîket û danasîn|<lang>|<title>|e=<label>|d=<description>}}``.
    The first two parameters that are not ``e=``/``d=`` are taken, in order,
    as the language code and the interwiki title; further unnamed parameters
    are ignored.

    Args:
        page_text: Wikitext to search.

    Returns:
        A ``(lang_code, interwiki_title, label, description)`` tuple. Each
        element is ``None`` when the corresponding parameter is missing, and
        all four are ``None`` when the template is absent or has no closing
        ``}}``.
    """
    prefix = "{{Înterwîkî etîket û danasîn|"
    not_found = (None, None, None, None)

    template_start = page_text.find(prefix)
    if template_start == -1:
        return not_found
    template_end = page_text.find("}}", template_start)
    if template_end == -1:
        return not_found

    # NOTE: this is a plain split on '|' up to the first '}}'; nested
    # templates or piped links inside a parameter value are not supported.
    template_content = page_text[template_start + len(prefix):template_end].split('|')

    lang_code = None
    interwiki_title = None
    label = None
    description = None

    for param in template_content:
        if param.startswith('d='):
            description = param[2:].strip()
        elif param.startswith('e='):
            label = param[2:].strip()
        elif lang_code is None:
            lang_code = param.strip()
        elif interwiki_title is None:
            interwiki_title = param.strip()

    # Adjust label and description when a value itself still contains a
    # 'd=' / 'e=' marker. The None checks keep a template that omits either
    # parameter from raising TypeError on the ``in`` test.
    if description is not None and 'd=' in description:
        label = description.split('d=')[1].strip()
        description = label
    elif label is not None and 'e=' in label:
        description = label.split('e=')[1].strip()

    return lang_code, interwiki_title, label, description
def get_wikidata_id(lang_code, interwiki_title):
    """Return the Wikidata item ID linked to a Wikipedia page.

    Redirects are followed once, so the ID returned is that of the redirect
    target.

    Args:
        lang_code: Language code of the Wikipedia to query (e.g. ``"en"``).
        interwiki_title: Title of the page on that Wikipedia.

    Returns:
        The item ID, or ``None`` when the title is invalid or the page has no
        Wikidata item.
    """
    site = pywikibot.Site(lang_code, "wikipedia")
    try:
        # Page lookups live inside the try block because the title is only
        # validated lazily, on first access to the page.
        page = pywikibot.Page(site, interwiki_title)
        if page.isRedirectPage():
            # Resolve the redirect so the item of the real article is used.
            page = page.getRedirectTarget()
        return page.data_item().id
    except pywikibot.exceptions.InvalidTitleError:
        pywikibot.error(f"Invalid title: {interwiki_title}")
    except pywikibot.exceptions.NoPageError:
        # Raised when the page does not exist or has no Wikidata item; one
        # such page should not abort the whole run.
        pywikibot.warning(f"No Wikidata item for {lang_code}:{interwiki_title}")
    return None
def is_disambiguation_page(lang_code, interwiki_title):
    """Tell whether a Wikipedia page is a disambiguation page.

    Redirects are resolved first, so a redirect pointing at a disambiguation
    page is reported as one.

    Args:
        lang_code: Language code of the Wikipedia to query (e.g. ``"en"``).
        interwiki_title: Title of the page on that Wikipedia.

    Returns:
        ``True`` when the page is a disambiguation page; ``False`` otherwise,
        including when the page does not exist or the title is invalid.
    """
    site = pywikibot.Site(lang_code, "wikipedia")
    page = pywikibot.Page(site, interwiki_title)
    try:
        if page.isRedirectPage():
            # Fetching a redirect's content raises IsRedirectPageError, so
            # inspect the redirect target instead.
            page = page.getRedirectTarget()
        if not page.exists():
            return False
        # isDisambig() works on any language edition; the category name
        # below only exists on the English Wikipedia.
        if page.isDisambig():
            return True
        return any(
            category.title() == 'Category:Disambiguation pages'
            for category in page.categories()
        )
    except pywikibot.exceptions.InvalidTitleError:
        # An unparsable title cannot be a disambiguation page.
        return False
class WiktionaryInterwikiUpdater:
    """Collect Wikidata link data for ku.wikipedia pages using the
    "Înterwîkî etîket û danasîn" template.

    Despite the class name, every site used here is a Wikipedia.
    """

    def __init__(self):
        """Create the Kurdish and English Wikipedia site objects."""
        self.ku_wikipedia_site = pywikibot.Site("ku", "wikipedia")
        self.en_wikipedia_site = pywikibot.Site("en", "wikipedia")

    def get_wid(self, lang, rupel):
        """Return the Wikidata item ID for a Wikipedia page, or ``None``.

        Args:
            lang: Language code of the Wikipedia to query.
            rupel: Title of the page on that Wikipedia.

        Returns:
            The item title without namespace, or ``None`` when the page has
            no item or the item title is ``'-1'``.
        """
        site = pywikibot.Site(lang, "wikipedia")
        page = pywikibot.Page(site, rupel)
        try:
            item = pywikibot.ItemPage.fromPage(page)
            if item and item.title(with_ns=False) != '-1':
                return item.title(with_ns=False)
        except pywikibot.exceptions.NoPageError:
            pass  # The page has no Wikidata item.
        return None

    def update_interwiki_links(self, page_titles):
        """Build one record per page whose template target has a Wikidata item.

        For every main-namespace page in ``page_titles`` the template is
        parsed. If the target is a disambiguation page, the page is tagged
        with a tracking category (saved only when the category link is not
        already in the text) and skipped. Otherwise the target's Wikidata ID
        is looked up and a record is emitted when one is found.

        Args:
            page_titles: Iterable of ku.wikipedia page titles.

        Returns:
            A list of dicts with the keys ``en_page``, ``ku_page``,
            ``label``, ``description`` and ``wikidata_id``.
        """
        interwiki_data = []
        site = self.ku_wikipedia_site  # loop-invariant; reuse the instance's site
        for page_title in page_titles:
            print(f"Fetching interwiki links for page: {page_title}")
            page = pywikibot.Page(site, page_title)
            # Check the namespace first so text is not fetched for pages that
            # will be skipped anyway.
            if page.namespace() != 0:
                continue

            parsed = parse_template(page.text)
            if parsed is None:
                # Template not found on the page; nothing to unpack.
                continue
            lang_code, interwiki_title, label, description = parsed
            if not (lang_code and interwiki_title):
                continue

            if is_disambiguation_page(lang_code, interwiki_title):
                pywikibot.output(f"Skipping disambiguation page: {page.title()}")
                # Add the tracking category only if it is not there yet.
                category_page = pywikibot.Page(page.site, "Kategorî:Pages interwiki linking to disambiguation pages")
                category_link = f'[[{category_page.title()}]]'
                if category_link not in page.text:
                    page.text = page.text + "\n" + category_link
                    page.save(summary="+Kategorî:Pages interwiki linking to disambiguation pages")
                continue  # Skip to the next page

            item_id = get_wikidata_id(lang_code, interwiki_title)
            if not item_id:
                continue
            print(f'interwiki_title: {interwiki_title}, page_title: {page_title}, label: {label}, description: {description}, item_id: {item_id} ')
            interwiki_data.append({
                "en_page": interwiki_title,
                "ku_page": page_title,
                "label": label,
                "description": description,
                "wikidata_id": item_id,
            })
        return interwiki_data
def main():
    """Write QuickStatements commands for pages using the template.

    Lists the main-namespace ku.wikipedia pages that transclude
    "Şablon:Înterwîkî etîket û danasîn", collects their link data and writes
    tab-separated commands to ``newquick.txt``: a ``Skuwiki`` sitelink line
    per item, plus ``Dku`` (description) and ``Lku`` (label) lines when those
    values are present.
    """
    site = pywikibot.Site("ku", "wikipedia")
    template_page = pywikibot.Page(site, "Şablon:Înterwîkî etîket û danasîn")

    # Pages which transclude the template in the main namespace.
    transcluded_pages = template_page.getReferences(
        follow_redirects=True,
        with_template_inclusion=True,
        only_template_inclusion=True,
        namespaces=[0],
    )
    ku_wikipedia_page_titles = [page.title() for page in transcluded_pages]

    updater = WiktionaryInterwikiUpdater()
    interwiki_data = updater.update_interwiki_links(ku_wikipedia_page_titles)

    # NOTE(review): createquick.txt is opened (and therefore truncated) but
    # nothing is ever written to it. It is still opened here so that this
    # side effect stays the same; confirm whether it can be dropped.
    with open("newquick.txt", "w", encoding="utf-8") as output_file, \
            open("createquick.txt", "w", encoding="utf-8"):
        for data in interwiki_data:
            item_id = data.get("wikidata_id")
            if not item_id:
                continue
            lines = [f'{item_id}\tSkuwiki\t"{data["ku_page"]}"\n']
            # Skip missing or empty values; otherwise the literal text "None"
            # (or an empty string) would be submitted as description/label.
            if data.get("description"):
                lines.append(f'{item_id}\tDku\t"{data["description"]}"\n')
            if data.get("label"):
                lines.append(f'{item_id}\tLku\t"{data["label"]}"\n')
            output_file.writelines(lines)
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()