Bikarhêner:Balyozxane/skrîpt/py/quickstatements.py

Ji Wîkîpediya, ensîklopediya azad.
import pywikibot

def parse_template(page_text):
    """Extract the parameters of the first "Înterwîkî etîket û danasîn" call.

    The template is read as two positional parameters (language code, then
    the title of the page on that wiki) plus the named parameters ``e=``
    (label) and ``d=`` (description). Whitespace around parameter names and
    values is ignored, so multi-line template calls are parsed correctly.

    Args:
        page_text: Wikitext of the page to scan.

    Returns:
        A 4-tuple ``(lang_code, interwiki_title, label, description)``.
        Elements that are not present are ``None``; when the template is not
        found at all, every element is ``None`` so that callers can always
        unpack the result.
    """
    prefix = "{{Înterwîkî etîket û danasîn|"

    template_start = page_text.find(prefix)
    if template_start == -1:
        return None, None, None, None

    # The first "}}" after the opening ends the call, so nested templates
    # inside a parameter value are not supported.
    template_end = page_text.find("}}", template_start)
    if template_end == -1:
        return None, None, None, None

    template_content = page_text[template_start + len(prefix):template_end].split('|')

    lang_code = None
    interwiki_title = None
    label = None
    description = None

    for raw_param in template_content:
        name, has_equals, value = raw_param.partition('=')
        name = name.strip()
        if has_equals and name == 'd':
            description = value.strip()
        elif has_equals and name == 'e':
            label = value.strip()
        elif lang_code is None:
            # First positional parameter: language code.
            lang_code = raw_param.strip()
        elif interwiki_title is None:
            # Second positional parameter: title on the other wiki.
            interwiki_title = raw_param.strip()
        # Any further positional parameters are ignored.

    # Adjustment kept from the original logic: a literal "d=" inside the
    # description (or "e=" inside the label) redirects the text after it.
    # The None checks prevent a TypeError when a named parameter is absent.
    if description is not None and 'd=' in description:
        label = description.split('d=')[1].strip()
        description = label
    elif label is not None and 'e=' in label:
        description = label.split('e=')[1].strip()

    return lang_code, interwiki_title, label, description

def get_wikidata_id(lang_code, interwiki_title):
    """Return the Wikidata item ID linked to a Wikipedia page.

    Args:
        lang_code: Language code of the Wikipedia the page lives on.
        interwiki_title: Title of the page on that Wikipedia. If the page is
            a redirect, the title of the redirect target is used instead.

    Returns:
        The item's ``id`` attribute, or ``None`` when the title is invalid or
        the page has no Wikidata item.
    """
    site = pywikibot.Site(lang_code, "wikipedia")
    page = pywikibot.Page(site, interwiki_title)

    # Resolve redirects so the lookup uses the title of the real page.
    if page.isRedirectPage():
        interwiki_title = page.getRedirectTarget().title()

    try:
        link = pywikibot.Link(f'{lang_code}:{interwiki_title}', source=site.data_repository())
        return pywikibot.Page(link).data_item().id
    except pywikibot.exceptions.InvalidTitleError:
        pywikibot.error(f"Invalid title: {interwiki_title}")
    except pywikibot.exceptions.NoPageError:
        # A page without a Wikidata item is an expected situation; report it
        # and let the caller skip the entry instead of aborting the run.
        pywikibot.output(f"No Wikidata item found for: {interwiki_title}")
    return None

def is_disambiguation_page(lang_code, interwiki_title):
    """Tell whether a Wikipedia page is in 'Category:Disambiguation pages'.

    Args:
        lang_code: Language code of the Wikipedia the page lives on.
        interwiki_title: Title of the page on that Wikipedia.

    Returns:
        ``True`` if the page (or, for a redirect, its target) lists
        'Category:Disambiguation pages' among its categories. ``False``
        otherwise, including when the page does not exist.
    """
    site = pywikibot.Site(lang_code, "wikipedia")
    page = pywikibot.Page(site, interwiki_title)

    # Missing pages and redirects used to make page.get() raise; handle them
    # explicitly so a single bad link does not abort the whole run.
    if not page.exists():
        return False
    if page.isRedirectPage():
        page = page.getRedirectTarget()

    # NOTE(review): the category title is English-specific, so this check
    # presumably only works for lang_code "en" - confirm whether other
    # languages are expected here.
    return any(
        category.title() == 'Category:Disambiguation pages'
        for category in page.categories()
    )
    
class WiktionaryInterwikiUpdater:
    """Collect Wikidata link data for ku.wikipedia pages that use the
    "Înterwîkî etîket û danasîn" template."""

    def __init__(self):
        self.ku_wikipedia_site = pywikibot.Site("ku", "wikipedia")
        self.en_wikipedia_site = pywikibot.Site("en", "wikipedia")

    def get_wid(self, lang, rupel):
        """Return the Wikidata item title for a Wikipedia page, or ``None``.

        Args:
            lang: Language code of the Wikipedia the page lives on.
            rupel: Title of the page.

        Returns:
            The item title without namespace, or ``None`` when the page has
            no item or the item title is '-1'.
        """
        site = pywikibot.Site(lang, "wikipedia")
        page = pywikibot.Page(site, rupel)
        try:
            item = pywikibot.ItemPage.fromPage(page)
            if item and item.title(with_ns=False) != '-1':
                return item.title(with_ns=False)
        except pywikibot.exceptions.NoPageError:
            pass  # The page has no Wikidata item; fall through to None.

        return None

    def update_interwiki_links(self, page_titles):
        """Gather label, description and Wikidata ID for each given page.

        For every main-namespace page the template is parsed. If the page it
        points to is a disambiguation page, the ku page is tagged with a
        maintenance category (when not already present) and skipped.
        Otherwise the Wikidata ID of the target page is looked up.

        Args:
            page_titles: Iterable of ku.wikipedia page titles.

        Returns:
            A list of dicts with the keys "en_page" (title on the linked
            wiki), "ku_page", "label", "description" and "wikidata_id".
        """
        interwiki_data = []

        for page_title in page_titles:
            print(f"Fetching interwiki links for page: {page_title}")
            page = pywikibot.Page(self.ku_wikipedia_site, page_title)

            if page.namespace() != 0:
                continue

            parsed = parse_template(page.text)
            if not parsed:
                # No template call found on the page.
                continue
            lang_code, interwiki_title, label, description = parsed

            if not (lang_code and interwiki_title):
                continue

            if is_disambiguation_page(lang_code, interwiki_title):
                pywikibot.output(f"Skipping disambiguation page: {page.title()}")

                # Add the maintenance category only if it is not there yet.
                category_page = pywikibot.Page(page.site, "Kategorî:Pages interwiki linking to disambiguation pages")
                category_link = f'[[{category_page.title()}]]'

                if category_link not in page.text:
                    page.text = page.text + "\n" + category_link
                    page.save(summary="+Kategorî:Pages interwiki linking to disambiguation pages")

                # Always skip disambiguation targets, whether or not the
                # category had to be added on this run.
                continue

            item_id = get_wikidata_id(lang_code, interwiki_title)
            if not item_id:
                continue

            print(f'interwiki_title: {interwiki_title}, page_title: {page_title}, label: {label}, description: {description}, item_id: {item_id}  ')

            interwiki_data.append({
                "en_page": interwiki_title,
                "ku_page": page_title,
                "label": label,
                "description": description,
                "wikidata_id": item_id
            })

        return interwiki_data


def _quickstatements_lines(data):
    """Build the QuickStatements commands (one per line) for one entry."""
    item_id = data["wikidata_id"]
    lines = [f'{item_id}\tSkuwiki\t"{data["ku_page"]}"\n']
    # Only emit description/label commands when a value is available;
    # otherwise the literal text "None" or an empty string would be written.
    if data.get("description"):
        lines.append(f'{item_id}\tDku\t"{data["description"]}"\n')
    if data.get("label"):
        lines.append(f'{item_id}\tLku\t"{data["label"]}"\n')
    return lines


def main():
    """Write QuickStatements commands for all pages using the template.

    Finds the main-namespace ku.wikipedia pages that transclude
    "Şablon:Înterwîkî etîket û danasîn", collects their interwiki data and
    writes sitelink, description and label commands to ``newquick.txt``.
    """
    site = pywikibot.Site("ku", "wikipedia")
    template_title = "Şablon:Înterwîkî etîket û danasîn"
    template_page = pywikibot.Page(site, template_title)

    # Pages that transclude the template in the main namespace.
    transcluded_pages = template_page.getReferences(
        follow_redirects=True,
        with_template_inclusion=True,
        only_template_inclusion=True,
        namespaces=[0]
    )

    ku_wikipedia_page_titles = [page.title() for page in transcluded_pages]

    updater = WiktionaryInterwikiUpdater()
    interwiki_data = updater.update_interwiki_links(ku_wikipedia_page_titles)

    with open("newquick.txt", "w", encoding="utf-8") as output_file:
        # NOTE(review): createquick.txt is created/truncated but nothing is
        # written to it in this function; kept to preserve that side effect.
        with open("createquick.txt", "w", encoding="utf-8"):
            for data in interwiki_data:
                if data.get("wikidata_id"):
                    output_file.writelines(_quickstatements_lines(data))

# Run the script only when this file is executed directly, not on import.
if __name__ == '__main__':
    main()