# Bikarhêner:Balyozxane/skrîpt/py/addgotubej.py
#
# Ji Wîkîpediya, ensîklopediya azad.
#!/usr/bin/python3
import pywikibot
import pymysql
import re
import os
import time

def getList(lang_code, query_name, file_name, query_params=None):
    """Run a SQL query on a wiki replica database and return page titles.

    Connects to ``<lang_code>wiki.analytics.db.svc.wikimedia.cloud`` on port
    3306, reading the user name and password from the ``[client]`` section of
    ``~/replica.my.cnf``, and executes the given query against the
    ``<lang_code>wiki_p`` database.

    Args:
        lang_code: Language code used to build the host and database names
            (for example ``"ku"``).
        query_name: The SQL text to execute (despite the name, this is the
            query itself, not an identifier).
        file_name: Unused; kept so existing callers keep working.
        query_params: Optional parameters forwarded to ``cursor.execute``.

    Returns:
        A list of strings: the first column of every result row, decoded as
        UTF-8 when it arrives as bytes, with underscores replaced by spaces.
    """
    # Note: on Toolforge the user name and password come from replica.my.cnf,
    # so they are not passed explicitly here.
    db_hostname = lang_code + "wiki.analytics.db.svc.wikimedia.cloud"
    db_name = lang_code + "wiki_p"

    connection = pymysql.connect(
        host=db_hostname,
        port=3306,
        database=db_name,
        read_default_file="~/replica.my.cnf",
        charset='utf8',
    )

    # try/finally plus the cursor context manager guarantee that both the
    # cursor and the connection are released even when a query fails.
    try:
        with connection.cursor() as cursor:
            # The database is already selected at connect time; the explicit
            # USE statement is kept from the original implementation.
            cursor.execute("USE " + db_name + ";")

            if query_params is not None:
                cursor.execute(query_name, query_params)
            else:
                cursor.execute(query_name)

            results = cursor.fetchall()
    finally:
        connection.close()

    page_titles = []
    for row in results:
        raw_title = row[0]  # Only the first column of each row is used
        # Binary columns come back as bytes; tolerate drivers/queries that
        # already hand back str instead of failing on .decode().
        if isinstance(raw_title, (bytes, bytearray)):
            raw_title = raw_title.decode('utf-8')
        page_titles.append(raw_title.replace('_', ' '))

    return page_titles

def treat_page(page_title):
    """Prepend a header (or deletion) template to one page on ku.wikipedia.

    The page is skipped, with a message printed, when it does not exist,
    when the page returned by ``toggleTalkPage()`` does not exist, when it
    is a redirect, or when its text already contains a ``{{Serê gotûbêjê``
    / ``{{Talk header`` or ``{{jêbirin`` template.  Otherwise
    ``{{jêbirin|Rûpela vala}}`` is prepended if the text is empty apart from
    whitespace, dots and commas, and ``{{Serê gotûbêjê}}`` is prepended in
    every other case.

    Args:
        page_title: Full title (including any namespace prefix) of the page
            to edit.

    Returns:
        None.  Errors raised while saving are printed, not propagated.
    """
    site = pywikibot.Site("ku", "wikipedia")
    page = pywikibot.Page(site, page_title)

    # Check if the page exists
    if not page.exists():
        print(f"Page {page.title()} does not exist. Skipping...")
        return

    sereke = page.toggleTalkPage()

    if not sereke.exists():
        # title is a method: it has to be called, otherwise the message shows
        # the bound-method repr instead of the page name.
        print(f"rûpela {sereke.title()} does not exist. Skipping...")
        return

    if page.isRedirectPage():
        print(f"Skipping page {page.title()} - it's a redirect.")
        return

    text = page.text

    # Both capitalisations of the first letter are checked explicitly.
    header_markers = (
        "{{Serê gotûbêjê",
        "{{serê gotûbêjê",
        "{{Talk header",
        "{{talk header",
    )
    if any(marker in text for marker in header_markers):
        print("Skipping page - {{Serê gotûbêjê is already in the text.")
        return

    deletion_markers = ("{{jêbirin", "{{Jêbirin")
    if any(marker in text for marker in deletion_markers):
        print("Skipping page - {{Jêbirin is in the text.")
        return

    # Remove whitespace characters, dots, and commas from the page text
    cleaned_text = re.sub(r'[\s.,]', '', text)

    if not cleaned_text:
        # Nothing meaningful on the page: add {{jêbirin|Rûpela vala}}
        template_text = "{{jêbirin|Rûpela vala}}"
        summary = f"Bot: +{template_text}"
    else:
        # The page has content: add the header template
        template_text = "{{Serê gotûbêjê}}"
        summary = "Bot: {{[[Şablon:Serê gotûbêjê|Serê gotûbêjê]]}} lê hat zêdekirin"

    # Put the template at the very top of the page text
    page.text = f"{template_text}\n\n{text}"

    try:
        page.save(summary=summary)
    except pywikibot.exceptions.LockedPageError:
        print(f'Skip locked page {page.title(as_link=True)}')
    except Exception as e:
        print(f"Error editing page {page.title()}: {str(e)}")

# SQL query that lists the pages to process.
#
# It selects non-redirect pages in namespaces 1, 5, 11, 15 and 829 and
# returns each title prefixed with the namespace name given in the CASE
# expression.  Pages are excluded when they already link (via templatelinks)
# to 'Serê_gotûbêjê' or 'Talk_header' in namespace 10 (presumably the
# template namespace), or when the title contains 'Arşîv' or '/Jêbirin'.
# The ELSE branch of the CASE is never reached because the WHERE clause
# restricts page_namespace to the five values handled above it.
ku_query = """
SELECT 
  CASE 
    WHEN page_namespace = 1 THEN CONCAT('Gotûbêj:', page_title)
    WHEN page_namespace = 5 THEN CONCAT('Gotûbêja Wîkîpediyayê:', page_title)
    WHEN page_namespace = 11 THEN CONCAT('Gotûbêja şablonê:', page_title)
    WHEN page_namespace = 15 THEN CONCAT('Gotûbêja kategoriyê:', page_title)
    WHEN page_namespace = 829 THEN CONCAT('Gotûbêja modulê:', page_title)
    ELSE page_title
  END AS full_page_title
FROM page
WHERE page_is_redirect = 0
  AND page_namespace IN (1,5,11,15,829)
  AND page_id NOT IN (
    SELECT tl_from FROM templatelinks JOIN linktarget ON tl_target_id = lt_id
    WHERE lt_namespace = 10
      AND lt_title IN (
        'Serê_gotûbêjê',
        'Talk_header'
      )
  )
  AND page_title NOT LIKE '%Arşîv%'
  AND page_title NOT LIKE '%/Jêbirin%';
"""
# Script driver: fetch the candidate titles, then treat them one by one.
ku_file_name = "testtext.txt"
ku_page_titles = getList("ku", ku_query, ku_file_name)

if ku_page_titles:
    # Flipped to False as soon as treat_page raises for any title.
    all_pages_processed = True

    for gotubej in ku_page_titles:
        if gotubej != "Gotûbêj:WP:VS":
            try:
                treat_page(gotubej)
            except Exception as e:
                print(f"Error processing gotubej '{gotubej}': {str(e)}")
                all_pages_processed = False
        else:
            # This one page is always left untouched.
            print("Skipping page 'Gotûbêj:WP:VS'")

    # Final status line, time-stamped with the current local time.
    if not all_pages_processed:
        print("Some pages failed to process at %s!" % (time.ctime()))
    else:
        print("All pages processed successfully at %s!" % (time.ctime()))
else:
    # The query returned no rows, so there is nothing to edit.
    print("No results found from the query. %s!" % (time.ctime()))