Bikarhêner:Balyozxane/skrîpt/py/getwikiproje.py
#!/usr/bin/python3
"""
Lîsteya rûpelên ku li ser enwîkiyê wîkîproje heye lê li kuwîkiyê tine ye çêdike û skrîpteke din dide şuxilandin ji bo ku eynî Wîkîprojeyê li rûpelan zêde bike.
"""
#
# (C) w:ku:User:Balyozxane :)
#
# Distributed under the terms of the MIT license.
#
import pymysql
import os
import requests
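# Rough flow of the script (a summary of what the code below does):
#   1. Fetch the members of 'Kategorî:Wîkîprojeyên aktîv' on kuwiki.
#   2. For each WikiProject page, look up its enwiki equivalent through Wikidata sitelinks.
#   3. Query the enwiki replica for pages assessed by that project that have a kuwiki article,
#      and the kuwiki replica for pages already tagged for the project.
#   4. Write the difference (tagged on enwiki, not yet on kuwiki) to a text file.
#   5. Hand that file to the Pywikibot user script "addwikiproje".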
def generate_en_query(pap_project):
    en_query = f"""
    SELECT
        ll.ll_title AS ku_page_title
    FROM
        page
        INNER JOIN page_assessments as1 ON as1.pa_page_id = page.page_id
            AND as1.pa_project_id IN (SELECT pap_project_id FROM page_assessments_projects WHERE pap_project_title = '{pap_project}')
        INNER JOIN page_assessments as2 ON as2.pa_page_id = page.page_id
        INNER JOIN langlinks AS ll ON ll.ll_from = page.page_id AND ll.ll_lang = 'ku'
    WHERE
        page.page_namespace IN (0, 14, 10)
    GROUP BY
        ku_page_title
    ORDER BY
        FIELD(
            (CASE
                WHEN as2.pa_importance = "Top" THEN "Herî zêde"
                WHEN as2.pa_importance = "High" THEN "Zêde"
                WHEN as2.pa_importance = "Mid" THEN "Navîn"
                WHEN as2.pa_importance = "Low" THEN "Kêm"
                WHEN as2.pa_importance = "NA" THEN "NA"
                WHEN as2.pa_importance = "Unknown" THEN "Nayê zanîn"
            END),
            "Herî zêde", "Zêde", "Navîn", "Kêm", "Nayê zanîn"
        ), ku_page_title;
    """
    return en_query
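# Example (hypothetical project name): generate_en_query("Medicine") builds a query that returns
# the kuwiki titles (via langlinks) of the articles, categories and templates assessed by
# en:Wikipedia:WikiProject Medicine, ordered by their enwiki importance rating.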
def generate_ku_query(ku_project):
    ku_query = f"""
    SELECT
        CASE
            WHEN page.page_namespace = 1 THEN page_title
            WHEN page.page_namespace = 15 THEN CONCAT('Kategorî:', page_title)
            WHEN page.page_namespace = 11 THEN CONCAT('Şablon:', page_title)
            ELSE page_title
        END AS modified_page_title
    FROM
        categorylinks
    JOIN
        page ON page_id = cl_from
    WHERE
        BINARY cl_to = 'Hemû_rûpelên_Wîkîproje_{ku_project}';
    """
    return ku_query
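# Example (hypothetical project name): generate_ku_query("Muzîk") lists the pages already placed in
# 'Kategorî:Hemû rûpelên Wîkîproje Muzîk' on kuwiki. That category is presumably filled from talk
# pages, so the CASE above maps the talk namespaces (1, 15, 11) back to subject-page titles.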
def get_category_members(category_title):
    base_url = 'https://ku.wikipedia.org/w/api.php'
    params = {
        "action": "query",
        "format": "json",
        "list": "categorymembers",
        "cmtitle": category_title,
        "cmlimit": "max"  # Retrieve all members of the category
    }
    response = requests.get(base_url, params=params)
    data = response.json()
    if 'error' in data:
        print(f"Error: {data['error']['info']}")
        return None
    else:
        return [member['title'] for member in data['query']['categorymembers']]
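# Called below as get_category_members('Kategorî:Wîkîprojeyên aktîv'). Note that cmlimit=max returns
# a single batch (500 titles without apihighlimits), so a very large category would need continuation.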
def get_enwiki_title(kuwiki_title):
    # Pass the title as a query parameter so that requests URL-encodes spaces and special characters
    url = "https://www.wikidata.org/w/api.php"
    params = {
        "action": "wbgetentities",
        "sites": "kuwiki",
        "titles": kuwiki_title,
        "props": "sitelinks",
        "format": "json"
    }
    response = requests.get(url, params=params)
    data = response.json()
    entity = next(iter(data["entities"].values()))  # Get the first (and only) entity
    if "sitelinks" in entity and "enwiki" in entity["sitelinks"]:
        return entity["sitelinks"]["enwiki"]["title"]
    else:
        return None
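# Example (hypothetical title): get_enwiki_title("Wîkîpediya:Wîkîproje Muzîk") would return
# "Wikipedia:WikiProject Music" if the two pages are linked through the same Wikidata item,
# and None when no enwiki sitelink exists.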
def getList(lang_code, query_name):
    # Database connection details
    # Note: If you are using Toolforge, you may ignore the database username and password
    db_hostname_format = lang_code + "wiki.analytics.db.svc.wikimedia.cloud"  # Hostname of the replica database server
    db_port = 3306  # Port number for the database server
    # db_username = ""  # Add your actual database username credential (if not using Toolforge)
    # db_password = ""  # Add your actual database password credential (if not using Toolforge)
    db_name_format = lang_code + "wiki_p"  # Name of the target database
    db_connect_file = "~/replica.my.cnf"  # Path to the "my.cnf" file
    # Create a connection to the database
    connection = pymysql.connect(
        host=db_hostname_format,
        port=db_port,
        # user=db_username,
        # password=db_password,
        database=db_name_format,
        read_default_file=db_connect_file,
        # The "my.cnf" file contains the user and password; they are read from its [client] section.
        charset='utf8'
    )
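    # On Toolforge, ~/replica.my.cnf is provided for every tool account; read_default_file above
    # picks the credentials up from it, roughly in this form (values elided):
    #   [client]
    #   user = ...
    #   password = ...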
    # Create a cursor
    cursor = connection.cursor()
    # Make sure the <lang_code>wiki_p database is selected
    cursor.execute("USE " + lang_code + "wiki_p;")
    query = query_name
    # Execute the query
    cursor.execute(query)
    # Fetch the results
    results = cursor.fetchall()
    # Close the cursor and the database connection
    cursor.close()
    connection.close()
    # Process the results and return
    if results:
        processed_results = []
        for result in results:
            ku_page_title = result[0]
            ku_page_title = ku_page_title.decode('utf-8')
            ku_page_title = ku_page_title.replace('_', ' ')
            processed_results.append(ku_page_title)
        return processed_results
    else:
        print("No results found from the query.")
        return []
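# getList("en", ...) / getList("ku", ...) are called below with the generated SQL strings; the replica
# title columns are binary, hence the .decode('utf-8'), and underscores are turned back into spaces.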
category_title = 'Kategorî:Wîkîprojeyên aktîv'
category_members = get_category_members(category_title)
project_titles = []
queries = {}
for member in category_members:
    ku_title = member
    en_title = get_enwiki_title(member)
    if en_title:
        project_titles.append((ku_title, en_title))
        ku_project = ku_title.replace("Wîkîpediya:Wîkîproje ", "")
        pap_project = en_title.replace("Wikipedia:WikiProject ", "")
        en_query = generate_en_query(pap_project)
        ku_query = generate_ku_query(ku_project)
        queries[ku_title] = {'pap_project': pap_project, 'ku_project': ku_project, 'en_query': en_query, 'ku_query': ku_query}
unique_results = []
for ku_title, query_data in queries.items():
    en_listeya_projeye = getList("en", query_data['en_query'])
    ku_listeya_projeye = getList("ku", query_data['ku_query'])
    # Keep the pages tagged on enwiki but not yet on kuwiki by taking the set difference
    unique_results.extend(set(en_listeya_projeye) - set(ku_listeya_projeye))
# HOMEPATH is only set on Windows; on Toolforge the default tool home below is used instead
toolforge_home = os.getenv('HOMEPATH', '/data/project/balyozbot/')
# Write unique results to a file
output_file_path = os.path.join(toolforge_home, 'unique_results.txt')
with open(output_file_path, 'w', encoding='utf-8') as output_file:
    for result in unique_results:
        output_file.write(f"{result}\n")
print(f"Unique results written to {output_file_path}.")
# Now call the second script on the file that was just written
os.system(f"$HOME/pwbvenv/bin/pwb addwikiproje -always -file:{output_file_path}")
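# "addwikiproje" is the companion Pywikibot user script referred to in the docstring; it receives the
# written title list through -file: and adds the matching WikiProject to each listed page.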