Bikarhêner:Balyozxane/skrîpt/py/gotarahefteye.py
import datetime
import os
import re
import time

import mwparserfromhell
import pywikibot
import requests
import tweepy
from bs4 import BeautifulSoup

from keys import consumer_key, consumer_secret, access_token, access_token_secret, bearer_token

VERBOSE = True

def get_twitter_conn_v1(api_key, api_secret, access_token, access_token_secret) -> tweepy.API:
    """Get a Twitter API v1.1 connection (needed for media uploads)."""
    auth = tweepy.OAuth1UserHandler(api_key, api_secret)
    auth.set_access_token(
        access_token,
        access_token_secret,
    )
    return tweepy.API(auth)
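# Note: media uploads are not exposed on the v2 tweepy.Client, which is why
# tweet_thread() below uses this v1.1 connection for media_upload() while
# posting the tweets themselves through the v2 client.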

def get_file_info(image_name):
    start_of_end_point_str = 'https://commons.wikimedia.org' \
                             '/w/api.php?action=query&titles=File:'
    end_of_end_point_str = '&prop=imageinfo&iiprop=user' \
                           '|userid|canonicaltitle|url|extmetadata&format=json'
    result = requests.get(start_of_end_point_str + image_name + end_of_end_point_str)
    result = result.json()
    page_id = next(iter(result['query']['pages']))
    image_info = result['query']['pages'][page_id]['imageinfo'][0]  # Select the first item in the list
    # Extract the license value
    license_value = image_info['extmetadata']['License']['value']
    if license_value == "pd":
        license_value = "Public Domain"
    else:
        license_value = license_value.upper()
    # Extract the artist value
    artist_value = image_info['extmetadata']['Artist']['value']
    artist_title = artist_value  # Fall back to the raw value if no link text is found
    # If artist_value is an HTML link, extract the link text from it
    if artist_value.startswith('<a ') and artist_value.endswith('</a>'):
        match = re.search(r'>(.*?)<', artist_value)
        if match:
            artist_title = match.group(1)
    artist_title = mwparserfromhell.parse(artist_title).strip_code()
    if artist_title in ("Unknown authorUnknown author", "Unknown author"):
        artist_title = "Xwedî nayê zanîn"
    return license_value, artist_title
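# Illustrative example (hypothetical file name and metadata): for a file whose
# Commons extmetadata reports License "cc-by-sa-4.0" and an HTML-linked artist,
#     get_file_info("Newroz_2018.jpg")
# would return something like ("CC-BY-SA-4.0", "Some Photographer").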

def get_image(file):
    # Set up Pywikibot
    commons = pywikibot.Site('commons', 'commons')
    # Get the FilePage object
    file_page = pywikibot.FilePage(commons, 'File:' + file)
    # Get the file URL
    file_url = file_page.get_file_url()
    # Extract the file name from the URL
    file_name = os.path.basename(file_url)
    save_dir = os.path.expanduser("~")
    # save_dir = os.path.join(save_dir, "Pywikibot")
    file_path = os.path.join(save_dir, file_name)  # Construct the full file path
    # Download the image
    try:
        success = file_page.download(filename=file_path)
        if success:
            if VERBOSE:
                print("Image downloaded successfully and saved as", file_path)
            return file_path  # Return the full file path
        else:
            if VERBOSE:
                print("Failed to download the image")
            return None  # Return None on failure
    except IOError as e:
        if VERBOSE:
            print(f"Failed to download the image: {e}")
        return None  # Return None on failure
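# Example usage (illustrative file name):
#     path = get_image("Newroz_2018.jpg")
# returns something like "/home/user/Newroz_2018.jpg" on success, or None on failure.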

def shorten_url(title):
    # Construct the full Wikipedia URL
    url = f"https://ku.wikipedia.org/wiki/{title.replace(' ', '_')}"
    # Define the API endpoint for URL shortening
    api_url = "https://ku.wikipedia.org/w/api.php"
    # Prepare the parameters for the API request
    params = {
        "action": "shortenurl",
        "format": "json",
        "url": url
    }
    # Specify the Content-Type header
    headers = {
        "Content-Type": "application/x-www-form-urlencoded"
    }
    # Send a POST request to the API (form-encoded body, matching the header above)
    response = requests.post(api_url, data=params, headers=headers)
    # Check if the request was successful
    if response.status_code == 200:
        # Parse the JSON response
        data = response.json()
        if VERBOSE:
            print(data)
        # Check if the 'shortenurl' key exists in the response
        if 'shortenurl' in data:
            return data['shortenurl']['shorturlalt']
        # Without a 'shortenurl' result there is no short URL to return, even if
        # the response carries warnings; fall back to the full URL instead.
        if VERBOSE:
            if 'warnings' in data and 'shortenurl' in data['warnings']:
                print("Warning from shortenurl:", data['warnings']['shortenurl'])
            print("Error: Short URL not available.")
        return url
    else:
        if VERBOSE:
            print("Error: Unable to shorten URL.")
        return url
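# A successful response from the UrlShortener extension looks roughly like this
# (illustrative values):
#     {"shortenurl": {"shorturl": "https://w.wiki/ABc", "shorturlalt": "https://w.wiki/ABC"}}
# The code above returns the 'shorturlalt' variant.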

def split_tweet_text(tweet_text):
    max_chars_per_tweet = 280  # Twitter's character limit per tweet
    # Split by line breaks
    chunks = tweet_text.split('\n')
    final_chunks = []
    prev_chunk = ''
    for chunk in chunks:
        if chunk.strip():  # Skip chunks that are empty after stripping whitespace
            if len(prev_chunk + chunk) <= max_chars_per_tweet:
                # If adding this chunk to the previous one stays within the limit, append it
                prev_chunk += f"\n{chunk}"
            else:
                # If adding this chunk exceeds the character limit, split it further on dots
                sub_chunks = re.split(r'(\.+)', chunk)
                temp_chunk = ''
                for sub_chunk in sub_chunks:
                    if temp_chunk and len(prev_chunk + temp_chunk + sub_chunk) > max_chars_per_tweet:
                        # If adding this sub-chunk exceeds the limit, start a new tweet
                        final_chunks.append(prev_chunk)
                        prev_chunk = temp_chunk
                        temp_chunk = sub_chunk
                    elif len(prev_chunk + temp_chunk + sub_chunk) <= max_chars_per_tweet:
                        # If adding this sub-chunk stays within the limit, append it
                        temp_chunk += sub_chunk
                    elif len(sub_chunk) > max_chars_per_tweet:
                        # If the sub-chunk itself exceeds the limit, hard-split it
                        sub_sub_chunks = [sub_chunk[i:i + max_chars_per_tweet]
                                          for i in range(0, len(sub_chunk), max_chars_per_tweet)]
                        for sub_sub_chunk in sub_sub_chunks:
                            final_chunks.append(prev_chunk + temp_chunk)
                            prev_chunk = ''
                            temp_chunk = sub_sub_chunk
                    else:
                        temp_chunk = sub_chunk  # Start a new chunk
                if temp_chunk:  # Append any remaining chunk
                    final_chunks.append(prev_chunk + temp_chunk)
                    prev_chunk = ''
    # Append any remaining chunk
    if prev_chunk:
        final_chunks.append(prev_chunk)
    # Drop chunks that are empty or consist only of a dot character
    final_chunks = [chunk for chunk in final_chunks if chunk.strip() and chunk.strip() != '.']
    return final_chunks
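# Illustrative behaviour (hypothetical input): a 600-character paragraph with no
# line breaks or dots is hard-split into 280/280/40-character chunks, whereas a
# short multi-line text collapses into a single chunk that the caller strips.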

def tweet_thread(tweet_texts, media_path, client):
    total_tweets = len(tweet_texts)
    client_v1 = get_twitter_conn_v1(consumer_key, consumer_secret, access_token, access_token_secret)
    media = client_v1.media_upload(filename=media_path)
    media_id = media.media_id
    # Tweet the first part of the thread
    first_tweet = client.create_tweet(text=f"{tweet_texts[0].strip()} (1/{total_tweets})", media_ids=[media_id])
    # Retrieve the ID of the first tweet
    first_tweet_id = first_tweet.data['id']
    # Tweet the subsequent parts of the thread
    reply_to_id = first_tweet_id
    for i in range(1, total_tweets):
        time.sleep(1)  # Add a 1-second delay between tweets
        tweet_text = f"{tweet_texts[i].strip()} ({i + 1}/{total_tweets})"
        if len(tweet_text) > 280:
            tweet_text = tweet_texts[i]  # If the numbering pushes it over the limit, drop the numbering
        if tweet_text.startswith('.'):
            tweet_text = tweet_text[1:]  # Remove the leading dot
        if len(tweet_text) > 280:
            tweet_text = tweet_texts[i]  # Still over the limit: fall back to the unnumbered text
        # Reply to the last created tweet
        reply = client.create_tweet(text=tweet_text.strip(), in_reply_to_tweet_id=reply_to_id)
        # Update the ID for subsequent replies
        reply_to_id = reply.data['id']  # Reply to the latest tweet

def parse_week(page_title):
    # Set up Pywikibot
    site = pywikibot.Site("ku", "wikipedia")
    # Get the page object
    page = pywikibot.Page(site, page_title)
    if not page.exists():
        if VERBOSE:
            print("The article-of-the-week page does not exist, so the run was cancelled")
        return
    text = page.text
    # Parse the page content using mwparserfromhell
    parsed_text = mwparserfromhell.parse(text)
    # Find the template named "GH/format"
    for template in parsed_text.filter_templates(matches="GH/format"):
        # Get parameter values
        wene = template.get("wêne").value.strip()
        gotar = template.get("gotar").value.strip()
        wenesaz = None
        if template.has("wênesaz"):
            if VERBOSE:
                print("The template has a wênesaz parameter")
            wenesaz = template.get("wênesaz").value.strip()
        return {"wene": wene, "gotar": gotar, "wenesaz": wenesaz}
    # If the "GH/format" template is not found
    if VERBOSE:
        print("The 'GH/format' template was not found on the page.")
    return None
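# The weekly template is expected to look roughly like this (illustrative values;
# the parameter names wêne/gotar/wênesaz come from the code above):
#     {{GH/format
#     |wêne    = Newroz_2018.jpg
#     |gotar   = Newroz
#     |wênesaz = Some Photographer
#     }}
# parse_week() would then return
#     {"wene": "Newroz_2018.jpg", "gotar": "Newroz", "wenesaz": "Some Photographer"}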

def get_kurte(page_title):
    # URL of the page
    url = f"https://ku.wikipedia.org/wiki/%C5%9Eablon:{page_title}"
    # Send a GET request to fetch the HTML content of the page
    response = requests.get(url)
    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.content, "html.parser")
        # Find the div element with class "kurteya-gotare"
        div_element = soup.find("div", class_="kurteya-gotare")
        # Check if the div element is found
        if div_element:
            # Get the text of the div, without the trailing "(zêdetir...)" link
            content = div_element.text.replace('\xa0(zêdetir...)', '')
            if VERBOSE:
                print("Content of div with class 'kurteya-gotare':", content)
            return content
        else:
            if VERBOSE:
                print("Div element with class 'kurteya-gotare' not found.")
    else:
        if VERBOSE:
            print("Failed to retrieve the webpage. Status code:", response.status_code)

def main():
    client = tweepy.Client(bearer_token, consumer_key, consumer_secret, access_token, access_token_secret)
    # Get the current date
    current_date = datetime.date.today()
    current_year = current_date.year
    current_week_number = current_date.isocalendar()[1]
    template_title = f"GH/{current_year}/{current_week_number}"
    # template_title = "GH/2024/49"
    if VERBOSE:
        print("template_title:", template_title)
    result = parse_week(f"Şablon:{template_title}")
    kurteya_gotare = get_kurte(template_title)
    if result and kurteya_gotare:
        shorturl = shorten_url(result["gotar"])
        license_info, file_artist = get_file_info(result["wene"])
        if result.get("wenesaz"):
            artist = result["wenesaz"]
        else:
            artist = file_artist
        # Build the attribution line: "📷 artist, LICENSE"
        license_text = ""
        if artist or license_info:
            license_text = "📷 "
            if artist:
                license_text += artist.replace('\n', ' ')
                if license_info:
                    license_text += ", "
            if license_info:
                license_text += license_info
            license_text += "\n"
        if VERBOSE:
            print("Wêne:", result["wene"])
            print("Gotar:", result["gotar"])
            print(shorturl)
            print(license_info)
            print(artist)
            print(license_text)
        tweet_text = (f"Gotara hefteyê ya Wîkîpediyayê\n\n"
                      f"{result['gotar']} ⬇️ \n"
                      f"{license_text}"
                      f"{kurteya_gotare}\n"
                      f"Zêdetir hîn bibe ➡️ {shorturl}")
        chunks = split_tweet_text(tweet_text)
        if VERBOSE:
            print(chunks)
        media_path = get_image(result["wene"])
        # Tweet the thread
        tweet_thread(chunks, media_path, client)
        # Delete the downloaded image file
        if media_path:
            try:
                os.remove(media_path)
                if VERBOSE:
                    print("Image file deleted successfully:", media_path)
            except OSError as e:
                if VERBOSE:
                    print("Error deleting image file:", e)

if __name__ == "__main__":
    main()