Bikarhêner:Balyozxane/skrîpt/py/gotarahefteye.py
import datetime
import os
import re
import time

import mwparserfromhell
import pywikibot
import requests
import tweepy
from bs4 import BeautifulSoup

from keys import consumer_key, consumer_secret, access_token, access_token_secret, bearer_token

VERBOSE = True

def get_twitter_conn_v1(api_key, api_secret, access_token, access_token_secret) -> tweepy.API:
    """Get a Twitter API v1.1 connection (needed for media uploads)."""
    auth = tweepy.OAuth1UserHandler(api_key, api_secret)
    auth.set_access_token(
        access_token,
        access_token_secret,
    )
    return tweepy.API(auth)
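# Note: media uploads are not exposed on the v2 tweepy.Client, which is why
# tweet_thread() below uses this v1.1 connection for media_upload() while
# posting the tweets themselves through the v2 client.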

def get_file_info(image_name):
    start_of_end_point_str = 'https://commons.wikimedia.org' \
                             '/w/api.php?action=query&titles=File:'
    end_of_end_point_str = '&prop=imageinfo&iiprop=user' \
                           '|userid|canonicaltitle|url|extmetadata&format=json'
    result = requests.get(start_of_end_point_str + image_name + end_of_end_point_str)
    result = result.json()
    page_id = next(iter(result['query']['pages']))
    image_info = result['query']['pages'][page_id]['imageinfo'][0]  # Select the first item in the list
    # Extract the license value
    license_value = image_info['extmetadata']['License']['value']
    if license_value == "pd":
        license_value = "Public Domain"
    else:
        license_value = license_value.upper()
    # Extract the artist value
    artist_value = image_info['extmetadata']['Artist']['value']
    artist_title = artist_value  # Fall back to the raw value if no link text is found
    # If artist_value is an HTML link, extract the link text from it
    if artist_value.startswith('<a ') and artist_value.endswith('</a>'):
        match = re.search(r'>(.*?)<', artist_value)
        if match:
            artist_title = match.group(1)
    artist_title = mwparserfromhell.parse(artist_title).strip_code()
    if artist_title in ("Unknown authorUnknown author", "Unknown author"):
        artist_title = "Xwedî nayê zanîn"
    return license_value, artist_title
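# Illustrative example (hypothetical file name and metadata): for a file whose
# Commons extmetadata reports License "cc-by-sa-4.0" and an HTML-linked artist,
#     get_file_info("Newroz_2018.jpg")
# would return something like ("CC-BY-SA-4.0", "Some Photographer").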

def get_image(file):
    # Set up Pywikibot
    commons = pywikibot.Site('commons', 'commons')
    # Get the FilePage object
    file_page = pywikibot.FilePage(commons, 'File:' + file)
    # Get the file URL
    file_url = file_page.get_file_url()
    # Extract the file name from the URL
    file_name = os.path.basename(file_url)
    save_dir = os.path.expanduser("~")
    # save_dir = os.path.join(save_dir, "Pywikibot")
    file_path = os.path.join(save_dir, file_name)  # Construct the full file path
    # Download the image
    try:
        success = file_page.download(filename=file_path)
        if success:
            if VERBOSE:
                print("Image downloaded successfully and saved as", file_path)
            return file_path  # Return the full file path
        else:
            if VERBOSE:
                print("Failed to download the image")
            return None  # Return None on failure
    except IOError as e:
        if VERBOSE:
            print(f"Failed to download the image: {e}")
        return None  # Return None on failure
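# Example usage (illustrative file name):
#     path = get_image("Newroz_2018.jpg")
# returns something like "/home/user/Newroz_2018.jpg" on success, or None on failure.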

def shorten_url(title):
    # Construct the full Wikipedia URL
    url = f"https://ku.wikipedia.org/wiki/{title.replace(' ', '_')}"
    # Define the API endpoint for URL shortening
    api_url = "https://ku.wikipedia.org/w/api.php"
    # Prepare the parameters for the API request
    params = {
        "action": "shortenurl",
        "format": "json",
        "url": url
    }
    # Specify the Content-Type header
    headers = {
        "Content-Type": "application/x-www-form-urlencoded"
    }
    # Send a POST request to the API (form-encoded body, matching the header above)
    response = requests.post(api_url, data=params, headers=headers)
    # Check if the request was successful
    if response.status_code == 200:
        # Parse the JSON response
        data = response.json()
        if VERBOSE:
            print(data)
        # Check if the 'shortenurl' key exists in the response
        if 'shortenurl' in data:
            return data['shortenurl']['shorturlalt']
        # Without a 'shortenurl' result there is no short URL to return, even if
        # the response carries warnings; fall back to the full URL instead.
        if VERBOSE:
            if 'warnings' in data and 'shortenurl' in data['warnings']:
                print("Warning from shortenurl:", data['warnings']['shortenurl'])
            print("Error: Short URL not available.")
        return url
    else:
        if VERBOSE:
            print("Error: Unable to shorten URL.")
        return url
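# A successful response from the UrlShortener extension looks roughly like this
# (illustrative values):
#     {"shortenurl": {"shorturl": "https://w.wiki/ABc", "shorturlalt": "https://w.wiki/ABC"}}
# The code above returns the 'shorturlalt' variant.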

def split_tweet_text(tweet_text):
    max_chars_per_tweet = 280  # Twitter's character limit per tweet
    # Split by line breaks
    chunks = tweet_text.split('\n')
    final_chunks = []
    prev_chunk = ''
    for chunk in chunks:
        if chunk.strip():  # Skip chunks that are empty after stripping whitespace
            if len(prev_chunk + chunk) <= max_chars_per_tweet:
                # If adding this chunk to the previous one stays within the limit, append it
                prev_chunk += f"\n{chunk}"
            else:
                # If adding this chunk exceeds the character limit, split it further on dots
                sub_chunks = re.split(r'(\.+)', chunk)
                temp_chunk = ''
                for sub_chunk in sub_chunks:
                    if temp_chunk and len(prev_chunk + temp_chunk + sub_chunk) > max_chars_per_tweet:
                        # If adding this sub-chunk exceeds the limit, start a new tweet
                        final_chunks.append(prev_chunk)
                        prev_chunk = temp_chunk
                        temp_chunk = sub_chunk
                    elif len(prev_chunk + temp_chunk + sub_chunk) <= max_chars_per_tweet:
                        # If adding this sub-chunk stays within the limit, append it
                        temp_chunk += sub_chunk
                    elif len(sub_chunk) > max_chars_per_tweet:
                        # If the sub-chunk itself exceeds the limit, hard-split it
                        sub_sub_chunks = [sub_chunk[i:i + max_chars_per_tweet]
                                          for i in range(0, len(sub_chunk), max_chars_per_tweet)]
                        for sub_sub_chunk in sub_sub_chunks:
                            final_chunks.append(prev_chunk + temp_chunk)
                            prev_chunk = ''
                            temp_chunk = sub_sub_chunk
                    else:
                        temp_chunk = sub_chunk  # Start a new chunk
                if temp_chunk:  # Append any remaining chunk
                    final_chunks.append(prev_chunk + temp_chunk)
                    prev_chunk = ''
    # Append any remaining chunk
    if prev_chunk:
        final_chunks.append(prev_chunk)
    # Drop chunks that are empty or consist only of a dot character
    final_chunks = [chunk for chunk in final_chunks if chunk.strip() and chunk.strip() != '.']
    return final_chunks
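# Illustrative behaviour (hypothetical input): a 600-character paragraph with no
# line breaks or dots is hard-split into 280/280/40-character chunks, whereas a
# short multi-line text collapses into a single chunk that the caller strips.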

def tweet_thread(tweet_texts, media_path, client):
    total_tweets = len(tweet_texts)
    client_v1 = get_twitter_conn_v1(consumer_key, consumer_secret, access_token, access_token_secret)
    media = client_v1.media_upload(filename=media_path)
    media_id = media.media_id
    # Tweet the first part of the thread
    first_tweet = client.create_tweet(text=f"{tweet_texts[0].strip()} (1/{total_tweets})", media_ids=[media_id])
    # Retrieve the ID of the first tweet
    first_tweet_id = first_tweet.data['id']
    # Tweet the subsequent parts of the thread
    reply_to_id = first_tweet_id
    for i in range(1, total_tweets):
        time.sleep(1)  # Add a 1-second delay between tweets
        tweet_text = f"{tweet_texts[i].strip()} ({i + 1}/{total_tweets})"
        if len(tweet_text) > 280:
            tweet_text = tweet_texts[i]  # If the numbering pushes it over the limit, drop the numbering
        if tweet_text.startswith('.'):
            tweet_text = tweet_text[1:]  # Remove the leading dot
        if len(tweet_text) > 280:
            tweet_text = tweet_texts[i]  # Still over the limit: fall back to the unnumbered text
        # Reply to the last created tweet
        reply = client.create_tweet(text=tweet_text.strip(), in_reply_to_tweet_id=reply_to_id)
        # Update the ID for subsequent replies
        reply_to_id = reply.data['id']  # Reply to the latest tweet

def parse_week(page_title):
    # Set up Pywikibot
    site = pywikibot.Site("ku", "wikipedia")
    # Get the page object
    page = pywikibot.Page(site, page_title)
    if not page.exists():
        if VERBOSE:
            print("The article-of-the-week page does not exist, so the run was cancelled")
        return
    text = page.text
    # Parse the page content using mwparserfromhell
    parsed_text = mwparserfromhell.parse(text)
    # Find the template named "GH/format"
    for template in parsed_text.filter_templates(matches="GH/format"):
        # Get parameter values
        wene = template.get("wêne").value.strip()
        gotar = template.get("gotar").value.strip()
        wenesaz = None
        if template.has("wênesaz"):
            if VERBOSE:
                print("The template has a wênesaz parameter")
            wenesaz = template.get("wênesaz").value.strip()
        return {"wene": wene, "gotar": gotar, "wenesaz": wenesaz}
    # If the "GH/format" template is not found
    if VERBOSE:
        print("The 'GH/format' template was not found on the page.")
    return None
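# The weekly template is expected to look roughly like this (illustrative values;
# the parameter names wêne/gotar/wênesaz come from the code above):
#     {{GH/format
#     |wêne    = Newroz_2018.jpg
#     |gotar   = Newroz
#     |wênesaz = Some Photographer
#     }}
# parse_week() would then return
#     {"wene": "Newroz_2018.jpg", "gotar": "Newroz", "wenesaz": "Some Photographer"}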

def get_kurte(page_title):
    # URL of the page
    url = f"https://ku.wikipedia.org/wiki/%C5%9Eablon:{page_title}"
    # Send a GET request to fetch the HTML content of the page
    response = requests.get(url)
    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.content, "html.parser")
        # Find the div element with class "kurteya-gotare"
        div_element = soup.find("div", class_="kurteya-gotare")
        # Check if the div element is found
        if div_element:
            # Get the text of the div, without the trailing "(zêdetir...)" link
            content = div_element.text.replace('\xa0(zêdetir...)', '')
            if VERBOSE:
                print("Content of div with class 'kurteya-gotare':", content)
            return content
        else:
            if VERBOSE:
                print("Div element with class 'kurteya-gotare' not found.")
    else:
        if VERBOSE:
            print("Failed to retrieve the webpage. Status code:", response.status_code)

def main():
    client = tweepy.Client(bearer_token, consumer_key, consumer_secret, access_token, access_token_secret)
    # Get the current date
    current_date = datetime.date.today()
    current_year = current_date.year
    current_week_number = current_date.isocalendar()[1]
    template_title = f"GH/{current_year}/{current_week_number}"
    # template_title = "GH/2024/49"
    if VERBOSE:
        print("template_title:", template_title)
    result = parse_week(f"Şablon:{template_title}")
    kurteya_gotare = get_kurte(template_title)
    if result and kurteya_gotare:
        shorturl = shorten_url(result["gotar"])
        license_info, file_artist = get_file_info(result["wene"])
        if result.get("wenesaz"):
            artist = result["wenesaz"]
        else:
            artist = file_artist
        # Build the attribution line: "📷 artist, LICENSE"
        license_text = ""
        if artist or license_info:
            license_text = "📷 "
            if artist:
                license_text += artist.replace('\n', ' ')
                if license_info:
                    license_text += ", "
            if license_info:
                license_text += license_info
            license_text += "\n"
        if VERBOSE:
            print("Wêne:", result["wene"])
            print("Gotar:", result["gotar"])
            print(shorturl)
            print(license_info)
            print(artist)
            print(license_text)
        tweet_text = (f"Gotara hefteyê ya Wîkîpediyayê\n\n"
                      f"{result['gotar']} ⬇️ \n"
                      f"{license_text}"
                      f"{kurteya_gotare}\n"
                      f"Zêdetir hîn bibe ➡️ {shorturl}")
        chunks = split_tweet_text(tweet_text)
        if VERBOSE:
            print(chunks)
        media_path = get_image(result["wene"])
        # Tweet the thread
        tweet_thread(chunks, media_path, client)
        # Delete the downloaded image file
        if media_path:
            try:
                os.remove(media_path)
                if VERBOSE:
                    print("Image file deleted successfully:", media_path)
            except OSError as e:
                if VERBOSE:
                    print("Error deleting image file:", e)

if __name__ == "__main__":
    main()