import os
import sys
import re
import requests
from urllib.parse import urlparse


def get_slug_from_url(url):
    """Extract the channel slug (the last path segment) from an are.na URL."""
    try:
        parsed_url = urlparse(url)
        hostname = parsed_url.hostname or ""
        # Accept are.na itself plus subdomains such as www.are.na, but not
        # lookalike hosts that merely contain the string "are.na".
        if hostname != "are.na" and not hostname.endswith(".are.na"):
            return None
        path_parts = parsed_url.path.strip("/").split("/")
        # Guard against bare URLs like https://www.are.na/ (empty path).
        if path_parts and path_parts[-1]:
            return path_parts[-1]
        return None
    except (ValueError, AttributeError):
        return None
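
# Example (hypothetical channel URL):
#   get_slug_from_url("https://www.are.na/someuser/my-channel")  -> "my-channel"
#   get_slug_from_url("https://example.com/not-arena")           -> None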


def download_channel_files():
    """
    Guides the user through providing a token and URL, then downloads
    all files from the specified channel using direct API requests.
    """
    # --- Step 1: Get Access Token ---
    # For convenience, you can paste your token here.
    # If it's empty, the script will prompt you to enter it in the terminal.
    access_token = ""

    if not access_token:
        access_token = input("Enter your are.na personal access token: ")

    if not access_token:
        print("❌ Token cannot be empty.")
        sys.exit(1)
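
    # Note: input() echoes the token in the terminal. If that is a concern,
    # the standard library's getpass is a drop-in, non-echoing alternative:
    #   from getpass import getpass
    #   access_token = getpass("Enter your are.na personal access token: ")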

    # --- Step 2: Get Channel URL ---
    channel_url = input("Enter the full are.na channel URL: ")
    channel_slug = get_slug_from_url(channel_url)

    if not channel_slug:
        print("❌ Error: Could not parse a valid are.na channel slug from the URL.")
        sys.exit(1)

    # --- Step 3: Set up Requests Session ---
    headers = {
        "Authorization": f"Bearer {access_token}",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
        "Accept": "application/json",
    }

    session = requests.Session()
    session.headers.update(headers)
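
    # Optional hardening (a sketch, not something the API requires): retry
    # transient 429/5xx responses with backoff via requests' HTTPAdapter and
    # urllib3's Retry, both of which ship with requests:
    #   from requests.adapters import HTTPAdapter
    #   from urllib3.util.retry import Retry
    #   session.mount("https://", HTTPAdapter(max_retries=Retry(
    #       total=3, backoff_factor=1,
    #       status_forcelist=[429, 500, 502, 503, 504])))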

    # --- Step 4: Fetch All Content from the Channel (with pagination) ---
    print(f"\n▶️  Fetching content for channel slug: '{channel_slug}'...")
    all_contents = []
    current_page = 1

    try:
        while True:
            api_url = f"https://api.are.na/v2/channels/{channel_slug}/contents?page={current_page}&per=100"
            # A timeout keeps the script from hanging on a stalled connection.
            response = session.get(api_url, timeout=30)
            response.raise_for_status()

            data = response.json()
            page_contents = data.get("contents", [])

            if not page_contents:
                break

            all_contents.extend(page_contents)
            print(f"   Fetched page {current_page}...")
            current_page += 1

        print(f"✅ Success! Found {len(all_contents)} total items.")

    except requests.exceptions.HTTPError as e:
        print("\n❌ FAILURE!")
        print(
            f"   Could not fetch channel content. Status Code: {e.response.status_code}"
        )
        print(f"   API Response: {e.response.text[:300]}")
        sys.exit(1)
    except requests.exceptions.RequestException as e:
        print(f"\n❌ A network error occurred: {e}")
        sys.exit(1)
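
    # (A 401 above usually indicates a bad token; a 404 usually indicates a
    # wrong slug or a private channel the token cannot access.)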

    # --- Step 5: Download Files ---
    safe_directory_name = channel_slug
    print(f"\n▶️  Starting download to directory: '{safe_directory_name}'")
    if not os.path.exists(safe_directory_name):
        os.makedirs(safe_directory_name)
        print(f"   Created directory.")

    # Lists for the run report
    downloaded_files = []
    skipped_files = []
    failed_files = []

    attachments_to_download = [
        item
        for item in all_contents
        if item.get("class") == "Attachment" and item.get("attachment")
    ]
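
    # Only "Attachment" blocks carry downloadable files here. Channels can also
    # hold Image, Text, Link, and Media blocks; an Image block, for example,
    # keeps its file metadata under item["image"] rather than item["attachment"].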
    total_attachments = len(attachments_to_download)

    for i, item in enumerate(attachments_to_download):
        attachment_data = item["attachment"]
        file_url = attachment_data.get("url")

        if not file_url:
            continue

        block_title = item.get("title")
        extension = attachment_data.get("extension")

        if not block_title or block_title.isspace():
            block_title = str(item.get("id", "unknown_id"))

        block_title = re.sub(r"\.pdf$", "", block_title, flags=re.IGNORECASE).strip()

        invalid_chars = '<>:"/\\|?*'
        for char in invalid_chars:
            block_title = block_title.replace(char, "_")

        filename = f"{block_title}.{extension}"
        filepath = os.path.join(safe_directory_name, filename)
        progress_counter = f"({i + 1}/{total_attachments})"

        if not os.path.exists(filepath):
            print(f"   {progress_counter} Downloading: {filename}...")
            try:
                # Attachment URLs typically point at a CDN/object store that
                # may reject the API's Bearer header, so this request bypasses
                # the authenticated session on purpose.
                file_response = requests.get(file_url, stream=True, timeout=60)
                file_response.raise_for_status()

                with open(filepath, "wb") as f:
                    for chunk in file_response.iter_content(chunk_size=8192):
                        if chunk:  # skip keep-alive chunks
                            f.write(chunk)
                downloaded_files.append(filename)

            except requests.exceptions.RequestException as e:
                print(f"     > Download failed for {filename}: {e}")
                # Remove any partial file so a rerun retries it instead of
                # skipping it as "already exists".
                if os.path.exists(filepath):
                    os.remove(filepath)
                failed_files.append(filename)
        else:
            print(f"   {progress_counter} Skipping (already exists): {filename}")
            skipped_files.append(filename)

    # --- Step 6: Print Run Report ---
    print("\n" + "=" * 30)
    print("✅ Download complete. Run Report:")
    print("=" * 30)
    print(f"Successfully downloaded: {len(downloaded_files)} file(s)")
    print(f"Skipped (already existed): {len(skipped_files)} file(s)")
    print(f"Failed to download: {len(failed_files)} file(s)")

    if downloaded_files:
        print("\n--- Downloaded Files ---")
        for f in downloaded_files:
            print(f" - {f}")

    if skipped_files:
        print("\n--- Skipped Files ---")
        for f in skipped_files:
            print(f" - {f}")

    if failed_files:
        print("\n--- Failed Files ---")
        for f in failed_files:
            print(f" - {f}")

    print("\n" + "=" * 30)


if __name__ == "__main__":
    download_channel_files()
