#!/usr/bin/env python3
"""
Directory File Downloader with SHA256 Logging
Downloads all files from a web directory at specified intervals
"""

import requests
import hashlib
import csv
import time
import os
import sys
from datetime import datetime
from urllib.parse import urljoin, urlparse
from bs4 import BeautifulSoup
from pathlib import Path

class DirectoryDownloader:
    """Download every file linked from a web directory listing.

    Each download's SHA256 hash, size, URL and status are appended to a
    CSV log.  Downloads are spaced by a configurable interval so the
    server is not hammered.
    """

    # Browser-like UA: some directory servers refuse the default
    # python-requests User-Agent.  Shared by scan and download requests.
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    def __init__(self, directory_url, interval=30, output_dir='downloads', log_file='download_log.csv'):
        """Set up the downloader.

        Args:
            directory_url: URL of the directory listing page to scrape.
            interval: Seconds to wait between consecutive downloads.
            output_dir: Local directory where files are saved (created
                if missing).
            log_file: Path of the CSV log (overwritten on construction).
        """
        self.directory_url = directory_url
        self.interval = interval
        self.output_dir = output_dir
        self.log_file = log_file
        self.files_found = []       # list of {'url': ..., 'filename': ...}
        self.download_count = 0     # successful downloads so far

        # Create output directory
        Path(output_dir).mkdir(parents=True, exist_ok=True)

        # Initialize CSV log (truncates any previous log)
        self.init_log()

    def init_log(self):
        """Create/truncate the CSV log file and write the header row."""
        with open(self.log_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(['Timestamp', 'Filename', 'SHA256', 'Size (bytes)', 'URL', 'Status'])

    def log_download(self, filename, sha256, size, url, status='SUCCESS', error_msg=''):
        """Append one download record to the CSV log.

        When status is 'FAILED' the error message is folded into the
        status column as "ERROR: <error_msg>".
        """
        timestamp = datetime.now().isoformat()
        with open(self.log_file, 'a', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            status_msg = f"ERROR: {error_msg}" if status == 'FAILED' else status
            writer.writerow([timestamp, filename, sha256, size, url, status_msg])

    def calculate_sha256(self, file_path):
        """Return the hex SHA256 digest of the file at *file_path*,
        reading it in 4 KiB chunks to bound memory use."""
        sha256_hash = hashlib.sha256()
        with open(file_path, "rb") as f:
            for byte_block in iter(lambda: f.read(4096), b""):
                sha256_hash.update(byte_block)
        return sha256_hash.hexdigest()

    def scan_directory(self):
        """Fetch the directory listing and collect downloadable links.

        Populates self.files_found (deduplicated by URL).  Parent links
        ('../'), subdirectories (trailing '/'), sort links (leading '?')
        and hidden files (leading '.') are skipped.

        Returns:
            True if the listing was fetched and parsed, False otherwise.
        """
        print(f"Scanning directory: {self.directory_url}")

        try:
            response = requests.get(self.directory_url, headers=self.HEADERS, timeout=30)
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')

            # Dedupe by absolute URL so repeated scans (or duplicate
            # anchors in the listing) don't queue the same file twice.
            seen_urls = {info['url'] for info in self.files_found}

            for link in soup.find_all('a'):
                href = link.get('href')
                if not href or href == '../' or href.endswith('/') or href.startswith('?'):
                    continue

                # Convert to absolute URL and derive a local filename.
                file_url = urljoin(self.directory_url, href)
                filename = os.path.basename(urlparse(file_url).path)

                if filename and not filename.startswith('.') and file_url not in seen_urls:
                    seen_urls.add(file_url)
                    self.files_found.append({
                        'url': file_url,
                        'filename': filename
                    })

            print(f"Found {len(self.files_found)} files")
            return True

        except Exception as e:
            # Best-effort scan: any network/parse failure aborts with False.
            print(f"Error scanning directory: {e}")
            return False

    def download_file(self, file_info):
        """Download one file, hashing it while streaming to disk.

        The SHA256 is computed over the chunks as they are written, so
        the file is read only once.  Both success and failure are
        recorded in the CSV log.

        Args:
            file_info: dict with 'url' and 'filename' keys.

        Returns:
            True on success, False on failure.
        """
        filename = file_info['filename']
        url = file_info['url']
        file_path = os.path.join(self.output_dir, filename)

        print(f"\n[{self.download_count + 1}/{len(self.files_found)}] Downloading: {filename}")

        try:
            # Context manager releases the streamed connection even on error.
            with requests.get(url, headers=self.HEADERS, timeout=60, stream=True) as response:
                response.raise_for_status()

                # Write and hash in a single pass over the stream.
                sha256_hash = hashlib.sha256()
                file_size = 0
                with open(file_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
                        sha256_hash.update(chunk)
                        file_size += len(chunk)

            sha256 = sha256_hash.hexdigest()

            # Log success
            self.log_download(filename, sha256, file_size, url, 'SUCCESS')

            print(f"✓ Success: {filename}")
            print(f"  Size: {file_size:,} bytes ({file_size/1024:.2f} KB)")
            print(f"  SHA256: {sha256}")

            self.download_count += 1
            return True

        except Exception as e:
            print(f"✗ Failed: {filename} - {e}")
            self.log_download(filename, 'DOWNLOAD_FAILED', 0, url, 'FAILED', str(e))
            return False

    def run(self):
        """Scan the directory, then download every file found, pausing
        self.interval seconds between downloads.  Ctrl-C prints a
        summary of progress so far instead of a traceback."""
        print("=" * 70)
        print("Directory File Downloader")
        print("=" * 70)
        print(f"Directory URL: {self.directory_url}")
        print(f"Download interval: {self.interval} seconds")
        print(f"Output directory: {self.output_dir}")
        print(f"Log file: {self.log_file}")
        print("=" * 70)

        # Scan directory
        if not self.scan_directory():
            print("Failed to scan directory. Exiting.")
            return

        if not self.files_found:
            print("No files found in directory. Exiting.")
            return

        # Display files
        print("\nFiles to download:")
        for i, file_info in enumerate(self.files_found, 1):
            print(f"  {i}. {file_info['filename']}")

        print(f"\nStarting downloads in 3 seconds...")
        time.sleep(3)

        # Download files with interval
        try:
            for i, file_info in enumerate(self.files_found):
                self.download_file(file_info)

                # Wait before next download (except for last file)
                if i < len(self.files_found) - 1:
                    print(f"\nWaiting {self.interval} seconds before next download...")
                    time.sleep(self.interval)

            print("\n" + "=" * 70)
            print(f"Download complete!")
            print(f"Files downloaded: {self.download_count}/{len(self.files_found)}")
            print(f"Output directory: {self.output_dir}")
            print(f"Log file: {self.log_file}")
            print("=" * 70)

        except KeyboardInterrupt:
            print("\n\nDownload interrupted by user.")
            print(f"Files downloaded: {self.download_count}/{len(self.files_found)}")
            print(f"Log saved to: {self.log_file}")

def main():
    """CLI entry point: parse arguments and run the downloader.

    Usage: directory_downloader.py <directory_url> [interval_seconds] [output_dir]
    Exits with status 1 on missing URL or a non-integer interval.
    """
    if len(sys.argv) < 2:
        print("Usage: python3 directory_downloader.py <directory_url> [interval_seconds] [output_dir]")
        print("\nExample:")
        print("  python3 directory_downloader.py http://82.165.215.146/InTheWild/InTheWild.0208/ 30 downloads")
        print("\nArguments:")
        print("  directory_url    - URL of the directory to download from")
        print("  interval_seconds - Time between downloads (default: 30)")
        print("  output_dir       - Directory to save files (default: downloads)")
        sys.exit(1)

    directory_url = sys.argv[1]

    # Validate the interval instead of crashing with a raw traceback.
    try:
        interval = int(sys.argv[2]) if len(sys.argv) > 2 else 30
    except ValueError:
        print(f"Invalid interval: {sys.argv[2]!r} (must be an integer number of seconds)")
        sys.exit(1)

    output_dir = sys.argv[3] if len(sys.argv) > 3 else 'downloads'

    downloader = DirectoryDownloader(directory_url, interval, output_dir)
    downloader.run()


if __name__ == '__main__':
    main()
