import os
import requests
import time
import sys

# --- Configuration ---
# Premium VirusTotal API key. It is read from the VT_API_KEY environment variable
# (never hard-coded here) and is None when that variable is not set.
VT_API_KEY = os.getenv('VT_API_KEY')

# Search criteria
MIN_POSITIVE_DETECTIONS = 25  # minimum value used in the 'positives:' search term
TIME_WINDOW_HOURS = 24  # how far back, in hours, the 'ls:' search term reaches
MAX_FILES_TO_DOWNLOAD = 25  # sent as the 'limit' parameter of the search request

# Directory to save downloaded files (a relative path, so it resolves against
# the current working directory)
DOWNLOAD_DIR = 'malware_samples'

# VirusTotal API endpoints
VT_SEARCH_URL = 'https://www.virustotal.com/api/v3/intelligence/search'
# The '{}' placeholder is filled in with the file's SHA-256 hash.
VT_DOWNLOAD_URL_TEMPLATE = 'https://www.virustotal.com/api/v3/files/{}/download'

def search_for_files(api_key, timeout=30):
    """
    Searches VirusTotal for files matching the defined criteria.

    Args:
        api_key: VirusTotal API key, sent in the 'x-apikey' header.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout a stalled connection would block the script forever.

    Returns:
        The list stored under the 'data' key of the JSON response, or an
        empty list when nothing matched or the request failed. Failures are
        printed, never raised.
    """
    print(f"[*] Searching for files with {MIN_POSITIVE_DETECTIONS}+ detections submitted in the last {TIME_WINDOW_HOURS} hours...")

    # Calculate the Unix timestamp for the start of the time window.
    # NOTE(review): this relies on the 'ls:' modifier accepting a raw Unix
    # timestamp -- confirm against the VirusTotal search modifier reference.
    start_timestamp = int(time.time()) - (TIME_WINDOW_HOURS * 60 * 60)

    # The VirusTotal search query uses 'ls' (last submission) with a timestamp.
    query = f"positives:{MIN_POSITIVE_DETECTIONS}+ ls:{start_timestamp}+"

    headers = {
        'x-apikey': api_key,
    }
    params = {
        'query': query,
        'limit': MAX_FILES_TO_DOWNLOAD,  # Ask only for as many results as we will download
        'order': 'last_submission_date-',  # Get the most recent files first
    }

    try:
        response = requests.get(
            VT_SEARCH_URL, headers=headers, params=params, timeout=timeout
        )
        response.raise_for_status()  # Raises an HTTPError for bad responses (4xx or 5xx)
        payload = response.json()
    except requests.exceptions.HTTPError as e:
        print(f"[!] HTTP Error: {e.response.status_code} - {e.response.text}")
        return []
    except requests.exceptions.RequestException as e:
        print(f"[!] An error occurred during the search request: {e}")
        return []
    except ValueError as e:
        # Older versions of requests raise a plain ValueError (not a
        # RequestException) when the response body is not valid JSON.
        print(f"[!] An error occurred during the search request: {e}")
        return []

    # Guard against a JSON body that is not an object (e.g. a bare list).
    results = payload.get('data', []) if isinstance(payload, dict) else []
    if not results:
        print("[!] No files found matching your criteria.")
        return []

    print(f"[+] Found {len(results)} files.")
    return results

def _sanitize_file_name(file_name):
    """Strip unsafe characters from file_name; return '' if nothing usable is left."""
    cleaned = "".join(
        c for c in file_name if c.isalnum() or c in (' ', '.', '_')
    ).rstrip()
    # Names made only of dots/spaces ('', '.', '..') would resolve to a
    # directory rather than a file inside the download directory.
    if not cleaned.strip('. '):
        return ''
    return cleaned


def download_file(api_key, file_info, timeout=60):
    """
    Downloads a single file from VirusTotal given its info object.

    Args:
        api_key: VirusTotal API key, sent in the 'x-apikey' header.
        file_info: Search result dict; the SHA-256 hash is read from
            file_info['attributes']['sha256'] and the candidate file name
            from the first entry of file_info['attributes']['names'].
        timeout: Seconds to wait on the server before giving up.

    Returns:
        True if the file was saved or a file with the same name already
        exists in DOWNLOAD_DIR; False if the hash is missing or the download
        or the write to disk failed. Failures are printed, never raised.
    """
    attributes = file_info.get('attributes', {})
    file_hash = attributes.get('sha256')
    if not file_hash:
        print("[!] Could not find SHA256 hash for a file. Skipping.")
        return False

    # Try to get a meaningful name, otherwise use the hash
    file_names = attributes.get('names', [])
    file_name = file_names[0] if file_names else file_hash

    # Sanitize filename to prevent directory traversal or invalid characters.
    # Submitted names are untrusted; when nothing usable survives, fall back
    # to the hash so the path never points at a directory.
    # NOTE(review): two different samples sharing one name still collide; the
    # second is reported as already existing.
    safe_file_name = _sanitize_file_name(file_name) or file_hash
    download_path = os.path.join(DOWNLOAD_DIR, safe_file_name)

    if os.path.exists(download_path):
        print(f"[*] File '{safe_file_name}' already exists. Skipping.")
        return True

    print(f"[*] Downloading '{safe_file_name}' (hash: {file_hash[:10]}...)...")

    headers = {
        'x-apikey': api_key,
    }
    download_url = VT_DOWNLOAD_URL_TEMPLATE.format(file_hash)

    # Stream into a temporary file first so that an interrupted transfer never
    # leaves a truncated file that a later run would mistake for a finished one.
    partial_path = download_path + '.part'

    try:
        # The context manager releases the streamed connection in every case.
        with requests.get(
            download_url, headers=headers, stream=True, timeout=timeout
        ) as response:
            response.raise_for_status()

            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        os.replace(partial_path, download_path)
        print(f"[+] Successfully saved to '{download_path}'")
        return True

    except requests.exceptions.HTTPError as e:
        print(f"[!] HTTP Error downloading {safe_file_name}: {e.response.status_code} - {e.response.text}")
        return False
    except requests.exceptions.RequestException as e:
        print(f"[!] An error occurred downloading {safe_file_name}: {e}")
        return False
    except OSError as e:
        # Disk or permission problems should skip this file, not abort the batch.
        print(f"[!] Could not write '{download_path}': {e}")
        return False
    finally:
        # After a successful os.replace the partial file no longer exists.
        if os.path.exists(partial_path):
            try:
                os.remove(partial_path)
            except OSError:
                pass  # Best-effort cleanup; the download result is already decided.


def main():
    """
    Main function to run the script.

    Checks that an API key is configured (exiting with status 1 otherwise),
    makes sure the download directory exists, runs the search and downloads
    each result in turn, pausing between downloads. The final count includes
    every file for which download_file() returned True.
    """
    print("--- VirusTotal File Downloader ---")

    api_key = VT_API_KEY
    # This script is designed to be run in non-interactive environments.
    # It requires the API key to be set as an environment variable.
    if not api_key:
        print("\n[!] ERROR: VT_API_KEY environment variable not set.")
        print("[!] This script requires a VirusTotal API key to be configured as an environment variable.")
        print("[!] For example, on Linux/macOS: export VT_API_KEY='your_api_key_here'")
        print("[!] On Windows: set VT_API_KEY=your_api_key_here")
        sys.exit(1)  # Exit with an error code

    # Create the download directory if it doesn't exist
    if not os.path.exists(DOWNLOAD_DIR):
        print(f"[*] Creating download directory: '{DOWNLOAD_DIR}'")
        # exist_ok tolerates the directory appearing between the check and the call.
        os.makedirs(DOWNLOAD_DIR, exist_ok=True)

    files_to_download = search_for_files(api_key)

    if not files_to_download:
        return

    # VirusTotal API has a rate limit (typically 4 requests per minute for public API,
    # higher for premium). A small delay helps avoid hitting it.
    # Adjust if you have a higher rate limit.
    delay_seconds = 15

    download_count = 0
    last_index = len(files_to_download) - 1
    for index, file_info in enumerate(files_to_download):
        if download_file(api_key, file_info):
            download_count += 1

        # No request follows the last file, so there is nothing to wait for.
        if index < last_index:
            time.sleep(delay_seconds)

    print("\n--- Download Complete ---")
    print(f"[*] Total files downloaded: {download_count}")

# Run the downloader only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
