#!/usr/bin/env python3
"""
Generate vendor manifest from site component JSON files.

Scans /src/data/components/*.json for printedParts entries with GitHub URLs
and creates or updates manifest/vendor_manifest.json.
"""

import argparse
import json
import os
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional, Any
from urllib.parse import urlparse, parse_qs, unquote

# Hosts that serve the regular GitHub web UI (/owner/repo/<mode>/<ref>/<path>).
_GITHUB_WEB_HOSTS = frozenset({'github.com', 'www.github.com'})
# Host that serves raw file contents (/owner/repo/<ref>/<path>).
_GITHUB_RAW_HOST = 'raw.githubusercontent.com'
# Keys whose list values hold part dictionaries.
_PART_LIST_KEYS = ('printedParts', 'bodyParts', 'knobs')


def parse_github_url(url: str) -> Optional[Dict[str, str]]:
    """
    Parse GitHub URL to extract owner, repo, path, and ref.

    Supports:
    - https://github.com/owner/repo/blob/<ref>/path/to/file
    - https://github.com/owner/repo/raw/<ref>/path/to/file
    - https://raw.githubusercontent.com/owner/repo/<ref>/path/to/file

    The host is matched exactly (not by substring), so
    ``raw.githubusercontent.com`` URLs are recognised and URLs that merely
    contain the text ``github.com`` somewhere are rejected.

    Args:
        url: Candidate URL; any falsy or non-string value is rejected.

    Returns:
        A dict with keys ``owner``, ``repo``, ``ref`` and ``path`` (the path
        is percent-decoded and never contains the query string), or ``None``
        when the URL is not a recognised GitHub file URL.

    Note:
        The ref is assumed to be a single path segment; refs that contain
        slashes (e.g. ``feature/x``) cannot be told apart from the file path.
    """
    if not url or not isinstance(url, str):
        return None

    parsed = urlparse(url)
    host = (parsed.hostname or '').lower()
    # urlparse already separated the query string, so the path is query-free.
    path_parts = parsed.path.strip('/').split('/')

    if host == _GITHUB_RAW_HOST:
        if len(path_parts) < 4:
            return None
        owner, repo, ref = path_parts[:3]
        remainder = path_parts[3:]
    elif host in _GITHUB_WEB_HOSTS:
        if len(path_parts) < 5:
            return None
        owner, repo, mode, ref = path_parts[:4]
        remainder = path_parts[4:]
        # ?raw=true is sometimes appended to blob URLs; keep honouring it for
        # any mode so previously accepted URLs stay accepted.
        query_params = parse_qs(parsed.query)
        if mode not in ('blob', 'raw') and 'raw' not in query_params:
            return None
    else:
        return None

    file_path = unquote('/'.join(remainder))
    if not (owner and repo and ref and file_path):
        return None

    return {
        'owner': owner,
        'repo': repo,
        'ref': ref,
        'path': file_path
    }


def find_printed_parts(data: Any, path: str = '') -> List[Dict[str, Any]]:
    """
    Recursively find all part entries in a nested JSON structure.

    A part is any dict with an ``id`` key found inside a list stored under
    ``printedParts``, ``bodyParts`` or ``knobs``.

    Args:
        data: Decoded JSON value (dict, list or scalar).
        path: Path of the JSON file being scanned; copied into every result.

    Returns:
        A list of dicts, each with keys ``part`` (the part dict itself),
        ``json_path`` (the ``path`` argument) and ``part_id``.
    """
    parts = []

    if isinstance(data, dict):
        for key in _PART_LIST_KEYS:
            candidates = data.get(key)
            # Ignore missing keys and non-list values such as null.
            if not isinstance(candidates, list):
                continue
            for part in candidates:
                if isinstance(part, dict) and 'id' in part:
                    parts.append({
                        'part': part,
                        'json_path': path,
                        'part_id': part.get('id')
                    })

        # Recursively search nested structures
        for value in data.values():
            if isinstance(value, (dict, list)):
                parts.extend(find_printed_parts(value, path))
    elif isinstance(data, list):
        for item in data:
            parts.extend(find_printed_parts(item, path))

    return parts


def generate_manifest_id(part_id: str, owner: str, repo: str, path: str) -> str:
    """
    Generate a manifest ID from part ID or create one from repo/path.

    Args:
        part_id: ID taken from the component JSON; returned as-is when truthy.
        owner: GitHub repository owner.
        repo: GitHub repository name.
        path: File path inside the repository.

    Returns:
        ``part_id`` when it is truthy, otherwise a slug built from
        ``owner-repo-path`` restricted to ``[a-zA-Z0-9_-]`` and truncated to
        100 characters.
    """
    if part_id:
        return part_id

    # Generate slug from owner-repo-path
    slug = f"{owner}-{repo}-{path.replace('/', '-').replace(' ', '-')}"
    # Remove special chars
    slug = re.sub(r'[^a-zA-Z0-9_-]', '', slug)
    return slug[:100]  # Limit length


def generate_local_path(owner: str, repo: str, path: str) -> str:
    """
    Generate local vendor path from owner, repo, and file path.

    Returns:
        ``vendor/<owner>-<repo>/<path>``.
    """
    repo_dir = f"{owner}-{repo}"
    return f"vendor/{repo_dir}/{path}"


def load_existing_manifest(manifest_path: Path) -> Dict[str, Dict]:
    """
    Load existing manifest or return empty dict.

    Accepts three on-disk shapes: a list of entries, a dict with an
    ``entries`` list, or a dict that is assumed to be keyed by entry id.

    Args:
        manifest_path: Location of the manifest JSON file.

    Returns:
        Entries keyed by their ``id``. An empty dict is returned when the
        file does not exist, has an unrecognised shape, or cannot be parsed
        (a warning is printed to stderr in the last case).
    """
    if manifest_path.exists():
        try:
            with open(manifest_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            # Convert list to dict keyed by id
            if isinstance(data, list):
                return {entry['id']: entry for entry in data}
            elif isinstance(data, dict) and 'entries' in data:
                return {entry['id']: entry for entry in data['entries']}
            elif isinstance(data, dict):
                # Assume it's already keyed by id
                return data
        except (json.JSONDecodeError, KeyError, TypeError) as e:
            # TypeError covers entries that are not dicts (e.g. bare strings).
            print(f"Warning: Could not parse existing manifest: {e}", file=sys.stderr)

    return {}


def scan_component_files(site_dir: Path, repo_root: Path) -> List[Dict[str, Any]]:
    """
    Scan all component JSON files and extract printedParts with GitHub URLs.

    Files are visited in sorted order so that, when two parts share a
    manifest id, the one that ends up in the manifest does not depend on
    filesystem enumeration order.

    Args:
        site_dir: Directory whose ``*.json`` files are scanned.
        repo_root: Root used to store each JSON file path relatively; files
            outside it are recorded with the path as given.

    Returns:
        One dict per part with a parseable GitHub URL, with keys
        ``manifest_id``, ``part_id``, ``part``, ``json_file``,
        ``github_info`` and ``local_path``. Empty when ``site_dir`` does not
        exist. Unreadable or invalid files are skipped with a warning.
    """
    entries = []

    if not site_dir.exists():
        print(f"Error: Site directory does not exist: {site_dir}", file=sys.stderr)
        return entries

    for json_file in sorted(site_dir.glob('*.json')):
        try:
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError, OSError) as e:
            print(f"Warning: Could not read {json_file}: {e}", file=sys.stderr)
            continue

        # Store relative path from repo root
        try:
            json_file_rel = json_file.relative_to(repo_root)
        except ValueError:
            # If not relative, use the path as given
            json_file_rel = json_file

        for item in find_printed_parts(data, str(json_file)):
            part = item['part']
            url = part.get('url')

            if not url:
                continue

            github_info = parse_github_url(url)
            if not github_info:
                print(f"Warning: Skipping non-GitHub URL in {json_file}: {url}", file=sys.stderr)
                continue

            part_id = item['part_id']
            manifest_id = generate_manifest_id(
                part_id,
                github_info['owner'],
                github_info['repo'],
                github_info['path']
            )
            local_path = generate_local_path(
                github_info['owner'],
                github_info['repo'],
                github_info['path']
            )

            entries.append({
                'manifest_id': manifest_id,
                'part_id': part_id,
                'part': part,
                'json_file': str(json_file_rel),
                'github_info': github_info,
                'local_path': local_path
            })

    return entries


def create_or_update_manifest_entry(
    existing_entry: Optional[Dict],
    new_data: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Create new manifest entry or merge with existing.

    Args:
        existing_entry: Entry already present in the manifest, or ``None``
            (an empty dict is treated the same as ``None``).
        new_data: Scan result with keys ``manifest_id``, ``part_id``,
            ``json_file``, ``local_path`` and ``github_info``.

    Returns:
        A new dict. When merging, source and origin fields are refreshed and
        every other field of the existing entry (pinned_sha, checksum, ...)
        is kept; the existing entry itself is not mutated.
    """
    github_info = new_data['github_info']

    # Fields that always reflect the current state of the site JSON.
    source_fields = {
        'source_repo': f"{github_info['owner']}/{github_info['repo']}",
        'source_path': github_info['path'],
        'source_ref': github_info.get('ref', 'main'),
        'local_path': new_data['local_path'],
        'orig_site_json': new_data['json_file'],
        'orig_item_id': new_data['part_id'],
    }

    if existing_entry:
        # Merge: keep existing pinned data, update source info if changed
        entry = existing_entry.copy()
        entry.update(source_fields)
        return entry

    # Create new entry
    return {
        'id': new_data['manifest_id'],
        'source_repo': source_fields['source_repo'],
        'source_path': source_fields['source_path'],
        'source_ref': source_fields['source_ref'],
        'pinned_sha': None,
        'pinned_raw_url': None,
        'local_path': source_fields['local_path'],
        'checksum_sha256': None,
        'last_checked': None,
        'upstream_latest_sha': None,
        'status': 'unknown',
        'license': None,
        'orig_site_json': source_fields['orig_site_json'],
        'orig_item_id': source_fields['orig_item_id']
    }


def main():
    """
    Command-line entry point: scan component files and write the manifest.

    Exits with status 1 when no GitHub URLs are found. Otherwise the manifest
    file is rewritten as a list sorted by entry id and a summary is printed.
    """
    parser = argparse.ArgumentParser(
        description='Generate vendor manifest from site component JSON files'
    )
    parser.add_argument(
        '--site-dir',
        type=Path,
        default=Path('website/src/data/components'),
        help='Directory containing component JSON files (default: website/src/data/components)'
    )
    parser.add_argument(
        '--manifest',
        type=Path,
        default=Path('manifest/vendor_manifest.json'),
        help='Path to manifest file (default: manifest/vendor_manifest.json)'
    )

    args = parser.parse_args()

    # Relative paths are resolved against the parent of the directory that
    # holds this script; absolute paths passed on the command line win.
    script_dir = Path(__file__).parent.parent
    site_dir = (script_dir / args.site_dir).resolve()
    manifest_path = (script_dir / args.manifest).resolve()

    # Ensure manifest directory exists
    manifest_path.parent.mkdir(parents=True, exist_ok=True)

    # Load existing manifest
    existing_manifest = load_existing_manifest(manifest_path)

    # Scan component files
    print(f"Scanning component files in {site_dir}...")
    entries = scan_component_files(site_dir, repo_root=script_dir)

    if not entries:
        print("No GitHub URLs found in component files.", file=sys.stderr)
        sys.exit(1)

    # Create or update manifest entries
    updated_manifest = existing_manifest.copy()
    touched_ids = set()

    for entry_data in entries:
        manifest_id = entry_data['manifest_id']
        existing_entry = updated_manifest.get(manifest_id)
        updated_manifest[manifest_id] = create_or_update_manifest_entry(
            existing_entry, entry_data
        )
        touched_ids.add(manifest_id)

    # Convert to sorted list for deterministic output
    manifest_list = sorted(updated_manifest.values(), key=lambda x: x['id'])

    # Write manifest
    print(f"Writing manifest to {manifest_path}...")
    with open(manifest_path, 'w', encoding='utf-8') as f:
        json.dump(manifest_list, f, indent=2, sort_keys=False)

    print(f"Generated {len(manifest_list)} manifest entries.")

    # Show summary: only ids seen in this scan count as added or updated.
    updated_count = sum(1 for mid in touched_ids if mid in existing_manifest)
    new_entries = len(touched_ids) - updated_count
    if new_entries > 0:
        print(f"Added {new_entries} new entries.")
    if updated_count > 0:
        print(f"Updated {updated_count} existing entries.")


if __name__ == '__main__':
    main()