refactor: Restructure data files into component-specific and common directories, add new UI components, and update project documentation.

Author: MunchDev-oss
Date: 2026-01-07 02:06:43 -05:00
Parent: 97d2b66f02
Commit: 5366865b4b
28 changed files with 1894 additions and 2051 deletions

Binary file not shown.


@@ -1,327 +0,0 @@
#!/usr/bin/env python3
"""
Generate vendor manifest from site component JSON files.
Scans /src/data/components/*.json for printedParts entries with GitHub URLs
and creates or updates manifest/vendor_manifest.json.
"""
import argparse
import json
import os
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional, Any
from urllib.parse import urlparse, parse_qs, unquote
def parse_github_url(url: str) -> Optional[Dict[str, str]]:
    """
    Parse GitHub URL to extract owner, repo, path, and ref.

    Supports:
    - https://github.com/owner/repo/blob/<ref>/path/to/file
    - https://github.com/owner/repo/raw/<ref>/path/to/file
    - https://raw.githubusercontent.com/owner/repo/<ref>/path/to/file
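
    Illustrative doctest sketch (hypothetical URLs, not from the site data):

    >>> parse_github_url('https://raw.githubusercontent.com/acme/widgets/main/stl/knob.stl')
    {'owner': 'acme', 'repo': 'widgets', 'ref': 'main', 'path': 'stl/knob.stl'}
    >>> parse_github_url('https://example.org/not-a-repo.stl') is None
    True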
"""
if not url or not isinstance(url, str):
return None
# Check if it's a GitHub URL
if 'github.com' not in url:
return None
# Handle raw.githubusercontent.com
if 'raw.githubusercontent.com' in url:
match = re.match(r'https://raw\.githubusercontent\.com/([^/]+)/([^/]+)/([^/]+)/(.+)', url)
if match:
owner, repo, ref, path = match.groups()
return {
'owner': owner,
'repo': repo,
'ref': ref,
'path': unquote(path).split('?')[0] # Remove query params
}
# Handle github.com URLs
parsed = urlparse(url)
path_parts = parsed.path.strip('/').split('/')
if len(path_parts) < 5:
return None
owner = path_parts[0]
repo = path_parts[1]
mode = path_parts[2] # 'blob' or 'raw'
ref = path_parts[3]
# Get file path (everything after ref)
file_path = '/'.join(path_parts[4:])
# Remove query params from path
file_path = unquote(file_path).split('?')[0]
# Handle ?raw=true in query params (sometimes used with blob URLs)
query_params = parse_qs(parsed.query)
if 'raw' in query_params or mode == 'raw':
return {
'owner': owner,
'repo': repo,
'ref': ref,
'path': file_path
}
return None

def find_printed_parts(data: Any, path: str = '') -> List[Dict[str, Any]]:
    """
    Recursively find all printedParts entries in a nested JSON structure.
    Returns a list of dicts, each with 'part', 'json_path', and 'part_id' keys.
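
    Illustrative doctest sketch (hypothetical part data):

    >>> find_printed_parts({'printedParts': [{'id': 'knob-a'}]}, 'demo.json')
    [{'part': {'id': 'knob-a'}, 'json_path': 'demo.json', 'part_id': 'knob-a'}]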
"""
parts = []
if isinstance(data, dict):
# Check if this dict has a 'printedParts' key
if 'printedParts' in data:
for part in data['printedParts']:
if isinstance(part, dict) and 'id' in part:
parts.append({
'part': part,
'json_path': path,
'part_id': part.get('id')
})
# Also check for 'bodyParts', 'knobs', etc. that might contain parts
for key in ['bodyParts', 'knobs']:
if key in data and isinstance(data[key], list):
for part in data[key]:
if isinstance(part, dict) and 'id' in part:
parts.append({
'part': part,
'json_path': path,
'part_id': part.get('id')
})
# Recursively search nested structures
for key, value in data.items():
if isinstance(value, (dict, list)):
parts.extend(find_printed_parts(value, path))
elif isinstance(data, list):
for item in data:
parts.extend(find_printed_parts(item, path))
return parts

def generate_manifest_id(part_id: str, owner: str, repo: str, path: str) -> str:
    """Generate a manifest ID from part ID or create one from repo/path."""
    if part_id:
        return part_id
    # Generate slug from owner-repo-path
    slug = f"{owner}-{repo}-{path.replace('/', '-').replace(' ', '-')}"
    # Remove special chars
    slug = re.sub(r'[^a-zA-Z0-9_-]', '', slug)
    return slug[:100]  # Limit length

def generate_local_path(owner: str, repo: str, path: str) -> str:
    """Generate local vendor path from owner, repo, and file path."""
    repo_dir = f"{owner}-{repo}"
    return f"vendor/{repo_dir}/{path}"

def load_existing_manifest(manifest_path: Path) -> Dict[str, Dict]:
    """Load existing manifest or return empty dict."""
    if manifest_path.exists():
        try:
            with open(manifest_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            # Convert list to dict keyed by id
            if isinstance(data, list):
                return {entry['id']: entry for entry in data}
            elif isinstance(data, dict) and 'entries' in data:
                return {entry['id']: entry for entry in data['entries']}
            elif isinstance(data, dict):
                # Assume it's already keyed by id
                return data
        except (json.JSONDecodeError, KeyError) as e:
            print(f"Warning: Could not parse existing manifest: {e}", file=sys.stderr)
    return {}

def scan_component_files(site_dir: Path, repo_root: Path) -> List[Dict[str, Any]]:
    """Scan all component JSON files and extract printedParts with GitHub URLs."""
    entries = []
    if not site_dir.exists():
        print(f"Error: Site directory does not exist: {site_dir}", file=sys.stderr)
        return entries
    for json_file in site_dir.glob('*.json'):
        try:
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            parts = find_printed_parts(data, str(json_file))
            for item in parts:
                part = item['part']
                url = part.get('url')
                if not url:
                    continue
                github_info = parse_github_url(url)
                if not github_info:
                    print(f"Warning: Skipping non-GitHub URL in {json_file}: {url}", file=sys.stderr)
                    continue
                part_id = item['part_id']
                manifest_id = generate_manifest_id(
                    part_id,
                    github_info['owner'],
                    github_info['repo'],
                    github_info['path']
                )
                local_path = generate_local_path(
                    github_info['owner'],
                    github_info['repo'],
                    github_info['path']
                )
                # Store relative path from repo root
                try:
                    json_file_rel = json_file.relative_to(repo_root)
                except ValueError:
                    # If not relative, use absolute path
                    json_file_rel = json_file
                entries.append({
                    'manifest_id': manifest_id,
                    'part_id': part_id,
                    'part': part,
                    'json_file': str(json_file_rel),
                    'github_info': github_info,
                    'local_path': local_path
                })
        except (json.JSONDecodeError, IOError) as e:
            print(f"Warning: Could not read {json_file}: {e}", file=sys.stderr)
            continue
    return entries

def create_or_update_manifest_entry(
    existing_entry: Optional[Dict],
    new_data: Dict[str, Any]
) -> Dict[str, Any]:
    """Create new manifest entry or merge with existing."""
    github_info = new_data['github_info']
    manifest_id = new_data['manifest_id']
    if existing_entry:
        # Merge: keep existing pinned data, update source info if changed
        entry = existing_entry.copy()
        entry['source_repo'] = f"{github_info['owner']}/{github_info['repo']}"
        entry['source_path'] = github_info['path']
        entry['source_ref'] = github_info.get('ref', 'main')
        entry['local_path'] = new_data['local_path']
        entry['orig_site_json'] = new_data['json_file']
        entry['orig_item_id'] = new_data['part_id']
        # Don't overwrite pinned_sha, checksum, etc. if they exist
        return entry
    # Create new entry
    return {
        'id': manifest_id,
        'source_repo': f"{github_info['owner']}/{github_info['repo']}",
        'source_path': github_info['path'],
        'source_ref': github_info.get('ref', 'main'),
        'pinned_sha': None,
        'pinned_raw_url': None,
        'local_path': new_data['local_path'],
        'checksum_sha256': None,
        'last_checked': None,
        'upstream_latest_sha': None,
        'status': 'unknown',
        'license': None,
        'orig_site_json': new_data['json_file'],
        'orig_item_id': new_data['part_id']
    }
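
# A freshly created entry, rendered to JSON, looks roughly like this
# (hypothetical values; the keys mirror the dict literal above):
#   {
#     "id": "knob-a",
#     "source_repo": "acme/widgets",
#     "source_path": "stl/knob.stl",
#     "source_ref": "main",
#     "pinned_sha": null,
#     "local_path": "vendor/acme-widgets/stl/knob.stl",
#     "status": "unknown"
#   }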

def main():
    parser = argparse.ArgumentParser(
        description='Generate vendor manifest from site component JSON files'
    )
    parser.add_argument(
        '--site-dir',
        type=Path,
        default=Path('website/src/data/components'),
        help='Directory containing component JSON files (default: website/src/data/components)'
    )
    parser.add_argument(
        '--manifest',
        type=Path,
        default=Path('manifest/vendor_manifest.json'),
        help='Path to manifest file (default: manifest/vendor_manifest.json)'
    )
    args = parser.parse_args()

    # Resolve paths relative to script location or current directory
    script_dir = Path(__file__).parent.parent
    site_dir = (script_dir / args.site_dir).resolve()
    manifest_path = (script_dir / args.manifest).resolve()

    # Ensure manifest directory exists
    manifest_path.parent.mkdir(parents=True, exist_ok=True)

    # Load existing manifest
    existing_manifest = load_existing_manifest(manifest_path)

    # Scan component files
    print(f"Scanning component files in {site_dir}...")
    entries = scan_component_files(site_dir, repo_root=script_dir)
    if not entries:
        print("No GitHub URLs found in component files.", file=sys.stderr)
        sys.exit(1)

    # Create or update manifest entries
    updated_manifest = existing_manifest.copy()
    for entry_data in entries:
        manifest_id = entry_data['manifest_id']
        existing_entry = updated_manifest.get(manifest_id)
        new_entry = create_or_update_manifest_entry(existing_entry, entry_data)
        updated_manifest[manifest_id] = new_entry

    # Convert to sorted list for deterministic output
    manifest_list = sorted(updated_manifest.values(), key=lambda x: x['id'])

    # Write manifest
    print(f"Writing manifest to {manifest_path}...")
    with open(manifest_path, 'w', encoding='utf-8') as f:
        json.dump(manifest_list, f, indent=2, sort_keys=False)
    print(f"Generated {len(manifest_list)} manifest entries.")

    # Show summary
    new_entries = len(manifest_list) - len(existing_manifest)
    if new_entries > 0:
        print(f"Added {new_entries} new entries.")
    if len(existing_manifest) > 0:
        print(f"Updated {len(existing_manifest)} existing entries.")

if __name__ == '__main__':
    main()
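
For reference, the deleted generator was invoked roughly like this (the script's path is not shown in this diff, so the filename below is hypothetical; the flags match the argparse definitions above):

    python tools/generate_vendor_manifest.py --site-dir website/src/data/components --manifest manifest/vendor_manifest.json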


@@ -2,8 +2,8 @@
"""
Download and pin external asset files from GitHub.
Downloads files specified in manifest, pins them to commit SHAs,
computes checksums, and optionally syncs vendor metadata back to site JSON files.
Automatically scans website/src/data/components for parts with GitHub URLs,
updates the manifest, and then downloads/pins files.
"""
import argparse
@@ -14,8 +14,8 @@ import sys
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Optional
from urllib.parse import urlparse
from typing import Dict, List, Optional, Tuple, Generator, Any
from urllib.parse import urlparse, unquote, parse_qs
import requests
@@ -226,6 +226,182 @@ def download_file(url: str, dest_path: Path) -> bool:
    return False

def parse_github_url(url: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
    """
    Parse GitHub URL to return (owner, repo, ref, path).

    Supports:
    - https://github.com/owner/repo/blob/<ref>/path/to/file
    - https://github.com/owner/repo/raw/<ref>/path/to/file
    - https://raw.githubusercontent.com/owner/repo/<ref>/path/to/file
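
    Illustrative doctest sketch (hypothetical URLs):

    >>> parse_github_url('https://github.com/acme/widgets/blob/main/stl/knob.stl')
    ('acme', 'widgets', 'main', 'stl/knob.stl')
    >>> parse_github_url('https://example.org/file.stl')
    (None, None, None, None)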
"""
if not url or not isinstance(url, str):
return None, None, None, None
# Check if it's a GitHub URL
if 'github.com' not in url:
return None, None, None, None
try:
# Handle raw.githubusercontent.com
if 'raw.githubusercontent.com' in url:
match_parts = url.split('/')
# https://raw.githubusercontent.com/OWNER/REPO/REF/PATH...
# parts: [https:, , raw.githubusercontent.com, OWNER, REPO, REF, PATH...]
if len(match_parts) >= 6:
owner = match_parts[3]
repo = match_parts[4]
ref = match_parts[5]
path = '/'.join(match_parts[6:]).split('?')[0]
return owner, repo, ref, unquote(path)
# Handle github.com and action.github.com
parsed = urlparse(url)
path = parsed.path.strip('/')
path_parts = path.split('/')
if len(path_parts) >= 4:
owner = path_parts[0]
repo = path_parts[1]
mode = path_parts[2] # 'blob' or 'raw'
if mode in ('blob', 'raw'):
ref = path_parts[3]
file_path = '/'.join(path_parts[4:])
# Check query params for ?raw=true
query_params = parse_qs(parsed.query)
if 'raw' in query_params or mode == 'raw':
return owner, repo, ref, unquote(file_path)
# Also treat 'blob' as a valid source if we just want the path
return owner, repo, ref, unquote(file_path)
except Exception:
pass
return None, None, None, None

def scan_site_components(components_dir: Path) -> Generator[Dict[str, Any], None, None]:
    """Recursively scan JSON files for parts with GitHub URLs."""
    for json_file in components_dir.rglob('*.json'):
        try:
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            # Breadth-first walk over the JSON structure to find part dicts
            queue = [data]
            while queue:
                item = queue.pop(0)
                if isinstance(item, dict):
                    # Check if this item is a part
                    if 'id' in item and 'url' in item and item['url']:
                        owner, repo, ref, source_path = parse_github_url(item['url'])
                        if owner and repo and source_path:
                            yield {
                                'id': item['id'],
                                'url': item['url'],
                                'owner': owner,
                                'repo': repo,
                                'ref': ref or 'main',
                                'source_path': source_path,
                                'orig_site_json': json_file
                            }
                    # Add children to queue
                    queue.extend(item.values())
                elif isinstance(item, list):
                    queue.extend(item)
        except (json.JSONDecodeError, IOError) as e:
            print(f"Warning: Could not read {json_file}: {e}", file=sys.stderr)

def regenerate_manifest(manifest_path: Path, repo_root: Path) -> Tuple[List[Dict], int]:
    """
    Regenerate manifest from site data.
    Preserves state of existing entries.
    Returns (new_manifest_list, changes_count).
    """
    print("Scanning website components to regenerate manifest...")
    # Load existing manifest to preserve state
    old_manifest = {}
    if manifest_path.exists():
        with open(manifest_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        if isinstance(data, list):
            old_manifest = {entry['id']: entry for entry in data}
    new_manifest = {}
    components_dir = repo_root / 'website/src/data/components'
    changes_count = 0
    if not components_dir.exists():
        print(f"Warning: Components directory not found: {components_dir}", file=sys.stderr)
        return list(old_manifest.values()), 0
    for part in scan_site_components(components_dir):
        part_id = part['id']
        old_entry = old_manifest.get(part_id)
        # Calculate local path: vendor/{owner}-{repo}/{path}
        local_path = f"vendor/{part['owner']}-{part['repo']}/{part['source_path']}"
        source_repo = f"{part['owner']}/{part['repo']}"
        orig_site_json = str(part['orig_site_json'].relative_to(repo_root))
        entry = {
            'id': part_id,
            'source_repo': source_repo,
            'source_path': part['source_path'],
            'source_ref': part['ref'],
            'local_path': local_path,
            'orig_site_json': orig_site_json,
            'orig_item_id': part_id
        }
        # Preserve state if entry exists and source config matches
        if old_entry:
            # Check if source config changed
            config_changed = (
                old_entry.get('source_repo') != source_repo or
                old_entry.get('source_path') != part['source_path'] or
                old_entry.get('source_ref') != part['ref']
            )
            if not config_changed:
                # Copy state
                for key in ['pinned_sha', 'pinned_raw_url', 'checksum_sha256', 'last_checked', 'status', 'license', 'upstream_latest_sha']:
                    if key in old_entry:
                        entry[key] = old_entry[key]
            else:
                print(f" Config changed for {part_id}, resetting status.")
                entry['status'] = 'pending'
                entry['pinned_sha'] = None
                changes_count += 1
            # Check if we updated manifest info (like orig_site_json moved)
            if (old_entry.get('orig_site_json') != orig_site_json or
                    old_entry.get('local_path') != local_path):
                changes_count += 1
        else:
            print(f" New part found: {part_id}")
            entry['status'] = 'pending'
            entry['pinned_sha'] = None
            changes_count += 1
        new_manifest[part_id] = entry
    # Check for removed items
    removed_count = len(old_manifest) - len(new_manifest)
    if removed_count > 0:
        print(f" Removed {removed_count} parts that are no longer in site JSONs.")
        changes_count += removed_count
    return sorted(new_manifest.values(), key=lambda x: x['id']), changes_count
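
# As wired up in main() below, the regenerated list is persisted only when
# something changed, e.g.:
#   manifest_list, changes = regenerate_manifest(manifest_path, repo_root)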

def update_manifest_entry(
    entry: Dict,
    api: GitHubAPI,
@@ -254,6 +430,31 @@ def update_manifest_entry(
    local_path = Path(entry['local_path'])
    if not local_path.is_absolute():
        local_path = repo_root / local_path

    # Check if file exists and is already at the correct version
    current_pinned_sha = entry.get('pinned_sha')
    if current_pinned_sha == commit_sha and local_path.exists():
        if dry_run:
            print(f" [DRY RUN] File up to date ({commit_sha}), would skip download.")
        else:
            print(f" File up to date ({commit_sha}), skipping download.")
        # Ensure checksum is present
        if 'checksum_sha256' not in entry or not entry['checksum_sha256']:
            entry['checksum_sha256'] = compute_sha256(local_path)
        entry['pinned_sha'] = commit_sha
        entry['pinned_raw_url'] = pinned_raw_url
        entry['last_checked'] = datetime.now(timezone.utc).isoformat()
        entry['upstream_latest_sha'] = commit_sha
        entry['status'] = 'up-to-date'
        # If license is missing, try to get it, otherwise keep existing
        if 'license' not in entry and not dry_run:
            license_info = api.get_license(owner, repo, commit_sha)
            if license_info:
                entry['license'] = license_info
        return entry

    if dry_run:
        print(f" [DRY RUN] Would download to {local_path}")
@@ -309,45 +510,24 @@ def sync_to_site_json(entry: Dict, repo_root: Path) -> bool:
        data = json.load(f)

    # Find the printed part in the nested structure
    def find_and_update_part(obj, target_id, path=''):
    def find_and_update_part(obj, target_id):
        if isinstance(obj, dict):
            # Check if this is a printedParts array
            if 'printedParts' in obj and isinstance(obj['printedParts'], list):
                for part in obj['printedParts']:
                    if isinstance(part, dict) and part.get('id') == target_id:
                        # Update this part
                        if 'vendor' not in part:
                            part['vendor'] = {}
                        part['vendor'].update({
                            'manifest_id': entry['id'],
                            'local_path': entry['local_path'],
                            'pinned_sha': entry['pinned_sha'],
                            'pinned_raw_url': entry['pinned_raw_url'],
                            'checksum_sha256': entry['checksum_sha256'],
                            'last_checked': entry['last_checked'],
                            'status': entry['status']
                        })
                        return True
            # Check bodyParts, knobs, etc.
            for key in ['bodyParts', 'knobs']:
                if key in obj and isinstance(obj[key], list):
                    for part in obj[key]:
                        if isinstance(part, dict) and part.get('id') == target_id:
                            if 'vendor' not in part:
                                part['vendor'] = {}
                            part['vendor'].update({
                                'manifest_id': entry['id'],
                                'local_path': entry['local_path'],
                                'pinned_sha': entry['pinned_sha'],
                                'pinned_raw_url': entry['pinned_raw_url'],
                                'checksum_sha256': entry['checksum_sha256'],
                                'last_checked': entry['last_checked'],
                                'status': entry['status']
                            })
                            return True
            # Recursively search
            # If this object IS the part (has the ID)
            if obj.get('id') == target_id:
                if 'vendor' not in obj:
                    obj['vendor'] = {}
                obj['vendor'].update({
                    'manifest_id': entry['id'],
                    'local_path': entry['local_path'],
                    'pinned_sha': entry['pinned_sha'],
                    'pinned_raw_url': entry['pinned_raw_url'],
                    'checksum_sha256': entry['checksum_sha256'],
                    'last_checked': entry['last_checked'],
                    'status': entry['status']
                })
                return True
            # Recursively search values
            for value in obj.values():
                if find_and_update_part(value, target_id):
                    return True
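
    # The vendor block written back into the site JSON is shaped like this
    # (illustrative values; the keys mirror the update() calls above):
    #   "vendor": {"manifest_id": "knob-a",
    #              "local_path": "vendor/acme-widgets/stl/knob.stl",
    #              "pinned_sha": "abc123...", "status": "up-to-date", ...}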
@@ -396,9 +576,9 @@ def main():
        help='Show what would be done without downloading files'
    )
    parser.add_argument(
        '--sync-site',
        '--no-sync',
        action='store_true',
        help='Sync vendor metadata back to site JSON files'
        help='Skip syncing vendor metadata back to site JSON files'
    )
    parser.add_argument(
        '--delay',
@@ -406,6 +586,16 @@ def main():
        default=0.5,
        help='Delay between API requests in seconds (default: 0.5)'
    )
    parser.add_argument(
        '--no-scan',
        action='store_true',
        help='Skip scanning website for new components'
    )
    parser.add_argument(
        '--scan-only',
        action='store_true',
        help='Only scan website and update manifest, do not check/download files'
    )

    args = parser.parse_args()
@@ -414,13 +604,30 @@ def main():
    manifest_path = (script_dir / args.manifest).resolve()
    repo_root = script_dir

    if not manifest_path.exists():
        print(f"Error: Manifest file not found: {manifest_path}", file=sys.stderr)
        sys.exit(1)
    # Load manifest
    with open(manifest_path, 'r', encoding='utf-8') as f:
        manifest_data = json.load(f)
    # Regenerate manifest from website scan (unless disabled)
    if not args.no_scan and not args.entry:
        manifest_list, changes = regenerate_manifest(manifest_path, repo_root)
        if changes > 0:
            print(f"Manifest regenerated with {changes} changes.")
            if not args.dry_run:
                manifest_path.parent.mkdir(parents=True, exist_ok=True)
                with open(manifest_path, 'w', encoding='utf-8') as f:
                    json.dump(manifest_list, f, indent=2, sort_keys=False)
        else:
            print("No changes in manifest structure detected.")
        if args.scan_only:
            return
        # Reload manifest data for processing
        manifest_data = manifest_list
    else:
        if not manifest_path.exists():
            print(f"Error: Manifest file not found: {manifest_path}", file=sys.stderr)
            sys.exit(1)
        with open(manifest_path, 'r', encoding='utf-8') as f:
            manifest_data = json.load(f)

    # Convert to dict if it's a list
    if isinstance(manifest_data, list):
@@ -446,7 +653,7 @@ def main():
        updated_entry = update_manifest_entry(entry, api, repo_root, dry_run=args.dry_run)
        manifest[entry_id] = updated_entry

        if args.sync_site and not args.dry_run:
        if not args.no_sync and not args.dry_run:
            sync_to_site_json(updated_entry, repo_root)
        updated_count += 1
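
Typical invocations of the updated pin script (its filename is not shown in this diff, so pin_assets.py below is hypothetical; the flags come from the argparse definitions above):

    python pin_assets.py --scan-only          # only refresh the manifest from the site JSONs
    python pin_assets.py --dry-run            # show what would be pinned/downloaded
    python pin_assets.py --no-scan --no-sync  # use the existing manifest; skip site JSON sync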