refactor: Restructure data files into component-specific and common directories, add new UI components, and update project documentation.

Author: MunchDev-oss
Date: 2026-01-07 02:06:43 -05:00
Parent: 97d2b66f02
Commit: 5366865b4b
28 changed files with 1894 additions and 2051 deletions

Binary file not shown.


@@ -1,327 +0,0 @@
#!/usr/bin/env python3
"""
Generate vendor manifest from site component JSON files.
Scans /src/data/components/*.json for printedParts entries with GitHub URLs
and creates or updates manifest/vendor_manifest.json.
"""
import argparse
import json
import os
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional, Any
from urllib.parse import urlparse, parse_qs, unquote
def parse_github_url(url: str) -> Optional[Dict[str, str]]:
    """
    Parse GitHub URL to extract owner, repo, path, and ref.

    Supports:
    - https://github.com/owner/repo/blob/<ref>/path/to/file
    - https://github.com/owner/repo/raw/<ref>/path/to/file
    - https://raw.githubusercontent.com/owner/repo/<ref>/path/to/file
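
    Illustrative doctest sketch (hypothetical URLs, not from the site data):

    >>> parse_github_url('https://raw.githubusercontent.com/acme/widgets/main/stl/knob.stl')
    {'owner': 'acme', 'repo': 'widgets', 'ref': 'main', 'path': 'stl/knob.stl'}
    >>> parse_github_url('https://example.org/not-a-repo.stl') is None
    True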
"""
if not url or not isinstance(url, str):
return None
# Check if it's a GitHub URL
if 'github.com' not in url:
return None
# Handle raw.githubusercontent.com
if 'raw.githubusercontent.com' in url:
match = re.match(r'https://raw\.githubusercontent\.com/([^/]+)/([^/]+)/([^/]+)/(.+)', url)
if match:
owner, repo, ref, path = match.groups()
return {
'owner': owner,
'repo': repo,
'ref': ref,
'path': unquote(path).split('?')[0] # Remove query params
}
# Handle github.com URLs
parsed = urlparse(url)
path_parts = parsed.path.strip('/').split('/')
if len(path_parts) < 5:
return None
owner = path_parts[0]
repo = path_parts[1]
mode = path_parts[2] # 'blob' or 'raw'
ref = path_parts[3]
# Get file path (everything after ref)
file_path = '/'.join(path_parts[4:])
# Remove query params from path
file_path = unquote(file_path).split('?')[0]
# Handle ?raw=true in query params (sometimes used with blob URLs)
query_params = parse_qs(parsed.query)
if 'raw' in query_params or mode == 'raw':
return {
'owner': owner,
'repo': repo,
'ref': ref,
'path': file_path
}
return None

def find_printed_parts(data: Any, path: str = '') -> List[Dict[str, Any]]:
    """
    Recursively find all printedParts entries in a nested JSON structure.
    Returns a list of dicts, each with 'part', 'json_path', and 'part_id' keys.
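
    Illustrative doctest sketch (hypothetical part data):

    >>> find_printed_parts({'printedParts': [{'id': 'knob-a'}]}, 'demo.json')
    [{'part': {'id': 'knob-a'}, 'json_path': 'demo.json', 'part_id': 'knob-a'}]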
"""
parts = []
if isinstance(data, dict):
# Check if this dict has a 'printedParts' key
if 'printedParts' in data:
for part in data['printedParts']:
if isinstance(part, dict) and 'id' in part:
parts.append({
'part': part,
'json_path': path,
'part_id': part.get('id')
})
# Also check for 'bodyParts', 'knobs', etc. that might contain parts
for key in ['bodyParts', 'knobs']:
if key in data and isinstance(data[key], list):
for part in data[key]:
if isinstance(part, dict) and 'id' in part:
parts.append({
'part': part,
'json_path': path,
'part_id': part.get('id')
})
# Recursively search nested structures
for key, value in data.items():
if isinstance(value, (dict, list)):
parts.extend(find_printed_parts(value, path))
elif isinstance(data, list):
for item in data:
parts.extend(find_printed_parts(item, path))
return parts

def generate_manifest_id(part_id: str, owner: str, repo: str, path: str) -> str:
    """Generate a manifest ID from part ID or create one from repo/path."""
    if part_id:
        return part_id
    # Generate slug from owner-repo-path
    slug = f"{owner}-{repo}-{path.replace('/', '-').replace(' ', '-')}"
    # Remove special chars
    slug = re.sub(r'[^a-zA-Z0-9_-]', '', slug)
    return slug[:100]  # Limit length

def generate_local_path(owner: str, repo: str, path: str) -> str:
    """Generate local vendor path from owner, repo, and file path."""
    repo_dir = f"{owner}-{repo}"
    return f"vendor/{repo_dir}/{path}"

def load_existing_manifest(manifest_path: Path) -> Dict[str, Dict]:
    """Load existing manifest or return empty dict."""
    if manifest_path.exists():
        try:
            with open(manifest_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            # Convert list to dict keyed by id
            if isinstance(data, list):
                return {entry['id']: entry for entry in data}
            elif isinstance(data, dict) and 'entries' in data:
                return {entry['id']: entry for entry in data['entries']}
            elif isinstance(data, dict):
                # Assume it's already keyed by id
                return data
        except (json.JSONDecodeError, KeyError) as e:
            print(f"Warning: Could not parse existing manifest: {e}", file=sys.stderr)
    return {}

def scan_component_files(site_dir: Path, repo_root: Path) -> List[Dict[str, Any]]:
    """Scan all component JSON files and extract printedParts with GitHub URLs."""
    entries = []
    if not site_dir.exists():
        print(f"Error: Site directory does not exist: {site_dir}", file=sys.stderr)
        return entries
    for json_file in site_dir.glob('*.json'):
        try:
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            parts = find_printed_parts(data, str(json_file))
            for item in parts:
                part = item['part']
                url = part.get('url')
                if not url:
                    continue
                github_info = parse_github_url(url)
                if not github_info:
                    print(f"Warning: Skipping non-GitHub URL in {json_file}: {url}", file=sys.stderr)
                    continue
                part_id = item['part_id']
                manifest_id = generate_manifest_id(
                    part_id,
                    github_info['owner'],
                    github_info['repo'],
                    github_info['path']
                )
                local_path = generate_local_path(
                    github_info['owner'],
                    github_info['repo'],
                    github_info['path']
                )
                # Store relative path from repo root
                try:
                    json_file_rel = json_file.relative_to(repo_root)
                except ValueError:
                    # If not relative, use absolute path
                    json_file_rel = json_file
                entries.append({
                    'manifest_id': manifest_id,
                    'part_id': part_id,
                    'part': part,
                    'json_file': str(json_file_rel),
                    'github_info': github_info,
                    'local_path': local_path
                })
        except (json.JSONDecodeError, IOError) as e:
            print(f"Warning: Could not read {json_file}: {e}", file=sys.stderr)
            continue
    return entries

def create_or_update_manifest_entry(
    existing_entry: Optional[Dict],
    new_data: Dict[str, Any]
) -> Dict[str, Any]:
    """Create new manifest entry or merge with existing."""
    github_info = new_data['github_info']
    manifest_id = new_data['manifest_id']
    if existing_entry:
        # Merge: keep existing pinned data, update source info if changed
        entry = existing_entry.copy()
        entry['source_repo'] = f"{github_info['owner']}/{github_info['repo']}"
        entry['source_path'] = github_info['path']
        entry['source_ref'] = github_info.get('ref', 'main')
        entry['local_path'] = new_data['local_path']
        entry['orig_site_json'] = new_data['json_file']
        entry['orig_item_id'] = new_data['part_id']
        # Don't overwrite pinned_sha, checksum, etc. if they exist
        return entry
    # Create new entry
    return {
        'id': manifest_id,
        'source_repo': f"{github_info['owner']}/{github_info['repo']}",
        'source_path': github_info['path'],
        'source_ref': github_info.get('ref', 'main'),
        'pinned_sha': None,
        'pinned_raw_url': None,
        'local_path': new_data['local_path'],
        'checksum_sha256': None,
        'last_checked': None,
        'upstream_latest_sha': None,
        'status': 'unknown',
        'license': None,
        'orig_site_json': new_data['json_file'],
        'orig_item_id': new_data['part_id']
    }
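
# A freshly created entry, rendered to JSON, looks roughly like this
# (hypothetical values; the keys mirror the dict literal above):
#   {
#     "id": "knob-a",
#     "source_repo": "acme/widgets",
#     "source_path": "stl/knob.stl",
#     "source_ref": "main",
#     "pinned_sha": null,
#     "local_path": "vendor/acme-widgets/stl/knob.stl",
#     "status": "unknown"
#   }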

def main():
    parser = argparse.ArgumentParser(
        description='Generate vendor manifest from site component JSON files'
    )
    parser.add_argument(
        '--site-dir',
        type=Path,
        default=Path('website/src/data/components'),
        help='Directory containing component JSON files (default: website/src/data/components)'
    )
    parser.add_argument(
        '--manifest',
        type=Path,
        default=Path('manifest/vendor_manifest.json'),
        help='Path to manifest file (default: manifest/vendor_manifest.json)'
    )
    args = parser.parse_args()

    # Resolve paths relative to script location or current directory
    script_dir = Path(__file__).parent.parent
    site_dir = (script_dir / args.site_dir).resolve()
    manifest_path = (script_dir / args.manifest).resolve()

    # Ensure manifest directory exists
    manifest_path.parent.mkdir(parents=True, exist_ok=True)

    # Load existing manifest
    existing_manifest = load_existing_manifest(manifest_path)

    # Scan component files
    print(f"Scanning component files in {site_dir}...")
    entries = scan_component_files(site_dir, repo_root=script_dir)
    if not entries:
        print("No GitHub URLs found in component files.", file=sys.stderr)
        sys.exit(1)

    # Create or update manifest entries
    updated_manifest = existing_manifest.copy()
    for entry_data in entries:
        manifest_id = entry_data['manifest_id']
        existing_entry = updated_manifest.get(manifest_id)
        new_entry = create_or_update_manifest_entry(existing_entry, entry_data)
        updated_manifest[manifest_id] = new_entry

    # Convert to sorted list for deterministic output
    manifest_list = sorted(updated_manifest.values(), key=lambda x: x['id'])

    # Write manifest
    print(f"Writing manifest to {manifest_path}...")
    with open(manifest_path, 'w', encoding='utf-8') as f:
        json.dump(manifest_list, f, indent=2, sort_keys=False)
    print(f"Generated {len(manifest_list)} manifest entries.")

    # Show summary
    new_entries = len(manifest_list) - len(existing_manifest)
    if new_entries > 0:
        print(f"Added {new_entries} new entries.")
    if len(existing_manifest) > 0:
        print(f"Updated {len(existing_manifest)} existing entries.")

if __name__ == '__main__':
    main()
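
For reference, the deleted generator was invoked roughly like this (the script's path is not shown in this diff, so the filename below is hypothetical; the flags match the argparse definitions above):

    python tools/generate_vendor_manifest.py --site-dir website/src/data/components --manifest manifest/vendor_manifest.json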


@@ -2,8 +2,8 @@
"""
Download and pin external asset files from GitHub.
Downloads files specified in manifest, pins them to commit SHAs,
computes checksums, and optionally syncs vendor metadata back to site JSON files.
Automatically scans website/src/data/components for parts with GitHub URLs,
updates the manifest, and then downloads/pins files.
"""
import argparse
@@ -14,8 +14,8 @@ import sys
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Optional
from urllib.parse import urlparse
from typing import Dict, List, Optional, Tuple, Generator, Any
from urllib.parse import urlparse, unquote, parse_qs
import requests
@@ -226,6 +226,182 @@ def download_file(url: str, dest_path: Path) -> bool:
    return False

def parse_github_url(url: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
    """
    Parse GitHub URL to return (owner, repo, ref, path).

    Supports:
    - https://github.com/owner/repo/blob/<ref>/path/to/file
    - https://github.com/owner/repo/raw/<ref>/path/to/file
    - https://raw.githubusercontent.com/owner/repo/<ref>/path/to/file
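
    Illustrative doctest sketch (hypothetical URLs):

    >>> parse_github_url('https://github.com/acme/widgets/blob/main/stl/knob.stl')
    ('acme', 'widgets', 'main', 'stl/knob.stl')
    >>> parse_github_url('https://example.org/file.stl')
    (None, None, None, None)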
"""
if not url or not isinstance(url, str):
return None, None, None, None
# Check if it's a GitHub URL
if 'github.com' not in url:
return None, None, None, None
try:
# Handle raw.githubusercontent.com
if 'raw.githubusercontent.com' in url:
match_parts = url.split('/')
# https://raw.githubusercontent.com/OWNER/REPO/REF/PATH...
# parts: [https:, , raw.githubusercontent.com, OWNER, REPO, REF, PATH...]
if len(match_parts) >= 6:
owner = match_parts[3]
repo = match_parts[4]
ref = match_parts[5]
path = '/'.join(match_parts[6:]).split('?')[0]
return owner, repo, ref, unquote(path)
# Handle github.com and action.github.com
parsed = urlparse(url)
path = parsed.path.strip('/')
path_parts = path.split('/')
if len(path_parts) >= 4:
owner = path_parts[0]
repo = path_parts[1]
mode = path_parts[2] # 'blob' or 'raw'
if mode in ('blob', 'raw'):
ref = path_parts[3]
file_path = '/'.join(path_parts[4:])
# Check query params for ?raw=true
query_params = parse_qs(parsed.query)
if 'raw' in query_params or mode == 'raw':
return owner, repo, ref, unquote(file_path)
# Also treat 'blob' as a valid source if we just want the path
return owner, repo, ref, unquote(file_path)
except Exception:
pass
return None, None, None, None

def scan_site_components(components_dir: Path) -> Generator[Dict[str, Any], None, None]:
    """Recursively scan JSON files for parts with GitHub URLs."""
    for json_file in components_dir.rglob('*.json'):
        try:
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            # Breadth-first walk over the JSON structure to find part dicts
            queue = [data]
            while queue:
                item = queue.pop(0)
                if isinstance(item, dict):
                    # Check if this item is a part
                    if 'id' in item and 'url' in item and item['url']:
                        owner, repo, ref, source_path = parse_github_url(item['url'])
                        if owner and repo and source_path:
                            yield {
                                'id': item['id'],
                                'url': item['url'],
                                'owner': owner,
                                'repo': repo,
                                'ref': ref or 'main',
                                'source_path': source_path,
                                'orig_site_json': json_file
                            }
                    # Add children to queue
                    queue.extend(item.values())
                elif isinstance(item, list):
                    queue.extend(item)
        except (json.JSONDecodeError, IOError) as e:
            print(f"Warning: Could not read {json_file}: {e}", file=sys.stderr)

def regenerate_manifest(manifest_path: Path, repo_root: Path) -> Tuple[List[Dict], int]:
    """
    Regenerate manifest from site data.
    Preserves state of existing entries.
    Returns (new_manifest_list, changes_count).
    """
    print("Scanning website components to regenerate manifest...")
    # Load existing manifest to preserve state
    old_manifest = {}
    if manifest_path.exists():
        with open(manifest_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        if isinstance(data, list):
            old_manifest = {entry['id']: entry for entry in data}
    new_manifest = {}
    components_dir = repo_root / 'website/src/data/components'
    changes_count = 0
    if not components_dir.exists():
        print(f"Warning: Components directory not found: {components_dir}", file=sys.stderr)
        return list(old_manifest.values()), 0
    for part in scan_site_components(components_dir):
        part_id = part['id']
        old_entry = old_manifest.get(part_id)
        # Calculate local path: vendor/{owner}-{repo}/{path}
        local_path = f"vendor/{part['owner']}-{part['repo']}/{part['source_path']}"
        source_repo = f"{part['owner']}/{part['repo']}"
        orig_site_json = str(part['orig_site_json'].relative_to(repo_root))
        entry = {
            'id': part_id,
            'source_repo': source_repo,
            'source_path': part['source_path'],
            'source_ref': part['ref'],
            'local_path': local_path,
            'orig_site_json': orig_site_json,
            'orig_item_id': part_id
        }
        # Preserve state if entry exists and source config matches
        if old_entry:
            # Check if source config changed
            config_changed = (
                old_entry.get('source_repo') != source_repo or
                old_entry.get('source_path') != part['source_path'] or
                old_entry.get('source_ref') != part['ref']
            )
            if not config_changed:
                # Copy state
                for key in ['pinned_sha', 'pinned_raw_url', 'checksum_sha256', 'last_checked', 'status', 'license', 'upstream_latest_sha']:
                    if key in old_entry:
                        entry[key] = old_entry[key]
            else:
                print(f" Config changed for {part_id}, resetting status.")
                entry['status'] = 'pending'
                entry['pinned_sha'] = None
                changes_count += 1
            # Check if we updated manifest info (like orig_site_json moved)
            if (old_entry.get('orig_site_json') != orig_site_json or
                    old_entry.get('local_path') != local_path):
                changes_count += 1
        else:
            print(f" New part found: {part_id}")
            entry['status'] = 'pending'
            entry['pinned_sha'] = None
            changes_count += 1
        new_manifest[part_id] = entry
    # Check for removed items
    removed_count = len(old_manifest) - len(new_manifest)
    if removed_count > 0:
        print(f" Removed {removed_count} parts that are no longer in site JSONs.")
        changes_count += removed_count
    return sorted(new_manifest.values(), key=lambda x: x['id']), changes_count
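
# As wired up in main() below, the regenerated list is persisted only when
# something changed, e.g.:
#   manifest_list, changes = regenerate_manifest(manifest_path, repo_root)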

def update_manifest_entry(
    entry: Dict,
    api: GitHubAPI,
@@ -254,6 +430,31 @@ def update_manifest_entry(
    local_path = Path(entry['local_path'])
    if not local_path.is_absolute():
        local_path = repo_root / local_path

    # Check if file exists and is already at the correct version
    current_pinned_sha = entry.get('pinned_sha')
    if current_pinned_sha == commit_sha and local_path.exists():
        if dry_run:
            print(f" [DRY RUN] File up to date ({commit_sha}), would skip download.")
        else:
            print(f" File up to date ({commit_sha}), skipping download.")
        # Ensure checksum is present
        if 'checksum_sha256' not in entry or not entry['checksum_sha256']:
            entry['checksum_sha256'] = compute_sha256(local_path)
        entry['pinned_sha'] = commit_sha
        entry['pinned_raw_url'] = pinned_raw_url
        entry['last_checked'] = datetime.now(timezone.utc).isoformat()
        entry['upstream_latest_sha'] = commit_sha
        entry['status'] = 'up-to-date'
        # If license is missing, try to get it, otherwise keep existing
        if 'license' not in entry and not dry_run:
            license_info = api.get_license(owner, repo, commit_sha)
            if license_info:
                entry['license'] = license_info
        return entry

    if dry_run:
        print(f" [DRY RUN] Would download to {local_path}")
@@ -309,45 +510,24 @@ def sync_to_site_json(entry: Dict, repo_root: Path) -> bool:
        data = json.load(f)

    # Find the printed part in the nested structure
    def find_and_update_part(obj, target_id, path=''):
    def find_and_update_part(obj, target_id):
        if isinstance(obj, dict):
            # Check if this is a printedParts array
            if 'printedParts' in obj and isinstance(obj['printedParts'], list):
                for part in obj['printedParts']:
                    if isinstance(part, dict) and part.get('id') == target_id:
                        # Update this part
                        if 'vendor' not in part:
                            part['vendor'] = {}
                        part['vendor'].update({
                            'manifest_id': entry['id'],
                            'local_path': entry['local_path'],
                            'pinned_sha': entry['pinned_sha'],
                            'pinned_raw_url': entry['pinned_raw_url'],
                            'checksum_sha256': entry['checksum_sha256'],
                            'last_checked': entry['last_checked'],
                            'status': entry['status']
                        })
                        return True
            # Check bodyParts, knobs, etc.
            for key in ['bodyParts', 'knobs']:
                if key in obj and isinstance(obj[key], list):
                    for part in obj[key]:
                        if isinstance(part, dict) and part.get('id') == target_id:
                            if 'vendor' not in part:
                                part['vendor'] = {}
                            part['vendor'].update({
                                'manifest_id': entry['id'],
                                'local_path': entry['local_path'],
                                'pinned_sha': entry['pinned_sha'],
                                'pinned_raw_url': entry['pinned_raw_url'],
                                'checksum_sha256': entry['checksum_sha256'],
                                'last_checked': entry['last_checked'],
                                'status': entry['status']
                            })
                            return True
            # Recursively search
            # If this object IS the part (has the ID)
            if obj.get('id') == target_id:
                if 'vendor' not in obj:
                    obj['vendor'] = {}
                obj['vendor'].update({
                    'manifest_id': entry['id'],
                    'local_path': entry['local_path'],
                    'pinned_sha': entry['pinned_sha'],
                    'pinned_raw_url': entry['pinned_raw_url'],
                    'checksum_sha256': entry['checksum_sha256'],
                    'last_checked': entry['last_checked'],
                    'status': entry['status']
                })
                return True
            # Recursively search values
            for value in obj.values():
                if find_and_update_part(value, target_id):
                    return True
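
    # The vendor block written back into the site JSON is shaped like this
    # (illustrative values; the keys mirror the update() calls above):
    #   "vendor": {"manifest_id": "knob-a",
    #              "local_path": "vendor/acme-widgets/stl/knob.stl",
    #              "pinned_sha": "abc123...", "status": "up-to-date", ...}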
@@ -396,9 +576,9 @@ def main():
        help='Show what would be done without downloading files'
    )
    parser.add_argument(
        '--sync-site',
        '--no-sync',
        action='store_true',
        help='Sync vendor metadata back to site JSON files'
        help='Skip syncing vendor metadata back to site JSON files'
    )
    parser.add_argument(
        '--delay',
@@ -406,6 +586,16 @@ def main():
        default=0.5,
        help='Delay between API requests in seconds (default: 0.5)'
    )
    parser.add_argument(
        '--no-scan',
        action='store_true',
        help='Skip scanning website for new components'
    )
    parser.add_argument(
        '--scan-only',
        action='store_true',
        help='Only scan website and update manifest, do not check/download files'
    )

    args = parser.parse_args()
@@ -414,13 +604,30 @@ def main():
    manifest_path = (script_dir / args.manifest).resolve()
    repo_root = script_dir

    if not manifest_path.exists():
        print(f"Error: Manifest file not found: {manifest_path}", file=sys.stderr)
        sys.exit(1)
    # Load manifest
    with open(manifest_path, 'r', encoding='utf-8') as f:
        manifest_data = json.load(f)
    # Regenerate manifest from website scan (unless disabled)
    if not args.no_scan and not args.entry:
        manifest_list, changes = regenerate_manifest(manifest_path, repo_root)
        if changes > 0:
            print(f"Manifest regenerated with {changes} changes.")
            if not args.dry_run:
                manifest_path.parent.mkdir(parents=True, exist_ok=True)
                with open(manifest_path, 'w', encoding='utf-8') as f:
                    json.dump(manifest_list, f, indent=2, sort_keys=False)
        else:
            print("No changes in manifest structure detected.")
        if args.scan_only:
            return
        # Reload manifest data for processing
        manifest_data = manifest_list
    else:
        if not manifest_path.exists():
            print(f"Error: Manifest file not found: {manifest_path}", file=sys.stderr)
            sys.exit(1)
        with open(manifest_path, 'r', encoding='utf-8') as f:
            manifest_data = json.load(f)

    # Convert to dict if it's a list
    if isinstance(manifest_data, list):
@@ -446,7 +653,7 @@ def main():
        updated_entry = update_manifest_entry(entry, api, repo_root, dry_run=args.dry_run)
        manifest[entry_id] = updated_entry

        if args.sync_site and not args.dry_run:
        if not args.no_sync and not args.dry_run:
            sync_to_site_json(updated_entry, repo_root)
        updated_count += 1
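
Typical invocations of the updated pin script (its filename is not shown in this diff, so pin_assets.py below is hypothetical; the flags come from the argparse definitions above):

    python pin_assets.py --scan-only          # only refresh the manifest from the site JSONs
    python pin_assets.py --dry-run            # show what would be pinned/downloaded
    python pin_assets.py --no-scan --no-sync  # use the existing manifest; skip site JSON sync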