refactor: Restructure data files into component-specific and common directories, add new UI components, and update project documentation.

Author: MunchDev-oss
Date: 2026-01-07 02:06:43 -05:00
parent 97d2b66f02
commit 5366865b4b
28 changed files with 1894 additions and 2051 deletions


@@ -2,8 +2,8 @@
"""
Download and pin external asset files from GitHub.
-Downloads files specified in manifest, pins them to commit SHAs,
-computes checksums, and optionally syncs vendor metadata back to site JSON files.
+Automatically scans website/src/data/components for parts with GitHub URLs,
+updates the manifest, and then downloads/pins files.
"""
import argparse
@@ -14,8 +14,8 @@ import sys
import time
from datetime import datetime, timezone
from pathlib import Path
-from typing import Dict, List, Optional
-from urllib.parse import urlparse
+from typing import Dict, List, Optional, Tuple, Generator, Any
+from urllib.parse import urlparse, unquote, parse_qs
import requests
@@ -226,6 +226,182 @@ def download_file(url: str, dest_path: Path) -> bool:
return False
def parse_github_url(url: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
"""
Parse GitHub URL to return (owner, repo, ref, path).
Supports:
- https://github.com/owner/repo/blob/<ref>/path/to/file
- https://github.com/owner/repo/raw/<ref>/path/to/file
- https://raw.githubusercontent.com/owner/repo/<ref>/path/to/file
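    Illustrative examples (owner/repo/paths below are hypothetical):
    >>> parse_github_url('https://github.com/owner/repo/blob/main/models/part.stl')
    ('owner', 'repo', 'main', 'models/part.stl')
    >>> parse_github_url('https://raw.githubusercontent.com/owner/repo/abc123/models/part.stl')
    ('owner', 'repo', 'abc123', 'models/part.stl')
    >>> parse_github_url('https://example.com/not-github.stl')
    (None, None, None, None)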
"""
if not url or not isinstance(url, str):
return None, None, None, None
# Check if it's a GitHub URL
if 'github.com' not in url:
return None, None, None, None
try:
# Handle raw.githubusercontent.com
if 'raw.githubusercontent.com' in url:
match_parts = url.split('/')
# https://raw.githubusercontent.com/OWNER/REPO/REF/PATH...
# parts: [https:, , raw.githubusercontent.com, OWNER, REPO, REF, PATH...]
if len(match_parts) >= 6:
owner = match_parts[3]
repo = match_parts[4]
ref = match_parts[5]
path = '/'.join(match_parts[6:]).split('?')[0]
return owner, repo, ref, unquote(path)
        # Handle github.com blob and raw URLs
parsed = urlparse(url)
path = parsed.path.strip('/')
path_parts = path.split('/')
if len(path_parts) >= 4:
owner = path_parts[0]
repo = path_parts[1]
mode = path_parts[2] # 'blob' or 'raw'
if mode in ('blob', 'raw'):
ref = path_parts[3]
file_path = '/'.join(path_parts[4:])
# Check query params for ?raw=true
query_params = parse_qs(parsed.query)
if 'raw' in query_params or mode == 'raw':
return owner, repo, ref, unquote(file_path)
# Also treat 'blob' as a valid source if we just want the path
return owner, repo, ref, unquote(file_path)
except Exception:
pass
return None, None, None, None
def scan_site_components(components_dir: Path) -> Generator[Dict[str, Any], None, None]:
"""Recursively scan JSON files for parts with GitHub URLs."""
for json_file in components_dir.rglob('*.json'):
try:
with open(json_file, 'r', encoding='utf-8') as f:
data = json.load(f)
            # Breadth-first walk over the parsed JSON, collecting part objects
queue = [data]
while queue:
item = queue.pop(0)
if isinstance(item, dict):
# Check if this item is a part
if 'id' in item and 'url' in item and item['url']:
owner, repo, ref, source_path = parse_github_url(item['url'])
if owner and repo and source_path:
yield {
'id': item['id'],
'url': item['url'],
'owner': owner,
'repo': repo,
'ref': ref or 'main',
'source_path': source_path,
'orig_site_json': json_file
}
# Add children to queue
queue.extend(item.values())
elif isinstance(item, list):
queue.extend(item)
except (json.JSONDecodeError, IOError) as e:
print(f"Warning: Could not read {json_file}: {e}", file=sys.stderr)
def regenerate_manifest(manifest_path: Path, repo_root: Path) -> Tuple[List[Dict], int]:
"""
Regenerate manifest from site data.
Preserves state of existing entries.
Returns (new_manifest_list, changes_count).
"""
print("Scanning website components to regenerate manifest...")
# Load existing manifest to preserve state
old_manifest = {}
if manifest_path.exists():
with open(manifest_path, 'r', encoding='utf-8') as f:
data = json.load(f)
if isinstance(data, list):
old_manifest = {entry['id']: entry for entry in data}
new_manifest = {}
components_dir = repo_root / 'website/src/data/components'
changes_count = 0
if not components_dir.exists():
print(f"Warning: Components directory not found: {components_dir}", file=sys.stderr)
return list(old_manifest.values()), 0
for part in scan_site_components(components_dir):
part_id = part['id']
old_entry = old_manifest.get(part_id)
# Calculate local path
# vendor/{owner}-{repo}/{path}
local_path = f"vendor/{part['owner']}-{part['repo']}/{part['source_path']}"
source_repo = f"{part['owner']}/{part['repo']}"
orig_site_json = str(part['orig_site_json'].relative_to(repo_root))
entry = {
'id': part_id,
'source_repo': source_repo,
'source_path': part['source_path'],
'source_ref': part['ref'],
'local_path': local_path,
'orig_site_json': orig_site_json,
'orig_item_id': part_id
}
# Preserve state if exists and config matches
if old_entry:
# Check if source config changed
config_changed = (
old_entry.get('source_repo') != source_repo or
old_entry.get('source_path') != part['source_path'] or
old_entry.get('source_ref') != part['ref']
)
if not config_changed:
# Copy state
for key in ['pinned_sha', 'pinned_raw_url', 'checksum_sha256', 'last_checked', 'status', 'license', 'upstream_latest_sha']:
if key in old_entry:
entry[key] = old_entry[key]
else:
print(f" Config changed for {part_id}, resetting status.")
entry['status'] = 'pending'
entry['pinned_sha'] = None
changes_count += 1
# Check if we updated manifest info (like orig_site_json moved)
if (old_entry.get('orig_site_json') != orig_site_json or
old_entry.get('local_path') != local_path):
changes_count += 1
else:
print(f" New part found: {part_id}")
entry['status'] = 'pending'
entry['pinned_sha'] = None
changes_count += 1
new_manifest[part_id] = entry
# Check for removed items
    removed_count = len(set(old_manifest) - set(new_manifest))
if removed_count > 0:
print(f" Removed {removed_count} parts that are no longer in site JSONs.")
changes_count += removed_count
return sorted(new_manifest.values(), key=lambda x: x['id']), changes_count
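# A regenerated manifest entry, serialized to JSON, looks roughly like this
# (illustrative values; pinning state keys are filled in once the entry is pinned):
# {
#     "id": "part-left-grip",
#     "source_repo": "owner/repo",
#     "source_path": "stl/grip.stl",
#     "source_ref": "main",
#     "local_path": "vendor/owner-repo/stl/grip.stl",
#     "orig_site_json": "website/src/data/components/body.json",
#     "orig_item_id": "part-left-grip",
#     "status": "pending",
#     "pinned_sha": null
# }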
def update_manifest_entry(
entry: Dict,
api: GitHubAPI,
@@ -254,6 +430,31 @@ def update_manifest_entry(
local_path = Path(entry['local_path'])
if not local_path.is_absolute():
local_path = repo_root / local_path
# Check if file exists and is already at the correct version
current_pinned_sha = entry.get('pinned_sha')
if current_pinned_sha == commit_sha and local_path.exists():
if dry_run:
print(f" [DRY RUN] File up to date ({commit_sha}), would skip download.")
else:
print(f" File up to date ({commit_sha}), skipping download.")
# Ensure checksum is present
if 'checksum_sha256' not in entry or not entry['checksum_sha256']:
entry['checksum_sha256'] = compute_sha256(local_path)
entry['pinned_sha'] = commit_sha
entry['pinned_raw_url'] = pinned_raw_url
entry['last_checked'] = datetime.now(timezone.utc).isoformat()
entry['upstream_latest_sha'] = commit_sha
entry['status'] = 'up-to-date'
# If license is missing, try to get it, otherwise keep existing
if 'license' not in entry and not dry_run:
license_info = api.get_license(owner, repo, commit_sha)
if license_info:
entry['license'] = license_info
return entry
if dry_run:
print(f" [DRY RUN] Would download to {local_path}")
@@ -309,45 +510,24 @@ def sync_to_site_json(entry: Dict, repo_root: Path) -> bool:
data = json.load(f)
# Find the printed part in the nested structure
-    def find_and_update_part(obj, target_id, path=''):
+    def find_and_update_part(obj, target_id):
if isinstance(obj, dict):
-            # Check if this is a printedParts array
-            if 'printedParts' in obj and isinstance(obj['printedParts'], list):
-                for part in obj['printedParts']:
-                    if isinstance(part, dict) and part.get('id') == target_id:
-                        # Update this part
-                        if 'vendor' not in part:
-                            part['vendor'] = {}
-                        part['vendor'].update({
-                            'manifest_id': entry['id'],
-                            'local_path': entry['local_path'],
-                            'pinned_sha': entry['pinned_sha'],
-                            'pinned_raw_url': entry['pinned_raw_url'],
-                            'checksum_sha256': entry['checksum_sha256'],
-                            'last_checked': entry['last_checked'],
-                            'status': entry['status']
-                        })
-                        return True
-            # Check bodyParts, knobs, etc.
-            for key in ['bodyParts', 'knobs']:
-                if key in obj and isinstance(obj[key], list):
-                    for part in obj[key]:
-                        if isinstance(part, dict) and part.get('id') == target_id:
-                            if 'vendor' not in part:
-                                part['vendor'] = {}
-                            part['vendor'].update({
-                                'manifest_id': entry['id'],
-                                'local_path': entry['local_path'],
-                                'pinned_sha': entry['pinned_sha'],
-                                'pinned_raw_url': entry['pinned_raw_url'],
-                                'checksum_sha256': entry['checksum_sha256'],
-                                'last_checked': entry['last_checked'],
-                                'status': entry['status']
-                            })
-                            return True
-            # Recursively search
+            # If this object IS the part (has the ID)
+            if obj.get('id') == target_id:
+                if 'vendor' not in obj:
+                    obj['vendor'] = {}
+                obj['vendor'].update({
+                    'manifest_id': entry['id'],
+                    'local_path': entry['local_path'],
+                    'pinned_sha': entry['pinned_sha'],
+                    'pinned_raw_url': entry['pinned_raw_url'],
+                    'checksum_sha256': entry['checksum_sha256'],
+                    'last_checked': entry['last_checked'],
+                    'status': entry['status']
+                })
+                return True
+            # Recursively search values
for value in obj.values():
if find_and_update_part(value, target_id):
return True
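    # After a successful sync, the matched part in the site JSON carries a
    # "vendor" block like this (sketch; placeholder values):
    #   "vendor": {
    #     "manifest_id": "part-left-grip",
    #     "local_path": "vendor/owner-repo/stl/grip.stl",
    #     "pinned_sha": "<40-char commit sha>",
    #     "pinned_raw_url": "https://raw.githubusercontent.com/owner/repo/<sha>/stl/grip.stl",
    #     "checksum_sha256": "<hex digest>",
    #     "last_checked": "2026-01-07T07:06:43+00:00",
    #     "status": "up-to-date"
    #   }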
@@ -396,9 +576,9 @@ def main():
help='Show what would be done without downloading files'
)
parser.add_argument(
-        '--sync-site',
+        '--no-sync',
action='store_true',
-        help='Sync vendor metadata back to site JSON files'
+        help='Skip syncing vendor metadata back to site JSON files'
)
parser.add_argument(
'--delay',
@@ -406,6 +586,16 @@ def main():
default=0.5,
help='Delay between API requests in seconds (default: 0.5)'
)
parser.add_argument(
'--no-scan',
action='store_true',
help='Skip scanning website for new components'
)
parser.add_argument(
'--scan-only',
action='store_true',
help='Only scan website and update manifest, do not check/download files'
)
args = parser.parse_args()
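    # Typical invocations (sketch; the script filename below is assumed):
    #   python pin_vendor_assets.py                      # scan site, update manifest, pin + download
    #   python pin_vendor_assets.py --scan-only          # refresh the manifest only
    #   python pin_vendor_assets.py --no-scan --dry-run  # check existing entries, write nothing
    #   python pin_vendor_assets.py --no-sync            # pin/download but leave site JSONs untouched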
@@ -414,13 +604,30 @@ def main():
manifest_path = (script_dir / args.manifest).resolve()
repo_root = script_dir
-    if not manifest_path.exists():
-        print(f"Error: Manifest file not found: {manifest_path}", file=sys.stderr)
-        sys.exit(1)
-    # Load manifest
-    with open(manifest_path, 'r', encoding='utf-8') as f:
-        manifest_data = json.load(f)
+    # Regenerate manifest from website scan (unless disabled)
+    if not args.no_scan and not args.entry:
+        manifest_list, changes = regenerate_manifest(manifest_path, repo_root)
+        if changes > 0:
+            print(f"Manifest regenerated with {changes} changes.")
+            if not args.dry_run:
+                manifest_path.parent.mkdir(parents=True, exist_ok=True)
+                with open(manifest_path, 'w', encoding='utf-8') as f:
+                    json.dump(manifest_list, f, indent=2, sort_keys=False)
+        else:
+            print("No changes in manifest structure detected.")
+        if args.scan_only:
+            return
+        # Reload manifest data for processing
+        manifest_data = manifest_list
+    else:
+        if not manifest_path.exists():
+            print(f"Error: Manifest file not found: {manifest_path}", file=sys.stderr)
+            sys.exit(1)
+        with open(manifest_path, 'r', encoding='utf-8') as f:
+            manifest_data = json.load(f)
# Convert to dict if it's a list
if isinstance(manifest_data, list):
@@ -446,7 +653,7 @@ def main():
updated_entry = update_manifest_entry(entry, api, repo_root, dry_run=args.dry_run)
manifest[entry_id] = updated_entry
-        if args.sync_site and not args.dry_run:
+        if not args.no_sync and not args.dry_run:
sync_to_site_json(updated_entry, repo_root)
updated_count += 1