refactor: Restructure data files into component-specific and common directories, add new UI components, and update project documentation.
@@ -2,8 +2,8 @@
 """
 Download and pin external asset files from GitHub.
 
-Downloads files specified in manifest, pins them to commit SHAs,
-computes checksums, and optionally syncs vendor metadata back to site JSON files.
+Automatically scans website/src/data/components for parts with GitHub URLs,
+updates the manifest, and then downloads/pins files.
 """
 
 import argparse
@@ -14,8 +14,8 @@ import sys
 import time
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Dict, List, Optional
-from urllib.parse import urlparse
+from typing import Dict, List, Optional, Tuple, Generator, Any
+from urllib.parse import urlparse, unquote, parse_qs
 
 import requests
 
@@ -226,6 +226,182 @@ def download_file(url: str, dest_path: Path) -> bool:
     return False
 
 
+def parse_github_url(url: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
+    """
+    Parse a GitHub URL to return (owner, repo, ref, path).
+
+    Supports:
+    - https://github.com/owner/repo/blob/<ref>/path/to/file
+    - https://github.com/owner/repo/raw/<ref>/path/to/file
+    - https://raw.githubusercontent.com/owner/repo/<ref>/path/to/file
+    """
+    if not url or not isinstance(url, str):
+        return None, None, None, None
+
+    # Check if it's a GitHub URL. Note that 'github.com' is not a substring
+    # of 'raw.githubusercontent.com', so test for both hosts.
+    if 'github.com' not in url and 'githubusercontent.com' not in url:
+        return None, None, None, None
+
+    try:
+        # Handle raw.githubusercontent.com
+        if 'raw.githubusercontent.com' in url:
+            match_parts = url.split('/')
+            # https://raw.githubusercontent.com/OWNER/REPO/REF/PATH...
+            # parts: [https:, , raw.githubusercontent.com, OWNER, REPO, REF, PATH...]
+            if len(match_parts) >= 6:
+                owner = match_parts[3]
+                repo = match_parts[4]
+                ref = match_parts[5]
+                path = '/'.join(match_parts[6:]).split('?')[0]
+                return owner, repo, ref, unquote(path)
+
+        # Handle github.com blob/raw URLs
+        parsed = urlparse(url)
+        path = parsed.path.strip('/')
+        path_parts = path.split('/')
+
+        if len(path_parts) >= 4:
+            owner = path_parts[0]
+            repo = path_parts[1]
+            mode = path_parts[2]  # 'blob' or 'raw'
+
+            if mode in ('blob', 'raw'):
+                ref = path_parts[3]
+                file_path = '/'.join(path_parts[4:])
+
+                # Check query params for ?raw=true
+                query_params = parse_qs(parsed.query)
+                if 'raw' in query_params or mode == 'raw':
+                    return owner, repo, ref, unquote(file_path)
+
+                # Also treat 'blob' as a valid source if we just want the path
+                return owner, repo, ref, unquote(file_path)
+
+    except Exception:
+        pass
+
+    return None, None, None, None
+
+
+def scan_site_components(components_dir: Path) -> Generator[Dict[str, Any], None, None]:
+    """Recursively scan JSON files for parts with GitHub URLs."""
+    for json_file in components_dir.rglob('*.json'):
+        try:
+            with open(json_file, 'r', encoding='utf-8') as f:
+                data = json.load(f)
+
+            # Breadth-first walk of the JSON tree to find parts
+            queue = [data]
+            while queue:
+                item = queue.pop(0)
+                if isinstance(item, dict):
+                    # Check if this item is a part
+                    if 'id' in item and 'url' in item and item['url']:
+                        owner, repo, ref, source_path = parse_github_url(item['url'])
+                        if owner and repo and source_path:
+                            yield {
+                                'id': item['id'],
+                                'url': item['url'],
+                                'owner': owner,
+                                'repo': repo,
+                                'ref': ref or 'main',
+                                'source_path': source_path,
+                                'orig_site_json': json_file
+                            }
+
+                    # Add children to queue
+                    queue.extend(item.values())
+                elif isinstance(item, list):
+                    queue.extend(item)
+
+        except (json.JSONDecodeError, IOError) as e:
+            print(f"Warning: Could not read {json_file}: {e}", file=sys.stderr)
+
+
+def regenerate_manifest(manifest_path: Path, repo_root: Path) -> Tuple[List[Dict], int]:
+    """
+    Regenerate manifest from site data.
+
+    Preserves state of existing entries.
+    Returns (new_manifest_list, changes_count).
+    """
+    print("Scanning website components to regenerate manifest...")
+
+    # Load existing manifest to preserve state
+    old_manifest = {}
+    if manifest_path.exists():
+        with open(manifest_path, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+        if isinstance(data, list):
+            old_manifest = {entry['id']: entry for entry in data}
+
+    new_manifest = {}
+    components_dir = repo_root / 'website/src/data/components'
+    changes_count = 0
+
+    if not components_dir.exists():
+        print(f"Warning: Components directory not found: {components_dir}", file=sys.stderr)
+        return list(old_manifest.values()), 0
+
+    for part in scan_site_components(components_dir):
+        part_id = part['id']
+        old_entry = old_manifest.get(part_id)
+
+        # Calculate local path: vendor/{owner}-{repo}/{path}
+        local_path = f"vendor/{part['owner']}-{part['repo']}/{part['source_path']}"
+        source_repo = f"{part['owner']}/{part['repo']}"
+        orig_site_json = str(part['orig_site_json'].relative_to(repo_root))
+
+        entry = {
+            'id': part_id,
+            'source_repo': source_repo,
+            'source_path': part['source_path'],
+            'source_ref': part['ref'],
+            'local_path': local_path,
+            'orig_site_json': orig_site_json,
+            'orig_item_id': part_id
+        }
+
+        # Preserve state if the entry exists and its config matches
+        if old_entry:
+            # Check if source config changed
+            config_changed = (
+                old_entry.get('source_repo') != source_repo or
+                old_entry.get('source_path') != part['source_path'] or
+                old_entry.get('source_ref') != part['ref']
+            )
+
+            if not config_changed:
+                # Copy state
+                for key in ['pinned_sha', 'pinned_raw_url', 'checksum_sha256', 'last_checked', 'status', 'license', 'upstream_latest_sha']:
+                    if key in old_entry:
+                        entry[key] = old_entry[key]
+            else:
+                print(f"  Config changed for {part_id}, resetting status.")
+                entry['status'] = 'pending'
+                entry['pinned_sha'] = None
+                changes_count += 1
+
+            # Check if we updated manifest info (like orig_site_json moved)
+            if (old_entry.get('orig_site_json') != orig_site_json or
+                    old_entry.get('local_path') != local_path):
+                changes_count += 1
+        else:
+            print(f"  New part found: {part_id}")
+            entry['status'] = 'pending'
+            entry['pinned_sha'] = None
+            changes_count += 1
+
+        new_manifest[part_id] = entry
+
+    # Check for removed items (in the old manifest but not rediscovered)
+    removed_count = len(set(old_manifest) - set(new_manifest))
+    if removed_count > 0:
+        print(f"  Removed {removed_count} parts that are no longer in site JSONs.")
+        changes_count += removed_count
+
+    return sorted(new_manifest.values(), key=lambda x: x['id']), changes_count
+
+
 def update_manifest_entry(
     entry: Dict,
     api: GitHubAPI,
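For orientation, a quick sketch of what the new parse_github_url returns for the three URL shapes named in its docstring; owner, repo, and file names are illustrative placeholders, not values from this repository:

    parse_github_url('https://github.com/octocat/widgets/blob/main/parts/knob.stl')
    # -> ('octocat', 'widgets', 'main', 'parts/knob.stl')
    parse_github_url('https://github.com/octocat/widgets/raw/v1.2/parts/knob.stl')
    # -> ('octocat', 'widgets', 'v1.2', 'parts/knob.stl')
    parse_github_url('https://raw.githubusercontent.com/octocat/widgets/main/parts/knob.stl')
    # -> ('octocat', 'widgets', 'main', 'parts/knob.stl')
    parse_github_url('https://example.com/elsewhere/file.stl')
    # -> (None, None, None, None)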
@@ -254,6 +430,31 @@ def update_manifest_entry(
     local_path = Path(entry['local_path'])
     if not local_path.is_absolute():
         local_path = repo_root / local_path
 
+    # Check if file exists and is already at the correct version
+    current_pinned_sha = entry.get('pinned_sha')
+    if current_pinned_sha == commit_sha and local_path.exists():
+        if dry_run:
+            print(f"  [DRY RUN] File up to date ({commit_sha}), would skip download.")
+        else:
+            print(f"  File up to date ({commit_sha}), skipping download.")
+            # Ensure checksum is present
+            if 'checksum_sha256' not in entry or not entry['checksum_sha256']:
+                entry['checksum_sha256'] = compute_sha256(local_path)
+
+        entry['pinned_sha'] = commit_sha
+        entry['pinned_raw_url'] = pinned_raw_url
+        entry['last_checked'] = datetime.now(timezone.utc).isoformat()
+        entry['upstream_latest_sha'] = commit_sha
+        entry['status'] = 'up-to-date'
+
+        # If license is missing, try to get it, otherwise keep existing
+        if 'license' not in entry and not dry_run:
+            license_info = api.get_license(owner, repo, commit_sha)
+            if license_info:
+                entry['license'] = license_info
+
+        return entry
+
     if dry_run:
         print(f"  [DRY RUN] Would download to {local_path}")
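The skip path above leans on compute_sha256, which is defined earlier in this script and not shown in this diff. A minimal sketch of the conventional implementation it presumably matches (hex digest over the file bytes, streamed in chunks):

    import hashlib
    from pathlib import Path

    def compute_sha256(path: Path) -> str:
        # Stream in chunks so large vendored assets aren't read into memory at once.
        h = hashlib.sha256()
        with open(path, 'rb') as f:
            for chunk in iter(lambda: f.read(8192), b''):
                h.update(chunk)
        return h.hexdigest()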
@@ -309,45 +510,24 @@ def sync_to_site_json(entry: Dict, repo_root: Path) -> bool:
         data = json.load(f)
 
     # Find the printed part in the nested structure
-    def find_and_update_part(obj, target_id, path=''):
+    def find_and_update_part(obj, target_id):
         if isinstance(obj, dict):
-            # Check if this is a printedParts array
-            if 'printedParts' in obj and isinstance(obj['printedParts'], list):
-                for part in obj['printedParts']:
-                    if isinstance(part, dict) and part.get('id') == target_id:
-                        # Update this part
-                        if 'vendor' not in part:
-                            part['vendor'] = {}
-                        part['vendor'].update({
-                            'manifest_id': entry['id'],
-                            'local_path': entry['local_path'],
-                            'pinned_sha': entry['pinned_sha'],
-                            'pinned_raw_url': entry['pinned_raw_url'],
-                            'checksum_sha256': entry['checksum_sha256'],
-                            'last_checked': entry['last_checked'],
-                            'status': entry['status']
-                        })
-                        return True
-
-            # Check bodyParts, knobs, etc.
-            for key in ['bodyParts', 'knobs']:
-                if key in obj and isinstance(obj[key], list):
-                    for part in obj[key]:
-                        if isinstance(part, dict) and part.get('id') == target_id:
-                            if 'vendor' not in part:
-                                part['vendor'] = {}
-                            part['vendor'].update({
-                                'manifest_id': entry['id'],
-                                'local_path': entry['local_path'],
-                                'pinned_sha': entry['pinned_sha'],
-                                'pinned_raw_url': entry['pinned_raw_url'],
-                                'checksum_sha256': entry['checksum_sha256'],
-                                'last_checked': entry['last_checked'],
-                                'status': entry['status']
-                            })
-                            return True
-
-            # Recursively search
+            # If this object IS the part (has the ID)
+            if obj.get('id') == target_id:
+                if 'vendor' not in obj:
+                    obj['vendor'] = {}
+                obj['vendor'].update({
+                    'manifest_id': entry['id'],
+                    'local_path': entry['local_path'],
+                    'pinned_sha': entry['pinned_sha'],
+                    'pinned_raw_url': entry['pinned_raw_url'],
+                    'checksum_sha256': entry['checksum_sha256'],
+                    'last_checked': entry['last_checked'],
+                    'status': entry['status']
+                })
+                return True
+
+            # Recursively search values
             for value in obj.values():
                 if find_and_update_part(value, target_id):
                     return True
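After a successful sync, a matched part in a site JSON carries a vendor block with the seven fields listed above. An illustrative entry, with placeholder values only:

    part = {
        "id": "knob-large",
        "url": "https://github.com/octocat/widgets/blob/main/parts/knob.stl",
        "vendor": {
            "manifest_id": "knob-large",
            "local_path": "vendor/octocat-widgets/parts/knob.stl",
            "pinned_sha": "<40-char commit SHA>",
            "pinned_raw_url": "https://raw.githubusercontent.com/octocat/widgets/<sha>/parts/knob.stl",
            "checksum_sha256": "<64-char hex digest>",
            "last_checked": "2024-01-01T00:00:00+00:00",
            "status": "up-to-date"
        }
    }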
@@ -396,9 +576,9 @@ def main():
         help='Show what would be done without downloading files'
     )
     parser.add_argument(
-        '--sync-site',
+        '--no-sync',
         action='store_true',
-        help='Sync vendor metadata back to site JSON files'
+        help='Skip syncing vendor metadata back to site JSON files'
     )
     parser.add_argument(
         '--delay',
@@ -406,6 +586,16 @@ def main():
         default=0.5,
         help='Delay between API requests in seconds (default: 0.5)'
    )
+    parser.add_argument(
+        '--no-scan',
+        action='store_true',
+        help='Skip scanning website for new components'
+    )
+    parser.add_argument(
+        '--scan-only',
+        action='store_true',
+        help='Only scan website and update manifest, do not check/download files'
+    )
 
     args = parser.parse_args()
 
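Taken together with the --sync-site to --no-sync inversion above, the reworked flags compose roughly as follows (the script name here is hypothetical):

    # Refresh the manifest from website/src/data/components and exit:
    #   python pin_assets.py --scan-only
    # Process the existing manifest without re-scanning the site:
    #   python pin_assets.py --no-scan
    # Download and pin, but leave site JSONs untouched (sync is now the default):
    #   python pin_assets.py --no-sync
    # Preview everything without writing:
    #   python pin_assets.py --dry-run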
@@ -414,13 +604,30 @@ def main():
     manifest_path = (script_dir / args.manifest).resolve()
     repo_root = script_dir
 
-    if not manifest_path.exists():
-        print(f"Error: Manifest file not found: {manifest_path}", file=sys.stderr)
-        sys.exit(1)
-
-    # Load manifest
-    with open(manifest_path, 'r', encoding='utf-8') as f:
-        manifest_data = json.load(f)
+    # Regenerate manifest from website scan (unless disabled)
+    if not args.no_scan and not args.entry:
+        manifest_list, changes = regenerate_manifest(manifest_path, repo_root)
+        if changes > 0:
+            print(f"Manifest regenerated with {changes} changes.")
+            if not args.dry_run:
+                manifest_path.parent.mkdir(parents=True, exist_ok=True)
+                with open(manifest_path, 'w', encoding='utf-8') as f:
+                    json.dump(manifest_list, f, indent=2, sort_keys=False)
+        else:
+            print("No changes in manifest structure detected.")
+
+        if args.scan_only:
+            return
+
+        # Use the regenerated manifest for processing
+        manifest_data = manifest_list
+    else:
+        if not manifest_path.exists():
+            print(f"Error: Manifest file not found: {manifest_path}", file=sys.stderr)
+            sys.exit(1)
+
+        with open(manifest_path, 'r', encoding='utf-8') as f:
+            manifest_data = json.load(f)
 
     # Convert to dict if it's a list
     if isinstance(manifest_data, list):
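The manifest read and rewritten here is a JSON list of entries shaped as regenerate_manifest builds them. An illustrative entry for a freshly discovered part, with made-up values:

    entry = {
        "id": "knob-large",
        "source_repo": "octocat/widgets",
        "source_path": "parts/knob.stl",
        "source_ref": "main",
        "local_path": "vendor/octocat-widgets/parts/knob.stl",
        "orig_site_json": "website/src/data/components/knobs.json",
        "orig_item_id": "knob-large",
        "status": "pending",
        "pinned_sha": None
    }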
@@ -446,7 +653,7 @@ def main():
         updated_entry = update_manifest_entry(entry, api, repo_root, dry_run=args.dry_run)
         manifest[entry_id] = updated_entry
 
-        if args.sync_site and not args.dry_run:
+        if not args.no_sync and not args.dry_run:
             sync_to_site_json(updated_entry, repo_root)
 
         updated_count += 1