328 lines
11 KiB
Python
Executable File
328 lines
11 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Generate vendor manifest from site component JSON files.
|
|
|
|
Scans /src/data/components/*.json for printedParts entries with GitHub URLs
|
|
and creates or updates manifest/vendor_manifest.json.
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional, Any
|
|
from urllib.parse import urlparse, parse_qs, unquote
|
|
|
|
|
|
def parse_github_url(url: str) -> Optional[Dict[str, str]]:
    """
    Parse a GitHub file URL into its owner, repo, ref, and path.

    Supports:
    - https://github.com/owner/repo/blob/<ref>/path/to/file
    - https://github.com/owner/repo/raw/<ref>/path/to/file
    - https://raw.githubusercontent.com/owner/repo/<ref>/path/to/file

    Query strings (e.g. ``?raw=true``) and fragments are ignored, and the
    file path is percent-decoded. The ref is taken to be a single path
    segment, so branch names containing ``/`` cannot be told apart from
    directories and are not supported.

    Args:
        url: The URL to parse.

    Returns:
        A dict with ``owner``, ``repo``, ``ref`` and ``path`` keys, or None
        when ``url`` is not a non-empty string, is not hosted on GitHub, or
        does not point at a file.
    """
    if not url or not isinstance(url, str):
        return None

    try:
        parsed = urlparse(url)
        host = (parsed.hostname or '').lower()
    except ValueError:
        # urlparse rejects some malformed URLs (e.g. broken IPv6 literals).
        return None

    # urlparse has already separated the query string and fragment, so only
    # the path component needs to be inspected from here on.
    segments = parsed.path.strip('/').split('/')

    # Match on the real hostname rather than a substring of the URL:
    # "raw.githubusercontent.com" does not contain "github.com", and
    # unrelated hosts such as "notgithub.com" do.
    if host == 'raw.githubusercontent.com':
        # Layout: owner/repo/<ref>/path/to/file
        if len(segments) < 4:
            return None
        owner, repo, ref = segments[:3]
        file_segments = segments[3:]
    elif host in ('github.com', 'www.github.com'):
        # Layout: owner/repo/<blob|raw>/<ref>/path/to/file
        if len(segments) < 5 or segments[2] not in ('blob', 'raw'):
            return None
        owner, repo, _mode, ref = segments[:4]
        file_segments = segments[4:]
    else:
        return None

    # A doubled slash in the URL produces an empty segment.
    if not (owner and repo and ref):
        return None

    return {
        'owner': owner,
        'repo': repo,
        'ref': ref,
        'path': unquote('/'.join(file_segments)),
    }
|
|
|
|
|
|
def find_printed_parts(data: Any, path: str = '') -> List[Dict[str, Any]]:
    """
    Recursively find all part entries in a nested JSON structure.

    Every dict in the structure is checked for 'printedParts', 'bodyParts'
    and 'knobs' keys. Each dict item carrying an 'id' key inside such a list
    is reported. Values under those keys that are not lists are ignored.

    Args:
        data: Decoded JSON value (dict, list or scalar) to search.
        path: Path of the JSON file being searched; copied into every result.

    Returns:
        A list of dicts with keys 'part' (the part dict itself),
        'json_path' (the ``path`` argument) and 'part_id' (the part's 'id').
    """
    found: List[Dict[str, Any]] = []

    if isinstance(data, dict):
        # All three keys get the same list check so that a null or scalar
        # value is skipped instead of raising when iterated.
        for key in ('printedParts', 'bodyParts', 'knobs'):
            candidates = data.get(key)
            if not isinstance(candidates, list):
                continue
            for part in candidates:
                if isinstance(part, dict) and 'id' in part:
                    found.append({
                        'part': part,
                        'json_path': path,
                        'part_id': part['id'],
                    })

        # Descend into every nested container, including the part lists
        # themselves, since parts may hold further part lists.
        for value in data.values():
            if isinstance(value, (dict, list)):
                found.extend(find_printed_parts(value, path))

    elif isinstance(data, list):
        for item in data:
            found.extend(find_printed_parts(item, path))

    return found
|
|
|
|
|
|
def generate_manifest_id(part_id: str, owner: str, repo: str, path: str) -> str:
    """Return the manifest ID: the part ID if truthy, else a repo/path slug."""
    if not part_id:
        # Flatten the file path so that it can be embedded in a single token.
        flattened = path.translate({ord('/'): '-', ord(' '): '-'})
        candidate = f"{owner}-{repo}-{flattened}"
        # Keep only letters, digits, underscores and hyphens, capped at
        # 100 characters.
        cleaned = re.sub(r'[^a-zA-Z0-9_-]', '', candidate)
        return cleaned[:100]

    return part_id
|
|
|
|
|
|
def generate_local_path(owner: str, repo: str, path: str) -> str:
    """Build the vendor-relative location of a file: vendor/<owner>-<repo>/<path>."""
    return f"vendor/{owner}-{repo}/{path}"
|
|
|
|
|
|
def load_existing_manifest(manifest_path: Path) -> Dict[str, Dict]:
    """
    Load the existing manifest as a dict keyed by entry id.

    Three on-disk layouts are accepted: a list of entries, a dict with an
    'entries' list, or a dict that is already keyed by id (returned as-is).

    Args:
        manifest_path: Location of the manifest JSON file.

    Returns:
        The entries keyed by id. An empty dict is returned when the file
        does not exist, holds some other JSON value, or cannot be read or
        interpreted; in the last case a warning is printed to stderr.
    """
    if not manifest_path.exists():
        return {}

    try:
        with open(manifest_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        if isinstance(data, list):
            return {entry['id']: entry for entry in data}
        if isinstance(data, dict) and 'entries' in data:
            return {entry['id']: entry for entry in data['entries']}
        if isinstance(data, dict):
            # Assume it's already keyed by id
            return data
    except (OSError, ValueError, KeyError, TypeError) as e:
        # ValueError covers both invalid JSON and invalid UTF-8; TypeError
        # covers entries that are not dicts or whose id is unhashable.
        print(f"Warning: Could not parse existing manifest: {e}", file=sys.stderr)

    return {}
|
|
|
|
|
|
def scan_component_files(site_dir: Path, repo_root: Path) -> List[Dict[str, Any]]:
    """
    Scan all component JSON files and extract parts that have GitHub URLs.

    Files are visited in sorted order so that the result, and therefore the
    entry that wins when two files declare the same part id, does not depend
    on filesystem listing order.

    Args:
        site_dir: Directory whose ``*.json`` files are scanned
            (non-recursively).
        repo_root: Base directory used to express each JSON file's path
            relatively; files outside it are recorded as given.

    Returns:
        One dict per usable part with the keys 'manifest_id', 'part_id',
        'part', 'json_file', 'github_info' and 'local_path'. Parts without a
        'url' are skipped silently; parts whose URL is not a recognised
        GitHub file URL are skipped with a warning. Unreadable or invalid
        files are skipped with a warning. A missing ``site_dir`` yields an
        empty list after an error message.
    """
    entries: List[Dict[str, Any]] = []

    if not site_dir.exists():
        print(f"Error: Site directory does not exist: {site_dir}", file=sys.stderr)
        return entries

    for json_file in sorted(site_dir.glob('*.json')):
        # Only reading and decoding is guarded. ValueError covers both
        # invalid JSON and files that are not valid UTF-8.
        try:
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            print(f"Warning: Could not read {json_file}: {e}", file=sys.stderr)
            continue

        # Store relative path from repo root; it is the same for every part
        # in this file, so compute it once.
        try:
            json_file_rel = json_file.relative_to(repo_root)
        except ValueError:
            # If not relative, use the path as given
            json_file_rel = json_file

        for item in find_printed_parts(data, str(json_file)):
            part = item['part']
            url = part.get('url')
            if not url:
                continue

            github_info = parse_github_url(url)
            if not github_info:
                print(f"Warning: Skipping non-GitHub URL in {json_file}: {url}", file=sys.stderr)
                continue

            part_id = item['part_id']
            owner = github_info['owner']
            repo = github_info['repo']
            source_path = github_info['path']

            entries.append({
                'manifest_id': generate_manifest_id(part_id, owner, repo, source_path),
                'part_id': part_id,
                'part': part,
                'json_file': str(json_file_rel),
                'github_info': github_info,
                'local_path': generate_local_path(owner, repo, source_path),
            })

    return entries
|
|
|
|
|
|
def create_or_update_manifest_entry(
    existing_entry: Optional[Dict],
    new_data: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Create a new manifest entry or merge new source info into an existing one.

    Args:
        existing_entry: The entry currently stored for this id, or None (or
            an empty dict) when there is none.
        new_data: A scanned record with the keys 'manifest_id', 'part_id',
            'json_file', 'local_path' and 'github_info' (which holds 'owner',
            'repo', 'path' and optionally 'ref', defaulting to 'main').

    Returns:
        A new dict; ``existing_entry`` is not modified. When merging, the
        source and origin fields are overwritten and all other fields
        (pinned_sha, checksum, etc.) are kept. An existing entry that lacks
        an 'id' is given the manifest id.
    """
    github_info = new_data['github_info']
    manifest_id = new_data['manifest_id']

    # Fields that always follow the site JSON, whether merging or creating.
    source_fields = {
        'source_repo': f"{github_info['owner']}/{github_info['repo']}",
        'source_path': github_info['path'],
        'source_ref': github_info.get('ref', 'main'),
        'local_path': new_data['local_path'],
        'orig_site_json': new_data['json_file'],
        'orig_item_id': new_data['part_id'],
    }

    if existing_entry:
        # Merge: keep existing pinned data, update source info if changed
        entry = existing_entry.copy()
        # Manifests stored as a dict keyed by id may hold entries without
        # their own 'id'; fill it in so that every entry carries one.
        entry.setdefault('id', manifest_id)
        entry.update(source_fields)
        return entry

    # Create new entry (key order is the order written to the manifest)
    return {
        'id': manifest_id,
        'source_repo': source_fields['source_repo'],
        'source_path': source_fields['source_path'],
        'source_ref': source_fields['source_ref'],
        'pinned_sha': None,
        'pinned_raw_url': None,
        'local_path': source_fields['local_path'],
        'checksum_sha256': None,
        'last_checked': None,
        'upstream_latest_sha': None,
        'status': 'unknown',
        'license': None,
        'orig_site_json': source_fields['orig_site_json'],
        'orig_item_id': source_fields['orig_item_id'],
    }
|
|
|
|
|
|
def main():
    """
    Command-line entry point: scan component files and write the manifest.

    Relative --site-dir and --manifest values are taken relative to the
    parent of the directory containing this script, which is treated as the
    repository root. Existing manifest entries are merged with the scanned
    data, and the result is written as a JSON list sorted by id. Exits with
    status 1 when the scan yields no entries.
    """
    parser = argparse.ArgumentParser(
        description='Generate vendor manifest from site component JSON files'
    )
    parser.add_argument(
        '--site-dir',
        type=Path,
        default=Path('website/src/data/components'),
        help='Directory containing component JSON files (default: website/src/data/components)'
    )
    parser.add_argument(
        '--manifest',
        type=Path,
        default=Path('manifest/vendor_manifest.json'),
        help='Path to manifest file (default: manifest/vendor_manifest.json)'
    )

    args = parser.parse_args()

    # Resolve the root as well as the paths derived from it: the scanned
    # files are resolved (absolute), and they can only be expressed relative
    # to a root that is absolute too. Absolute arguments override the root.
    repo_root = Path(__file__).parent.parent.resolve()
    site_dir = (repo_root / args.site_dir).resolve()
    manifest_path = (repo_root / args.manifest).resolve()

    # Ensure manifest directory exists
    manifest_path.parent.mkdir(parents=True, exist_ok=True)

    # Load existing manifest
    existing_manifest = load_existing_manifest(manifest_path)

    # Scan component files
    print(f"Scanning component files in {site_dir}...")
    entries = scan_component_files(site_dir, repo_root=repo_root)

    if not entries:
        print("No GitHub URLs found in component files.", file=sys.stderr)
        sys.exit(1)

    # Create or update manifest entries, remembering which ids were touched
    # so that the summary reports real counts.
    updated_manifest = existing_manifest.copy()
    added_ids = set()
    updated_ids = set()

    for entry_data in entries:
        manifest_id = entry_data['manifest_id']
        if manifest_id in existing_manifest:
            updated_ids.add(manifest_id)
        else:
            added_ids.add(manifest_id)

        updated_manifest[manifest_id] = create_or_update_manifest_entry(
            updated_manifest.get(manifest_id), entry_data
        )

    # Convert to sorted list for deterministic output
    manifest_list = sorted(updated_manifest.values(), key=lambda x: x['id'])

    # Write manifest
    print(f"Writing manifest to {manifest_path}...")
    with open(manifest_path, 'w', encoding='utf-8') as f:
        json.dump(manifest_list, f, indent=2, sort_keys=False)

    print(f"Generated {len(manifest_list)} manifest entries.")

    # Show summary
    if added_ids:
        print(f"Added {len(added_ids)} new entries.")
    if updated_ids:
        print(f"Updated {len(updated_ids)} existing entries.")
|
|
|
|
|
|
# Run the generator only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
|