Update package version to 0.0.1-beta, add new dependencies including ExcelJS, and refactor export utilities to use ExcelJS for Excel file generation. Enhance component JSON files with vendor information for improved asset management. Add scripts/generate_manifest_from_site.py to generate a vendor manifest from the component JSON files.
scripts/generate_manifest_from_site.py (Executable file, 327 lines added)
@@ -0,0 +1,327 @@
#!/usr/bin/env python3
"""
Generate vendor manifest from site component JSON files.

Scans /src/data/components/*.json for printedParts entries with GitHub URLs
and creates or updates manifest/vendor_manifest.json.
"""

import argparse
import json
import os
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional, Any
from urllib.parse import urlparse, parse_qs, unquote

def parse_github_url(url: str) -> Optional[Dict[str, str]]:
    """
    Parse GitHub URL to extract owner, repo, path, and ref.

    Supports:
    - https://github.com/owner/repo/blob/<ref>/path/to/file
    - https://github.com/owner/repo/raw/<ref>/path/to/file
    - https://raw.githubusercontent.com/owner/repo/<ref>/path/to/file
    """
    if not url or not isinstance(url, str):
        return None

    # Check if it's a GitHub URL
    if 'github.com' not in url:
        return None

    # Handle raw.githubusercontent.com
    if 'raw.githubusercontent.com' in url:
        match = re.match(r'https://raw\.githubusercontent\.com/([^/]+)/([^/]+)/([^/]+)/(.+)', url)
        if match:
            owner, repo, ref, path = match.groups()
            return {
                'owner': owner,
                'repo': repo,
                'ref': ref,
                'path': unquote(path).split('?')[0]  # Remove query params
            }

    # Handle github.com URLs
    parsed = urlparse(url)
    path_parts = parsed.path.strip('/').split('/')

    if len(path_parts) < 5:
        return None

    owner = path_parts[0]
    repo = path_parts[1]
    mode = path_parts[2]  # 'blob' or 'raw'
    ref = path_parts[3]

    # Get file path (everything after ref)
    file_path = '/'.join(path_parts[4:])

    # Remove query params from path
    file_path = unquote(file_path).split('?')[0]

    # Accept blob and raw URLs, including blob URLs with ?raw=true
    query_params = parse_qs(parsed.query)
    if mode in ('blob', 'raw') or 'raw' in query_params:
        return {
            'owner': owner,
            'repo': repo,
            'ref': ref,
            'path': file_path
        }

    return None

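# Illustrative example for parse_github_url (the owner/repo/file names below are
# hypothetical, not taken from the actual component data). A blob URL such as
#     https://github.com/someowner/printable-parts/blob/main/panels/front_panel.stl
# would be parsed into
#     {'owner': 'someowner', 'repo': 'printable-parts', 'ref': 'main',
#      'path': 'panels/front_panel.stl'}
# while any URL that does not point at github.com or raw.githubusercontent.com
# returns None and is skipped by the scanner below.
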
def find_printed_parts(data: Any, path: str = '') -> List[Dict[str, Any]]:
    """
    Recursively find all printedParts entries in a nested JSON structure.
    Returns a list of dicts with 'part', 'json_path', and 'part_id' keys.
    """
    parts = []

    if isinstance(data, dict):
        # Check if this dict has a 'printedParts' key
        if 'printedParts' in data:
            for part in data['printedParts']:
                if isinstance(part, dict) and 'id' in part:
                    parts.append({
                        'part': part,
                        'json_path': path,
                        'part_id': part.get('id')
                    })

        # Also check for 'bodyParts', 'knobs', etc. that might contain parts
        for key in ['bodyParts', 'knobs']:
            if key in data and isinstance(data[key], list):
                for part in data[key]:
                    if isinstance(part, dict) and 'id' in part:
                        parts.append({
                            'part': part,
                            'json_path': path,
                            'part_id': part.get('id')
                        })

        # Recursively search nested structures
        for key, value in data.items():
            if isinstance(value, (dict, list)):
                parts.extend(find_printed_parts(value, path))

    elif isinstance(data, list):
        for item in data:
            parts.extend(find_printed_parts(item, path))

    return parts

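# Illustrative input for find_printed_parts (a hypothetical component JSON shape,
# shown only to make the traversal concrete):
#     {"name": "Example Module",
#      "printedParts": [{"id": "front-panel",
#                        "url": "https://github.com/someowner/printable-parts/blob/main/panels/front_panel.stl"}]}
# For this input the function returns a single dict:
#     {'part': {...}, 'json_path': '<path of the JSON file>', 'part_id': 'front-panel'}
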
def generate_manifest_id(part_id: str, owner: str, repo: str, path: str) -> str:
    """Generate a manifest ID from part ID or create one from repo/path."""
    if part_id:
        return part_id

    # Generate slug from owner-repo-path
    slug = f"{owner}-{repo}-{path.replace('/', '-').replace(' ', '-')}"
    # Remove special chars
    slug = re.sub(r'[^a-zA-Z0-9_-]', '', slug)
    return slug[:100]  # Limit length


def generate_local_path(owner: str, repo: str, path: str) -> str:
    """Generate local vendor path from owner, repo, and file path."""
    repo_dir = f"{owner}-{repo}"
    return f"vendor/{repo_dir}/{path}"

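# Illustrative example (hypothetical values): with no explicit part id and
# owner='someowner', repo='printable-parts', path='panels/front_panel.stl',
# generate_manifest_id yields 'someowner-printable-parts-panels-front_panelstl'
# (the '.' is dropped as a special character), while generate_local_path yields
# 'vendor/someowner-printable-parts/panels/front_panel.stl'.
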
def load_existing_manifest(manifest_path: Path) -> Dict[str, Dict]:
    """Load existing manifest or return empty dict."""
    if manifest_path.exists():
        try:
            with open(manifest_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            # Convert list to dict keyed by id
            if isinstance(data, list):
                return {entry['id']: entry for entry in data}
            elif isinstance(data, dict) and 'entries' in data:
                return {entry['id']: entry for entry in data['entries']}
            elif isinstance(data, dict):
                # Assume it's already keyed by id
                return data
        except (json.JSONDecodeError, KeyError) as e:
            print(f"Warning: Could not parse existing manifest: {e}", file=sys.stderr)

    return {}

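# The loader tolerates three on-disk shapes (examples are illustrative):
#     a bare list:          [{"id": "front-panel", ...}, ...]
#     a wrapped object:     {"entries": [{"id": "front-panel", ...}, ...]}
#     a dict keyed by id:   {"front-panel": {...}, ...}
# and normalizes each of them to a dict keyed by manifest id.
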
def scan_component_files(site_dir: Path, repo_root: Path) -> List[Dict[str, Any]]:
    """Scan all component JSON files and extract printedParts with GitHub URLs."""
    entries = []

    if not site_dir.exists():
        print(f"Error: Site directory does not exist: {site_dir}", file=sys.stderr)
        return entries

    for json_file in site_dir.glob('*.json'):
        try:
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)

            parts = find_printed_parts(data, str(json_file))

            for item in parts:
                part = item['part']
                url = part.get('url')

                if not url:
                    continue

                github_info = parse_github_url(url)
                if not github_info:
                    print(f"Warning: Skipping non-GitHub URL in {json_file}: {url}", file=sys.stderr)
                    continue

                part_id = item['part_id']
                manifest_id = generate_manifest_id(
                    part_id,
                    github_info['owner'],
                    github_info['repo'],
                    github_info['path']
                )

                local_path = generate_local_path(
                    github_info['owner'],
                    github_info['repo'],
                    github_info['path']
                )

                # Store relative path from repo root
                try:
                    json_file_rel = json_file.relative_to(repo_root)
                except ValueError:
                    # If not relative, use absolute path
                    json_file_rel = json_file

                entries.append({
                    'manifest_id': manifest_id,
                    'part_id': part_id,
                    'part': part,
                    'json_file': str(json_file_rel),
                    'github_info': github_info,
                    'local_path': local_path
                })

        except (json.JSONDecodeError, IOError) as e:
            print(f"Warning: Could not read {json_file}: {e}", file=sys.stderr)
            continue

    return entries

def create_or_update_manifest_entry(
    existing_entry: Optional[Dict],
    new_data: Dict[str, Any]
) -> Dict[str, Any]:
    """Create new manifest entry or merge with existing."""
    github_info = new_data['github_info']
    manifest_id = new_data['manifest_id']

    if existing_entry:
        # Merge: keep existing pinned data, update source info if changed
        entry = existing_entry.copy()
        entry['source_repo'] = f"{github_info['owner']}/{github_info['repo']}"
        entry['source_path'] = github_info['path']
        entry['source_ref'] = github_info.get('ref', 'main')
        entry['local_path'] = new_data['local_path']
        entry['orig_site_json'] = new_data['json_file']
        entry['orig_item_id'] = new_data['part_id']
        # Don't overwrite pinned_sha, checksum, etc. if they exist
        return entry

    # Create new entry
    return {
        'id': manifest_id,
        'source_repo': f"{github_info['owner']}/{github_info['repo']}",
        'source_path': github_info['path'],
        'source_ref': github_info.get('ref', 'main'),
        'pinned_sha': None,
        'pinned_raw_url': None,
        'local_path': new_data['local_path'],
        'checksum_sha256': None,
        'last_checked': None,
        'upstream_latest_sha': None,
        'status': 'unknown',
        'license': None,
        'orig_site_json': new_data['json_file'],
        'orig_item_id': new_data['part_id']
    }

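# A newly created entry for a hypothetical part would serialize roughly as:
#     {"id": "front-panel",
#      "source_repo": "someowner/printable-parts",
#      "source_path": "panels/front_panel.stl",
#      "source_ref": "main",
#      "pinned_sha": null, "pinned_raw_url": null,
#      "local_path": "vendor/someowner-printable-parts/panels/front_panel.stl",
#      "checksum_sha256": null, "last_checked": null, "upstream_latest_sha": null,
#      "status": "unknown", "license": null,
#      "orig_site_json": "website/src/data/components/example.json",
#      "orig_item_id": "front-panel"}
# The pinning fields start as null and, once set, are preserved on later runs
# because the merge path above keeps existing values.
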
def main():
    parser = argparse.ArgumentParser(
        description='Generate vendor manifest from site component JSON files'
    )
    parser.add_argument(
        '--site-dir',
        type=Path,
        default=Path('website/src/data/components'),
        help='Directory containing component JSON files (default: website/src/data/components)'
    )
    parser.add_argument(
        '--manifest',
        type=Path,
        default=Path('manifest/vendor_manifest.json'),
        help='Path to manifest file (default: manifest/vendor_manifest.json)'
    )

    args = parser.parse_args()

    # Resolve paths relative to the repo root (the parent of this script's directory)
    script_dir = Path(__file__).parent.parent
    site_dir = (script_dir / args.site_dir).resolve()
    manifest_path = (script_dir / args.manifest).resolve()

    # Ensure manifest directory exists
    manifest_path.parent.mkdir(parents=True, exist_ok=True)

    # Load existing manifest
    existing_manifest = load_existing_manifest(manifest_path)

    # Scan component files
    print(f"Scanning component files in {site_dir}...")
    entries = scan_component_files(site_dir, repo_root=script_dir)

    if not entries:
        print("No GitHub URLs found in component files.", file=sys.stderr)
        sys.exit(1)

    # Create or update manifest entries
    updated_manifest = existing_manifest.copy()

    for entry_data in entries:
        manifest_id = entry_data['manifest_id']
        existing_entry = updated_manifest.get(manifest_id)

        new_entry = create_or_update_manifest_entry(existing_entry, entry_data)
        updated_manifest[manifest_id] = new_entry

    # Convert to sorted list for deterministic output
    manifest_list = sorted(updated_manifest.values(), key=lambda x: x['id'])

    # Write manifest
    print(f"Writing manifest to {manifest_path}...")
    with open(manifest_path, 'w', encoding='utf-8') as f:
        json.dump(manifest_list, f, indent=2, sort_keys=False)

    print(f"Generated {len(manifest_list)} manifest entries.")

    # Show summary
    new_entries = len(manifest_list) - len(existing_manifest)
    if new_entries > 0:
        print(f"Added {new_entries} new entries.")
    updated_entries = len(set(existing_manifest) & {e['manifest_id'] for e in entries})
    if updated_entries > 0:
        print(f"Updated {updated_entries} existing entries.")

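# Typical invocation, assuming the script stays at scripts/ in the repository
# (the defaults shown match the argparse defaults above and are resolved against
# the repo root, not the current working directory):
#     python scripts/generate_manifest_from_site.py \
#         --site-dir website/src/data/components \
#         --manifest manifest/vendor_manifest.json
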
if __name__ == '__main__':
    main()