Update package version to 0.0.1-beta, add new dependencies including ExcelJS, and refactor export utilities to use ExcelJS for Excel file generation. Enhance component JSON files with vendor information for improved asset management.

2026-01-07 02:12:12 +00:00
parent 4bc0fd203f
commit 97d2b66f02
33 changed files with 4394 additions and 1088 deletions


@@ -0,0 +1,327 @@
#!/usr/bin/env python3
"""
Generate vendor manifest from site component JSON files.
Scans website/src/data/components/*.json for printedParts entries with GitHub URLs
and creates or updates manifest/vendor_manifest.json.
"""
import argparse
import json
import os
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional, Any
from urllib.parse import urlparse, parse_qs, unquote
def parse_github_url(url: str) -> Optional[Dict[str, str]]:
"""
Parse GitHub URL to extract owner, repo, path, and ref.
Supports:
- https://github.com/owner/repo/blob/<ref>/path/to/file
- https://github.com/owner/repo/raw/<ref>/path/to/file
- https://raw.githubusercontent.com/owner/repo/<ref>/path/to/file
"""
if not url or not isinstance(url, str):
return None
# Check if it's a GitHub URL
if 'github.com' not in url:
return None
# Handle raw.githubusercontent.com
if 'raw.githubusercontent.com' in url:
match = re.match(r'https://raw\.githubusercontent\.com/([^/]+)/([^/]+)/([^/]+)/(.+)', url)
if match:
owner, repo, ref, path = match.groups()
return {
'owner': owner,
'repo': repo,
'ref': ref,
'path': unquote(path).split('?')[0] # Remove query params
}
# Handle github.com URLs
parsed = urlparse(url)
path_parts = parsed.path.strip('/').split('/')
if len(path_parts) < 5:
return None
owner = path_parts[0]
repo = path_parts[1]
mode = path_parts[2] # 'blob' or 'raw'
ref = path_parts[3]
# Get file path (everything after ref)
file_path = '/'.join(path_parts[4:])
# Remove query params from path
file_path = unquote(file_path).split('?')[0]
    # Accept 'blob' and 'raw' view URLs, plus ?raw=true in the query string
    query_params = parse_qs(parsed.query)
    if mode in ('blob', 'raw') or 'raw' in query_params:
return {
'owner': owner,
'repo': repo,
'ref': ref,
'path': file_path
}
return None
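# Illustrative use of parse_github_url; the owner/repo/path values below are
# hypothetical, not taken from the component data:
#
#   parse_github_url('https://github.com/example-owner/example-repo/blob/main/stl/knob.stl')
#   # -> {'owner': 'example-owner', 'repo': 'example-repo', 'ref': 'main', 'path': 'stl/knob.stl'}
#
#   parse_github_url('https://example.com/files/knob.stl')
#   # -> None (not a GitHub URL)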
def find_printed_parts(data: Any, path: str = '') -> List[Dict[str, Any]]:
"""
    Recursively find all printedParts entries in a nested JSON structure.
    Returns a list of dicts with keys 'part', 'json_path', and 'part_id'.
"""
parts = []
if isinstance(data, dict):
# Check if this dict has a 'printedParts' key
if 'printedParts' in data:
for part in data['printedParts']:
if isinstance(part, dict) and 'id' in part:
parts.append({
'part': part,
'json_path': path,
'part_id': part.get('id')
})
        # Also check 'bodyParts' and 'knobs' lists, which may also contain printable parts
for key in ['bodyParts', 'knobs']:
if key in data and isinstance(data[key], list):
for part in data[key]:
if isinstance(part, dict) and 'id' in part:
parts.append({
'part': part,
'json_path': path,
'part_id': part.get('id')
})
# Recursively search nested structures
for key, value in data.items():
if isinstance(value, (dict, list)):
parts.extend(find_printed_parts(value, path))
elif isinstance(data, list):
for item in data:
parts.extend(find_printed_parts(item, path))
return parts
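# Sketch of the component shape this walks (illustrative; only 'printedParts',
# 'bodyParts', 'knobs', 'id', and 'url' are keys the code actually looks for):
#
#   component = {
#       "printedParts": [
#           {"id": "example-knob", "url": "https://github.com/o/r/blob/main/knob.stl"}
#       ]
#   }
#   find_printed_parts(component, 'website/src/data/components/example.json')
#   # -> [{'part': {...}, 'json_path': 'website/src/data/components/example.json',
#   #      'part_id': 'example-knob'}]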
def generate_manifest_id(part_id: str, owner: str, repo: str, path: str) -> str:
"""Generate a manifest ID from part ID or create one from repo/path."""
if part_id:
return part_id
# Generate slug from owner-repo-path
slug = f"{owner}-{repo}-{path.replace('/', '-').replace(' ', '-')}"
# Remove special chars
slug = re.sub(r'[^a-zA-Z0-9_-]', '', slug)
return slug[:100] # Limit length
def generate_local_path(owner: str, repo: str, path: str) -> str:
"""Generate local vendor path from owner, repo, and file path."""
repo_dir = f"{owner}-{repo}"
return f"vendor/{repo_dir}/{path}"
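# For example (hypothetical values): generate_local_path('owner', 'repo', 'stl/knob.stl')
# yields 'vendor/owner-repo/stl/knob.stl', and generate_manifest_id('', 'owner', 'repo',
# 'stl/knob.stl') falls back to the slug 'owner-repo-stl-knobstl' (the '.' is stripped
# because only [a-zA-Z0-9_-] survive the re.sub).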
def load_existing_manifest(manifest_path: Path) -> Dict[str, Dict]:
"""Load existing manifest or return empty dict."""
if manifest_path.exists():
try:
with open(manifest_path, 'r', encoding='utf-8') as f:
data = json.load(f)
# Convert list to dict keyed by id
if isinstance(data, list):
return {entry['id']: entry for entry in data}
elif isinstance(data, dict) and 'entries' in data:
return {entry['id']: entry for entry in data['entries']}
elif isinstance(data, dict):
# Assume it's already keyed by id
return data
except (json.JSONDecodeError, KeyError) as e:
print(f"Warning: Could not parse existing manifest: {e}", file=sys.stderr)
return {}
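# The on-disk manifest may be a plain list of entries, a {"entries": [...]} wrapper,
# or a dict already keyed by id; all three are normalized to {id: entry}. Illustrative:
#
#   load_existing_manifest(Path('manifest/vendor_manifest.json'))
#   # -> {'example-knob': {'id': 'example-knob', 'pinned_sha': None, ...}}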
def scan_component_files(site_dir: Path, repo_root: Path) -> List[Dict[str, Any]]:
"""Scan all component JSON files and extract printedParts with GitHub URLs."""
entries = []
if not site_dir.exists():
print(f"Error: Site directory does not exist: {site_dir}", file=sys.stderr)
return entries
for json_file in site_dir.glob('*.json'):
try:
with open(json_file, 'r', encoding='utf-8') as f:
data = json.load(f)
parts = find_printed_parts(data, str(json_file))
for item in parts:
part = item['part']
url = part.get('url')
if not url:
continue
github_info = parse_github_url(url)
if not github_info:
print(f"Warning: Skipping non-GitHub URL in {json_file}: {url}", file=sys.stderr)
continue
part_id = item['part_id']
manifest_id = generate_manifest_id(
part_id,
github_info['owner'],
github_info['repo'],
github_info['path']
)
local_path = generate_local_path(
github_info['owner'],
github_info['repo'],
github_info['path']
)
# Store relative path from repo root
try:
json_file_rel = json_file.relative_to(repo_root)
except ValueError:
# If not relative, use absolute path
json_file_rel = json_file
entries.append({
'manifest_id': manifest_id,
'part_id': part_id,
'part': part,
'json_file': str(json_file_rel),
'github_info': github_info,
'local_path': local_path
})
except (json.JSONDecodeError, IOError) as e:
print(f"Warning: Could not read {json_file}: {e}", file=sys.stderr)
continue
return entries
def create_or_update_manifest_entry(
existing_entry: Optional[Dict],
new_data: Dict[str, Any]
) -> Dict[str, Any]:
"""Create new manifest entry or merge with existing."""
github_info = new_data['github_info']
manifest_id = new_data['manifest_id']
if existing_entry:
# Merge: keep existing pinned data, update source info if changed
entry = existing_entry.copy()
entry['source_repo'] = f"{github_info['owner']}/{github_info['repo']}"
entry['source_path'] = github_info['path']
entry['source_ref'] = github_info.get('ref', 'main')
entry['local_path'] = new_data['local_path']
entry['orig_site_json'] = new_data['json_file']
entry['orig_item_id'] = new_data['part_id']
# Don't overwrite pinned_sha, checksum, etc. if they exist
return entry
# Create new entry
return {
'id': manifest_id,
'source_repo': f"{github_info['owner']}/{github_info['repo']}",
'source_path': github_info['path'],
'source_ref': github_info.get('ref', 'main'),
'pinned_sha': None,
'pinned_raw_url': None,
'local_path': new_data['local_path'],
'checksum_sha256': None,
'last_checked': None,
'upstream_latest_sha': None,
'status': 'unknown',
'license': None,
'orig_site_json': new_data['json_file'],
'orig_item_id': new_data['part_id']
}
def main():
parser = argparse.ArgumentParser(
description='Generate vendor manifest from site component JSON files'
)
parser.add_argument(
'--site-dir',
type=Path,
default=Path('website/src/data/components'),
help='Directory containing component JSON files (default: website/src/data/components)'
)
parser.add_argument(
'--manifest',
type=Path,
default=Path('manifest/vendor_manifest.json'),
help='Path to manifest file (default: manifest/vendor_manifest.json)'
)
args = parser.parse_args()
    # Resolve defaults relative to the repository root (the parent of the directory
    # containing this script), independent of the current working directory
    repo_root = Path(__file__).resolve().parent.parent
    site_dir = (repo_root / args.site_dir).resolve()
    manifest_path = (repo_root / args.manifest).resolve()
# Ensure manifest directory exists
manifest_path.parent.mkdir(parents=True, exist_ok=True)
# Load existing manifest
existing_manifest = load_existing_manifest(manifest_path)
# Scan component files
print(f"Scanning component files in {site_dir}...")
    entries = scan_component_files(site_dir, repo_root=repo_root)
if not entries:
print("No GitHub URLs found in component files.", file=sys.stderr)
sys.exit(1)
# Create or update manifest entries
updated_manifest = existing_manifest.copy()
for entry_data in entries:
manifest_id = entry_data['manifest_id']
existing_entry = updated_manifest.get(manifest_id)
new_entry = create_or_update_manifest_entry(existing_entry, entry_data)
updated_manifest[manifest_id] = new_entry
# Convert to sorted list for deterministic output
manifest_list = sorted(updated_manifest.values(), key=lambda x: x['id'])
# Write manifest
print(f"Writing manifest to {manifest_path}...")
with open(manifest_path, 'w', encoding='utf-8') as f:
json.dump(manifest_list, f, indent=2, sort_keys=False)
print(f"Generated {len(manifest_list)} manifest entries.")
# Show summary
new_entries = len(manifest_list) - len(existing_manifest)
if new_entries > 0:
print(f"Added {new_entries} new entries.")
    if len(existing_manifest) > 0:
        print(f"Carried over {len(existing_manifest)} existing entries.")
if __name__ == '__main__':
main()
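# Example invocation (assumed layout: this script lives in a scripts/ directory one
# level below the repository root, so the relative defaults resolve against the repo
# root; the filename shown is a placeholder, as the diff header does not include it):
#
#   python3 scripts/generate_vendor_manifest.py \
#       --site-dir website/src/data/components \
#       --manifest manifest/vendor_manifest.json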