Update package version to 0.0.1-beta, add new dependencies including ExcelJS, and refactor the export utilities to use ExcelJS for Excel file generation. Enhance the component JSON files with vendor information for improved asset management.
BIN scripts/__pycache__/check_updates.cpython-312.pyc (new file)
Binary file not shown.
BIN scripts/__pycache__/generate_manifest_from_site.cpython-312.pyc (new file)
Binary file not shown.
BIN scripts/__pycache__/vendor_update.cpython-312.pyc (new file)
Binary file not shown.
scripts/check_updates.py (new executable file, 268 lines)
@@ -0,0 +1,268 @@
#!/usr/bin/env python3
"""
Check for upstream updates to vendored files.

Queries GitHub API to detect if upstream files have changed since
they were pinned. Produces a report of up-to-date and out-of-date entries.
"""

import argparse
import json
import os
import sys
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Optional

import requests


class GitHubAPI:
    """Simple GitHub API client for checking updates."""

    def __init__(self, token: Optional[str] = None, delay: float = 0.5):
        self.token = token or os.getenv('GITHUB_API_TOKEN') or os.getenv('GITHUB_TOKEN')
        self.session = requests.Session()
        if self.token:
            self.session.headers.update({
                'Authorization': f'token {self.token}',
                'Accept': 'application/vnd.github.v3+json'
            })
        self.base_url = 'https://api.github.com'
        self.delay = delay # Delay between requests in seconds
        self.last_request_time = 0

    def _wait_for_rate_limit(self, response: requests.Response) -> None:
        """Wait if rate limited, using reset time from headers."""
        if response.status_code == 403:
            # Check if it's a rate limit error
            rate_limit_remaining = response.headers.get('X-RateLimit-Remaining', '1')
            if rate_limit_remaining == '0' or 'rate limit' in response.text.lower():
                reset_time = response.headers.get('X-RateLimit-Reset')
                if reset_time:
                    reset_timestamp = int(reset_time)
                    wait_seconds = max(0, reset_timestamp - int(time.time())) + 1
                    print(f" Rate limit exceeded. Waiting {wait_seconds} seconds until reset...", file=sys.stderr)
                    time.sleep(wait_seconds)
                else:
                    # Fallback: wait 60 seconds
                    print(" Rate limit exceeded. Waiting 60 seconds...", file=sys.stderr)
                    time.sleep(60)

    def _rate_limit_delay(self) -> None:
        """Add delay between requests to avoid hitting rate limits."""
        current_time = time.time()
        time_since_last = current_time - self.last_request_time
        if time_since_last < self.delay:
            time.sleep(self.delay - time_since_last)
        self.last_request_time = time.time()

    def _make_request(self, method: str, url: str, max_retries: int = 3, **kwargs) -> requests.Response:
        """Make a request with rate limit handling and retries."""
        for attempt in range(max_retries):
            self._rate_limit_delay()

            try:
                response = self.session.request(method, url, **kwargs)

                # Check rate limit
                if response.status_code == 403:
                    self._wait_for_rate_limit(response)
                    # Retry the request after waiting
                    if attempt < max_retries - 1:
                        continue

                # Check remaining rate limit
                remaining = response.headers.get('X-RateLimit-Remaining')
                if remaining:
                    remaining_int = int(remaining)
                    if remaining_int < 10:
                        print(f" Warning: Only {remaining_int} API requests remaining. Adding delay...", file=sys.stderr)
                        time.sleep(2)

                return response

            except requests.RequestException as e:
                if attempt < max_retries - 1:
                    wait_time = 2 ** attempt # Exponential backoff
                    print(f" Request failed, retrying in {wait_time}s... ({e})", file=sys.stderr)
                    time.sleep(wait_time)
                else:
                    raise

        return response

    def get_latest_commit_sha(self, owner: str, repo: str, path: str, ref: str) -> Optional[str]:
        """
        Get the latest commit SHA that modified a file at the given ref.
        """
        commits_url = f"{self.base_url}/repos/{owner}/{repo}/commits"
        params = {
            'path': path,
            'sha': ref,
            'per_page': 1
        }

        try:
            response = self._make_request('GET', commits_url, params=params)
            response.raise_for_status()
            commits = response.json()

            if commits:
                return commits[0]['sha']

            # If no commits found, try to resolve the ref to a SHA
            # Check if ref is already a SHA
            if len(ref) == 40 and all(c in '0123456789abcdef' for c in ref.lower()):
                return ref

            # Try to resolve branch/tag to SHA
            ref_url = f"{self.base_url}/repos/{owner}/{repo}/git/ref/heads/{ref}"
            ref_response = self._make_request('GET', ref_url)
            if ref_response.status_code == 200:
                return ref_response.json()['object']['sha']

            # Try tag
            ref_url = f"{self.base_url}/repos/{owner}/{repo}/git/ref/tags/{ref}"
            ref_response = self._make_request('GET', ref_url)
            if ref_response.status_code == 200:
                return ref_response.json()['object']['sha']

            return None

        except requests.RequestException as e:
            print(f"Error checking updates for {owner}/{repo}/{path}@{ref}: {e}", file=sys.stderr)
            return None


def check_entry(entry: Dict, api: GitHubAPI) -> Dict:
    """Check a single manifest entry for updates."""
    source_repo = entry['source_repo']
    owner, repo = source_repo.split('/', 1)
    source_path = entry['source_path']
    source_ref = entry.get('source_ref', 'main')
    pinned_sha = entry.get('pinned_sha')

    # Get latest commit SHA
    latest_sha = api.get_latest_commit_sha(owner, repo, source_path, source_ref)

    if not latest_sha:
        entry['status'] = 'unknown'
        entry['upstream_latest_sha'] = None
        return entry

    # Update upstream_latest_sha
    entry['upstream_latest_sha'] = latest_sha
    entry['last_checked'] = datetime.now(timezone.utc).isoformat()

    # Compare with pinned SHA
    if not pinned_sha:
        entry['status'] = 'unknown'
    elif latest_sha == pinned_sha:
        entry['status'] = 'up-to-date'
    else:
        entry['status'] = 'out-of-date'

    return entry


def main():
    parser = argparse.ArgumentParser(
        description='Check for upstream updates to vendored files'
    )
    parser.add_argument(
        '--manifest',
        type=Path,
        default=Path('manifest/vendor_manifest.json'),
        help='Path to manifest file (default: manifest/vendor_manifest.json)'
    )
    parser.add_argument(
        '--output',
        type=Path,
        help='Path to write report JSON (optional)'
    )
    parser.add_argument(
        '--delay',
        type=float,
        default=0.5,
        help='Delay between API requests in seconds (default: 0.5)'
    )

    args = parser.parse_args()

    # Resolve paths
    script_dir = Path(__file__).parent.parent
    manifest_path = (script_dir / args.manifest).resolve()

    if not manifest_path.exists():
        print(f"Error: Manifest file not found: {manifest_path}", file=sys.stderr)
        sys.exit(1)

    # Load manifest
    with open(manifest_path, 'r', encoding='utf-8') as f:
        manifest_data = json.load(f)

    # Convert to list if it's a dict
    if isinstance(manifest_data, dict):
        manifest_list = list(manifest_data.values())
    else:
        manifest_list = manifest_data

    # Initialize GitHub API with delay
    api = GitHubAPI(delay=args.delay)

    # Check each entry
    print("Checking for upstream updates...")
    updated_entries = []
    out_of_date_count = 0

    for entry in manifest_list:
        updated_entry = check_entry(entry, api)
        updated_entries.append(updated_entry)

        if updated_entry['status'] == 'out-of-date':
            out_of_date_count += 1
            print(f" ⚠️ {updated_entry['id']}: OUT-OF-DATE")
            print(f" Pinned: {updated_entry.get('pinned_sha', 'N/A')[:8]}...")
            print(f" Latest: {updated_entry.get('upstream_latest_sha', 'N/A')[:8]}...")
        elif updated_entry['status'] == 'up-to-date':
            print(f" ✓ {updated_entry['id']}: up-to-date")
        else:
            print(f" ? {updated_entry['id']}: {updated_entry['status']}")

    # Create report
    report = {
        'generated_at': datetime.now(timezone.utc).isoformat(),
        'total_entries': len(updated_entries),
        'up_to_date': sum(1 for e in updated_entries if e['status'] == 'up-to-date'),
        'out_of_date': out_of_date_count,
        'unknown': sum(1 for e in updated_entries if e['status'] == 'unknown'),
        'entries': updated_entries
    }

    # Write report if requested
    if args.output:
        output_path = (script_dir / args.output).resolve()
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(report, f, indent=2, sort_keys=False)
        print(f"\nReport written to {output_path}")

    # Print summary
    print(f"\nSummary:")
    print(f" Total entries: {report['total_entries']}")
    print(f" Up-to-date: {report['up_to_date']}")
    print(f" Out-of-date: {report['out_of_date']}")
    print(f" Unknown: {report['unknown']}")

    # Exit with non-zero code if any entries are out-of-date
    if out_of_date_count > 0:
        print(f"\n⚠️ {out_of_date_count} entries need updates!", file=sys.stderr)
        sys.exit(1)

    print("\n✓ All entries are up-to-date.")


if __name__ == '__main__':
    main()
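For reference, a minimal sketch of the manifest entry shape that check_entry() consumes; the id, repository, and SHA below are illustrative placeholders, not values taken from this commit.

# Hypothetical vendor_manifest.json entry (placeholder values only).
# check_entry() compares pinned_sha against the newest upstream commit SHA
# for source_path and sets status to 'up-to-date', 'out-of-date', or 'unknown'.
example_entry = {
    'id': 'example-knob',
    'source_repo': 'someuser/somerepo',   # owner/repo, split on the first '/'
    'source_path': 'stl/knob.stl',        # file path inside the upstream repo
    'source_ref': 'main',                 # branch, tag, or 40-character commit SHA
    'pinned_sha': '0123456789abcdef0123456789abcdef01234567',
    'status': 'unknown',                  # filled in by check_entry()
}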
scripts/generate_manifest_from_site.py (new executable file, 327 lines)
@@ -0,0 +1,327 @@
#!/usr/bin/env python3
"""
Generate vendor manifest from site component JSON files.

Scans /src/data/components/*.json for printedParts entries with GitHub URLs
and creates or updates manifest/vendor_manifest.json.
"""

import argparse
import json
import os
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional, Any
from urllib.parse import urlparse, parse_qs, unquote


def parse_github_url(url: str) -> Optional[Dict[str, str]]:
    """
    Parse GitHub URL to extract owner, repo, path, and ref.

    Supports:
    - https://github.com/owner/repo/blob/<ref>/path/to/file
    - https://github.com/owner/repo/raw/<ref>/path/to/file
    - https://raw.githubusercontent.com/owner/repo/<ref>/path/to/file
    """
    if not url or not isinstance(url, str):
        return None

    # Check if it's a GitHub URL
    if 'github.com' not in url:
        return None

    # Handle raw.githubusercontent.com
    if 'raw.githubusercontent.com' in url:
        match = re.match(r'https://raw\.githubusercontent\.com/([^/]+)/([^/]+)/([^/]+)/(.+)', url)
        if match:
            owner, repo, ref, path = match.groups()
            return {
                'owner': owner,
                'repo': repo,
                'ref': ref,
                'path': unquote(path).split('?')[0] # Remove query params
            }

    # Handle github.com URLs
    parsed = urlparse(url)
    path_parts = parsed.path.strip('/').split('/')

    if len(path_parts) < 5:
        return None

    owner = path_parts[0]
    repo = path_parts[1]
    mode = path_parts[2] # 'blob' or 'raw'
    ref = path_parts[3]

    # Get file path (everything after ref)
    file_path = '/'.join(path_parts[4:])

    # Remove query params from path
    file_path = unquote(file_path).split('?')[0]

    # Handle ?raw=true in query params (sometimes used with blob URLs)
    query_params = parse_qs(parsed.query)
    if mode in ('blob', 'raw') or 'raw' in query_params:
        return {
            'owner': owner,
            'repo': repo,
            'ref': ref,
            'path': file_path
        }

    return None


def find_printed_parts(data: Any, path: str = '') -> List[Dict[str, Any]]:
    """
    Recursively find all printedParts entries in nested JSON structure.
    Returns a list of dicts with 'part', 'json_path', and 'part_id' keys.
    """
    parts = []

    if isinstance(data, dict):
        # Check if this dict has a 'printedParts' key
        if 'printedParts' in data:
            for part in data['printedParts']:
                if isinstance(part, dict) and 'id' in part:
                    parts.append({
                        'part': part,
                        'json_path': path,
                        'part_id': part.get('id')
                    })

        # Also check for 'bodyParts', 'knobs', etc. that might contain parts
        for key in ['bodyParts', 'knobs']:
            if key in data and isinstance(data[key], list):
                for part in data[key]:
                    if isinstance(part, dict) and 'id' in part:
                        parts.append({
                            'part': part,
                            'json_path': path,
                            'part_id': part.get('id')
                        })

        # Recursively search nested structures
        for key, value in data.items():
            if isinstance(value, (dict, list)):
                parts.extend(find_printed_parts(value, path))

    elif isinstance(data, list):
        for item in data:
            parts.extend(find_printed_parts(item, path))

    return parts


def generate_manifest_id(part_id: str, owner: str, repo: str, path: str) -> str:
    """Generate a manifest ID from part ID or create one from repo/path."""
    if part_id:
        return part_id

    # Generate slug from owner-repo-path
    slug = f"{owner}-{repo}-{path.replace('/', '-').replace(' ', '-')}"
    # Remove special chars
    slug = re.sub(r'[^a-zA-Z0-9_-]', '', slug)
    return slug[:100] # Limit length


def generate_local_path(owner: str, repo: str, path: str) -> str:
    """Generate local vendor path from owner, repo, and file path."""
    repo_dir = f"{owner}-{repo}"
    return f"vendor/{repo_dir}/{path}"


def load_existing_manifest(manifest_path: Path) -> Dict[str, Dict]:
    """Load existing manifest or return empty dict."""
    if manifest_path.exists():
        try:
            with open(manifest_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            # Convert list to dict keyed by id
            if isinstance(data, list):
                return {entry['id']: entry for entry in data}
            elif isinstance(data, dict) and 'entries' in data:
                return {entry['id']: entry for entry in data['entries']}
            elif isinstance(data, dict):
                # Assume it's already keyed by id
                return data
        except (json.JSONDecodeError, KeyError) as e:
            print(f"Warning: Could not parse existing manifest: {e}", file=sys.stderr)

    return {}


def scan_component_files(site_dir: Path, repo_root: Path) -> List[Dict[str, Any]]:
    """Scan all component JSON files and extract printedParts with GitHub URLs."""
    entries = []

    if not site_dir.exists():
        print(f"Error: Site directory does not exist: {site_dir}", file=sys.stderr)
        return entries

    for json_file in site_dir.glob('*.json'):
        try:
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)

            parts = find_printed_parts(data, str(json_file))

            for item in parts:
                part = item['part']
                url = part.get('url')

                if not url:
                    continue

                github_info = parse_github_url(url)
                if not github_info:
                    print(f"Warning: Skipping non-GitHub URL in {json_file}: {url}", file=sys.stderr)
                    continue

                part_id = item['part_id']
                manifest_id = generate_manifest_id(
                    part_id,
                    github_info['owner'],
                    github_info['repo'],
                    github_info['path']
                )

                local_path = generate_local_path(
                    github_info['owner'],
                    github_info['repo'],
                    github_info['path']
                )

                # Store relative path from repo root
                try:
                    json_file_rel = json_file.relative_to(repo_root)
                except ValueError:
                    # If not relative, use absolute path
                    json_file_rel = json_file

                entries.append({
                    'manifest_id': manifest_id,
                    'part_id': part_id,
                    'part': part,
                    'json_file': str(json_file_rel),
                    'github_info': github_info,
                    'local_path': local_path
                })

        except (json.JSONDecodeError, IOError) as e:
            print(f"Warning: Could not read {json_file}: {e}", file=sys.stderr)
            continue

    return entries


def create_or_update_manifest_entry(
    existing_entry: Optional[Dict],
    new_data: Dict[str, Any]
) -> Dict[str, Any]:
    """Create new manifest entry or merge with existing."""
    github_info = new_data['github_info']
    manifest_id = new_data['manifest_id']

    if existing_entry:
        # Merge: keep existing pinned data, update source info if changed
        entry = existing_entry.copy()
        entry['source_repo'] = f"{github_info['owner']}/{github_info['repo']}"
        entry['source_path'] = github_info['path']
        entry['source_ref'] = github_info.get('ref', 'main')
        entry['local_path'] = new_data['local_path']
        entry['orig_site_json'] = new_data['json_file']
        entry['orig_item_id'] = new_data['part_id']
        # Don't overwrite pinned_sha, checksum, etc. if they exist
        return entry

    # Create new entry
    return {
        'id': manifest_id,
        'source_repo': f"{github_info['owner']}/{github_info['repo']}",
        'source_path': github_info['path'],
        'source_ref': github_info.get('ref', 'main'),
        'pinned_sha': None,
        'pinned_raw_url': None,
        'local_path': new_data['local_path'],
        'checksum_sha256': None,
        'last_checked': None,
        'upstream_latest_sha': None,
        'status': 'unknown',
        'license': None,
        'orig_site_json': new_data['json_file'],
        'orig_item_id': new_data['part_id']
    }


def main():
    parser = argparse.ArgumentParser(
        description='Generate vendor manifest from site component JSON files'
    )
    parser.add_argument(
        '--site-dir',
        type=Path,
        default=Path('website/src/data/components'),
        help='Directory containing component JSON files (default: website/src/data/components)'
    )
    parser.add_argument(
        '--manifest',
        type=Path,
        default=Path('manifest/vendor_manifest.json'),
        help='Path to manifest file (default: manifest/vendor_manifest.json)'
    )

    args = parser.parse_args()

    # Resolve paths relative to script location or current directory
    script_dir = Path(__file__).parent.parent
    site_dir = (script_dir / args.site_dir).resolve()
    manifest_path = (script_dir / args.manifest).resolve()

    # Ensure manifest directory exists
    manifest_path.parent.mkdir(parents=True, exist_ok=True)

    # Load existing manifest
    existing_manifest = load_existing_manifest(manifest_path)

    # Scan component files
    print(f"Scanning component files in {site_dir}...")
    entries = scan_component_files(site_dir, repo_root=script_dir)

    if not entries:
        print("No GitHub URLs found in component files.", file=sys.stderr)
        sys.exit(1)

    # Create or update manifest entries
    updated_manifest = existing_manifest.copy()

    for entry_data in entries:
        manifest_id = entry_data['manifest_id']
        existing_entry = updated_manifest.get(manifest_id)

        new_entry = create_or_update_manifest_entry(existing_entry, entry_data)
        updated_manifest[manifest_id] = new_entry

    # Convert to sorted list for deterministic output
    manifest_list = sorted(updated_manifest.values(), key=lambda x: x['id'])

    # Write manifest
    print(f"Writing manifest to {manifest_path}...")
    with open(manifest_path, 'w', encoding='utf-8') as f:
        json.dump(manifest_list, f, indent=2, sort_keys=False)

    print(f"Generated {len(manifest_list)} manifest entries.")

    # Show summary
    new_entries = len(manifest_list) - len(existing_manifest)
    if new_entries > 0:
        print(f"Added {new_entries} new entries.")
    if len(existing_manifest) > 0:
        print(f"Updated {len(existing_manifest)} existing entries.")


if __name__ == '__main__':
    main()
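As a quick illustration of what the scanner extracts, here is a hypothetical printedParts item and the result of parse_github_url() on its URL; the part name and repository are invented for the example, and the import assumes the snippet is run from the scripts/ directory.

# Hypothetical component part entry (illustrative only).
from generate_manifest_from_site import parse_github_url

part = {
    'id': 'front-panel',   # reused as the manifest id by generate_manifest_id()
    'url': 'https://raw.githubusercontent.com/someuser/somerepo/main/stl/front_panel.stl',
}

info = parse_github_url(part['url'])
# Expected: {'owner': 'someuser', 'repo': 'somerepo',
#            'ref': 'main', 'path': 'stl/front_panel.stl'}
print(info)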
scripts/requirements.txt (new file, 6 lines)
@@ -0,0 +1,6 @@
requests>=2.31.0
PyGithub>=2.1.0
pytest>=7.4.0
pytest-mock>=3.11.1
responses>=0.23.1
flask>=3.0.0
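With these dependencies installed, the three scripts can be chained roughly as follows; this is a sketch that assumes they are run from the repository root, and the report path is an arbitrary example rather than a project convention.

# Hypothetical end-to-end vendoring run (paths are examples).
import subprocess

# 1. Scan component JSON files and create or refresh the manifest.
subprocess.run(['python', 'scripts/generate_manifest_from_site.py'], check=True)

# 2. Download each file, pin it to a commit SHA, and write vendor metadata
#    back into the component JSON files.
subprocess.run(['python', 'scripts/vendor_update.py', '--sync-site'], check=True)

# 3. Later, check whether any pinned file changed upstream; the script exits
#    non-zero when something is out of date, so don't treat that as a failure here.
subprocess.run(
    ['python', 'scripts/check_updates.py', '--output', 'reports/update_report.json'],
    check=False,
)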
scripts/vendor_update.py (new executable file, 465 lines)
@@ -0,0 +1,465 @@
#!/usr/bin/env python3
"""
Download and pin external asset files from GitHub.

Downloads files specified in manifest, pins them to commit SHAs,
computes checksums, and optionally syncs vendor metadata back to site JSON files.
"""

import argparse
import hashlib
import json
import os
import sys
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Optional
from urllib.parse import urlparse

import requests


class GitHubAPI:
    """Simple GitHub API client with rate limit handling."""

    def __init__(self, token: Optional[str] = None, delay: float = 0.5):
        self.token = token or os.getenv('GITHUB_API_TOKEN') or os.getenv('GITHUB_TOKEN')
        self.session = requests.Session()
        if self.token:
            self.session.headers.update({
                'Authorization': f'token {self.token}',
                'Accept': 'application/vnd.github.v3+json'
            })
        self.base_url = 'https://api.github.com'
        self.delay = delay # Delay between requests in seconds
        self.last_request_time = 0

    def _wait_for_rate_limit(self, response: requests.Response) -> None:
        """Wait if rate limited, using reset time from headers."""
        if response.status_code == 403:
            # Check if it's a rate limit error
            rate_limit_remaining = response.headers.get('X-RateLimit-Remaining', '1')
            if rate_limit_remaining == '0' or 'rate limit' in response.text.lower():
                reset_time = response.headers.get('X-RateLimit-Reset')
                if reset_time:
                    reset_timestamp = int(reset_time)
                    wait_seconds = max(0, reset_timestamp - int(time.time())) + 1
                    print(f" Rate limit exceeded. Waiting {wait_seconds} seconds until reset...", file=sys.stderr)
                    time.sleep(wait_seconds)
                else:
                    # Fallback: wait 60 seconds
                    print(" Rate limit exceeded. Waiting 60 seconds...", file=sys.stderr)
                    time.sleep(60)

    def _rate_limit_delay(self) -> None:
        """Add delay between requests to avoid hitting rate limits."""
        current_time = time.time()
        time_since_last = current_time - self.last_request_time
        if time_since_last < self.delay:
            time.sleep(self.delay - time_since_last)
        self.last_request_time = time.time()

    def _make_request(self, method: str, url: str, max_retries: int = 3, **kwargs) -> requests.Response:
        """Make a request with rate limit handling and retries."""
        for attempt in range(max_retries):
            self._rate_limit_delay()

            try:
                response = self.session.request(method, url, **kwargs)

                # Check rate limit
                if response.status_code == 403:
                    self._wait_for_rate_limit(response)
                    # Retry the request after waiting
                    if attempt < max_retries - 1:
                        continue

                # Check remaining rate limit
                remaining = response.headers.get('X-RateLimit-Remaining')
                if remaining:
                    remaining_int = int(remaining)
                    if remaining_int < 10:
                        print(f" Warning: Only {remaining_int} API requests remaining. Adding delay...", file=sys.stderr)
                        time.sleep(2)

                return response

            except requests.RequestException as e:
                if attempt < max_retries - 1:
                    wait_time = 2 ** attempt # Exponential backoff
                    print(f" Request failed, retrying in {wait_time}s... ({e})", file=sys.stderr)
                    time.sleep(wait_time)
                else:
                    raise

        return response

    def get_default_branch(self, owner: str, repo: str) -> str:
        """Get default branch for a repository."""
        url = f"{self.base_url}/repos/{owner}/{repo}"
        try:
            response = self._make_request('GET', url)
            response.raise_for_status()
            return response.json().get('default_branch', 'main')
        except requests.RequestException as e:
            print(f"Warning: Could not get default branch for {owner}/{repo}: {e}", file=sys.stderr)
            return 'main'

    def get_file_sha(self, owner: str, repo: str, path: str, ref: str) -> Optional[str]:
        """
        Get the commit SHA that last modified a file at a given ref.
        Uses Contents API to get file info, then finds the commit.
        """
        # First, try to get file contents to verify it exists
        url = f"{self.base_url}/repos/{owner}/{repo}/contents/{path}"
        params = {'ref': ref}

        try:
            response = self._make_request('GET', url, params=params)
            if response.status_code == 404:
                # File doesn't exist at this ref, try default branch
                default_branch = self.get_default_branch(owner, repo)
                if default_branch != ref:
                    params['ref'] = default_branch
                    response = self._make_request('GET', url, params=params)

            response.raise_for_status()
            file_info = response.json()

            # Get the commit SHA from the file info
            # The Contents API returns 'sha' which is the blob SHA, not commit SHA
            # We need to find the commit that last modified this file
            commits_url = f"{self.base_url}/repos/{owner}/{repo}/commits"
            commits_params = {
                'path': path,
                'sha': ref,
                'per_page': 1
            }

            commits_response = self._make_request('GET', commits_url, params=commits_params)
            commits_response.raise_for_status()
            commits = commits_response.json()

            if commits:
                return commits[0]['sha']

            # Fallback: use the ref as-is if it's already a SHA
            if len(ref) == 40 and all(c in '0123456789abcdef' for c in ref.lower()):
                return ref

            # Last resort: resolve ref to SHA
            ref_url = f"{self.base_url}/repos/{owner}/{repo}/git/ref/heads/{ref}"
            ref_response = self._make_request('GET', ref_url)
            if ref_response.status_code == 200:
                return ref_response.json()['object']['sha']

            # If ref is a tag
            ref_url = f"{self.base_url}/repos/{owner}/{repo}/git/ref/tags/{ref}"
            ref_response = self._make_request('GET', ref_url)
            if ref_response.status_code == 200:
                return ref_response.json()['object']['sha']

            return None

        except requests.RequestException as e:
            print(f"Error getting file SHA for {owner}/{repo}/{path}@{ref}: {e}", file=sys.stderr)
            return None

    def get_license(self, owner: str, repo: str, sha: str) -> Optional[str]:
        """Try to detect license from repository root at given SHA."""
        license_files = ['LICENSE', 'LICENSE.txt', 'LICENSE.md', 'LICENCE', 'LICENCE.txt']

        for license_file in license_files:
            url = f"{self.base_url}/repos/{owner}/{repo}/contents/{license_file}"
            params = {'ref': sha}

            try:
                response = self._make_request('GET', url, params=params)
                if response.status_code == 200:
                    # Found a license file, return URL to it
                    return f"https://raw.githubusercontent.com/{owner}/{repo}/{sha}/{license_file}"
            except requests.RequestException:
                continue

        # Try to get license from repository info
        try:
            repo_url = f"{self.base_url}/repos/{owner}/{repo}"
            response = self._make_request('GET', repo_url)
            response.raise_for_status()
            repo_info = response.json()
            license_info = repo_info.get('license')
            if license_info:
                return license_info.get('spdx_id') or license_info.get('url')
        except requests.RequestException:
            pass

        return None


def compute_sha256(file_path: Path) -> str:
    """Compute SHA256 checksum of a file."""
    sha256 = hashlib.sha256()
    with open(file_path, 'rb') as f:
        for chunk in iter(lambda: f.read(4096), b''):
            sha256.update(chunk)
    return sha256.hexdigest()


def download_file(url: str, dest_path: Path) -> bool:
    """Download a file from URL to destination path."""
    try:
        response = requests.get(url, stream=True, timeout=30)
        response.raise_for_status()

        # Create parent directories
        dest_path.parent.mkdir(parents=True, exist_ok=True)

        # Download file
        with open(dest_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

        return True
    except requests.RequestException as e:
        print(f"Error downloading {url}: {e}", file=sys.stderr)
        return False


def update_manifest_entry(
    entry: Dict,
    api: GitHubAPI,
    repo_root: Path,
    dry_run: bool = False
) -> Dict:
    """Update a single manifest entry by downloading and pinning the file."""
    source_repo = entry['source_repo']
    owner, repo = source_repo.split('/', 1)
    source_path = entry['source_path']
    source_ref = entry.get('source_ref', 'main')

    print(f"Processing {entry['id']} from {source_repo}/{source_path}@{source_ref}...")

    # Get commit SHA for the file
    commit_sha = api.get_file_sha(owner, repo, source_path, source_ref)
    if not commit_sha:
        print(f" Warning: Could not resolve SHA for {source_ref}, skipping", file=sys.stderr)
        entry['status'] = 'error'
        return entry

    # Build pinned raw URL
    pinned_raw_url = f"https://raw.githubusercontent.com/{owner}/{repo}/{commit_sha}/{source_path}"

    # Determine local path
    local_path = Path(entry['local_path'])
    if not local_path.is_absolute():
        local_path = repo_root / local_path

    if dry_run:
        print(f" [DRY RUN] Would download to {local_path}")
        print(f" [DRY RUN] Pinned SHA: {commit_sha}")
        entry['pinned_sha'] = commit_sha
        entry['pinned_raw_url'] = pinned_raw_url
        entry['last_checked'] = datetime.now(timezone.utc).isoformat()
        entry['upstream_latest_sha'] = commit_sha
        entry['status'] = 'up-to-date'
        return entry

    # Download file
    print(f" Downloading from {pinned_raw_url}...")
    if not download_file(pinned_raw_url, local_path):
        entry['status'] = 'error'
        return entry

    # Compute checksum
    checksum = compute_sha256(local_path)
    print(f" Checksum: {checksum[:16]}...")

    # Get license info
    license_info = api.get_license(owner, repo, commit_sha)

    # Update entry
    entry['pinned_sha'] = commit_sha
    entry['pinned_raw_url'] = pinned_raw_url
    entry['checksum_sha256'] = checksum
    entry['last_checked'] = datetime.now(timezone.utc).isoformat()
    entry['upstream_latest_sha'] = commit_sha
    entry['status'] = 'up-to-date'
    if license_info:
        entry['license'] = license_info

    return entry


def sync_to_site_json(entry: Dict, repo_root: Path) -> bool:
    """Sync vendor metadata back to the original site JSON file."""
    orig_json_path = entry.get('orig_site_json')
    orig_item_id = entry.get('orig_item_id')

    if not orig_json_path or not orig_item_id:
        return False

    json_path = repo_root / orig_json_path
    if not json_path.exists():
        print(f" Warning: Site JSON file not found: {json_path}", file=sys.stderr)
        return False

    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Find the printed part in the nested structure
        def find_and_update_part(obj, target_id, path=''):
            if isinstance(obj, dict):
                # Check if this is a printedParts array
                if 'printedParts' in obj and isinstance(obj['printedParts'], list):
                    for part in obj['printedParts']:
                        if isinstance(part, dict) and part.get('id') == target_id:
                            # Update this part
                            if 'vendor' not in part:
                                part['vendor'] = {}
                            part['vendor'].update({
                                'manifest_id': entry['id'],
                                'local_path': entry['local_path'],
                                'pinned_sha': entry['pinned_sha'],
                                'pinned_raw_url': entry['pinned_raw_url'],
                                'checksum_sha256': entry['checksum_sha256'],
                                'last_checked': entry['last_checked'],
                                'status': entry['status']
                            })
                            return True

                # Check bodyParts, knobs, etc.
                for key in ['bodyParts', 'knobs']:
                    if key in obj and isinstance(obj[key], list):
                        for part in obj[key]:
                            if isinstance(part, dict) and part.get('id') == target_id:
                                if 'vendor' not in part:
                                    part['vendor'] = {}
                                part['vendor'].update({
                                    'manifest_id': entry['id'],
                                    'local_path': entry['local_path'],
                                    'pinned_sha': entry['pinned_sha'],
                                    'pinned_raw_url': entry['pinned_raw_url'],
                                    'checksum_sha256': entry['checksum_sha256'],
                                    'last_checked': entry['last_checked'],
                                    'status': entry['status']
                                })
                                return True

                # Recursively search
                for value in obj.values():
                    if find_and_update_part(value, target_id):
                        return True

            elif isinstance(obj, list):
                for item in obj:
                    if find_and_update_part(item, target_id):
                        return True

            return False

        if not find_and_update_part(data, orig_item_id):
            print(f" Warning: Could not find part with id '{orig_item_id}' in {json_path}", file=sys.stderr)
            return False

        # Write back to file (preserve formatting)
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        print(f" Updated {json_path}")
        return True

    except (json.JSONDecodeError, IOError) as e:
        print(f" Error updating {json_path}: {e}", file=sys.stderr)
        return False


def main():
    parser = argparse.ArgumentParser(
        description='Download and pin external asset files from GitHub'
    )
    parser.add_argument(
        '--manifest',
        type=Path,
        default=Path('manifest/vendor_manifest.json'),
        help='Path to manifest file (default: manifest/vendor_manifest.json)'
    )
    parser.add_argument(
        '--entry',
        type=str,
        help='Process only a specific manifest entry by ID'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Show what would be done without downloading files'
    )
    parser.add_argument(
        '--sync-site',
        action='store_true',
        help='Sync vendor metadata back to site JSON files'
    )
    parser.add_argument(
        '--delay',
        type=float,
        default=0.5,
        help='Delay between API requests in seconds (default: 0.5)'
    )

    args = parser.parse_args()

    # Resolve paths
    script_dir = Path(__file__).parent.parent
    manifest_path = (script_dir / args.manifest).resolve()
    repo_root = script_dir

    if not manifest_path.exists():
        print(f"Error: Manifest file not found: {manifest_path}", file=sys.stderr)
        sys.exit(1)

    # Load manifest
    with open(manifest_path, 'r', encoding='utf-8') as f:
        manifest_data = json.load(f)

    # Convert to dict if it's a list
    if isinstance(manifest_data, list):
        manifest = {entry['id']: entry for entry in manifest_data}
    else:
        manifest = manifest_data

    # Filter entries if --entry specified
    if args.entry:
        if args.entry not in manifest:
            print(f"Error: Entry '{args.entry}' not found in manifest", file=sys.stderr)
            sys.exit(1)
        entries_to_process = {args.entry: manifest[args.entry]}
    else:
        entries_to_process = manifest

    # Initialize GitHub API with delay
    api = GitHubAPI(delay=args.delay)

    # Process entries
    updated_count = 0
    for entry_id, entry in entries_to_process.items():
        updated_entry = update_manifest_entry(entry, api, repo_root, dry_run=args.dry_run)
        manifest[entry_id] = updated_entry

        if args.sync_site and not args.dry_run:
            sync_to_site_json(updated_entry, repo_root)

        updated_count += 1

    # Write updated manifest
    if not args.dry_run:
        manifest_list = sorted(manifest.values(), key=lambda x: x['id'])
        with open(manifest_path, 'w', encoding='utf-8') as f:
            json.dump(manifest_list, f, indent=2, sort_keys=False)
        print(f"\nUpdated manifest with {updated_count} entries.")
    else:
        print(f"\n[DRY RUN] Would update {updated_count} entries.")


if __name__ == '__main__':
    main()
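For context, after vendor_update.py runs with --sync-site, each matched part gains a vendor block containing the fields written by sync_to_site_json(); the values below are placeholders rather than real SHAs, checksums, or timestamps.

# Hypothetical printedParts item after --sync-site (placeholder values).
part_after_sync = {
    'id': 'front-panel',
    'url': 'https://raw.githubusercontent.com/someuser/somerepo/main/stl/front_panel.stl',
    'vendor': {
        'manifest_id': 'front-panel',
        'local_path': 'vendor/someuser-somerepo/stl/front_panel.stl',
        'pinned_sha': '0123456789abcdef0123456789abcdef01234567',
        'pinned_raw_url': 'https://raw.githubusercontent.com/someuser/somerepo/0123456789abcdef0123456789abcdef01234567/stl/front_panel.stl',
        'checksum_sha256': '<sha256 of the downloaded file>',
        'last_checked': '<ISO 8601 UTC timestamp>',
        'status': 'up-to-date',
    },
}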