Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
feat: update download script
  • Loading branch information
hhzhang16 committed Aug 11, 2025
commit b629059f31d9d35377657ed02ed29f1fe237f032
107 changes: 72 additions & 35 deletions benchmarks/profiler/download_pvc_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,17 @@
It creates the necessary access pod, downloads the files, and cleans up automatically.

Usage:
python3 download_pvc_results.py --namespace <namespace> --output-dir <local_directory>
python3 download_pvc_results.py --namespace <namespace> --output-dir <local_directory> [--no-config]

Examples:
# Download to ./results directory
python3 download_pvc_results.py --namespace <namespace> --output-dir ./results

# Download to specific directory
python3 download_pvc_results.py --namespace <namespace> --output-dir /home/user/profiling_data

# Download without configuration files
python3 download_pvc_results.py --namespace <namespace> --output-dir ./results --no-config
"""

import argparse
Expand Down Expand Up @@ -57,7 +60,7 @@ def deploy_access_pod(namespace: str) -> str:

# Check if pod already exists and is running
try:
result = run_command(
result = subprocess.run(
[
"kubectl",
"get",
Expand All @@ -69,12 +72,14 @@ def deploy_access_pod(namespace: str) -> str:
"jsonpath={.status.phase}",
],
capture_output=True,
text=True,
check=False,
)

if result.stdout.strip() == "Running":
if result.returncode == 0 and result.stdout.strip() == "Running":
print(f"✓ Access pod '{pod_name}' already running")
return pod_name
except subprocess.CalledProcessError:
except Exception:
# Pod doesn't exist or isn't running
pass

Expand All @@ -99,7 +104,7 @@ def deploy_access_pod(namespace: str) -> str:
# Wait for pod to be ready (up to 60 seconds)
for i in range(60):
try:
result = run_command(
result = subprocess.run(
[
"kubectl",
"get",
Expand All @@ -111,13 +116,15 @@ def deploy_access_pod(namespace: str) -> str:
"jsonpath={.status.phase}",
],
capture_output=True,
text=True,
check=False,
)

if result.stdout.strip() == "Running":
if result.returncode == 0 and result.stdout.strip() == "Running":
print("✓ Access pod is ready")
return pod_name

except subprocess.CalledProcessError:
except Exception:
pass

time.sleep(1)
Expand All @@ -128,40 +135,50 @@ def deploy_access_pod(namespace: str) -> str:
sys.exit(1)


def list_pvc_contents(namespace: str, pod_name: str) -> List[str]:
def list_pvc_contents(
namespace: str, pod_name: str, skip_config: bool = False
) -> List[str]:
"""List contents of the PVC to identify relevant files."""
print("Scanning PVC contents...")

try:
result = run_command(
# Build find command with optional config file exclusion
find_cmd = [
"kubectl",
"exec",
pod_name,
"-n",
namespace,
"--",
"find",
"/profiling_results",
"-type",
"f",
"-name",
"*.png",
"-o",
"-name",
"*.npz",
]

# Add config file patterns if not skipping them
if not skip_config:
find_cmd.extend(
[
"kubectl",
"exec",
pod_name,
"-n",
namespace,
"--",
"find",
"/profiling_results",
"-type",
"f",
"-name",
"*.png",
"-o",
"-name",
"*.npz",
"-o",
"-name",
"*.yaml",
"-o",
"-name",
"*.yml",
],
capture_output=True,
]
)

try:
result = run_command(find_cmd, capture_output=True)

files = [f.strip() for f in result.stdout.split("\n") if f.strip()]
print(f"Found {len(files)} relevant files to download")
config_note = " (excluding config files)" if skip_config else ""
print(f"Found {len(files)} relevant files to download{config_note}")
return files

except subprocess.CalledProcessError:
Expand Down Expand Up @@ -213,20 +230,27 @@ def download_files(
print(f"✓ Download completed: {downloaded} successful, {failed} failed")


def download_summary_files(namespace: str, pod_name: str, output_dir: Path) -> None:
def download_summary_files(
namespace: str, pod_name: str, output_dir: Path, skip_config: bool = False
) -> None:
"""Download key summary files that might not match the pattern."""
summary_files = [
"/profiling_results/prefill_performance.png",
"/profiling_results/decode_performance.png",
"/profiling_results/disagg.yaml", # In case it was injected
]

# Add config files if not skipping them
if not skip_config:
summary_files.append(
"/profiling_results/disagg.yaml"
) # In case it was injected

print("Downloading summary files...")

for file_path in summary_files:
try:
# Check if file exists first
run_command(
# Check if file exists first using subprocess.run directly
result = subprocess.run(
[
"kubectl",
"exec",
Expand All @@ -239,8 +263,14 @@ def download_summary_files(namespace: str, pod_name: str, output_dir: Path) -> N
file_path,
],
capture_output=True,
text=True,
check=False,
)

if result.returncode != 0:
# File doesn't exist, skip silently
continue

# File exists, download it
rel_path = file_path.replace("/profiling_results/", "")
local_file = output_dir / rel_path
Expand All @@ -258,8 +288,9 @@ def download_summary_files(namespace: str, pod_name: str, output_dir: Path) -> N

print(f" ✓ {rel_path}")

except subprocess.CalledProcessError:
except Exception as e:
# Checking for or downloading this file failed; warn and continue with the next one
print(f" ⚠️ Skipped {file_path.split('/')[-1]}: {e}")
pass


Expand Down Expand Up @@ -342,6 +373,12 @@ def main():
help="Local directory to download results to",
)

parser.add_argument(
"--no-config",
action="store_true",
help="Skip downloading configuration files (*.yaml, *.yml)",
)

args = parser.parse_args()

print("📥 PVC Results Download")
Expand All @@ -354,11 +391,11 @@ def main():
pod_name = deploy_access_pod(args.namespace)

# List and download files
files = list_pvc_contents(args.namespace, pod_name)
files = list_pvc_contents(args.namespace, pod_name, args.no_config)
download_files(args.namespace, pod_name, files, args.output_dir)

# Download additional summary files
download_summary_files(args.namespace, pod_name, args.output_dir)
download_summary_files(args.namespace, pod_name, args.output_dir, args.no_config)

# Generate README
generate_readme(args.output_dir, len(files))
Expand Down
3 changes: 3 additions & 0 deletions docs/architecture/pre_deployment_profiling.md
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,9 @@ python3 benchmarks/profiler/download_pvc_results.py --namespace $NAMESPACE --out

# Download to specific directory
python3 benchmarks/profiler/download_pvc_results.py --namespace $NAMESPACE --output-dir /path/to/my/results

# Download without the configuration files (*.yaml, *.yml) that are auto-created during profiling
python3 benchmarks/profiler/download_pvc_results.py --namespace $NAMESPACE --output-dir ./results --no-config
```

The script will:
Expand Down
Loading