About
Statement of Purpose
0G Storage · 0xe4c1bb3a…99b6b3 ↗
Rendered from protocol.json
Experiment protocol
AgentRules
Autonomy
- NoAskHumanToContinue: Yes
- ExperimentTimeoutNotes: Treat runs exceeding 15 minutes as failed or hung.
- LogRedirectExample: uv run train.py > run.log 2>&1
- SimplicityCriterion: Yes
ArchetypeExtensions
Ml Train
- DataSnapshot: Local ~/.cache/autoresearch/ data and tokenizer prepared by uv run prepare.py.
- HardwareNotes: Apple Silicon MLX results are hardware-sensitive; compare against a fresh baseline on the same machine or clearly compatible hardware class.
Environment
AssetPrep
- Prepare local data and tokenizer cache at ~/.cache/autoresearch/ via uv run prepare.py.
Constraints
- NetworkPolicy: full
- NoNewDependencies: Yes
OsHints
- darwin-arm64
- Apple Silicon Mac
PackageManagers
- uv
SetupCommands
- uv sync
- uv run prepare.py
Execution
- Command: uv run train.py
- Cwd: .
Determinism
- Notes: Compare fresh baseline and trials on the same Apple Silicon hardware and prepared data snapshot.
- SeedPolicy: optional
- HardTimeoutSeconds: 900
StopCondition
- ExcludeCompilationFromBudget: Yes
- Notes: The script runs a fixed 5-minute training budget, with startup/compilation and final eval overhead outside the training timer.
- TrainingSecondsBudget: 300
- Type: wall_clock
ImmutableHarness
Paths
- prepare.py
- Rationale: prepare.py defines the data preparation, tokenizer, dataloader, fixed constants, and evaluate_bpb metric; modifying it would invalidate comparisons.
Measurement
BaselinePolicy
- BaselineNotes: Baseline was established on this hardware after uv run prepare.py using local ~/.cache/autoresearch/ data/tokenizer snapshot. FINAL_EVAL_BATCH_SIZE was reduced from 256 to 64 in train.py to fit this machine's Metal buffer limit while preserving the same total eval token budget.
- EstablishOnHardware: Yes
- SameDataSnapshot: Yes
PrimaryMetric
- Direction: minimize
Extract
- ExampleStdout:
---
val_bpb: 2.534000
training_seconds: 312.4
total_seconds: 405.7
peak_vram_mb: 27528.9
mfu_percent: 0.00
total_tokens_M: 39.8
num_steps: 46
num_params_M: 50.3
depth: 8
- Kind: regex
- Pattern: ^val_bpb:\s+([0-9]+(?:\.[0-9]+)?)
- Name: val_bpb
SecondaryMetrics
- - Direction: minimize
Extract
- ExampleStdout: peak_vram_mb: 27528.9
- Kind: regex
- Pattern: ^peak_vram_mb:\s+([0-9]+(?:\.[0-9]+)?)
- Name: peak_vram_mb
- - Direction: maximize
Extract
- ExampleStdout: num_steps: 46
- Kind: regex
- Pattern: ^num_steps:\s+([0-9]+)
- Name: num_steps
Meta
- Archetype: ml_train
- CreatedAt: 2026-05-03T15:04:16Z
- Eligibility: eligible
- ProtocolBundleId: trevin-creator-autoresearch-mlx-ba6ebf6-20260503
- PurposeStatement: Improve the Apple Silicon MLX training recipe to minimize validation bits-per-byte under a fixed local training budget.
Repo
- CloneUrl: https://github.com/trevin-creator/autoresearch-mlx
- DefaultBranch: main
- Name: autoresearch-mlx
- Owner: trevin-creator
- UpdatedAt: 2026-05-03T15:04:16Z
MutableSurface
AllowedGlobs
- train.py
AllowedKinds
- code_edit
ForbiddenGlobs
- prepare.py
- pyproject.toml
- uv.lock
ProtocolVersion: 1.0
Provenance
- BaselineArtifactPath: .autoresearch/create/baseline_run.log
GitWorkflow
- BranchPattern: autoresearch/<tag>
- CommitScope: Only stage experiment changes and result log updates for this repo.
- StagingExample: git add train.py results.tsv
ResultsLog
Columns
- commit
- val_bpb
- memory_gb
- status
- description
- Format: tsv
- Path: results.tsv
Safety
- CrashStatus: discard
- OomPolicy: reduce_batch
SchemaKind: protocol
protocol.json (raw)
{
"schemaKind": "protocol",
"protocolVersion": "1.0",
"meta": {
"archetype": "ml_train",
"eligibility": "eligible",
"repo": {
"owner": "trevin-creator",
"name": "autoresearch-mlx",
"defaultBranch": "main",
"cloneUrl": "https://github.com/trevin-creator/autoresearch-mlx"
},
"purposeStatement": "Improve the Apple Silicon MLX training recipe to minimize validation bits-per-byte under a fixed local training budget.",
"createdAt": "2026-05-03T15:04:16Z",
"updatedAt": "2026-05-03T15:04:16Z",
"protocolBundleId": "trevin-creator-autoresearch-mlx-ba6ebf6-20260503"
},
"environment": {
"osHints": [
"darwin-arm64",
"Apple Silicon Mac"
],
"packageManagers": [
"uv"
],
"setupCommands": [
"uv sync",
"uv run prepare.py"
],
"assetPrep": [
"Prepare local data and tokenizer cache at ~/.cache/autoresearch/ via uv run prepare.py."
],
"constraints": {
"noNewDependencies": true,
"networkPolicy": "full"
}
},
"mutableSurface": {
"allowedGlobs": [
"train.py"
],
"forbiddenGlobs": [
"prepare.py",
"pyproject.toml",
"uv.lock"
],
"allowedKinds": [
"code_edit"
]
},
"immutableHarness": {
"paths": [
"prepare.py"
],
"rationale": "prepare.py defines the data preparation, tokenizer, dataloader, fixed constants, and evaluate_bpb metric; modifying it would invalidate comparisons."
},
"execution": {
"command": "uv run train.py",
"cwd": ".",
"stopCondition": {
"type": "wall_clock",
"trainingSecondsBudget": 300,
"excludeCompilationFromBudget": true,
"notes": "The script runs a fixed 5-minute training budget, with startup/compilation and final eval overhead outside the training timer."
},
"hardTimeoutSeconds": 900,
"determinism": {
"seedPolicy": "optional",
"notes": "Compare fresh baseline and trials on the same Apple Silicon hardware and prepared data snapshot."
}
},
"measurement": {
"primaryMetric": {
"name": "val_bpb",
"direction": "minimize",
"extract": {
"kind": "regex",
"pattern": "^val_bpb:\\s+([0-9]+(?:\\.[0-9]+)?)",
"exampleStdout": "---\nval_bpb: 2.534000\ntraining_seconds: 312.4\ntotal_seconds: 405.7\npeak_vram_mb: 27528.9\nmfu_percent: 0.00\ntotal_tokens_M: 39.8\nnum_steps: 46\nnum_params_M: 50.3\ndepth: 8"
}
},
"secondaryMetrics": [
{
"name": "peak_vram_mb",
"direction": "minimize",
"extract": {
"kind": "regex",
"pattern": "^peak_vram_mb:\\s+([0-9]+(?:\\.[0-9]+)?)",
"exampleStdout": "peak_vram_mb: 27528.9"
}
},
{
"name": "num_steps",
"direction": "maximize",
"extract": {
"kind": "regex",
"pattern": "^num_steps:\\s+([0-9]+)",
"exampleStdout": "num_steps: 46"
}
}
],
"baselinePolicy": {
"establishOnHardware": true,
"sameDataSnapshot": true,
"baselineNotes": "Baseline was established on this hardware after uv run prepare.py using local ~/.cache/autoresearch/ data/tokenizer snapshot. FINAL_EVAL_BATCH_SIZE was reduced from 256 to 64 in train.py to fit this machine's Metal buffer limit while preserving the same total eval token budget."
}
},
"provenance": {
"resultsLog": {
"format": "tsv",
"path": "results.tsv",
"columns": [
"commit",
"val_bpb",
"memory_gb",
"status",
"description"
]
},
"gitWorkflow": {
"branchPattern": "autoresearch/<tag>",
"commitScope": "Only stage experiment changes and result log updates for this repo.",
"stagingExample": "git add train.py results.tsv"
},
"baselineArtifactPath": ".autoresearch/create/baseline_run.log"
},
"safety": {
"oomPolicy": "reduce_batch",
"crashStatus": "discard"
},
"agentRules": {
"simplicityCriterion": true,
"autonomy": {
"noAskHumanToContinue": true
},
"experimentTimeoutNotes": "Treat runs exceeding 15 minutes as failed or hung.",
"logRedirectExample": "uv run train.py > run.log 2>&1"
},
"archetypeExtensions": {
"ml_train": {
"dataSnapshot": "Local ~/.cache/autoresearch/ data and tokenizer prepared by uv run prepare.py.",
"hardwareNotes": "Apple Silicon MLX results are hardware-sensitive; compare against a fresh baseline on the same machine or clearly compatible hardware class."
}
}
}