|
| 1 | +# Metee |
| 2 | + |
| 3 | +Tree-based machine learning library for Go. "Metee" means "trees" in Swahili/Kikuyu. |
| 4 | + |
| 5 | +Metee provides LightGBM and XGBoost bindings, ensemble methods, cross-validation, hyperparameter tuning, and a full feature-engineering pipeline — all behind clean Go interfaces. It is a standalone complement to the [Zerfoo](https://github.com/zerfoo/zerfoo) neural network framework with no dependency on it. |
| 6 | + |
| 7 | +## Features |
| 8 | + |
| 9 | +- **Model backends** — LightGBM and XGBoost via CGO bindings (optional build tags), with stubs for CGO-free builds |
| 10 | +- **Ensemble methods** — Rank-normalized blending and out-of-fold stacking |
| 11 | +- **Cross-validation** — Era-aware KFold and WalkForward splits |
| 12 | +- **Hyperparameter tuning** — Grid search and random search over typed parameter spaces |
| 13 | +- **Feature transforms** — Rank normalization, Gaussianization, SVD-based neutralization, exposure computation |
| 14 | +- **Metrics** — Pearson, Spearman, Sharpe ratio, max drawdown, feature-neutral correlation (FNC), per-era reports |
| 15 | +- **Data loading** — CSV and Parquet with streaming support |
| 16 | +- **Config** — Generic YAML loader with environment variable overrides and struct validation |
| 17 | +- **Training orchestrator** — Checkpointing, early stopping, and callback hooks |
| 18 | + |
| 19 | +## Install |
| 20 | + |
| 21 | +```bash |
| 22 | +go get github.com/zerfoo/metee |
| 23 | +``` |
| 24 | + |
| 25 | +Requires Go 1.25+. |
| 26 | + |
| 27 | +## Quick Start |
| 28 | + |
| 29 | +```go |
| 30 | +package main |
| 31 | + |
| 32 | +import ( |
| 33 | + "context" |
| 34 | + "fmt" |
| 35 | + "log" |
| 36 | + |
| 37 | + "github.com/zerfoo/metee/cv" |
| 38 | + "github.com/zerfoo/metee/data" |
| 39 | + "github.com/zerfoo/metee/metrics" |
| 40 | + "github.com/zerfoo/metee/registry" |
| 41 | + _ "github.com/zerfoo/metee/lightgbm" // register backend (requires -tags lightgbm) |
| 42 | +) |
| 43 | + |
| 44 | +func main() { |
| 45 | + ctx := context.Background() |
| 46 | + |
| 47 | + // Load data |
| 48 | + ds, err := data.LoadCSV("train.csv", data.CSVOptions{ |
| 49 | + TargetColumn: "target", |
| 50 | + IDColumn: "id", |
| 51 | + EraColumn: "era", |
| 52 | + }) |
| 53 | + if err != nil { |
| 54 | + log.Fatal(err) |
| 55 | + } |
| 56 | + |
| 57 | + // Get a model from the registry |
| 58 | + m, err := registry.GetBackend("lightgbm") |
| 59 | + if err != nil { |
| 60 | + log.Fatal(err) |
| 61 | + } |
| 62 | + |
| 63 | + // Cross-validate with walk-forward splits |
| 64 | + folds := cv.WalkForward(ds, 4, 2) |
| 65 | + results, err := cv.CrossValidate(ctx, m, ds, folds, metrics.Spearman) |
| 66 | + if err != nil { |
| 67 | + log.Fatal(err) |
| 68 | + } |
| 69 | + |
| 70 | + for i, r := range results { |
| 71 | + fmt.Printf("Fold %d: %.4f\n", i, r.Score) |
| 72 | + } |
| 73 | +} |
| 74 | +``` |
| 75 | + |
| 76 | +## Build Tags |
| 77 | + |
| 78 | +CGO backends are optional. Without build tags, stub implementations are compiled in their place: they return descriptive errors when called, so `go build ./...` always succeeds even when the native libraries are not installed. |
| 79 | + |
| 80 | +| Tag | Backend | Requirement | |
| 81 | +|-----|---------|-------------| |
| 82 | +| `lightgbm` | LightGBM | `libLightGBM` headers and shared library | |
| 83 | +| `xgboost` | XGBoost | `libxgboost` headers and shared library | |
| 84 | + |
| 85 | +```bash |
| 86 | +# Default build (stub backends, no CGO): |
| 87 | +go test ./... |
| 88 | + |
| 89 | +# With LightGBM: |
| 90 | +CGO_CFLAGS="-I/usr/local/include" CGO_LDFLAGS="-L/usr/local/lib -lLightGBM" \ |
| 91 | + go test -tags lightgbm ./... |
| 92 | + |
| 93 | +# With both backends (set CGO_CFLAGS/CGO_LDFLAGS as above if the libraries are not on the default search paths): |
| 94 | +go test -tags "lightgbm,xgboost" ./... |
| 95 | +``` |
| 96 | + |
| 97 | +## Package Overview |
| 98 | + |
| 99 | +| Package | Purpose | |
| 100 | +|---------|---------| |
| 101 | +| `model/` | `Model`, `Validator`, and `Configurable` interfaces | |
| 102 | +| `registry/` | Thread-safe backend registry (`RegisterBackend` / `GetBackend`) | |
| 103 | +| `lightgbm/` | LightGBM CGO bindings + stub | |
| 104 | +| `xgboost/` | XGBoost CGO bindings + stub | |
| 105 | +| `data/` | `Dataset` type, CSV and Parquet loaders | |
| 106 | +| `transform/` | Rank normalization, Gaussianization, neutralization, exposure, pipeline | |
| 107 | +| `metrics/` | Pearson, Spearman, Sharpe, max drawdown, FNC, per-era reports | |
| 108 | +| `cv/` | KFold, WalkForward splits, `CrossValidate` | |
| 109 | +| `tuning/` | Parameter spaces (`Discrete`, `Uniform`, `LogUniform`, `IntRange`), grid/random search | |
| 110 | +| `trainer/` | Training orchestrator with checkpointing and callbacks | |
| 111 | +| `ensemble/` | Rank blending and out-of-fold stacking | |
| 112 | +| `config/` | Generic YAML loader with env overrides and validation | |
| 113 | + |
| 114 | +## Interfaces |
| 115 | + |
| 116 | +### model.Model |
| 117 | + |
| 118 | +All backends implement the core model interface: |
| 119 | + |
| 120 | +```go |
| 121 | +type Model interface { |
| 122 | + Train(ctx context.Context, features [][]float64, targets []float64) error |
| 123 | + Predict(ctx context.Context, features [][]float64) ([]float64, error) |
| 124 | + Save(ctx context.Context, path string) error |
| 125 | + Load(ctx context.Context, path string) error |
| 126 | + Importance() (map[string]float64, error) |
| 127 | + Name() string |
| 128 | +} |
| 129 | +``` |
| 130 | + |
| 131 | +### model.Validator (optional) |
| 132 | + |
| 133 | +```go |
| 134 | +type Validator interface { |
| 135 | + Validate(ctx context.Context, features [][]float64, targets []float64) (map[string]float64, error) |
| 136 | +} |
| 137 | +``` |
| 138 | + |
| 139 | +### model.Configurable (optional) |
| 140 | + |
| 141 | +For runtime parameter updates during hyperparameter tuning: |
| 142 | + |
| 143 | +```go |
| 144 | +type Configurable interface { |
| 145 | + SetParams(params map[string]any) error |
| 146 | +} |
| 147 | +``` |
| 148 | + |
| 149 | +## Dependencies |
| 150 | + |
| 151 | +| Dependency | Purpose | |
| 152 | +|-----------|---------| |
| 153 | +| `gonum.org/v1/gonum` | Matrix operations (neutralization, FNC) | |
| 154 | +| `gopkg.in/yaml.v3` | YAML config parsing | |
| 155 | +| `github.com/parquet-go/parquet-go` | Parquet data loading | |
| 156 | + |
| 157 | +## A Product of [Feza, Inc.](https://feza.ai) |
0 commit comments