Models
Linear models can be built from a variety of types by using the ModelMatrix
intermediate type. This allows us to get rid of some of the bloat from the previous example and create a fit on the iris data.
We can access the data from a fit via methods on the fit. For example, below we iterate over the parameters returned by parameters()
and call estimate() on each one to get its estimated value and confidence_interval()
to get its confidence interval.
use std::error::Error;

use strafe::datasets::iris;
use strafe::datasets::polars::prelude::*;
use strafe::tests::two_way::LeastSquaresRegressionBuilder;
use strafe::traits::{Model, ModelBuilder};
use strafe::types::ModelMatrix;

/// Fits a least-squares regression on the setosa rows of the iris data set,
/// using `Sepal.Length` as x and `Sepal.Width` as y, then prints each
/// parameter's estimate and confidence interval.
///
/// # Errors
///
/// Returns an error if the data set cannot be loaded, if either lazy query
/// fails to collect, or if the fit's parameters cannot be retrieved.
fn main() -> Result<(), Box<dyn Error>> {
    // Build the setosa filter once so both columns come from the same lazy
    // query instead of repeating the filter expression for each of them.
    let setosa = iris()?.lazy().filter(col("Species").eq(lit("setosa")));

    let x = setosa.clone().select(&[col("Sepal.Length")]).collect()?;
    let y = setosa.select(&[col("Sepal.Width")]).collect()?;

    // `ModelMatrix` is the intermediate type the builder accepts for its inputs.
    let mut fit = LeastSquaresRegressionBuilder::new()
        .with_x(&ModelMatrix::from(x))
        .with_y(&ModelMatrix::from(y))
        .build();

    // One output line per fitted parameter: estimate, then confidence interval.
    for p in fit.parameters()? {
        println!("{} {:?}", p.estimate(), p.confidence_interval());
    }

    Ok(())
}
-0.569432673039648 (-2.390925862499916, 1.2520605164206204)
0.7985283006471533 (0.43554707179202234, 1.1615095295022844)
Below is an example of fitting the model, printing the fit summary, and saving the fit, residual-leverage, and quantile-quantile plots as SVG files.
use std::error::Error;

use strafe::datasets::iris;
use strafe::datasets::polars::prelude::*;
use strafe::plots::prelude::*;
use strafe::tests::two_way::LeastSquaresRegressionBuilder;
use strafe::traits::ModelBuilder;
use strafe::types::ModelMatrix;

/// Fits a least-squares regression on the setosa rows of the iris data set,
/// using `Sepal.Length` as x and `Sepal.Width` as y, prints the fit, and
/// draws three plots of the fit to `fit.svg`, `resid_lev.svg` and `qq.svg`.
///
/// # Errors
///
/// Returns an error if the data set cannot be loaded or if either lazy query
/// fails to collect.
fn main() -> Result<(), Box<dyn Error>> {
    // Build the setosa filter once so both columns come from the same lazy
    // query instead of repeating the filter expression for each of them.
    let setosa = iris()?.lazy().filter(col("Species").eq(lit("setosa")));

    let x = setosa.clone().select(&[col("Sepal.Length")]).collect()?;
    let y = setosa.select(&[col("Sepal.Width")]).collect()?;

    // `ModelMatrix` is the intermediate type the builder accepts for its inputs.
    let mut fit = LeastSquaresRegressionBuilder::new()
        .with_x(&ModelMatrix::from(x))
        .with_y(&ModelMatrix::from(y))
        .build();

    // Print the fit using its `Display` output.
    println!("{fit}");

    // Each plot gets its own 1024x768 SVG drawing area with default options.
    let root = SVGBackend::new("fit.svg", (1024, 768)).into_drawing_area();
    fit.plot_fit(&root, &Default::default());

    let root = SVGBackend::new("resid_lev.svg", (1024, 768)).into_drawing_area();
    fit.plot_residual_leverage(&root, &Default::default());

    let root = SVGBackend::new("qq.svg", (1024, 768)).into_drawing_area();
    fit.plot_quantile_quantile(&root, &Default::default());

    Ok(())
}
Residuals:
┌─────────┬──────────────┬─────────┬──────────────┬─────────┐
│ Minimum │ 1st Quantile │ Median │ 3rd Quantile │ Maximum │
╞═════════╪══════════════╪═════════╪══════════════╪═════════╡
│ -0.7239 │ -0.1827 │ -0.0030 │ 0.1573 │ 0.5170 │
└─────────┴──────────────┴─────────┴──────────────┴─────────┘
Coefficients:
┌────┬──────────┬─────────────────────────┬─────────────────────────┬─────────┬────────────┐
│ │ Estimate │ Confidence Interval (L) │ Confidence Interval (U) │ T-Value │ P-Value │
╞════╪══════════╪═════════════════════════╪═════════════════════════╪═════════╪════════════╡
│ x0 │ -0.5694 │ -2.3909 │ 1.2520 │ -1.0914 │ 0.2805 │
│ x1 │ 0.7985 │ 0.4355 │ 1.1615 │ 7.6807 │ 6.7098e-10 │
└────┴──────────┴─────────────────────────┴─────────────────────────┴─────────┴────────────┘
Tests:
┌──────────────────────────────┬───────────┬────────────┬────────┐
│ │ Statistic │ P-Value │ Alpha │
╞══════════════════════════════╪═══════════╪════════════╪════════╡
│ Multiple R-squared (Robust) │ 0.9945 │ 0.0054 │ 0.1500 │
│ Significance of Regression │ 58.993 │ 6.7098e-10 │ 0.05 │
│ Shapiro-Wilk Normal Residual │ 0.9868 │ 0.8459 │ 0.05 │
└──────────────────────────────┴───────────┴────────────┴────────┘