3 changes: 2 additions & 1 deletion docs/Project.toml
@@ -21,8 +21,9 @@ ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
WordTokenizers = "796a5d58-b03d-544a-977e-18100b691f6e"

[compat]
Documenter = "1"
34 changes: 19 additions & 15 deletions docs/src/common_workflows/composition/notebook.md
@@ -18,27 +18,33 @@ learning model from MLJFlux.
````@example composition
using MLJ # Has MLJFlux models
using Flux # For more flexibility
import RDatasets # Dataset source
import Random # To create imbalance
import Imbalance # To solve the imbalance
import Optimisers # Flux.jl's native optimisers are no longer supported
using StableRNGs # for reproducibility
using CategoricalArrays
````

### Loading and Splitting the Data

````@example composition
iris = RDatasets.dataset("datasets", "iris");
y, X = unpack(iris, ==(:Species), rng=123);
X = Float32.(X); # To be compatible with the type of network parameters
iris = load_iris() # a named-tuple of vectors
species, X = unpack(iris, ==(:target), rng=StableRNG(123))
X = Flux.fmap(column-> Float32.(column), X) # Flux prefers Float32 data
nothing #hide
````
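
As a quick, optional sanity check (an aside, not part of the original workflow), we can confirm that the freshly loaded target is balanced, using the same `Imbalance.checkbalance` helper the tutorial calls below:

````julia
# the raw iris target has 50 observations per class, so this
# should report a perfectly uniform distribution:
Imbalance.checkbalance(species)
````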

To simulate an imbalanced dataset, we will take a random sample:
The iris dataset has a target with uniformly distributed values, `"versicolor"`,
`"setosa"`, and `"virginica"`. To manufacture an unbalanced dataset, we'll combine the
first two into a single class, `"colosa"`:

````@example composition
Random.seed!(803429)
subset_indices = rand(1:size(X, 1), 100)
X, y = X[subset_indices, :], y[subset_indices]
y = coerce(
    map(species) do species
        species == "virginica" ? unwrap(species) : "colosa"
    end,
    Multiclass,
)
Imbalance.checkbalance(y)
````

@@ -61,7 +67,7 @@ clf = NeuralNetworkClassifier(
optimiser=Optimisers.Adam(0.01),
batch_size=8,
epochs=50,
rng=42,
rng=StableRNG(123),
)
````
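
Swapping the integer seed for `StableRNG(123)` is what makes the example reproducible across Julia releases: the default RNG's stream may change between Julia versions, while `StableRNGs.jl` guarantees the same stream everywhere. A minimal illustration, independent of the tutorial code:

````julia
using StableRNGs

rng = StableRNG(123)  # seeded, version-stable RNG
rand(rng, 3)          # same three numbers on any Julia release
````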

@@ -83,20 +89,18 @@ pipeline = standarizer |> balanced_model

With this arrangement, any training data will first be standardized, then oversampled, then passed to the
model. Meanwhile, for inference, the standardizer will automatically use the training
set's mean and std and the oversampler will be transparent.
set's mean and std and the oversampler will play no role.
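
To make the train/inference asymmetry concrete, here is a rough sketch of the two phases written out by hand. This is not the pipeline's actual internals: `standarizer` and `clf` are the tutorial's objects, while `Xnew` and the functional `Imbalance.random_oversample` call are illustrative assumptions:

````julia
# training: standardize, oversample, then fit the classifier
mach_std = fit!(machine(standarizer, X))             # learns mean/std from X
Xstd = MLJ.transform(mach_std, X)
Xover, yover = Imbalance.random_oversample(Xstd, y)  # assumed oversampler call
mach_clf = fit!(machine(clf, Xover, yover))

# inference: reuse the training mean/std; the oversampler plays no role
Xnew_std = MLJ.transform(mach_std, Xnew)
yhat = predict(mach_clf, Xnew_std)
````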

### Training the Composed Model

It's indistinguishable from training a single model.
The pipeline model can be evaluated like any other model:

````@example composition
mach = machine(pipeline, X, y)
fit!(mach)
cv=CV(nfolds=5)
evaluate!(mach, resampling=cv, measure=accuracy)
cv=CV(nfolds=6)
evaluate!(mach, resampling=cv, measure=accuracy, verbosity=0)
````
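
The object returned by `evaluate!` can also be captured and inspected. A short follow-up sketch, using field names documented for MLJ's `PerformanceEvaluation`:

````julia
e = evaluate!(mach, resampling=cv, measure=accuracy, verbosity=0)
e.measurement[1]  # accuracy aggregated over all folds
e.per_fold[1]     # accuracy on each of the 6 folds
````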

---

*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).*
