Open-EO · PondiB · Nov 10, 2022 · May 25, 2023 · Jul 26, 2024 · Jul 29, 2024
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -14,7 +14,7 @@ jobs:
         uses: rlespinasse/github-slug-action@v3.x
       - uses: actions/setup-node@v3
         with:
-          node-version: 'lts/*'
+          node-version: "lts/*"
       - uses: actions/checkout@v3
       - run: |
           npm install
@@ -40,7 +40,7 @@ jobs:
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
           publish_dir: gh-pages
-          user_name: 'openEO CI'
+          user_name: "openEO CI"
           user_email: openeo.ci@uni-muenster.de
           cname: processes.openeo.org
       - name: deploy to ${{ env.GITHUB_REF_SLUG }}
@@ -50,5 +50,5 @@ jobs:
           github_token: ${{ secrets.GITHUB_TOKEN }}
           publish_dir: gh-pages
           destination_dir: ${{ env.GITHUB_REF_SLUG }}
-          user_name: 'openEO CI'
-          user_email: openeo.ci@uni-muenster.de
+          user_name: "openEO CI"
+          user_email: openeo.ci@uni-muenster.de
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -15,12 +15,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
     - `date_difference`
     - `filter_vector`
     - `flatten_dimensions`
+    - `import_cube`
+    - `import_ml_model`
     - `load_geojson`
     - `load_ml_model`
+    - `load_stac_ml`
     - `load_url`
-    - `ml_fit_class_random_forest`
-    - `ml_fit_regr_random_forest`
+    - `mlm_class_catboost`
+    - `mlm_class_lighttae`
+    - `mlm_class_mlp`
+    - `mlm_class_random_forest`
+    - `mlm_class_svm`
+    - `mlm_class_tae`
+    - `mlm_class_tempcnn`
+    - `mlm_class_xgboost`
+    - `mlm_regr_random_forest`
+    - `mlm_regr_svm`
+    - `ml_fit`
+    - `ml_label_class`
     - `ml_predict`
+    - `ml_predict_probabilities`
+    - `ml_smooth_class`
+    - `ml_uncertainty_class`
     - `save_ml_model`
     - `unflatten_dimension`
     - `vector_buffer`
@@ -379,4 +395,3 @@ Older versions of the processes were released as part of the openEO API, see the
 [0.4.2]: <https://github.com/Open-EO/openeo-processes/compare/0.4.1...0.4.2>
 [0.4.1]: <https://github.com/Open-EO/openeo-processes/compare/0.4.0...0.4.1>
 [0.4.0]: <https://github.com/Open-EO/openeo-processes/tree/0.4.0>
-
diff --git a/README.md b/README.md
@@ -40,7 +40,7 @@ This repository contains a set of files formally describing the openEO Processes
 ## Process
 
 * All new processes must be added to the [`proposals`](proposals/) folder.
-* Processes will only be moved from proposals to the stable process specifications once there are at least two implementations and an example process in the [`examples`](examples/) folder showing it in a use case. This doesn't require a PSC vote individually as it's not a breaking change, just an addition.
+* Processes will only be moved from proposals to the stable process specifications once there are at least two implementations and an example process in the [openEO community examples](https://github.com/Open-EO/openeo-community-examples/) repository showing it in a use case. This doesn't require a PSC vote individually as it's not a breaking change, just an addition.
 * The [`proposals`](proposals/) folder allows breaking changes without a PSC vote and without increasing the major version number (i.e. a breaking change in the proposals doesn't require us to make the next version number 2.0.0).
 * The proposals are released as experimental processes with the other processes.
 * Each release and all breaking changes in the stable process specifications must go through PSC vote.
diff --git a/proposals/load_ml_model.json b/proposals/load_ml_model.json
@@ -1,46 +1,43 @@
 {
     "id": "load_ml_model",
-    "summary": "Load a ML model",
-    "description": "Loads a machine learning model from a STAC Item.\n\nSuch a model could be trained and saved as part of a previous batch job with processes such as  ``ml_fit_regr_random_forest()`` and ``save_ml_model()``.",
+    "summary": "Load a machine learning model by ID",
+    "description": "Loads a machine learning model that is managed by the current back-end, identified by a back-end specific model identifier.\n\nThis allows the back-end to host and optimize models for efficient inference and training (e.g., via model-specific libraries such as Terratorch). Back-end specific models can be referenced by a string identifier similar to collections.\n\nIf you want to load a model from a STAC Item implementing the `mlm` extension, use `load_stac_ml()` instead.",
     "categories": [
         "machine learning",
         "import"
     ],
     "experimental": true,
     "parameters": [
         {
-            "name": "uri",
-            "description": "The STAC Item to load the machine learning model from. The STAC Item must implement the `ml-model` extension.",
-            "schema": [
-                {
-                    "title": "URL",
-                    "type": "string",
-                    "format": "uri",
-                    "subtype": "uri",
-                    "pattern": "^https?://"
-                },
-                {
-                    "title": "User-uploaded File",
-                    "type": "string",
-                    "subtype": "file-path",
-                    "pattern": "^[^\r\n\\:'\"]+$"
-                }
-            ]
+            "name": "id",
+            "description": "The back-end specific identifier of the machine learning model to load.",
+            "schema": {
+                "type": "string",
+                "pattern": "^[\\w\\-\\.~/]+$"
+            }
         }
     ],
     "returns": {
-        "description": "A machine learning model to be used with machine learning processes such as ``ml_predict()``.",
+        "description": "A machine learning model to be used with machine learning processes such as `ml_predict()`.",
         "schema": {
             "type": "object",
             "subtype": "ml-model"
         }
     },
     "links": [
         {
-            "href": "https://github.com/stac-extensions/ml-model",
-            "title": "STAC ml-model extension",
+            "href": "https://github.com/stac-extensions/mlm",
+            "title": "Machine Learning Model STAC extension",
+            "type": "text/html",
+            "rel": "about"
+        },
+        {
+            "href": "https://openeo.org/documentation/1.0/",
+            "title": "openEO API documentation",
             "type": "text/html",
             "rel": "about"
         }
     ]
 }
+
+
diff --git a/proposals/load_stac_ml.json b/proposals/load_stac_ml.json
@@ -0,0 +1,73 @@
+{
+    "id": "load_stac_ml",
+    "summary": "Load a ML model from a STAC Item",
+    "description": "Loads a machine learning model from a STAC Item.\n\nSuch a model could have been trained and saved as part of a previous batch job with processes such as ``ml_fit()`` and ``save_ml_model()``, or it could be an externally hosted model.",
+    "categories": [
+        "machine learning",
+        "import"
+    ],
+    "experimental": true,
+    "parameters": [
+        {
+            "name": "uri",
+            "description": "The STAC Item to load the machine learning model from. The STAC Item must implement the [`mlm`](https://github.com/stac-extensions/mlm) extension. This parameter can point to a remote STAC Item via ``URL`` or a local JSON file.",
+            "schema": [
+                {
+                    "title": "URL",
+                    "type": "string",
+                    "format": "uri",
+                    "subtype": "uri",
+                    "pattern": "^https?://"
+                },
+                {
+                    "title": "User-uploaded File",
+                    "type": "string",
+                    "subtype": "file-path",
+                    "pattern": "^[^\r\n\\:'\"]+$"
+                }
+            ]
+        },
+        {
+            "name": "model_asset",
+            "description": "The Asset name of the given STAC Item which represents the actual ML model. The asset must list ``mlm:model`` as its role. If only one asset lists ``mlm:model`` as its role, this parameter is optional as this asset will be used by default. If multiple assets list ``mlm:model`` as their role, this parameter is required to determine which asset to use.",
+            "schema": {
+                "type": "string"
+            },
+            "default": null,
+            "optional": true
+        },
+        {
+            "name": "input_index",
+            "description": "STAC MLM Items support multiple ML model input specifications. This parameter specifies the index of the input specification in the ``mlm:input`` array to use for prediction or training. As ``mlm:input`` is an array, the first input in the array has index 0.",
+            "schema": {
+                "type": "integer"
+            },
+            "default": 0,
+            "optional": true
+        },
+        {
+            "name": "output_index",
+            "description": "STAC MLM Items support multiple ML model output specifications. This parameter specifies the index of the output specification in the ``mlm:output`` array to use for prediction or training. As ``mlm:output`` is an array, the first output in the array has index 0.",
+            "schema": {
+                "type": "integer"
+            },
+            "default": 0,
+            "optional": true
+        }
+    ],
+    "returns": {
+        "description": "A machine learning model to be used with machine learning processes such as ``ml_predict()``.",
+        "schema": {
+            "type": "object",
+            "subtype": "ml-model"
+        }
+    },
+    "links": [
+        {
+            "href": "https://github.com/stac-extensions/mlm",
+            "title": "Machine Learning Model STAC extension",
+            "type": "text/html",
+            "rel": "about"
+        }
+    ]
+}
diff --git a/proposals/ml_fit.json b/proposals/ml_fit.json
@@ -0,0 +1,67 @@
+{
+    "id": "ml_fit",
+    "summary": "Train a machine learning model",
+    "description": "Executes the fit of a specified machine learning model based on training data.\n\nThe function is generic and supports different machine learning models.",
+    "categories": [
+        "machine learning"
+    ],
+    "experimental": true,
+    "parameters": [
+        {
+            "name": "model",
+            "description": "The machine learning model to be trained. This should be an instance of a model that supports the `ml_fit` method.",
+            "schema": {
+                "type": "object",
+                "subtype": "ml-model"
+            }
+        },
+        {
+            "name": "training_set",
+            "description": "The training set for the model, provided as a vector data cube. This set contains both the independent variables and the dependent variable that the model analyzes to learn patterns and relationships within the data.",
+            "schema": [
+                {
+                    "type": "object",
+                    "subtype": "datacube",
+                    "dimensions": [
+                        {
+                            "type": "geometry"
+                        },
+                        {
+                            "type": "bands"
+                        }
+                    ]
+                },
+                {
+                    "type": "object",
+                    "subtype": "datacube",
+                    "dimensions": [
+                        {
+                            "type": "geometry"
+                        },
+                        {
+                            "type": "other"
+                        }
+                    ]
+                },
+                {
+                    "type": "string"
+                }
+            ]
+        },
+        {
+            "name": "target",
+            "description": "The name of the variable in the training set that serves as the target for model training.",
+            "schema": {
+                "type": "string"
+            }
+        }
+    ],
+    "returns": {
+        "description": "A trained model object that can be saved with `save_ml_model()` and restored with `load_stac_ml()`.",
+        "schema": {
+            "type": "object",
+            "subtype": "ml-model"
+        }
+    }
+}
+
diff --git a/proposals/ml_label_class.json b/proposals/ml_label_class.json
@@ -0,0 +1,38 @@
+{
+    "id": "ml_label_class",
+    "summary": "Convert probability data cube to labeled data cube",
+    "description": "Converts a probability data cube to a labeled data cube by applying softmax normalization and selecting the class with the highest probability. Optionally allows mapping of class indices to custom labels.",
+    "categories": [
+        "machine learning"
+    ],
+    "experimental": true,
+    "parameters": [
+        {
+            "name": "data",
+            "description": "The input probability data cube to be labeled. Each band should represent the probability of a different class.",
+            "schema": {
+                "type": "object",
+                "subtype": "datacube"
+            }
+        },
+        {
+            "name": "labels",
+            "description": "Optional dictionary mapping class indices to custom labels. The dictionary keys are the class indices (as integers) and the values are the custom labels for each class. If not provided, the class indices will be used as labels.",
+            "optional": true,
+            "default": null,
+            "schema": {
+                "type": [
+                    "object",
+                    "null"
+                ]
+            }
+        }
+    ],
+    "returns": {
+        "description": "A labeled data cube where each pixel contains the class label with the highest probability after softmax normalization.",
+        "schema": {
+            "type": "object",
+            "subtype": "datacube"
+        }
+    }
+} 
diff --git a/proposals/ml_predict.json b/proposals/ml_predict.json
@@ -17,21 +17,11 @@
         },
         {
             "name": "model",
-            "description": "A ML model that was trained with one of the ML training processes such as ``ml_fit_regr_random_forest()``.",
+            "description": "A ML model that was trained.",
             "schema": {
                 "type": "object",
                 "subtype": "ml-model"
             }
-        },
-        {
-            "name": "dimensions",
-            "description": "Zero or more dimensions that will be reduced by the model. Fails with a `DimensionNotAvailable` exception if one of the specified dimensions does not exist.",
-            "schema": {
-                "type": "array",
-                "items": {
-                    "type": "string"
-                }
-            }
         }
     ],
     "returns": {
@@ -47,3 +37,4 @@
         }
     }
 }
+
diff --git a/proposals/ml_predict_probabilities.json b/proposals/ml_predict_probabilities.json
@@ -0,0 +1,39 @@
+{
+    "id": "ml_predict_probabilities",
+    "summary": "Predict class probabilities using ML",
+    "description": "Applies a machine learning model to a data cube of input features and returns the predicted class probabilities.",
+    "categories": [
+        "machine learning"
+    ],
+    "experimental": true,
+    "parameters": [
+        {
+            "name": "data",
+            "description": "The data cube containing the input features.",
+            "schema": {
+                "type": "object",
+                "subtype": "datacube"
+            }
+        },
+        {
+            "name": "model",
+            "description": "A ML model that was trained and supports probability predictions.",
+            "schema": {
+                "type": "object",
+                "subtype": "ml-model"
+            }
+        }
+    ],
+    "returns": {
+        "description": "A data cube with the predicted class probabilities. The dimensions consumed by the model are removed and a new dimension for the class probabilities is added. The dimension has the name `classes` and is of type `other`. Each label in the dimension represents a class, and the values are the probabilities for each class.",
+        "schema": {
+            "type": "object",
+            "subtype": "datacube",
+            "dimensions": [
+                {
+                    "type": "other"
+                }
+            ]
+        }
+    }
+}