diff --git a/sygra/core/eval/metrics/aggregator_metrics/pass_at_k.py b/sygra/core/eval/metrics/aggregator_metrics/pass_at_k.py
new file mode 100644
index 00000000..a2877832
--- /dev/null
+++ b/sygra/core/eval/metrics/aggregator_metrics/pass_at_k.py
@@ -0,0 +1,126 @@
+"""
+Pass@k Metrics
+"""
+
+import math
+from typing import Any, Dict, List
+
+from pydantic import BaseModel, Field, field_validator
+
+from sygra.core.eval.metrics.aggregator_metrics.aggregator_metric_registry import aggregator_metric
+from sygra.core.eval.metrics.aggregator_metrics.base_aggregator_metric import BaseAggregatorMetric
+from sygra.core.eval.metrics.base_metric_metadata import BaseMetricMetadata
+from sygra.core.eval.metrics.unit_metrics.unit_metric_result import UnitMetricResult
+from sygra.logger.logger_config import logger
+
+
+class PassAtKMetricConfig(BaseModel):
+    """Configuration for PassAtK Metric"""
+
+    k: int = Field(..., description="Number of samples to draw")
+
+    @field_validator("k")
+    @classmethod
+    def validate_k(cls, v):
+        if v is None or v <= 0:
+            raise ValueError(
+                "value of k is required and must be positive (cannot be None, zero, or negative)"
+            )
+        return v
+
+
+@aggregator_metric("pass@k")
+class PassAtKMetric(BaseAggregatorMetric):
+    """Calculate pass@k metric: probability that at least one of k independent attempts will succeed.
+
+    Required configuration:
+        k: Number of samples to draw
+    """
+
+    def __init__(self, **config):
+        """Initialize pass@k metric with two-phase initialization."""
+        super().__init__(**config)
+        self.validate_config()
+        self.metadata = self.get_metadata()
+
+    def validate_config(self):
+        """Validate and store pass@k-specific configuration requirements"""
+        # Validate using Pydantic config class
+        config_obj = PassAtKMetricConfig(**self.config)
+
+        # Store validated fields as instance attributes
+        self.k = config_obj.k
+
+    def get_metadata(self) -> BaseMetricMetadata:
+        """Return metadata for the pass@k metric"""
+        return BaseMetricMetadata(
+            name="pass@k",
+            display_name="Pass@k",
+            description="Probability that at least one of k independent attempts will succeed.",
+            range=(0.0, 1.0),
+            higher_is_better=True,
+            metric_type="industry",
+        )
+
+    def calculate(self, results: List[UnitMetricResult]) -> Dict[str, Any]:
+        """Calculate Pass@k score.
+
+        Args:
+            results: List of UnitMetricResult
+
+        Returns:
+            dict: {"pass@k": float (0.0 to 1.0)}
+        """
+        if not results:
+            logger.warning(f"{self.__class__.__name__}: No results provided")
+            return {
+                "pass@k": 0.0,
+            }
+        # Total number of attempts/samples
+        n = len(results)
+        # Number of correct solutions
+        c = self._count_correct(results)
+        pass_at_k_value = self.pass_at_k(n, c, self.k)
+
+        return {
+            "pass@k": pass_at_k_value,
+        }
+
+    @staticmethod
+    def pass_at_k(n: int, c: int, k: int) -> float:
+        """Calculate pass@k metric: probability that at least one of k independent attempts will succeed.
+
+        Args:
+            n (int): Total number of attempts/samples
+            c (int): Number of correct solutions
+            k (int): Number of samples to draw
+
+        Returns:
+            float: Pass@k probability (0 to 1)
+
+        Raises:
+            ValueError: If invalid parameters are provided
+        """
+        if n <= 0 or c < 0 or k <= 0:
+            raise ValueError("n and k must be positive, c must be non-negative")
+        if c > n:
+            raise ValueError("Number of correct solutions (c) cannot exceed total attempts (n)")
+        if k > n:
+            raise ValueError("Sample size (k) cannot exceed total attempts (n)")
+
+        # If all solutions are correct, pass@k = 1
+        if c == n:
+            return 1.0
+
+        # If no solutions are correct, pass@k = 0
+        if c == 0:
+            return 0.0
+
+        # Calculate using the complement: 1 - P(all k samples are incorrect)
+        # P(all incorrect) = C(n-c, k) / C(n, k)
+        try:
+            prob_all_incorrect = math.comb(n - c, k) / math.comb(n, k)
+            return 1.0 - prob_all_incorrect
+        except (ValueError, ZeroDivisionError):
+            # Handle edge cases where combinations are invalid
+            return 0.0
diff --git a/sygra/core/eval/metrics/aggregator_metrics/pass_power_k.py b/sygra/core/eval/metrics/aggregator_metrics/pass_power_k.py
new file mode 100644
index 00000000..61efd068
--- /dev/null
+++ b/sygra/core/eval/metrics/aggregator_metrics/pass_power_k.py
@@ -0,0 +1,100 @@
+"""
+Pass^k Metrics
+"""
+
+from typing import Any, Dict, List
+
+from sygra.core.eval.metrics.aggregator_metrics.aggregator_metric_registry import aggregator_metric
+from sygra.core.eval.metrics.aggregator_metrics.base_aggregator_metric import BaseAggregatorMetric
+from sygra.core.eval.metrics.aggregator_metrics.pass_at_k import PassAtKMetricConfig
+from sygra.core.eval.metrics.base_metric_metadata import BaseMetricMetadata
+from sygra.core.eval.metrics.unit_metrics.unit_metric_result import UnitMetricResult
+from sygra.logger.logger_config import logger
+
+
+@aggregator_metric("pass^k")
+class PassPowerKMetric(BaseAggregatorMetric):
+    """Calculate pass^k metric: probability that an agent would succeed on all k independent attempts.
+
+    Required configuration:
+        k: Number of samples to draw
+    """
+
+    def __init__(self, **config):
+        """Initialize pass^k metric with two-phase initialization."""
+        super().__init__(**config)
+        self.validate_config()
+        self.metadata = self.get_metadata()
+
+    def validate_config(self):
+        """Validate and store pass^k-specific configuration requirements"""
+        # Validate using Pydantic config class (pass^k reuses the pass@k config: a single k)
+        config_obj = PassAtKMetricConfig(**self.config)
+
+        # Store validated fields as instance attributes
+        self.k = config_obj.k
+
+    def get_metadata(self) -> BaseMetricMetadata:
+        """Return metadata for the pass^k metric"""
+        return BaseMetricMetadata(
+            name="pass^k",
+            display_name="Pass^k",
+            description="Probability that an agent would succeed on all k independent attempts.",
+            range=(0.0, 1.0),
+            higher_is_better=True,
+            metric_type="industry",
+        )
+
+    def calculate(self, results: List[UnitMetricResult]) -> Dict[str, Any]:
+        """Calculate Pass^k score.
+
+        Args:
+            results: List of UnitMetricResult
+
+        Returns:
+            dict: Dictionary containing metrics and related information
+                {"success_rate": float (0.0 to 1.0), "pass^k": float (0.0 to 1.0)}
+
+        Raises:
+            ValueError: If invalid parameters are provided
+        """
+        if not results:
+            logger.warning(f"{self.__class__.__name__}: No results provided")
+            return {"success_rate": 0.0, "pass^k": 0.0}
+        # Total number of attempts/samples
+        n = len(results)
+        # Number of correct solutions
+        c = self._count_correct(results)
+
+        if n <= 0:
+            raise ValueError("Total attempts (n) must be positive")
+        if c < 0:
+            raise ValueError("Correct solutions (c) must be non-negative")
+        if c > n:
+            raise ValueError("Correct solutions (c) cannot exceed total attempts (n)")
+
+        success_rate = self._safe_divide(c, n)
+        pass_power_k_value = self.pass_power_k(success_rate, self.k)
+
+        return {"success_rate": success_rate, "pass^k": pass_power_k_value}
+
+    @staticmethod
+    def pass_power_k(success_rate: float, k: int) -> float:
+        """Calculate pass^k metric: probability that an agent would succeed on all k independent attempts.
+
+        Args:
+            success_rate (float): Raw success rate on a single attempt (0 to 1)
+            k (int): Number of consecutive attempts
+
+        Returns:
+            float: Pass^k probability (0 to 1)
+
+        Raises:
+            ValueError: If invalid parameters are provided
+        """
+        if not 0 <= success_rate <= 1:
+            raise ValueError("Success rate must be between 0 and 1")
+        if k <= 0:
+            raise ValueError("k must be positive")
+
+        return success_rate**k
diff --git a/tests/core/eval/metrics/aggregator_metrics/test_pass_at_k.py b/tests/core/eval/metrics/aggregator_metrics/test_pass_at_k.py
new file mode 100644
index 00000000..0a446e05
--- /dev/null
+++ b/tests/core/eval/metrics/aggregator_metrics/test_pass_at_k.py
@@ -0,0 +1,255 @@
+"""
+Unit tests for PassAtKMetric
+Tests pass@k calculation from unit metric results.
+""" + +import os +import sys + +# Add project root to sys.path for relative imports to work +sys.path.insert( + 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "..")) +) + +import pytest + +from sygra.core.eval.metrics.aggregator_metrics.pass_at_k import PassAtKMetric +from sygra.core.eval.metrics.unit_metrics.unit_metric_result import UnitMetricResult +from sygra.logger.logger_config import logger + + +class TestPassAtKMetric: + """Test suite for PassAtKMetric""" + + def test_get_metric_name(self): + """Test that metric name is 'pass@k'""" + metric = PassAtKMetric(k=1) + assert metric.get_metric_name() == "pass@k" + + def test_calculate_empty_results(self): + """Test calculate with empty results list""" + metric = PassAtKMetric(k=1) + results = [] + output = metric.calculate(results) + + assert "pass@k" in output + assert output["pass@k"] == 0.0 + + def test_calculate_all_correct(self): + """Test calculate when all predictions are correct""" + metric = PassAtKMetric(k=1) + results = [ + UnitMetricResult( + correct=True, + golden={"class": "A"}, + predicted={"class": "A"}, + metadata={"id": 1}, + ), + UnitMetricResult( + correct=True, + golden={"class": "B"}, + predicted={"class": "B"}, + metadata={"id": 2}, + ), + UnitMetricResult( + correct=True, + golden={"class": "C"}, + predicted={"class": "C"}, + metadata={"id": 3}, + ), + ] + output = metric.calculate(results) + + assert "pass@k" in output + assert output["pass@k"] == 1.0 + + def test_calculate_all_incorrect(self): + """Test calculate when all predictions are incorrect""" + metric = PassAtKMetric(k=1) + results = [ + UnitMetricResult( + correct=False, + golden={"class": "A"}, + predicted={"class": "B"}, + metadata={"id": 1}, + ), + UnitMetricResult( + correct=False, + golden={"class": "B"}, + predicted={"class": "C"}, + metadata={"id": 2}, + ), + UnitMetricResult( + correct=False, + golden={"class": "C"}, + predicted={"class": "A"}, + metadata={"id": 3}, + ), + ] + output = metric.calculate(results) + + assert "pass@k" in output + assert output["pass@k"] == 0.0 + + def test_calculate_mixed_results(self): + """Test calculate with mixed correct/incorrect predictions""" + metric = PassAtKMetric(k=1) + results = [ + UnitMetricResult( + correct=True, + golden={"class": "A"}, + predicted={"class": "A"}, + metadata={"id": 1}, + ), + UnitMetricResult( + correct=False, + golden={"class": "B"}, + predicted={"class": "C"}, + metadata={"id": 2}, + ), + UnitMetricResult( + correct=True, + golden={"class": "C"}, + predicted={"class": "C"}, + metadata={"id": 3}, + ), + UnitMetricResult( + correct=False, + golden={"class": "D"}, + predicted={"class": "A"}, + metadata={"id": 4}, + ), + ] + output = metric.calculate(results) + + assert "pass@k" in output + assert output["pass@k"] == 0.5 # 2 correct out of 4 + + def test_calculate_single_correct_result(self): + """Test calculate with single correct result""" + metric = PassAtKMetric(k=1) + results = [ + UnitMetricResult( + correct=True, + golden={"class": "A"}, + predicted={"class": "A"}, + metadata={"id": 1}, + ), + ] + output = metric.calculate(results) + + assert "pass@k" in output + assert output["pass@k"] == 1.0 + + def test_calculate_single_incorrect_result(self): + """Test calculate with single incorrect result""" + metric = PassAtKMetric(k=1) + results = [ + UnitMetricResult( + correct=False, + golden={"class": "A"}, + predicted={"class": "B"}, + metadata={"id": 1}, + ), + ] + output = metric.calculate(results) + + assert "pass@k" in output + assert 
output["pass@k"] == 0.0 + + def test_calculate_various_pass_at_k_values(self): + """Test calculate with various pass@k percentages""" + + # 75% pass@k (3 out of 4) + results = [ + UnitMetricResult(correct=True, golden={}, predicted={}), + UnitMetricResult(correct=True, golden={}, predicted={}), + UnitMetricResult(correct=True, golden={}, predicted={}), + UnitMetricResult(correct=False, golden={}, predicted={}), + ] + output = PassAtKMetric(k=1).calculate(results) + assert output["pass@k"] == 0.75 + + output = PassAtKMetric(k=2).calculate(results) + assert output["pass@k"] == 1.0 + + # 60% pass@k (3 out of 5) + results = [ + UnitMetricResult(correct=True, golden={}, predicted={}), + UnitMetricResult(correct=True, golden={}, predicted={}), + UnitMetricResult(correct=True, golden={}, predicted={}), + UnitMetricResult(correct=False, golden={}, predicted={}), + UnitMetricResult(correct=False, golden={}, predicted={}), + ] + output = PassAtKMetric(k=1).calculate(results) + assert output["pass@k"] == 0.6 + + output = PassAtKMetric(k=2).calculate(results) + assert output["pass@k"] == 0.9 + + output = PassAtKMetric(k=3).calculate(results) + assert output["pass@k"] == 1.0 + + # 33.33% pass@k (1 out of 3) + results = [ + UnitMetricResult(correct=True, golden={}, predicted={}), + UnitMetricResult(correct=False, golden={}, predicted={}), + UnitMetricResult(correct=False, golden={}, predicted={}), + ] + output = PassAtKMetric(k=1).calculate(results) + assert output["pass@k"] == pytest.approx(0.333, rel=1e-2) + + output = PassAtKMetric(k=2).calculate(results) + assert output["pass@k"] == pytest.approx(0.666, rel=1e-2) + + output = PassAtKMetric(k=3).calculate(results) + assert output["pass@k"] == 1.0 + + def test_calculate_with_complex_metadata(self): + """Test calculate with complex metadata in results""" + results = [ + UnitMetricResult( + correct=True, + golden={"event": "click", "x": 100, "y": 200}, + predicted={"tool": "click", "x": 105, "y": 195}, + metadata={ + "mission_id": "mission_01", + "step_id": "step_1", + "validation_type": "tool_only", + }, + ), + UnitMetricResult( + correct=False, + golden={"event": "type", "text": "hello"}, + predicted={"tool": "click", "text": "world"}, + metadata={ + "mission_id": "mission_01", + "step_id": "step_2", + "validation_type": "full", + }, + ), + ] + output = PassAtKMetric(k=1).calculate(results) + assert "pass@k" in output + assert output["pass@k"] == 0.5 + + output = PassAtKMetric(k=2).calculate(results) + assert "pass@k" in output + assert output["pass@k"] == 1.0 + + def test_calculate_with_different_data_types(self): + """Test calculate with different data types in golden/predicted""" + logger.info("Testing calculate with different data types in golden/predicted") + results = [ + UnitMetricResult(correct=True, golden={"value": 1}, predicted={"value": 1}), + UnitMetricResult(correct=True, golden={"value": "text"}, predicted={"value": "text"}), + UnitMetricResult(correct=True, golden={"value": True}, predicted={"value": True}), + UnitMetricResult(correct=False, golden={"value": [1, 2]}, predicted={"value": [1, 3]}), + ] + output = PassAtKMetric(k=1).calculate(results) + assert "pass@k" in output + assert output["pass@k"] == 0.75 + + output = PassAtKMetric(k=2).calculate(results) + assert "pass@k" in output + assert output["pass@k"] == 1.0 diff --git a/tests/core/eval/metrics/aggregator_metrics/test_pass_power_k.py b/tests/core/eval/metrics/aggregator_metrics/test_pass_power_k.py new file mode 100644 index 00000000..ff8a836d --- /dev/null +++ 
@@ -0,0 +1,252 @@
+"""
+Unit tests for PassPowerKMetric
+Tests pass^k calculation from unit metric results.
+"""
+
+import os
+import sys
+
+# Add project root to sys.path for relative imports to work
+sys.path.insert(
+    0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", ".."))
+)
+
+import pytest
+
+from sygra.core.eval.metrics.aggregator_metrics.pass_power_k import PassPowerKMetric
+from sygra.core.eval.metrics.unit_metrics.unit_metric_result import UnitMetricResult
+from sygra.logger.logger_config import logger
+
+
+class TestPassPowerKMetric:
+    """Test suite for PassPowerKMetric"""
+
+    def test_get_metric_name(self):
+        """Test that metric name is 'pass^k'"""
+        metric = PassPowerKMetric(k=1)
+        assert metric.get_metric_name() == "pass^k"
+
+    def test_calculate_empty_results(self):
+        """Test calculate with empty results list"""
+        metric = PassPowerKMetric(k=1)
+        results = []
+        output = metric.calculate(results)
+
+        assert "pass^k" in output
+        assert output["pass^k"] == 0.0
+
+    def test_calculate_all_correct(self):
+        """Test calculate when all predictions are correct"""
+        metric = PassPowerKMetric(k=1)
+        results = [
+            UnitMetricResult(
+                correct=True,
+                golden={"class": "A"},
+                predicted={"class": "A"},
+                metadata={"id": 1},
+            ),
+            UnitMetricResult(
+                correct=True,
+                golden={"class": "B"},
+                predicted={"class": "B"},
+                metadata={"id": 2},
+            ),
+            UnitMetricResult(
+                correct=True,
+                golden={"class": "C"},
+                predicted={"class": "C"},
+                metadata={"id": 3},
+            ),
+        ]
+        output = metric.calculate(results)
+
+        assert "pass^k" in output
+        assert output["pass^k"] == 1.0
+
+    def test_calculate_all_incorrect(self):
+        """Test calculate when all predictions are incorrect"""
+        metric = PassPowerKMetric(k=1)
+        results = [
+            UnitMetricResult(
+                correct=False,
+                golden={"class": "A"},
+                predicted={"class": "B"},
+                metadata={"id": 1},
+            ),
+            UnitMetricResult(
+                correct=False,
+                golden={"class": "B"},
+                predicted={"class": "C"},
+                metadata={"id": 2},
+            ),
+            UnitMetricResult(
+                correct=False,
+                golden={"class": "C"},
+                predicted={"class": "A"},
+                metadata={"id": 3},
+            ),
+        ]
+        output = metric.calculate(results)
+
+        assert "pass^k" in output
+        assert output["pass^k"] == 0.0
+
+    def test_calculate_mixed_results(self):
+        """Test calculate with mixed correct/incorrect predictions"""
+        metric = PassPowerKMetric(k=1)
+        results = [
+            UnitMetricResult(
+                correct=True,
+                golden={"class": "A"},
+                predicted={"class": "A"},
+                metadata={"id": 1},
+            ),
+            UnitMetricResult(
+                correct=False,
+                golden={"class": "B"},
+                predicted={"class": "C"},
+                metadata={"id": 2},
+            ),
+            UnitMetricResult(
+                correct=True,
+                golden={"class": "C"},
+                predicted={"class": "C"},
+                metadata={"id": 3},
+            ),
+            UnitMetricResult(
+                correct=False,
+                golden={"class": "D"},
+                predicted={"class": "A"},
+                metadata={"id": 4},
+            ),
+        ]
+        output = metric.calculate(results)
+
+        assert "pass^k" in output
+        assert output["pass^k"] == 0.5  # 2 correct out of 4
+
+    def test_calculate_single_correct_result(self):
+        """Test calculate with single correct result"""
+        metric = PassPowerKMetric(k=1)
+        results = [
+            UnitMetricResult(
+                correct=True,
+                golden={"class": "A"},
+                predicted={"class": "A"},
+                metadata={"id": 1},
+            ),
+        ]
+        output = metric.calculate(results)
+
+        assert "pass^k" in output
+        assert output["pass^k"] == 1.0
+
+    def test_calculate_single_incorrect_result(self):
+        """Test calculate with single incorrect result"""
+        metric = PassPowerKMetric(k=1)
+        results = [
+            UnitMetricResult(
+                correct=False,
+                golden={"class": "A"},
+                predicted={"class": "B"},
+                metadata={"id": 1},
+            ),
+        ]
+        output = metric.calculate(results)
+
+        assert "pass^k" in output
+        assert output["pass^k"] == 0.0
+
+    def test_calculate_various_pass_power_k_values(self):
+        """Test calculate with various success rates and k values"""
+
+        # 75% success rate (3 out of 4)
+        results = [
+            UnitMetricResult(correct=True, golden={}, predicted={}),
+            UnitMetricResult(correct=True, golden={}, predicted={}),
+            UnitMetricResult(correct=True, golden={}, predicted={}),
+            UnitMetricResult(correct=False, golden={}, predicted={}),
+        ]
+        output = PassPowerKMetric(k=1).calculate(results)
+        assert output["pass^k"] == 0.75
+
+        output = PassPowerKMetric(k=2).calculate(results)
+        assert output["pass^k"] == 0.5625
+
+        output = PassPowerKMetric(k=3).calculate(results)
+        assert output["pass^k"] == 0.421875
+
+        # 60% success rate (3 out of 5)
+        results = [
+            UnitMetricResult(correct=True, golden={}, predicted={}),
+            UnitMetricResult(correct=True, golden={}, predicted={}),
+            UnitMetricResult(correct=True, golden={}, predicted={}),
+            UnitMetricResult(correct=False, golden={}, predicted={}),
+            UnitMetricResult(correct=False, golden={}, predicted={}),
+        ]
+        output = PassPowerKMetric(k=1).calculate(results)
+        assert output["pass^k"] == 0.6
+
+        output = PassPowerKMetric(k=2).calculate(results)
+        assert output["pass^k"] == 0.36
+
+        # 33.33% success rate (1 out of 3)
+        results = [
+            UnitMetricResult(correct=True, golden={}, predicted={}),
+            UnitMetricResult(correct=False, golden={}, predicted={}),
+            UnitMetricResult(correct=False, golden={}, predicted={}),
+        ]
+        output = PassPowerKMetric(k=1).calculate(results)
+        assert output["pass^k"] == pytest.approx(0.333, rel=1e-2)
+
+        output = PassPowerKMetric(k=2).calculate(results)
+        assert output["pass^k"] == pytest.approx(0.111, rel=1e-2)
+
+    def test_calculate_with_complex_metadata(self):
+        """Test calculate with complex metadata in results"""
+        results = [
+            UnitMetricResult(
+                correct=True,
+                golden={"event": "click", "x": 100, "y": 200},
+                predicted={"tool": "click", "x": 105, "y": 195},
+                metadata={
+                    "mission_id": "mission_01",
+                    "step_id": "step_1",
+                    "validation_type": "tool_only",
+                },
+            ),
+            UnitMetricResult(
+                correct=False,
+                golden={"event": "type", "text": "hello"},
+                predicted={"tool": "click", "text": "world"},
+                metadata={
+                    "mission_id": "mission_01",
+                    "step_id": "step_2",
+                    "validation_type": "full",
+                },
+            ),
+        ]
+        output = PassPowerKMetric(k=1).calculate(results)
+        assert "pass^k" in output
+        assert output["pass^k"] == 0.5
+
+        output = PassPowerKMetric(k=2).calculate(results)
+        assert "pass^k" in output
+        assert output["pass^k"] == 0.25
+
+    def test_calculate_with_different_data_types(self):
+        """Test calculate with different data types in golden/predicted"""
+        logger.info("Testing calculate with different data types in golden/predicted")
+        results = [
+            UnitMetricResult(correct=True, golden={"value": 1}, predicted={"value": 1}),
+            UnitMetricResult(correct=True, golden={"value": "text"}, predicted={"value": "text"}),
+            UnitMetricResult(correct=True, golden={"value": True}, predicted={"value": True}),
+            UnitMetricResult(correct=False, golden={"value": [1, 2]}, predicted={"value": [1, 3]}),
+        ]
+        output = PassPowerKMetric(k=1).calculate(results)
+        assert "pass^k" in output
+        assert output["pass^k"] == 0.75
+
+        output = PassPowerKMetric(k=2).calculate(results)
+        assert "pass^k" in output
+        assert output["pass^k"] == 0.5625
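
Reviewer note: as a cross-check of the expected values in the tests above, here is a minimal standalone sketch of the two estimators this diff adds. It reimplements the formulas directly with `math.comb` instead of importing the sygra classes, so it runs outside the repo; the function names below are illustrative, not the PR's API. Pass@k uses the standard unbiased estimator 1 - C(n-c, k)/C(n, k) (Chen et al., 2021), while pass^k raises the single-attempt success rate to the k-th power.

```python
import math


def pass_at_k(n: int, c: int, k: int) -> float:
    """P(at least one of k draws from n attempts is correct), given c correct."""
    # math.comb(m, k) returns 0 when k > m, so c == n (no incorrect
    # samples left to draw) naturally yields 1.0, and c == 0 yields 0.0.
    return 1.0 - math.comb(n - c, k) / math.comb(n, k)


def pass_power_k(success_rate: float, k: int) -> float:
    """P(all k independent attempts succeed) at a fixed per-attempt rate."""
    return success_rate**k


# Worked example mirroring the "3 correct out of 5" cases in the new tests:
n, c, k = 5, 3, 2
print(pass_at_k(n, c, k))      # 1 - C(2,2)/C(5,2) = 1 - 1/10 = 0.9
print(pass_power_k(c / n, k))  # 0.6 ** 2 = 0.36
```

Note how the two diverge as k grows: for any c > 0, pass@k increases toward 1 (one success among k tries suffices), while pass^k decays toward 0 (every try must succeed), which is why `PassPowerKMetric.calculate` reports `success_rate` alongside `pass^k`.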