@@ -511,8 +511,9 @@ def evaluate(self, predictions):
         return metrics
 
         for label in labels_list:
-            predicted_answers = []
-            ground_truth = []
+            # predicted_answers = []
+            # ground_truth = []
+            dsc_list = []
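+            # one Dice score per prediction under this label; averaged into the metric below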
 
             for prediction in predictions:
                 answer = prediction["answer"].masks
@@ -527,27 +528,57 @@ def evaluate(self, predictions):
                 else:
                     pred = self.get_predicted_answer(answer)
 
-                    predicted_answers.append(pred)
-                    ground_truth.append(gt)
+                    dice_similarity_coefficient = self.compute_dice_coefficient(
+                        gt, pred
+                    )
+                    # print(dice_similarity_coefficient)
+                    dsc_list.append(dice_similarity_coefficient)
+                    # predicted_answers.append(pred)
+                    # ground_truth.append(gt)
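+                    # compute_dice_coefficient returns NaN when gt and pred are both empty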
 
-            predicted_answers = np.array(predicted_answers)
-            ground_truth = np.array(ground_truth)
+            # predicted_answers = np.array(predicted_answers)
+            # ground_truth = np.array(ground_truth)
 
             # print(predicted_answers.shape, ground_truth.shape)
 
-            predicted_answers = torch.tensor(predicted_answers, dtype=torch.long)
-            ground_truth = torch.tensor(ground_truth, dtype=torch.long)
+            # dice_similarity_coefficient = self.compute_dice_coefficient(
+            #     ground_truth, predicted_answers
+            # )
 
-            dice = dice_scorer(predicted_answers, ground_truth).item()
-            answers_log.append(
-                (
-                    f"Label {label} have {len(predicted_answers)} data points, and the dice score is: {dice}."
-                )
-            )
+            # predicted_answers = torch.tensor(predicted_answers, dtype=torch.long)
+            # ground_truth = torch.tensor(ground_truth, dtype=torch.long)
 
-            metrics[f"{label}_dice"] = dice
+            # dice = dice_scorer(predicted_answers, ground_truth).item()
+            # answers_log.append(
+            #     (
+            #         f"Label {label} have {len(predicted_answers)} data points, and the dice score is: {dice}."
+            #     )
+            # )
 
+            # metrics[f"{label}_generalized_dice_score"] = dice
+            # print(sum(dsc_list), len(dsc_list))
+            metrics[f"{label}_DSC"] = sum(dsc_list) / len(dsc_list)
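+            # arithmetic mean over examples: a single NaN score makes the label
+            # mean NaN, and an empty dsc_list raises ZeroDivisionError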
+            # del predicted_answers, ground_truth
 
         return EvaluationOutput(metrics=metrics, answer_log=answers_log)
 
+    def compute_dice_coefficient(self, mask_gt, mask_pred):
+        """Compute the Sørensen-Dice coefficient.
+
+        Computes the Sørensen-Dice coefficient between the ground truth mask
+        `mask_gt` and the predicted mask `mask_pred`.
+
+        Args:
+            mask_gt: 3-dim numpy array of type bool. The ground truth mask.
+            mask_pred: 3-dim numpy array of type bool. The predicted mask.
+
+        Returns:
+            The Dice coefficient as a float. If both masks are empty, the
+            result is NaN.
+        """
+        volume_sum = mask_gt.sum() + mask_pred.sum()
+        if volume_sum == 0:
+            return np.nan
+        volume_intersect = (mask_gt & mask_pred).sum()
+        return 2 * volume_intersect / volume_sum
+
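+    # Minimal illustrative sketch (not part of this commit) of the expected
+    # behaviour of compute_dice_coefficient on two tiny boolean volumes:
+    #
+    #   gt = np.zeros((2, 2, 2), dtype=bool); gt[0, 0, 0] = True
+    #   pred = gt.copy();                     pred[0, 0, 1] = True
+    #   # intersection = 1, volume sum = 1 + 2 = 3  ->  dice = 2 * 1 / 3 = 0.667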
 
 
 class ReportComparison(Benchmark):
     """A benchmark for report comparison tasks."""