Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions label_studio/projects/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ def get_queryset(self):
'total_predictions_number': 0,
'sampling': 'Sequential sampling',
'show_ground_truth_first': True,
'annotator_evaluation_enabled': False,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this change the default for new projects?

'show_overlap_first': True,
'overlap_cohort_percentage': 100,
'task_data_login': 'user',
Expand Down
64 changes: 34 additions & 30 deletions label_studio/projects/functions/next_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@


# Hook for GT-first gating (Enterprise can override via settings)
def _oss_should_attempt_gt_first(user: User, project: Project) -> bool:
# Open-source default: if project enables GT-first, allow it without onboarding gates
return bool(project.show_ground_truth_first)
def _lso_should_attempt_gt_first(user: User, project: Project) -> bool:
# Open-source default: if project enables annotator evaluation, allow it without onboarding gates
return bool(project.annotator_evaluation_enabled)


get_tasks_agreement_queryset = load_func(settings.GET_TASKS_AGREEMENT_QUERYSET)
should_attempt_ground_truth_first = (
load_func(settings.SHOULD_ATTEMPT_GROUND_TRUTH_FIRST) or _oss_should_attempt_gt_first
load_func(settings.SHOULD_ATTEMPT_GROUND_TRUTH_FIRST) or _lso_should_attempt_gt_first
)


Expand Down Expand Up @@ -59,10 +59,7 @@ def _get_first_unlocked(tasks_query: QuerySet[Task], user) -> Union[Task, None]:

def _try_ground_truth(tasks: QuerySet[Task], project: Project, user: User) -> Union[Task, None]:
"""Returns task from ground truth set"""
ground_truth = Annotation.objects.filter(task=OuterRef('pk'), ground_truth=True)
not_solved_tasks_with_ground_truths = tasks.annotate(has_ground_truths=Exists(ground_truth)).filter(
has_ground_truths=True
)
not_solved_tasks_with_ground_truths = _annotate_has_ground_truths(tasks).filter(has_ground_truths=True)
if not_solved_tasks_with_ground_truths.exists():
if project.sampling == project.SEQUENCE:
return _get_first_unlocked(not_solved_tasks_with_ground_truths, user)
Expand All @@ -81,10 +78,10 @@ def _try_tasks_with_overlap(tasks: QuerySet[Task]) -> Tuple[Union[Task, None], Q
def _try_breadth_first(tasks: QuerySet[Task], user: User, project: Project) -> Union[Task, None]:
"""Try to find tasks with maximum amount of annotations, since we are trying to label tasks as fast as possible"""

# Exclude ground truth annotations from the count when not in onboarding mode
# Exclude ground truth annotations from the count when not in annotator evaluation mode
# to prevent GT tasks from being prioritized via breadth-first logic
annotation_filter = ~Q(annotations__completed_by=user)
if not project.show_ground_truth_first:
if not project.annotator_evaluation_enabled:
annotation_filter &= ~Q(annotations__ground_truth=True)

tasks = tasks.annotate(annotations_count=Count('annotations', filter=annotation_filter))
Expand Down Expand Up @@ -158,13 +155,18 @@ def _try_uncertainty_sampling(
return next_task


def _annotate_has_ground_truths(tasks: QuerySet[Task]) -> QuerySet[Task]:
    """Annotate each task with a boolean ``has_ground_truths`` field.

    ``has_ground_truths`` is True when at least one annotation marked
    ``ground_truth=True`` exists for the task (correlated EXISTS subquery,
    so no join fan-out on the task rows).
    """
    gt_exists = Exists(Annotation.objects.filter(task=OuterRef('pk'), ground_truth=True))
    return tasks.annotate(has_ground_truths=gt_exists)


def get_not_solved_tasks_qs(
user: User,
project: Project,
prepared_tasks: QuerySet[Task],
assigned_flag: Union[bool, None],
queue_info: str,
allow_gt_first: bool,
attempt_gt_first: bool,
) -> Tuple[QuerySet[Task], List[int], str, bool]:
user_solved_tasks_array = user.annotations.filter(project=project, task__isnull=False)
user_solved_tasks_array = user_solved_tasks_array.distinct().values_list('task__pk', flat=True)
Expand All @@ -188,7 +190,6 @@ def get_not_solved_tasks_qs(
and get_tasks_agreement_queryset
and user.is_project_annotator(project)
):
# Onboarding mode (GT-first) should keep GT tasks eligible regardless of is_labeled/agreement
qs = get_tasks_agreement_queryset(not_solved_tasks)
qs = qs.annotate(annotators=Count('annotations__completed_by', distinct=True))

Expand All @@ -197,13 +198,10 @@ def get_not_solved_tasks_qs(
)
capacity_pred = Q(annotators__lt=F('overlap') + (lse_project.max_additional_annotators_assignable or 0))

if project.show_ground_truth_first:
gt_subq = Annotation.objects.filter(task=OuterRef('pk'), ground_truth=True)
qs = qs.annotate(has_ground_truths=Exists(gt_subq))
# Keep all GT tasks + apply low-agreement+capacity to the rest. For sure, we can do:
# - if user.solved_tasks_array.count < lse_project.annotator_evaluation_minimum_tasks
# - else, apply low-agreement+capacity to the rest (maybe performance will be better)
# but it's a question - what is better here. This version is simpler at least from the code perspective.
if project.annotator_evaluation_enabled:
# Include ground truth tasks in the query if annotator evaluation is enabled
qs = _annotate_has_ground_truths(qs)
# Keep all GT tasks + apply low-agreement+capacity to the rest.
not_solved_tasks = qs.filter(Q(has_ground_truths=True) | (low_agreement_pred & capacity_pred))
else:
not_solved_tasks = qs.filter(low_agreement_pred & capacity_pred)
Expand All @@ -212,9 +210,15 @@ def get_not_solved_tasks_qs(

# otherwise, filtering out completed tasks is sufficient
else:
# ignore tasks that are already labeled when GT-first is NOT allowed
if not allow_gt_first:
not_solved_tasks = not_solved_tasks.filter(is_labeled=False)
if not attempt_gt_first:
# Outside of onboarding window
if project.annotator_evaluation_enabled:
# Include ground truth tasks in the query if outside of onboarding window and annotator evaluation is enabled
not_solved_tasks = _annotate_has_ground_truths(not_solved_tasks)
not_solved_tasks = not_solved_tasks.filter(Q(is_labeled=False) | Q(has_ground_truths=True))
else:
# Ignore tasks that are already labeled when outside of onboarding window and annotator evaluation is not enabled
not_solved_tasks = not_solved_tasks.filter(is_labeled=False)

if not flag_set('fflag_fix_back_lsdv_4523_show_overlap_first_order_27022023_short'):
# show tasks with overlap > 1 first (unless tasks are already prioritized on agreement)
Expand Down Expand Up @@ -244,7 +248,7 @@ def get_next_task_without_dm_queue(
not_solved_tasks: QuerySet,
assigned_flag: Union[bool, None],
prioritized_low_agreement: bool,
allow_gt_first: bool,
attempt_gt_first: bool,
) -> Tuple[Union[Task, None], bool, str]:
next_task = None
use_task_lock = True
Expand All @@ -265,8 +269,8 @@ def get_next_task_without_dm_queue(
use_task_lock = False
queue_info += (' & ' if queue_info else '') + 'Task lock'

# Ground truth: use precomputed gating for GT-first
if not next_task and allow_gt_first:
# Ground truth: attempt to label ground truth tasks in onboarding window
if not next_task and attempt_gt_first:
logger.debug(f'User={user} tries ground truth from prepared tasks')
next_task = _try_ground_truth(not_solved_tasks, project, user)
if next_task:
Expand Down Expand Up @@ -378,16 +382,16 @@ def get_next_task(
use_task_lock = True
queue_info = ''

# Ground truth: label GT first only during onboarding window for user (gated by min tasks and min score)
allow_gt_first = should_attempt_ground_truth_first(user, project)
# Ground truth: label GT first only during onboarding window for user (gated by onboarding task number)
attempt_gt_first = should_attempt_ground_truth_first(user, project)

not_solved_tasks, user_solved_tasks_array, queue_info, prioritized_low_agreement = get_not_solved_tasks_qs(
user, project, prepared_tasks, assigned_flag, queue_info, allow_gt_first
user, project, prepared_tasks, assigned_flag, queue_info, attempt_gt_first
)

if not dm_queue:
next_task, use_task_lock, queue_info = get_next_task_without_dm_queue(
user, project, not_solved_tasks, assigned_flag, prioritized_low_agreement, allow_gt_first
user, project, not_solved_tasks, assigned_flag, prioritized_low_agreement, attempt_gt_first
)

if flag_set('fflag_fix_back_lsdv_4523_show_overlap_first_order_27022023_short'):
Expand Down Expand Up @@ -452,7 +456,7 @@ def get_next_task(
'maximum_annotations': project.maximum_annotations,
'skip_queue': project.skip_queue,
'sampling': project.sampling,
'show_ground_truth_first': project.show_ground_truth_first,
'annotator_evaluation_enabled': project.annotator_evaluation_enabled,
'show_overlap_first': project.show_overlap_first,
'overlap_cohort_percentage': project.overlap_cohort_percentage,
'project_id': project.id,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 5.1.14 on 2025-12-05 19:24

from django.db import migrations, models


class Migration(migrations.Migration):
    # Adds the boolean Project.annotator_evaluation_enabled flag (False for
    # all projects, new and existing) that supersedes the deprecated
    # show_ground_truth_first "onboarding" flag.

    dependencies = [
        ("projects", "0033_projects_soft_delete_indexes_async"),
    ]

    operations = [
        migrations.AddField(
            model_name="project",
            name="annotator_evaluation_enabled",
            # db_default=False sets the default at the database level, so
            # existing rows and non-ORM inserts get False without a separate
            # data migration; default=False covers ORM-side object creation.
            field=models.BooleanField(
                db_default=False,
                default=False,
                help_text="Enable annotator evaluation for the project",
                verbose_name="annotator evaluation enabled",
            ),
        ),
    ]
10 changes: 10 additions & 0 deletions label_studio/projects/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,11 +313,21 @@ class SkipQueue(models.TextChoices):
skip_queue = models.CharField(
max_length=100, choices=SkipQueue.choices, null=True, default=SkipQueue.REQUEUE_FOR_OTHERS
)

# Deprecated
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah, I see :) wasn't there some django convention that you introduced for deprecated features, or am I misremembering?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, here it is

@extend_schema_serializer(deprecate_fields=['agreement_threshold', 'max_additional_annotators_assignable'])
class MetricParamSerializer(serializers.ModelSerializer):
    ...

show_ground_truth_first = models.BooleanField(
_('show ground truth first'),
default=False,
help_text='Onboarding mode (true): show ground truth tasks first in the labeling stream',
)

annotator_evaluation_enabled = models.BooleanField(
_('annotator evaluation enabled'),
default=False,
db_default=False,
help_text='Enable annotator evaluation for the project',
)

show_overlap_first = models.BooleanField(_('show overlap first'), default=False)
overlap_cohort_percentage = models.IntegerField(_('overlap_cohort_percentage'), default=100)

Expand Down
1 change: 1 addition & 0 deletions label_studio/projects/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ class Meta:
'total_predictions_number',
'sampling',
'show_ground_truth_first',
'annotator_evaluation_enabled',
'show_overlap_first',
'overlap_cohort_percentage',
'task_data_login',
Expand Down
6 changes: 2 additions & 4 deletions label_studio/tasks/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,10 +289,8 @@ def has_lock(self, user=None):
"""
from projects.functions.next_task import get_next_task_logging_level

if self.project.show_ground_truth_first:
# in show_ground_truth_first mode(onboarding)
# we ignore overlap setting for ground_truth tasks
# https://humansignal.atlassian.net/browse/LEAP-1963
if self.project.annotator_evaluation_enabled:
# In annotator evaluation mode, ignore overlap setting for ground truth tasks
if self.annotations.filter(ground_truth=True).exists():
return False

Expand Down
2 changes: 1 addition & 1 deletion label_studio/tests/data_manager/columns.tavern.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ stages:
"start_training_on_annotation_update": false, "show_collab_predictions": true, "num_tasks_with_annotations": null,
"task_number": null, "useful_annotation_number": null, "ground_truth_number": null, "skipped_annotations_number": null,
"total_annotations_number": null, "total_predictions_number": null, "sampling": "Sequential sampling",
"show_ground_truth_first": false, "show_overlap_first": false, "overlap_cohort_percentage": 100,
"show_ground_truth_first": false, "annotator_evaluation_enabled": false, "show_overlap_first": false, "overlap_cohort_percentage": 100,
"task_data_login": null, "task_data_password": null,
"control_weights": {"label": {"overall": 1.0, "type": "Choices", "labels": {"pos": 1.0, "neg": 1.0}}},
"parsed_label_config": {
Expand Down
8 changes: 4 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ dependencies = [
"tldextract (>=5.1.3)",
"uuid-utils (>=0.11.0,<1.0.0)",
## HumanSignal repo dependencies :start
"label-studio-sdk @ https://github.com/HumanSignal/label-studio-sdk/archive/4a5949e554a5574377c79294a14296a6efec8b02.zip",
"label-studio-sdk @ https://github.com/HumanSignal/label-studio-sdk/archive/cd6c6d98845167a0fbf636b374de38f805925f7e.zip",
## HumanSignal repo dependencies :end
]

Expand Down
1 change: 1 addition & 0 deletions web/apps/labelstudio/src/config/ApiConfig.example.js
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ export const API_CONFIG = {
total_predictions_number: 100,
sampling: "Sequential sampling",
show_ground_truth_first: false,
annotator_evaluation_enabled: false,
show_overlap_first: false,
overlap_cohort_percentage: 100,
task_data_login: null,
Expand Down
1 change: 1 addition & 0 deletions web/apps/labelstudio/src/types/Project.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ declare type APIProject = {
total_predictions_number?: string;
sampling?: "Sequential sampling" | "Uniform sampling" | "Uncertainty sampling" | null;
show_ground_truth_first?: boolean;
annotator_evaluation_enabled?: boolean;
show_overlap_first?: boolean;
overlap_cohort_percentage?: number;

Expand Down
4 changes: 4 additions & 0 deletions web/libs/ui/src/assets/icons/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,10 @@ export { ReactComponent as IconMinus } from "./minus.svg";
export { ReactComponent as IconModel } from "./model.svg";
export { ReactComponent as IconModels } from "./models.svg";
export { ReactComponent as IconModelVersion } from "./model-version.svg";
export { ReactComponent as IconMoveLeft } from "./move-left.svg";
export { ReactComponent as IconMoveRight } from "./move-right.svg";
export { ReactComponent as IconMoveUp } from "./move-up.svg";
export { ReactComponent as IconMoveDown } from "./move-down.svg";
export { ReactComponent as IconMoveTool } from "./move-tool.svg";
export { ReactComponent as IconNext } from "./next-step.svg";
export { ReactComponent as IconOctagonAlert } from "./octagon-alert.svg";
Expand Down
3 changes: 3 additions & 0 deletions web/libs/ui/src/assets/icons/move-down.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions web/libs/ui/src/assets/icons/move-left.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions web/libs/ui/src/assets/icons/move-right.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions web/libs/ui/src/assets/icons/move-up.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 2 additions & 0 deletions web/libs/ui/src/shad/components/ui/badge.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ const badgeVariants = cva(
secondary: "border-transparent bg-secondary text-secondary-foreground hover:bg-secondary/80",
destructive: "border-transparent bg-destructive text-destructive-foreground hover:bg-destructive/80",
success: "border-transparent bg-positive-background text-positive-content hover:bg-positive-background/80",
warning:
"bg-warning-background border-warning-border-subtlest text-warning-content hover:bg-warning-background/80",
info: "bg-primary-background border-primary-emphasis text-accent-grape-dark font-normal",
outline: "text-neutral-content border-neutral-border",
beta: "bg-accent-plum-subtle text-accent-plum-dark font-medium border-transparent",
Expand Down
Loading