Export LLM response evaluation annotations

How to export LLM response evaluation annotations, with sample export formats.

Export JSON annotations

# Choose which optional sections the export should include; set a flag to
# False to exclude that section.
export_params = {
    "attachments": True,
    "metadata_fields": True,
    "data_row_details": True,
    "project_details": True,
    "label_details": True,
    "performance_details": True,
    "interpolated_frames": True,
}

# Note: filters are combined with AND logic, so a single filter is usually
# sufficient. Replace "<wkf-status>" with the workflow status to export.
filters = {
    "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
    "workflow_status": "<wkf-status>",
}

# Start the export with the params and filters above, then wait for the task
# to finish before reading its output.
export_task = project.export(params=export_params, filters=filters)
export_task.wait_till_done()


# Return JSON output strings from export task results/errors, one by one:

# Stream handler passed to the JSON converter's buffered stream.
def json_stream_handler(output: lb.BufferedJsonConverterOutput):
    """Print the JSON payload carried by a single streamed export output."""
    payload = output.json
    print(payload)


# If the task reported errors, stream them and print each one.
if export_task.has_errors():
    error_stream = export_task.get_buffered_stream(
        stream_type=lb.StreamType.ERRORS,
    )
    error_stream.start(stream_handler=lambda err: print(err))

# If the task produced results, stream them through the JSON handler.
if export_task.has_result():
    result_stream = export_task.get_buffered_stream(
        stream_type=lb.StreamType.RESULT,
    )
    # NOTE(review): the handler does the printing; export_json only keeps
    # whatever start() returns -- confirm that value is meaningful before
    # relying on it.
    export_json = result_stream.start(stream_handler=json_stream_handler)

# Summary statistics for the RESULT stream.
result_size = export_task.get_total_file_size(stream_type=lb.StreamType.RESULT)
print("file size: ", result_size)
result_lines = export_task.get_total_lines(stream_type=lb.StreamType.RESULT)
print("line count: ", result_lines)
     

Annotation export formats

Classification - Radio

{
    "feature_id": "clp8gzc7a000h07iu2uqcfziq",
    "feature_schema_id": "clp8fepqc02nd071phl4tck59",
    "name": "Choose the best response",
    "radio_answer": {
        "feature_id": "clp8gzc7a000i07iu46z39uwl",
        "feature_schema_id": "clp8fepqc02ng071p9ufm76xj",
        "name": "Response B",
        "classifications": []
    }
}

Classification - Free-form text

{
    "feature_id": "clph8rvw700013b6pbsq1lwcg",
    "feature_schema_id": "clp8fepqc02nl071p8z948pd4",
    "name": "Provide a reason for your choice",
    "text_answer": {
        "content": "Clear recommendation"
    }
}

Classification - Checklist

{
    "feature_id": "clpiyav9m00053b6mpe2qk0im",
    "feature_schema_id": "clpiya8h507dx072efo9kb97j",
    "name": "Evaluations done",
    "checklist_answers": [
        {
            "feature_id": "clpiyav9m00043b6mvhur1p19",
            "name": "Usefulness",
            "classifications": []
        },
        {
            "feature_id": "clpiyax4d00073b6mqyyt0504",
            "name": "Toxicity",
            "classifications": []
        }
    ]
}

Sample project export

{
    "data_row": {
        "id": "clpiqgsvn4lxr079096g5mt9v",
        "global_key": "0914c7da-5999-464b-b621-054a5247cf48",
        "row_data": "https://storage.googleapis.com/labelbox-developer-testing-assets/Pranoy/shopping_3.json",
        "details": {
            "dataset_id": "clpiqgrsr00f60737sqeiaf4v",
            "dataset_name": "pairwise_demo_42454af2-b7b5-45c0-a25a-90f97efd49be",
            "created_at": "2023-11-28T19:30:49.729+00:00",
            "updated_at": "2023-11-28T19:30:50.609+00:00",
            "last_activity_at": "2023-11-28T23:11:08.000+00:00",
            "created_by": "user@example.com"
        }
    },
    "media_attributes": {
        "mime_type": "application/vnd.labelbox.conversational",
        "labelable_ids": [
            "message-1",
            "message-3"
        ],
        "message_count": 4
    },
    "attachments": [],
    "metadata_fields": [],
    "projects": {
        "clpiy2itz08vg0705akh60zc1": {
            "name": "Test LLM Human Preference",
            "labels": [
                {
                    "label_kind": "Default",
                    "version": "1.0.0",
                    "id": "clpiyah63071f07e4756yer2d",
                    "label_details": {
                        "created_at": "2023-11-28T23:11:08.000+00:00",
                        "updated_at": "2023-11-28T23:11:07.000+00:00",
                        "created_by": "user@example.com",
                        "content_last_updated_at": "2023-11-28T23:11:07.568+00:00",
                        "reviews": []
                    },
                    "annotations": {
                        "objects": [],
                        "classifications": [
                            {
                                "feature_id": "clpiybl7h000g3b6mil1amjme",
                                "feature_schema_id": "clpiqh0v104mq0729homqffup",
                                "name": "Choose the best response",
                                "radio_answer": {
                                    "feature_id": "clpiybl7h000f3b6m03fn8ry3",
                                    "feature_schema_id": "clpiqh0v104mt0729atflgvvw",
                                    "name": "Response B",
                                    "classifications": []
                                }
                            },
                            {
                                "feature_id": "clpiybzp3000h3b6m9uowaci4",
                                "feature_schema_id": "clpiqh0v104my072907gt7id0",
                                "name": "Provide a reason for your choice",
                                "text_answer": {
                                    "content": "Direct approach that is convenient"
                                }
                            },
                            {
                                "feature_id": "clpiyc10q000j3b6m01i2d1pv",
                                "feature_schema_id": "clpiya8h507dx072efo9kb97j",
                                "name": "Evaluations done",
                                "checklist_answers": [
                                    {
                                        "feature_id": "clpiyc10q000i3b6mhhvhdzqs",
                                        "name": "Relevance",
                                        "classifications": []
                                    },
                                    {
                                        "feature_id": "clpiyc1ie000l3b6m3kpff0lp",
                                        "name": "Usefulness",
                                        "classifications": []
                                    }
                                ]
                            }
                        ],
                        "relationships": []
                    }
                }
            ],
            "project_details": {
                "ontology_id": "clpiqh0ud04mp0729adeoccsl",
                "task_id": "33e77b4d-424f-49ba-8792-628c1cb746b7",
                "task_name": "Initial review task",
                "batch_id": "66882c10-8e42-11ee-8f8d-71857f06db2f",
                "batch_name": "test_llm_human_preference_batch_1",
                "workflow_status": "IN_REVIEW",
                "priority": 5,
                "consensus_expected_label_count": 1,
                "workflow_history": [
                    {
                        "action": "Move",
                        "created_at": "2023-11-28T23:11:07.654+00:00",
                        "created_by": "user@example.com",
                        "previous_task_name": "Initial labeling task",
                        "previous_task_id": "8f9917a7-6030-0914-befc-ac06f59a7ec9",
                        "next_task_name": "Initial review task",
                        "next_task_id": "33e77b4d-424f-49ba-8792-628c1cb746b7"
                    },
                    {
                        "action": "Move",
                        "created_at": "2023-11-28T23:11:07.631+00:00",
                        "created_by": "user@example.com",
                        "next_task_name": "Initial labeling task",
                        "next_task_id": "8f9917a7-6030-0914-befc-ac06f59a7ec9"
                    }
                ]
            }
        }
    }
}