Export LLM response evaluation annotations

How to export LLM response evaluation annotations, with sample export formats for each annotation type.

Export JSON annotations

# NOTE: this snippet assumes the Labelbox SDK has been imported as
# `import labelbox as lb` and that `project` is an already-fetched project
# object; neither is defined here.

# Set the export params to include/exclude certain fields.
export_params = {
    "attachments": True,
    "metadata_fields": True,
    "data_row_details": True,
    "project_details": True,
    "label_details": True,
    "performance_details": True,
    "interpolated_frames": True,
}

# Note: Filters follow AND logic, so typically using one filter is sufficient.
filters = {
    "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
    "workflow_status": "<wkf-status>",  # placeholder: replace with a real workflow status
}

# Create the export task and wait for it to complete before reading results.
export_task = project.export(params=export_params, filters=filters)
export_task.wait_till_done()


# Stream the export using a callback function
def json_stream_handler(output: lb.BufferedJsonConverterOutput):
    """Print the JSON payload of each streamed export record."""
    print(output.json)


export_task.get_buffered_stream(stream_type=lb.StreamType.RESULT).start(
    stream_handler=json_stream_handler
)

# Collect all exported data into a list
export_json = [data_row.json for data_row in export_task.get_buffered_stream()]

print("file size: ", export_task.get_total_file_size(stream_type=lb.StreamType.RESULT))
print("line count: ", export_task.get_total_lines(stream_type=lb.StreamType.RESULT))

Annotation export formats

Classification - Radio

{ "feature_id": "clp8gzc7a000h07iu2uqcfziq", "feature_schema_id": "clp8fepqc02nd071phl4tck59", "name": "Choose the best response", "radio_answer": { "feature_id": "clp8gzc7a000i07iu46z39uwl", "feature_schema_id": "clp8fepqc02ng071p9ufm76xj", "name": "Response B", "classifications": [] } }

Classification - Free-form text

{ "feature_id": "clph8rvw700013b6pbsq1lwcg", "feature_schema_id": "clp8fepqc02nl071p8z948pd4", "name": "Provide a reason for your choice", "text_answer": { "content": "Clear recommendation" } }

Classification - Checklist

{ "feature_id": "clpiyav9m00053b6mpe2qk0im", "feature_schema_id": "clpiya8h507dx072efo9kb97j", "name": "Evaluations done", "checklist_answers": [ { "feature_id": "clpiyav9m00043b6mvhur1p19", "name": "Usefulness", "classifications": [] }, { "feature_id": "clpiyax4d00073b6mqyyt0504", "name": "Toxicity", "classifications": [] } ] }

Sample project export

{ "data_row": { "id": "clpiqgsvn4lxr079096g5mt9v", "global_key": "0914c7da-5999-464b-b621-054a5247cf48", "row_data": "https://storage.googleapis.com/labelbox-developer-testing-assets/Pranoy/shopping_3.json", "details": { "dataset_id": "clpiqgrsr00f60737sqeiaf4v", "dataset_name": "pairwise_demo_42454af2-b7b5-45c0-a25a-90f97efd49be", "created_at": "2023-11-28T19:30:49.729+00:00", "updated_at": "2023-11-28T19:30:50.609+00:00", "last_activity_at": "2023-11-28T23:11:08.000+00:00", "created_by": "user@labelbox.com" } }, "media_attributes": { "mime_type": "application/vnd.labelbox.conversational", "labelable_ids": [ "message-1", "message-3" ], "message_count": 4 }, "attachments": [], "metadata_fields": [], "projects": { "clpiy2itz08vg0705akh60zc1": { "name": "Test LLM Human Preference", "labels": [ { "label_kind": "Default", "version": "1.0.0", "id": "clpiyah63071f07e4756yer2d", "label_details": { "created_at": "2023-11-28T23:11:08.000+00:00", "updated_at": "2023-11-28T23:11:07.000+00:00", "created_by": "user@labelbox.com", "content_last_updated_at": "2023-11-28T23:11:07.568+00:00", "reviews": [] }, "annotations": { "objects": [], "classifications": [ { "feature_id": "clpiybl7h000g3b6mil1amjme", "feature_schema_id": "clpiqh0v104mq0729homqffup", "name": "Choose the best response", "radio_answer": { "feature_id": "clpiybl7h000f3b6m03fn8ry3", "feature_schema_id": "clpiqh0v104mt0729atflgvvw", "name": "Response B", "classifications": [] } }, { "feature_id": "clpiybzp3000h3b6m9uowaci4", "feature_schema_id": "clpiqh0v104my072907gt7id0", "name": "Provide a reason for your choice", "text_answer": { "content": "Direct approach that is convenient" } }, { "feature_id": "clpiyc10q000j3b6m01i2d1pv", "feature_schema_id": "clpiya8h507dx072efo9kb97j", "name": "Evaluations done", "checklist_answers": [ { "feature_id": "clpiyc10q000i3b6mhhvhdzqs", "name": "Relevance", "classifications": [] }, { "feature_id": "clpiyc1ie000l3b6m3kpff0lp", "name": "Usefulness", "classifications": [] } ] } ], 
"relationships": [] } } ], "project_details": { "ontology_id": "clpiqh0ud04mp0729adeoccsl", "task_id": "33e77b4d-424f-49ba-8792-628c1cb746b7", "task_name": "Initial review task", "batch_id": "66882c10-8e42-11ee-8f8d-71857f06db2f", "batch_name": "test_llm_human_preference_batch_1", "workflow_status": "IN_REVIEW", "priority": 5, "consensus_expected_label_count": 1, "workflow_history": [ { "action": "Move", "created_at": "2023-11-28T23:11:07.654+00:00", "created_by": "user@labelbox.com", "previous_task_name": "Initial labeling task", "previous_task_id": "8f9917a7-6030-0914-befc-ac06f59a7ec9", "next_task_name": "Initial review task", "next_task_id": "33e77b4d-424f-49ba-8792-628c1cb746b7" }, { "action": "Move", "created_at": "2023-11-28T23:11:07.631+00:00", "created_by": "user@labelbox.com", "next_task_name": "Initial labeling task", "next_task_id": "8f9917a7-6030-0914-befc-ac06f59a7ec9" } ] }, "project_tags": [] } } }