Import LLM human preference annotations

How to import LLM human preference annotations and sample import formats.

Open this Colab for an interactive tutorial on importing human preference annotations on conversation data model outputs.

Supported annotations

To import annotations in Labelbox, you need to create an annotations payload. In this section, we provide this payload for every supported annotation type.

Labelbox supports the following formats for the annotations payload:

  • NDJSON
  • Python Annotation Types

Formats are described below.

Entity

ner_annotation = lb_types.ObjectAnnotation(
    name="ner",
    value=lb_types.ConversationEntity(
        start=0,
        end=8,
        message_id="message-1"
    )
)
ner_annotation_ndjson = {
        "name": "ner",
        "location": {
            "start": 0,
            "end": 8
        },
        "messageId": "message-1"
    }

Classification: Free-form text

text_annotation = lb_types.ClassificationAnnotation(
    name="Provide a reason for your choice",
    value=lb_types.Text(answer="the answer to the text questions right here")
)
text_annotation_ndjson = {
    "name": "text_convo",
    "answer": "the answer to the text questions right here",
}

Classification: Checklist (Multi-choice)

checklist_annotation= lb_types.ClassificationAnnotation(
  name="checklist_convo", # must match your ontology feature"s name
  value=lb_types.Checklist(
      answer = [
        lb_types.ClassificationAnswer(
            name = "first_checklist_answer"
        ),
        lb_types.ClassificationAnswer(
            name = "second_checklist_answer"
        )
      ]
    ),
  message_id="message-1" # Message specific annotation
 )
checklist_annotation_ndjson = {
    "name": "checklist_convo",
    "answers": [
        {"name": "first_checklist_answer"},
        {"name": "second_checklist_answer"}
    ]
}

Classification: Radio (Single-choice)

radio_annotation = lb_types.ClassificationAnnotation(
    name="Choose the best response",
    value=lb_types.Radio(answer=lb_types.ClassificationAnswer(
        name="Response B")))
radio_annotation_ndjson = {
    "name": "radio_convo",
    "answer": {
        "name": "first_radio_answer"
    }
}

Classification: Nested radio

nested_radio_annotation = lb_types.ClassificationAnnotation(
  name="nested_radio_question",
  value=lb_types.Radio(
    answer=lb_types.ClassificationAnswer(
      name="first_radio_answer",
      classifications=[
        lb_types.ClassificationAnnotation(
          name="sub_radio_question",
          value=lb_types.Radio(
            answer=lb_types.ClassificationAnswer(
              name="first_sub_radio_answer"
            )
          )
        )
      ]
    )
  )
)
nested_radio_annotation_ndjson = {
  "name": "nested_radio_question",
  "answer": {
      "name": "first_radio_answer",
      "classifications": [{
          "name":"sub_radio_question",
          "answer": { "name" : "first_sub_radio_answer"}
        }]
    }
}

Classification: Nested checklist

nested_checklist_annotation = lb_types.ClassificationAnnotation(
  name="nested_checklist_question",
  message_id="message-1",
  value=lb_types.Checklist(
    answer=[lb_types.ClassificationAnswer(
      name="first_checklist_answer",
      classifications=[
        lb_types.ClassificationAnnotation(
          name="sub_checklist_question",
          value=lb_types.Checklist(
            answer=[lb_types.ClassificationAnswer(
            name="first_sub_checklist_answer"
          )]
        ))
      ]
    )]
  )
)
nested_checklist_annotation_ndjson = {
  "name": "nested_checklist_question",
  "messageId": "message-1",
  "answer": [{
      "name": "first_checklist_answer",
      "classifications" : [
        {
          "name": "sub_checklist_question",
          "answer": {
            "name": "first_sub_checklist_answer",
          }
        }
      ]
  }]
}

End-to-end example: Import pre-labels or ground truth

Whether you are importing annotations as pre-labels or as ground truth, the steps are very similar. Steps 5 and 6 (creating and importing the annotation payload) are where the process becomes slightly different and is explained below in detail.

Before you start

You must import these libraries to use the code examples in this section.

import labelbox as lb
import uuid
import labelbox.types as lb_types

Replace with your API key

To learn how to create an API key, please follow the instructions on this page.

API_KEY = ""
client = lb.Client(api_key=API_KEY)

Step 1: Import data rows

Upload your data rows to Catalog. To learn how to format your import JSON files, see Import conversation & model response data.

Note the additional modelOutputs object is required. For descriptions, see Model outputs object definitions.

global_key = "pairwise_shooping_asset"

# Upload data rows
convo_data = {
    "row_data": "https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_2.json",
    "global_key": global_key
}

# Create a dataset
dataset = client.create_dataset(name="pairwise_annotation_demo")
# Create a datarow
task = dataset.create_data_rows([convo_data])
task.wait_till_done()
print("Errors:",task.errors)
print("Failed data rows:", task.failed_data_rows)
pairwise_shopping_2 =  """
 {
  "type": "application/vnd.labelbox.conversational",
  "version": 1,
  "messages": [
    {
      "messageId": "message-0",
      "timestampUsec": 1530718491,
      "content": "Hi! How can I help?",
      "user": {
        "userId": "Bot 002",
        "name": "Bot"
      },
      "align": "left",
      "canLabel": false
    },
    {
      "messageId": "message-1",
      "timestampUsec": 1530718503,
      "content": "I just bought a vacuum cleaner from you guys like a week ago and it's already broken!!",
      "user": {
        "userId": "User 00686",
        "name": "User"
      },
      "align": "right",
      "canLabel": true
    }

  ],
  "modelOutputs": [
    {
      "title": "Response A",
      "content": "I'm really sorry to hear that your vacuum cleaner is not working as expected. We certainly understand how frustrating this can be, especially with a recent purchase. I assure you, we're here to help!\n\n To proceed with resolving this issue, could you please provide some details about the problem you're experiencing with the vacuum cleaner? Additionally, if you could share your order number or receipt details, that would be very helpful. We will definitely look into the available options for you, which may include a replacement or a refund, depending on the specific circumstances and our return policy.\n\n Your satisfaction is our priority, and we'll do our best to resolve this matter promptly and to your satisfaction.",
      "modelConfigName": "GPT-3.5 with temperature 0"
    },
    {
      "title": "Response B",
      "content": "I'm sorry about the issue with your vacuum cleaner. Please send us the order number or receipt details so we can quickly assist you with a replacement. Your satisfaction is important to us!",
      "modelConfigName": "Fine Tuned GPT-3.5 with demo data"
    }
  ]
}
"""

Step 2: Create/select an ontology

Your project should have the correct ontology set up with all the tools and classifications supported for your annotations. The value for the name parameter should match the name field in your annotations to ensure the correct feature schemas are matched.

Here is an example of creating an ontology programmatically for all the sample annotations above.

ontology_builder = lb.OntologyBuilder(
  tools=[
    lb.Tool(tool=lb.Tool.Type.NER,name="ner"),
  ],
  classifications=[
    lb.Classification(
      class_type=lb.Classification.Type.RADIO,
      scope=lb.Classification.Scope.GLOBAL,
      name="Choose the best response",
      options=[lb.Option(value="Response A"), lb.Option(value="Response B"), lb.Option(value="Tie")]
    ),
    lb.Classification(
      class_type=lb.Classification.Type.TEXT,
      name="Provide a reason for your choice"
    ),
    lb.Classification(
      class_type=lb.Classification.Type.CHECKLIST,
      scope=lb.Classification.Scope.INDEX,
      name="checklist_convo",
      options=[
        lb.Option(value="first_checklist_answer"),
        lb.Option(value="second_checklist_answer")
      ]
    ),
    lb.Classification(
      class_type=lb.Classification.Type.CHECKLIST,
      name="nested_checklist_question",
      scope = lb.Classification.Scope.INDEX,
      options=[
          lb.Option("first_checklist_answer",
            options=[
              lb.Classification(
                  class_type=lb.Classification.Type.CHECKLIST,
                  name="sub_checklist_question",
                  options=[lb.Option("first_sub_checklist_answer")]
              )
          ])
      ]
    ),
    lb.Classification(
        class_type=lb.Classification.Type.RADIO,
        name="nested_radio_question",
        scope = lb.Classification.Scope.GLOBAL,
        options=[
            lb.Option("first_radio_answer",
                options=[
                    lb.Classification(
                        class_type=lb.Classification.Type.RADIO,
                        name="sub_radio_question",
                        options=[lb.Option("first_sub_radio_answer")]
                    )
                ])
          ]
    )
  ]
)

ontology = client.create_ontology("Pairwise comparison ontology", ontology_builder.asdict(), media_type=lb.MediaType.Conversational)


Step 3: Create a labeling project

# Create Labelbox project
project = client.create_project(name="Conversational Text Annotation Import Demo (Pairwise comparison)",
                                    media_type=lb.MediaType.Conversational)

# Setup your ontology
project.setup_editor(ontology) # Connect your ontology and editor to your project

Step 4: Send a batch of data rows to the project

# Create a batch to send to your project
batch = project.create_batch(
  "first-batch-convo-demo", # Each batch in a project must have a unique name
  global_keys=[global_key], # Paginated collection of data row objects, list of data row ids or global keys
  priority=5 # priority between 1(Highest) - 5(lowest)
)

print("Batch: ", batch)

Step 5: Create the annotation payload

To learn how to format the annotations for import, see the Supported annotations section above.

label = []
label.append(
  lb_types.Label(
    data=lb_types.ConversationData(
      global_key=global_key
    ),
    annotations=[
      ner_annotation,
      text_annotation,
      checklist_annotation,
      radio_annotation,
      nested_radio_annotation,
      nested_checklist_annotation
    ]
  )
)
label_ndjson = []
for annotations in [
    ner_annotation_ndjson,
    text_annotation_ndjson,
    checklist_annotation_ndjson,
    radio_annotation_ndjson,
    nested_checklist_annotation_ndjson,
    nested_radio_annotation_ndjson
    ]:
  annotations.update({
      "dataRow": {
          "globalKey": global_key
      }
  })
  label_ndjson.append(annotations)

Step 6: Import the annotation payload

Option A: Upload annotations as pre-labels (Model-assisted labeling)

upload_job = lb.MALPredictionImport.create_from_objects(
    client = client,
    project_id = project.uid,
    name=f"mal_job-{str(uuid.uuid4())}",
    predictions=label)

upload_job.wait_until_done()
print("Errors:", upload_job.errors)
print("Status of uploads: ", upload_job.statuses)

Option B: Upload annotations as ground truth

upload_job = lb.LabelImport.create_from_objects(
    client = client,
    project_id = project.uid,
    name="label_import_job"+str(uuid.uuid4()),
    labels=label)

upload_job.wait_until_done();
print("Errors:", upload_job.errors)
print("Status of uploads: ", upload_job.statuses)