Upload conversational text predictions

How to upload predictions on conversational text data in a model run and sample upload formats.

Open this Colab for an interactive tutorial on uploading predictions on conversational text in a model run.

Supported predictions

To upload predictions in Labelbox, you need to create a predictions payload. In this section, we provide this payload for every supported prediction type.

Labelbox supports two formats for the predictions payload:

  • Python annotation types (recommended)
  • NDJSON

Both are described below.

Entity

# Entity (NER) prediction: the span with start=0 and end=8 inside message "4".
ner_prediction = lb_types.ObjectAnnotation(
    value=lb_types.ConversationEntity(message_id="4", start=0, end=8),
    name="ner",
    confidence=0.5,
)
# NDJSON form of the entity prediction: the span goes under "location" and
# the target message under "messageId".
ner_prediction_ndjson = {
    "name": "ner",
    "confidence": 0.5,
    "location": {"start": 0, "end": 8},
    "messageId": "4",
}

Global classification: Radio

# Global radio prediction (no message_id); the confidence score is set on
# the selected answer.
radio_prediction = lb_types.ClassificationAnnotation(
    name="radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="second_radio_answer",
            confidence=0.5,
        ),
    ),
)
# NDJSON form of the global radio prediction.
radio_prediction_ndjson = {
    "name": "radio_question",
    "answer": {
        "name": "second_radio_answer",
        "confidence": 0.5,
    },
}

Message-based classification: Radio

# Message-based radio prediction: message_id ties the answer to message "0".
radio_prediction = lb_types.ClassificationAnnotation(
    name="radio_convo",
    message_id="0",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_radio_answer",
            confidence=0.5,
        ),
    ),
)
# NDJSON form of the message-based radio prediction.
radio_prediction_ndjson = {
    "name": "radio_convo",
    "answer": {"name": "first_radio_answer", "confidence": 0.5},
    "messageId": "0",
}

Message-based classification: Free-form text

# Free-form text prediction on message "0".
# Confidence scores are not supported for text predictions.
text_prediction = lb_types.ClassificationAnnotation(
    message_id="0",
    name="text_convo",
    value=lb_types.Text(
        answer="the answer to the text questions are right here",
    ),
)
# NDJSON form of the free-form text prediction (no confidence field).
text_prediction_ndjson = {
    "name": "text_convo",
    "answer": "the answer to the text questions are right here",
    "messageId": "0",
}

Message-based classification: Checklist

# Message-based checklist prediction on message "2"; every selected answer
# carries its own confidence score.
checklist_prediction = lb_types.ClassificationAnnotation(
    name="checklist_convo",  # must match your ontology feature's name
    message_id="2",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_checklist_answer",
                confidence=0.5,
            ),
            lb_types.ClassificationAnswer(
                name="second_checklist_answer",
                confidence=0.5,
            ),
        ],
    ),
)
# NDJSON form of the message-based checklist prediction; the selected
# options are listed under "answers".
checklist_prediction_ndjson = {
    "name": "checklist_convo",
    "answers": [
        {
            "name": "first_checklist_answer",
            "confidence": 0.5,
        },
        {
            "name": "second_checklist_answer",
            "confidence": 0.5,
        },
    ],
    "messageId": "2",
}

Message-based nested classifications: Checklist and radio

# Nested checklist prediction on message "10". Confidence scores go on the
# answers (parent and nested), not on the classification itself.
nested_checklist_prediction = lb_types.ClassificationAnnotation(
    name="nested_checklist_question",
    message_id="10",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_checklist_answer",
                confidence=0.5,
                classifications=[
                    lb_types.ClassificationAnnotation(
                        name="sub_checklist_question",
                        value=lb_types.Checklist(
                            answer=[
                                lb_types.ClassificationAnswer(
                                    name="first_sub_checklist_answer",
                                    confidence=0.5,
                                ),
                            ],
                        ),
                    ),
                ],
            ),
        ],
    ),
)

# Nested radio prediction (no message_id). Confidence scores go on the
# answers (parent and nested), not on the classification itself.
nested_radio_prediction = lb_types.ClassificationAnnotation(
    name="nested_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_radio_answer",
            confidence=0.5,
            classifications=[
                lb_types.ClassificationAnnotation(
                    name="sub_radio_question",
                    value=lb_types.Radio(
                        answer=lb_types.ClassificationAnswer(
                            name="first_sub_radio_answer",
                            confidence=0.5,
                        ),
                    ),
                ),
            ],
        ),
    ),
)
# NDJSON form of the nested checklist prediction on message "10".
# Confidence scores are added to each answer, including the nested one.
nested_checklist_prediction_ndjson = {
    "name": "nested_checklist_question",
    "messageId": "10",
    "answer": [
        {
            "name": "first_checklist_answer",
            "confidence": 0.5,
            "classifications": [
                {
                    "name": "sub_checklist_question",
                    "answer": {
                        "name": "first_sub_checklist_answer",
                        "confidence": 0.5,
                    },
                },
            ],
        },
    ],
}

# NDJSON form of the nested radio prediction; the sub-question sits under
# the parent answer's "classifications" list.
nested_radio_prediction_ndjson = {
    "name": "nested_radio_question",
    "answer": {
        "name": "first_radio_answer",
        "confidence": 0.5,
        "classifications": [
            {
                "name": "sub_radio_question",
                "answer": {
                    "name": "first_sub_radio_answer",
                    "confidence": 0.5,
                },
            },
        ],
    },
}

End-to-end example: Upload predictions to a model run

Here are the steps to upload predictions to a model run:

Before you start

You will need to import these libraries to use the code examples in this section:

import labelbox as lb
import labelbox.types as lb_types
import uuid

Replace with your API key

To learn how to create an API key, please follow the instructions on this page.

# Paste your Labelbox API key between the quotes before running.
API_KEY = ""
client = lb.Client(api_key=API_KEY)

Step 1: Import data rows into Catalog

# The global key is reused by later steps to reference this data row.
global_key = "conversation-1.json"

# A single conversational-text data row, addressed by its global key.
asset = {
    "row_data": (
        "https://storage.googleapis.com/labelbox-developer-testing-assets/"
        "conversational_text/1000-conversations/conversation-1.json"
    ),
    "global_key": global_key,
}

dataset = client.create_dataset(
    name="conversational_annotation_import_demo_dataset",
)

# Create the data row and wait for the task to finish before reading
# its results.
task = dataset.create_data_rows([asset])
task.wait_till_done()
print("Errors:", task.errors)
print("Failed data rows: ", task.failed_data_rows)

Step 2: Create/select an ontology for your model predictions

For best results, verify your ontology before submitting a model run. Verify that:

  • Your ontology supports all features and classifications that might be needed during the model run.
  • Tool names and classification instructions match the name and instructions fields in your annotation payloads.

This helps ensure that feature schemas match your intentions.

# Ontology covering every feature used by the end-to-end payloads in this
# tutorial. Each tool/classification `name` must match the "name" used in
# the corresponding prediction or annotation payload.
#
# All classifications are identified with `name=` (the nested ones already
# were); the flat ones previously passed the same string via `instructions=`.
ontology_builder = lb.OntologyBuilder(
    tools=[
        lb.Tool(tool=lb.Tool.Type.NER, name="ner"),
    ],
    classifications=[
        # Message-based (INDEX scope) free-form text.
        lb.Classification(
            class_type=lb.Classification.Type.TEXT,
            scope=lb.Classification.Scope.INDEX,
            name="text_convo",
        ),
        # Message-based checklist.
        lb.Classification(
            class_type=lb.Classification.Type.CHECKLIST,
            scope=lb.Classification.Scope.INDEX,
            name="checklist_convo",
            options=[
                lb.Option(value="first_checklist_answer"),
                lb.Option(value="second_checklist_answer"),
            ],
        ),
        # Message-based radio.
        lb.Classification(
            class_type=lb.Classification.Type.RADIO,
            name="radio_convo",
            scope=lb.Classification.Scope.INDEX,
            options=[
                lb.Option(value="first_radio_answer"),
                lb.Option(value="second_radio_answer"),
            ],
        ),
        # Message-based checklist with a nested checklist sub-question.
        lb.Classification(
            class_type=lb.Classification.Type.CHECKLIST,
            name="nested_checklist_question",
            scope=lb.Classification.Scope.INDEX,
            options=[
                lb.Option(
                    "first_checklist_answer",
                    options=[
                        lb.Classification(
                            class_type=lb.Classification.Type.CHECKLIST,
                            name="sub_checklist_question",
                            options=[lb.Option("first_sub_checklist_answer")],
                        ),
                    ],
                ),
            ],
        ),
        # Global radio with a nested radio sub-question.
        lb.Classification(
            class_type=lb.Classification.Type.RADIO,
            name="nested_radio_question",
            scope=lb.Classification.Scope.GLOBAL,
            options=[
                lb.Option(
                    "first_radio_answer",
                    options=[
                        lb.Classification(
                            class_type=lb.Classification.Type.RADIO,
                            name="sub_radio_question",
                            options=[lb.Option("first_sub_radio_answer")],
                        ),
                    ],
                ),
            ],
        ),
    ],
)

ontology = client.create_ontology(
    "Ontology Conversation Annotations",
    ontology_builder.asdict(),
)

Step 3: Create a model and a model run

# Create a model tied to the ontology; the UUID suffix keeps the name unique.
model = client.create_model(
    name=f"Conversational_model_run_{uuid.uuid4()}",
    ontology_id=ontology.uid,
)

# Create a model run under that model.
model_run = model.create_model_run("iteration 1")
     

Step 4: Send data rows to the model run

# Attach the data row to the model run, referencing it by its global key.
model_run.upsert_data_rows(global_keys=[global_key])

Step 5: Create the predictions payload

Create the prediction payload using the code snippets in the section above.

Labelbox supports two formats for the predictions payload: NDJSON and Python annotation types. Both are shown below, composing your predictions into labels attached to the data rows.

The resulting payload should have exactly the same content for the predictions supported by both formats.

Python annotations

# One Label bundles every prediction for the data row identified by
# global_key.
label_prediction = [
    lb_types.Label(
        data=lb_types.ConversationData(global_key=global_key),
        annotations=[
            ner_prediction,
            checklist_prediction,
            text_prediction,
            radio_prediction,
            nested_checklist_prediction,
            nested_radio_prediction,
        ],
    ),
]

NDJSON

# Tag each NDJSON prediction with its data row and collect them in order.
# Note: the prediction dicts are updated in place.
label_prediction_ndjson = []
ndjson_predictions = (
    ner_prediction_ndjson,
    text_prediction_ndjson,
    checklist_prediction_ndjson,
    radio_prediction_ndjson,
    nested_checklist_prediction_ndjson,
    nested_radio_prediction_ndjson,
)
for prediction in ndjson_predictions:
    prediction["dataRow"] = {"globalKey": global_key}
    label_prediction_ndjson.append(prediction)

Step 6: Upload the predictions payload to the model run

# Upload the prediction labels to the model run. The UUID suffix keeps the
# import job name unique.
upload_job_prediction = model_run.add_predictions(
    name=f"prediction_upload_job{uuid.uuid4()}",
    predictions=label_prediction,
)

# Wait for the import job to finish before inspecting its results, the same
# way the label import in step 7.5 does.
upload_job_prediction.wait_until_done()

# Errors will appear for prediction uploads that failed.
print("Errors:", upload_job_prediction.errors)
print("Status of uploads: ", upload_job_prediction.statuses)

Step 7: Send annotations to the model run

To send annotations to a model run, first create a project, build a label payload, and import the labels into the project; then send the project's labels to the model run.

# 7.1 Create a Labelbox project for conversational data and attach the ontology
project = client.create_project(
    name="Conversational Text Prediction Import Demo",
    media_type=lb.MediaType.Conversational,
)
project.setup_editor(ontology)

# 7.2 Create a batch to send to the project
#   - the batch name must be unique within the project
#   - global_keys selects the data rows to include
#   - priority ranges from 1 (highest) to 5 (lowest)
project.create_batch(
    "batch_convo_prediction_demo",
    priority=5,
    global_keys=[global_key],
)

# 7.3 Create the annotations payload
# Ground-truth annotations mirror the predictions but carry no confidence.
ner_annotation = lb_types.ObjectAnnotation(
    value=lb_types.ConversationEntity(message_id="4", start=0, end=8),
    name="ner",
)

# Free-form text annotation on message "0".
text_annotation = lb_types.ClassificationAnnotation(
    message_id="0",
    name="text_convo",
    value=lb_types.Text(
        answer="the answer to the text questions are right here",
    ),
)

# Checklist annotation on message "2" with two selected options.
checklist_annotation = lb_types.ClassificationAnnotation(
    name="checklist_convo",  # must match your ontology feature's name
    message_id="2",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(name="first_checklist_answer"),
            lb_types.ClassificationAnswer(name="second_checklist_answer"),
        ],
    ),
)

# Radio annotation on message "0".
radio_annotation = lb_types.ClassificationAnnotation(
    name="radio_convo",
    message_id="0",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(name="first_radio_answer"),
    ),
)


# Nested checklist annotation on message "10": the sub-question is attached
# to the parent answer through `classifications`.
nested_checklist_annotation = lb_types.ClassificationAnnotation(
    name="nested_checklist_question",
    message_id="10",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_checklist_answer",
                classifications=[
                    lb_types.ClassificationAnnotation(
                        name="sub_checklist_question",
                        value=lb_types.Checklist(
                            answer=[
                                lb_types.ClassificationAnswer(
                                    name="first_sub_checklist_answer",
                                ),
                            ],
                        ),
                    ),
                ],
            ),
        ],
    ),
)

# Nested radio annotation (no message_id): the sub-question is attached to
# the parent answer through `classifications`.
nested_radio_annotation = lb_types.ClassificationAnnotation(
    name="nested_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_radio_answer",
            classifications=[
                lb_types.ClassificationAnnotation(
                    name="sub_radio_question",
                    value=lb_types.Radio(
                        answer=lb_types.ClassificationAnswer(
                            name="first_sub_radio_answer",
                        ),
                    ),
                ),
            ],
        ),
    ),
)

# 7.4 Create the label object
# One Label bundles every annotation for the data row identified by
# global_key.
label = [
    lb_types.Label(
        data=lb_types.ConversationData(global_key=global_key),
        annotations=[
            ner_annotation,
            text_annotation,
            checklist_annotation,
            radio_annotation,
            nested_radio_annotation,
            nested_checklist_annotation,
        ],
    ),
]

# 7.5 Upload annotations to the project using label import
# The UUID suffix keeps the import job name unique.
upload_job_annotation = lb.LabelImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name=f"text_label_import_job{uuid.uuid4()}",
    labels=label,
)

# Wait for the import job to finish before reading its results.
upload_job_annotation.wait_until_done()

# Errors will appear for annotation uploads that failed.
print("Errors:", upload_job_annotation.errors)
print("Status of uploads: ", upload_job_annotation.statuses)

# 7.6 Send the annotations to the model run

# Upsert the labels from the project created in 7.1 into the model run.
model_run.upsert_labels(project_id=project.uid)