Upload text predictions

How to upload predictions on text data to a model run, with sample payload formats.

Open this Colab for an interactive tutorial on uploading predictions and annotations on text in a model run.

Supported predictions

To upload predictions in Labelbox, you need to create a predictions payload. In this section, we provide this payload for every supported prediction type.

Labelbox supports two formats for the predictions payload:

  • Python annotation types (recommended)
  • NDJSON

Both are described below.

📘

Uploading confidence scores is optional

If you do not specify a confidence score, the prediction will be treated as if it had a confidence score of 1.
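
For example (a minimal sketch using hypothetical variable names), the entity prediction below omits the confidence argument and is therefore treated as if its confidence were 1:

entities_prediction_no_confidence = lb_types.ObjectAnnotation(
    value=lb_types.TextEntity(start=10, end=20), # character offsets into the text
    name="named_entity" # no confidence specified, so it defaults to 1
)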

Entity

named_entity = lb_types.TextEntity(start=10, end=20)
entities_prediction = lb_types.ObjectAnnotation(
    value=named_entity,
    name="named_entity",
    confidence=0.5
)
entities_prediction_ndjson = {
    "name": "named_entity",
    "confidence": 0.5,
    "location": {
        "start": 10,
        "end": 20
    }
}

Classification: Radio (single-choice)

radio_prediction = lb_types.ClassificationAnnotation(
    name="radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(name="first_radio_answer", confidence=0.5)
    )
)
radio_prediction_ndjson = {
  "name": "radio_question",
  "answer": {"name": "first_radio_answer", "confidence": 0.5}
} 

Classification: Radio nested

nested_radio_prediction = lb_types.ClassificationAnnotation(
  name="nested_radio_question",
  value=lb_types.Radio(
    answer=lb_types.ClassificationAnswer(
      name="first_radio_answer",
      confidence=0.5, # Confidence scores should be added to the answer 
      classifications=[
        lb_types.ClassificationAnnotation(
          name="sub_radio_question",
          value=lb_types.Radio(
            answer=lb_types.ClassificationAnswer(
              name="first_sub_radio_answer",
              confidence=0.5 
            )
          )
        )
      ]
    )
  )
)
nested_radio_prediction_ndjson = {
  "name": "nested_radio_question",
  "answer": {
      "name": "first_radio_answer",
      "confidence": 0.5, # Confidence scores should be added to the answer 
      "classifications": [{
          "name":"sub_radio_question",
          "answer": { "name" : "first_sub_radio_answer", 
                     "confidence": 0.5}
        }]
    }
}

Classification: Checklist nested

nested_checklist_prediction = lb_types.ClassificationAnnotation(
  name="nested_checklist_question",
  value=lb_types.Checklist(
    answer=[lb_types.ClassificationAnswer(
      name="first_checklist_answer",
      confidence=0.5, # Confidence scores should be added to the answer 
      classifications=[
        lb_types.ClassificationAnnotation(
          name="sub_checklist_question",
          value=lb_types.Checklist(
            answer=[lb_types.ClassificationAnswer(
              name="first_sub_checklist_answer",
              confidence=0.5
            )]
          )
        )
      ]
    )]
  )
)
nested_checklist_prediction_ndjson = {
  "name": "nested_checklist_question",
  "answer": [{
      "name": "first_checklist_answer", 
      "confidence": 0.5, # Confidence scores should be added to the answer 
      "classifications" : [
        {
          "name": "sub_checklist_question", 
          "answer": {"name": "first_sub_checklist_answer", 
                     "confidence": 0.5}
        }          
      ]         
  }]
}

Classification: Checklist (multi-choice)

checklist_prediction = lb_types.ClassificationAnnotation(
    name="checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_checklist_answer",
                confidence=0.5
            ),
            lb_types.ClassificationAnswer(
                name="second_checklist_answer",
                confidence=0.5
            ),
            lb_types.ClassificationAnswer(
                name="third_checklist_answer",
                confidence=0.5
            )
        ]
    )
)
checklist_prediction_ndjson = {
  "name": "checklist_question",
  "answer": [
    {"name": "first_checklist_answer", "confidence": 0.5},
    {"name": "second_checklist_answer", "confidence": 0.5},
    {"name": "third_checklist_answer", "confidence": 0.5}
  ]
}

Classification: Free-form text

text_prediction = lb_types.ClassificationAnnotation(
    name="free_text",
    value=lb_types.Text(answer="sample text", confidence=0.5)
)
text_prediction_ndjson = {
  "name": "free_text",
  "answer": "sample text",
  "confidence": 0.5
}

End-to-end example: Upload predictions to a model run

Follow the steps below to upload predictions to a model run.

Before you start

You will need to import these libraries to run the code examples in this section:

import labelbox as lb
import labelbox.data.annotation_types as lb_types
import labelbox.data.serialization as lb_serializers
import uuid

Replace with your API key

To learn how to create an API key, please follow the instructions on this page.

API_KEY = ""
client = lb.Client(API_KEY)

Step 1: Import data rows into Catalog

global_key = "lorem-ipsum.txt"
test_img_url = {
    "row_data": "https://storage.googleapis.com/labelbox-sample-datasets/nlp/lorem-ipsum.txt",
    "global_key": global_key
}
dataset = client.create_dataset(
    name="text prediction demo dataset",
    iam_integration=None # Removing this argument will default to the organziation's default iam integration
    )
task = dataset.create_data_rows([test_img_url])
task.wait_till_done()
print("Errors:",task.errors)
print("Failed data rows:", task.failed_data_rows)

Step 2: Create/select an ontology for your model predictions

Your model run should have the correct ontology set up with all the tools and classifications supported for your predictions.

Here is an example of creating an ontology programmatically for all the example predictions above:

## Set up the ontology with the classifications and tools used in the predictions above.

ontology_builder = lb.OntologyBuilder(
  classifications=[ # List of Classification objects
    lb.Classification(
      class_type=lb.Classification.Type.RADIO,
      name="radio_question", 
      options=[lb.Option(value="first_radio_answer")]
    ),
    lb.Classification(
      class_type=lb.Classification.Type.RADIO,
      name="nested_radio_question", 
      options=[
        lb.Option(value="first_radio_answer",
          options=[
              lb.Classification(
                class_type=lb.Classification.Type.RADIO,
                name="sub_radio_question",
                options=[
                  lb.Option(value="first_sub_radio_answer")
                ]
            ),
          ]
        )
      ],
    ),
    lb.Classification(
      class_type=lb.Classification.Type.CHECKLIST,
      name="checklist_question", 
      options=[
        lb.Option(value="first_checklist_answer"),
        lb.Option(value="second_checklist_answer"),
        lb.Option(value="third_checklist_answer")
      ]
    ), 
     lb.Classification(
      class_type=lb.Classification.Type.TEXT,
      name="free_text"
    ),
    lb.Classification(
      class_type=lb.Classification.Type.CHECKLIST,
      name="nested_checklist_question",
      options=[
          lb.Option("first_checklist_answer",
            options=[
              lb.Classification(
                  class_type=lb.Classification.Type.CHECKLIST,
                  name="sub_checklist_question", 
                  options=[lb.Option("first_sub_checklist_answer")]
              )
          ]
        )
      ]
    )
  ],
  tools=[ # List of Tool objects
         lb.Tool(tool=lb.Tool.Type.NER,
              name="named_entity")
    ]
)

ontology = client.create_ontology("Ontology Text Predictions", ontology_builder.asdict(), media_type=lb.MediaType.Text)

Step 3: Create a model and a model run

model = client.create_model(name="text_model_run_"+ str(uuid.uuid4()), 
                            ontology_id=ontology.uid)
model_run = model.create_model_run("iteration 1")

Step 4: Send data rows to the model run

model_run.upsert_data_rows(global_keys=[global_key])

Step 5: Create the predictions payload

Create the predictions payload using the snippets of code shown above.

Labelbox supports two formats for the predictions payload: Python annotation types and NDJSON. Both approaches are shown below, composing the predictions into Labels attached to the data rows.

The resulting label_predictions and label_ndjson_predictions payloads should contain exactly the same prediction content (except for the generated UUID strings).

label_predictions = []
label_predictions.append(
  lb_types.Label(
    data=lb_types.TextData(global_key=global_key),
    annotations = [
      entities_prediction, 
      nested_radio_prediction,
      radio_prediction, 
      checklist_prediction,
      nested_checklist_prediction,
      text_prediction,
    ]
  )
)
label_ndjson_predictions = []
for annot in [
    entities_prediction_ndjson, 
    radio_prediction_ndjson, 
    checklist_prediction_ndjson,
    text_prediction_ndjson, 
    nested_radio_prediction_ndjson,
    nested_checklist_prediction_ndjson
  ]:
  annot.update({
      "dataRow": {"globalKey": global_key}
  })
  label_ndjson_predictions.append(annot)

Step 6: Upload the predictions payload to the model run

# Upload the predictions payload to the model run
upload_job_prediction = model_run.add_predictions(
    name="prediction_upload_job" + str(uuid.uuid4()),
    predictions=label_predictions)

upload_job_prediction.wait_until_done()
# Errors will appear for predictions that failed to upload.
print("Errors:", upload_job_prediction.errors)
print("Status of uploads: ", upload_job_prediction.statuses)

Step 7: Send annotations to the model run (optional)

# 7.1. Create a Labelbox project
project = client.create_project(name="Text Prediction Import Demo",
                                media_type=lb.MediaType.Text)
project.setup_editor(ontology)

# 7.2. Create a batch to send to the project
project.create_batch(
  "batch_text_prediction_demo", # Each batch in a project must have a unique name
  global_keys=[global_key], # A list of data row IDs or global keys
  priority=5 # Priority between 1 (highest) and 5 (lowest)
)

# 7.3 Create your annotation payload as explained in:
# https://docs.labelbox.com/reference/import-text-annotations#supported-annotations
entities_annotation ... 
nested_radio_annotation ...
radio_annotation ...
checklist_annotation ...
nested_checklist_annotation ...
text_annotation ...
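
# For illustration only (a minimal sketch; see the linked page for the full payloads):
# annotation payloads mirror the prediction payloads shown earlier, minus the
# confidence scores. For example, the entity annotation could look like this:
entities_annotation = lb_types.ObjectAnnotation(
    value=lb_types.TextEntity(start=10, end=20),
    name="named_entity"
)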

# 7.4. Create the label object 
label = []
label.append(
  lb_types.Label(
    data=lb_types.TextData(global_key=global_key),
    annotations = [
      entities_annotation, 
      nested_radio_annotation,
      radio_annotation, 
      checklist_annotation,
      nested_checklist_annotation,
      text_annotation,
    ]
  )
)

# 7.5 Upload annotations to the project using Label Import 
upload_job_annotation = lb.LabelImport.create_from_objects(
    client = client,
    project_id = project.uid,
    name="text_label_import_job"+ str(uuid.uuid4()),
    labels=label)

upload_job_annotation.wait_until_done()
# Errors will appear for annotation uploads that failed.
print("Errors:", upload_job_annotation.errors)
print("Status of uploads: ", upload_job_annotation.statuses)


# 7.6 Send the annotations to the model run

# Send all labels created in the project to the model run
model_run.upsert_labels(project_id=project.uid)