How to upload predictions on text data to a model run, with sample payload formats for each supported prediction type.
Supported prediction types
To upload predictions in Labelbox, you need to create a predictions payload. This section shows how to declare payloads for each supported prediction type. You can declare payloads using Python annotation types (preferred) or as NDJSON objects.
Confidence scores are optional. If you do not include confidence scores in your prediction payloads, the prediction is treated as if it had a confidence value of one (1).
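For example, a payload declared without a confidence score, as in this minimal sketch, is scored on upload as if its confidence were 1.0:
# Omitting the confidence score is valid; the prediction is treated as confidence 1.0
radio_default_confidence = lb_types.ClassificationAnnotation(
    name="radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(name="first_radio_answer")
    )
)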
Entity
named_entity = lb_types.TextEntity(start=10, end=20)
entities_prediction = lb_types.ObjectAnnotation(value=named_entity, name="named_entity", confidence=0.5)
entities_prediction_ndjson = {
"name": "named_entity",
"confidence": 0.5,
"location": {
"start": 10,
"end": 20
}
}
Classification: radio (single choice)
radio_prediction = lb_types.ClassificationAnnotation(
    name="radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(name="first_radio_answer", confidence=0.5)
    )
)
radio_prediction_ndjson = {
"name": "radio_question",
"answer": {"name": "first_radio_answer", "confidence": 0.5}
}
Classification: radio nested
nested_radio_prediction = lb_types.ClassificationAnnotation(
name="nested_radio_question",
value=lb_types.Radio(
answer=lb_types.ClassificationAnswer(
name="first_radio_answer",
confidence=0.5, # Confidence scores should be added to the answer
classifications=[
lb_types.ClassificationAnnotation(
name="sub_radio_question",
value=lb_types.Radio(
answer=lb_types.ClassificationAnswer(
name="first_sub_radio_answer",
confidence=0.5
)
)
)
]
)
)
)
nested_radio_prediction_ndjson = {
    "name": "nested_radio_question",
    "answer": {
        "name": "first_radio_answer",
        "confidence": 0.5, # Confidence scores should be added to the answer
        "classifications": [{
            "name": "sub_radio_question",
            "answer": {"name": "first_sub_radio_answer", "confidence": 0.5}
        }]
    }
}
Classification: checklist nested
nested_checklist_prediction = lb_types.ClassificationAnnotation(
name="nested_checklist_question",
value=lb_types.Checklist(
answer=[lb_types.ClassificationAnswer(
name="first_checklist_answer",
confidence=0.5, # Confidence scores should be added to the answer
classifications=[
lb_types.ClassificationAnnotation(
name="sub_checklist_question",
value=lb_types.Checklist(
answer=[lb_types.ClassificationAnswer(
name="first_sub_checklist_answer",
confidence=0.5,
)]
))
]
)]
)
)
nested_checklist_prediction_ndjson = {
    "name": "nested_checklist_question",
    "answer": [{
        "name": "first_checklist_answer",
        "confidence": 0.5, # Confidence scores should be added to the answer
        "classifications": [{
            "name": "sub_checklist_question",
            "answer": {"name": "first_sub_checklist_answer", "confidence": 0.5}
        }]
    }]
}
Classification: checklist (multiple choice)
checklist_prediction = lb_types.ClassificationAnnotation(
    name="checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(name="first_checklist_answer", confidence=0.5),
            lb_types.ClassificationAnswer(name="second_checklist_answer", confidence=0.5),
            lb_types.ClassificationAnswer(name="third_checklist_answer", confidence=0.5)
        ]
    )
)
checklist_prediction_ndjson = {
    "name": "checklist_question",
    "answer": [
        {"name": "first_checklist_answer", "confidence": 0.5},
        {"name": "second_checklist_answer", "confidence": 0.5},
        {"name": "third_checklist_answer", "confidence": 0.5}
    ]
}
Classification: free-form text
text_prediction = lb_types.ClassificationAnnotation(
    name="free_text",
    value=lb_types.Text(answer="sample text", confidence=0.5)
)
text_prediction_ndjson = {
"name": "free_text",
"answer": "sample text",
"confidence": 0.5
}
Example: Upload predictions to model run
To upload predictions to a model run:
Before you start
These examples require the following libraries:
import labelbox as lb
import labelbox.data.annotation_types as lb_types
import uuid
Replace the value of API_KEY with a valid API key to connect to the Labelbox client.
API_KEY = None
client = lb.Client(API_KEY)
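As an alternative to pasting the key into the script, a common pattern is to read it from an environment variable; the variable name below is an assumption, not an SDK requirement:
import os

# Read the key from an environment variable instead of hard-coding it.
# "LABELBOX_API_KEY" is an assumed name; use whatever your environment defines.
API_KEY = os.environ.get("LABELBOX_API_KEY")
client = lb.Client(api_key=API_KEY)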
Step 1: Import data rows into Catalog
global_key = "lorem-ipsum.txt"
text_asset = {
    "row_data": "https://storage.googleapis.com/labelbox-sample-datasets/nlp/lorem-ipsum.txt",
    "global_key": global_key
}
dataset = client.create_dataset(
    name="text prediction demo dataset",
    iam_integration=None # Remove this argument to use your organization's default IAM integration
)
task = dataset.create_data_rows([text_asset])
task.wait_till_done()
print("Errors:",task.errors)
print("Failed data rows:", task.failed_data_rows)
Step 2: Set up ontology for predictions
Your model run's ontology must support all of the tools and classifications required by your predictions. This example shows how to create an ontology containing all supported prediction types.
## Set up the ontology; the tool and classification names must match the names used in the prediction payloads
ontology_builder = lb.OntologyBuilder(
classifications=[ # List of Classification objects
lb.Classification(
class_type=lb.Classification.Type.RADIO,
name="radio_question",
options=[lb.Option(value="first_radio_answer")]
),
lb.Classification(
class_type=lb.Classification.Type.RADIO,
name="nested_radio_question",
options=[
lb.Option(value="first_radio_answer",
options=[
lb.Classification(
class_type=lb.Classification.Type.RADIO,
name="sub_radio_question",
options=[
lb.Option(value="first_sub_radio_answer")
]
),
]
)
],
),
lb.Classification(
class_type=lb.Classification.Type.CHECKLIST,
name="checklist_question",
options=[
lb.Option(value="first_checklist_answer"),
lb.Option(value="second_checklist_answer"),
lb.Option(value="third_checklist_answer")
]
),
lb.Classification(
class_type=lb.Classification.Type.TEXT,
name="free_text"
),
lb.Classification(
class_type=lb.Classification.Type.CHECKLIST,
name="nested_checklist_question",
options=[
lb.Option("first_checklist_answer",
options=[
lb.Classification(
class_type=lb.Classification.Type.CHECKLIST,
name="sub_checklist_question",
options=[lb.Option("first_sub_checklist_answer")]
)
]
)
]
)
],
tools=[ # List of Tool objects
lb.Tool(tool=lb.Tool.Type.NER,
name="named_entity")
]
)
ontology = client.create_ontology("Ontology Text Predictions", ontology_builder.asdict(), media_type=lb.MediaType.Text)
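If a matching ontology already exists in your organization, you can fetch it by ID instead of calling create_ontology above; the ID below is a placeholder:
# Reuse an existing ontology instead of creating a new one.
# "<ONTOLOGY_ID>" is a placeholder; substitute your ontology's actual ID.
ontology = client.get_ontology("<ONTOLOGY_ID>")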
Step 3: Create model and model run
model = client.create_model(name="text_model_run_" + str(uuid.uuid4()),
                            ontology_id=ontology.uid)
model_run = model.create_model_run("iteration 1")
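A model can hold multiple runs; to track a later experiment against the same ontology, you can create another iteration on the same model rather than a new model:
# Each iteration of the experiment gets its own model run under the same model
model_run_v2 = model.create_model_run("iteration 2")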
Step 4: Send data rows to model run
model_run.upsert_data_rows(global_keys=[global_key])
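If your data rows don't carry global keys, upsert_data_rows also accepts data row IDs; a minimal sketch, assuming the dataset created in step 1:
# Alternative: attach data rows by ID rather than by global key
data_row_ids = [dr.uid for dr in dataset.data_rows()]
model_run.upsert_data_rows(data_row_ids=data_row_ids)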
Step 5: Create prediction payloads
See supported prediction types for help creating prediction payloads. You can declare predictions as Python annotation types (preferred) or NDJSON objects. These examples show each type and describe how to compose predictions into labels attached to the data rows.
The resulting label_ndjson_predictions and label_predictions payloads should contain exactly the same prediction content (except for the uuid string values).
label_predictions = []
label_predictions.append(
lb_types.Label(
data={"global_key" : global_key },
annotations = [
entities_prediction,
nested_radio_prediction,
radio_prediction,
checklist_prediction,
nested_checklist_prediction,
text_prediction,
]
)
)
label_ndjson_predictions = []
for annot in [
entities_prediction_ndjson,
radio_prediction_ndjson,
checklist_prediction_ndjson,
text_prediction_ndjson,
nested_radio_prediction_ndjson,
nested_checklist_prediction_ndjson
]:
annot.update({
"dataRow": {"globalKey": global_key}
})
label_ndjson_predictions.append(annot)
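The NDJSON payload is simply one JSON object per line. If you want to inspect or archive it, a minimal sketch for writing it to disk (the filename is arbitrary):
import json

# Serialize each prediction dict as one JSON object per line (NDJSON)
with open("predictions.ndjson", "w") as f:
    for annot in label_ndjson_predictions:
        f.write(json.dumps(annot) + "\n")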
Step 6: Upload payload to model run
# Upload the prediction label to the Model Run
upload_job_prediction = model_run.add_predictions(
name="prediction_upload_job"+str(uuid.uuid4()),
predictions=label_predictions)
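The upload runs asynchronously; before reading the results, you can block until the job completes and check its final state, as step 7 does for the annotation import:
# Block until the import job finishes, then inspect its final state
upload_job_prediction.wait_until_done()
print("State:", upload_job_prediction.state)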
# Errors will appear for prediction uploads that failed.
print("Errors:", upload_job_prediction.errors)
print("Status of uploads: ", upload_job_prediction.statuses)
Step 7: Send annotations to model run
This step is optional.
# 7.1. Create a Labelbox project
project = client.create_project(name="Text Prediction Import Demo",
media_type=lb.MediaType.Text)
project.connect_ontology(ontology)
# 7.2. Create a batch to send to the project
project.create_batch(
    "batch_text_prediction_demo", # Each batch in a project must have a unique name
    global_keys=[global_key], # A list of global keys for the data rows to include
    priority=5 # Priority between 1 (highest) and 5 (lowest)
)
# 7.3 Create your annotation payload as explained in:
# https://docs.labelbox.com/reference/import-text-annotations#supported-annotations
entities_annotation ...
nested_radio_annotation ...
radio_annotation ...
checklist_annotation ...
nested_checklist_annotation ...
text_annotation ...
# 7.4. Create the label object
label = []
label.append(
lb_types.Label(
data={"global_key" : global_key },
annotations = [
entities_annotation,
nested_radio_annotation,
radio_annotation,
checklist_annotation,
nested_checklist_annotation,
text_annotation,
]
)
)
# 7.5 Upload annotations to the project using Label Import
upload_job_annotation = lb.LabelImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name="text_label_import_job" + str(uuid.uuid4()),
    labels=label)
upload_job_annotation.wait_until_done()
# Errors will appear for annotation uploads that failed.
print("Errors:", upload_job_annotation.errors)
print("Status of uploads: ", upload_job_annotation.statuses)
# 7.6. Send the annotations to the model run
# Upsert the labels created in the project above into the model run
model_run.upsert_labels(project_id=project.uid)