How to upload predictions on text in a model run
Open this Colab for an interactive tutorial about uploading predictions on text in a model run.
Supported predictions
To upload predictions in Labelbox, you need to create a predictions payload. In this section, we provide this payload for every supported prediction type.
Labelbox supports two formats for the predictions payload:
- Python annotation types (recommended)
- NDJSON
Both are described below.
Uploading confidence scores is optional
If you do not specify a confidence score, the prediction will be treated as if it had a confidence score of 1.
Entity
# Python annotation type: a named entity spanning characters [start, end).
named_entity = lb_types.TextEntity(start=10, end=20)
entities_prediction = lb_types.ObjectAnnotation(
    value=named_entity,
    name="named_entity",
    confidence=0.5
)

# NDJSON equivalent of the payload above. Assigned to a variable (it was a
# bare dict before) so the upload loop in the end-to-end example can use it;
# offsets aligned with the Python payload (they previously differed: 67/128).
entities_prediction_ndjson = {
    "name": "named_entity",
    "location": {
        "start": 10,
        "end": 20
    },
    "confidence": 0.5
}
Classification: Radio (single-choice)
# Python annotation type for a single-choice (radio) classification.
radio_prediction = lb_types.ClassificationAnnotation(
    name="radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_radio_answer", confidence=0.5
        )
    )
)

# NDJSON equivalent. The answer name must match the payload above and the
# ontology option (previously it said "second_radio_answer", which does not
# exist in the ontology defined in this tutorial).
radio_prediction_ndjson = {
    'name': 'radio_question',
    'answer': {'name': 'first_radio_answer', 'confidence': 0.5}
}
Classification: Checklist (multi-choice)
# Python annotation type for a multi-choice (checklist) classification.
checklist_prediction = lb_types.ClassificationAnnotation(
    name="checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_checklist_answer",
                confidence=0.5
            ),
            lb_types.ClassificationAnswer(
                name="second_checklist_answer",
                confidence=0.5
            ),
            lb_types.ClassificationAnswer(
                name="third_checklist_answer",
                confidence=0.5
            )
        ]
    )
)

# NDJSON equivalent — lists the same three answers as the Python payload
# above (the third answer was previously missing here).
checklist_prediction_ndjson = {
    'name': 'checklist_question',
    'answer': [
        {'name': 'first_checklist_answer', 'confidence': 0.5},
        {'name': 'second_checklist_answer', 'confidence': 0.5},
        {'name': 'third_checklist_answer', 'confidence': 0.5}
    ]
}
Classification: Free-form text
# Free-form text classifications do not support a confidence score yet
# (coming soon), so none is set in either payload below.
text_prediction = lb_types.ClassificationAnnotation(
    name="free_text",
    value=lb_types.Text(answer="sample text")
)

# NDJSON equivalent (the comment here previously used a C-style `//`,
# which is invalid Python).
text_prediction_ndjson = {
    'name': 'free_text',
    'answer': 'sample text',
}
End-to-end example: Upload predictions to a Model Run
Follow the steps below to upload predictions to a model run.
Before you start
You will need to import these libraries to run the code examples in this section:
import labelbox as lb
import labelbox.data.annotation_types as lb_types
import labelbox.data.serialization as lb_serializers
import uuid
Replace with your API key
To learn how to create an API key, please follow the instructions on this page.
# Replace with your own Labelbox API key; never commit real keys to source control.
API_KEY = ""
client = lb.Client(API_KEY)
Step 1: Import data rows into Catalog
# Payload for one data row pointing at a plain-text file. Renamed from the
# misleading `test_img_url` — this tutorial imports text, not images.
test_text_url = {
    "row_data": "https://storage.googleapis.com/labelbox-sample-datasets/nlp/lorem-ipsum.txt",
    "global_key": str(uuid.uuid4())  # globally unique id for this data row
}
dataset = client.create_dataset(name="text_prediction_import")
data_row = dataset.create_data_row(test_text_url)
print(data_row)
Step 2: Create/select an ontology for your model predictions
Your model run should have the correct ontology set up with all the tools and classifications supported for your predictions.
Here is an example of creating an ontology programmatically for all the example predictions above:
# Set up the ontology and link the tools created above.
# The ontology must contain every tool/classification name referenced by the
# prediction payloads (named_entity, radio_question, checklist_question, ...).
ontology_builder = lb.OntologyBuilder(
    classifications=[  # List of Classification objects
        lb.Classification(
            class_type=lb.Classification.Type.RADIO,
            name="radio_question",
            options=[lb.Option(value="first_radio_answer")]
        ),
        lb.Classification(
            class_type=lb.Classification.Type.RADIO,
            name="nested_radio_question",
            options=[
                # Nested classification: selecting this option exposes a
                # follow-up radio question.
                lb.Option(value="first_radio_answer",
                          options=[
                              lb.Classification(
                                  class_type=lb.Classification.Type.RADIO,
                                  name="sub_radio_question",
                                  options=[
                                      lb.Option(value="first_sub_radio_answer")
                                  ]
                              ),
                          ])
            ],
        ),
        lb.Classification(
            class_type=lb.Classification.Type.CHECKLIST,
            name="checklist_question",
            options=[
                lb.Option(value="first_checklist_answer"),
                lb.Option(value="second_checklist_answer"),
                lb.Option(value="third_checklist_answer")
            ]
        ),
        lb.Classification(
            class_type=lb.Classification.Type.TEXT,
            name="free_text"
        ),
        lb.Classification(
            class_type=lb.Classification.Type.CHECKLIST,
            name="nested_checklist_question",
            options=[
                lb.Option("first_checklist_answer",
                          options=[
                              lb.Classification(
                                  class_type=lb.Classification.Type.CHECKLIST,
                                  name="sub_checklist_question",
                                  options=[lb.Option("first_sub_checklist_answer")]
                              )
                          ])
            ]
        )
    ],
    tools=[  # List of Tool objects
        lb.Tool(tool=lb.Tool.Type.NER,
                name="named_entity")
    ]
)
ontology = client.create_ontology("Ontology Text Predictions",
                                  ontology_builder.asdict(),
                                  media_type=lb.MediaType.Text)
Step 3: Create a Model and model run
# Create a Model: a container that groups model runs under one ontology.
model = client.create_model(name="text_model_run_" + str(uuid.uuid4()),
                            ontology_id=ontology.uid)
# Create a model run (a single iteration/experiment within the model).
model_run = model.create_model_run("iteration 1")
Step 4: Send data rows to the model run
# Attach the data row created in Step 1 to this model run.
model_run.upsert_data_rows([data_row.uid])
Step 5: Create the predictions payload
Create the prediction payload using the snippets of code shown above.
Labelbox supports two formats for the annotations payload: NDJSON and Python annotation types. Both approaches are described below with instructions to compose annotations into Labels attached to the data rows.
The resulting `ndjson_prediction_method` and `label_list_prediction` payloads should have exactly the same prediction content (with the exception of the generated `uuid` strings).
# Build the NDJSON payload: attach each prediction to its data row.
# The loop body was unindented in the original (a syntax error).
# NOTE(review): nested_radio_prediction_ndjson and
# nested_checklist_prediction_ndjson are not defined in the snippets above —
# define them first, or remove them from this list.
ndjson_prediction_method = []
for annot in [
    entities_prediction_ndjson,
    radio_prediction_ndjson,
    checklist_prediction_ndjson,
    text_prediction_ndjson,
    nested_radio_prediction_ndjson,
    nested_checklist_prediction_ndjson
]:
    # Point the prediction at the data row it belongs to.
    annot.update({
        'dataRow': {'id': data_row.uid},
    })
    ndjson_prediction_method.append(annot)
# Create a Label holding the Python-annotation-type predictions
# for this data row.
label_prediction = lb_types.Label(
    data=lb_types.TextData(uid=data_row.uid),
    annotations=[
        entities_prediction,
        radio_prediction,
        checklist_prediction,
        text_prediction,
    ]
)
# Collect the labels into a list, as expected by the upload APIs.
label_list_prediction = [label_prediction]
Step 6: Upload the predictions payload to the model run
# Upload the prediction payload to the Model Run.
upload_job_prediction = model_run.add_predictions(
    name="prediction_upload_job" + str(uuid.uuid4()),
    # Fixed: the original referenced the undefined name
    # `ndjson_prediction_method2`.
    predictions=ndjson_prediction_method)
# Block until the upload finishes so .errors is populated
# (consistent with the annotation upload in Step 7.5).
upload_job_prediction.wait_until_done()
# Errors will appear for prediction uploads that failed.
print("Errors:", upload_job_prediction.errors)
print(" ")
Step 7: Send annotations to the model run (optional)
# 7.1. Create a Labelbox project
project = client.create_project(name="Text Prediction Import",
                                # Quality Settings setup
                                auto_audit_percentage=1,
                                auto_audit_number_of_labels=1,
                                media_type=lb.MediaType.Text)
# Connect the ontology created above to the project's editor.
project.setup_editor(ontology)

# 7.2. Create a batch to send to the project
project.create_batch(
    "batch_text_prediction_demo",   # Each batch in a project must have a unique name
    dataset.export_data_rows(),     # A list of data rows or data row ids
    5                               # Priority between 1 (highest) and 5 (lowest)
)
# 7.3 Create your annotation payload as explained in:
# https://docs.labelbox.com/reference/import-text-annotations#supported-annotations
entities_ndjson...
radio_annotation_ndjson...
checklist_annotation_ndjson...
text_annotation_ndjson...
nested_radio_annotation_ndjson...
nested_checklist_annotation_ndjson...
# 7.4. Create the label object
# 7.4. Build the annotation payload: attach each annotation to its data row.
# The loop body was unindented in the original (a syntax error).
ndjson_annotation = []
for annot in [
    entities_ndjson,
    radio_annotation_ndjson,
    checklist_annotation_ndjson,
    text_annotation_ndjson,
    nested_checklist_annotation_ndjson,
    nested_radio_annotation_ndjson
]:
    # Point the annotation at the data row it belongs to.
    annot.update({
        'dataRow': {'id': data_row.uid},
    })
    ndjson_annotation.append(annot)
# 7.5 Upload annotations to the project using Label Import
upload_job_annotation = lb.LabelImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name="text_label_import_job" + str(uuid.uuid4()),
    labels=ndjson_annotation)
# Block until the import finishes so .errors is populated.
upload_job_annotation.wait_until_done()
# Errors will appear for annotation uploads that failed.
print("Errors:", upload_job_annotation.errors)

# 7.6 Send the annotations to the model run
# Get the label ids from the project export, then attach them to the model run.
label_ids = [x['ID'] for x in project.export_labels(download=True)]
model_run.upsert_labels(label_ids)
End-to-end Python tutorial
Open a Colab to go through the exercise of importing text predictions.