How to upload predictions on document data in a model run and sample upload formats.
Open this Colab for an interactive tutorial on uploading predictions on documents in a model run.
Supported predictions
To upload predictions in Labelbox, you need to create a predictions payload. In this section, we provide this payload for every supported prediction type.
Labelbox supports two formats for the predictions payload:
- Python annotation types (recommended)
- NDJSON
Both are described below.
Entity
The `textSelections` field is required in the payload for each entity annotation. Each `textSelections` item in the list requires the following fields:
- A list of `token_ids` for each word in the group of words.
- The `group_id` associated with a group of words.
- The `page` of the document (1-indexed).
Both the `token_ids` and the `group_id` are extracted from the text layer URL attached to the data row. Please follow the end-to-end demo to learn how to construct an entity annotation for documents.
# Entity prediction (Python annotation type).
# The selection below is an empty placeholder; it is populated from the
# document's text layer in the end-to-end example further down.
placeholder_selection = lb_types.DocumentTextSelection(
    token_ids=[],  # ids of the individual words in the group
    group_id="",   # id of the group of words
    page=1,        # 1-indexed page number
)
entities_prediction = lb_types.ObjectAnnotation(
    name="named_entity",
    confidence=0.5,
    value=lb_types.DocumentEntity(
        name="named_entity",
        textSelections=[placeholder_selection],
    ),
)
# Entity prediction (NDJSON). The tokenIds/groupId placeholders are UUIDs
# taken from the text layer attached to the data row.
entities_prediction_ndjson = {
    "name": "named_entity",
    "confidence": 0.5,
    "textSelections": [
        {"tokenIds": ["<UUID>"], "groupId": "<UUID>", "page": 1},
    ],
}
Classification: Radio
# Radio (single-choice) classification prediction — Python annotation type.
radio_prediction = lb_types.ClassificationAnnotation(
    name="radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_radio_answer",
            confidence=0.5,
        )
    ),
)
# The equivalent NDJSON payload.
radio_prediction_ndjson = {
    "name": "radio_question",
    "answer": {"name": "first_radio_answer", "confidence": 0.5},
}
Classification: Checklist
# Checklist (multi-choice) classification prediction — Python annotation type.
checklist_answers = [
    lb_types.ClassificationAnswer(name="first_checklist_answer", confidence=0.5),
    lb_types.ClassificationAnswer(name="second_checklist_answer", confidence=0.5),
]
checklist_prediction = lb_types.ClassificationAnnotation(
    name="checklist_question",
    value=lb_types.Checklist(answer=checklist_answers),
)
# The equivalent NDJSON payload.
checklist_prediction_ndjson = {
    "name": "checklist_question",
    "answer": [
        {"name": "first_checklist_answer", "confidence": 0.5},
        {"name": "second_checklist_answer", "confidence": 0.5},
    ],
}
Bounding box
# Bounding-box dimensions, in PDF points.
bbox_dim_1 = {
    "top": 135.3,
    "left": 102.771,
    "height": 109.843,
    "width": 415.8,
}
# Bounding-box prediction (Python annotation type).
# NOTE(review): `page` is 0 here while the entity payloads above describe
# pages as 1-indexed — confirm the expected indexing for DocumentRectangle.
top_left = lb_types.Point(x=bbox_dim_1["left"], y=bbox_dim_1["top"])
bottom_right = lb_types.Point(
    x=bbox_dim_1["left"] + bbox_dim_1["width"],
    y=bbox_dim_1["top"] + bbox_dim_1["height"],
)
bbox_prediction = lb_types.ObjectAnnotation(
    name="bounding_box",  # must match the feature name in your ontology
    value=lb_types.DocumentRectangle(
        start=top_left,      # x = left, y = top
        end=bottom_right,    # x = left + width, y = top + height
        page=0,
        unit=lb_types.RectangleUnit.POINTS,
    ),
)
# The same dimensions, repeated for the NDJSON payload.
bbox_dim_1 = {
    "top": 135.3,
    "left": 102.771,
    "height": 109.843,
    "width": 415.8,
}
# Bounding-box prediction (NDJSON).
bbox_prediction_ndjson = {
    "name": "bounding_box",
    "bbox": bbox_dim_1,
    "page": 0,
    "unit": "POINTS",
}
Nested classifications: Checklist and radio
# Nested checklist prediction. Confidence scores are set on each answer,
# including the nested sub-answer.
sub_checklist_pred = lb_types.ClassificationAnnotation(
    name="sub_checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_sub_checklist_answer",
                confidence=0.5,
            )
        ]
    ),
)
nested_checklist_prediction = lb_types.ClassificationAnnotation(
    name="nested_checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_checklist_answer",
                confidence=0.5,
                classifications=[sub_checklist_pred],
            )
        ]
    ),
)
# Nested radio prediction — same pattern with single-choice answers.
sub_radio_pred = lb_types.ClassificationAnnotation(
    name="sub_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_sub_radio_answer",
            confidence=0.5,
        )
    ),
)
nested_radio_prediction = lb_types.ClassificationAnnotation(
    name="nested_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_radio_answer",
            confidence=0.5,
            classifications=[sub_radio_pred],
        )
    ),
)
# Nested checklist prediction (NDJSON). Confidence scores sit on the
# answers, at both nesting levels.
nested_checklist_prediction_ndjson = {
    "name": "nested_checklist_question",
    "answer": [
        {
            "name": "first_checklist_answer",
            "confidence": 0.5,
            "classifications": [
                {
                    "name": "sub_checklist_question",
                    "answer": {
                        "name": "first_sub_checklist_answer",
                        "confidence": 0.5,
                    },
                }
            ],
        }
    ],
}
# Nested radio prediction (NDJSON).
nested_radio_prediction_ndjson = {
    "name": "nested_radio_question",
    "answer": {
        "name": "first_radio_answer",
        "confidence": 0.5,
        "classifications": [
            {
                "name": "sub_radio_question",
                "answer": {
                    "name": "first_sub_radio_answer",
                    "confidence": 0.5,
                },
            }
        ],
    },
}
Classification: Free-form text
# Free-form text classification — Python annotation type.
text_prediction = lb_types.ClassificationAnnotation(
    name="free_text",  # must match the feature name in your ontology
    value=lb_types.Text(answer="sample text"),
)
# The equivalent NDJSON payload.
text_prediction_ndjson = {"name": "free_text", "answer": "sample text"}
Bounding box with nested classification
# Bounding-box dimensions (PDF points) for the nested-classification example.
bbox_dim = {
    "top": 226.757,
    "left": 317.271,
    "height": 194.229,
    "width": 249.386,
}
# Radio sub-classification nested two levels deep.
second_level_radio = lb_types.ClassificationAnnotation(
    name="second_sub_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="second_sub_radio_answer",
            confidence=0.5,
        )
    ),
)
first_level_radio = lb_types.ClassificationAnnotation(
    name="sub_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_sub_radio_answer",
            confidence=0.5,
            classifications=[second_level_radio],
        )
    ),
)
# Bounding box with the nested radio classification (Python annotation type).
bbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(
    name="bbox_with_radio_subclass",
    confidence=0.5,
    value=lb_types.DocumentRectangle(
        start=lb_types.Point(x=bbox_dim["left"], y=bbox_dim["top"]),  # top-left
        end=lb_types.Point(
            x=bbox_dim["left"] + bbox_dim["width"],
            y=bbox_dim["top"] + bbox_dim["height"],
        ),  # bottom-right
        unit=lb_types.RectangleUnit.POINTS,
        page=1,
    ),
    classifications=[first_level_radio],
)
# The equivalent NDJSON payload.
bbox_with_radio_subclass_prediction_ndjson = {
    "name": "bbox_with_radio_subclass",
    "classifications": [
        {
            "name": "sub_radio_question",
            "answer": {
                "name": "first_sub_radio_answer",
                "confidence": 0.5,
                "classifications": [
                    {
                        "name": "second_sub_radio_question",
                        "answer": {
                            "name": "second_sub_radio_answer",
                            "confidence": 0.5,
                        },
                    }
                ],
            },
        }
    ],
    "bbox": bbox_dim,
    "page": 1,
    "unit": "POINTS",
}
Entity with nested classification
# Entity prediction with a checklist sub-classification.
# The text selection is an empty placeholder, populated later from the
# document's text layer.
ner_sub_checklist = lb_types.ClassificationAnnotation(
    name="sub_checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_sub_checklist_answer",
                confidence=0.5,
            )
        ]
    ),
)
ner_with_checklist_subclass_prediction = lb_types.ObjectAnnotation(
    name="ner_with_checklist_subclass",
    confidence=0.5,
    value=lb_types.DocumentEntity(
        name="ner_with_checklist_subclass",
        text_selections=[
            lb_types.DocumentTextSelection(token_ids=[], group_id="", page=1)
        ],
    ),
    classifications=[ner_sub_checklist],
)
# Entity prediction with a checklist sub-classification (NDJSON).
# tokenIds/groupId are empty placeholders until the text layer is parsed.
ner_with_checklist_subclass_prediction_ndjson = {
    "name": "ner_with_checklist_subclass",
    "classifications": [
        {
            "name": "sub_checklist_question",
            "answer": [
                {"name": "first_sub_checklist_answer", "confidence": 0.5}
            ],
        }
    ],
    "textSelections": [
        {"tokenIds": [""], "groupId": "", "page": 1},
    ],
}
End-to-end example: Upload predictions to a model run
Here are the steps to upload predictions to a model run:
Before you start
You will need to import these libraries to use the code examples in this section:
import labelbox as lb
import labelbox.types as lb_types
import uuid
Replace with your API key
To learn how to create an API key, please follow the instructions on this page.
API_KEY= ""
client = lb.Client(API_KEY)
Step 1: Import data rows into Catalog
## A text layer URL is required for uploading entity annotations.
global_key = "0801.3483.pdf"
# One data row: the PDF plus its pre-computed text layer, addressed by the
# global key above.
img_url = {
    "row_data": {
        "pdf_url": "https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf",
        "text_layer_url": "https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483-lb-textlayer.json",
    },
    "global_key": global_key,
}
# Create a dataset, import the data row, and wait so the failure report
# below is complete.
dataset = client.create_dataset(name="pdf_demo_dataset")
task = dataset.create_data_rows([img_url])
task.wait_till_done()
print("Errors:", task.errors)
print("Failed data rows:", task.failed_data_rows)
Step 2: Create/select an ontology for your model predictions
Your model run should have the correct ontology setup with all the tools and classifications supported for your predictions. The tool names and classification instructions should match the `name`/`instructions` fields in your annotation payloads to ensure the correct feature schemas are matched.
## Setup the ontology and link the tools created above.
# The ontology must contain every tool and classification referenced by the
# prediction payloads; each `name` below matches the `name` field used in
# the payload snippets earlier in this page.
ontology_builder = lb.OntologyBuilder(
    classifications=[ # List of Classification objects
        # Top-level (global) single-choice question.
        lb.Classification(
            class_type=lb.Classification.Type.RADIO,
            name="radio_question",
            scope = lb.Classification.Scope.GLOBAL,
            options=[
                lb.Option(value="first_radio_answer"),
                lb.Option(value="second_radio_answer")
            ]
        ),
        # Top-level (global) multi-choice question.
        lb.Classification(
            class_type=lb.Classification.Type.CHECKLIST,
            name="checklist_question",
            scope = lb.Classification.Scope.GLOBAL,
            options=[
                lb.Option(value="first_checklist_answer"),
                lb.Option(value="second_checklist_answer")
            ]
        ),
        # Free-form text question.
        lb.Classification(
            class_type=lb.Classification.Type.TEXT,
            name="free_text",
            scope = lb.Classification.Scope.GLOBAL
        ),
        # Radio question whose first option carries a nested radio question.
        lb.Classification(
            class_type=lb.Classification.Type.RADIO,
            name="nested_radio_question",
            scope = lb.Classification.Scope.GLOBAL,
            options=[
                lb.Option("first_radio_answer",
                    options=[
                        lb.Classification(
                            class_type=lb.Classification.Type.RADIO,
                            name="sub_radio_question",
                            options=[lb.Option("first_sub_radio_answer")]
                        )
                    ])
            ]
        ),
        # Checklist question whose first option carries a nested checklist.
        lb.Classification(
            class_type=lb.Classification.Type.CHECKLIST,
            name="nested_checklist_question",
            scope = lb.Classification.Scope.GLOBAL,
            options=[
                lb.Option("first_checklist_answer",
                    options=[
                        lb.Classification(
                            class_type=lb.Classification.Type.CHECKLIST,
                            name="sub_checklist_question",
                            options=[lb.Option("first_sub_checklist_answer")]
                        )
                    ])
            ]
        ),
    ],
    tools=[ # List of Tool objects
        # Plain bounding box and plain entity tools.
        lb.Tool( tool=lb.Tool.Type.BBOX,name="bounding_box"),
        lb.Tool(tool=lb.Tool.Type.NER, name="named_entity"),
        # Entity tool with a checklist sub-classification.
        lb.Tool(tool=lb.Tool.Type.NER,
            name="ner_with_checklist_subclass",
            classifications=[
                lb.Classification(
                    class_type=lb.Classification.Type.CHECKLIST,
                    name="sub_checklist_question",
                    options=[
                        lb.Option(value="first_sub_checklist_answer")
                    ]
                )
            ]),
        # Bounding box with a two-level nested radio sub-classification.
        lb.Tool( tool=lb.Tool.Type.BBOX,
            name="bbox_with_radio_subclass",
            classifications=[
                lb.Classification(
                    class_type=lb.Classification.Type.RADIO,
                    name="sub_radio_question",
                    options=[
                        lb.Option(
                            value="first_sub_radio_answer" ,
                            options=[
                                lb.Classification(
                                    class_type=lb.Classification.Type.RADIO,
                                    name="second_sub_radio_question",
                                    options=[lb.Option("second_sub_radio_answer")]
                                )]
                        )]
                )]
        )]
)
# Create the ontology in Labelbox with Document (PDF) media type.
ontology = client.create_ontology("Document Annotation Import Demo",
    ontology_builder.asdict(),
    media_type=lb.MediaType.Document)
Step 3: Create a model and a model run
# create model
# A model groups model runs and is bound to the ontology created above;
# the uuid suffix keeps the name unique across repeated runs of this demo.
model = client.create_model(name="PDF_model_run_"+ str(uuid.uuid4()),
    ontology_id=ontology.uid)
# create model run
model_run = model.create_model_run("iteration 1")
Step 4: Send data rows to the model run
# Attach the data row imported in step 1 to the model run via its global key.
model_run.upsert_data_rows(global_keys=[global_key])
Step 5: Create the predictions payload
Create the prediction payload using the code snippets in the section above.
Labelbox supports two formats for the annotations payload: NDJSON and Python annotation types. Both are described below to compose your annotations into labels attached to the data rows.
The resulting payload should have exactly the same content for the annotations that are supported by both formats.
First, we need to populate the `textSelections` field for entity annotations. To learn how to generate a text layer for your documents, please refer to the following repositories/files:
- https://github.com/Labelbox/PDF-OCR-Transform-CLI/blob/main/src/scripts/gcloud/gcp-vision-to-lb-text-layer.py
- https://github.com/Labelbox/PDF-OCR-Transform-CLI/blob/main/src/scripts/adobe/adobe-ocr-to-lb-text-layer.py
import requests
import json
# Helper method
def update_text_selections(annotation, group_id, list_tokens, page):
    """Set the ``textSelections`` entry on an NDJSON annotation dict.

    Mutates ``annotation`` in place and returns it so calls can be chained.

    Args:
        annotation: NDJSON prediction payload (dict) to update.
        group_id: id representing a group of words in the text layer.
        list_tokens: ids of the individual words in the group.
        page: 1-indexed page number the selection is on.

    Returns:
        The same ``annotation`` dict, now carrying the selection.
    """
    # Bug fix: the original did `return annotation.update({...})`, which
    # always returns None (dict.update's return value); assign the key and
    # return the annotation itself instead.
    annotation["textSelections"] = [
        {
            "groupId": group_id,
            "tokenIds": list_tokens,
            "page": page,
        }
    ]
    return annotation
# URL of the pre-computed text layer attached to the data row in step 1.
text_layer = "https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483-lb-textlayer.json"
# Fetch the content of the text layer
res = requests.get(text_layer)
# Phrases that we want to annotate obtained from the text layer url
content_phrases = ["Metal-insulator (MI) transitions have been one of the", "T. Sasaki,* N. Yoneyama, and N. Kobayashi"]
# Parse the text layer
# NOTE(review): assumes the text layer is a JSON list of page objects, each
# with a "groups" list whose items carry "id", "content" and "tokens" —
# this matches the Labelbox text layer format; verify against your layer.
text_selections = []       # selections for the plain entity prediction
text_selections_ner = []   # selections for the entity-with-subclass prediction
for obj in json.loads(res.text):
    for group in obj["groups"]:
        # First phrase -> feeds the plain entity prediction.
        if group["content"] == content_phrases[0]:
            list_tokens = [x["id"] for x in group["tokens"]]
            # build text selections for Python annotations
            document_text_selection = lb_types.DocumentTextSelection(groupId=group["id"], tokenIds=list_tokens, page=1)
            text_selections.append(document_text_selection)
            # build text selection for the NDJson annotations
            update_text_selections(annotation=entities_prediction_ndjson,
                group_id=group["id"], # id representing group of words
                list_tokens=list_tokens, # ids representing individual words from the group
                page=1)
        # Second phrase -> feeds the entity-with-checklist-subclass prediction.
        if group["content"] == content_phrases[1]:
            list_tokens_2 = [x["id"] for x in group["tokens"]]
            # build text selections for Python annotations
            ner_text_selection = lb_types.DocumentTextSelection(groupId=group["id"], tokenIds=list_tokens_2, page=1)
            text_selections_ner.append(ner_text_selection)
            # build text selection for the NDJson annotations
            update_text_selections(annotation=ner_with_checklist_subclass_prediction_ndjson,
                group_id=group["id"], # id representing group of words
                list_tokens=list_tokens_2, # ids representing individual words from the group
                page=1)
# Re-write the entity prediction with the text selections extracted above.
# Bug fix: `confidence` is a field of ObjectAnnotation (as in the payload
# examples at the top of this page), not of DocumentEntity; the original
# passed it to DocumentEntity, where it is not part of the value payload.
# It is now set on the ObjectAnnotation instead.
entities_prediction_document_entity = lb_types.DocumentEntity(
    name="named_entity",
    textSelections=text_selections,
)
entities_prediction = lb_types.ObjectAnnotation(
    name="named_entity",
    confidence=0.5,
    value=entities_prediction_document_entity,
)
# Re-write the entity prediction + sub-classification with text selections.
classifications = [
    lb_types.ClassificationAnnotation(
        name="sub_checklist_question",
        value=lb_types.Checklist(
            answer=[lb_types.ClassificationAnswer(name="first_sub_checklist_answer", confidence=0.5)]
        )
    )
]
ner_annotation_with_subclass = lb_types.DocumentEntity(
    name="ner_with_checklist_subclass",
    text_selections=text_selections_ner,
)
ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(
    name="ner_with_checklist_subclass",
    confidence=0.5,
    value=ner_annotation_with_subclass,
    classifications=classifications,
)
# Final NDJSON and python annotations
print(f"entities_annotations_ndjson={entities_prediction_ndjson}")
print(f"entities_annotation={entities_prediction}")
print(f"nested_entities_annotation_ndjson={ner_with_checklist_subclass_prediction_ndjson}")
print(f"nested_entities_annotation={ner_with_checklist_subclass_annotation}")
Step 5 (continued): Compose the predictions into label payloads
Create the predictions payload using the snippets of code shown above.
Labelbox supports two formats for the annotations payload: NDJSON and Python annotation types. Both approaches are described below with instructions to compose annotations into Labels attached to the data rows.
The resulting label_predictions_ndjson
and label_predictions
payloads should have exactly the same prediction content (with the exception of the uuid strings that are generated).
# Bundle every Python-type prediction into one Label for the data row.
label_predictions = []
label_predictions.append(
    lb_types.Label(
        data=lb_types.DocumentData(global_key=global_key),
        annotations=[
            entities_prediction,
            checklist_prediction,
            nested_checklist_prediction,
            text_prediction,
            radio_prediction,
            nested_radio_prediction,
            bbox_prediction,
            bbox_with_radio_subclass_prediction,
            # Bug fix: use the rewritten annotation whose text selections
            # were populated from the text layer. The original appended
            # `ner_with_checklist_subclass_prediction`, whose selections
            # were still the empty placeholders.
            ner_with_checklist_subclass_annotation,
        ],
    )
)
# Collect every NDJSON prediction, tagging each with the data row it
# belongs to (via the global key) before adding it to the payload.
label_predictions_ndjson = []
ndjson_predictions = (
    entities_prediction_ndjson,
    checklist_prediction_ndjson,
    nested_checklist_prediction_ndjson,
    text_prediction_ndjson,
    radio_prediction_ndjson,
    nested_radio_prediction_ndjson,
    bbox_prediction_ndjson,
    bbox_with_radio_subclass_prediction_ndjson,
    ner_with_checklist_subclass_prediction_ndjson,
)
for annot in ndjson_predictions:
    annot["dataRow"] = {"globalKey": global_key}
    label_predictions_ndjson.append(annot)
Step 6: Upload the predictions payload to the model run
# Upload the prediction label to the Model Run
# The uuid suffix keeps the job name unique across repeated runs.
upload_job_prediction = model_run.add_predictions(
    name="prediction_upload_job"+str(uuid.uuid4()),
    predictions=label_predictions)
# Errors will appear for annotation uploads that failed.
print("Errors:", upload_job_prediction.errors)
print("Status of uploads: ", upload_job_prediction.statuses)
Step 7: Send annotations to the model run
To send annotations to a model run, we must first import them into a project, create a label payload and then send them to the model run.
# 7.1 Create a labelbox project
# The project uses the same Document media type and ontology as the model run.
project = client.create_project(name="Document Prediction Import Demo",
    media_type=lb.MediaType.Document)
project.setup_editor(ontology)
# 7.2 Create a batch to send to the project
project.create_batch(
    "batch_text_prediction_demo", # Each batch in a project must have a unique name
    global_keys=[global_key], # Paginated collection of data row objects, list of data row ids or global keys
    priority=5 # priority between 1(Highest) - 5(lowest)
)
# 7.3 Create the annotations payload
# 7.3 Create the annotations payload.
# Ground-truth annotations mirror the predictions above, minus confidence.
# Entity annotation, reusing the text selections extracted earlier.
entities_annotation = lb_types.ObjectAnnotation(
    name="named_entity",
    value=lb_types.DocumentEntity(
        name="named_entity",
        textSelections=text_selections,
    ),
)
# Radio (single-choice) answer.
radio_annotation = lb_types.ClassificationAnnotation(
    name="radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(name="first_radio_answer")
    ),
)
# Checklist (multi-choice) answers.
checklist_annotation = lb_types.ClassificationAnnotation(
    name="checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(name="first_checklist_answer"),
            lb_types.ClassificationAnswer(name="second_checklist_answer"),
        ]
    ),
)
# Bounding-box dimensions, in PDF points.
bbox_dim_1 = {
    "top": 135.3,
    "left": 102.771,
    "height": 109.843,
    "width": 415.8,
}
# Bounding box from top-left to bottom-right.
bbox_annotation = lb_types.ObjectAnnotation(
    name="bounding_box",  # must match the feature name in your ontology
    value=lb_types.DocumentRectangle(
        start=lb_types.Point(x=bbox_dim_1["left"], y=bbox_dim_1["top"]),
        end=lb_types.Point(
            x=bbox_dim_1["left"] + bbox_dim_1["width"],
            y=bbox_dim_1["top"] + bbox_dim_1["height"],
        ),
        page=0,
        unit=lb_types.RectangleUnit.POINTS,
    ),
)
# Nested checklist annotation: the sub-answer hangs off the top-level answer.
sub_checklist_ann = lb_types.ClassificationAnnotation(
    name="sub_checklist_question",
    value=lb_types.Checklist(
        answer=[lb_types.ClassificationAnswer(name="first_sub_checklist_answer")]
    ),
)
nested_checklist_annotation = lb_types.ClassificationAnnotation(
    name="nested_checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_checklist_answer",
                classifications=[sub_checklist_ann],
            )
        ]
    ),
)
# Nested radio annotation — same pattern with single-choice answers.
sub_radio_ann = lb_types.ClassificationAnnotation(
    name="sub_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(name="first_sub_radio_answer")
    ),
)
nested_radio_annotation = lb_types.ClassificationAnnotation(
    name="nested_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_radio_answer",
            classifications=[sub_radio_ann],
        )
    ),
)
# Free-form text answer.
text_annotation = lb_types.ClassificationAnnotation(
    name="free_text",
    value=lb_types.Text(answer="sample text"),
)
# Bounding-box dimensions (PDF points) for the nested-classification example.
bbox_dim = {
    "top": 226.757,
    "left": 317.271,
    "height": 194.229,
    "width": 249.386,
}
# Two-level nested radio sub-classification.
second_radio_ann = lb_types.ClassificationAnnotation(
    name="second_sub_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(name="second_sub_radio_answer")
    ),
)
first_radio_ann = lb_types.ClassificationAnnotation(
    name="sub_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_sub_radio_answer",
            classifications=[second_radio_ann],
        )
    ),
)
# Bounding box carrying the nested radio classification.
bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(
    name="bbox_with_radio_subclass",
    value=lb_types.DocumentRectangle(
        start=lb_types.Point(x=bbox_dim["left"], y=bbox_dim["top"]),  # top-left
        end=lb_types.Point(
            x=bbox_dim["left"] + bbox_dim["width"],
            y=bbox_dim["top"] + bbox_dim["height"],
        ),  # bottom-right
        unit=lb_types.RectangleUnit.POINTS,
        page=1,
    ),
    classifications=[first_radio_ann],
)
# Entity annotation with a checklist sub-classification, reusing the NER
# text selections extracted earlier.
ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(
    name="ner_with_checklist_subclass",
    value=lb_types.DocumentEntity(
        name="ner_with_checklist_subclass",
        text_selections=text_selections_ner,
    ),
    classifications=[
        lb_types.ClassificationAnnotation(
            name="sub_checklist_question",
            value=lb_types.Checklist(
                answer=[lb_types.ClassificationAnswer(name="first_sub_checklist_answer")]
            ),
        )
    ],
)
# 7.4 Create the label object
# A single Label bundles every ground-truth annotation for the data row,
# addressed by its global key.
labels = [
    lb_types.Label(
        data=lb_types.DocumentData(global_key=global_key),
        annotations=[
            entities_annotation,
            checklist_annotation,
            nested_checklist_annotation,
            text_annotation,
            radio_annotation,
            nested_radio_annotation,
            bbox_annotation,
            bbox_with_radio_subclass_annotation,
            ner_with_checklist_subclass_annotation,
        ],
    )
]
# 7.5 Upload annotations to the project using label import
# The uuid suffix keeps the import job name unique across repeated runs.
upload_job_annotation = lb.LabelImport.create_from_objects(
    client = client,
    project_id = project.uid,
    name="text_label_import_job"+ str(uuid.uuid4()),
    labels=labels)
# Block until the import finishes so the statuses/errors below are final.
upload_job_annotation.wait_until_done()
# Errors will appear for annotation uploads that failed.
print("Errors:", upload_job_annotation.errors)
print("Status of uploads: ", upload_job_annotation.statuses)
# 7.6 Send the annotations to the model run
# get the labels id from the project
model_run.upsert_labels(project_id=project.uid)