Upload document predictions

How to upload predictions on documents (PDFs) in a model run and sample upload formats.

Supported predictions

To upload predictions in Labelbox, you need to create a predictions payload. In this section, we provide this payload for every supported prediction type.

Labelbox supports two formats for the predictions payload:

  • Python annotation types (recommended)
  • NDJSON

Both are described below.

Entity

The textSelections field is required in the payload for each entity annotation. Each textSelections item in the list requires the following fields:

  • A list of token_ids for each word in the group of words.
  • The group_id associated with a group of words.
  • The page of the document (1-indexed).

Both the token_ids and the group_id are extracted from the text layer URL attached to the data row. Please follow the end-to-end demo to learn how to construct an entity annotation for documents.

# Entity prediction (Python annotation types).
# token_ids and group_id are left empty here; real values are read from the
# text layer of the data row.
empty_text_selection = lb_types.DocumentTextSelection(
    token_ids=[],
    group_id="",
    page=1,
)
entities_prediction = lb_types.ObjectAnnotation(
    name="named_entity",
    confidence=0.5,
    value=lb_types.DocumentEntity(
        name="named_entity",
        textSelections=[empty_text_selection],
    ),
)
# Entity prediction (NDJSON). "<UUID>" marks values that must be replaced
# with real token and group ids.
entities_prediction_ndjson = {
    "name": "named_entity",
    "confidence": 0.5,
    "textSelections": [
        {
            "tokenIds": ["<UUID>"],
            "groupId": "<UUID>",
            "page": 1,
        },
    ],
}

Classification: Radio

# Radio classification prediction (Python annotation types).
# The confidence score is attached to the selected answer.
radio_prediction = lb_types.ClassificationAnnotation(
    name="radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_radio_answer",
            confidence=0.5,
        ),
    ),
)
# Radio classification prediction (NDJSON): a single answer object.
radio_prediction_ndjson = {
    "name": "radio_question",
    "answer": {
        "name": "first_radio_answer",
        "confidence": 0.5,
    },
}

Classification: Checklist

# Checklist classification prediction (Python annotation types).
# Each selected answer carries its own confidence score.
checklist_answers = [
    lb_types.ClassificationAnswer(name="first_checklist_answer", confidence=0.5),
    lb_types.ClassificationAnswer(name="second_checklist_answer", confidence=0.5),
]
checklist_prediction = lb_types.ClassificationAnnotation(
    name="checklist_question",
    value=lb_types.Checklist(answer=checklist_answers),
)
# Checklist classification prediction (NDJSON): a list of answer objects.
checklist_prediction_ndjson = {
    "name": "checklist_question",
    "answer": [
        {"name": "first_checklist_answer", "confidence": 0.5},
        {"name": "second_checklist_answer", "confidence": 0.5},
    ],
}

Bounding box

# Bounding box prediction (Python annotation types).
bbox_dim_1 = {
    "top": 135.3,
    "left": 102.771,
    "height": 109.843,
    "width": 415.8,
}

bbox_prediction = lb_types.ObjectAnnotation(
    name="bounding_box",  # must match your ontology feature's name
    value=lb_types.DocumentRectangle(
        # start corner: x = left, y = top
        start=lb_types.Point(x=bbox_dim_1["left"], y=bbox_dim_1["top"]),
        # end corner: x = left + width, y = top + height
        end=lb_types.Point(
            x=bbox_dim_1["left"] + bbox_dim_1["width"],
            y=bbox_dim_1["top"] + bbox_dim_1["height"],
        ),
        page=0,
        unit=lb_types.RectangleUnit.POINTS,
    ),
)
# Bounding box prediction (NDJSON): the box is given as top/left/height/width.
bbox_dim_1 = {
    "top": 135.3,
    "left": 102.771,
    "height": 109.843,
    "width": 415.8,
}

bbox_prediction_ndjson = {
    "name": "bounding_box",
    "bbox": bbox_dim_1,
    "page": 0,
    "unit": "POINTS",
}

Nested classifications: Checklist and radio

# Nested checklist prediction (Python annotation types).
# The sub-classification is built first, then attached to the parent answer.
sub_checklist_prediction = lb_types.ClassificationAnnotation(
    name="sub_checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_sub_checklist_answer",
                confidence=0.5,  # Confidence scores should be added to the answer
            ),
        ],
    ),
)
nested_checklist_prediction = lb_types.ClassificationAnnotation(
    name="nested_checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_checklist_answer",
                confidence=0.5,  # Confidence scores should be added to the answer
                classifications=[sub_checklist_prediction],
            ),
        ],
    ),
)

# Nested radio prediction (Python annotation types), built the same way.
sub_radio_prediction = lb_types.ClassificationAnnotation(
    name="sub_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_sub_radio_answer",
            confidence=0.5,  # Confidence scores should be added to the answer
        ),
    ),
)
nested_radio_prediction = lb_types.ClassificationAnnotation(
    name="nested_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_radio_answer",
            confidence=0.5,  # Confidence scores should be added to the answer
            classifications=[sub_radio_prediction],
        ),
    ),
)
# Nested checklist prediction (NDJSON).
# Checklist answers are lists of answer objects at every nesting level, which
# matches the Python `Checklist(answer=[...])` form and the other checklist
# payloads on this page.
nested_checklist_prediction_ndjson = {
    "name": "nested_checklist_question",
    "answer": [
        {
            "name": "first_checklist_answer",
            "confidence": 0.5,  # Confidence scores should be added to the answer
            "classifications": [
                {
                    "name": "sub_checklist_question",
                    "answer": [
                        {
                            "name": "first_sub_checklist_answer",
                            "confidence": 0.5,  # Confidence scores should be added to the answer
                        },
                    ],
                },
            ],
        },
    ],
}

# Nested radio prediction (NDJSON): radio answers are single answer objects.
nested_radio_prediction_ndjson = {
    "name": "nested_radio_question",
    "answer": {
        "name": "first_radio_answer",
        "confidence": 0.5,
        "classifications": [
            {
                "name": "sub_radio_question",
                "answer": {
                    "name": "first_sub_radio_answer",
                    "confidence": 0.5,
                },
            },
        ],
    },
}

Classification: Free-form text

# Free-form text classification prediction (Python annotation types).
text_prediction = lb_types.ClassificationAnnotation(
    name="free_text",  # must match your ontology feature's name
    value=lb_types.Text(answer="sample text"),
)
# Free-form text classification prediction (NDJSON): the answer is a plain string.
text_prediction_ndjson = {
    "name": "free_text",
    "answer": "sample text",
}

Bounding box with nested classification

# Bounding box prediction with a two-level nested radio classification
# (Python annotation types).
bbox_dim = {
    "top": 226.757,
    "left": 317.271,
    "height": 194.229,
    "width": 249.386,
}

# Innermost question, attached to the answer of the first sub-question.
second_level_radio = lb_types.ClassificationAnnotation(
    name="second_sub_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="second_sub_radio_answer",
            confidence=0.5,
        ),
    ),
)
first_level_radio = lb_types.ClassificationAnnotation(
    name="sub_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_sub_radio_answer",
            confidence=0.5,
            classifications=[second_level_radio],
        ),
    ),
)

bbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(
    name="bbox_with_radio_subclass",
    confidence=0.5,
    value=lb_types.DocumentRectangle(
        # start corner: x = left, y = top
        start=lb_types.Point(x=bbox_dim["left"], y=bbox_dim["top"]),
        # end corner: x = left + width, y = top + height
        end=lb_types.Point(
            x=bbox_dim["left"] + bbox_dim["width"],
            y=bbox_dim["top"] + bbox_dim["height"],
        ),
        unit=lb_types.RectangleUnit.POINTS,
        page=1,
    ),
    classifications=[first_level_radio],
)
# Bounding box prediction with a two-level nested radio classification (NDJSON).
# `bbox_dim` (top/left/height/width) is the dict defined for the Python
# annotation-types version of this prediction.
bbox_with_radio_subclass_prediction_ndjson = {
    "name": "bbox_with_radio_subclass",
    # Object-level confidence, so this payload carries the same content as the
    # Python version (which sets confidence=0.5 on the ObjectAnnotation).
    "confidence": 0.5,
    "classifications": [
        {
            "name": "sub_radio_question",
            "answer": {
                "name": "first_sub_radio_answer",
                "confidence": 0.5,
                "classifications": [
                    {
                        "name": "second_sub_radio_question",
                        "answer": {
                            "name": "second_sub_radio_answer",
                            "confidence": 0.5,
                        },
                    },
                ],
            },
        },
    ],
    "bbox": bbox_dim,
    "page": 1,
    "unit": "POINTS",
}

Entity with nested classification

# Entity prediction with a nested checklist classification
# (Python annotation types).
# token_ids and group_id are left empty here; real values are read from the
# text layer of the data row.
ner_sub_checklist = lb_types.ClassificationAnnotation(
    name="sub_checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_sub_checklist_answer",
                confidence=0.5,
            ),
        ],
    ),
)
ner_with_checklist_subclass_prediction = lb_types.ObjectAnnotation(
    name="ner_with_checklist_subclass",
    confidence=0.5,
    value=lb_types.DocumentEntity(
        name="ner_with_checklist_subclass",
        text_selections=[
            lb_types.DocumentTextSelection(token_ids=[], group_id="", page=1),
        ],
    ),
    classifications=[ner_sub_checklist],
)
# Entity prediction with a nested checklist classification (NDJSON).
# The empty tokenIds/groupId values are placeholders to be replaced with ids
# read from the text layer.
ner_with_checklist_subclass_prediction_ndjson = {
    "name": "ner_with_checklist_subclass",
    # Object-level confidence, so this payload carries the same content as the
    # Python version (which sets confidence=0.5 on the ObjectAnnotation).
    "confidence": 0.5,
    "classifications": [
        {
            "name": "sub_checklist_question",
            "answer": [
                {"name": "first_sub_checklist_answer", "confidence": 0.5},
            ],
        },
    ],
    "textSelections": [
        {
            "tokenIds": [""],
            "groupId": "",
            "page": 1,
        },
    ],
}

End-to-end example: Upload predictions to a model run

Here are the steps to upload predictions to a model run:

Before you start

You will need to import these libraries to use the code examples in this section:

import uuid import json import requests import labelbox as lb import labelbox.types as lb_types

Replace the value of API_KEY with a valid API key to connect to the Labelbox client.

# Replace None with a valid Labelbox API key before running.
API_KEY = None

# Client used by every SDK call below.
client = lb.Client(api_key=API_KEY)

Step 1: Import data rows into Catalog

## Text layer url is required for uploading entity annotations
global_key = "0801.3483.pdf"
img_url = {
    "row_data": {
        "pdf_url": "https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf",
        "text_layer_url": "https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483-lb-textlayer.json",
    },
    "global_key": global_key,
}

# Create the dataset and import the single PDF data row into it.
dataset = client.create_dataset(name="pdf_demo_dataset")
task = dataset.create_data_rows([img_url])
task.wait_till_done()
print(f"Failed data rows: {task.failed_data_rows}")
print(f"Errors: {task.errors}")

# If the global key already exists in the workspace the dataset will be
# created empty, so we can delete it. The check is made once over all errors
# so the dataset is deleted at most one time, however many errors matched.
has_duplicate_key_error = any(
    "Duplicate global key" in error["message"] for error in (task.errors or [])
)
if has_duplicate_key_error and dataset.row_count == 0:
    print(f"Deleting empty dataset: {dataset}")
    dataset.delete()

Step 2: Create/select an ontology for your model predictions

Your model run should have the correct ontology setup with all the tools and classifications supported for your predictions, and the tool names and classification instructions should match the name/instructions fields in your annotation payloads to ensure the correct feature schemas are matched.

## Setup the ontology and link the tools created above.

# Global classifications. Their names and option values are the ones used in
# the classification payloads above.
radio_question = lb.Classification(
    class_type=lb.Classification.Type.RADIO,
    name="radio_question",
    scope=lb.Classification.Scope.GLOBAL,
    options=[
        lb.Option(value="first_radio_answer"),
        lb.Option(value="second_radio_answer"),
    ],
)
checklist_question = lb.Classification(
    class_type=lb.Classification.Type.CHECKLIST,
    name="checklist_question",
    scope=lb.Classification.Scope.GLOBAL,
    options=[
        lb.Option(value="first_checklist_answer"),
        lb.Option(value="second_checklist_answer"),
    ],
)
free_text_question = lb.Classification(
    class_type=lb.Classification.Type.TEXT,
    name="free_text",
    scope=lb.Classification.Scope.GLOBAL,
)
nested_radio_question = lb.Classification(
    class_type=lb.Classification.Type.RADIO,
    name="nested_radio_question",
    scope=lb.Classification.Scope.GLOBAL,
    options=[
        lb.Option(
            "first_radio_answer",
            options=[
                lb.Classification(
                    class_type=lb.Classification.Type.RADIO,
                    name="sub_radio_question",
                    options=[lb.Option("first_sub_radio_answer")],
                ),
            ],
        ),
    ],
)
nested_checklist_question = lb.Classification(
    class_type=lb.Classification.Type.CHECKLIST,
    name="nested_checklist_question",
    scope=lb.Classification.Scope.GLOBAL,
    options=[
        lb.Option(
            "first_checklist_answer",
            options=[
                lb.Classification(
                    class_type=lb.Classification.Type.CHECKLIST,
                    name="sub_checklist_question",
                    options=[lb.Option("first_sub_checklist_answer")],
                ),
            ],
        ),
    ],
)

# Tools: plain bounding box and entity, plus one of each with nested
# classifications.
bounding_box_tool = lb.Tool(tool=lb.Tool.Type.BBOX, name="bounding_box")
named_entity_tool = lb.Tool(tool=lb.Tool.Type.NER, name="named_entity")
ner_with_checklist_tool = lb.Tool(
    tool=lb.Tool.Type.NER,
    name="ner_with_checklist_subclass",
    classifications=[
        lb.Classification(
            class_type=lb.Classification.Type.CHECKLIST,
            name="sub_checklist_question",
            options=[lb.Option(value="first_sub_checklist_answer")],
        ),
    ],
)
bbox_with_radio_tool = lb.Tool(
    tool=lb.Tool.Type.BBOX,
    name="bbox_with_radio_subclass",
    classifications=[
        lb.Classification(
            class_type=lb.Classification.Type.RADIO,
            name="sub_radio_question",
            options=[
                lb.Option(
                    value="first_sub_radio_answer",
                    options=[
                        lb.Classification(
                            class_type=lb.Classification.Type.RADIO,
                            name="second_sub_radio_question",
                            options=[lb.Option("second_sub_radio_answer")],
                        ),
                    ],
                ),
            ],
        ),
    ],
)

ontology_builder = lb.OntologyBuilder(
    classifications=[  # List of Classification objects
        radio_question,
        checklist_question,
        free_text_question,
        nested_radio_question,
        nested_checklist_question,
    ],
    tools=[  # List of Tool objects
        bounding_box_tool,
        named_entity_tool,
        ner_with_checklist_tool,
        bbox_with_radio_tool,
    ],
)

ontology = client.create_ontology(
    "Document Annotation Import Demo",
    ontology_builder.asdict(),
    media_type=lb.MediaType.Document,
)

Step 3: Create a model and a model run

# create model (the random UUID suffix makes the name differ on every run)
model = client.create_model(
    name=f"PDF_model_run_{uuid.uuid4()}",
    ontology_id=ontology.uid,
)

# create model run
model_run = model.create_model_run("iteration 1")

Step 4: Send data rows to the model run

# Send the data row, referenced by its global key, to the model run.
model_run.upsert_data_rows(global_keys=[global_key])

Step 5: Create the predictions payload

To import NER annotations, you can either pass a text_layer_url or use the Labelbox-generated text_layer_url.

To extract the generated text layer URL, we first need to export the data row.

# Export the data row and read its text layer URL from the media attributes.
task = lb.DataRow.export(client=client, global_keys=[global_key])
task.wait_till_done()

if task.has_result():
    text_layer = ""
    # Each exported record overwrites text_layer, so the last one wins.
    for record in task.get_buffered_stream():
        text_layer = record.json['media_attributes']['text_layer_url']
    print(text_layer)
import requests
import json


# Helper method
def update_text_selections(annotation, group_id, list_tokens, page):
    """Replace the ``textSelections`` entry of an NDJSON entity payload in place.

    Args:
        annotation: NDJSON prediction dict to mutate.
        group_id: id representing the group of words.
        list_tokens: ids representing the individual words of the group.
        page: page number stored in the text selection.

    Returns:
        None. The dict passed as ``annotation`` is modified in place.
    """
    annotation.update({
        "textSelections": [
            {
                "groupId": group_id,
                "tokenIds": list_tokens,
                "page": page,
            }
        ]
    })


# Fetch the content of the text layer
res = requests.get(text_layer)

# Phrases that we want to annotate obtained from the text layer url
content_phrases = [
    "Metal-insulator (MI) transitions have been one of the",
    "T. Sasaki,* N. Yoneyama, and N. Kobayashi",
]

# Parse the text layer
text_selections = []
text_selections_ner = []

for obj in json.loads(res.text):
    for group in obj["groups"]:
        if group["content"] == content_phrases[0]:
            list_tokens = [x["id"] for x in group["tokens"]]
            # build text selections for Python annotations
            document_text_selection = lb_types.DocumentTextSelection(
                groupId=group["id"], tokenIds=list_tokens, page=1
            )
            text_selections.append(document_text_selection)
            # build text selection for the NDJson annotations
            update_text_selections(
                annotation=entities_prediction_ndjson,
                group_id=group["id"],  # id representing group of words
                list_tokens=list_tokens,  # ids representing individual words from the group
                page=1,
            )
        if group["content"] == content_phrases[1]:
            list_tokens_2 = [x["id"] for x in group["tokens"]]
            # build text selections for Python annotations
            ner_text_selection = lb_types.DocumentTextSelection(
                groupId=group["id"], tokenIds=list_tokens_2, page=1
            )
            text_selections_ner.append(ner_text_selection)
            # build text selection for the NDJson annotations
            update_text_selections(
                annotation=ner_with_checklist_subclass_prediction_ndjson,
                group_id=group["id"],  # id representing group of words
                list_tokens=list_tokens_2,  # ids representing individual words from the group
                page=1,
            )

# re-write the entity annotation with text selections
# The confidence score is set on the ObjectAnnotation, as in the initial
# entities_prediction definition, rather than on the DocumentEntity value.
entities_prediction_document_entity = lb_types.DocumentEntity(
    name="named_entity",
    textSelections=text_selections,
)
entities_prediction = lb_types.ObjectAnnotation(
    name="named_entity",
    confidence=0.5,
    value=entities_prediction_document_entity,
)

# re-write the entity annotation + subclassification with text selections
classifications = [
    lb_types.ClassificationAnnotation(
        name="sub_checklist_question",
        value=lb_types.Checklist(
            answer=[
                lb_types.ClassificationAnswer(
                    name="first_sub_checklist_answer",
                    confidence=0.5,
                )
            ]
        ),
    )
]
ner_annotation_with_subclass = lb_types.DocumentEntity(
    name="ner_with_checklist_subclass",
    textSelections=text_selections_ner,
)
# Rebind the name that the predictions payload uses, so the uploaded
# prediction carries the real text selections instead of the empty ones.
ner_with_checklist_subclass_prediction = lb_types.ObjectAnnotation(
    name="ner_with_checklist_subclass",
    confidence=0.5,
    value=ner_annotation_with_subclass,
    classifications=classifications,
)
# Previous name of this object, kept so existing references still resolve.
ner_with_checklist_subclass_annotation = ner_with_checklist_subclass_prediction

# Final NDJSON and python annotations
print(f"entities_annotations_ndjson={entities_prediction_ndjson}")
print(f"entities_annotation={entities_prediction}")
print(f"nested_entities_annotation_ndjson={ner_with_checklist_subclass_prediction_ndjson}")
print(f"nested_entities_annotation={ner_with_checklist_subclass_prediction}")

Step 5 (continued): Assemble the predictions payload

Create the predictions payload using the snippets of code shown above.

Labelbox supports two formats for the predictions payload: NDJSON and Python annotation types. Both approaches are described below with instructions to compose predictions into Labels attached to the data rows.

The resulting label_predictions_ndjson and label_predictions payloads should have exactly the same prediction content (with the exception of the uuid strings that are generated).

# Python annotation types: one Label holding every prediction for the data row.
label_predictions = [
    lb_types.Label(
        data={"global_key" : global_key },
        annotations=[
            entities_prediction,
            checklist_prediction,
            nested_checklist_prediction,
            text_prediction,
            radio_prediction,
            nested_radio_prediction,
            bbox_prediction,
            bbox_with_radio_subclass_prediction,
            ner_with_checklist_subclass_prediction,
        ],
    ),
]
# NDJSON: tag every prediction dict with the data row it belongs to.
# The dicts are modified in place and the same objects are collected.
ndjson_predictions = (
    entities_prediction_ndjson,
    checklist_prediction_ndjson,
    nested_checklist_prediction_ndjson,
    text_prediction_ndjson,
    radio_prediction_ndjson,
    nested_radio_prediction_ndjson,
    bbox_prediction_ndjson,
    bbox_with_radio_subclass_prediction_ndjson,
    ner_with_checklist_subclass_prediction_ndjson,
)

label_predictions_ndjson = []
for annot in ndjson_predictions:
    annot["dataRow"] = {"globalKey": global_key}
    label_predictions_ndjson.append(annot)

Step 6: Upload the predictions payload to the model run

# Upload the prediction label to the Model Run
prediction_job_name = f"prediction_upload_job{uuid.uuid4()}"
upload_job_prediction = model_run.add_predictions(
    name=prediction_job_name,
    predictions=label_predictions,
)

# Errors will appear for annotation uploads that failed.
print("Errors:", upload_job_prediction.errors)
print("Status of uploads: ", upload_job_prediction.statuses)

Step 7: Send annotations to the model run

To send annotations to a model run, we must first import them into a project, create a label payload and then send them to the model run.

# 7.1 Create a labelbox project
project = client.create_project(
    name="Document Prediction Import Demo",
    media_type=lb.MediaType.Document,
)
project.connect_ontology(ontology)

# 7.2 Create a batch to send to the project
project.create_batch(
    "batch_text_prediction_demo",  # Each batch in a project must have a unique name
    global_keys=[global_key],  # Paginated collection of data row objects, list of data row ids or global keys
    priority=5,  # priority between 1(Highest) - 5(lowest)
)

# 7.3 Create the annotations payload
# These mirror the prediction objects but carry no confidence scores.
entities_annotation = lb_types.ObjectAnnotation(
    name="named_entity",
    value=lb_types.DocumentEntity(
        name="named_entity",
        textSelections=text_selections,
    ),
)

radio_annotation = lb_types.ClassificationAnnotation(
    name="radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(name="first_radio_answer"),
    ),
)

checklist_annotation = lb_types.ClassificationAnnotation(
    name="checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(name="first_checklist_answer"),
            lb_types.ClassificationAnswer(name="second_checklist_answer"),
        ],
    ),
)

bbox_dim_1 = {
    "top": 135.3,
    "left": 102.771,
    "height": 109.843,
    "width": 415.8,
}
bbox_annotation = lb_types.ObjectAnnotation(
    name="bounding_box",  # must match your ontology feature's name
    value=lb_types.DocumentRectangle(
        # start corner: x = left, y = top
        start=lb_types.Point(x=bbox_dim_1["left"], y=bbox_dim_1["top"]),
        # end corner: x = left + width, y = top + height
        end=lb_types.Point(
            x=bbox_dim_1["left"] + bbox_dim_1["width"],
            y=bbox_dim_1["top"] + bbox_dim_1["height"],
        ),
        page=0,
        unit=lb_types.RectangleUnit.POINTS,
    ),
)

nested_checklist_annotation = lb_types.ClassificationAnnotation(
    name="nested_checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_checklist_answer",
                classifications=[
                    lb_types.ClassificationAnnotation(
                        name="sub_checklist_question",
                        value=lb_types.Checklist(
                            answer=[
                                lb_types.ClassificationAnswer(
                                    name="first_sub_checklist_answer",
                                ),
                            ],
                        ),
                    ),
                ],
            ),
        ],
    ),
)
nested_radio_annotation = lb_types.ClassificationAnnotation(
    name="nested_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_radio_answer",
            classifications=[
                lb_types.ClassificationAnnotation(
                    name="sub_radio_question",
                    value=lb_types.Radio(
                        answer=lb_types.ClassificationAnswer(
                            name="first_sub_radio_answer",
                        ),
                    ),
                ),
            ],
        ),
    ),
)

text_annotation = lb_types.ClassificationAnnotation(
    name="free_text",
    value=lb_types.Text(answer="sample text"),
)

bbox_dim = {
    "top": 226.757,
    "left": 317.271,
    "height": 194.229,
    "width": 249.386,
}
bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(
    name="bbox_with_radio_subclass",
    value=lb_types.DocumentRectangle(
        # start corner: x = left, y = top
        start=lb_types.Point(x=bbox_dim["left"], y=bbox_dim["top"]),
        # end corner: x = left + width, y = top + height
        end=lb_types.Point(
            x=bbox_dim["left"] + bbox_dim["width"],
            y=bbox_dim["top"] + bbox_dim["height"],
        ),
        unit=lb_types.RectangleUnit.POINTS,
        page=1,
    ),
    classifications=[
        lb_types.ClassificationAnnotation(
            name="sub_radio_question",
            value=lb_types.Radio(
                answer=lb_types.ClassificationAnswer(
                    name="first_sub_radio_answer",
                    classifications=[
                        lb_types.ClassificationAnnotation(
                            name="second_sub_radio_question",
                            value=lb_types.Radio(
                                answer=lb_types.ClassificationAnswer(
                                    name="second_sub_radio_answer",
                                ),
                            ),
                        ),
                    ],
                ),
            ),
        ),
    ],
)

ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(
    name="ner_with_checklist_subclass",
    value=lb_types.DocumentEntity(
        name="ner_with_checklist_subclass",
        text_selections=text_selections_ner,
    ),
    classifications=[
        lb_types.ClassificationAnnotation(
            name="sub_checklist_question",
            value=lb_types.Checklist(
                answer=[
                    lb_types.ClassificationAnswer(name="first_sub_checklist_answer"),
                ],
            ),
        ),
    ],
)

# 7.4 Create the label object
labels = [
    lb_types.Label(
        data={"global_key" : global_key },
        annotations=[
            entities_annotation,
            checklist_annotation,
            nested_checklist_annotation,
            text_annotation,
            radio_annotation,
            nested_radio_annotation,
            bbox_annotation,
            bbox_with_radio_subclass_annotation,
            ner_with_checklist_subclass_annotation,
        ],
    ),
]

# 7.5 Upload annotations to the project using label import
upload_job_annotation = lb.LabelImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name=f"text_label_import_job{uuid.uuid4()}",
    labels=labels,
)
upload_job_annotation.wait_until_done()

# Errors will appear for annotation uploads that failed.
print("Errors:", upload_job_annotation.errors)
print("Status of uploads: ", upload_job_annotation.statuses)

# 7.6 Send the annotations to the model run
# get the labels id from the project
model_run.upsert_labels(project_id=project.uid)