How to upload predictions on PDF (document) data in a model run, and sample upload formats.
A textSelections field is required in the payload for each entity annotation. Each textSelections item in the list requires the following fields:
- token_ids for each word in the group of words.
- group_id associated with a group of words.
- page of the document (1-indexed).
The token_ids and the group_id are extracted from the text layer URL attached to the data row. Please follow the end-to-end demo to learn how to construct an entity annotation for documents.
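For reference, the NDJSON upload format uses camelCase keys for the same fields. A minimal sketch of a single textSelections item (the IDs below are placeholders, not real values):

text_selection_item = {
    "groupId": "<group-uuid>", # placeholder: ID of a group of words
    "tokenIds": ["<token-uuid>"], # placeholder: IDs of individual words in the group
    "page": 1 # 1-indexed page of the document
}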
entities_prediction = lb_types.ObjectAnnotation(
name="named_entity",
confidence=0.5,
    value=lb_types.DocumentEntity(
        name="named_entity",
        text_selections=[
lb_types.DocumentTextSelection(
token_ids=[],
group_id="",
page=1
)
]
)
)
radio_prediction = lb_types.ClassificationAnnotation(
name="radio_question",
value=lb_types.Radio(answer =
lb_types.ClassificationAnswer(name = "first_radio_answer", confidence=0.5)
)
)
checklist_prediction = lb_types.ClassificationAnnotation(
name="checklist_question",
value=lb_types.Checklist(answer = [
lb_types.ClassificationAnswer(name = "first_checklist_answer", confidence=0.5),
lb_types.ClassificationAnswer(name = "second_checklist_answer", confidence=0.5)
])
)
bbox_dim_1 = {
"top": 135.3,
"left": 102.771,
"height": 109.843,
"width": 415.8
}
bbox_prediction = lb_types.ObjectAnnotation(
name="bounding_box", # must match your ontology feature"s name
value=lb_types.DocumentRectangle(
start=lb_types.Point(x=bbox_dim_1["left"], y=bbox_dim_1["top"]), # x = left, y = top
end=lb_types.Point(x=bbox_dim_1["left"] + bbox_dim_1["width"], y=bbox_dim_1["top"]+ bbox_dim_1["height"]), # x= left + width , y = top + height
page=0,
unit=lb_types.RectangleUnit.POINTS
)
)
nested_checklist_prediction = lb_types.ClassificationAnnotation(
name="nested_checklist_question",
value=lb_types.Checklist(
answer=[lb_types.ClassificationAnswer(
name="first_checklist_answer",
confidence=0.5, # Confidence scores should be added to the answer
classifications=[
lb_types.ClassificationAnnotation(
name="sub_checklist_question",
value=lb_types.Checklist(
answer=[lb_types.ClassificationAnswer(
name="first_sub_checklist_answer",
confidence=0.5 # Confidence scores should be added to the answer
)]
))
]
)]
)
)
nested_radio_prediction = lb_types.ClassificationAnnotation(
name="nested_radio_question",
value=lb_types.Radio(
answer=lb_types.ClassificationAnswer(
name="first_radio_answer",
confidence=0.5, # Confidence scores should be added to the answer
classifications=[
lb_types.ClassificationAnnotation(
name="sub_radio_question",
value=lb_types.Radio(
answer=lb_types.ClassificationAnswer(
name="first_sub_radio_answer",
confidence=0.5 # Confidence scores should be added to the answer
)
)
)
]
)
)
)
text_prediction = lb_types.ClassificationAnnotation(
name="free_text", # must match your ontology feature"s name
value=lb_types.Text(answer="sample text")
)
bbox_dim = {
"top": 226.757,
"left": 317.271,
"height": 194.229,
"width": 249.386
}
bbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(
name="bbox_with_radio_subclass",
confidence=0.5,
value=lb_types.DocumentRectangle(
start=lb_types.Point(x=bbox_dim["left"], y=bbox_dim["top"]), # x = left, y = top
end=lb_types.Point(x=bbox_dim["left"] + bbox_dim["width"], y=bbox_dim["top"] + bbox_dim["height"]), # x= left + width , y = top + height
unit=lb_types.RectangleUnit.POINTS,
page=1
),
classifications=[
lb_types.ClassificationAnnotation(
name="sub_radio_question",
value=lb_types.Radio(
answer=lb_types.ClassificationAnswer(
name="first_sub_radio_answer",
confidence=0.5,
classifications=[
lb_types.ClassificationAnnotation(
name="second_sub_radio_question",
value=lb_types.Radio(
answer=lb_types.ClassificationAnswer(
name="second_sub_radio_answer",
confidence=0.5
)
)
)
]
)
)
)
]
)
ner_with_checklist_subclass_prediction = lb_types.ObjectAnnotation(
name="ner_with_checklist_subclass",
confidence=0.5,
value=lb_types.DocumentEntity(
name="ner_with_checklist_subclass",
text_selections=[
lb_types.DocumentTextSelection(
token_ids=[],
group_id="",
page=1
)
]
),
classifications=[
lb_types.ClassificationAnnotation(
name="sub_checklist_question",
value=lb_types.Checklist(
answer=[lb_types.ClassificationAnswer(name="first_sub_checklist_answer", confidence=0.5)]
)
)
]
)
import uuid
import json
import requests
import labelbox as lb
import labelbox.types as lb_types
Replace the value of API_KEY with a valid API key to connect to the Labelbox client.
API_KEY = None
client = lb.Client(API_KEY)
## Text layer url is required for uploading entity annotations
global_key = "0801.3483.pdf"
asset = {
"row_data": {
"pdf_url": "https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf",
"text_layer_url": "https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483-lb-textlayer.json"
},
"global_key": global_key
}
dataset = client.create_dataset(name="pdf_demo_dataset")
task = dataset.create_data_rows([asset])
task.wait_till_done()
print(f"Failed data rows: {task.failed_data_rows}")
print(f"Errors: {task.errors}")
if task.errors:
for error in task.errors:
        # If the global key already exists in the workspace, the dataset is created empty, so we can delete it.
        if 'Duplicate global key' in error['message'] and dataset.row_count == 0:
print(f"Deleting empty dataset: {dataset}")
dataset.delete()
Your project should have the correct ontology set up with all the tools and classifications supported by your predictions, and the tool and classification names should match the name/instructions fields in your annotation payloads to ensure the correct feature schemas are matched.
## Setup the ontology and link the tools created above.
ontology_builder = lb.OntologyBuilder(
classifications=[ # List of Classification objects
lb.Classification(
class_type=lb.Classification.Type.RADIO,
name="radio_question",
scope = lb.Classification.Scope.GLOBAL,
options=[
lb.Option(value="first_radio_answer"),
lb.Option(value="second_radio_answer")
]
),
lb.Classification(
class_type=lb.Classification.Type.CHECKLIST,
name="checklist_question",
scope = lb.Classification.Scope.GLOBAL,
options=[
lb.Option(value="first_checklist_answer"),
lb.Option(value="second_checklist_answer")
]
),
lb.Classification(
class_type=lb.Classification.Type.TEXT,
name="free_text",
scope = lb.Classification.Scope.GLOBAL
),
lb.Classification(
class_type=lb.Classification.Type.RADIO,
name="nested_radio_question",
scope = lb.Classification.Scope.GLOBAL,
options=[
lb.Option("first_radio_answer",
options=[
lb.Classification(
class_type=lb.Classification.Type.RADIO,
name="sub_radio_question",
options=[lb.Option("first_sub_radio_answer")]
)
])
]
),
lb.Classification(
class_type=lb.Classification.Type.CHECKLIST,
name="nested_checklist_question",
scope = lb.Classification.Scope.GLOBAL,
options=[
lb.Option("first_checklist_answer",
options=[
lb.Classification(
class_type=lb.Classification.Type.CHECKLIST,
name="sub_checklist_question",
options=[lb.Option("first_sub_checklist_answer")]
)
])
]
),
],
tools=[ # List of Tool objects
lb.Tool( tool=lb.Tool.Type.BBOX,name="bounding_box"),
lb.Tool(tool=lb.Tool.Type.NER, name="named_entity"),
lb.Tool(tool=lb.Tool.Type.NER,
name="ner_with_checklist_subclass",
classifications=[
lb.Classification(
class_type=lb.Classification.Type.CHECKLIST,
name="sub_checklist_question",
options=[
lb.Option(value="first_sub_checklist_answer")
]
)
]),
lb.Tool( tool=lb.Tool.Type.BBOX,
name="bbox_with_radio_subclass",
classifications=[
lb.Classification(
class_type=lb.Classification.Type.RADIO,
name="sub_radio_question",
options=[
lb.Option(
value="first_sub_radio_answer" ,
options=[
lb.Classification(
class_type=lb.Classification.Type.RADIO,
name="second_sub_radio_question",
options=[lb.Option("second_sub_radio_answer")]
)]
)]
)]
)]
)
ontology = client.create_ontology("Document Annotation Import Demo",
ontology_builder.asdict(),
media_type=lb.MediaType.Document)
# create model
model = client.create_model(name="PDF_model_run_"+ str(uuid.uuid4()),
ontology_id=ontology.uid)
# create model run
model_run = model.create_model_run("iteration 1")
model_run.upsert_data_rows(global_keys=[global_key])
You can either provide your own text_layer_url (as we did when creating the data row above) or use the Labelbox-generated text_layer_url. To extract the generated text layer URL, we first need to export the data row.
task = lb.DataRow.export(client=client, global_keys=[global_key])
task.wait_till_done()
if task.has_result():
stream = task.get_buffered_stream()
text_layer = ""
for output in stream:
output_json = output.json
text_layer = output_json['media_attributes']['text_layer_url']
print(text_layer)
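The parsing code below reads groups of tokens out of the text layer. A simplified sketch of the structure it expects, inferred from the fields accessed in the loop (values are placeholders):

# Simplified text layer structure (inferred from the parsing code; not fetched from the URL above):
text_layer_example = [
    { # one object per page
        "groups": [
            {
                "id": "<group-uuid>", # becomes the group_id
                "content": "phrase formed by the group of words",
                "tokens": [
                    {"id": "<token-uuid>"} # each token ID becomes one entry in token_ids
                ]
            }
        ]
    }
]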
import requests
import json
# Helper method: writes the extracted text selections into an NDJSON
# annotation dict (mutates the dict in place)
def update_text_selections(annotation, group_id, list_tokens, page):
    annotation.update({
        "textSelections": [
            {
                "groupId": group_id,
                "tokenIds": list_tokens,
                "page": page
            }
        ]
    })
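# NOTE: The NDJSON prediction payloads updated below are not defined earlier in
# this excerpt. A minimal sketch, assuming the same feature names and confidence
# values as the Python predictions above; textSelections starts empty and is
# filled in by update_text_selections().
entities_prediction_ndjson = {
    "name": "named_entity",
    "confidence": 0.5,
    "textSelections": []
}
ner_with_checklist_subclass_prediction_ndjson = {
    "name": "ner_with_checklist_subclass",
    "confidence": 0.5,
    "textSelections": [],
    "classifications": [{
        "name": "sub_checklist_question",
        "answer": [{"name": "first_sub_checklist_answer", "confidence": 0.5}]
    }]
}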
# Fetch the content of the text layer
res = requests.get(text_layer)
# Phrases that we want to annotate, obtained from the text layer URL
content_phrases = ["Metal-insulator (MI) transitions have been one of the", "T. Sasaki,* N. Yoneyama, and N. Kobayashi"]
# Parse the text layer
text_selections = []
text_selections_ner = []
for obj in json.loads(res.text):
    for group in obj["groups"]:
        if group["content"] == content_phrases[0]:
            list_tokens = [x["id"] for x in group["tokens"]]
            # Build the text selection for the Python annotations
            document_text_selection = lb_types.DocumentTextSelection(group_id=group["id"], token_ids=list_tokens, page=1)
            text_selections.append(document_text_selection)
            # Build the text selection for the NDJSON annotations
            update_text_selections(annotation=entities_prediction_ndjson,
                                   group_id=group["id"], # ID representing the group of words
                                   list_tokens=list_tokens, # IDs representing individual words in the group
                                   page=1)
        if group["content"] == content_phrases[1]:
            list_tokens_2 = [x["id"] for x in group["tokens"]]
            # Build the text selection for the Python annotations
            ner_text_selection = lb_types.DocumentTextSelection(group_id=group["id"], token_ids=list_tokens_2, page=1)
            text_selections_ner.append(ner_text_selection)
            # Build the text selection for the NDJSON annotations
            update_text_selections(annotation=ner_with_checklist_subclass_prediction_ndjson,
                                   group_id=group["id"], # ID representing the group of words
                                   list_tokens=list_tokens_2, # IDs representing individual words in the group
                                   page=1)
# Re-write the entity prediction with the extracted text selections
entities_prediction_document_entity = lb_types.DocumentEntity(name="named_entity", confidence=0.5, text_selections=text_selections)
entities_prediction = lb_types.ObjectAnnotation(name="named_entity", value=entities_prediction_document_entity)
# Re-write the entity prediction + subclassification with the extracted text selections
classifications = [
lb_types.ClassificationAnnotation(
name="sub_checklist_question",
value=lb_types.Checklist(
answer=[lb_types.ClassificationAnswer(name="first_sub_checklist_answer", confidence=0.5)]
)
)
]
ner_prediction_with_subclass = lb_types.DocumentEntity(name="ner_with_checklist_subclass", confidence=0.5, text_selections=text_selections_ner)
ner_with_checklist_subclass_prediction = lb_types.ObjectAnnotation(name="ner_with_checklist_subclass",
                                                                   confidence=0.5,
                                                                   value=ner_prediction_with_subclass,
                                                                   classifications=classifications)
# Final NDJSON and Python predictions
print(f"entities_prediction_ndjson={entities_prediction_ndjson}")
print(f"entities_prediction={entities_prediction}")
print(f"nested_entities_prediction_ndjson={ner_with_checklist_subclass_prediction_ndjson}")
print(f"nested_entities_prediction={ner_with_checklist_subclass_prediction}")
The label_predictions_ndjson and label_predictions payloads should have exactly the same prediction content (with the exception of the UUID strings that are generated).
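Only the two entity payloads are defined as NDJSON in this excerpt, so a minimal sketch of the NDJSON side covers just those, assuming the dataRow/globalKey convention used by Labelbox NDJSON payloads:

label_predictions_ndjson = []
for pred in [entities_prediction_ndjson, ner_with_checklist_subclass_prediction_ndjson]:
    pred.update({"dataRow": {"globalKey": global_key}}) # attach the data row reference
    label_predictions_ndjson.append(pred)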
label_predictions = []
label_predictions.append(
lb_types.Label(
data={"global_key" : global_key },
annotations = [
entities_prediction,
checklist_prediction,
nested_checklist_prediction,
text_prediction,
radio_prediction,
nested_radio_prediction,
bbox_prediction,
bbox_with_radio_subclass_prediction,
ner_with_checklist_subclass_prediction
]
)
)
# Upload the prediction label to the Model Run
upload_job_prediction = model_run.add_predictions(
name="prediction_upload_job"+str(uuid.uuid4()),
predictions=label_predictions)
# Errors will appear for annotation uploads that failed.
print("Errors:", upload_job_prediction.errors)
print("Status of uploads: ", upload_job_prediction.statuses)
Optionally, you can also upload annotations as ground truth to a Labelbox project and then send them to the model run:
# 7.1 Create a Labelbox project
project = client.create_project(name="Document Prediction Import Demo",
media_type=lb.MediaType.Document)
project.connect_ontology(ontology)
# 7.2 Create a batch to send to the project
project.create_batch(
"batch_text_prediction_demo", # Each batch in a project must have a unique name
    global_keys=[global_key], # paginated collection of data row objects, or a list of data row IDs or global keys
    priority=5 # priority between 1 (highest) and 5 (lowest)
)
# 7.3 Create the annotations payload
entities_annotation = lb_types.ObjectAnnotation(
name="named_entity",
    value=lb_types.DocumentEntity(
        name="named_entity",
        text_selections=text_selections
)
)
radio_annotation = lb_types.ClassificationAnnotation(
name="radio_question",
value=lb_types.Radio(answer =
lb_types.ClassificationAnswer(name = "first_radio_answer")
)
)
checklist_annotation = lb_types.ClassificationAnnotation(
name="checklist_question",
value=lb_types.Checklist(answer = [
lb_types.ClassificationAnswer(name = "first_checklist_answer"),
lb_types.ClassificationAnswer(name = "second_checklist_answer"),
])
)
bbox_dim_1 = {
"top": 135.3,
"left": 102.771,
"height": 109.843,
"width": 415.8
}
bbox_annotation = lb_types.ObjectAnnotation(
name="bounding_box", # must match your ontology feature"s name
value=lb_types.DocumentRectangle(
start=lb_types.Point(x=bbox_dim_1["left"], y=bbox_dim_1["top"]), # x = left, y = top
end=lb_types.Point(x=bbox_dim_1["left"] + bbox_dim_1["width"], y=bbox_dim_1["top"]+ bbox_dim_1["height"]), # x= left + width , y = top + height
page=0,
unit=lb_types.RectangleUnit.POINTS
)
)
nested_checklist_annotation = lb_types.ClassificationAnnotation(
name="nested_checklist_question",
value=lb_types.Checklist(
answer=[lb_types.ClassificationAnswer(
name="first_checklist_answer",
classifications=[
lb_types.ClassificationAnnotation(
name="sub_checklist_question",
value=lb_types.Checklist(
answer=[lb_types.ClassificationAnswer(
name="first_sub_checklist_answer",
)]
))
]
)]
)
)
nested_radio_annotation = lb_types.ClassificationAnnotation(
name="nested_radio_question",
value=lb_types.Radio(
answer=lb_types.ClassificationAnswer(
name="first_radio_answer",
classifications=[
lb_types.ClassificationAnnotation(
name="sub_radio_question",
value=lb_types.Radio(
answer=lb_types.ClassificationAnswer(
name="first_sub_radio_answer",
)
)
)
]
)
)
)
text_annotation = lb_types.ClassificationAnnotation(
name="free_text",
value=lb_types.Text(answer="sample text")
)
bbox_dim = {
"top": 226.757,
"left": 317.271,
"height": 194.229,
"width": 249.386
}
bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(
name="bbox_with_radio_subclass",
value=lb_types.DocumentRectangle(
start=lb_types.Point(x=bbox_dim["left"], y=bbox_dim["top"]), # x = left, y = top
end=lb_types.Point(x=bbox_dim["left"] + bbox_dim["width"], y=bbox_dim["top"] + bbox_dim["height"]), # x= left + width , y = top + height
unit=lb_types.RectangleUnit.POINTS,
page=1
),
classifications=[
lb_types.ClassificationAnnotation(
name="sub_radio_question",
value=lb_types.Radio(
answer=lb_types.ClassificationAnswer(
name="first_sub_radio_answer",
classifications=[
lb_types.ClassificationAnnotation(
name="second_sub_radio_question",
value=lb_types.Radio(
answer=lb_types.ClassificationAnswer(
name="second_sub_radio_answer"
)
)
)
]
)
)
)
]
)
ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(
name="ner_with_checklist_subclass",
value=lb_types.DocumentEntity(
name="ner_with_checklist_subclass",
text_selections=text_selections_ner
),
classifications=[
lb_types.ClassificationAnnotation(
name="sub_checklist_question",
value=lb_types.Checklist(
answer=[lb_types.ClassificationAnswer(name="first_sub_checklist_answer")]
)
)
]
)
# 7.4 Create the label object
labels = []
labels.append(
lb_types.Label(
data={"global_key" : global_key },
annotations = [
entities_annotation,
checklist_annotation,
nested_checklist_annotation,
text_annotation,
radio_annotation,
nested_radio_annotation,
bbox_annotation,
bbox_with_radio_subclass_annotation,
ner_with_checklist_subclass_annotation
]
)
)
# 7.5 Upload annotations to the project using label import
upload_job_annotation = lb.LabelImport.create_from_objects(
client = client,
project_id = project.uid,
name="text_label_import_job"+ str(uuid.uuid4()),
labels=labels)
upload_job_annotation.wait_until_done()
# Errors will appear for annotation uploads that failed.
print("Errors:", upload_job_annotation.errors)
print("Status of uploads: ", upload_job_annotation.statuses)
# 7.6 Send the annotations to the model run
# Get the label IDs from the project
model_run.upsert_labels(project_id=project.uid)