How to import annotations on image data and sample import formats.
Open this Colab for an interactive tutorial on importing annotations on image data.
Supported annotations
To import annotations in Labelbox, you need to create an annotations payload. In this section, we provide this payload for every supported annotation type.
Labelbox supports two formats for the annotations payload:
- Python annotation types (recommended)
- NDJSON
Both are described below.
Classification: Radio (single-choice)
# Single-choice (radio) classification selecting one answer option.
# Question and answer names have to match the ontology feature names.
radio_annotation = lb_types.ClassificationAnnotation(
    name="radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(name="second_radio_answer"),
    ),
)
# NDJSON form of the radio classification: one answer object.
radio_annotation_ndjson = {
    "name": "radio_question",
    "answer": {"name": "second_radio_answer"},
}
Classification: Checklist (multi-choice)
# Multi-choice (checklist) classification selecting two answer options.
checklist_annotation = lb_types.ClassificationAnnotation(
    name="checklist_question",  # must match your ontology feature's name
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(name="first_checklist_answer"),
            lb_types.ClassificationAnswer(name="second_checklist_answer"),
        ],
    ),
)
# NDJSON form of the checklist classification: a list of answer objects.
checklist_annotation_ndjson = {
    "name": "checklist_question",
    "answer": [
        {"name": "first_checklist_answer"},
        {"name": "second_checklist_answer"},
    ],
}
Classification: Nested radio
# Radio classification whose selected answer carries a nested radio
# sub-question. The sub-classification hangs off the answer it belongs to.
nested_radio_annotation = lb_types.ClassificationAnnotation(
    name="nested_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_radio_answer",
            classifications=[
                lb_types.ClassificationAnnotation(
                    name="sub_radio_question",
                    value=lb_types.Radio(
                        answer=lb_types.ClassificationAnswer(
                            name="first_sub_radio_answer",
                        ),
                    ),
                ),
            ],
        ),
    ),
)
# NDJSON form of the nested radio classification.
# The sub-question is nested inside the answer it belongs to, mirroring the
# Python annotation (classifications live on the ClassificationAnswer) and the
# nested checklist payload, rather than sitting at the top level.
nested_radio_annotation_ndjson = {
    "name": "nested_radio_question",
    "answer": {
        "name": "first_radio_answer",
        "classifications": [
            {
                "name": "sub_radio_question",
                "answer": {"name": "first_sub_radio_answer"},
            },
        ],
    },
}
Classification: Nested checklist
# Checklist classification whose selected answer carries a nested checklist
# sub-question.
nested_checklist_annotation = lb_types.ClassificationAnnotation(
    name="nested_checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_checklist_answer",
                classifications=[
                    lb_types.ClassificationAnnotation(
                        name="sub_checklist_question",
                        value=lb_types.Checklist(
                            answer=[
                                lb_types.ClassificationAnswer(
                                    name="first_sub_checklist_answer",
                                ),
                            ],
                        ),
                    ),
                ],
            ),
        ],
    ),
)
# NDJSON form of the nested checklist classification: the sub-question is
# nested inside the checklist answer it belongs to.
# NOTE(review): the sub-checklist "answer" is a single object here while the
# top-level checklist uses a list — confirm which shape the import API expects.
nested_checklist_annotation_ndjson = {
    "name": "nested_checklist_question",
    "answer": [
        {
            "name": "first_checklist_answer",
            "classifications": [
                {
                    "name": "sub_checklist_question",
                    "answer": {"name": "first_sub_checklist_answer"},
                },
            ],
        },
    ],
}
Classification: Free-form text
# Free-form text classification.
text_annotation = lb_types.ClassificationAnnotation(
    name="free_text",  # must match your ontology feature's name
    value=lb_types.Text(answer="sample text"),
)
# NDJSON form of the free-text classification: the answer is a plain string.
text_annotation_ndjson = {"name": "free_text", "answer": "sample text"}
Relationship with bounding box
Relationship annotations are only supported for MAL imports.
# Two bounding boxes that act as the endpoints of the relationship.
bbox_source = lb_types.ObjectAnnotation(
    name="bounding_box",
    value=lb_types.Rectangle(
        start=lb_types.Point(x=2096, y=1264),
        end=lb_types.Point(x=2240, y=1689),
    ),
)

bbox_target = lb_types.ObjectAnnotation(
    name="bounding_box",
    value=lb_types.Rectangle(
        start=lb_types.Point(x=2272, y=1346),
        end=lb_types.Point(x=2416, y=1704),
    ),
)

# Directed relationship from bbox_source to bbox_target. Python annotations
# reference the annotation objects directly and do not require a UUID.
relationship = lb_types.RelationshipAnnotation(
    name="relationship",
    value=lb_types.Relationship(
        source=bbox_source,
        target=bbox_target,
        type=lb_types.Relationship.Type.UNIDIRECTIONAL,
    ),
)
# NDJSON relationships point at their endpoints by UUID, so each endpoint
# annotation gets a freshly generated one.
uuid_source = str(uuid.uuid4())
uuid_target = str(uuid.uuid4())

bbox_source_ndjson = {
    "uuid": uuid_source,
    "name": "bounding_box",
    "bbox": {"top": 1264.0, "left": 2096.0, "height": 425.0, "width": 144.0},
}

bbox_target_ndjson = {
    "uuid": uuid_target,
    "name": "bounding_box",
    "bbox": {"top": 1346.0, "left": 2272.0, "height": 358.0, "width": 144.0},
}

relationship_ndjson = {
    "name": "relationship",
    "relationship": {
        "source": uuid_source,  # UUID reference to the source annotation
        "target": uuid_target,  # UUID reference to the target annotation
        "type": "unidirectional",
    },
}
Bounding Box
# Bounding box described by its top-left and bottom-right corners.
bbox_annotation = lb_types.ObjectAnnotation(
    name="bounding_box",  # must match your ontology feature's name
    value=lb_types.Rectangle(
        start=lb_types.Point(x=1690, y=977),  # x = left, y = top
        end=lb_types.Point(x=1915, y=1307),  # x = left + width, y = top + height
    ),
)
# NDJSON form of the bounding box: top-left corner plus height and width.
bbox_annotation_ndjson = {
    "name": "bounding_box",
    "bbox": {"top": 977, "left": 1690, "height": 330, "width": 225},
}
Bounding Box with nested classification
# Bounding box that carries a nested radio classification.
bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(
    name="bbox_with_radio_subclass",
    value=lb_types.Rectangle(
        start=lb_types.Point(x=541, y=933),  # x = left, y = top
        end=lb_types.Point(x=871, y=1124),  # x = left + width, y = top + height
    ),
    classifications=[
        lb_types.ClassificationAnnotation(
            name="sub_radio_question",
            value=lb_types.Radio(
                answer=lb_types.ClassificationAnswer(
                    name="first_sub_radio_answer",
                ),
            ),
        ),
    ],
)
# NDJSON form of the bounding box with its nested radio classification.
bbox_with_radio_subclass_ndjson = {
    "name": "bbox_with_radio_subclass",
    "classifications": [
        {
            "name": "sub_radio_question",
            "answer": {"name": "first_sub_radio_answer"},
        },
    ],
    "bbox": {"top": 933, "left": 541, "height": 191, "width": 330},
}
Polygon
# Polygon built from its (x, y) vertices, listed in drawing order.
polygon_annotation = lb_types.ObjectAnnotation(
    name="polygon",  # must match your ontology feature's name
    value=lb_types.Polygon(
        points=[
            lb_types.Point(x=vertex_x, y=vertex_y)
            for vertex_x, vertex_y in (
                (1489.581, 183.934),
                (2278.306, 256.885),
                (2428.197, 200.437),
                (2560.0, 335.419),
                (2557.386, 503.165),
                (2320.596, 503.103),
                (2156.083, 628.943),
                (2161.111, 785.519),
                (2002.115, 894.647),
                (1838.456, 877.874),
                (1436.53, 874.636),
                (1411.403, 758.579),
                (1353.853, 751.74),
                (1345.264, 453.461),
                (1426.011, 421.129),
            )
        ],
    ),
)
# NDJSON form of the polygon. The final vertex repeats the first one, so the
# ring is explicitly closed.
polygon_annotation_ndjson = {
    "name": "polygon",
    "polygon": [
        {"x": vertex_x, "y": vertex_y}
        for vertex_x, vertex_y in (
            (1489.581, 183.934),
            (2278.306, 256.885),
            (2428.197, 200.437),
            (2560.0, 335.419),
            (2557.386, 503.165),
            (2320.596, 503.103),
            (2156.083, 628.943),
            (2161.111, 785.519),
            (2002.115, 894.647),
            (1838.456, 877.874),
            (1436.53, 874.636),
            (1411.403, 758.579),
            (1353.853, 751.74),
            (1345.264, 453.461),
            (1426.011, 421.129),
            (1489.581, 183.934),
        )
    ],
}
Segmentation mask
MaskData is mask data in a uint8 array of [H, W, 3]. You can also convert a polygon annotation or a 2D array to MaskData. You can also specify a URL to a cloud-hosted mask (it can be hosted on any cloud provider).
Mask limits
To be valid for import, masks must be smaller than:
- height: 9000 px
- width: 9000 px
# Colour in the mask array that marks the pixels of this annotation.
color = (0, 0, 0)

# Three alternative ways to obtain MaskData; each assignment overrides the
# previous one, so mask_annotation below uses the last.

# 1) Rasterise a polygon annotation (image height and width are required).
im_height, im_width = 100, 100
mask_data = lb_types.MaskData(
    arr=polygon_annotation.value.draw(
        height=im_height,
        width=im_width,
        color=color,
    ),
)

# 2) Convert a 2D array into a 3D mask array.
arr_2d = np.zeros((100, 100), dtype="uint8")
mask_data = lb_types.MaskData.from_2D_arr(arr_2d)

# 3) Supply a 3D array directly; the third axis holds the RGB values.
mask_data = lb_types.MaskData(arr=np.zeros([400, 450, 3], dtype="uint8"))

# Python annotation
mask_annotation = lb_types.ObjectAnnotation(
    name="mask",  # must match your ontology feature's name
    value=lb_types.Mask(mask=mask_data, color=color),
)
# NDJSON form of the mask: a URL to a hosted mask image plus the RGB colour
# that identifies this annotation's pixels.
mask_annotation_ndjson = {
    "name": "mask",
    "classifications": [],
    "mask": {
        "instanceURI": "https://storage.labelbox.com/cjhfn5y6s0pk507024nz1ocys%2F1d60856c-59b7-3060-2754-83f7e93e0d01-1?Expires=1666901963361&KeyName=labelbox-assets-key-3&Signature=t-2s2DB4YjFuWEFak0wxYqfBfZA",
        "colorRGB": (0, 0, 0),
    },
}
Point
# Single point annotation at the given pixel coordinates.
point_annotation = lb_types.ObjectAnnotation(
    name="point",  # must match your ontology feature's name
    value=lb_types.Point(
        x=1166.606,
        y=1441.768,
    ),
)
# NDJSON form of the point annotation.
point_annotation_ndjson = {
    "name": "point",
    "classifications": [],
    "point": {"x": 1166.606, "y": 1441.768},
}
Polyline
# Polyline built from its (x, y) keypoints, listed in drawing order.
polyline_annotation = lb_types.ObjectAnnotation(
    name="polyline",  # must match your ontology feature's name
    value=lb_types.Line(
        points=[
            lb_types.Point(x=key_x, y=key_y)
            for key_x, key_y in (
                (2534.353, 249.471),
                (2429.492, 182.092),
                (2294.322, 221.962),
                (2224.491, 180.463),
                (2136.123, 204.716),
                (1712.247, 173.949),
                (1703.838, 84.438),
                (1579.772, 82.61),
                (1583.442, 167.552),
                (1478.869, 164.903),
                (1418.941, 318.149),
                (1243.128, 400.815),
                (1022.067, 319.007),
                (892.367, 379.216),
                (670.273, 364.408),
                (613.114, 288.16),
                (377.559, 238.251),
                (368.087, 185.064),
                (246.557, 167.286),
                (236.648, 285.61),
                (90.929, 326.412),
            )
        ],
    ),
)
# NDJSON form of the polyline: an ordered list of keypoints.
polyline_annotation_ndjson = {
    "name": "polyline",
    "classifications": [],
    "line": [
        {"x": key_x, "y": key_y}
        for key_x, key_y in (
            (2534.353, 249.471),
            (2429.492, 182.092),
            (2294.322, 221.962),
            (2224.491, 180.463),
            (2136.123, 204.716),
            (1712.247, 173.949),
            (1703.838, 84.438),
            (1579.772, 82.61),
            (1583.442, 167.552),
            (1478.869, 164.903),
            (1418.941, 318.149),
            (1243.128, 400.815),
            (1022.067, 319.007),
            (892.367, 379.216),
            (670.273, 364.408),
            (613.114, 288.16),
            (377.559, 238.251),
            (368.087, 185.064),
            (246.557, 167.286),
            (236.648, 285.61),
            (90.929, 326.412),
        )
    ],
}
End-to-end example: Import pre-labels or ground truth
Whether you are importing annotations as pre-labels or as ground truth, the steps are very similar. Only steps 5 and 6 (creating and importing the annotation payload) differ slightly; they are explained in detail below.
Before you start
You will need to import these libraries to use the code examples in this section.
import labelbox as lb
import labelbox.data.annotation_types as lb_types
import uuid
import numpy as np
Replace with your API key
# Paste your Labelbox API key between the quotes before running the example.
API_KEY = ""
# Client object used by the SDK calls in the following steps.
client = lb.Client(API_KEY)
Step 1: Import data rows
To attach annotations to a data row, it must first be uploaded to Catalog. Here we create an example image data row in Catalog.
# Upload one sample image to a new dataset. The global key is what later
# steps use to refer to this data row.
global_key = "2560px-Kitano_Street_Kobe01s5s41102.jpeg"

test_img_url = {
    "row_data": "https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg",
    "global_key": global_key,
}

dataset = client.create_dataset(name="image-demo-dataset")
task = dataset.create_data_rows([test_img_url])

# Block until the upload task completes, then report any problems.
task.wait_till_done()
print(f"Errors: {task.errors}")
print(f"Failed data rows: {task.failed_data_rows}")
Step 2: Create an ontology
Your project should have the correct ontology set up with all the tools and classifications supported for your annotations. The value for the `name` parameter should match the `name` field in your annotations to ensure the correct feature schemas are matched.
Here is an example of creating an ontology programmatically for all the sample annotations above.
# Ontology covering every sample annotation: each `name` here is the name the
# annotation payloads refer to.
ontology_builder = lb.OntologyBuilder(
    classifications=[  # list of classification objects
        lb.Classification(
            class_type=lb.Classification.Type.RADIO,
            name="radio_question",
            options=[
                lb.Option(value="first_radio_answer"),
                lb.Option(value="second_radio_answer"),
            ],
        ),
        lb.Classification(
            class_type=lb.Classification.Type.CHECKLIST,
            name="checklist_question",
            options=[
                lb.Option(value="first_checklist_answer"),
                lb.Option(value="second_checklist_answer"),
            ],
        ),
        lb.Classification(
            class_type=lb.Classification.Type.TEXT,
            name="free_text",
        ),
        # Radio question whose first answer opens a nested radio question.
        lb.Classification(
            class_type=lb.Classification.Type.RADIO,
            name="nested_radio_question",
            options=[
                lb.Option(
                    "first_radio_answer",
                    options=[
                        lb.Classification(
                            class_type=lb.Classification.Type.RADIO,
                            name="sub_radio_question",
                            options=[lb.Option("first_sub_radio_answer")],
                        ),
                    ],
                ),
            ],
        ),
        # Checklist question whose first answer opens a nested checklist.
        lb.Classification(
            class_type=lb.Classification.Type.CHECKLIST,
            name="nested_checklist_question",
            options=[
                lb.Option(
                    "first_checklist_answer",
                    options=[
                        lb.Classification(
                            class_type=lb.Classification.Type.CHECKLIST,
                            name="sub_checklist_question",
                            options=[lb.Option("first_sub_checklist_answer")],
                        ),
                    ],
                ),
            ],
        ),
    ],
    tools=[  # List of Tool objects
        lb.Tool(tool=lb.Tool.Type.BBOX, name="bounding_box"),
        lb.Tool(
            tool=lb.Tool.Type.BBOX,
            name="bbox_with_radio_subclass",
            classifications=[
                lb.Classification(
                    class_type=lb.Classification.Type.RADIO,
                    name="sub_radio_question",
                    options=[lb.Option(value="first_sub_radio_answer")],
                ),
            ],
        ),
        lb.Tool(tool=lb.Tool.Type.POLYGON, name="polygon"),
        lb.Tool(tool=lb.Tool.Type.SEGMENTATION, name="mask"),
        lb.Tool(tool=lb.Tool.Type.POINT, name="point"),
        lb.Tool(tool=lb.Tool.Type.LINE, name="polyline"),
        lb.Tool(tool=lb.Tool.Type.RELATIONSHIP, name="relationship"),
    ],
)

# Register the ontology for image projects.
ontology = client.create_ontology(
    "Image Annotation Import Demo",
    ontology_builder.asdict(),
    media_type=lb.MediaType.Image,
)
Step 3: Create a labeling project
Connect the ontology to the labeling project.
# Create an image project in batch queue mode, then attach the ontology to it.
project = client.create_project(
    name="image-demo-project",
    media_type=lb.MediaType.Image,
    queue_mode=lb.QueueMode.Batch,
)
project.setup_editor(ontology)
Step 4: Send a batch of data rows to the project
# Queue the sample data row in the project so it can receive annotations.
batch = project.create_batch(
    "image-demo-batch",  # each batch in a project must have a unique name
    global_keys=[global_key],  # paginated collection of data row objects, list of data row ids or global keys
    priority=1,  # priority between 1(highest) - 5(lowest)
)
print(f"Batch: {batch}")
Step 5: Create the annotations payload
Create the annotations payload using the snippets of code shown above.
Labelbox supports two formats for the annotations payload: NDJSON and Python annotation types. Both approaches are described below with instructions to compose annotations into Labels attached to the data rows.
The resulting `labels` and `ndjson_labels` from each approach will include every annotation (created above) supported by the respective method.
# Python annotation types: collect every sample annotation and wrap them in a
# single Label attached to the data row identified by its global key.
annotations = [
    radio_annotation,
    nested_radio_annotation,
    checklist_annotation,
    nested_checklist_annotation,
    text_annotation,
    bbox_annotation,
    bbox_with_radio_subclass_annotation,
    polygon_annotation,
    mask_annotation,
    point_annotation,
    polyline_annotation,
    bbox_source,
    bbox_target,
    relationship,
]

labels = [
    lb_types.Label(
        data=lb_types.ImageData(global_key=global_key),
        annotations=annotations,
    ),
]
# NDJSON: every annotation dict is tagged with the data row it belongs to and
# collected into the payload list.
annotations = [
    radio_annotation_ndjson,
    nested_radio_annotation_ndjson,
    nested_checklist_annotation_ndjson,
    checklist_annotation_ndjson,
    text_annotation_ndjson,
    bbox_annotation_ndjson,
    bbox_with_radio_subclass_ndjson,
    polygon_annotation_ndjson,
    mask_annotation_ndjson,
    point_annotation_ndjson,
    polyline_annotation_ndjson,
    bbox_source_ndjson,
    bbox_target_ndjson,
    relationship_ndjson,  ## Only supported for MAL imports
]

ndjson_labels = []
for annotation in annotations:
    # The dict is modified in place, so the original *_ndjson objects also
    # gain the "dataRow" reference.
    annotation["dataRow"] = {"globalKey": global_key}
    ndjson_labels.append(annotation)
Step 6: Import the annotation payload
For both options, you can pass either the `labels` or `ndjson_labels` payload as the value for the `predictions` or `labels` parameter.
Option A: Upload to a labeling project as pre-labels (Model-assisted labeling)
# Import the payload as pre-labels (model-assisted labeling) for the project.
# A random UUID suffix is appended to the job name.
upload_job = lb.MALPredictionImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name=f"mal_job{uuid.uuid4()}",
    predictions=labels,
)

print(f"Errors: {upload_job.errors}")
print(f"Status of uploads: {upload_job.statuses}")
Option B: Upload to a labeling project as ground truth
Relationship annotations are not supported in label import jobs
# Import the payload as ground truth for the project.
# Relationship annotations are not supported in label import jobs, so they are
# removed from each label before uploading; all other annotations are kept.
ground_truth_labels = [
    lb_types.Label(
        data=label.data,
        annotations=[
            annotation
            for annotation in label.annotations
            if not isinstance(annotation, lb_types.RelationshipAnnotation)
        ],
    )
    for label in labels
]

# A random UUID suffix is appended to the job name.
upload_job = lb.LabelImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name="label_import_job" + str(uuid.uuid4()),
    labels=ground_truth_labels,
)

print(f"Errors: {upload_job.errors}")
print(f"Status of uploads: {upload_job.statuses}")