Import video annotations

How to import annotations on video data and sample import formats.

Open this Colab for an interactive tutorial on importing annotations on video assets.

Supported annotations

To import annotations in Labelbox, you need to create the annotations payload. In this section, we provide this payload for every supported annotation type.

📘

Tool features are frame-based

Global and frame-based classifications are supported on video assets. However, tool annotations are required to be frame-based.

Bounding box (frame-based)

# Frame-specific bounding boxes (VideoObjectAnnotation) do not support confidence scores.
bbox_dm = {"top": 617, "left": 1371, "height": 419, "width": 505}

# Python annotation: one keyframe per listed frame, all in segment 0.
# Rectangle corners: start = (left, top), end = (left + width, top + height).
bbox_annotation = [
    lb_types.VideoObjectAnnotation(
        name="bbox_video",
        keyframe=True,
        frame=frame_number,
        segment_index=0,
        value=lb_types.Rectangle(
            start=lb_types.Point(x=bbox_dm["left"], y=bbox_dm["top"]),
            end=lb_types.Point(
                x=bbox_dm["left"] + bbox_dm["width"],
                y=bbox_dm["top"] + bbox_dm["height"],
            ),
        ),
    )
    for frame_number in (13, 19)
]
# Bounding box geometry (top-left corner plus size) shared by both keyframes.
bbox_dm = dict(top=617, left=1371, height=419, width=505)

# NDJSON payload: a single segment with keyframes at frames 13 and 19.
bbox_annotation_ndjson = {
    "name": "bbox_video",
    "segments": [
        {
            "keyframes": [
                {"frame": frame_number, "bbox": bbox_dm}
                for frame_number in (13, 19)
            ],
        },
    ],
}

Point (frame-based)

# Python annotation: a single keyframe point on frame 17.
point_annotation = [
    lb_types.VideoObjectAnnotation(
        value=lb_types.Point(x=660.134, y=407.926),
        frame=17,
        keyframe=True,
        name="point_video",
    ),
]
# NDJSON payload: one segment holding a single keyframe at frame 17.
point_annotation_ndjson = {
    "name": "point_video",
    "segments": [
        {"keyframes": [{"frame": 17, "point": {"x": 660.134, "y": 407.926}}]},
    ],
}

Polyline (frame-based)

# Python annotation: four polyline keyframes split across two segments
# (frames 5 and 20 in segment 0, frames 24 and 45 in segment 1).
polyline_annotation = [
    lb_types.VideoObjectAnnotation(
        name="line_video_frame",
        keyframe=True,
        frame=frame_number,
        segment_index=segment,
        value=lb_types.Line(
            points=[lb_types.Point(x=680, y=100), lb_types.Point(x=100, y=190)],
        ),
    )
    for frame_number, segment in ((5, 0), (20, 0), (24, 1), (45, 1))
]
# NDJSON payload: two segments, each with two keyframes. Every keyframe is
# written below as (frame, ((x, y), ...)) and expanded into the
# {"frame": ..., "line": [{"x": ..., "y": ...}, ...]} structure.
polyline_frame_annotation_ndjson = {
    "name": "line_video_frame",
    "segments": [
        {
            "keyframes": [
                {
                    "frame": frame_number,
                    "line": [{"x": x, "y": y} for x, y in vertices],
                }
                for frame_number, vertices in segment_keyframes
            ],
        }
        for segment_keyframes in (
            # First segment
            (
                (5, ((680, 100), (100, 190), (190, 220))),
                (20, ((680, 180), (100, 200), (200, 260))),
            ),
            # Second segment
            (
                (24, ((300, 310), (330, 430))),
                (45, ((600, 810), (900, 930))),
            ),
        )
    ],
}

Classification: Radio (frame-based)

# Python annotation: the same radio answer on frames 9 and 15 of segment 0.
radio_annotation = [
    lb_types.VideoClassificationAnnotation(
        name="radio_class",
        frame=frame_number,
        segment_index=0,
        value=lb_types.Radio(
            answer=lb_types.ClassificationAnswer(name="first_radio_answer"),
        ),
    )
    for frame_number in (9, 15)
]

# NDJSON payload: the answer carries the frame range (9 through 15) it applies to.
frame_radio_classification_ndjson = {
    "name": "radio_class",
    "answer": {
        "name": "first_radio_answer",
        "frames": [{"start": 9, "end": 15}],
    },
}

Classification: Checklist (frame-based)

# Python annotation: both checklist answers selected on frames 29 and 35 of segment 0.
checklist_annotation = [
    lb_types.VideoClassificationAnnotation(
        name="checklist_class",
        frame=frame_number,
        segment_index=0,
        value=lb_types.Checklist(
            answer=[
                lb_types.ClassificationAnswer(name=answer_name)
                for answer_name in ("first_checklist_answer", "second_checklist_answer")
            ],
        ),
    )
    for frame_number in (29, 35)
]
# NDJSON payload: each selected answer carries its own frame range (29 through 35).
frame_checklist_classification_ndjson = {
    "name": "checklist_class",
    "answer": [
        {"name": answer_name, "frames": [{"start": 29, "end": 35}]}
        for answer_name in ("first_checklist_answer", "second_checklist_answer")
    ],
}

Classification: Checklist (global)

# Global (non frame-based) classifications use ClassificationAnnotation
# rather than VideoClassificationAnnotation.
global_checklist_annotation = [
    lb_types.ClassificationAnnotation(
        name="checklist_class_global",
        value=lb_types.Checklist(
            answer=[
                lb_types.ClassificationAnswer(name=answer_name)
                for answer_name in ("first_checklist_answer", "second_checklist_answer")
            ],
        ),
    ),
]
# NDJSON payload: global checklist answers carry no frame ranges.
global_checklist_classification_ndjson = {
    "name": "checklist_class_global",
    "answer": [
        {"name": answer_name}
        for answer_name in ("first_checklist_answer", "second_checklist_answer")
    ],
}

Classification: Radio (global)

# Global (non frame-based) classifications use ClassificationAnnotation
# rather than VideoClassificationAnnotation.
global_radio_annotation = [
    lb_types.ClassificationAnnotation(
        name="radio_class_global",
        value=lb_types.Radio(
            answer=lb_types.ClassificationAnswer(name="first_radio_answer"),
        ),
    ),
]
# NDJSON payload: a global radio answer carries no frame ranges.
global_radio_classification_ndjson = dict(
    name="radio_class_global",
    answer=dict(name="first_radio_answer"),
)

Classification: Nested Checklist (global)

# Python annotation: a global checklist whose selected answer carries a
# nested checklist sub-classification.
nested_checklist_annotation = [
    lb_types.ClassificationAnnotation(
        name="nested_checklist_question",
        value=lb_types.Checklist(
            answer=[
                lb_types.ClassificationAnswer(
                    name="first_checklist_answer",
                    classifications=[
                        lb_types.ClassificationAnnotation(
                            name="sub_checklist_question",
                            value=lb_types.Checklist(
                                answer=[
                                    lb_types.ClassificationAnswer(
                                        name="first_sub_checklist_answer",
                                    ),
                                ],
                            ),
                        ),
                    ],
                ),
            ],
        ),
    ),
]
# NDJSON payload: a global checklist whose selected answer carries a nested
# checklist sub-classification. Checklist answers are lists at every level,
# matching the checklist sub-classifications used in the bounding box example.
nested_checklist_annotation_ndjson = {
    "name": "nested_checklist_question",
    "answer": [
        {
            "name": "first_checklist_answer",
            "classifications": [
                {
                    "name": "sub_checklist_question",
                    "answer": [{"name": "first_sub_checklist_answer"}],
                },
            ],
        },
    ],
}

Classification: Nested Radio (global)

# Python annotation: a global radio whose selected answer carries a nested
# radio sub-classification.
nested_radio_annotation = [
    lb_types.ClassificationAnnotation(
        name="nested_radio_question",
        value=lb_types.Radio(
            answer=lb_types.ClassificationAnswer(
                name="first_radio_answer",
                classifications=[
                    lb_types.ClassificationAnnotation(
                        name="sub_radio_question",
                        value=lb_types.Radio(
                            answer=lb_types.ClassificationAnswer(
                                name="first_sub_radio_answer",
                            ),
                        ),
                    ),
                ],
            ),
        ),
    ),
]
# NDJSON payload: a global radio whose selected answer carries a nested radio
# sub-classification. The sub-classification lives inside the selected answer,
# mirroring the Python annotation where `classifications` is an argument of
# the ClassificationAnswer.
nested_radio_annotation_ndjson = {
    "name": "nested_radio_question",
    "answer": {
        "name": "first_radio_answer",
        "classifications": [
            {
                "name": "sub_radio_question",
                "answer": {"name": "first_sub_radio_answer"},
            },
        ],
    },
}

Classification: Free-form text

# Python annotation: global free-form text classification.
# Wrapped in a list like every other Python payload on this page, because each
# entry of `annotations_list` is passed as-is to `lb_types.Label(annotations=...)`.
text_annotation = [
    lb_types.ClassificationAnnotation(
        name="free_text",
        value=lb_types.Text(answer="sample text"),
    ),
]
# NDJSON payload: the free-text answer is a plain string.
text_annotation_ndjson = dict(name="free_text", answer="sample text")

Bounding box with sub-classifications (frame-based)

# Bounding box geometry (top-left corner plus size) reused by every keyframe.
bbox_dm2 = {"top": 146.0, "left": 98.0, "height": 382.0, "width": 341.0}

# Python annotation: three keyframes in segment 0.
#   frame 10 - bounding box only (no `classifications` argument is passed)
#   frame 11 - bounding box + checklist answer "first_checklist_answer"
#   frame 13 - bounding box + checklist answer "second_checklist_answer"
# Rectangle corners: start = (left, top), end = (left + width, top + height).
frame_bbox_with_checklist_subclass_annotation = [
    lb_types.VideoObjectAnnotation(
        name="bbox_class",
        keyframe=True,
        frame=frame_number,
        segment_index=0,
        value=lb_types.Rectangle(
            start=lb_types.Point(x=bbox_dm2["left"], y=bbox_dm2["top"]),
            end=lb_types.Point(
                x=bbox_dm2["left"] + bbox_dm2["width"],
                y=bbox_dm2["top"] + bbox_dm2["height"],
            ),
        ),
        **(
            {}
            if checklist_answer is None
            else {
                "classifications": [
                    lb_types.ClassificationAnnotation(
                        name="checklist_class",
                        value=lb_types.Checklist(
                            answer=[lb_types.ClassificationAnswer(name=checklist_answer)],
                        ),
                    ),
                ],
            }
        ),
    )
    for frame_number, checklist_answer in (
        (10, None),
        (11, "first_checklist_answer"),
        (13, "second_checklist_answer"),
    )
]
# Bounding box geometry (top-left corner plus size) reused by every keyframe.
bbox_dm2 = dict(top=146.0, left=98.0, height=382.0, width=341.0)

# NDJSON payload: one segment with three keyframes. Frame 10 has the bounding
# box only; frames 11 and 13 each add a checklist sub-classification.
frame_bbox_with_checklist_subclass_annotation_ndjson = {
    "name": "bbox_class",
    "segments": [
        {
            "keyframes": [
                {"frame": 10, "bbox": bbox_dm2},
                {
                    "frame": 11,
                    "bbox": bbox_dm2,
                    "classifications": [
                        {
                            "name": "checklist_class",
                            "answer": [{"name": "first_checklist_answer"}],
                        },
                    ],
                },
                {
                    "frame": 13,
                    "bbox": bbox_dm2,
                    "classifications": [
                        {
                            "name": "checklist_class",
                            "answer": [{"name": "second_checklist_answer"}],
                        },
                    ],
                },
            ],
        },
    ],
}

Masks (frame-based)

# Download the sample color mask once; its raw bytes are reused by every mask frame below.
url = "https://storage.googleapis.com/labelbox-datasets/image_sample_data/color_mask.png"
response = requests.get(url)

# Single mask frame. Instead of bytes you could also pass an instance URI: instance_uri=url
video_mask_annotation_bytes = [
    lb_types.VideoMaskAnnotation(
        frames=[lb_types.MaskFrame(index=20, im_bytes=response.content)],
        instances=[
            lb_types.MaskInstance(color_rgb=(255, 255, 1), name="video_mask"),
        ],
    ),
]

# Same mask on multiple frames (note that tracking is not supported with mask tools).
video_mask_annotation_bytes_2 = [
    lb_types.VideoMaskAnnotation(
        frames=[
            lb_types.MaskFrame(index=frame_index, im_bytes=response.content)
            for frame_index in (23, 20)
        ],
        instances=[
            lb_types.MaskInstance(color_rgb=(255, 1, 1), name="video_mask"),
        ],
    ),
]

# Download the sample color mask and base64-encode it; NDJSON carries the
# image bytes as a UTF-8 string.
url = "https://storage.googleapis.com/labelbox-datasets/image_sample_data/color_mask.png"
response = requests.get(url)
img_bytes = base64.b64encode(response.content).decode('utf-8')

# Single mask frame.
video_mask_ndjson_bytes = {
    "masks": {
        "frames": [{"index": 20, "imBytes": img_bytes}],
        "instances": [{"colorRGB": [255, 255, 1], "name": "video_mask"}],
    },
}

# Same mask on frames 20 and 23.
video_mask_ndjson_bytes_2 = {
    "masks": {
        "frames": [
            {"index": frame_index, "imBytes": img_bytes}
            for frame_index in (20, 23)
        ],
        "instances": [{"colorRGB": [255, 1, 1], "name": "video_mask"}],
    },
}

Multiple instances of bounding box annotations in the same frame


bbox_dm = {"top": 617, "left": 1371, "height": 419, "width": 505}

# First instance of bounding box ranging from frame 22 to 27 (geometry: bbox_dm).
# Rectangle corners: start = (left, top), end = (left + width, top + height).
bbox_annotation_1 = [
    lb_types.VideoObjectAnnotation(
        name="bbox_video",
        keyframe=True,
        frame=frame_number,
        segment_index=0,
        value=lb_types.Rectangle(
            start=lb_types.Point(x=bbox_dm["left"], y=bbox_dm["top"]),
            end=lb_types.Point(
                x=bbox_dm["left"] + bbox_dm["width"],
                y=bbox_dm["top"] + bbox_dm["height"],
            ),
        ),
    )
    for frame_number in (22, 27)
]

# Second instance of bounding box ranging from frame 22 to 27 (geometry: bbox_dm2).
bbox_annotation_2 = [
    lb_types.VideoObjectAnnotation(
        name="bbox_video",
        keyframe=True,
        frame=frame_number,
        segment_index=0,
        value=lb_types.Rectangle(
            start=lb_types.Point(x=bbox_dm2["left"], y=bbox_dm2["top"]),
            end=lb_types.Point(
                x=bbox_dm2["left"] + bbox_dm2["width"],
                y=bbox_dm2["top"] + bbox_dm2["height"],
            ),
        ),
    )
    for frame_number in (22, 27)
]
# Geometry of the first bounding box instance.
bbox_dm = {
    "top": 617,
    "left": 1371,
    "height": 419,
    "width": 505,
}

# Geometry of the second bounding box instance. Redeclared with the same values
# used in the sub-classification example so this snippet runs on its own.
bbox_dm2 = {
    "top": 146.0,
    "left": 98.0,
    "height": 382.0,
    "width": 341.0,
}

# First instance of bounding box ranging from frame 22 to 27.
# Both keyframes use bbox_dm, matching `bbox_annotation_1` in the Python payload.
bbox_frame_annotation_ndjson = {
    "name": "bbox_video",
    "segments": [
        {
            "keyframes": [
                {"frame": 22, "bbox": bbox_dm},
                {"frame": 27, "bbox": bbox_dm},
            ],
        },
    ],
}

# Second instance of bounding box ranging from frame 22 to 27.
# Both keyframes use bbox_dm2, matching `bbox_annotation_2` in the Python payload.
bbox_frame_annotation_ndjson2 = {
    "name": "bbox_video",
    "segments": [
        {
            "keyframes": [
                {"frame": 22, "bbox": bbox_dm2},
                {"frame": 27, "bbox": bbox_dm2},
            ],
        },
    ],
}

End-to-end example: Import pre-labels or ground truth

Whether you are importing annotations as pre-labels or as ground truth, the steps are very similar. Step 6 (importing the annotation payload) is where the process becomes slightly different and is explained below in detail.

Before you start

You will need to import these libraries to use the code examples in this section.

import labelbox as lb
import labelbox.types as lb_types
import uuid
import base64
import requests

Replace with your API key

# Paste your Labelbox API key between the quotes.
API_KEY = ""
client = lb.Client(api_key=API_KEY)

Step 1: Import data rows

To attach annotations to a data row, it must first be uploaded to Catalog. Here we create an example video data row in Catalog.

# Describe the sample video; the global key is how later steps refer to this data row.
global_key = "sample-video-jellyfish.mp4"
asset = dict(
    row_data="https://storage.googleapis.com/labelbox-datasets/video-sample-data/sample-video-2.mp4",
    global_key=global_key,
    media_type="VIDEO",
)

# If the iam_integration argument is removed, Labelbox will use the default
# integration for your organization.
dataset = client.create_dataset(name="video_demo_dataset", iam_integration=None)

# Upload the data row and wait for the task to finish before reading its outcome.
task = dataset.create_data_rows([asset])
task.wait_till_done()
print("Errors :", task.errors)
print("Failed data rows:", task.failed_data_rows)

Step 2: Create/select an ontology

Your project should have the correct ontology set up with all the tools and classifications supported for your annotations, and the tool and classification name should match the name fields in your annotations to ensure the correct feature schemas are matched.

For example, when we created the bounding box annotation above, we provided the name bbox_video. Now, when we set up our ontology, we must ensure that the name of our bounding box tool is also bbox_video. The same alignment must hold true for the other tools and classifications we create in our ontology.

Here is an example of creating an ontology programmatically for all the sample annotations above.

# Ontology covering every sample annotation on this page. Tool and
# classification names must match the `name` fields used in the payloads.
ontology_builder = lb.OntologyBuilder(
    tools=[
        lb.Tool(tool=lb.Tool.Type.BBOX, name="bbox_video"),
        lb.Tool(tool=lb.Tool.Type.POINT, name="point_video"),
        lb.Tool(tool=lb.Tool.Type.LINE, name="line_video_frame"),
        lb.Tool(tool=lb.Tool.Type.RASTER_SEGMENTATION, name="video_mask"),
        lb.Tool(
            tool=lb.Tool.Type.BBOX,
            name="bbox_class",
            classifications=[
                # Must be a CHECKLIST: the "bbox_class" sample annotations attach
                # checklist values (lb_types.Checklist) to this sub-classification.
                lb.Classification(
                    class_type=lb.Classification.Type.CHECKLIST,
                    name="checklist_class",
                    scope=lb.Classification.Scope.INDEX,
                    options=[
                        lb.Option(value="first_checklist_answer"),
                        lb.Option(value="second_checklist_answer"),
                    ],
                ),
            ],
        ),
    ],
    classifications=[
        # Frame-based classifications: the scope must be defined as INDEX.
        lb.Classification(
            class_type=lb.Classification.Type.CHECKLIST,
            name="checklist_class",
            scope=lb.Classification.Scope.INDEX,
            options=[
                lb.Option(value="first_checklist_answer"),
                lb.Option(value="second_checklist_answer"),
            ],
        ),
        lb.Classification(
            class_type=lb.Classification.Type.RADIO,
            name="radio_class",
            scope=lb.Classification.Scope.INDEX,
            options=[
                lb.Option(value="first_radio_answer"),
                lb.Option(value="second_radio_answer"),
            ],
        ),
        # Global classifications (no scope argument).
        lb.Classification(
            class_type=lb.Classification.Type.RADIO,
            name="nested_radio_question",
            options=[
                lb.Option(
                    "first_radio_answer",
                    options=[
                        lb.Classification(
                            class_type=lb.Classification.Type.RADIO,
                            name="sub_radio_question",
                            options=[lb.Option("first_sub_radio_answer")],
                        ),
                    ],
                ),
            ],
        ),
        lb.Classification(
            class_type=lb.Classification.Type.CHECKLIST,
            name="nested_checklist_question",
            options=[
                lb.Option(
                    "first_checklist_answer",
                    options=[
                        lb.Classification(
                            class_type=lb.Classification.Type.CHECKLIST,
                            name="sub_checklist_question",
                            options=[lb.Option("first_sub_checklist_answer")],
                        ),
                    ],
                ),
            ],
        ),
        lb.Classification(
            class_type=lb.Classification.Type.RADIO,
            name="radio_class_global",
            options=[
                lb.Option(value="first_radio_answer"),
                lb.Option(value="second_radio_answer"),
            ],
        ),
        lb.Classification(
            class_type=lb.Classification.Type.CHECKLIST,
            name="checklist_class_global",
            options=[
                lb.Option(value="first_checklist_answer"),
                lb.Option(value="second_checklist_answer"),
            ],
        ),
        lb.Classification(
            class_type=lb.Classification.Type.TEXT,
            name="free_text",
        ),
    ],
)

ontology = client.create_ontology(
    "Video Annotation Import Demo Ontology",
    ontology_builder.asdict(),
    media_type=lb.MediaType.Video,
)

Step 3: Create a labeling project

Connect the ontology to the labeling project.

# Create a video labeling project.
project = client.create_project(
    media_type=lb.MediaType.Video,
    name="Video Annotation Import Demo",
)

# Attach the ontology created in the previous step to the project.
project.setup_editor(ontology)

Step 4: Send a batch of data rows to the project

# Queue the sample data row for labeling.
#   name        - each batch in a project must have a unique name
#   global_keys - a paginated collection of data row objects, a list of data rows or global keys
#   priority    - between 1 (highest) and 5 (lowest)
batch = project.create_batch(
    "first-batch-video-demo2",
    priority=5,
    global_keys=[global_key],
)

print("Batch: ", batch)

Step 5: Create the annotations payload

Create the annotations payload using the snippets of code shown above.

Labelbox supports two formats for the annotations payload: NDJSON and Python annotation types.

# Every entry is one of the Python annotation payloads defined in the sections above.
annotations_list = [
    checklist_annotation,
    radio_annotation,
    bbox_annotation,
    frame_bbox_with_checklist_subclass_annotation,
    bbox_annotation_1,
    bbox_annotation_2,
    point_annotation,
    polyline_annotation,
    global_checklist_annotation,
    global_radio_annotation,
    nested_checklist_annotation,
    nested_radio_annotation,
    text_annotation,
    video_mask_annotation_bytes,
    video_mask_annotation_bytes_2,
]

# One Label per payload, each attached to the sample video through its global key.
label = [
    lb_types.Label(
        data=lb_types.VideoData(global_key=global_key),
        annotations=annotation,
    )
    for annotation in annotations_list
]

# Every entry is one of the NDJSON payloads defined in the sections above.
annotations_list_ndjson = [
    point_annotation_ndjson,
    bbox_annotation_ndjson,
    polyline_frame_annotation_ndjson,
    frame_checklist_classification_ndjson,
    frame_radio_classification_ndjson,
    nested_radio_annotation_ndjson,
    nested_checklist_annotation_ndjson,
    frame_bbox_with_checklist_subclass_annotation_ndjson,
    global_radio_classification_ndjson,
    global_checklist_classification_ndjson,
    text_annotation_ndjson,
    bbox_frame_annotation_ndjson,
    bbox_frame_annotation_ndjson2,
    video_mask_ndjson_bytes,
    video_mask_ndjson_bytes_2,
]

# Point each payload at the sample video. The dicts are updated in place, so
# label_ndjson holds the very same objects as annotations_list_ndjson.
for annotation in annotations_list_ndjson:
    annotation["dataRow"] = {"globalKey": global_key}

label_ndjson = list(annotations_list_ndjson)

Step 6: Import the annotation payload

Option A: Upload to a labeling project as pre-labels (Model-assisted labeling)

# Upload MAL label for this data row in project
upload_job_mal = lb.MALPredictionImport.create_from_objects(
    client = client,
    project_id = project.uid,
    name="mal_import_job-" + str(uuid.uuid4()),
    predictions=label)

upload_job_mal.wait_until_done()
print("Errors:", upload_job_mal.errors)
print("Status of uploads: ", upload_job_mal.statuses)
print("   ")

🚧

For this demo, run either the MAL import or the ground truth import, not both

Executing ground truth immediately after running an MAL job may result in the mask being overridden.

Option B: Upload to a labeling project as ground truth

# For this demo either run MAL or Ground truth import, not both.

# Import the labels into the project as ground truth.
# The random suffix gives each import job a distinct name.
upload_job_label_import = lb.LabelImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name=f"label_import_job-{uuid.uuid4()}",
    labels=label,
)

# Wait for the import to finish, then report its outcome.
upload_job_label_import.wait_until_done()
print("Errors:", upload_job_label_import.errors)
print("Status of uploads: ", upload_job_label_import.statuses)
print("   ")