Overview
In this tutorial, you will:
- Create a dataset in Labelbox
- Import model predictions, custom metadata, and ground truth

Import libraries
!pip3 install "labelbox[data]"
import labelbox
from labelbox.schema.ontology import OntologyBuilder, Tool, Classification, Option
from labelbox.schema.annotation_import import MALPredictionImport, LabelImport
from labelbox.data.serialization import NDJsonConverter
from labelbox import LabelingFrontend
from labelbox.data.annotation_types import (
    Label,
    Point,
    LabelList,
    ImageData,
    Rectangle,
    ObjectAnnotation,
)
from labelbox.schema.data_row_metadata import (
    DataRowMetadata,
    DataRowMetadataField,
    DeleteDataRowMetadata,
    DataRowMetadataKind
)
import requests
import json
import os
import time
from tqdm.notebook import tqdm
import datetime
import random
Set up the client and helper functions
## Generate API key: https://app.labelbox.com/account/api-keys
LB_API_KEY = ""
client = labelbox.Client(api_key=LB_API_KEY)
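# If you prefer not to paste the key inline, you can read it from an environment
# variable instead (LABELBOX_API_KEY is an assumed name, not a Labelbox convention):
# LB_API_KEY = os.environ["LABELBOX_API_KEY"]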
DATA_ROWS = "https://storage.googleapis.com/labelbox-datasets/VHR_geospatial/geospatial_datarows.json"
ANNOTATIONS = "https://storage.googleapis.com/labelbox-datasets/VHR_geospatial/geospatial_annotations.json"
## Generic data download function
def download_files(filemap):
    path, uri = filemap
    ## Download data only if it is not already present locally
    if not os.path.exists(path):
        r = requests.get(uri, stream=True)
        if r.status_code == 200:
            with open(path, 'wb') as f:
                for chunk in r:
                    f.write(chunk)
    return path
Download a public dataset
download_files(("data_rows.json", DATA_ROWS))
download_files(("annotations.json", ANNOTATIONS))
with open('data_rows.json', 'r') as fp:
    data_rows = json.load(fp)
with open('annotations.json', 'r') as fp:
    annotations = json.load(fp)
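Both files are plain JSON; the annotations follow a COCO-style object-detection layout with top-level images, annotations, and categories keys, which the loops later in this tutorial rely on. A quick sanity check of what was just loaded:
print(f"Data rows: {len(data_rows)}")
print(f"Images: {len(annotations['images'])}, annotations: {len(annotations['annotations'])}")
print(f"Categories: {[category['name'] for category in annotations['categories']]}")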
Create a dataset
dataset = client.create_dataset(name="Geospatial vessel detection")
Import Data Rows and metadata
# Here is an example of adding two metadata fields to your data rows: a "captureDateTime" field with a datetime value, and a "tag" field with a string value
metadata_ontology = client.get_data_row_metadata_ontology()
datetime_schema_id = metadata_ontology.reserved_by_name["captureDateTime"].uid
tag_schema_id = metadata_ontology.reserved_by_name["tag"].uid
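# (Optional) To list every reserved metadata field by name, you can iterate the
# same reserved_by_name mapping used above:
# for field_name in metadata_ontology.reserved_by_name:
#     print(field_name)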
tag_items = ["WorldView-1", "WorldView-2", "WorldView-3", "WorldView-4"]
for datarow in tqdm(data_rows):
    dt = datetime.datetime.utcnow() + datetime.timedelta(days=random.random()*30)  # a random datetime value
    tag_item = random.choice(tag_items)  # a random tag value

    # Option 1: Specify metadata with a list of DataRowMetadataField. This is the
    # recommended option since it comes with validation for metadata fields.
    metadata_fields = [
        DataRowMetadataField(schema_id=datetime_schema_id, value=dt),
        DataRowMetadataField(schema_id=tag_schema_id, value=tag_item)
    ]

    # Option 2: Uncomment to try. Alternatively, you can specify the metadata fields in
    # dictionary format without declaring DataRowMetadataField objects. It is equivalent to Option 1.
    # metadata_fields = [
    #     {"schema_id": datetime_schema_id, "value": dt},
    #     {"schema_id": tag_schema_id, "value": tag_item}
    # ]

    datarow["metadata_fields"] = metadata_fields

task = dataset.create_data_rows(data_rows)
task.wait_till_done()
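Before moving on, it is worth confirming the data row import finished cleanly; errors on the returned task is the same attribute checked for the label import at the end of this tutorial:
print(task.status)
print(task.errors)  # None (or empty) if the import succeeded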
Examine a data row
datarow = next(dataset.data_rows())
print(datarow)
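The returned object also exposes each field as an attribute, which is often more convenient than parsing the printed repr; attribute names here follow the SDK's DataRow model:
print(datarow.uid, datarow.external_id)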
Set up a labeling project
ontology = OntologyBuilder()
for tool in annotations['categories']:
    print(tool['name'])
    ontology.add_tool(Tool(tool=Tool.Type.BBOX, name=tool['name']))
ontology = client.create_ontology("Vessel detection ontology", ontology.asdict())
project = client.create_project(name="Vessel detection")
project.setup_editor(ontology)
ontology_from_project = OntologyBuilder.from_project(project)
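The ground-truth step below looks up each tool by category_id - 1, so the tool order in the project ontology must match the order of annotations['categories']. A quick check using the objects already defined above:
for index, tool in enumerate(ontology_from_project.tools):
    print(index + 1, tool.name)  # should line up with each category's "id" and "name"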
Prepare and queue a batch of data rows
# Randomly select 200 data row objects (optional)
sampled_data_rows = random.sample(list(dataset.export_data_rows()), 200)
batch = project.create_batch(
    "first-batch",  # each batch in a project must have a unique name
    sampled_data_rows,  # a sample of a paginated collection of data row objects
    5  # priority between 1 (highest) and 5 (lowest)
)
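As a quick confirmation that queuing worked (a minimal check; name echoes the value passed to create_batch above):
print(f"Queued {len(sampled_data_rows)} data rows in batch: {batch.name}")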
Process ground truth annotations for import
queued_data_rows = project.export_queued_data_rows()
ground_truth_list = LabelList()
for datarow in queued_data_rows:
    annotations_list = []
    # externalId is formatted as "<folder>/<file name>"
    folder = datarow['externalId'].split("/")[0]
    file_name = datarow['externalId'].split("/")[1]
    if folder == "positive_image_set":
        for image in annotations['images']:
            if image['file_name'] == file_name:
                for annotation in annotations['annotations']:
                    if annotation['image_id'] == image['id']:
                        bbox = annotation['bbox']
                        class_name = ontology_from_project.tools[annotation['category_id'] - 1].name
                        annotations_list.append(ObjectAnnotation(
                            name=class_name,
                            # COCO bboxes are [x, y, width, height]; convert to corner points
                            value=Rectangle(
                                start=Point(x=bbox[0], y=bbox[1]),
                                end=Point(x=bbox[0] + bbox[2], y=bbox[1] + bbox[3])
                            ),
                        ))
    image_data = ImageData(uid=datarow['id'])
    ground_truth_list.append(Label(data=image_data, annotations=annotations_list))
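Before serializing, a quick count of what will be imported (a sketch, assuming LabelList supports len() and iteration over its Labels):
print(f"Labels to import: {len(ground_truth_list)}")
print(f"Total annotations: {sum(len(label.annotations) for label in ground_truth_list)}")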
Import ground truth annotations
ground_truth_list.assign_feature_schema_ids(OntologyBuilder.from_project(project))
ground_truth_ndjson = list(NDJsonConverter.serialize(ground_truth_list))
start_time = time.time()
## Upload annotations
upload_task = LabelImport.create_from_objects(client, project.uid, "geospatial-import-job-1", ground_truth_ndjson)
print(upload_task)
# Wait for the upload to finish (this can take up to five minutes)
upload_task.wait_until_done()
print(upload_task.errors)
print("--- Finished in %s mins ---" % ((time.time() - start_time)/60))