Metadata

Developer guide for creating, importing, exporting, and modifying metadata fields via the Python SDK.

Metadata types

Custom metadata fields must be one of the following data types:

  • String
  • Number
  • DateTime
  • Enum

Reserved fields

The following field names are reserved and cannot be used as custom metadata field names:

  • tag
  • split
  • captureDateTime
  • embedding

Construct metadata fields

To construct a metadata field, you must identify the schema — either by its schema ID or by its name — and provide the value that will be uploaded. You can do this in two ways:

  • Option 1: Specify the metadata using the DataRowMetadataField object (comes with validation for metadata fields)
  • Option 2: Specify the metadata fields in dictionary format without declaring the DataRowMetadataField objects

The value attribute of a metadata field must not be null.

# Build the list of metadata fields as DataRowMetadataField objects.
metadata_fields = []

# Construct a metadata field of string kind.
tag_schema = metadata_ontology.get_by_name("tag")
tag_metadata_field = DataRowMetadataField(
    schema_id=tag_schema.uid,  # identify the schema by its ID
    value="tag_string",  # typed input
)
metadata_fields.append(tag_metadata_field)

# Construct a metadata field of datetime kind.
datetime_schema = metadata_ontology.get_by_name("captureDateTime")
capture_datetime_field = DataRowMetadataField(
    name=datetime_schema.name,  # identify the schema by its name instead of its ID
    # Timezone-aware current time in UTC. datetime.utcnow() is deprecated
    # since Python 3.12 and returns a naive datetime.
    value=datetime.datetime.now(datetime.timezone.utc),
)
metadata_fields.append(capture_datetime_field)

# Construct metadata fields for enum options. You can import multiple options.
# For an enum option, schema_id is the option's parent and value is the option's own uid.
test_schema = metadata_ontology.get_by_name("split")["test"]
test_schema_field = DataRowMetadataField(
    schema_id=test_schema.parent,
    value=test_schema.uid,
)
metadata_fields.append(test_schema_field)

valid_schema = metadata_ontology.get_by_name("split")["valid"]
valid_schema_field = DataRowMetadataField(
    schema_id=valid_schema.parent,
    value=valid_schema.uid,
)
metadata_fields.append(valid_schema_field)

# Build the list of metadata fields as plain dictionaries
# (no DataRowMetadataField objects are declared).
metadata_fields = []

# Construct a metadata field of string kind, identified by schema name.
tag_schema = metadata_ontology.get_by_name("tag")
metadata_fields.append({"name": tag_schema.name, "value": "tag_value"})

# Construct a metadata field of datetime kind, identified by schema name.
datetime_schema = metadata_ontology.get_by_name("captureDateTime")
metadata_fields.append({
    "name": datetime_schema.name,
    # Timezone-aware current time in UTC. datetime.utcnow() is deprecated
    # since Python 3.12 and returns a naive datetime.
    "value": datetime.datetime.now(datetime.timezone.utc),
})

# Construct metadata fields for enum options. You can import multiple options.
# For an enum option, schema_id is the option's parent and value is the option's own uid.
valid_schema = metadata_ontology.get_by_name("split")["valid"]
metadata_fields.append({"schema_id": valid_schema.parent, "value": valid_schema.uid})

test_schema = metadata_ontology.get_by_name("split")["test"]
metadata_fields.append({"schema_id": test_schema.parent, "value": test_schema.uid})

Create custom metadata schema

# Setup shared by the examples on this page: they reference the SDK as `lb`
# and use `datetime`, DataRowMetadataField and DataRowMetadata.
import datetime

import labelbox as lb
from labelbox.schema.data_row_metadata import (
    DataRowMetadata,
    DataRowMetadataField,
    DataRowMetadataKind,
)

client = lb.Client(api_key="LABELBOX_API_KEY")
metadata_ontology = client.get_data_row_metadata_ontology()

# Create a custom metadata schema; this one is of string kind.
metadata_schema = metadata_ontology.create_schema(
    name="metadata_name",
    kind=DataRowMetadataKind.string,
)
# To create other metadata schema kinds, change `kind` to one of:
# DataRowMetadataKind.number, DataRowMetadataKind.datetime, DataRowMetadataKind.embedding.
# Enum schemas (DataRowMetadataKind.enum) additionally take an `options` list.

# Get the schema id of the newly created schema.
schema_id = metadata_schema.uid

Upload data rows with metadata

Custom metadata field limits vary according to your subscription; for details, see Limits.

# Describe one data row: the asset URL, a global key, and the metadata fields
# to attach to it.
asset_url = "https://storage.googleapis.com/labelbox-sample-datasets/Docs/basic.jpg"
data_row = {
    "row_data": asset_url,
    "global_key": "metadata_tutorial",
    "metadata_fields": metadata_fields,
}

# Create a dataset, submit the data row to it, and block until the task is done.
dataset = client.create_dataset(name="Create data row with metadata")
rows_to_upload = [data_row]
task = dataset.create_data_rows(rows_to_upload)
task.wait_till_done()

Get metadata schema

# You can look up a schema by name. Each lookup is kept in its own variable
# so that the first result is not overwritten by the second.
tag_schema = metadata_ontology.get_by_name("tag")
metadata_schema = metadata_ontology.get_by_name("enum_metadata_name")

# Inspect the schema and read its ID.
print(metadata_schema)
schema_id = metadata_schema.uid

Get metadata fields (ontology)

# Fetch the metadata schema ontology. A Labelbox workspace has a single metadata ontology.
metadata_ontology = client.get_data_row_metadata_ontology()

# List all available fields. Printed explicitly so the listing also appears
# when this runs as a script rather than in an interactive session.
print(metadata_ontology.fields)

Get metadata fields

# Take the first data row of the dataset and print each of its metadata
# fields as "name : value".
datarow = next(dataset.data_rows())
for field in datarow.metadata_fields:
    field_name = field['name']
    field_value = field['value']
    print(field_name, ":", field_value)

Result:

tag : custom_tag
split : train
captureDateTime : 2023-04-04T15:24:37.229417Z

Bulk export data rows with metadata

# Export options: include performance details, label details, and metadata fields.
export_params = {
    "performance_details": True,
    "label_details": True,
    "metadata_fields": True,
}

# Start the export and block until it finishes.
export_task = dataset.export(params=export_params)
export_task.wait_till_done()

# If the export reported errors, print each one from the error stream.
if export_task.has_errors():
    error_stream = export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS)
    error_stream.start(stream_handler=lambda error: print(error))

# If the export produced results, print each exported data row's JSON.
if export_task.has_result():
    stream = export_task.get_buffered_stream()

    for data_row in stream:
        print(data_row.json)

Result:

{
  "data_row": {
    "id": "clflxqzty07fj077qa3dd4v27",
    "global_key": "metadata_tutorial",
    "row_data": "https://storage.googleapis.com/labelbox-sample-datasets/Docs/basic.jpg"
  },
  "media_attributes": {
    "height": 1285,
    "width": 2258,
    "mime_type": "image/jpeg"
  },
  "metadata_fields": [{
    "schema_id": "cko8s9r5v0001h2dk9elqdidh",
    "schema_name": "tag",
    "value": "tag_string"
  }, {
    "schema_id": "cko8sbczn0002h2dkdaxb5kal",
    "schema_name": "split",
    "value": [{
      "schema_id": "cko8sc2yr0004h2dk69aj5x63",
      "schema_name": "valid"
    }, {
      "schema_id": "cko8scbz70005h2dkastwhgqt",
      "schema_name": "test"
    }]
  }, {
    "schema_id": "cko8sdzv70006h2dk8jg64zvb",
    "schema_name": "captureDateTime",
    "value": "2023-03-24T02:40:40.832576+00:00"
  }]
}

Export metadata by data row ID

You can bulk export the metadata of specific data rows with the SDK, identifying the data rows either by data row ID or by global key.

data_row_ids = ['<data_row_id>']
global_keys = ['<global_key>']

# The data row identifier classes (lb.DataRowIds and lb.GlobalKeys) validate whether
# the provided ID is a global key or a data row ID.
# Additionally, they ensure that all IDs in the provided list are unique.
datarow_identifiers = lb.DataRowIds(data_row_ids)
global_key_identifiers = lb.GlobalKeys(global_keys)

# Use one of the identifiers. The export is called on the metadata ontology object.
exported_metadata = metadata_ontology.bulk_export(data_row_ids=global_key_identifiers)
# exported_metadata = metadata_ontology.bulk_export(data_row_ids=datarow_identifiers)

Delete metadata fields from a data row

global_key = '<global_key>'
schema_ids_to_delete = ['<metadata_schema_id>']
data_row_id = '<data_row_id>'  # alternative identifier; not used in this example

# One deletion entry per data row; here the row is identified by its global key.
deletions = [
    lb.DeleteDataRowMetadata(
        data_row_id=lb.GlobalKey(global_key),
        fields=schema_ids_to_delete,
    ),
]

# Delete the specified metadata on the data row, using the metadata ontology object.
metadata_ontology.bulk_delete(deletes=deletions)

Upsert metadata to existing data rows

Labelbox supports individual or bulk metadata upsert of data rows. Metadata overwrites occur on a per-field basis.

# Look up the schema whose value will be overwritten.
tag_schema = metadata_ontology.get_by_name("tag")

# String field carrying the new value, addressed by schema id.
field = DataRowMetadataField(schema_id=tag_schema.uid, value="updated")

# Attach the field to the target data row. The row is identified by global key here;
# optionally, set the data_row_id argument instead to use a data row ID.
metadata_payload = DataRowMetadata(global_key="<global key>", fields=[field])

# bulk_upsert takes a list of DataRowMetadata objects.
payloads = [metadata_payload]
metadata_ontology.bulk_upsert(payloads)

Update metadata schema

You can update any custom metadata schema's name. However, the type cannot be modified. You also cannot modify the names of reserved fields.

# Update a metadata schema's name.
metadata_schema = metadata_ontology.update_schema(
    name="metadata_name",
    new_name="metadata_name_updated",
)

# Enum metadata schemas are a bit different since they contain options.
# Create an enum metadata schema with options.
enum_schema = metadata_ontology.create_schema(
    name="enum_metadata_name",
    kind=DataRowMetadataKind.enum,
    options=["option 1", "option 2"],
)

# Update an enum metadata schema's name, the same way as for other schema kinds.
enum_schema = metadata_ontology.update_schema(
    name="enum_metadata_name",
    new_name="enum_metadata_name_updated",
)

# Update an option's name; this only applies to enum metadata schemas.
# `name` must be the schema's current name, i.e. the one set by the rename above.
enum_schema = metadata_ontology.update_enum_option(
    name="enum_metadata_name_updated",
    option="option 1",
    new_option="option 3",
)

Delete metadata schema

You can delete a metadata schema by name.

# Delete the schema identified by its name; the call returns True if the
# schema was successfully deleted.
schema_name = metadata_schema.name
status = metadata_ontology.delete_schema(name=schema_name)