Import audio annotations

How to import annotations on audio data and sample import formats.

Open In Colab

Overview

To import annotations in Labelbox, you need to create an annotations payload. In this section, we provide this payload for every supported annotation type.

Labelbox supports two formats for the annotations payload:

  • Python annotation types (recommended)
  • NDJSON

Radio (single choice)

radio_annotation = lb_types.ClassificationAnnotation(
    name="radio_question",
    value=lb_types.Radio(answer = 
        lb_types.ClassificationAnswer(name = "first_radio_answer")
    )
)
radio_annotation_ndjson = {
  "name": "radio_question",
  "answer": {"name": "first_radio_answer"}
}

Checklist (multiple choice)

checklist_annotation = lb_types.ClassificationAnnotation(
    name="checklist_question",
    value=lb_types.Checklist(answer = [
        lb_types.ClassificationAnswer(name = "first_checklist_answer"),
        lb_types.ClassificationAnswer(name = "second_checklist_answer"),
        lb_types.ClassificationAnswer(name = "third_checklist_answer")
    ])
  )
checklist_annotation_ndjson = {
  "name": "checklist_question",
  "answer": [
    {"name": "first_checklist_answer"},
    {"name": "second_checklist_answer"},
    {"name": "third_checklist_answer"},
  ]
}

Free-Form Text

text_annotation = lb_types.ClassificationAnnotation(
    name = "free_text", 
    value = lb_types.Text(answer="sample text")
)
text_annotation_ndjson = {
  "name": "free_text",
  "answer": "sample text",
}

Nested Classifications

nested_radio_annotation = lb_types.ClassificationAnnotation(
  name="nested_radio_question",
  value=lb_types.Radio(
    answer=lb_types.ClassificationAnswer(
      name="first_radio_answer",
      classifications=[
        lb_types.ClassificationAnnotation(
          name="sub_radio_question",
          value=lb_types.Radio(
            answer=lb_types.ClassificationAnswer(
              name="first_sub_radio_answer"
            )
          )
        )
      ]
    )
  )
)

nested_checklist_annotation = lb_types.ClassificationAnnotation(
  name="nested_checklist_question",
  value=lb_types.Checklist(
    answer=[lb_types.ClassificationAnswer(
      name="first_checklist_answer",
      classifications=[
        lb_types.ClassificationAnnotation(
          name="sub_checklist_question",
          value=lb_types.Checklist(
            answer=[lb_types.ClassificationAnswer(
            name="first_sub_checklist_answer"
          )]
        ))
      ]
    )]
  )
)
nested_radio_annotation_ndjson= {
  'name': 'nested_radio_question',
  'answer': {
      'name': 'first_radio_answer',
      'classifications': [
        {
          'name':'sub_radio_question',
          'answer': { 'name' : 'first_sub_radio_answer'}
        }
      ]
    }
}

nested_checklist_annotation_ndjson = {
  "name": "nested_checklist_question",
  "answer": [{
      "name": "first_checklist_answer", 
      "classifications" : [
        {
          "name": "sub_checklist_question", 
          "answer": {"name": "first_sub_checklist_answer"}
        }          
      ]         
  }]
}

Example: Import pre-labels or ground truth

The steps to import annotations as pre-labels (machine-assisted learning) are similar to those to import annotations as ground truth labels. However, they vary slightly, and we will describe the differences for each scenario.

Before you start

The below imports are needed to use the code examples in this section.

import labelbox as lb
import uuid
import labelbox.types as lb_types

Replace API key

Paste your API key as the value of the API_KEY variable:

API_KEY = None
client = lb.Client(API_KEY)

Step 1: Import data rows into Catalog

global_key = "sample-audio-1.mp3"

asset = {
    "row_data": "https://storage.googleapis.com/labelbox-datasets/audio-sample-data/sample-audio-1.mp3",
    "global_key": global_key
}

dataset = client.create_dataset(name="audio_annotation_import_demo_dataset")
task = dataset.create_data_rows([asset])
task.wait_till_done()
print("Errors:", task.errors)
print("Failed data rows: ", task.failed_data_rows)

Step 2: Set up ontology

Your project ontology should support all tools and classifications required by your annotations. To ensure correct schema feature matches, tool names and classification names should match the name field in your annotation.

To illustrate, imagine that you used the name text_audio when you created a text classification. When setting up the ontology, you must use text_audio as the value of the name when defining the classifications of your ontology. In addition, you must follow a similar process when defining other tools and classifications in your ontology.

ontology_builder = lb.OntologyBuilder(
  classifications=[ 
    lb.Classification( 
      class_type=lb.Classification.Type.TEXT,
      name="text_audio"), 
    lb.Classification( 
      class_type=lb.Classification.Type.CHECKLIST,                   
      name="checklist_audio", 
      options=[
        lb.Option(value="first_checklist_answer"),
        lb.Option(value="second_checklist_answer")            
      ]
    ), 
    lb.Classification( 
      class_type=lb.Classification.Type.RADIO, 
      name="radio_audio", 
      options=[
        lb.Option(value="first_radio_answer"),
        lb.Option(value="second_radio_answer")
      ]
    )
  ]
)

ontology = client.create_ontology("Ontology Audio Annotations", 
                                  ontology_builder.asdict(), 
                                  media_type=lb.MediaType.Audio)

Step 3: Create a labeling project

Connect the ontology to the labeling project.

 #Create Labelbox project
project = client.create_project(name="audio_project", 
                                    media_type=lb.MediaType.Audio)

# Setup your ontology 
project.setup_editor(ontology) 

Step 4: Send data rows to the project


# Create a batch to send to your MAL project
batch = project.create_batch(
  "first-batch-audio-demo", # Each batch in a project must have a unique name
  global_keys=[global_key], # Paginated collection of data row objects, list of data row ids or global keys
  priority=5 # priority between 1(Highest) - 5(lowest)
)

print("Batch: ", batch)

Step 5: Create annotation payloads

See Supported annotations for help creating annotation payloads.

You can declare annotation payloads as Python annotation types (preferred) or NDJSON objects.

label = []
label.append(
  lb_types.Label(
    data={"global_key" : global_key }),
    annotations=[
      text_annotation,
      checklist_annotation,
      radio_annotation
    ]
  )
)
label_ndjson = []
for annotations in [text_annotation_ndjson,
                    checklist_annotation_ndjson,
                    radio_annotation_ndjson]:
  annotations.update({
      'dataRow': {
          'globalKey': global_key
      }
  })
  label_ndjson.append(annotations)

Step 6: Import annotation payload

For prelabeled (model-assisted labeling) scenarios, pass your payload as the value of the predictions parameter. For ground truths, pass the payload to the labels parameter.

Option A: Upload as prelabels (model-assisted labeling)

upload_job = lb.MALPredictionImport.create_from_objects(
    client = client, 
    project_id = project.uid, 
    name=f"mal_job-{str(uuid.uuid4())}", 
    predictions=label)

upload_job.wait_until_done();
print("Errors:", upload_job.errors)
print("Status of uploads: ", upload_job.statuses)

Option B: Upload as ground truth

upload_job = lb.LabelImport.create_from_objects(
    client = client, 
    project_id = project.uid, 
    name="label_import_job"+str(uuid.uuid4()),  
    labels=label)

upload_job.wait_until_done();
print("Errors:", upload_job.errors)
print("Status of uploads: ", upload_job.statuses)