Upload HTML predictions

How to upload predictions on HTML data to a model run, with sample upload formats.

Open this Colab for an interactive tutorial on uploading predictions on HTML data in a model run.

Supported annotations

To import annotations into Labelbox, you need to create an annotations payload. This section shows the payload format for each supported annotation type.

Classification: Radio (single-choice)

radio_prediction = lb_types.ClassificationAnnotation(
  name="radio_question",
  value=lb_types.Radio(answer=lb_types.ClassificationAnswer(name="first_radio_answer", confidence=0.5))
)

radio_prediction_ndjson = {
  'name': 'radio_question',
  'answer': {'name': 'first_radio_answer', 'confidence': 0.5}
}

Classification: Nested radio

nested_radio_prediction = lb_types.ClassificationAnnotation(
  name="nested_radio_question",
  value=lb_types.Radio(
    answer=lb_types.ClassificationAnswer(
      name="first_radio_answer",
      confidence=0.5, # Confidence scores should be added to the answer
      classifications=[
        lb_types.ClassificationAnnotation(
          name="sub_radio_question",
          value=lb_types.Radio(
            answer=lb_types.ClassificationAnswer(
              name="first_sub_radio_answer", confidence=0.5)
          )
        )
      ]
    )
  )
)
nested_radio_prediction_ndjson = {
  "name": "nested_radio_question",
  "answer": {
      "name": "first_radio_answer",
      "confidence": 0.5, # Confidence scores should be added to the answer 
      "classifications": [{
          "name":"sub_radio_question",
          "answer": { "name" : "first_sub_radio_answer", "confidence": 0.5 }
        }]
    }
}

Classification: Nested checklist

nested_checklist_prediction = lb_types.ClassificationAnnotation(
  name="nested_checklist_question",
  value=lb_types.Checklist(
    answer=[lb_types.ClassificationAnswer(
      name="first_checklist_answer",
      confidence=0.5, # Confidence scores should be added to the answer 
      classifications=[
        lb_types.ClassificationAnnotation(
          name="sub_checklist_question",
          value=lb_types.Checklist(
            answer=[lb_types.ClassificationAnswer(
              name="first_sub_checklist_answer",
              confidence=0.5
            )]
          )
        )
      ]
    )]
  )
)
nested_checklist_prediction_ndjson = {
  "name": "nested_checklist_question",
  "answer": [{
      "name": "first_checklist_answer", 
      "confidence": 0.5, # Confidence scores should be added to the answer 
      "classifications" : [
        {
          "name": "sub_checklist_question", 
          "answer": {"name": "first_sub_checklist_answer", "confidence": 0.5 }
        }          
      ]         
  }]
}

Classification: Checklist (multi-choice)

checklist_prediction = lb_types.ClassificationAnnotation(
    name="checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_checklist_answer",
                confidence=0.5
            ),
            lb_types.ClassificationAnswer(
                name="second_checklist_answer",
                confidence=0.5
            ),
            lb_types.ClassificationAnswer(
                name="third_checklist_answer",
                confidence=0.5
            )
        ]
    )
)
checklist_prediction_ndjson = {
  'name': 'checklist_question',
  'answer': [
    {'name': 'first_checklist_answer', 'confidence': 0.5},
    {'name': 'second_checklist_answer', 'confidence': 0.5},
    {'name': 'third_checklist_answer', 'confidence': 0.5}
  ]
}

Classification: Free-form text

text_prediction = lb_types.ClassificationAnnotation(
    name = "free_text", 
    value = lb_types.Text(answer="sample text", confidence=0.5)
)
text_prediction_ndjson = {
  'name': 'free_text',
  'answer': 'sample text',
  'confidence':0.5
}

End-to-end example: Upload predictions to a Model Run

Follow the steps below to upload predictions to a Model Run.

Before you start

You will need to import these libraries to use the code examples in this section.

import labelbox as lb
import labelbox.types as lb_types
import uuid
import numpy as np

Replace with your API Key

To learn how to create an API key, please follow the instructions on this page.

API_KEY = ""
client = lb.Client(API_KEY)

Step 1: Import data rows into Catalog

global_key ="sample_html_2.html"

test_img_url = {
    "row_data": "https://storage.googleapis.com/labelbox-datasets/html_sample_data/sample_html_2.html",
    "global_key": global_key
}
dataset = client.create_dataset(
    name="html prediction demo dataset", 
    iam_integration=None # Removing this argument will default to the organziation's default iam integration
)
task = dataset.create_data_rows([test_img_url])
task.wait_till_done()
print("Errors:",task.errors)
print("Failed data rows:", task.failed_data_rows)

Step 2: Create/select an Ontology for your model predictions

Your project should have the correct ontology set up, with all the tools and classifications needed for your annotations. The tool and classification names must match the name field in your annotation payloads so that the correct feature schemas are matched.

ontology_builder = lb.OntologyBuilder(
  classifications=[ # List of Classification objects
    lb.Classification( 
      class_type=lb.Classification.Type.RADIO, 
      name="radio_question",  # name matching the tool used in the annotation
      options=[lb.Option(value="first_radio_answer")]
    ),
    lb.Classification( 
      class_type=lb.Classification.Type.RADIO, 
      name="nested_radio_question", 
      options=[
        lb.Option(value="first_radio_answer",
          options=[
              lb.Classification(
                class_type=lb.Classification.Type.RADIO,
                name="sub_radio_question",
                options=[
                  lb.Option(value="first_sub_radio_answer")
                ]
            ),
          ]
        )
      ],
    ),
    lb.Classification( 
      class_type=lb.Classification.Type.CHECKLIST, 
      name="checklist_question", 
      options=[
        lb.Option(value="first_checklist_answer"),
        lb.Option(value="second_checklist_answer"), 
        lb.Option(value="third_checklist_answer")            
      ]
    ), 
     lb.Classification( 
      class_type=lb.Classification.Type.TEXT,
      name="free_text"
    ),
    lb.Classification(
      class_type=lb.Classification.Type.CHECKLIST, 
      name="nested_checklist_question",
      options=[
          lb.Option("first_checklist_answer",
            options=[
              lb.Classification(
                  class_type=lb.Classification.Type.CHECKLIST, 
                  name="sub_checklist_question", 
                  options=[lb.Option("first_sub_checklist_answer")]
              )
          ]
        )
      ]
    )
  ]
)

ontology = client.create_ontology("Ontology HTML Predictions", 
                                  ontology_builder.asdict(), 
                                  media_type=lb.MediaType.Html)

Step 3: Create a Model and Model Run

# Create a Model
model = client.create_model(name="HTML_model_run_" + str(uuid.uuid4()),
                            ontology_id=ontology.uid)
# Create a Model Run
model_run = model.create_model_run("iteration 1")

Step 4: Send data rows to the Model Run

model_run.upsert_data_rows(global_keys=[global_key])

Step 5: Create the predictions payload

Create the annotations payload using the snippets of code shown above.

Labelbox supports two formats for the annotations payload: NDJSON and Python annotation types. Both approaches are described below with instructions to compose annotations into Labels attached to the data rows.

The resulting label_prediction and label_prediction_ndjson from each approach will include every annotation (created above) supported by the respective method.


label_prediction = []
label_prediction.append(
  lb_types.Label(
    data=lb_types.HTMLData(global_key=global_key),
    annotations = [
      radio_prediction, 
      checklist_prediction,
      text_prediction,
      nested_checklist_prediction,
      nested_radio_prediction
    ]
  )
)
label_prediction_ndjson = []
for annot in [
    radio_prediction_ndjson, 
    nested_radio_prediction_ndjson,
    checklist_prediction_ndjson,
    text_prediction_ndjson,
    nested_checklist_prediction_ndjson
]:
  annot.update({
      "dataRow": {"globalKey": global_key},
  })
  label_prediction_ndjson.append(annot)

Step 6: Upload the predictions payload to the Model Run

# Upload the prediction label to the Model Run
upload_job_prediction = model_run.add_predictions(
    name="prediction_upload_job" + str(uuid.uuid4()),
    predictions=label_prediction)

# Wait for the upload to finish before checking for errors
upload_job_prediction.wait_until_done()

# Errors will appear for annotation uploads that failed.
print("Errors:", upload_job_prediction.errors)
print("Status of uploads: ", upload_job_prediction.statuses)

Step 7: Send annotations to the Model Run (Optional)

To send annotations to a Model Run, you must first import them into a project, create a label payload, and then send the labels to the Model Run.

# 7.1 Create a Labelbox project
project = client.create_project(name="HTML prediction import demo",
                                queue_mode=lb.QueueMode.Batch,
                                media_type=lb.MediaType.Html)
project.setup_editor(ontology)

# 7.2 Create a batch to send to the project
project.create_batch(
  "batch_prediction_html", # Each batch in a project must have a unique name
  global_keys=[global_key], # Paginated collection of data row objects, list of data row ids or global keys
  priority=5 # Priority between 1 (highest) and 5 (lowest)
)

# 7.3 Create the annotations payload
# These mirror the prediction payloads above, without confidence scores (see the sketch below).
radio_annotation...
nested_radio_annotation...
nested_checklist_annotation...
checklist_annotation...
text_annotation...
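
# A minimal sketch of the annotation payloads. It assumes they mirror the
# prediction payloads shown earlier in this guide, with confidence removed:
radio_annotation = lb_types.ClassificationAnnotation(
  name="radio_question",
  value=lb_types.Radio(answer=lb_types.ClassificationAnswer(name="first_radio_answer"))
)

nested_radio_annotation = lb_types.ClassificationAnnotation(
  name="nested_radio_question",
  value=lb_types.Radio(
    answer=lb_types.ClassificationAnswer(
      name="first_radio_answer",
      classifications=[
        lb_types.ClassificationAnnotation(
          name="sub_radio_question",
          value=lb_types.Radio(
            answer=lb_types.ClassificationAnswer(name="first_sub_radio_answer")
          )
        )
      ]
    )
  )
)

nested_checklist_annotation = lb_types.ClassificationAnnotation(
  name="nested_checklist_question",
  value=lb_types.Checklist(
    answer=[lb_types.ClassificationAnswer(
      name="first_checklist_answer",
      classifications=[
        lb_types.ClassificationAnnotation(
          name="sub_checklist_question",
          value=lb_types.Checklist(
            answer=[lb_types.ClassificationAnswer(name="first_sub_checklist_answer")]
          )
        )
      ]
    )]
  )
)

checklist_annotation = lb_types.ClassificationAnnotation(
  name="checklist_question",
  value=lb_types.Checklist(
    answer=[
      lb_types.ClassificationAnswer(name="first_checklist_answer"),
      lb_types.ClassificationAnswer(name="second_checklist_answer"),
      lb_types.ClassificationAnswer(name="third_checklist_answer")
    ]
  )
)

text_annotation = lb_types.ClassificationAnnotation(
  name="free_text",
  value=lb_types.Text(answer="sample text")
)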

# 7.4 Create the label object
label = []
label.append(
  lb_types.Label(
    data=lb_types.HTMLData(
      global_key=global_key
    ),
    annotations=[
      text_annotation,
      checklist_annotation,
      radio_annotation,
      nested_checklist_annotation,
      nested_radio_annotation
    ]
  )
)

# 7.5 Upload annotations to the project using Label Import
upload_job_annotation = lb.LabelImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name="html_annotation_import" + str(uuid.uuid4()),
    labels=label)

upload_job_annotation.wait_until_done()
# Errors will appear for annotation uploads that failed.
print("Errors:", upload_job_annotation.errors)
print("Status of uploads: ", upload_job_annotation.statuses)

# 7.6 Send the annotations to the Model Run
model_run.upsert_labels(project_id=project.uid)