How to upload predictions on LLM response evaluation data in a model run and sample upload formats.
Supported predictions
To upload predictions in Labelbox, you need to create a prediction payload. This section shows how to create a payload for each supported prediction type.
Samples are shown for the following supported payload formats:
- Python annotation types (recommended)
- NDJSON
Both are described below.
Entity (Message-based)
# Python annotation type: message-based entity (NER) prediction.
# The entity spans characters [0, 8) of the message identified by message_id.
_ner_entity = lb_types.ConversationEntity(start=0, end=8, message_id="message-1")
ner_prediction = lb_types.ObjectAnnotation(
    name="ner",
    confidence=0.5,
    value=_ner_entity,
)
# NDJSON equivalent of the entity prediction above.
ner_prediction_ndjson = {
    "name": "ner",
    "confidence": 0.5,
    "location": {"start": 0, "end": 8},
    "messageId": "message-1",
}
Classification: Radio (Single-choice)
# Python annotation type: radio (single-choice) prediction.
_radio_answer = lb_types.ClassificationAnswer(name="Response B", confidence=0.5)
radio_prediction = lb_types.ClassificationAnnotation(
    name="Choose the best response",
    value=lb_types.Radio(answer=_radio_answer),
)
# NDJSON equivalent of the radio prediction above.
radio_prediction_ndjson = {
    "name": "Choose the best response",
    "answer": {"name": "Response B", "confidence": 0.5},
}
Classification: Checklist (Multi-choice, Message-based)
# Python annotation type: message-based checklist (multi-choice) prediction.
_checklist_answers = [
    lb_types.ClassificationAnswer(name="first_checklist_answer", confidence=0.5),
    lb_types.ClassificationAnswer(name="second_checklist_answer", confidence=0.5),
]
checklist_prediction = lb_types.ClassificationAnnotation(
    name="checklist_convo",  # must match your ontology feature's name
    value=lb_types.Checklist(answer=_checklist_answers),
    message_id="message-1",  # message-specific annotation
)
# NDJSON equivalent of the checklist prediction above.
checklist_prediction_ndjson = {
    "name": "checklist_convo",
    "answers": [
        {"name": "first_checklist_answer", "confidence": 0.5},
        {"name": "second_checklist_answer", "confidence": 0.5},
    ],
    "messageId": "message-1",
}
Classification: Free-form text
# Python annotation type: free-form text prediction.
text_prediction = lb_types.ClassificationAnnotation(
    name="Provide a reason for your choice",
    value=lb_types.Text(
        answer="the answer to the text questions right here",
        confidence=0.5,
    ),
)
# NDJSON equivalent of the text prediction above. The answer text matches the
# Python annotation payload, since both formats are documented (below) as
# carrying exactly the same prediction content.
text_prediction_ndjson = {
    "name": "Provide a reason for your choice",
    "answer": "the answer to the text questions right here",
    "confidence": 0.5,
}
Classification: Nested checklist (Message-based)
# Python annotation type: nested checklist, message-based. The top-level answer
# carries a sub-classification; confidence scores are attached to the answers.
_sub_checklist = lb_types.ClassificationAnnotation(
    name="sub_checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_sub_checklist_answer",
                confidence=0.5,  # confidence scores should be added to the answer
            )
        ]
    ),
)
nested_checklist_prediction = lb_types.ClassificationAnnotation(
    name="nested_checklist_question",
    message_id="message-1",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_checklist_answer",
                confidence=0.5,  # confidence scores should be added to the answer
                classifications=[_sub_checklist],
            )
        ]
    ),
)
# NDJSON equivalent of the nested checklist prediction above.
nested_checklist_prediction_ndjson = {
    "name": "nested_checklist_question",
    "messageId": "message-1",
    "answer": [
        {
            "name": "first_checklist_answer",
            "confidence": 0.5,  # confidence scores should be added to the answer
            "classifications": [
                {
                    "name": "sub_checklist_question",
                    "answer": {
                        "name": "first_sub_checklist_answer",
                        "confidence": 0.5,  # confidence scores should be added to the answer
                    },
                }
            ],
        }
    ],
}
Classification: Nested radio
# Python annotation type: nested radio. The selected answer carries a
# sub-classification; confidence scores are attached to the answers.
_sub_radio = lb_types.ClassificationAnnotation(
    name="sub_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_sub_radio_answer",
            confidence=0.5,  # confidence scores should be added to the answer
        )
    ),
)
nested_radio_prediction = lb_types.ClassificationAnnotation(
    name="nested_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_radio_answer",
            confidence=0.5,  # confidence scores should be added to the answer
            classifications=[_sub_radio],
        )
    ),
)
# NDJSON equivalent of the nested radio prediction above.
nested_radio_prediction_ndjson = {
    "name": "nested_radio_question",
    "answer": {
        "name": "first_radio_answer",
        "confidence": 0.5,
        "classifications": [
            {
                "name": "sub_radio_question",
                "answer": {"name": "first_sub_radio_answer", "confidence": 0.5},
            }
        ],
    },
}
Example: Upload predictions to a model run
Here are the steps to upload predictions to a model run:
Before you start
You must import these libraries to use the code examples in this section.
import uuid

import labelbox as lb
# Required: every prediction payload in this guide is built with lb_types
# (e.g. lb_types.ObjectAnnotation, lb_types.Label), which the original
# import list omitted.
import labelbox.types as lb_types
Replace the value of API_KEY
with a valid API key to connect to the Labelbox client.
# Replace None with a valid Labelbox API key before running these snippets.
API_KEY = None
client = lb.Client(API_KEY)
Step 1: Import data rows
Upload your data rows to Catalog. To learn how to format your import JSON files, see Import conversation & model response data.
Note the additional modelOutputs
object required. For descriptions, see Model outputs object definitions.
# Global key identifying this data row; referenced by every payload below.
global_key = "pairwise_shooping_asset"

conversation = {
    "row_data": "https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_2.json",
    "global_key": global_key,
}

# Create a dataset and import the single conversational data row into it.
dataset = client.create_dataset(name="pairwise_prediction_demo")
task = dataset.create_data_rows([conversation])
print("Errors:", task.errors)
print("Failed data rows:", task.failed_data_rows)
# Sample conversational import JSON — presumably the content of the hosted
# pairwise_shopping_2.json referenced by row_data above (TODO confirm).
# Note the additional modelOutputs array holding the two model responses
# ("Response A" / "Response B") that the predictions compare.
pairwise_shopping_2 = """
{
"type": "application/vnd.labelbox.conversational",
"version": 1,
"messages": [
{
"messageId": "message-0",
"timestampUsec": 1530718491,
"content": "Hi! How can I help?",
"user": {
"userId": "Bot 002",
"name": "Bot"
},
"align": "left",
"canLabel": false
},
{
"messageId": "message-1",
"timestampUsec": 1530718503,
"content": "I just bought a vacuum cleaner from you guys like a week ago and it's already broken!!",
"user": {
"userId": "User 00686",
"name": "User"
},
"align": "right",
"canLabel": true
}
],
"modelOutputs": [
{
"title": "Response A",
"content": "I'm really sorry to hear that your vacuum cleaner is not working as expected. We certainly understand how frustrating this can be, especially with a recent purchase. I assure you, we're here to help!\n\n To proceed with resolving this issue, could you please provide some details about the problem you're experiencing with the vacuum cleaner? Additionally, if you could share your order number or receipt details, that would be very helpful. We will definitely look into the available options for you, which may include a replacement or a refund, depending on the specific circumstances and our return policy.\n\n Your satisfaction is our priority, and we'll do our best to resolve this matter promptly and to your satisfaction.",
"modelConfigName": "GPT-3.5 with temperature 0"
},
{
"title": "Response B",
"content": "I'm sorry about the issue with your vacuum cleaner. Please send us the order number or receipt details so we can quickly assist you with a replacement. Your satisfaction is important to us!",
"modelConfigName": "Fine Tuned GPT-3.5 with demo data"
}
]
}
"""
Step 2: Create/select an ontology for your model predictions
Your model run should have the correct ontology set up with all the tools and classifications supported for your predictions.
Here is an example of creating an ontology programmatically for all the example predictions above:
# Ontology containing one NER tool plus every classification used by the
# example predictions above. Scope.INDEX makes a classification message-based;
# Scope.GLOBAL applies it to the whole conversation.
ontology_builder = lb.OntologyBuilder(
    tools=[lb.Tool(tool=lb.Tool.Type.NER, name="ner")],
    classifications=[
        lb.Classification(
            class_type=lb.Classification.Type.RADIO,
            scope=lb.Classification.Scope.GLOBAL,
            name="Choose the best response",
            options=[
                lb.Option(value="Response A"),
                lb.Option(value="Response B"),
                lb.Option(value="Tie"),
            ],
        ),
        lb.Classification(
            class_type=lb.Classification.Type.TEXT,
            name="Provide a reason for your choice",
        ),
        lb.Classification(
            class_type=lb.Classification.Type.CHECKLIST,
            scope=lb.Classification.Scope.INDEX,
            name="checklist_convo",
            options=[
                lb.Option(value="first_checklist_answer"),
                lb.Option(value="second_checklist_answer"),
            ],
        ),
        lb.Classification(
            class_type=lb.Classification.Type.CHECKLIST,
            scope=lb.Classification.Scope.INDEX,
            name="nested_checklist_question",
            options=[
                lb.Option(
                    value="first_checklist_answer",
                    options=[
                        lb.Classification(
                            class_type=lb.Classification.Type.CHECKLIST,
                            name="sub_checklist_question",
                            options=[lb.Option(value="first_sub_checklist_answer")],
                        )
                    ],
                )
            ],
        ),
        lb.Classification(
            class_type=lb.Classification.Type.RADIO,
            scope=lb.Classification.Scope.GLOBAL,
            name="nested_radio_question",
            options=[
                lb.Option(
                    value="first_radio_answer",
                    options=[
                        lb.Classification(
                            class_type=lb.Classification.Type.RADIO,
                            name="sub_radio_question",
                            options=[lb.Option(value="first_sub_radio_answer")],
                        )
                    ],
                )
            ],
        ),
    ],
)
ontology = client.create_ontology(
    "Pairwise comparison ontology",
    ontology_builder.asdict(),
    media_type=lb.MediaType.Conversational,
)
Step 3: Create a model and a model run
Create a model using the ontology, then create a model run for it.
# Create a model tied to the ontology, then an initial model run (iteration).
model = client.create_model(
    name="Comparison_model_run_" + str(uuid.uuid4()),
    ontology_id=ontology.uid,
)
model_run = model.create_model_run("iteration 1")
Step 4: Send data rows to the model run
# Attach the data row (looked up by its global key) to the model run.
model_run.upsert_data_rows(global_keys=[global_key])
Step 5: Create the predictions payload
Create the predictions payload using the snippets of code shown above.
Labelbox supports two formats for the annotations payload: NDJSON and Python annotation types. Both approaches are described below with instructions to compose annotations into Labels attached to the data rows.
The resulting label_ndjson
and label_prediction
payloads should have exactly the same prediction content.
# Compose all Python-annotation predictions into one Label on the data row.
label_prediction = [
    lb_types.Label(
        data={"global_key": global_key},
        annotations=[
            ner_prediction,
            text_prediction,
            checklist_prediction,
            radio_prediction,
            nested_radio_prediction,
            nested_checklist_prediction,
        ],
    )
]
# Attach the target data row (by global key) to each NDJSON prediction.
# Each dict is mutated in place, then collected into the upload list.
label_ndjson = []
for prediction in (
    ner_prediction_ndjson,
    text_prediction_ndjson,
    checklist_prediction_ndjson,
    radio_prediction_ndjson,
    nested_checklist_prediction_ndjson,
    nested_radio_prediction_ndjson,
):
    prediction["dataRow"] = {"globalKey": global_key}
    label_ndjson.append(prediction)
Step 6: Upload predictions payload to the model run
# Import the prediction payload into the model run.
upload_job_prediction = model_run.add_predictions(
    name="prediction_upload_job" + str(uuid.uuid4()),
    predictions=label_prediction,
)
# NOTE(review): step 7 calls wait_until_done() before reading errors; consider
# doing the same here — confirm against the SDK docs for add_predictions.
# Errors will appear for annotation uploads that failed.
print("Errors:", upload_job_prediction.errors)
print("Status of uploads: ", upload_job_prediction.statuses)
Step 7: Send annotations to the model run (optional)
We will create a project with ground truth annotations to visualize both annotations and predictions in the model run.
To send annotations to a model run, we must first import them into a project, create a label payload, and then send them to the model run.
# 7.1 Create a labelbox project and attach the same ontology.
project = client.create_project(
    name="Conversational Human Evaluation Demo",
    media_type=lb.MediaType.Conversational,
)
project.connect_ontology(ontology)

# 7.2 Batch the data row into the project.
project.create_batch(
    "batch_convo_prediction_demo",  # each batch in a project must have a unique name
    global_keys=[global_key],  # paginated collection, data row ids, or global keys
    priority=5,  # priority between 1 (highest) and 5 (lowest)
)
# 7.3 Create the annotations payload
# See here for more details:
# https://labelbox-group.readme.io/reference/import-conversational-text-annotations#supported-annotations
ner_annotation ...
text_annotation ...
checklist_annotation ...
radio_annotation ...
nested_radio_annotation ...
nested_checklist_annotation ...
# 7.4 Compose the ground-truth annotations into one Label on the data row.
label_annotation = [
    lb_types.Label(
        data={"global_key": global_key},
        annotations=[
            ner_annotation,
            text_annotation,
            checklist_annotation,
            radio_annotation,
            nested_radio_annotation,
            nested_checklist_annotation,
        ],
    )
]
# 7.5 Import the annotations into the project via LabelImport.
upload_job_annotation = lb.LabelImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name="label_import_job" + str(uuid.uuid4()),
    labels=label_annotation,
)
upload_job_annotation.wait_until_done()
# Errors will appear for annotation uploads that failed.
print("Errors:", upload_job_annotation.errors)
print("Status of uploads: ", upload_job_annotation.statuses)
# 7.6 Send the annotations to the model run, so both annotations and
# predictions can be visualized there (see the note at the top of step 7).
model_run.upsert_labels(project_id=project.uid)