Released in late 2022, the Spoken Task-Oriented semantic Parsing (STOP) dataset is one of the most recent, and most complex, datasets for end-to-end spoken language understanding tasks. It contains a greater number of speakers, audio files, and audio duration than the SLURP, Snips, and Fluent Speech Commands datasets. As such, it makes sense that I leverage this dataset for my initial research.
Domains, intents, and slots
The STOP dataset builds upon TOPv2, which is a dataset consisting of text-only inputs with nested queries across eight domains, by collecting audio samples for each utterance.
TOPv2 Dataset Statistics
Although the paper introducing TOPv2 clearly describes the statistics across each domain as shown in the image above, I have been unable to find a breakdown listing the specific intents and slots found in the utterances for each domain.
Since I want to understand the STOP dataset more deeply, I decided to write a script that parses the manifest files (eval.tsv, test.tsv, train.tsv) to pull out the unique intents and slots, in addition to showing which slots belong to which intents. The output from the script, and the script itself, can be found below.
Manifest parsing script output -------------------------------------------
STOP Dataset - Domains, Intents, and Slots
-------------------------------------------
DOMAIN | IN | SL |
------------------------
ALARM | 8 | 9 |
EVENT | 11 | 17 |
MESSAGING | 12 | 27 |
MUSIC | 15 | 9 |
NAVIGATION | 17 | 33 |
REMINDER | 19 | 32 |
TIMER | 11 | 5 |
WEATHER | 7 | 11 |
------------------------
STOP Dataset Summary
Domain: ALARM
Intents (8): ['CREATE_ALARM', 'DELETE_ALARM', 'GET_ALARM', 'GET_TIME', 'SILENCE_ALARM', 'SNOOZE_ALARM', 'UNSUPPORTED_ALARM', 'UPDATE_ALARM']
Slots (9): ['ALARM_NAME', 'AMOUNT', 'DATE_TIME', 'DATE_TIME_RECURRING', 'DURATION', 'ORDINAL', 'PERIOD', 'RECURRING_DATE_TIME', 'TIME_ZONE']
Slots by intent:
1. CREATE_ALARM: ['ALARM_NAME', 'AMOUNT', 'DATE_TIME', 'DATE_TIME_RECURRING', 'DURATION', 'ORDINAL', 'PERIOD', 'RECURRING_DATE_TIME']
2. DELETE_ALARM: ['ALARM_NAME', 'AMOUNT', 'DATE_TIME', 'DATE_TIME_RECURRING', 'DURATION', 'ORDINAL', 'PERIOD']
3. GET_ALARM: ['ALARM_NAME', 'AMOUNT', 'DATE_TIME', 'DATE_TIME_RECURRING', 'DURATION', 'ORDINAL', 'PERIOD']
4. GET_TIME: ['DATE_TIME', 'TIME_ZONE']
5. SILENCE_ALARM: ['ALARM_NAME', 'AMOUNT', 'DATE_TIME', 'DATE_TIME_RECURRING', 'DURATION', 'ORDINAL', 'PERIOD']
6. SNOOZE_ALARM: ['ALARM_NAME', 'AMOUNT', 'DATE_TIME', 'DATE_TIME_RECURRING', 'DURATION', 'ORDINAL', 'PERIOD']
7. UNSUPPORTED_ALARM: ['ALARM_NAME', 'AMOUNT', 'DATE_TIME', 'DATE_TIME_RECURRING', 'DURATION', 'ORDINAL', 'PERIOD']
8. UPDATE_ALARM: ['ALARM_NAME', 'AMOUNT', 'DATE_TIME', 'DATE_TIME_RECURRING', 'DURATION', 'ORDINAL', 'PERIOD']
Domain: ALARM
Domain: EVENT
Intents (11): ['GET_CONTACT', 'GET_EVENT', 'GET_EVENT_ATTENDEE', 'GET_EVENT_ATTENDEE_AMOUNT', 'GET_EVENT_ORGANIZER', 'GET_LOCATION', 'GET_LOCATION_HOME', 'GET_LOCATION_SCHOOL', 'GET_LOCATION_WORK', 'NEGATION', 'UNSUPPORTED_EVENT']
Slots (17): ['AMOUNT', 'ATTENDEE_EVENT', 'ATTRIBUTE_EVENT', 'CATEGORY_EVENT', 'CATEGORY_LOCATION', 'CONTACT', 'CONTACT_RELATED', 'DATE_TIME', 'LOCATION', 'LOCATION_MODIFIER', 'LOCATION_USER', 'NAME_EVENT', 'ORDINAL', 'ORGANIZER_EVENT', 'POINT_ON_MAP', 'SEARCH_RADIUS', 'TYPE_RELATION']
Slots by intent:
1. GET_CONTACT: ['CONTACT', 'CONTACT_RELATED', 'TYPE_RELATION']
2. GET_EVENT: ['AMOUNT', 'ATTENDEE_EVENT', 'ATTRIBUTE_EVENT', 'CATEGORY_EVENT', 'DATE_TIME', 'LOCATION', 'NAME_EVENT', 'ORDINAL', 'ORGANIZER_EVENT']
3. GET_EVENT_ATTENDEE: ['ATTENDEE_EVENT', 'CATEGORY_EVENT', 'DATE_TIME', 'ORGANIZER_EVENT']
4. GET_EVENT_ATTENDEE_AMOUNT: ['CATEGORY_EVENT', 'DATE_TIME', 'LOCATION', 'NAME_EVENT', 'ORDINAL']
5. GET_EVENT_ORGANIZER: ['CATEGORY_EVENT', 'DATE_TIME', 'LOCATION', 'ORGANIZER_EVENT']
6. GET_LOCATION: ['CATEGORY_LOCATION', 'LOCATION', 'LOCATION_MODIFIER', 'LOCATION_USER', 'POINT_ON_MAP', 'SEARCH_RADIUS']
7. GET_LOCATION_HOME: ['CONTACT', 'CONTACT_RELATED', 'TYPE_RELATION']
8. GET_LOCATION_SCHOOL: []
9. GET_LOCATION_WORK: ['CONTACT']
10. NEGATION: []
11. UNSUPPORTED_EVENT: []
Domain: EVENT
Domain: MESSAGING
Intents (12): ['CANCEL_MESSAGE', 'GET_CONTACT', 'GET_EVENT_ATTENDEE', 'GET_EVENT_ORGANIZER', 'GET_LOCATION', 'GET_MESSAGE', 'IGNORE_MESSAGE', 'REACT_MESSAGE', 'SELECT_ITEM', 'SEND_MESSAGE', 'SEND_TEXT_MESSAGE', 'UNSUPPORTED_MESSAGING']
Slots (27): ['AGE', 'AMOUNT', 'BIRTHDAY', 'CATEGORY_EVENT', 'CATEGORY_LOCATION', 'CONTACT', 'CONTACT_RELATED', 'CONTENT_EMOJI', 'CONTENT_EXACT', 'DATE_TIME', 'DATE_TIME_BIRTHDAY', 'GROUP', 'LOCATION', 'LOCATION_HOME', 'MUTUAL_EMPLOYER', 'MUTUAL_LOCATION', 'MUTUAL_SCHOOL', 'ORDINAL', 'RECIPIENT', 'RESOURCE', 'SENDER', 'TAG_MESSAGE', 'TYPE_CONTACT', 'TYPE_CONTENT', 'TYPE_INFO', 'TYPE_REACTION', 'TYPE_RELATION']
Slots by intent:
1. CANCEL_MESSAGE: ['AMOUNT', 'TYPE_CONTENT']
2. GET_CONTACT: ['AGE', 'AMOUNT', 'BIRTHDAY', 'CONTACT', 'CONTACT_RELATED', 'DATE_TIME', 'DATE_TIME_BIRTHDAY', 'LOCATION_HOME', 'MUTUAL_EMPLOYER', 'MUTUAL_LOCATION', 'MUTUAL_SCHOOL', 'TYPE_CONTACT', 'TYPE_INFO', 'TYPE_RELATION']
3. GET_EVENT_ATTENDEE: ['AMOUNT', 'CATEGORY_EVENT', 'DATE_TIME', 'LOCATION']
4. GET_EVENT_ORGANIZER: ['CATEGORY_EVENT']
5. GET_LOCATION: ['CATEGORY_LOCATION']
6. GET_MESSAGE: ['AMOUNT', 'CONTENT_EXACT', 'DATE_TIME', 'GROUP', 'ORDINAL', 'RECIPIENT', 'RESOURCE', 'SENDER', 'TAG_MESSAGE', 'TYPE_CONTENT']
7. IGNORE_MESSAGE: ['CONTENT_EXACT', 'TYPE_CONTENT']
8. REACT_MESSAGE: ['AMOUNT', 'CONTACT', 'CONTENT_EXACT', 'DATE_TIME', 'GROUP', 'ORDINAL', 'RECIPIENT', 'RESOURCE', 'TAG_MESSAGE', 'TYPE_CONTENT', 'TYPE_REACTION']
9. SELECT_ITEM: ['ORDINAL']
10. SEND_MESSAGE: ['AMOUNT', 'CONTACT', 'CONTENT_EMOJI', 'CONTENT_EXACT', 'DATE_TIME', 'GROUP', 'ORDINAL', 'RECIPIENT', 'RESOURCE', 'SENDER', 'TYPE_CONTENT']
11. SEND_TEXT_MESSAGE: ['CONTENT_EXACT', 'RECIPIENT', 'RESOURCE']
12. UNSUPPORTED_MESSAGING: []
Domain: MESSAGING
Domain: MUSIC
Intents (15): ['ADD_TO_PLAYLIST_MUSIC', 'CREATE_PLAYLIST_MUSIC', 'DISLIKE_MUSIC', 'LIKE_MUSIC', 'LOOP_MUSIC', 'PAUSE_MUSIC', 'PLAY_MUSIC', 'PREVIOUS_TRACK_MUSIC', 'REMOVE_FROM_PLAYLIST_MUSIC', 'REPLAY_MUSIC', 'SET_DEFAULT_PROVIDER_MUSIC', 'SKIP_TRACK_MUSIC', 'START_SHUFFLE_MUSIC', 'STOP_MUSIC', 'UNSUPPORTED_MUSIC']
Slots (9): ['MUSIC_ALBUM_TITLE', 'MUSIC_ARTIST_NAME', 'MUSIC_GENRE', 'MUSIC_PLAYLIST_TITLE', 'MUSIC_PROVIDER_NAME', 'MUSIC_RADIO_ID', 'MUSIC_TRACK_TITLE', 'MUSIC_TYPE', 'ORDINAL']
Slots by intent:
1. ADD_TO_PLAYLIST_MUSIC: ['MUSIC_ALBUM_TITLE', 'MUSIC_ARTIST_NAME', 'MUSIC_GENRE', 'MUSIC_PLAYLIST_TITLE', 'MUSIC_PROVIDER_NAME', 'MUSIC_TRACK_TITLE', 'MUSIC_TYPE', 'ORDINAL']
2. CREATE_PLAYLIST_MUSIC: ['MUSIC_ARTIST_NAME', 'MUSIC_GENRE', 'MUSIC_PLAYLIST_TITLE', 'MUSIC_PROVIDER_NAME', 'MUSIC_TRACK_TITLE', 'MUSIC_TYPE']
3. DISLIKE_MUSIC: ['MUSIC_ALBUM_TITLE', 'MUSIC_ARTIST_NAME', 'MUSIC_GENRE', 'MUSIC_PROVIDER_NAME', 'MUSIC_TYPE', 'ORDINAL']
4. LIKE_MUSIC: ['MUSIC_ARTIST_NAME', 'MUSIC_GENRE', 'MUSIC_PLAYLIST_TITLE', 'MUSIC_PROVIDER_NAME', 'MUSIC_TYPE', 'ORDINAL']
5. LOOP_MUSIC: ['MUSIC_ALBUM_TITLE', 'MUSIC_ARTIST_NAME', 'MUSIC_GENRE', 'MUSIC_PLAYLIST_TITLE', 'MUSIC_PROVIDER_NAME', 'MUSIC_TRACK_TITLE', 'MUSIC_TYPE', 'ORDINAL']
6. PAUSE_MUSIC: ['MUSIC_ARTIST_NAME', 'MUSIC_PLAYLIST_TITLE', 'MUSIC_PROVIDER_NAME', 'MUSIC_TRACK_TITLE', 'MUSIC_TYPE']
7. PLAY_MUSIC: ['MUSIC_ALBUM_TITLE', 'MUSIC_ARTIST_NAME', 'MUSIC_GENRE', 'MUSIC_PLAYLIST_TITLE', 'MUSIC_PROVIDER_NAME', 'MUSIC_RADIO_ID', 'MUSIC_TRACK_TITLE', 'MUSIC_TYPE', 'ORDINAL']
8. PREVIOUS_TRACK_MUSIC: ['MUSIC_ALBUM_TITLE', 'MUSIC_ARTIST_NAME', 'MUSIC_PROVIDER_NAME', 'MUSIC_TYPE', 'ORDINAL']
9. REMOVE_FROM_PLAYLIST_MUSIC: ['MUSIC_ARTIST_NAME', 'MUSIC_GENRE', 'MUSIC_PLAYLIST_TITLE', 'MUSIC_PROVIDER_NAME', 'MUSIC_TRACK_TITLE', 'MUSIC_TYPE']
10. REPLAY_MUSIC: ['MUSIC_ALBUM_TITLE', 'MUSIC_ARTIST_NAME', 'MUSIC_GENRE', 'MUSIC_PLAYLIST_TITLE', 'MUSIC_PROVIDER_NAME', 'MUSIC_RADIO_ID', 'MUSIC_TRACK_TITLE', 'MUSIC_TYPE', 'ORDINAL']
11. SET_DEFAULT_PROVIDER_MUSIC: ['MUSIC_PROVIDER_NAME']
12. SKIP_TRACK_MUSIC: ['MUSIC_ALBUM_TITLE', 'MUSIC_ARTIST_NAME', 'MUSIC_GENRE', 'MUSIC_PLAYLIST_TITLE', 'MUSIC_PROVIDER_NAME', 'MUSIC_TRACK_TITLE', 'MUSIC_TYPE', 'ORDINAL']
13. START_SHUFFLE_MUSIC: ['MUSIC_ALBUM_TITLE', 'MUSIC_ARTIST_NAME', 'MUSIC_GENRE', 'MUSIC_PLAYLIST_TITLE', 'MUSIC_PROVIDER_NAME', 'MUSIC_TRACK_TITLE', 'MUSIC_TYPE', 'ORDINAL']
14. STOP_MUSIC: ['MUSIC_ARTIST_NAME', 'MUSIC_GENRE', 'MUSIC_PLAYLIST_TITLE', 'MUSIC_PROVIDER_NAME', 'MUSIC_TYPE', 'ORDINAL']
15. UNSUPPORTED_MUSIC: ['MUSIC_ARTIST_NAME', 'MUSIC_GENRE', 'MUSIC_PLAYLIST_TITLE', 'MUSIC_PROVIDER_NAME', 'MUSIC_TRACK_TITLE', 'MUSIC_TYPE']
Domain: MUSIC
Domain: NAVIGATION
Intents (17): ['GET_CONTACT', 'GET_DIRECTIONS', 'GET_DISTANCE', 'GET_ESTIMATED_ARRIVAL', 'GET_ESTIMATED_DEPARTURE', 'GET_ESTIMATED_DURATION', 'GET_EVENT', 'GET_INFO_ROAD_CONDITION', 'GET_INFO_ROUTE', 'GET_INFO_TRAFFIC', 'GET_LOCATION', 'GET_LOCATION_HOME', 'GET_LOCATION_HOMETOWN', 'GET_LOCATION_SCHOOL', 'GET_LOCATION_WORK', 'UNSUPPORTED_NAVIGATION', 'UPDATE_DIRECTIONS']
Slots (33): ['AMOUNT', 'ATTENDEE_EVENT', 'CATEGORY_EVENT', 'CATEGORY_LOCATION', 'CONTACT', 'CONTACT_RELATED', 'DATE_TIME', 'DATE_TIME_ARRIVAL', 'DATE_TIME_DEPARTURE', 'DESTINATION', 'GROUP', 'LOCATION', 'LOCATION_CURRENT', 'LOCATION_MODIFIER', 'LOCATION_USER', 'LOCATION_WORK', 'METHOD_TRAVEL', 'NAME_EVENT', 'OBSTRUCTION_AVOID', 'ORDINAL', 'ORGANIZER_EVENT', 'PATH', 'PATH_AVOID', 'POINT_ON_MAP', 'ROAD_CONDITION', 'ROAD_CONDITION_AVOID', 'SEARCH_RADIUS', 'SOURCE', 'TYPE_RELATION', 'UNIT_DISTANCE', 'WAYPOINT', 'WAYPOINT_ADDED', 'WAYPOINT_AVOID']
Slots by intent:
1. GET_CONTACT: ['CONTACT', 'CONTACT_RELATED', 'GROUP', 'TYPE_RELATION']
2. GET_DIRECTIONS: ['AMOUNT', 'DATE_TIME_ARRIVAL', 'DATE_TIME_DEPARTURE', 'DESTINATION', 'LOCATION', 'METHOD_TRAVEL', 'OBSTRUCTION_AVOID', 'PATH', 'PATH_AVOID', 'ROAD_CONDITION', 'ROAD_CONDITION_AVOID', 'SOURCE', 'WAYPOINT', 'WAYPOINT_AVOID']
3. GET_DISTANCE: ['AMOUNT', 'DATE_TIME_DEPARTURE', 'DESTINATION', 'METHOD_TRAVEL', 'OBSTRUCTION_AVOID', 'PATH', 'PATH_AVOID', 'SOURCE', 'UNIT_DISTANCE', 'WAYPOINT']
4. GET_ESTIMATED_ARRIVAL: ['DATE_TIME_ARRIVAL', 'DATE_TIME_DEPARTURE', 'DESTINATION', 'LOCATION', 'METHOD_TRAVEL', 'OBSTRUCTION_AVOID', 'PATH', 'PATH_AVOID', 'ROAD_CONDITION', 'ROAD_CONDITION_AVOID', 'SOURCE', 'WAYPOINT']
5. GET_ESTIMATED_DEPARTURE: ['DATE_TIME_ARRIVAL', 'DATE_TIME_DEPARTURE', 'DESTINATION', 'LOCATION', 'METHOD_TRAVEL', 'OBSTRUCTION_AVOID', 'PATH', 'PATH_AVOID', 'ROAD_CONDITION', 'SOURCE', 'WAYPOINT']
6. GET_ESTIMATED_DURATION: ['DATE_TIME', 'DATE_TIME_ARRIVAL', 'DATE_TIME_DEPARTURE', 'DESTINATION', 'METHOD_TRAVEL', 'OBSTRUCTION_AVOID', 'PATH', 'PATH_AVOID', 'ROAD_CONDITION', 'ROAD_CONDITION_AVOID', 'SOURCE', 'WAYPOINT', 'WAYPOINT_AVOID']
7. GET_EVENT: ['ATTENDEE_EVENT', 'CATEGORY_EVENT', 'DATE_TIME', 'LOCATION', 'NAME_EVENT', 'ORDINAL', 'ORGANIZER_EVENT']
8. GET_INFO_ROAD_CONDITION: ['DATE_TIME', 'DATE_TIME_ARRIVAL', 'DATE_TIME_DEPARTURE', 'DESTINATION', 'LOCATION', 'METHOD_TRAVEL', 'PATH', 'ROAD_CONDITION', 'SOURCE', 'WAYPOINT']
9. GET_INFO_ROUTE: ['DATE_TIME_DEPARTURE', 'DESTINATION', 'METHOD_TRAVEL', 'PATH', 'SOURCE', 'WAYPOINT']
10. GET_INFO_TRAFFIC: ['DATE_TIME', 'DESTINATION', 'LOCATION', 'METHOD_TRAVEL', 'OBSTRUCTION_AVOID', 'PATH', 'PATH_AVOID', 'ROAD_CONDITION', 'SOURCE', 'WAYPOINT', 'WAYPOINT_AVOID']
11. GET_LOCATION: ['CATEGORY_LOCATION', 'LOCATION', 'LOCATION_MODIFIER', 'LOCATION_USER', 'POINT_ON_MAP', 'SEARCH_RADIUS']
12. GET_LOCATION_HOME: ['CONTACT', 'CONTACT_RELATED', 'LOCATION', 'LOCATION_CURRENT', 'TYPE_RELATION']
13. GET_LOCATION_HOMETOWN: ['CONTACT']
14. GET_LOCATION_SCHOOL: ['CONTACT', 'CONTACT_RELATED', 'TYPE_RELATION']
15. GET_LOCATION_WORK: ['CONTACT', 'CONTACT_RELATED', 'LOCATION', 'LOCATION_CURRENT', 'LOCATION_WORK', 'TYPE_RELATION']
16. UNSUPPORTED_NAVIGATION: []
17. UPDATE_DIRECTIONS: ['DATE_TIME_ARRIVAL', 'DATE_TIME_DEPARTURE', 'DESTINATION', 'OBSTRUCTION_AVOID', 'PATH', 'PATH_AVOID', 'SOURCE', 'WAYPOINT_ADDED', 'WAYPOINT_AVOID']
Domain: NAVIGATION
Domain: REMINDER
Intents (19): ['CREATE_REMINDER', 'DELETE_REMINDER', 'GET_BIRTHDAY', 'GET_CONTACT', 'GET_EVENT', 'GET_EVENT_ATTENDEE', 'GET_MESSAGE', 'GET_RECURRING_DATE_TIME', 'GET_REMINDER', 'GET_REMINDER_AMOUNT', 'GET_REMINDER_DATE_TIME', 'GET_REMINDER_LOCATION', 'GET_TODO', 'HELP_REMINDER', 'REPLY_MESSAGE', 'SEND_MESSAGE', 'UPDATE_REMINDER', 'UPDATE_REMINDER_DATE_TIME', 'UPDATE_REMINDER_TODO']
Slots (32): ['AGE', 'AMOUNT', 'ATTENDEE', 'ATTENDEE_ADDED', 'ATTENDEE_EVENT', 'ATTENDEE_REMOVED', 'CATEGORY_EVENT', 'CONTACT', 'CONTACT_RELATED', 'CONTENT_EXACT', 'DATE_TIME', 'DATE_TIME_NEW', 'FREQUENCY', 'GROUP', 'JOB', 'METHOD_RETRIEVAL_REMINDER', 'MUTUAL_EMPLOYER', 'MUTUAL_SCHOOL', 'NAME_APP', 'ORDINAL', 'ORGANIZER_EVENT', 'PERSON_REMINDED', 'PERSON_REMINDED_ADDED', 'PERSON_REMINDED_REMOVED', 'RECIPIENT', 'RECURRING_DATE_TIME', 'RECURRING_DATE_TIME_NEW', 'SENDER', 'TODO', 'TODO_NEW', 'TYPE_CONTENT', 'TYPE_RELATION']
Slots by intent:
1. CREATE_REMINDER: ['AMOUNT', 'DATE_TIME', 'ORDINAL', 'PERSON_REMINDED', 'RECURRING_DATE_TIME', 'TODO']
2. DELETE_REMINDER: ['AMOUNT', 'DATE_TIME', 'ORDINAL', 'PERSON_REMINDED', 'RECURRING_DATE_TIME', 'TODO']
3. GET_BIRTHDAY: ['CONTACT']
4. GET_CONTACT: ['AGE', 'AMOUNT', 'CONTACT', 'CONTACT_RELATED', 'JOB', 'MUTUAL_EMPLOYER', 'MUTUAL_SCHOOL', 'TYPE_RELATION']
5. GET_EVENT: ['ATTENDEE_EVENT', 'CATEGORY_EVENT', 'DATE_TIME', 'ORDINAL', 'ORGANIZER_EVENT']
6. GET_EVENT_ATTENDEE: ['DATE_TIME', 'ORGANIZER_EVENT']
7. GET_MESSAGE: []
8. GET_RECURRING_DATE_TIME: ['DATE_TIME', 'FREQUENCY', 'ORDINAL']
9. GET_REMINDER: ['AMOUNT', 'DATE_TIME', 'METHOD_RETRIEVAL_REMINDER', 'ORDINAL', 'PERSON_REMINDED', 'RECURRING_DATE_TIME', 'TODO']
10. GET_REMINDER_AMOUNT: ['DATE_TIME', 'METHOD_RETRIEVAL_REMINDER', 'PERSON_REMINDED', 'TODO']
11. GET_REMINDER_DATE_TIME: ['AMOUNT', 'DATE_TIME', 'METHOD_RETRIEVAL_REMINDER', 'ORDINAL', 'PERSON_REMINDED', 'RECURRING_DATE_TIME', 'TODO']
12. GET_REMINDER_LOCATION: ['AMOUNT', 'DATE_TIME', 'METHOD_RETRIEVAL_REMINDER', 'ORDINAL', 'PERSON_REMINDED', 'TODO']
13. GET_TODO: ['AMOUNT', 'ATTENDEE', 'DATE_TIME', 'RECURRING_DATE_TIME', 'TODO']
14. HELP_REMINDER: []
15. REPLY_MESSAGE: ['CONTENT_EXACT', 'RECIPIENT', 'SENDER']
16. SEND_MESSAGE: ['AMOUNT', 'CONTACT', 'CONTENT_EXACT', 'DATE_TIME', 'GROUP', 'NAME_APP', 'RECIPIENT', 'SENDER', 'TYPE_CONTENT']
17. UPDATE_REMINDER: ['AMOUNT', 'ATTENDEE', 'ATTENDEE_ADDED', 'ATTENDEE_REMOVED', 'DATE_TIME', 'DATE_TIME_NEW', 'ORDINAL', 'PERSON_REMINDED', 'PERSON_REMINDED_ADDED', 'PERSON_REMINDED_REMOVED', 'RECURRING_DATE_TIME', 'RECURRING_DATE_TIME_NEW', 'TODO', 'TODO_NEW']
18. UPDATE_REMINDER_DATE_TIME: ['AMOUNT', 'ATTENDEE', 'DATE_TIME', 'DATE_TIME_NEW', 'ORDINAL', 'PERSON_REMINDED', 'PERSON_REMINDED_ADDED', 'RECURRING_DATE_TIME', 'RECURRING_DATE_TIME_NEW', 'TODO']
19. UPDATE_REMINDER_TODO: ['AMOUNT', 'ATTENDEE', 'DATE_TIME', 'PERSON_REMINDED', 'RECURRING_DATE_TIME', 'RECURRING_DATE_TIME_NEW', 'TODO', 'TODO_NEW']
Domain: REMINDER
Domain: TIMER
Intents (11): ['ADD_TIME_TIMER', 'CREATE_TIMER', 'DELETE_TIMER', 'GET_TIME', 'GET_TIMER', 'PAUSE_TIMER', 'RESTART_TIMER', 'RESUME_TIMER', 'SUBTRACT_TIME_TIMER', 'UNSUPPORTED_TIMER', 'UPDATE_TIMER']
Slots (5): ['AMOUNT', 'DATE_TIME', 'METHOD_TIMER', 'ORDINAL', 'TIMER_NAME']
Slots by intent:
1. ADD_TIME_TIMER: ['AMOUNT', 'DATE_TIME', 'METHOD_TIMER', 'ORDINAL', 'TIMER_NAME']
2. CREATE_TIMER: ['AMOUNT', 'DATE_TIME', 'METHOD_TIMER', 'ORDINAL', 'TIMER_NAME']
3. DELETE_TIMER: ['AMOUNT', 'DATE_TIME', 'METHOD_TIMER', 'ORDINAL', 'TIMER_NAME']
4. GET_TIME: ['DATE_TIME']
5. GET_TIMER: ['AMOUNT', 'DATE_TIME', 'METHOD_TIMER', 'ORDINAL', 'TIMER_NAME']
6. PAUSE_TIMER: ['AMOUNT', 'DATE_TIME', 'METHOD_TIMER', 'ORDINAL', 'TIMER_NAME']
7. RESTART_TIMER: ['AMOUNT', 'DATE_TIME', 'METHOD_TIMER', 'ORDINAL', 'TIMER_NAME']
8. RESUME_TIMER: ['AMOUNT', 'DATE_TIME', 'METHOD_TIMER', 'ORDINAL', 'TIMER_NAME']
9. SUBTRACT_TIME_TIMER: ['DATE_TIME', 'METHOD_TIMER', 'ORDINAL', 'TIMER_NAME']
10. UNSUPPORTED_TIMER: ['AMOUNT', 'DATE_TIME', 'METHOD_TIMER', 'ORDINAL', 'TIMER_NAME']
11. UPDATE_TIMER: ['AMOUNT', 'DATE_TIME', 'METHOD_TIMER', 'ORDINAL', 'TIMER_NAME']
Domain: TIMER
Domain: WEATHER
Intents (7): ['GET_CONTACT', 'GET_INFO_CONTACT', 'GET_LOCATION', 'GET_SUNRISE', 'GET_SUNSET', 'GET_WEATHER', 'UNSUPPORTED_WEATHER']
Slots (11): ['CONTACT', 'CONTACT_RELATED', 'DATE_TIME', 'LOCATION', 'LOCATION_MODIFIER', 'LOCATION_USER', 'MEASUREMENT_UNIT', 'SEARCH_RADIUS', 'TYPE_RELATION', 'WEATHER_ATTRIBUTE', 'WEATHER_TEMPERATURE_UNIT']
Slots by intent:
1. GET_CONTACT: ['CONTACT_RELATED', 'TYPE_RELATION']
2. GET_INFO_CONTACT: ['CONTACT']
3. GET_LOCATION: ['LOCATION_MODIFIER', 'LOCATION_USER', 'SEARCH_RADIUS']
4. GET_SUNRISE: ['DATE_TIME', 'LOCATION']
5. GET_SUNSET: ['DATE_TIME', 'LOCATION']
6. GET_WEATHER: ['DATE_TIME', 'LOCATION', 'MEASUREMENT_UNIT', 'WEATHER_ATTRIBUTE', 'WEATHER_TEMPERATURE_UNIT']
7. UNSUPPORTED_WEATHER: ['DATE_TIME', 'LOCATION', 'WEATHER_ATTRIBUTE']
Domain: WEATHER
Manifest parsing script
import os
import pandas as pd
# Directory holding the manifest files, and the split files read from it
manifest_path = "../datasets/stop/manifests"
manifest_splits = [f"{split_name}.tsv" for split_name in ("eval", "test", "train")]

# Domain names used to filter the manifest rows
domains = [
    "alarm",
    "event",
    "messaging",
    "music",
    "navigation",
    "reminder",
    "timer",
    "weather",
]
# Function definitions
def parse_decoupled_normalized_seqlogical(input, results):
    """Collect the intents and slots named in one bracketed parse string.

    The string is split on whitespace. A token starting with "[IN:" opens an
    intent, a token starting with "[SL:" opens a slot, and a lone "]" closes
    the most recently opened bracket. Each slot is attributed to the
    innermost intent that is still open when the slot is seen.

    Args:
        input: The parse string to scan. Anything that is not a ``str`` is
            ignored (presumably the NaN pandas yields for an empty cell --
            confirm against the manifests). The name shadows the built-in
            but is kept so existing callers are unaffected.
        results: Dict with the keys "intents" (list), "slots" (list) and
            "slots_by_intent" (dict of intent name -> set of slot names).
            It is updated in place.

    Returns:
        The same ``results`` object that was passed in.
    """
    if not isinstance(input, str):
        return results

    intent_prefix = "[IN:"
    slot_prefix = "[SL:"

    # One flag per open bracket: True when that bracket opened an intent.
    open_brackets = []
    # Names of the intents that are currently open, innermost last.
    open_intents = []

    for token in input.split():
        if token == "]":
            # A stray "]" with nothing open is ignored instead of raising,
            # so one malformed row cannot abort the whole run.
            if open_brackets and open_brackets.pop():
                open_intents.pop()
        elif "[" in token:
            is_intent = token.startswith(intent_prefix)
            open_brackets.append(is_intent)
            if is_intent:
                intent = token[len(intent_prefix):]
                open_intents.append(intent)
                if intent not in results["intents"]:
                    results["intents"].append(intent)
                # setdefault keeps the slot set available even when the
                # intent was already listed without a matching entry.
                results["slots_by_intent"].setdefault(intent, set())
            elif token.startswith(slot_prefix):
                slot = token[len(slot_prefix):]
                if slot not in results["slots"]:
                    results["slots"].append(slot)
                # A slot outside any intent is still recorded above, but
                # there is no intent to attribute it to.
                if open_intents:
                    results["slots_by_intent"][open_intents[-1]].add(slot)
    return results
def print_results_header():
    """Print the report title with a dashed rule above and below it."""
    rule = "\n-------------------------------------------\n"
    for line in (rule, "STOP Dataset - Domains, Intents, and Slots", rule):
        print(line)
def print_results_summary(input):
    """Print a table with one row per domain giving its intent and slot counts.

    Args:
        input: Mapping of domain name to a results dict that has "intents"
            and "slots" collections. The name shadows the built-in but is
            kept so existing callers are unaffected.
    """
    print("DOMAIN | IN | SL |")
    print("------------------------")
    for domain, results in input.items():
        # The counts are taken outside the f-string: reusing double quotes
        # inside a double-quoted f-string only parses on Python 3.12+.
        intent_count = len(results["intents"])
        slot_count = len(results["slots"])
        print(f"{domain.upper():12} | {intent_count:2} | {slot_count:2} |")
    print("------------------------")
def print_results_details(input):
    """Print, for every domain, its intents, its slots and the slots per intent.

    Intent names, slot names and the per-intent slot collections are all
    printed in sorted order. A "---" separator is printed before each
    domain and once more after the last one.

    Args:
        input: Mapping of domain name to a results dict that has "intents",
            "slots" and "slots_by_intent" entries. The name shadows the
            built-in but is kept so existing callers are unaffected.
    """
    for domain, results in input.items():
        # Values are pulled into locals first: reusing double quotes inside
        # a double-quoted f-string only parses on Python 3.12+.
        intents = results["intents"]
        slots = results["slots"]
        slots_by_intent = results["slots_by_intent"]

        print("\n---\n")
        print(f"Domain: {domain.upper()}\n")
        print(f"Intents ({len(intents)}): {sorted(intents)}")
        print(f"Slots ({len(slots)}): {sorted(slots)}\n")
        print("Slots by intent:")
        for i, key in enumerate(sorted(slots_by_intent), start=1):
            print(f" {i}. {key}: {sorted(slots_by_intent[key])}")
    print("\n---\n")
# Give every domain its own empty accumulator before any manifest is read
all_results = {
    domain: {"intents": [], "slots": [], "slots_by_intent": {}}
    for domain in domains
}

# Read each split once, then feed the parse column to the parser domain by domain
for split in manifest_splits:
    print(f"Processing '{split}'...")
    split_file = os.path.join(manifest_path, split)
    df = pd.read_csv(split_file, sep="\t")
    for domain in domains:
        in_domain = df["domain"] == domain
        parses = df[in_domain]["decoupled_normalized_seqlogical"]
        for row in parses:
            all_results[domain] = parse_decoupled_normalized_seqlogical(
                row, all_results[domain]
            )

# Report: title, per-domain counts, then the full per-domain listing
print_results_header()
print_results_summary(all_results)
print_results_details(all_results)
parse_stop_intents_slots.py
Discussion