实现向量数据库

This commit is contained in:
ChenXL97 2023-11-28 17:21:54 +08:00
parent 81a91cef30
commit 0e8976e846
32 changed files with 877 additions and 158 deletions

View File

@ -0,0 +1,6 @@
# 安装环境
```python
pip3 install torch torchvision torchaudio
conda install -c conda-forge faiss
pip install src
```

View File

@ -0,0 +1,41 @@
{"id": 0, "title": "你好", "text": "{'answer': '您好,我是这家咖啡厅的服务员,请问您要点什么?', 'function': None}"}
{"id": 1, "title": "感谢,这些够啦,你去忙吧。", "text": "{'answer': 'ok, 有需要您再找我。', 'function': None}"}
{"id": 2, "title": "做一杯咖啡", "text": "{'answer': '好的,我马上做咖啡', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Coffee,CoffeeTable)\"}'}"}
{"id": 3, "title": "不用了。", "text": "{'answer': '好的,您有需要再跟我说', 'function': 'stop_serve', 'args': '{}'}"}
{"id": 4, "title": "来一号桌", "text": "{'answer': '好的,我马上来一号桌', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,Table1)\"}'}"}
{"id": 5, "title": "早上好呀,我想找个能晒太阳的地方。", "text": "{'answer': '您右手边就有能晒太阳的位置呢。', 'function': None}"}
{"id": 6, "title": "可以带我过去嘛?", "text": "{'answer': '当然,请跟我来!', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,BrightTable1)\"}'}"}
{"id": 7, "title": "嘿RoboWaiter过来一下", "text": "{'answer': '我在来的路上啦,请稍等噢!', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,BrightTable6)\"}'}"}
{"id": 8, "title": "你们这有什么饮料嘛?", "text": "{'answer': '我们咖啡厅提供各种口味咖啡,水,冰红茶,酸奶等饮品,还提供点心蛋糕等甜品。您想点什么?', 'function': None}"}
{"id": 9, "title": "来杯卡布奇诺吧。", "text": "{'answer': 'ok请稍等', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Coffee,BrightTable6)\"}'}"}
{"id": 10, "title": "我昨天保温杯好像落在你们咖啡厅了,你看到了吗?", "text": "{'answer': '是的我有印象保温杯在大厅的2号桌子上。', 'function': 'get_object_info', 'args': '{\"obj\":\"保温杯\"}'}"}
{"id": 11, "title": "给我来份午餐套餐。", "text": "{'answer': '本店的午餐套餐包括咖啡和点心,请问您是打包还是堂食?', 'function': None}"}
{"id": 12, "title": "打包吧,快点!", "text": "{'answer': '请稍等,马上就好!', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Dessert,Bar),On(Coffee,Bar)\"}'}"}
{"id": 13, "title": "你可以帮我拿来吗,我在前门的桌子前等你。", "text": "{'answer': '当然,请稍等!', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(VacuumCup,Table1)\"}'}"}
{"id": 14, "title": "麻烦啦!", "text": "{'answer': '没事儿,为您服务是我的荣幸!', 'function': None}"}
{"id": 15, "title": "大厅的桌子好啊,快带我去呀!", "text": "{'answer': '好的好的,请跟我来。', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,WaterTable)\"}'}"}
{"id": 16, "title": "我想来杯水,帮我孩子拿个酸奶吧。", "text": "{'answer': 'ok没问题请稍等', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Water,WaterTable),On(Yogurt,WaterTable)\"}'}"}
{"id": 17, "title": "来杯酸奶吧。", "text": "{'answer': '好的没问题,请稍等!', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,WaterTable)\"}'}"}
{"id": 18, "title": "好热呀!太阳也好大!", "text": "{'answer': '抱歉,我这就去开空调和关窗帘。', 'function': 'create_sub_task', 'args': '{\"goal\":\"Is(AC,On),Is(Curtain,Off)\"}'}"}
{"id": 19, "title": "下班啦!别忘了打扫卫生。", "text": "{'answer': '收到!下班啦!', 'function': 'create_sub_task', 'args': '{\"goal\":\"Is(Floor,Clean),Is(Table1,Clean),Is(Chairs,Clean),Is(AC,Off),Is(HallLight,Off),Is(TubeLight,Off),Is(Curtain,Off)\"}'}"}
{"id": 20, "title": "请问洗手间在哪里?", "text": "{'answer': '洗手间在这附近', 'function': 'get_object_info', 'args': '{\"obj\":\"洗手间\"}'}"}
{"id": 21, "title": "给我来杯酸奶和冰红茶,我坐在对面的桌子那儿。", "text": "{'answer': '好的,请稍等。', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Chips,WaterTable),On(NFCJuice,WaterTable)\"}'}"}
{"id": 22, "title": "给我来份薯片和果汁,我坐在对面的桌子那儿。", "text": "{'answer': '好的,请稍等。', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(BottledDrink,WaterTable),On(Yogurt,WaterTable)\"}'}"}
{"id": 23, "title": "好热呀,想开空调,想要温度调低点!", "text": "{'answer': '抱歉,我马上去开空调并调低空调温度。', 'function': 'create_sub_task', 'args': '{\"goal\":\"Is(ACTemperature,Down)\"}'}"}
{"id": 24, "title": "请问哪里有空位啊?", "text": "{'answer': '现在有不少空位呢,请问您有什么要求嘛?', 'function': None}"}
{"id": 25, "title": "我想坐高脚凳子。", "text": "{'answer': '没问题,大厅里就有这样的位置呢!', 'function': None}"}
{"id": 26, "title": "你带我去吧。", "text": "{'answer': 'OK请跟我来', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,BrightTable5)\"}'}"}
{"id": 27, "title": "可以关筒灯和关窗帘吗?", "text": "{'answer': '好的,请稍等。', 'function': 'create_sub_task', 'args': '{\"goal\":\"Is(TubeLight,Off),Is(Curtain,Off)\"}'}"}
{"id": 28, "title": "给我来杯咖啡,哦对,再倒一杯水。", "text": "{'answer': '速速就好,请稍等!', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Water,WaterTable),On(Coffee,WaterTable)\"}'}"}
{"id": 29, "title": "我想来份点心和酸奶。", "text": "{'answer': '稍等片刻噢~', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,Bar),On(Dessert,Bar)\"}'}"}
{"id": 30, "title": "酸奶。", "text": "{'answer': '稍等片刻噢~', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,Bar)\"}'}"}
{"id": 31, "title": "你好呀,你们这有啥好吃的?", "text": "{'answer': '我们咖啡厅有咖啡,还有饮料和点心蛋糕呢?您想点什么不?', 'function': None}"}
{"id": 32, "title": "听起来都好甜呀,我女朋友爱吃水果。", "text": "{'answer': '没问题,我们也提供草莓、橘子和苹果等水果呢。', 'function': None}"}
{"id": 33, "title": "你们这人可真多。", "text": "{'answer': '目前还有空位呢,你们可以坐下来一起品尝水果呢?', 'function': 'get_object_info', 'args': '{\"obj\":\"空位\"}'}"}
{"id": 34, "title": "我女朋友怕晒,有空余的阴凉位置嘛?", "text": "{'answer': '我查询完毕,阴凉位置有两个。在你们右侧往里走,就有适合你们二位的沙发呢。', 'function': 'get_object_info', 'args': '{\"obj\":\"阴凉空位\"}'}"}
{"id": 35, "title": "那还不错。", "text": "{'answer': '是的,那边安静,不会晒到太阳呢。', 'function': None}"}
{"id": 36, "title": "我们还想一起下下棋,切磋切磋。", "text": "{'answer': '我们这有两份军棋呢,在大厅的三号桌上。', 'function': 'get_object_info', 'args': '{\"obj\":\"棋\"}'}"}
{"id": 37, "title": "请问洗手间在哪呢?", "text": "{'answer': '在后门,你们的左侧往里走就是了呢。', 'function': 'get_object_info', 'args': '{\"obj\":\"洗手间\"}'}"}
{"id": 38, "title": "我带着孩子呢,想要宽敞亮堂的地方。", "text": "{'answer': '好的,我明白了,那么我们推荐您到大厅的桌子,那里的空间比较宽敞,环境也比较明亮,适合带着孩子一起用餐。', 'function': None}"}
{"id": 39, "title": "冰红茶", "text": "{'answer': '好的', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Softdrink,Bar)\"}'}"}
{"id": 40, "title": "水杯", "text": "{'answer': '好的', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Glass,Bar)\"}'}"}

View File

@ -0,0 +1,41 @@
{"id": 0, "question": "你好"}
{"id": 1, "question": "感谢,这些够啦,你去忙吧。"}
{"id": 2, "question": "做一杯咖啡"}
{"id": 3, "question": "不用了。"}
{"id": 4, "question": "来一号桌"}
{"id": 5, "question": "早上好呀,我想找个能晒太阳的地方。"}
{"id": 6, "question": "可以带我过去嘛?"}
{"id": 7, "question": "嘿RoboWaiter过来一下"}
{"id": 8, "question": "你们这有什么饮料嘛?"}
{"id": 9, "question": "来杯卡布奇诺吧。"}
{"id": 10, "question": "我昨天保温杯好像落在你们咖啡厅了,你看到了吗?"}
{"id": 11, "question": "给我来份午餐套餐。"}
{"id": 12, "question": "打包吧,快点!"}
{"id": 13, "question": "你可以帮我拿来吗,我在前门的桌子前等你。"}
{"id": 14, "question": "麻烦啦!"}
{"id": 15, "question": "大厅的桌子好啊,快带我去呀!"}
{"id": 16, "question": "我想来杯水,帮我孩子拿个酸奶吧。"}
{"id": 17, "question": "来杯酸奶吧。"}
{"id": 18, "question": "好热呀!太阳也好大!"}
{"id": 19, "question": "下班啦!别忘了打扫卫生。"}
{"id": 20, "question": "请问洗手间在哪里?"}
{"id": 21, "question": "给我来杯酸奶和冰红茶,我坐在对面的桌子那儿。"}
{"id": 22, "question": "给我来份薯片和果汁,我坐在对面的桌子那儿。"}
{"id": 23, "question": "好热呀,想开空调,想要温度调低点!"}
{"id": 24, "question": "请问哪里有空位啊?"}
{"id": 25, "question": "我想坐高脚凳子。"}
{"id": 26, "question": "你带我去吧。"}
{"id": 27, "question": "可以关筒灯和关窗帘吗?"}
{"id": 28, "question": "给我来杯咖啡,哦对,再倒一杯水。"}
{"id": 29, "question": "我想来份点心和酸奶。"}
{"id": 30, "question": "酸奶。"}
{"id": 31, "question": "你好呀,你们这有啥好吃的?"}
{"id": 32, "question": "听起来都好甜呀,我女朋友爱吃水果。"}
{"id": 33, "question": "你们这人可真多。"}
{"id": 34, "question": "我女朋友怕晒,有空余的阴凉位置嘛?"}
{"id": 35, "question": "那还不错。"}
{"id": 36, "question": "我们还想一起下下棋,切磋切磋。"}
{"id": 37, "question": "请问洗手间在哪呢?"}
{"id": 38, "question": "我带着孩子呢,想要宽敞亮堂的地方。"}
{"id": 39, "question": "冰红茶"}
{"id": 40, "question": "水杯"}

View File

@ -0,0 +1,59 @@
import time
import openai
from colorama import init, Fore
from loguru import logger
from robowaiter.llm_client.tool_register import get_tools, dispatch_tool
import requests
import json
from collections import deque
import re
import jsonlines
def question2jsonl(file_path, output_path):
with open(file_path, 'r', encoding="utf-8") as f:
lines = f.read().strip()
sections = re.split(r'\n\s*\n', lines)
with jsonlines.open(output_path, "w") as file_jsonl:
k = 0
for s in sections:
x = s.strip().splitlines()
if len(x) == 2:
answer_dict = {
"answer": x[1],
"function": None
}
else:
answer_dict = {
"answer": x[1],
"function": x[2],
"args": x[3]
}
item_dict = {"id": k, 'title': x[0], 'text': str(answer_dict)}
file_jsonl.write(item_dict)
k += 1
def question2jsonl_test(file_path, output_path):
with open(file_path, 'r', encoding="utf-8") as f:
lines = f.read().strip()
sections = re.split(r'\n\s*\n', lines)
with jsonlines.open(output_path, "w") as file_jsonl:
k = 0
for s in sections:
x = s.strip().splitlines()
item_dict = {"id": k, 'question': x[0]}
file_jsonl.write(item_dict)
k += 1
if __name__ == '__main__':
file_path = "fix_questions.txt"
output_path = "fix_questions.jsonl"
question2jsonl(file_path,output_path)

View File

@ -0,0 +1,41 @@
{"id": 0, "question": "你好", "ctxs": [{"id": "0", "title": "你好", "text": "{'answer': '您好,我是这家咖啡厅的服务员,请问您要点什么?', 'function': None}", "score": "2.1162708"}, {"id": "39", "title": "冰红茶", "text": "{'answer': '好的', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Softdrink,Bar)\"}'}", "score": "1.9018158"}, {"id": "14", "title": "麻烦啦!", "text": "{'answer': '没事儿,为您服务是我的荣幸!', 'function': None}", "score": "1.7684793"}]}
{"id": 1, "question": "感谢,这些够啦,你去忙吧。", "ctxs": [{"id": "1", "title": "感谢,这些够啦,你去忙吧。", "text": "{'answer': 'ok, 有需要您再找我。', 'function': None}", "score": "1.9322114"}, {"id": "17", "title": "来杯酸奶吧。", "text": "{'answer': '好的没问题,请稍等!', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,WaterTable)\"}'}", "score": "1.913708"}, {"id": "30", "title": "酸奶。", "text": "{'answer': '稍等片刻噢~', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,Bar)\"}'}", "score": "1.886067"}]}
{"id": 2, "question": "做一杯咖啡", "ctxs": [{"id": "4", "title": "来一号桌", "text": "{'answer': '好的,我马上来一号桌', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,Table1)\"}'}", "score": "1.8168783"}, {"id": "2", "title": "做一杯咖啡", "text": "{'answer': '好的,我马上做咖啡', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Coffee,CoffeeTable)\"}'}", "score": "1.78474"}, {"id": "0", "title": "你好", "text": "{'answer': '您好,我是这家咖啡厅的服务员,请问您要点什么?', 'function': None}", "score": "1.4285033"}]}
{"id": 3, "question": "不用了。", "ctxs": [{"id": "3", "title": "不用了。", "text": "{'answer': '好的,您有需要再跟我说', 'function': 'stop_serve', 'args': '{}'}", "score": "2.3739111"}, {"id": "35", "title": "那还不错。", "text": "{'answer': '是的,那边安静,不会晒到太阳呢。', 'function': None}", "score": "2.3256633"}, {"id": "30", "title": "酸奶。", "text": "{'answer': '稍等片刻噢~', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,Bar)\"}'}", "score": "2.1746387"}]}
{"id": 4, "question": "来一号桌", "ctxs": [{"id": "4", "title": "来一号桌", "text": "{'answer': '好的,我马上来一号桌', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,Table1)\"}'}", "score": "1.879482"}, {"id": "2", "title": "做一杯咖啡", "text": "{'answer': '好的,我马上做咖啡', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Coffee,CoffeeTable)\"}'}", "score": "1.8168783"}, {"id": "0", "title": "你好", "text": "{'answer': '您好,我是这家咖啡厅的服务员,请问您要点什么?', 'function': None}", "score": "1.5156002"}]}
{"id": 5, "question": "早上好呀,我想找个能晒太阳的地方。", "ctxs": [{"id": "5", "title": "早上好呀,我想找个能晒太阳的地方。", "text": "{'answer': '您右手边就有能晒太阳的位置呢。', 'function': None}", "score": "2.0365968"}, {"id": "11", "title": "给我来份午餐套餐。", "text": "{'answer': '本店的午餐套餐包括咖啡和点心,请问您是打包还是堂食?', 'function': None}", "score": "1.93718"}, {"id": "26", "title": "你带我去吧。", "text": "{'answer': 'OK请跟我来', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,BrightTable5)\"}'}", "score": "1.9365059"}]}
{"id": 6, "question": "可以带我过去嘛?", "ctxs": [{"id": "6", "title": "可以带我过去嘛?", "text": "{'answer': '当然,请跟我来!', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,BrightTable1)\"}'}", "score": "1.8335087"}, {"id": "11", "title": "给我来份午餐套餐。", "text": "{'answer': '本店的午餐套餐包括咖啡和点心,请问您是打包还是堂食?', 'function': None}", "score": "1.810061"}, {"id": "26", "title": "你带我去吧。", "text": "{'answer': 'OK请跟我来', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,BrightTable5)\"}'}", "score": "1.8016064"}]}
{"id": 7, "question": "嘿RoboWaiter过来一下", "ctxs": [{"id": "7", "title": "嘿RoboWaiter过来一下", "text": "{'answer': '我在来的路上啦,请稍等噢!', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,BrightTable6)\"}'}", "score": "2.259178"}, {"id": "12", "title": "打包吧,快点!", "text": "{'answer': '请稍等,马上就好!', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Dessert,Bar),On(Coffee,Bar)\"}'}", "score": "1.4411571"}, {"id": "35", "title": "那还不错。", "text": "{'answer': '是的,那边安静,不会晒到太阳呢。', 'function': None}", "score": "1.3609219"}]}
{"id": 8, "question": "你们这有什么饮料嘛?", "ctxs": [{"id": "8", "title": "你们这有什么饮料嘛?", "text": "{'answer': '我们咖啡厅提供各种口味咖啡,水,冰红茶,酸奶等饮品,还提供点心蛋糕等甜品。您想点什么?', 'function': None}", "score": "1.629295"}, {"id": "24", "title": "请问哪里有空位啊?", "text": "{'answer': '现在有不少空位呢,请问您有什么要求嘛?', 'function': None}", "score": "1.4364586"}, {"id": "6", "title": "可以带我过去嘛?", "text": "{'answer': '当然,请跟我来!', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,BrightTable1)\"}'}", "score": "1.4195945"}]}
{"id": 9, "question": "来杯卡布奇诺吧。", "ctxs": [{"id": "9", "title": "来杯卡布奇诺吧。", "text": "{'answer': 'ok请稍等', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Coffee,BrightTable6)\"}'}", "score": "2.2569346"}, {"id": "17", "title": "来杯酸奶吧。", "text": "{'answer': '好的没问题,请稍等!', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,WaterTable)\"}'}", "score": "2.2550735"}, {"id": "30", "title": "酸奶。", "text": "{'answer': '稍等片刻噢~', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,Bar)\"}'}", "score": "2.114133"}]}
{"id": 10, "question": "我昨天保温杯好像落在你们咖啡厅了,你看到了吗?", "ctxs": [{"id": "10", "title": "我昨天保温杯好像落在你们咖啡厅了,你看到了吗?", "text": "{'answer': '是的我有印象保温杯在大厅的2号桌子上。', 'function': 'get_object_info', 'args': '{\"obj\":\"保温杯\"}'}", "score": "1.8863823"}, {"id": "11", "title": "给我来份午餐套餐。", "text": "{'answer': '本店的午餐套餐包括咖啡和点心,请问您是打包还是堂食?', 'function': None}", "score": "1.6723425"}, {"id": "29", "title": "我想来份点心和酸奶。", "text": "{'answer': '稍等片刻噢~', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,Bar),On(Dessert,Bar)\"}'}", "score": "1.6531497"}]}
{"id": 11, "question": "给我来份午餐套餐。", "ctxs": [{"id": "11", "title": "给我来份午餐套餐。", "text": "{'answer': '本店的午餐套餐包括咖啡和点心,请问您是打包还是堂食?', 'function': None}", "score": "2.4523244"}, {"id": "26", "title": "你带我去吧。", "text": "{'answer': 'OK请跟我来', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,BrightTable5)\"}'}", "score": "2.4017403"}, {"id": "29", "title": "我想来份点心和酸奶。", "text": "{'answer': '稍等片刻噢~', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,Bar),On(Dessert,Bar)\"}'}", "score": "2.294158"}]}
{"id": 12, "question": "打包吧,快点!", "ctxs": [{"id": "12", "title": "打包吧,快点!", "text": "{'answer': '请稍等,马上就好!', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Dessert,Bar),On(Coffee,Bar)\"}'}", "score": "1.9456723"}, {"id": "14", "title": "麻烦啦!", "text": "{'answer': '没事儿,为您服务是我的荣幸!', 'function': None}", "score": "1.7008219"}, {"id": "30", "title": "酸奶。", "text": "{'answer': '稍等片刻噢~', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,Bar)\"}'}", "score": "1.6680572"}]}
{"id": 13, "question": "你可以帮我拿来吗,我在前门的桌子前等你。", "ctxs": [{"id": "13", "title": "你可以帮我拿来吗,我在前门的桌子前等你。", "text": "{'answer': '当然,请稍等!', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(VacuumCup,Table1)\"}'}", "score": "2.107453"}, {"id": "11", "title": "给我来份午餐套餐。", "text": "{'answer': '本店的午餐套餐包括咖啡和点心,请问您是打包还是堂食?', 'function': None}", "score": "2.027741"}, {"id": "26", "title": "你带我去吧。", "text": "{'answer': 'OK请跟我来', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,BrightTable5)\"}'}", "score": "2.0193963"}]}
{"id": 14, "question": "麻烦啦!", "ctxs": [{"id": "14", "title": "麻烦啦!", "text": "{'answer': '没事儿,为您服务是我的荣幸!', 'function': None}", "score": "1.943545"}, {"id": "0", "title": "你好", "text": "{'answer': '您好,我是这家咖啡厅的服务员,请问您要点什么?', 'function': None}", "score": "1.7684793"}, {"id": "12", "title": "打包吧,快点!", "text": "{'answer': '请稍等,马上就好!', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Dessert,Bar),On(Coffee,Bar)\"}'}", "score": "1.7008219"}]}
{"id": 15, "question": "大厅的桌子好啊,快带我去呀!", "ctxs": [{"id": "15", "title": "大厅的桌子好啊,快带我去呀!", "text": "{'answer': '好的好的,请跟我来。', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,WaterTable)\"}'}", "score": "2.0067687"}, {"id": "22", "title": "给我来份薯片和果汁,我坐在对面的桌子那儿。", "text": "{'answer': '好的,请稍等。', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(BottledDrink,WaterTable),On(Yogurt,WaterTable)\"}'}", "score": "1.7625781"}, {"id": "21", "title": "给我来杯酸奶和冰红茶,我坐在对面的桌子那儿。", "text": "{'answer': '好的,请稍等。', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Chips,WaterTable),On(NFCJuice,WaterTable)\"}'}", "score": "1.7411622"}]}
{"id": 16, "question": "我想来杯水,帮我孩子拿个酸奶吧。", "ctxs": [{"id": "11", "title": "给我来份午餐套餐。", "text": "{'answer': '本店的午餐套餐包括咖啡和点心,请问您是打包还是堂食?', 'function': None}", "score": "2.0797672"}, {"id": "26", "title": "你带我去吧。", "text": "{'answer': 'OK请跟我来', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,BrightTable5)\"}'}", "score": "2.066708"}, {"id": "16", "title": "我想来杯水,帮我孩子拿个酸奶吧。", "text": "{'answer': 'ok没问题请稍等', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Water,WaterTable),On(Yogurt,WaterTable)\"}'}", "score": "2.0554054"}]}
{"id": 17, "question": "来杯酸奶吧。", "ctxs": [{"id": "17", "title": "来杯酸奶吧。", "text": "{'answer': '好的没问题,请稍等!', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,WaterTable)\"}'}", "score": "2.3097181"}, {"id": "9", "title": "来杯卡布奇诺吧。", "text": "{'answer': 'ok请稍等', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Coffee,BrightTable6)\"}'}", "score": "2.2550735"}, {"id": "30", "title": "酸奶。", "text": "{'answer': '稍等片刻噢~', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,Bar)\"}'}", "score": "2.2482162"}]}
{"id": 18, "question": "好热呀!太阳也好大!", "ctxs": [{"id": "18", "title": "好热呀!太阳也好大!", "text": "{'answer': '抱歉,我这就去开空调和关窗帘。', 'function': 'create_sub_task', 'args': '{\"goal\":\"Is(AC,On),Is(Curtain,Off)\"}'}", "score": "2.1915846"}, {"id": "14", "title": "麻烦啦!", "text": "{'answer': '没事儿,为您服务是我的荣幸!', 'function': None}", "score": "1.6174916"}, {"id": "12", "title": "打包吧,快点!", "text": "{'answer': '请稍等,马上就好!', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Dessert,Bar),On(Coffee,Bar)\"}'}", "score": "1.4927042"}]}
{"id": 19, "question": "下班啦!别忘了打扫卫生。", "ctxs": [{"id": "19", "title": "下班啦!别忘了打扫卫生。", "text": "{'answer': '收到!下班啦!', 'function': 'create_sub_task', 'args': '{\"goal\":\"Is(Floor,Clean),Is(Table1,Clean),Is(Chairs,Clean),Is(AC,Off),Is(HallLight,Off),Is(TubeLight,Off),Is(Curtain,Off)\"}'}", "score": "2.1349034"}, {"id": "30", "title": "酸奶。", "text": "{'answer': '稍等片刻噢~', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,Bar)\"}'}", "score": "2.0114217"}, {"id": "17", "title": "来杯酸奶吧。", "text": "{'answer': '好的没问题,请稍等!', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,WaterTable)\"}'}", "score": "1.9978157"}]}
{"id": 20, "question": "请问洗手间在哪里?", "ctxs": [{"id": "20", "title": "请问洗手间在哪里?", "text": "{'answer': '洗手间在这附近', 'function': 'get_object_info', 'args': '{\"obj\":\"洗手间\"}'}", "score": "2.035435"}, {"id": "24", "title": "请问哪里有空位啊?", "text": "{'answer': '现在有不少空位呢,请问您有什么要求嘛?', 'function': None}", "score": "1.8009094"}, {"id": "37", "title": "请问洗手间在哪呢?", "text": "{'answer': '在后门,你们的左侧往里走就是了呢。', 'function': 'get_object_info', 'args': '{\"obj\":\"洗手间\"}'}", "score": "1.6338948"}]}
{"id": 21, "question": "给我来杯酸奶和冰红茶,我坐在对面的桌子那儿。", "ctxs": [{"id": "29", "title": "我想来份点心和酸奶。", "text": "{'answer': '稍等片刻噢~', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,Bar),On(Dessert,Bar)\"}'}", "score": "2.0604768"}, {"id": "11", "title": "给我来份午餐套餐。", "text": "{'answer': '本店的午餐套餐包括咖啡和点心,请问您是打包还是堂食?', 'function': None}", "score": "2.0411088"}, {"id": "22", "title": "给我来份薯片和果汁,我坐在对面的桌子那儿。", "text": "{'answer': '好的,请稍等。', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(BottledDrink,WaterTable),On(Yogurt,WaterTable)\"}'}", "score": "2.0308568"}]}
{"id": 22, "question": "给我来份薯片和果汁,我坐在对面的桌子那儿。", "ctxs": [{"id": "29", "title": "我想来份点心和酸奶。", "text": "{'answer': '稍等片刻噢~', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,Bar),On(Dessert,Bar)\"}'}", "score": "2.060318"}, {"id": "22", "title": "给我来份薯片和果汁,我坐在对面的桌子那儿。", "text": "{'answer': '好的,请稍等。', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(BottledDrink,WaterTable),On(Yogurt,WaterTable)\"}'}", "score": "2.0375805"}, {"id": "11", "title": "给我来份午餐套餐。", "text": "{'answer': '本店的午餐套餐包括咖啡和点心,请问您是打包还是堂食?', 'function': None}", "score": "2.033156"}]}
{"id": 23, "question": "好热呀,想开空调,想要温度调低点!", "ctxs": [{"id": "23", "title": "好热呀,想开空调,想要温度调低点!", "text": "{'answer': '抱歉,我马上去开空调并调低空调温度。', 'function': 'create_sub_task', 'args': '{\"goal\":\"Is(ACTemperature,Down)\"}'}", "score": "1.7416465"}, {"id": "12", "title": "打包吧,快点!", "text": "{'answer': '请稍等,马上就好!', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Dessert,Bar),On(Coffee,Bar)\"}'}", "score": "1.6216934"}, {"id": "15", "title": "大厅的桌子好啊,快带我去呀!", "text": "{'answer': '好的好的,请跟我来。', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,WaterTable)\"}'}", "score": "1.4971969"}]}
{"id": 24, "question": "请问哪里有空位啊?", "ctxs": [{"id": "24", "title": "请问哪里有空位啊?", "text": "{'answer': '现在有不少空位呢,请问您有什么要求嘛?', 'function': None}", "score": "2.1733356"}, {"id": "20", "title": "请问洗手间在哪里?", "text": "{'answer': '洗手间在这附近', 'function': 'get_object_info', 'args': '{\"obj\":\"洗手间\"}'}", "score": "1.8009094"}, {"id": "23", "title": "好热呀,想开空调,想要温度调低点!", "text": "{'answer': '抱歉,我马上去开空调并调低空调温度。', 'function': 'create_sub_task', 'args': '{\"goal\":\"Is(ACTemperature,Down)\"}'}", "score": "1.4684138"}]}
{"id": 25, "question": "我想坐高脚凳子。", "ctxs": [{"id": "25", "title": "我想坐高脚凳子。", "text": "{'answer': '没问题,大厅里就有这样的位置呢!', 'function': None}", "score": "2.4146953"}, {"id": "26", "title": "你带我去吧。", "text": "{'answer': 'OK请跟我来', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,BrightTable5)\"}'}", "score": "2.239801"}, {"id": "11", "title": "给我来份午餐套餐。", "text": "{'answer': '本店的午餐套餐包括咖啡和点心,请问您是打包还是堂食?', 'function': None}", "score": "2.2386413"}]}
{"id": 26, "question": "你带我去吧。", "ctxs": [{"id": "26", "title": "你带我去吧。", "text": "{'answer': 'OK请跟我来', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,BrightTable5)\"}'}", "score": "2.4195614"}, {"id": "11", "title": "给我来份午餐套餐。", "text": "{'answer': '本店的午餐套餐包括咖啡和点心,请问您是打包还是堂食?', 'function': None}", "score": "2.4017403"}, {"id": "29", "title": "我想来份点心和酸奶。", "text": "{'answer': '稍等片刻噢~', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,Bar),On(Dessert,Bar)\"}'}", "score": "2.2611532"}]}
{"id": 27, "question": "可以关筒灯和关窗帘吗?", "ctxs": [{"id": "27", "title": "可以关筒灯和关窗帘吗?", "text": "{'answer': '好的,请稍等。', 'function': 'create_sub_task', 'args': '{\"goal\":\"Is(TubeLight,Off),Is(Curtain,Off)\"}'}", "score": "1.7811872"}, {"id": "29", "title": "我想来份点心和酸奶。", "text": "{'answer': '稍等片刻噢~', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,Bar),On(Dessert,Bar)\"}'}", "score": "1.4139738"}, {"id": "8", "title": "你们这有什么饮料嘛?", "text": "{'answer': '我们咖啡厅提供各种口味咖啡,水,冰红茶,酸奶等饮品,还提供点心蛋糕等甜品。您想点什么?', 'function': None}", "score": "1.3538787"}]}
{"id": 28, "question": "给我来杯咖啡,哦对,再倒一杯水。", "ctxs": [{"id": "28", "title": "给我来杯咖啡,哦对,再倒一杯水。", "text": "{'answer': '速速就好,请稍等!', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Water,WaterTable),On(Coffee,WaterTable)\"}'}", "score": "2.0709205"}, {"id": "11", "title": "给我来份午餐套餐。", "text": "{'answer': '本店的午餐套餐包括咖啡和点心,请问您是打包还是堂食?', 'function': None}", "score": "2.0688043"}, {"id": "26", "title": "你带我去吧。", "text": "{'answer': 'OK请跟我来', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,BrightTable5)\"}'}", "score": "2.0623634"}]}
{"id": 29, "question": "我想来份点心和酸奶。", "ctxs": [{"id": "29", "title": "我想来份点心和酸奶。", "text": "{'answer': '稍等片刻噢~', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,Bar),On(Dessert,Bar)\"}'}", "score": "2.432145"}, {"id": "11", "title": "给我来份午餐套餐。", "text": "{'answer': '本店的午餐套餐包括咖啡和点心,请问您是打包还是堂食?', 'function': None}", "score": "2.294158"}, {"id": "26", "title": "你带我去吧。", "text": "{'answer': 'OK请跟我来', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,BrightTable5)\"}'}", "score": "2.2611532"}]}
{"id": 30, "question": "酸奶。", "ctxs": [{"id": "30", "title": "酸奶。", "text": "{'answer': '稍等片刻噢~', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,Bar)\"}'}", "score": "2.4139419"}, {"id": "17", "title": "来杯酸奶吧。", "text": "{'answer': '好的没问题,请稍等!', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,WaterTable)\"}'}", "score": "2.2482162"}, {"id": "35", "title": "那还不错。", "text": "{'answer': '是的,那边安静,不会晒到太阳呢。', 'function': None}", "score": "2.2257895"}]}
{"id": 31, "question": "你好呀,你们这有啥好吃的?", "ctxs": [{"id": "31", "title": "你好呀,你们这有啥好吃的?", "text": "{'answer': '我们咖啡厅有咖啡,还有饮料和点心蛋糕呢?您想点什么不?', 'function': None}", "score": "1.7358209"}, {"id": "15", "title": "大厅的桌子好啊,快带我去呀!", "text": "{'answer': '好的好的,请跟我来。', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,WaterTable)\"}'}", "score": "1.6412368"}, {"id": "12", "title": "打包吧,快点!", "text": "{'answer': '请稍等,马上就好!', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Dessert,Bar),On(Coffee,Bar)\"}'}", "score": "1.5643499"}]}
{"id": 32, "question": "听起来都好甜呀,我女朋友爱吃水果。", "ctxs": [{"id": "32", "title": "听起来都好甜呀,我女朋友爱吃水果。", "text": "{'answer': '没问题,我们也提供草莓、橘子和苹果等水果呢。', 'function': None}", "score": "2.3256402"}, {"id": "26", "title": "你带我去吧。", "text": "{'answer': 'OK请跟我来', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,BrightTable5)\"}'}", "score": "1.9134985"}, {"id": "11", "title": "给我来份午餐套餐。", "text": "{'answer': '本店的午餐套餐包括咖啡和点心,请问您是打包还是堂食?', 'function': None}", "score": "1.9034417"}]}
{"id": 33, "question": "你们这人可真多。", "ctxs": [{"id": "33", "title": "你们这人可真多。", "text": "{'answer': '目前还有空位呢,你们可以坐下来一起品尝水果呢?', 'function': 'get_object_info', 'args': '{\"obj\":\"空位\"}'}", "score": "2.453949"}, {"id": "17", "title": "来杯酸奶吧。", "text": "{'answer': '好的没问题,请稍等!', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,WaterTable)\"}'}", "score": "2.0001283"}, {"id": "9", "title": "来杯卡布奇诺吧。", "text": "{'answer': 'ok请稍等', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Coffee,BrightTable6)\"}'}", "score": "1.9580796"}]}
{"id": 34, "question": "我女朋友怕晒,有空余的阴凉位置嘛?", "ctxs": [{"id": "34", "title": "我女朋友怕晒,有空余的阴凉位置嘛?", "text": "{'answer': '我查询完毕,阴凉位置有两个。在你们右侧往里走,就有适合你们二位的沙发呢。', 'function': 'get_object_info', 'args': '{\"obj\":\"阴凉空位\"}'}", "score": "1.9651799"}, {"id": "32", "title": "听起来都好甜呀,我女朋友爱吃水果。", "text": "{'answer': '没问题,我们也提供草莓、橘子和苹果等水果呢。', 'function': None}", "score": "1.7868259"}, {"id": "26", "title": "你带我去吧。", "text": "{'answer': 'OK请跟我来', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,BrightTable5)\"}'}", "score": "1.6948454"}]}
{"id": 35, "question": "那还不错。", "ctxs": [{"id": "35", "title": "那还不错。", "text": "{'answer': '是的,那边安静,不会晒到太阳呢。', 'function': None}", "score": "2.355879"}, {"id": "3", "title": "不用了。", "text": "{'answer': '好的,您有需要再跟我说', 'function': 'stop_serve', 'args': '{}'}", "score": "2.3256633"}, {"id": "30", "title": "酸奶。", "text": "{'answer': '稍等片刻噢~', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,Bar)\"}'}", "score": "2.2257895"}]}
{"id": 36, "question": "我们还想一起下下棋,切磋切磋。", "ctxs": [{"id": "11", "title": "给我来份午餐套餐。", "text": "{'answer': '本店的午餐套餐包括咖啡和点心,请问您是打包还是堂食?', 'function': None}", "score": "2.2116985"}, {"id": "26", "title": "你带我去吧。", "text": "{'answer': 'OK请跟我来', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,BrightTable5)\"}'}", "score": "2.184878"}, {"id": "36", "title": "我们还想一起下下棋,切磋切磋。", "text": "{'answer': '我们这有两份军棋呢,在大厅的三号桌上。', 'function': 'get_object_info', 'args': '{\"obj\":\"棋\"}'}", "score": "2.1775815"}]}
{"id": 37, "question": "请问洗手间在哪呢?", "ctxs": [{"id": "37", "title": "请问洗手间在哪呢?", "text": "{'answer': '在后门,你们的左侧往里走就是了呢。', 'function': 'get_object_info', 'args': '{\"obj\":\"洗手间\"}'}", "score": "1.6928627"}, {"id": "20", "title": "请问洗手间在哪里?", "text": "{'answer': '洗手间在这附近', 'function': 'get_object_info', 'args': '{\"obj\":\"洗手间\"}'}", "score": "1.6338948"}, {"id": "14", "title": "麻烦啦!", "text": "{'answer': '没事儿,为您服务是我的荣幸!', 'function': None}", "score": "1.413432"}]}
{"id": 38, "question": "我带着孩子呢,想要宽敞亮堂的地方。", "ctxs": [{"id": "38", "title": "我带着孩子呢,想要宽敞亮堂的地方。", "text": "{'answer': '好的,我明白了,那么我们推荐您到大厅的桌子,那里的空间比较宽敞,环境也比较明亮,适合带着孩子一起用餐。', 'function': None}", "score": "2.1044755"}, {"id": "11", "title": "给我来份午餐套餐。", "text": "{'answer': '本店的午餐套餐包括咖啡和点心,请问您是打包还是堂食?', 'function': None}", "score": "1.9571061"}, {"id": "26", "title": "你带我去吧。", "text": "{'answer': 'OK请跟我来', 'function': 'create_sub_task', 'args': '{\"goal\":\"At(Robot,BrightTable5)\"}'}", "score": "1.9560921"}]}
{"id": 39, "question": "冰红茶", "ctxs": [{"id": "0", "title": "你好", "text": "{'answer': '您好,我是这家咖啡厅的服务员,请问您要点什么?', 'function': None}", "score": "1.9018158"}, {"id": "39", "title": "冰红茶", "text": "{'answer': '好的', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Softdrink,Bar)\"}'}", "score": "1.8260775"}, {"id": "14", "title": "麻烦啦!", "text": "{'answer': '没事儿,为您服务是我的荣幸!', 'function': None}", "score": "1.6870449"}]}
{"id": 40, "question": "水杯", "ctxs": [{"id": "40", "title": "水杯", "text": "{'answer': '好的', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Glass,Bar)\"}'}", "score": "2.177751"}, {"id": "0", "title": "你好", "text": "{'answer': '您好,我是这家咖啡厅的服务员,请问您要点什么?', 'function': None}", "score": "1.4147856"}, {"id": "30", "title": "酸奶。", "text": "{'answer': '稍等片刻噢~', 'function': 'create_sub_task', 'args': '{\"goal\":\"On(Yogurt,Bar)\"}'}", "score": "1.3056059"}]}

View File

@ -0,0 +1,274 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import os
import argparse
import csv
import json
import logging
import pickle
import time
import glob
from pathlib import Path
import numpy as np
import torch
import transformers
import robowaiter.algos.retrieval.retrieval_lm.src.index
import robowaiter.algos.retrieval.retrieval_lm.src.contriever
import robowaiter.algos.retrieval.retrieval_lm.src.utils
import robowaiter.algos.retrieval.retrieval_lm.src.slurm
import robowaiter.algos.retrieval.retrieval_lm.src.data
from robowaiter.algos.retrieval.retrieval_lm.src.evaluation import calculate_matches
import robowaiter.algos.retrieval.retrieval_lm.src.normalize_text
from robowaiter.algos.retrieval.retrieval_lm import src
os.environ["TOKENIZERS_PARALLELISM"] = "true"
def embed_queries(args, queries, model, tokenizer):
model.eval()
embeddings, batch_question = [], []
with torch.no_grad():
for k, q in enumerate(queries):
if args.lowercase:
q = q.lower()
if args.normalize_text:
q = src.normalize_text.normalize(q)
batch_question.append(q)
if len(batch_question) == args.per_gpu_batch_size or k == len(queries) - 1:
encoded_batch = tokenizer.batch_encode_plus(
batch_question,
return_tensors="pt",
max_length=args.question_maxlength,
padding=True,
truncation=True,
)
encoded_batch = {k: v for k, v in encoded_batch.items()}
output = model(**encoded_batch)
embeddings.append(output.cpu())
batch_question = []
embeddings = torch.cat(embeddings, dim=0)
print(f"Questions embeddings shape: {embeddings.size()}")
return embeddings.numpy()
def index_encoded_data(index, embedding_files, indexing_batch_size):
allids = []
allembeddings = np.array([])
for i, file_path in enumerate(embedding_files):
print(f"Loading file {file_path}")
with open(file_path, "rb") as fin:
ids, embeddings = pickle.load(fin)
allembeddings = np.vstack((allembeddings, embeddings)) if allembeddings.size else embeddings
allids.extend(ids)
while allembeddings.shape[0] > indexing_batch_size:
allembeddings, allids = add_embeddings(index, allembeddings, allids, indexing_batch_size)
while allembeddings.shape[0] > 0:
allembeddings, allids = add_embeddings(index, allembeddings, allids, indexing_batch_size)
print("Data indexing completed.")
def add_embeddings(index, embeddings, ids, indexing_batch_size):
end_idx = min(indexing_batch_size, embeddings.shape[0])
ids_toadd = ids[:end_idx]
embeddings_toadd = embeddings[:end_idx]
ids = ids[end_idx:]
embeddings = embeddings[end_idx:]
index.index_data(ids_toadd, embeddings_toadd)
return embeddings, ids
def validate(data, workers_num):
match_stats = calculate_matches(data, workers_num)
top_k_hits = match_stats.top_k_hits
print("Validation results: top k documents hits %s", top_k_hits)
top_k_hits = [v / len(data) for v in top_k_hits]
message = ""
for k in [5, 10, 20, 100]:
if k <= len(top_k_hits):
message += f"R@{k}: {top_k_hits[k-1]} "
print(message)
return match_stats.questions_doc_hits
def add_passages(data, passages, top_passages_and_scores):
# add passages to original data
merged_data = []
assert len(data) == len(top_passages_and_scores)
for i, d in enumerate(data):
results_and_scores = top_passages_and_scores[i]
docs = [passages[int(doc_id)] for doc_id in results_and_scores[0]]
scores = [str(score) for score in results_and_scores[1]]
ctxs_num = len(docs)
d["ctxs"] = [
{
"id": results_and_scores[0][c],
"title": docs[c]["title"],
"text": docs[c]["text"],
"score": scores[c],
}
for c in range(ctxs_num)
]
def add_hasanswer(data, hasanswer):
# add hasanswer to data
for i, ex in enumerate(data):
for k, d in enumerate(ex["ctxs"]):
d["hasanswer"] = hasanswer[i][k]
def load_data(data_path):
if data_path.endswith(".json"):
with open(data_path, "r") as fin:
data = json.load(fin)
elif data_path.endswith(".jsonl"):
data = []
with open(data_path, "r",encoding='utf-8') as fin:
for k, example in enumerate(fin):
example = json.loads(example)
data.append(example)
return data
parser = argparse.ArgumentParser()
parser.add_argument(
"--data",
type=str,
default=None,
help=".json file containing question and answers, similar format to reader data",
)
parser.add_argument("--passages", type=str, default=None, help="Path to passages (.tsv file)")
parser.add_argument("--passages_embeddings", type=str, default=None, help="Glob path to encoded passages")
parser.add_argument(
"--output_dir", type=str, default=None, help="Results are written to outputdir with data suffix"
)
parser.add_argument("--n_docs", type=int, default=3, help="Number of documents to retrieve per questions")
parser.add_argument(
"--validation_workers", type=int, default=32, help="Number of parallel processes to validate results"
)
parser.add_argument("--per_gpu_batch_size", type=int, default=64, help="Batch size for question encoding")
parser.add_argument(
"--save_or_load_index", action="store_true", help="If enabled, save index and load index if it exists"
)
parser.add_argument(
"--model_name_or_path", type=str, help="path to directory containing model weights and config file"
)
parser.add_argument("--no_fp16", action="store_true", help="inference in fp32")
parser.add_argument("--question_maxlength", type=int, default=512, help="Maximum number of tokens in a question")
parser.add_argument(
"--indexing_batch_size", type=int, default=1000000, help="Batch size of the number of passages indexed"
)
parser.add_argument("--projection_size", type=int, default=768)
parser.add_argument(
"--n_subquantizers",
type=int,
default=0,
help="Number of subquantizer used for vector quantization, if 0 flat index is used",
)
parser.add_argument("--n_bits", type=int, default=8, help="Number of bits per subquantizer")
parser.add_argument("--lang", nargs="+")
parser.add_argument("--dataset", type=str, default="none")
parser.add_argument("--lowercase", action="store_true", help="lowercase text before encoding")
parser.add_argument("--normalize_text", action="store_true", help="normalize text")
args = parser.parse_args()
#src.slurm.init_distributed_mode(args)
from robowaiter.utils.basic import get_root_path
retrieval_lm_path = f'{get_root_path()}/robowaiter/algos/retrieval/retrieval_lm'
args.model_name_or_path = f"{retrieval_lm_path}/../contriever-msmarco"
args.passages_embeddings = f'{retrieval_lm_path}/robot_embeddings/*'
args.no_fp16 = True
args.passages = f"{retrieval_lm_path}/fix_questions.jsonl"
class Retrieval():
def __init__(self, threshold):
print(f"Loading model from: {args.model_name_or_path}")
model, tokenizer, _ = src.contriever.load_retriever(args.model_name_or_path)
model.eval()
#model = model.cuda()
if not args.no_fp16:
model = model.half()
self.model = model
self.tokenizer = tokenizer
self.threshold = threshold
index = src.index.Indexer(args.projection_size, args.n_subquantizers, args.n_bits)
self.index = index
# index all passages
input_paths = glob.glob(args.passages_embeddings)
input_paths = sorted(input_paths)
embeddings_dir = os.path.dirname(input_paths[0])
index_path = os.path.join(embeddings_dir, "index.faiss")
if args.save_or_load_index and os.path.exists(index_path):
index.deserialize_from(embeddings_dir)
else:
print(f"Indexing passages from files {input_paths}")
start_time_indexing = time.time()
index_encoded_data(index, input_paths, args.indexing_batch_size)
print(f"Indexing time: {time.time()-start_time_indexing:.1f} s.")
if args.save_or_load_index:
index.serialize(embeddings_dir)
# load passages
passages = src.data.load_passages(args.passages)
passage_id_map = {x["id"]: x for x in passages}
self.passage_id_map = passage_id_map
def get_result(self, queries):
questions_embedding = embed_queries(args, [queries], self.model, self.tokenizer)
top_ids_and_scores = self.index.search_knn(questions_embedding, args.n_docs)
print(top_ids_and_scores)
data = [{"question":queries}]
add_passages(data, self.passage_id_map, top_ids_and_scores)
if float(data[0]["ctxs"][0]["score"]) >= self.threshold:
result_dict = eval(data[0]["ctxs"][0]["text"])
result_dict["question"] = queries
return result_dict
else:
return None
# data_paths = glob.glob(args.data)
# alldata = []
# for path in data_paths:
# data = load_data(path)
# output_path = os.path.join(args.output_dir, os.path.basename(path))
#
# queries = [ex["question"] for ex in data]
#
# # get top k results
# start_time_retrieval = time.time()
# print(f"Search time: {time.time()-start_time_retrieval:.1f} s.")
#
# add_passages(data, passage_id_map, top_ids_and_scores)
# #hasanswer = validate(data, args.validation_workers)
# #add_hasanswer(data, hasanswer)
# os.makedirs(os.path.dirname(output_path), exist_ok=True)
# with open(output_path, "w",encoding='utf-8') as fout:
# for ex in data:
# json.dump(ex, fout, ensure_ascii=False)
# fout.write("\n")
# print(f"Saved results to {output_path}")
if __name__ == '__main__':
r = Retrieval(1.8)
print(r.get_result("来一号桌"))
print(r.get_result("去二号桌"))
print(r.get_result("去三号桌"))

View File

@ -1,14 +1,24 @@
import os
from robowaiter.algos.retrieval.retrieval_lm.question2jsonl import question2jsonl,question2jsonl_test
path_goal_states_with_description = os.path.join('../../../behavior_tree/dataset/goal_states_with_description.jsonl')
file_path = "fix_questions.txt"
output_path = "fix_questions.jsonl"
output_test_path = "fix_questions_test.jsonl"
cmd_goal_states_with_descrip_to_train = f' .\process_json.py --passages {path_goal_states_with_description} --mode train'
cmd_goal_states_with_descrip_to_test = f" .\process_json.py --passages .\\train_robot.jsonl --mode test"
cmd_get_embedding = f" generate_passage_embeddings.py --model_name_or_path ../contriever-msmarco --passages train_robot.jsonl --output_dir robot_embeddings --shard_id 0 --num_shards 1 --per_gpu_batch_size 500 --no_fp16"
cmd_test_retri = f" passage_retrieval2.py --data test_robot.jsonl --model_name_or_path ../contriever-msmarco --passages train_robot.jsonl --passages_embeddings \"robot_embeddings/*\" --output_dir retr_result --n_docs 3 --no_fp16"
question2jsonl(file_path,output_path)
question2jsonl_test(file_path,output_test_path)
conda_path = os.path.join("C:/Users/Estrella/.conda/envs/py310/python.exe")
os.system(conda_path + cmd_goal_states_with_descrip_to_train)
os.system(conda_path + cmd_goal_states_with_descrip_to_test)
os.system(conda_path + cmd_get_embedding)
os.system(conda_path + cmd_test_retri)
# path_goal_states_with_description = os.path.join('../../../behavior_tree/dataset/goal_states_with_description.jsonl')
#
# cmd_goal_states_with_descrip_to_train = f' .\process_json.py --passages {path_goal_states_with_description} --mode train'
# cmd_goal_states_with_descrip_to_test = f" .\process_json.py --passages .\\train_robot.jsonl --mode test"
cmd_get_embedding = f" generate_passage_embeddings.py --model_name_or_path ../contriever-msmarco --passages {output_path} --output_dir robot_embeddings --shard_id 0 --num_shards 1 --per_gpu_batch_size 500 --no_fp16"
cmd_test_retri = f" passage_retrieval2.py --data {output_test_path} --model_name_or_path ../contriever-msmarco --passages {output_path} --passages_embeddings \"robot_embeddings/*\" --output_dir retr_result --n_docs 3 --no_fp16"
# conda_path = os.path.join("C:/Users/Estrella/.conda/envs/py310/python.exe")
# os.system(conda_path + cmd_goal_states_with_descrip_to_train)
# os.system(conda_path + cmd_goal_states_with_descrip_to_test)
os.system("python " + cmd_get_embedding)
os.system("python " + cmd_test_retri)

View File

@ -5,7 +5,7 @@ import torch
import transformers
from transformers import BertModel, XLMRobertaModel
from src import utils
from robowaiter.algos.retrieval.retrieval_lm.src import utils
class Contriever(BertModel):

View File

@ -12,7 +12,7 @@ import logging
from collections import defaultdict
import torch.distributed as dist
from src import dist_utils
from robowaiter.algos.retrieval.retrieval_lm.src import dist_utils
logger = logging.getLogger(__name__)

View File

@ -8,7 +8,7 @@ import errno
from typing import Union, Tuple, List, Dict
from collections import defaultdict
from src import dist_utils
from robowaiter.algos.retrieval.retrieval_lm.src import dist_utils
Number = Union[float, int]

View File

@ -1,13 +1,13 @@
import py_trees as ptree
from robowaiter.behavior_lib._base.Act import Act
from robowaiter.llm_client.multi_rounds import ask_llm, new_history
from robowaiter.llm_client.multi_rounds_retri import ask_llm, new_history
import random
from collections import deque
# import spacy
# nlp = spacy.load('en_core_web_lg')
import spacy
nlp = spacy.load('en_core_web_lg')
class History(deque):
def __init__(self,scene,customer_name):

View File

@ -1,140 +0,0 @@
import py_trees as ptree
from robowaiter.behavior_lib._base.Act import Act
from robowaiter.llm_client.multi_rounds import ask_llm, new_history
import random
class DealChatNLP(Act):
def __init__(self):
super().__init__()
self.chat_history = ""
self.function_success = False
self.func_map = {
"create_sub_task": self.create_sub_task,
"stop_serve": self.stop_serve,
"get_object_info": self.get_object_info,
"find_location": self.find_location
}
def _update(self) -> ptree.common.Status:
# if self.scene.status?
name, sentence = self.scene.state['chat_list'].pop(0)
if name == "Goal":
self.create_sub_task(goal=sentence)
return ptree.common.Status.RUNNING
if name not in self.scene.state["chat_history"]:
self.scene.state["chat_history"][name] = new_history()
history = self.scene.state["chat_history"][name]
self.scene.state["attention"]["customer"] = name
self.scene.state["serve_state"] = {
"last_chat_time": self.scene.time,
}
function_call, response = ask_llm(sentence,history,func_map=self.func_map)
self.scene.chat_bubble(response) # 机器人输出对话
return ptree.common.Status.RUNNING
def create_sub_task(self, **args):
try:
goal = args['goal']
w = goal.split(")")
goal_set = set()
goal_set.add(w[0] + ")")
if len(w) > 1:
for x in w[1:]:
if x != "":
goal_set.add(x[1:] + ")")
self.function_success = True
except:
print("参数解析错误")
self.scene.robot.expand_sub_task_tree(goal_set)
def get_object_info(self,**args):
import spacy
nlp = spacy.load('en_core_web_lg')
try:
obj = args['obj']
self.function_success = True
except:
obj = None
print("参数解析错误")
near_object = "None"
max_similarity = 0.02
similar_word = None
# 场景中现有物品
cur_things = set()
for item in self.scene.status.objects:
cur_things.add(item.name)
# obj与现有物品进行相似度匹配
query_token = nlp(obj)
for w in cur_things:
word_token = nlp(w)
similarity = query_token.similarity(word_token)
if similarity > max_similarity:
max_similarity = similarity
similar_word = w
if similar_word:
print("max_similarity:",max_similarity,"similar_word:",similar_word)
if similar_word: # 存在同义词说明场景中存在该物品
near_object = random.choices(list(cur_things), k=5) # 返回场景中的5个物品
if obj == "洗手间":
near_object = "Door"
return near_object
def find_location(self, **args):
import spacy
nlp = spacy.load('en_core_web_lg')
try:
location = args['obj']
self.function_success = True
except:
obj = None
print("参数解析错误")
near_location = None
# 用户咨询的地点
query_token = nlp(location)
max_similarity = 0
similar_word = None
# 到自己维护的地点列表中找同义词
for w in self.scene.all_loc_en:
word_token = nlp(w)
similarity = query_token.similarity(word_token)
if similarity > max_similarity:
max_similarity = similarity
similar_word = w
print("similarity:", max_similarity, "similar_word:", similar_word)
# 存在同义词说明客户咨询的地点有效
if similar_word:
mp = list(self.scene.loc_map_en[similar_word])
near_location = random.choice(mp)
return near_location
def stop_serve(self,**args):
return "好的"

View File

@ -1,10 +1,8 @@
import json
import time
import openai
from colorama import init, Fore
from loguru import logger
import json
from robowaiter.llm_client.tool_register import get_tools, dispatch_tool
import requests
import json

View File

@ -0,0 +1,179 @@
import time
import openai
from colorama import init, Fore
from loguru import logger
from robowaiter.llm_client.tool_register import get_tools, dispatch_tool
import requests
import json
from collections import deque
import urllib3
import copy
init(autoreset=True)
from robowaiter.utils import get_root_path
import os
import re
from robowaiter.llm_client.single_round import single_round
from robowaiter.algos.retrieval.retrieval_lm.retrieval import Retrieval
########################################
# 该文件实现了与大模型的通信以及工具调用
########################################
# 忽略https的安全性警告
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
base_url = "https://45.125.46.134:25344" # 本地部署的地址,或者使用你访问模型的API地址
root_path = get_root_path()
# load test questions
# file_path = os.path.join(root_path,"robowaiter/llm_client/data/fix_questions.txt")
#
# fix_questions_dict = {}
# no_reply_functions = ["create_sub_task"]
#
functions = get_tools()
retrieval = Retrieval(threshold=1.8)
role_system = [{
"role": "system",
"content": "你是RoboWaiter,一个由HPCL团队开发的机器人服务员你在咖啡厅工作。接受顾客的指令并调用工具函数来完成各种服务任务。如果顾客问你们这里有什么或者想要点单你说我们咖啡厅提供咖啡点心酸奶等食物。如果顾客不需要你了你就回到吧台招待。如果顾客叫你去做某事你回复好的我马上去做这件事。",
}]
def new_history(max_length=7):
history = deque(maxlen=max_length)
return history
def new_response():
return {'choices': [{'index': 0, 'message':{} }]}
def parse_fix_question(question):
response = new_response()
fix_ans = question
if not fix_ans['function']: #简单对话
message = {'role': 'assistant', 'content': fix_ans["answer"], 'name': None,
'function_call': None}
else:
func = fix_ans["function"]
args = fix_ans["args"]
# tool_response = dispatch_tool(function_call["name"], json.loads(args))
# logger.info(f"Tool Call Response: {tool_response}")
message = {'role': 'assistant',
'content': f"\n <|assistant|> {func}({args})\n ```python\ntool_call(goal={args})\n```",
'name': None,
'function_call': {'name': func, 'arguments': args}}
response["choices"][0]["message"] = message
return response
def get_response(sentence, history, allow_function_call = True):
if sentence:
history.append({"role": "user", "content": sentence})
retrieval_result = retrieval.get_result(sentence)
if retrieval_result is not None:
time.sleep(2)
# 处理多轮
if retrieval_result["answer"] == "multi_rounds" and len(history) >= 2:
print("触发多轮检索")
last_content = ""
for i in range(-2,-len(history)):
if history[i]["role"] == "user":
last_content = history[i]["content"]
break
retrieval_result = retrieval.get_result(last_content + sentence)
if retrieval_result is not None:
return parse_fix_question(retrieval_result)
params = dict(model="RoboWaiter")
params['messages'] = role_system + list(history)
if allow_function_call:
params["functions"] = functions
response = requests.post(f"{base_url}/v1/chat/completions", json=params, stream=False, verify=False)
decoded_line = response.json()
return decoded_line
def deal_response(response, history, func_map=None ):
if response["choices"][0]["message"].get("function_call"):
function_call = response["choices"][0]["message"]["function_call"]
logger.info(f"Function Call Response: {function_call}")
function_name = function_call["name"]
function_args = json.loads(function_call["arguments"])
if func_map:
tool_response = func_map[function_name](**function_args)
else:
try:
tool_response = dispatch_tool(function_call["name"], function_args)
logger.info(f"Tool Call Response: {tool_response}")
except:
logger.info(f"重试工具调用")
# tool_response = dispatch_tool(function_call["name"], function_args)
return function_name,None
return_message = response["choices"][0]["message"]
history.append(return_message)
t = {
"role": "function",
"name": function_call["name"],
"content": str(tool_response), # 调用函数返回结果
}
history.append(t)
return function_call["name"], tool_response
else:
return_message = response["choices"][0]["message"]
reply = return_message["content"]
history.append(return_message)
logger.info(f"Final Reply: \n{reply}")
return False, reply
def ask_llm(question,history, func_map=None, retry=3):
response = get_response(question, history)
print(f"response: {response}")
function_call,result = deal_response(response, history, func_map)
sentence = response["choices"][0]["message"]["content"]
if function_call:
if function_call == "create_sub_task":
result = single_round(sentence,
"你是机器人服务员,请把以下句子换一种表述方式对顾客说,但是意思不变,尽量简短:\n")
# elif function_call in ["get_object_info","find_location"] :
else:
result = single_round(f"你是机器人服务员,顾客想知道{question}, 你的具身场景查询返回的是{result},把返回的英文名词翻译成中文,请把按照以下句子对顾客说,{sentence}, 尽量简短。\n")
message = {'role': 'assistant', 'content': result, 'name': None,
'function_call': None}
history.append(message)
# response = get_response(None, history,allow_function_call=False)
# _,result = deal_response(response, history, func_map)
print(f'{len(history)}条历史记录:')
for x in history:
print(x)
return function_call, result
if __name__ == "__main__":
question = input("\n顾客:")
history = new_history()
n = 1
max_retry = 2
while question != 'end':
function_call, return_message = ask_llm(question,history)
question = input("\n顾客:")

View File

@ -233,6 +233,7 @@ def get_answer():
answer = example["ctxs"][0]["text"]
score = example["ctxs"][0]["score"]
return score, answer
def retri(query):
get_json(query)
parser = argparse.ArgumentParser()

View File

@ -0,0 +1,209 @@
你好
您好,我是这家咖啡厅的服务员,请问您要点什么?
感谢,这些够啦,你去忙吧。
ok, 有需要您再找我。
做一杯咖啡
好的,我马上做咖啡
create_sub_task
{"goal":"On(Coffee,CoffeeTable)"}
不用了。
好的,您有需要再跟我说
stop_serve
{}
来一号桌
好的,我马上来一号桌
create_sub_task
{"goal":"At(Robot,Table1)"}
早上好呀,我想找个能晒太阳的地方。
您右手边就有能晒太阳的位置呢。
可以带我过去嘛?
当然,请跟我来!
create_sub_task
{"goal":"At(Robot,BrightTable1)"}
RoboWaiter过来一下
我在来的路上啦,请稍等噢!
create_sub_task
{"goal":"At(Robot,BrightTable6)"}
你们这有什么饮料嘛?
我们咖啡厅提供各种口味咖啡,水,冰红茶,酸奶等饮品,还提供点心蛋糕等甜品。您想点什么?
来杯卡布奇诺吧。
ok请稍等
create_sub_task
{"goal":"On(Coffee,BrightTable6)"}
我昨天保温杯好像落在你们咖啡厅了,你看到了吗?
是的我有印象保温杯在大厅的2号桌子上。
get_object_info
{"obj":"保温杯"}
给我来份午餐套餐。
本店的午餐套餐包括咖啡和点心,请问您是打包还是堂食?
打包吧,快点!
请稍等,马上就好!
create_sub_task
{"goal":"On(Dessert,Bar),On(Coffee,Bar)"}
你可以帮我拿来吗,我在前门的桌子前等你。
当然,请稍等!
create_sub_task
{"goal":"On(VacuumCup,Table1)"}
麻烦啦!
没事儿,为您服务是我的荣幸!
大厅的桌子好啊,快带我去呀!
好的好的,请跟我来。
create_sub_task
{"goal":"At(Robot,WaterTable)"}
我想来杯水,帮我孩子拿个酸奶吧。
ok没问题请稍等
create_sub_task
{"goal":"On(Water,WaterTable),On(Yogurt,WaterTable)"}
来杯酸奶吧。
好的没问题,请稍等!
create_sub_task
{"goal":"On(Yogurt,WaterTable)"}
好热呀!太阳也好大!
抱歉,我这就去开空调和关窗帘。
create_sub_task
{"goal":"Is(AC,On),Is(Curtain,Off)"}
下班啦!别忘了打扫卫生。
收到!下班啦!
create_sub_task
{"goal":"Is(Floor,Clean),Is(Table1,Clean),Is(Chairs,Clean),Is(AC,Off),Is(HallLight,Off),Is(TubeLight,Off),Is(Curtain,Off)"}
请问洗手间在哪里?
洗手间在这附近
get_object_info
{"obj":"洗手间"}
给我来杯酸奶和冰红茶,我坐在对面的桌子那儿。
好的,请稍等。
create_sub_task
{"goal":"On(Chips,WaterTable),On(NFCJuice,WaterTable)"}
给我来份薯片和果汁,我坐在对面的桌子那儿。
好的,请稍等。
create_sub_task
{"goal":"On(BottledDrink,WaterTable),On(Yogurt,WaterTable)"}
好热呀,想开空调,想要温度调低点!
抱歉,我马上去开空调并调低空调温度。
create_sub_task
{"goal":"Is(ACTemperature,Down)"}
请问哪里有空位啊?
现在有不少空位呢,请问您有什么要求嘛?
我想坐高脚凳子。
没问题,大厅里就有这样的位置呢!
你带我去吧。
OK请跟我来
create_sub_task
{"goal":"At(Robot,BrightTable5)"}
可以关筒灯和关窗帘吗?
好的,请稍等。
create_sub_task
{"goal":"Is(TubeLight,Off),Is(Curtain,Off)"}
给我来杯咖啡,哦对,再倒一杯水。
速速就好,请稍等!
create_sub_task
{"goal":"On(Water,WaterTable),On(Coffee,WaterTable)"}
我想来份点心和酸奶。
稍等片刻噢~
create_sub_task
{"goal":"On(Yogurt,Bar),On(Dessert,Bar)"}
酸奶。
稍等片刻噢~
create_sub_task
{"goal":"On(Yogurt,Bar)"}
你好呀,你们这有啥好吃的?
我们咖啡厅有咖啡,还有饮料和点心蛋糕呢?您想点什么不?
听起来都好甜呀,我女朋友爱吃水果。
没问题,我们也提供草莓、橘子和苹果等水果呢。
你们这人可真多。
目前还有空位呢,你们可以坐下来一起品尝水果呢?
get_object_info
{"obj":"空位"}
我女朋友怕晒,有空余的阴凉位置嘛?
我查询完毕,阴凉位置有两个。在你们右侧往里走,就有适合你们二位的沙发呢。
get_object_info
{"obj":"阴凉空位"}
那还不错。
是的,那边安静,不会晒到太阳呢。
我们还想一起下下棋,切磋切磋。
我们这有两份军棋呢,在大厅的三号桌上。
get_object_info
{"obj":"棋"}
请问洗手间在哪呢?
在后门,你们的左侧往里走就是了呢。
get_object_info
{"obj":"洗手间"}
我带着孩子呢,想要宽敞亮堂的地方。
好的,我明白了,那么我们推荐您到大厅的桌子,那里的空间比较宽敞,环境也比较明亮,适合带着孩子一起用餐。
冰红茶
好的
create_sub_task
{"goal":"On(Softdrink,Bar)"}
水杯
好的
create_sub_task
{"goal":"On(Glass,Bar)"}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 122 KiB

After

Width:  |  Height:  |  Size: 92 KiB