|
|
| import json |
|
|
|
|
def get_query(messages, num_turns=5):
    """Serialize the last `num_turns` conversation turns into a query string.

    Each turn is rendered as "<role>: <content>" on its own line, with the
    role "assistant" rewritten to "agent".

    Args:
        messages: list of dicts, each with 'role' and 'content' keys.
        num_turns: number of trailing turns to include (default 5).

    Returns:
        A newline-joined, stripped string of the selected turns.
    """
    lines = []
    for turn in messages[-num_turns:]:
        # Rewrite the role locally instead of mutating the caller's dict
        # (the original wrote the replacement back into `turn['role']`).
        role = turn['role'].replace("assistant", "agent")
        lines.append("{}: {}".format(role, turn['content']))
    return "\n".join(lines).strip()
|
|
|
|
def get_query_with_topic(messages, topic, num_turns=3):
    """Serialize the last `num_turns` turns, prefixing user turns with a topic.

    Like `get_query`, but each user turn is rendered as
    "user: this is a question about <topic>. <content>" so the topic is
    visible to the retriever. The role "assistant" is rewritten to "agent".

    Args:
        messages: list of dicts, each with 'role' and 'content' keys.
        topic: topic string injected into every user turn.
        num_turns: number of trailing turns to include (default 3).

    Returns:
        A newline-joined, stripped string of the selected turns.
    """
    lines = []
    for turn in messages[-num_turns:]:
        # Rewrite the role locally instead of mutating the caller's dict
        # (the original wrote the replacement back into `turn['role']`).
        role = turn['role'].replace("assistant", "agent")
        if role == 'user':
            lines.append("{}: this is a question about {}. {}".format(
                role, topic, turn['content']))
        else:
            lines.append("{}: {}".format(role, turn['content']))
    return "\n".join(lines).strip()
|
|
|
|
def get_data_for_evaluation(input_datapath, document_datapath, dataset_name):
    """Load evaluation samples and documents, grouping queries by document.

    Args:
        input_datapath: path to a JSON file containing a list of samples; each
            sample is expected to have 'messages', 'document',
            'ground_truth_ctx' (with an 'index' key) and, for qrecc, 'answers'.
        document_datapath: path to a JSON file mapping doc_id to a list of
            context chunks.
        dataset_name: dataset identifier; 'qrecc' triggers gold-context
            augmentation (see below).

    Returns:
        A tuple (eval_data, documents) where eval_data maps doc_id to a list
        of {"query": ..., "gold_idx": ...} entries and documents is the
        (possibly augmented) doc_id -> chunk-list mapping.
    """
    print('reading evaluation data from %s' % input_datapath)
    with open(input_datapath, "r") as f:
        input_list = json.load(f)

    print('reading documents from %s' % document_datapath)
    with open(document_datapath, "r") as f:
        documents = json.load(f)

    eval_data = {}
    for item in input_list:
        # NOTE: for the topiocqa and inscit datasets, use topic information
        # instead: query = get_query_with_topic(item['messages'], item['topic'])
        query = get_query(item['messages'])

        doc_id = item['document']
        gold_idx = item['ground_truth_ctx']['index']

        if dataset_name == 'qrecc':
            # The qrecc 'gold context' is chosen by word overlap between the
            # gold answer and each chunk, which might not be the real gold
            # context. To improve evaluation quality, append the answer to the
            # gold chunk so it is the most relevant chunk to the query. This is
            # only for retrieval evaluation; the answer is NOT added to the
            # context for the ChatRAG evaluation.
            answer = item['answers'][0]
            documents[doc_id][gold_idx] += " || " + answer

        # Accumulate per-document query lists without the duplicated
        # if/else branches of the original.
        eval_data.setdefault(doc_id, []).append(
            {"query": query, "gold_idx": gold_idx})

    return eval_data, documents
|
|