Source code for FlagEmbedding.evaluation.beir.runner
import logging
from FlagEmbedding.abc.evaluation import AbsEvalRunner
from .data_loader import BEIREvalDataLoader
from .prompts import BEIRInstructions
from .evaluator import BEIREvaluator
logger = logging.getLogger(__name__)
[docs]
class BEIREvalRunner(AbsEvalRunner):
    """
    Evaluation runner for BEIR.

    Builds the BEIR data loader and evaluator, and drives the search step
    followed by metric computation.
    """

    def run(self):
        """
        Run the search step for every selected dataset, then compute metrics.

        When ``eval_args.dataset_names`` is ``None`` the data loader is asked
        for its available dataset names; otherwise the requested names are
        passed through the data loader's ``check_dataset_names``. An empty
        result triggers a single evaluator call without a ``dataset_name``.
        """
        args = self.eval_args

        def search(**extra):
            # Arguments are read at call time, so every call sees the
            # retriever state as it is at that moment.
            self.evaluator(
                splits=args.splits,
                search_results_save_dir=args.output_dir,
                retriever=self.retriever,
                reranker=self.reranker,
                corpus_embd_save_dir=args.corpus_embd_save_dir,
                ignore_identical_ids=args.ignore_identical_ids,
                k_values=args.k_values,
                **extra,
            )

        requested = args.dataset_names
        dataset_names = (
            self.data_loader.available_dataset_names()
            if requested is None
            else self.data_loader.check_dataset_names(requested)
        )

        if len(dataset_names) != 0:
            logger.info(f"Running {self.eval_args.eval_name} evaluation on the following dataset names: {dataset_names}")
            for dataset_name in dataset_names:
                if args.use_special_instructions:
                    # The pool is stopped before the query instruction is
                    # replaced with the dataset-specific one.
                    self.retriever.stop_multi_process_pool()
                    instruction = BEIRInstructions[dataset_name]
                    self.retriever.embedder.query_instruction_for_retrieval = instruction
                logger.info(f"Running {self.eval_args.eval_name} evaluation on: {dataset_name}")
                search(dataset_name=dataset_name)
            logger.info(f"{self.eval_args.eval_name} evaluation on {dataset_names} completed.")
        else:
            logger.info(f"Running {self.eval_args.eval_name} evaluation on the default dataset.")
            search()
            logger.info(f"{self.eval_args.eval_name} evaluation completed.")

        logger.info("Start computing metrics.")
        self.evaluate_metrics(
            search_results_save_dir=args.output_dir,
            output_method=args.eval_output_method,
            output_path=args.eval_output_path,
            metrics=args.eval_metrics,
        )

    def load_data_loader(self) -> BEIREvalDataLoader:
        """Build the data loader from the evaluation arguments.

        Returns:
            BEIREvalDataLoader: BEIR data loader object.
        """
        args = self.eval_args
        return BEIREvalDataLoader(
            eval_name=args.eval_name,
            dataset_dir=args.dataset_dir,
            cache_dir=args.cache_path,
            token=args.token,
            force_redownload=args.force_redownload,
        )

    def load_evaluator(self) -> BEIREvaluator:
        """Build the evaluator, wired to this runner's data loader.

        Returns:
            BEIREvaluator: The BEIR evaluator to run the evaluation.
        """
        args = self.eval_args
        return BEIREvaluator(
            eval_name=args.eval_name,
            data_loader=self.data_loader,
            overwrite=args.overwrite,
        )