# Source code for FlagEmbedding.evaluation.beir.runner

import logging
from FlagEmbedding.abc.evaluation import AbsEvalRunner

from .data_loader import BEIREvalDataLoader
from .prompts import BEIRInstructions
from .evaluator import BEIREvaluator

logger = logging.getLogger(__name__)


class BEIREvalRunner(AbsEvalRunner):
    """
    Runner class of BEIR evaluation.

    Orchestrates the whole BEIR pipeline: resolves which datasets to
    evaluate, runs retrieval (and optional reranking) for each of them,
    and finally computes the evaluation metrics from the saved search
    results.
    """

    def run(self):
        """
        Run the whole evaluation.

        Dataset names are taken from ``self.eval_args.dataset_names``;
        when that is ``None``, all datasets reported by the data loader
        are used. If the resolved list is empty, a single evaluation run
        on the default dataset is performed instead. After all searches
        complete, metrics are computed over the saved results.
        """
        if self.eval_args.dataset_names is None:
            dataset_names = self.data_loader.available_dataset_names()
        else:
            dataset_names = self.data_loader.check_dataset_names(self.eval_args.dataset_names)

        if not dataset_names:
            logger.info(f"Running {self.eval_args.eval_name} evaluation on the default dataset.")
            self._run_single_evaluation()
            logger.info(f"{self.eval_args.eval_name} evaluation completed.")
        else:
            logger.info(f"Running {self.eval_args.eval_name} evaluation on the following dataset names: {dataset_names}")
            for dataset_name in dataset_names:
                if self.eval_args.use_special_instructions:
                    # The query instruction is dataset-specific: shut down the
                    # embedder's multi-process pool first so the worker
                    # processes are recreated with the new instruction.
                    self.retriever.stop_multi_process_pool()
                    self.retriever.embedder.query_instruction_for_retrieval = BEIRInstructions[dataset_name]
                logger.info(f"Running {self.eval_args.eval_name} evaluation on: {dataset_name}")
                self._run_single_evaluation(dataset_name=dataset_name)
            logger.info(f"{self.eval_args.eval_name} evaluation on {dataset_names} completed.")

        logger.info("Start computing metrics.")
        self.evaluate_metrics(
            search_results_save_dir=self.eval_args.output_dir,
            output_method=self.eval_args.eval_output_method,
            output_path=self.eval_args.eval_output_path,
            metrics=self.eval_args.eval_metrics
        )

    def _run_single_evaluation(self, **extra_kwargs):
        """Invoke the evaluator once with the shared arguments.

        Factors out the evaluator call that was previously duplicated in
        both branches of :meth:`run`.

        Args:
            **extra_kwargs: Additional keyword arguments forwarded to the
                evaluator (e.g. ``dataset_name`` for per-dataset runs).
        """
        self.evaluator(
            splits=self.eval_args.splits,
            search_results_save_dir=self.eval_args.output_dir,
            retriever=self.retriever,
            reranker=self.reranker,
            corpus_embd_save_dir=self.eval_args.corpus_embd_save_dir,
            ignore_identical_ids=self.eval_args.ignore_identical_ids,
            k_values=self.eval_args.k_values,
            **extra_kwargs,
        )

    def load_data_loader(self) -> BEIREvalDataLoader:
        """Load the data loader.

        Returns:
            BEIREvalDataLoader: BEIR data loader object, configured from
            ``self.eval_args`` (dataset directory, cache path, HF token,
            and redownload flag).
        """
        data_loader = BEIREvalDataLoader(
            eval_name=self.eval_args.eval_name,
            dataset_dir=self.eval_args.dataset_dir,
            cache_dir=self.eval_args.cache_path,
            token=self.eval_args.token,
            force_redownload=self.eval_args.force_redownload,
        )
        return data_loader

    def load_evaluator(self) -> BEIREvaluator:
        """Load the evaluator for evaluation.

        Returns:
            BEIREvaluator: The BEIR evaluator to run the evaluation,
            sharing this runner's data loader and overwrite setting.
        """
        evaluator = BEIREvaluator(
            eval_name=self.eval_args.eval_name,
            data_loader=self.data_loader,
            overwrite=self.eval_args.overwrite,
        )
        return evaluator