# Source code for FlagEmbedding.evaluation.mteb.searcher

import numpy as np

from typing import List, Dict, Optional
from FlagEmbedding.abc.evaluation import EvalDenseRetriever, EvalReranker


class MTEBEvalDenseRetriever(EvalDenseRetriever):
    """
    Child class of :class:`EvalDenseRetriever` for MTEB dense retrieval.

    Wraps an embedder and adapts it to the query/corpus encoding interface
    expected by the MTEB evaluation framework.
    """
    def __init__(self, embedder, **kwargs):
        super().__init__(embedder, **kwargs)

    def set_examples(self, examples_for_task: Optional[List[dict]] = None):
        """Set in-context examples for the model.

        Args:
            examples_for_task (Optional[List[dict]], optional): Examples for the task. Defaults to None.
        """
        self.embedder.set_examples(examples_for_task)

    def set_instruction(self, instruction: Optional[str] = None):
        """Set the instruction to use for the embedding model.

        Args:
            instruction (Optional[str], optional): Query instruction used during retrieval.
                Defaults to None.
        """
        self.embedder.query_instruction_for_retrieval = instruction

    def get_instruction(self):
        """Get the instruction of embedding model.

        Returns:
            str: Instruction
        """
        return self.embedder.query_instruction_for_retrieval

    def set_normalize_embeddings(self, normalize_embeddings: bool = True):
        """Set whether to normalize the output embeddings.

        Args:
            normalize_embeddings (bool, optional): Boolean to control whether or not
                normalize the embeddings. Defaults to ``True``.
        """
        self.embedder.normalize_embeddings = normalize_embeddings

    def stop_pool(self):
        """Stop the embedder's multiprocessing pool(s), best-effort."""
        self.embedder.stop_self_pool()
        # Not every embedder exposes a separate query pool; this cleanup is
        # deliberately best-effort, but only ordinary exceptions are swallowed
        # (a bare `except:` would also hide KeyboardInterrupt/SystemExit).
        try:
            self.embedder.stop_self_query_pool()
        except Exception:
            pass

    @staticmethod
    def _to_texts(corpus) -> List[str]:
        """Flatten a corpus of ``{'title', 'text'}`` dicts into plain strings.

        Plain string inputs are passed through unchanged. Missing titles are
        treated as empty and the result is stripped.
        """
        if isinstance(corpus[0], dict):
            return ['{} {}'.format(doc.get('title', ''), doc['text']).strip() for doc in corpus]
        return corpus

    def encode_queries(self, queries: List[str], **kwargs):
        """Encode input queries.

        Args:
            queries (List[str]): Input queries.

        Returns:
            Union[np.ndarray, torch.Tensor]: Query embeddings.
        """
        emb = self.embedder.encode_queries(queries)
        # Multi-vector embedders return a dict; keep only the dense part.
        if isinstance(emb, dict):
            emb = emb["dense_vecs"]
        return emb.astype(np.float32)

    def encode_corpus(self, corpus: List[Dict[str, str]], **kwargs):
        """Encode input corpus.

        Args:
            corpus (List[Dict[str, str]]): Input corpus.

        Returns:
            Union[np.ndarray, torch.Tensor]: Corpus embeddings.
        """
        emb = self.embedder.encode_corpus(self._to_texts(corpus))
        if isinstance(emb, dict):
            emb = emb["dense_vecs"]
        return emb.astype(np.float32)

    def encode(self, corpus: List[Dict[str, str]], **kwargs):
        """Encode the input.

        Args:
            corpus (List[Dict[str, str]]): Input corpus or sentences.

        Returns:
            Union[np.ndarray, torch.Tensor]: Corpus embeddings.
        """
        # NOTE: encodes via `encode_queries` (as the original did), so the
        # query-side instruction is applied to these inputs.
        emb = self.embedder.encode_queries(self._to_texts(corpus))
        if isinstance(emb, dict):
            emb = emb["dense_vecs"]
        return emb.astype(np.float32)
class MTEBEvalReranker(EvalReranker):
    """MTEB-specific reranker wrapper.

    Thin subclass of :class:`EvalReranker`; all reranking behavior is
    inherited unchanged from the parent class.
    """

    def __init__(self, reranker, **kwargs):
        super().__init__(reranker, **kwargs)