Arguments
- class FlagEmbedding.finetune.embedder.encoder_only.m3.EncoderOnlyEmbedderM3ModelArguments(model_name_or_path: str, config_name: str | None = None, tokenizer_name: str | None = None, cache_dir: str | None = None, trust_remote_code: bool = False, token: str = <factory>, colbert_dim: int = -1)
Model argument class for M3. Adds the M3-specific colbert_dim field (default -1) on top of the shared embedder model arguments such as model_name_or_path and tokenizer_name.
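A minimal construction sketch is shown below. The checkpoint name is illustrative, and the comment on colbert_dim is an assumption about its default behavior; verify it against your FlagEmbedding version.

```python
from FlagEmbedding.finetune.embedder.encoder_only.m3 import (
    EncoderOnlyEmbedderM3ModelArguments,
)

model_args = EncoderOnlyEmbedderM3ModelArguments(
    model_name_or_path="BAAI/bge-m3",  # illustrative checkpoint
    trust_remote_code=False,
    colbert_dim=-1,  # assumption: -1 keeps the encoder hidden size for the ColBERT projection
)
```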
- class FlagEmbedding.finetune.embedder.encoder_only.m3.EncoderOnlyEmbedderM3TrainingArguments(output_dir: str, overwrite_output_dir: bool = False, do_train: bool = False, do_eval: bool = False, do_predict: bool = False, eval_strategy: transformers.trainer_utils.IntervalStrategy | str = 'no', prediction_loss_only: bool = False, per_device_train_batch_size: int = 8, per_device_eval_batch_size: int = 8, per_gpu_train_batch_size: int | None = None, per_gpu_eval_batch_size: int | None = None, gradient_accumulation_steps: int = 1, eval_accumulation_steps: int | None = None, eval_delay: float | None = 0, torch_empty_cache_steps: int | None = None, learning_rate: float = 5e-05, weight_decay: float = 0.0, adam_beta1: float = 0.9, adam_beta2: float = 0.999, adam_epsilon: float = 1e-08, max_grad_norm: float = 1.0, num_train_epochs: float = 3.0, max_steps: int = -1, lr_scheduler_type: transformers.trainer_utils.SchedulerType | str = 'linear', lr_scheduler_kwargs: dict | str | None = <factory>, warmup_ratio: float = 0.0, warmup_steps: int = 0, log_level: str | None = 'passive', log_level_replica: str | None = 'warning', log_on_each_node: bool = True, logging_dir: str | None = None, logging_strategy: transformers.trainer_utils.IntervalStrategy | str = 'steps', logging_first_step: bool = False, logging_steps: float = 500, logging_nan_inf_filter: bool = True, save_strategy: transformers.trainer_utils.IntervalStrategy | str = 'steps', save_steps: float = 500, save_total_limit: int | None = None, save_safetensors: bool | None = True, save_on_each_node: bool = False, save_only_model: bool = False, restore_callback_states_from_checkpoint: bool = False, no_cuda: bool = False, use_cpu: bool = False, use_mps_device: bool = False, seed: int = 42, data_seed: int | None = None, jit_mode_eval: bool = False, use_ipex: bool = False, bf16: bool = False, fp16: bool = False, fp16_opt_level: str = 'O1', half_precision_backend: str = 'auto', bf16_full_eval: bool = False, fp16_full_eval: bool = False, tf32: bool | None = None, local_rank: int = -1, ddp_backend: str | None = None, tpu_num_cores: int | None = None, tpu_metrics_debug: bool = False, debug: str | typing.List[transformers.debug_utils.DebugOption] = '', dataloader_drop_last: bool = False, eval_steps: float | None = None, dataloader_num_workers: int = 0, dataloader_prefetch_factor: int | None = None, past_index: int = -1, run_name: str | None = None, disable_tqdm: bool | None = None, remove_unused_columns: bool | None = True, label_names: typing.List[str] | None = None, load_best_model_at_end: bool | None = False, metric_for_best_model: str | None = None, greater_is_better: bool | None = None, ignore_data_skip: bool = False, fsdp: typing.List[transformers.trainer_utils.FSDPOption] | str | None = '', fsdp_min_num_params: int = 0, fsdp_config: dict | str | None = None, fsdp_transformer_layer_cls_to_wrap: str | None = None, accelerator_config: dict | str | None = None, deepspeed: dict | str | None = None, label_smoothing_factor: float = 0.0, optim: transformers.training_args.OptimizerNames | str = 'adamw_torch', optim_args: str | None = None, adafactor: bool = False, group_by_length: bool = False, length_column_name: str | None = 'length', report_to: None | str | typing.List[str] = None, ddp_find_unused_parameters: bool | None = None, ddp_bucket_cap_mb: int | None = None, ddp_broadcast_buffers: bool | None = None, dataloader_pin_memory: bool = True, dataloader_persistent_workers: bool = False, skip_memory_metrics: bool = True, use_legacy_prediction_loop: bool = False, push_to_hub: bool = False, resume_from_checkpoint: str | None = None, hub_model_id: str | None = None, hub_strategy: transformers.trainer_utils.HubStrategy | str = 'every_save', hub_token: str | None = None, hub_private_repo: bool = False, hub_always_push: bool = False, gradient_checkpointing: bool = False, gradient_checkpointing_kwargs: dict | str | None = None, include_inputs_for_metrics: bool = False, eval_do_concat_batches: bool = True, fp16_backend: str = 'auto', evaluation_strategy: transformers.trainer_utils.IntervalStrategy | str | None = None, push_to_hub_model_id: str | None = None, push_to_hub_organization: str | None = None, push_to_hub_token: str | None = None, mp_parameters: str = '', auto_find_batch_size: bool = False, full_determinism: bool = False, torchdynamo: str | None = None, ray_scope: str | None = 'last', ddp_timeout: int | None = 1800, torch_compile: bool = False, torch_compile_backend: str | None = None, torch_compile_mode: str | None = None, dispatch_batches: bool | None = None, split_batches: bool | None = None, include_tokens_per_second: bool | None = False, include_num_input_tokens_seen: bool | None = False, neftune_noise_alpha: float | None = None, optim_target_modules: None | str | typing.List[str] = None, batch_eval_metrics: bool = False, eval_on_start: bool = False, eval_use_gather_object: bool | None = False, negatives_cross_device: bool = False, temperature: float | None = 0.02, fix_position_embedding: bool = False, sentence_pooling_method: str = 'cls', normalize_embeddings: bool = True, sub_batch_size: int | None = None, kd_loss_type: str = 'kl_div', unified_finetuning: bool = False, use_self_distill: bool = False, fix_encoder: bool = False, self_distill_start_step: int = -1)
Training argument class for M3. Extends transformers.TrainingArguments with M3-specific options such as temperature, sentence_pooling_method, normalize_embeddings, kd_loss_type, unified_finetuning, use_self_distill, and self_distill_start_step.
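Because the class extends transformers.TrainingArguments, it can be constructed directly or populated from the command line with HfArgumentParser. The sketch below is a hypothetical configuration, not a recommended recipe; the comments on the M3-specific fields are assumptions inferred from their names and defaults.

```python
from transformers import HfArgumentParser
from FlagEmbedding.finetune.embedder.encoder_only.m3 import (
    EncoderOnlyEmbedderM3TrainingArguments,
)

training_args = EncoderOnlyEmbedderM3TrainingArguments(
    output_dir="./m3_finetune",      # hypothetical output path
    num_train_epochs=1.0,
    per_device_train_batch_size=8,
    learning_rate=5e-5,
    temperature=0.02,                # contrastive-loss temperature
    sentence_pooling_method="cls",   # pool dense embeddings from the [CLS] token
    normalize_embeddings=True,
    unified_finetuning=True,         # assumption: jointly train dense, sparse, and ColBERT heads
    use_self_distill=True,           # assumption: self-distillation across the three heads
    kd_loss_type="kl_div",
)

# Alternatively, parse the same fields from CLI flags:
# parser = HfArgumentParser(EncoderOnlyEmbedderM3TrainingArguments)
# (training_args,) = parser.parse_args_into_dataclasses()
```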