Skip to main content
Ctrl+K
langtest 2.7.0 documentation - Home
  • Quick Start
  • API Reference
  • GitHub
  • Quick Start
  • API Reference
  • GitHub

Section Navigation

  • langtest.augmentation
    • langtest.augmentation.augmenter
      • langtest.augmentation.augmenter.DataAugmenter
    • langtest.augmentation.base
      • langtest.augmentation.base.AugmentRobustness
      • langtest.augmentation.base.BaseAugmentaion
      • langtest.augmentation.base.TemplaticAugment
    • langtest.augmentation.debias
      • langtest.augmentation.debias.DebiasTextProcessing
    • langtest.augmentation.utils
      • langtest.augmentation.utils.generate_templates_azoi
      • langtest.augmentation.utils.generate_templates_ollama
      • langtest.augmentation.utils.generate_templates_openai
      • langtest.augmentation.utils.AzureOpenAIConfig
      • langtest.augmentation.utils.OpenAIConfig
      • langtest.augmentation.utils.Templates
  • langtest.callback
    • langtest.callback.LangTestCallback
  • langtest.config
    • langtest.config.read_config
    • langtest.config.update_config
    • langtest.config.write_config
  • langtest.datahandler
    • langtest.datahandler.dataset_info
    • langtest.datahandler.datasource
      • langtest.datahandler.datasource.BaseDataset
      • langtest.datahandler.datasource.CSVDataset
      • langtest.datahandler.datasource.ConllDataset
      • langtest.datahandler.datasource.DataFactory
      • langtest.datahandler.datasource.DeltaLiveTablesDataset
      • langtest.datahandler.datasource.HuggingFaceDataset
      • langtest.datahandler.datasource.JSONDataset
      • langtest.datahandler.datasource.JSONLDataset
      • langtest.datahandler.datasource.PandasDataset
      • langtest.datahandler.datasource.SparkDataset
      • langtest.datahandler.datasource.SynteticDataset
    • langtest.datahandler.format
      • langtest.datahandler.format.BaseFormatter
      • langtest.datahandler.format.Formatter
      • langtest.datahandler.format.NEROutputFormatter
      • langtest.datahandler.format.QAFormatter
      • langtest.datahandler.format.SequenceClassificationOutputFormatter
    • langtest.datahandler.utils
      • langtest.datahandler.utils.get_results
      • langtest.datahandler.utils.process_document
  • langtest.embeddings
    • langtest.embeddings.huggingface
      • langtest.embeddings.huggingface.HuggingfaceEmbeddings
    • langtest.embeddings.openai
      • langtest.embeddings.openai.OpenaiEmbeddings
  • langtest.errors
    • langtest.errors.Errors
    • langtest.errors.ErrorsWithCodes
    • langtest.errors.Warnings
    • langtest.errors.ColumnNameError
  • langtest.evaluation
  • langtest.langtest
    • langtest.langtest.Harness
  • langtest.leaderboard
    • langtest.leaderboard.create_folder
    • langtest.leaderboard.create_leaderboard
    • langtest.leaderboard.generate_folder_key
    • langtest.leaderboard.generate_store_testcases
    • langtest.leaderboard.get_lm_studio_model_name
    • langtest.leaderboard.get_parameters
    • langtest.leaderboard.get_store_path
    • langtest.leaderboard.load_old_testcases
    • langtest.leaderboard.prepare_accuracy_summary
    • langtest.leaderboard.prepare_robustness_summary
    • langtest.leaderboard.reorder_columns
    • langtest.leaderboard.run_store_checkpoints
    • langtest.leaderboard.save_file
    • langtest.leaderboard.update_leaderboard
    • langtest.leaderboard.update_summary
  • langtest.logger
    • langtest.logger.Logger
  • langtest.metrics
    • langtest.metrics.embedding_distance
      • langtest.metrics.embedding_distance.EmbeddingDistance
    • langtest.metrics.eval_prompts
      • langtest.metrics.eval_prompts.ConsultationMetricScores
      • langtest.metrics.eval_prompts.MHCEvaluation
    • langtest.metrics.llm_eval
      • langtest.metrics.llm_eval.EvalTemplate
      • langtest.metrics.llm_eval.LlmEval
      • langtest.metrics.llm_eval.RatingEval
      • langtest.metrics.llm_eval.SummaryEval
    • langtest.metrics.prometheus_eval
      • langtest.metrics.prometheus_eval.check_memory
      • langtest.metrics.prometheus_eval.AbsoluteGrading
      • langtest.metrics.prometheus_eval.PrometheusEval
      • langtest.metrics.prometheus_eval.RelativeGrading
    • langtest.metrics.string_distance
      • langtest.metrics.string_distance.StringDistance
  • langtest.modelhandler
  • langtest.pipelines
    • langtest.pipelines.embedding
      • langtest.pipelines.embedding.BasePipeline
      • langtest.pipelines.embedding.EmbeddingPipeline
    • langtest.pipelines.transformers
      • langtest.pipelines.transformers.ner_pipeline
        • langtest.pipelines.transformers.ner_pipeline.NEREnd2EndPipeline
    • langtest.pipelines.utils
      • langtest.pipelines.utils.data_helpers
        • langtest.pipelines.utils.data_helpers.ner_dataset
      • langtest.pipelines.utils.metrics
        • langtest.pipelines.utils.metrics.compute_ner_metrics
  • langtest.prompts
    • langtest.prompts.Conversion
    • langtest.prompts.MessageType
    • langtest.prompts.PromptConfig
    • langtest.prompts.PromptManager
  • langtest.tasks
    • langtest.tasks.task
      • langtest.tasks.task.BaseTask
      • langtest.tasks.task.Clinical
      • langtest.tasks.task.CrowsPairs
      • langtest.tasks.task.Disinformation
      • langtest.tasks.task.Factuality
      • langtest.tasks.task.FillMask
      • langtest.tasks.task.Ideology
      • langtest.tasks.task.Legal
      • langtest.tasks.task.NER
      • langtest.tasks.task.QuestionAnswering
      • langtest.tasks.task.Security
      • langtest.tasks.task.Sensitivity
      • langtest.tasks.task.Stereoset
      • langtest.tasks.task.Summarization
      • langtest.tasks.task.Sycophancy
      • langtest.tasks.task.TaskManager
      • langtest.tasks.task.TextClassification
      • langtest.tasks.task.TextGeneration
      • langtest.tasks.task.Toxicity
      • langtest.tasks.task.Translation
      • langtest.tasks.task.VisualQA
      • langtest.tasks.task.WinoBias
  • langtest.transform
    • langtest.transform.accuracy
      • langtest.transform.accuracy.AccuracyTestFactory
      • langtest.transform.accuracy.BaseAccuracy
      • langtest.transform.accuracy.DegradationAnalysis
      • langtest.transform.accuracy.LLMEval
      • langtest.transform.accuracy.MinBLEUcore
      • langtest.transform.accuracy.MinEMcore
      • langtest.transform.accuracy.MinF1Score
      • langtest.transform.accuracy.MinMacroF1Score
      • langtest.transform.accuracy.MinMicroF1Score
      • langtest.transform.accuracy.MinPrecisionScore
      • langtest.transform.accuracy.MinROUGEcore
      • langtest.transform.accuracy.MinRecallScore
      • langtest.transform.accuracy.MinWeightedF1Score
    • langtest.transform.base
      • langtest.transform.base.ITests
      • langtest.transform.base.TestFactory
    • langtest.transform.bias
      • langtest.transform.bias.BaseBias
      • langtest.transform.bias.BiasTestFactory
      • langtest.transform.bias.CountryEconomicBias
      • langtest.transform.bias.EthnicityNameBias
      • langtest.transform.bias.GenderPronounBias
      • langtest.transform.bias.ReligionBias
    • langtest.transform.clinical
      • langtest.transform.clinical.AMEGA
      • langtest.transform.clinical.BaseClinical
      • langtest.transform.clinical.Brand2Generic
      • langtest.transform.clinical.ClinicalNoteSummary
      • langtest.transform.clinical.ClinicalTestFactory
      • langtest.transform.clinical.DemographicBias
      • langtest.transform.clinical.FCT
      • langtest.transform.clinical.FQT
      • langtest.transform.clinical.Generic2Brand
      • langtest.transform.clinical.MedFuzz
      • langtest.transform.clinical.MentalHealth
      • langtest.transform.clinical.NOTA
      • langtest.transform.clinical.Posology
    • langtest.transform.constants
    • langtest.transform.custom_data
      • langtest.transform.custom_data.add_custom_data
    • langtest.transform.disinformation
      • langtest.transform.disinformation.DisinformationTestFactory
    • langtest.transform.factuality
      • langtest.transform.factuality.FactualityTestFactory
    • langtest.transform.fairness
      • langtest.transform.fairness.BaseFairness
      • langtest.transform.fairness.FairnessTestFactory
      • langtest.transform.fairness.MaxGenderF1Score
      • langtest.transform.fairness.MaxGenderLLMEval
      • langtest.transform.fairness.MaxGenderRougeScore
      • langtest.transform.fairness.MinGenderF1Score
      • langtest.transform.fairness.MinGenderLLMEval
      • langtest.transform.fairness.MinGenderRougeScore
    • langtest.transform.grammar
      • langtest.transform.grammar.BaseGrammar
      • langtest.transform.grammar.GrammarTestFactory
      • langtest.transform.grammar.Paraphrase
    • langtest.transform.ideology
      • langtest.transform.ideology.BaseIdeology
      • langtest.transform.ideology.IdeologyTestFactory
      • langtest.transform.ideology.PoliticalCompass
    • langtest.transform.image
      • langtest.transform.image.robustness
        • langtest.transform.image.robustness.ImageBlackSpot
        • langtest.transform.image.robustness.ImageBlur
        • langtest.transform.image.robustness.ImageBrightness
        • langtest.transform.image.robustness.ImageColor
        • langtest.transform.image.robustness.ImageConstrast
        • langtest.transform.image.robustness.ImageCrop
        • langtest.transform.image.robustness.ImageFlip
        • langtest.transform.image.robustness.ImageLayeredMask
        • langtest.transform.image.robustness.ImageNoise
        • langtest.transform.image.robustness.ImageRandomLineOverlay
        • langtest.transform.image.robustness.ImageRandomPolygonOverlay
        • langtest.transform.image.robustness.ImageRandomTextOverlay
        • langtest.transform.image.robustness.ImageResizing
        • langtest.transform.image.robustness.ImageRotation
        • langtest.transform.image.robustness.ImageSharpness
        • langtest.transform.image.robustness.ImageShear
        • langtest.transform.image.robustness.ImageTextOverlay
        • langtest.transform.image.robustness.ImageTranslate
        • langtest.transform.image.robustness.ImageWatermark
    • langtest.transform.legal
      • langtest.transform.legal.LegalTestFactory
    • langtest.transform.performance
      • langtest.transform.performance.BasePerformance
      • langtest.transform.performance.PerformanceTestFactory
      • langtest.transform.performance.Speed
    • langtest.transform.representation
      • langtest.transform.representation.BaseRepresentation
      • langtest.transform.representation.CountryEconomicRepresentation
      • langtest.transform.representation.EthnicityRepresentation
      • langtest.transform.representation.GenderRepresentation
      • langtest.transform.representation.LabelRepresentation
      • langtest.transform.representation.ReligionRepresentation
      • langtest.transform.representation.RepresentationTestFactory
    • langtest.transform.robustness
      • langtest.transform.robustness.AbbreviationInsertion
      • langtest.transform.robustness.AddContext
      • langtest.transform.robustness.AddContraction
      • langtest.transform.robustness.AddNewLines
      • langtest.transform.robustness.AddOcrTypo
      • langtest.transform.robustness.AddPunctuation
      • langtest.transform.robustness.AddSlangifyTypo
      • langtest.transform.robustness.AddSpeechToTextTypo
      • langtest.transform.robustness.AddTabs
      • langtest.transform.robustness.AddTypo
      • langtest.transform.robustness.AdjectiveAntonymSwap
      • langtest.transform.robustness.AdjectiveSynonymSwap
      • langtest.transform.robustness.BaseRobustness
      • langtest.transform.robustness.ConvertAccent
      • langtest.transform.robustness.DyslexiaWordSwap
      • langtest.transform.robustness.LowerCase
      • langtest.transform.robustness.MultiplePerturbations
      • langtest.transform.robustness.NumberToWord
      • langtest.transform.robustness.RandomAge
      • langtest.transform.robustness.RandomizeOptions
      • langtest.transform.robustness.RobustnessTestFactory
      • langtest.transform.robustness.StripAllPunctuation
      • langtest.transform.robustness.StripPunctuation
      • langtest.transform.robustness.SwapEntities
      • langtest.transform.robustness.TitleCase
      • langtest.transform.robustness.UpperCase
    • langtest.transform.safety
      • langtest.transform.safety.BaseSafetyTest
      • langtest.transform.safety.InjectionProbalities
      • langtest.transform.safety.JailBreakProbalities
      • langtest.transform.safety.Misuse
      • langtest.transform.safety.SafetyTestFactory
    • langtest.transform.security
      • langtest.transform.security.BaseSecurity
      • langtest.transform.security.PromptInjection
      • langtest.transform.security.SecurityTestFactory
    • langtest.transform.sensitivity
      • langtest.transform.sensitivity.AddNegation
      • langtest.transform.sensitivity.AddToxicWords
      • langtest.transform.sensitivity.BaseSensitivity
      • langtest.transform.sensitivity.SensitivityTestFactory
    • langtest.transform.stereoset
      • langtest.transform.stereoset.StereoSetTestFactory
    • langtest.transform.stereotype
      • langtest.transform.stereotype.StereoTypeTestFactory
    • langtest.transform.sycophancy
      • langtest.transform.sycophancy.BaseSycophancy
      • langtest.transform.sycophancy.SycophancyMath
      • langtest.transform.sycophancy.SycophancyNlp
      • langtest.transform.sycophancy.SycophancyTestFactory
    • langtest.transform.toxicity
      • langtest.transform.toxicity.BaseToxicity
      • langtest.transform.toxicity.GeneralToxicity
      • langtest.transform.toxicity.ToxicityTestFactory
      • langtest.transform.toxicity.ToxicityTypes
    • langtest.transform.utils
      • langtest.transform.utils.check_name
      • langtest.transform.utils.compare_generations_overlap
      • langtest.transform.utils.create_terminology
      • langtest.transform.utils.filter_unique_samples
      • langtest.transform.utils.get_default_font
      • langtest.transform.utils.get_substitution_names
      • langtest.transform.utils.AttackerLLM
      • langtest.transform.utils.DataRetriever
      • langtest.transform.utils.RepresentationOperation
      • langtest.transform.utils.ResponseEvaluator
      • langtest.transform.utils.ResponseGenerator
      • langtest.transform.utils.TargetLLM
  • langtest.types
    • langtest.types.AccuracyTestsConfig
    • langtest.types.BiasTestsConfig
    • langtest.types.ClinicalTestsConfig
    • langtest.types.DatasetConfig
    • langtest.types.DisinformationTestsConfig
    • langtest.types.FactualityTestsConfig
    • langtest.types.FairnessTestsConfig
    • langtest.types.GrammarTestsConfig
    • langtest.types.HarnessConfig
    • langtest.types.IdeologyTestsConfig
    • langtest.types.LegalTestsConfig
    • langtest.types.ModelConfig
    • langtest.types.PerformanceTestsConfig
    • langtest.types.RepresentationTestsConfig
    • langtest.types.RobustnessTestsConfig
    • langtest.types.SafetyTestsConfig
    • langtest.types.SecurityTestsConfig
    • langtest.types.SensitivityTestsConfig
    • langtest.types.SterosetTestsConfig
    • langtest.types.SterotypeTestsConfig
    • langtest.types.SycophancyTestsConfig
    • langtest.types.TestCategories
    • langtest.types.ToxicityTestsConfig
  • langtest.utils
    • langtest.utils.SoundsLikeFunctions
      • langtest.utils.SoundsLikeFunctions.G2p
      • langtest.utils.SoundsLikeFunctions.PhoneFunctions
      • langtest.utils.SoundsLikeFunctions.PronunciationFunctions
      • langtest.utils.SoundsLikeFunctions.Search
      • langtest.utils.SoundsLikeFunctions.WordFunctions
    • langtest.utils.benchmark_utils
      • langtest.utils.benchmark_utils.Leaderboard
      • langtest.utils.benchmark_utils.Summary
    • langtest.utils.checkpoints
      • langtest.utils.checkpoints.divide_into_batches
      • langtest.utils.checkpoints.CheckpointManager
    • langtest.utils.config_utils
      • langtest.utils.config_utils.BenchmarkDatasets
    • langtest.utils.custom_types
      • langtest.utils.custom_types.helpers
        • langtest.utils.custom_types.helpers.build_qa_input
        • langtest.utils.custom_types.helpers.build_qa_prompt
        • langtest.utils.custom_types.helpers.create_dirs
        • langtest.utils.custom_types.helpers.create_folder
        • langtest.utils.custom_types.helpers.highlight_differences_both
        • langtest.utils.custom_types.helpers.is_pass_embedding_distance
        • langtest.utils.custom_types.helpers.is_pass_llm_eval
        • langtest.utils.custom_types.helpers.is_pass_prometheus_eval
        • langtest.utils.custom_types.helpers.is_pass_string_distance
        • langtest.utils.custom_types.helpers.llm_prompt_eval
        • langtest.utils.custom_types.helpers.prepare_llm_evaluation_data
        • langtest.utils.custom_types.helpers.prepare_model_response
        • langtest.utils.custom_types.helpers.transformer_prompt_eval
        • langtest.utils.custom_types.helpers.HashableDict
        • langtest.utils.custom_types.helpers.SimplePromptTemplate
        • langtest.utils.custom_types.helpers.Span
        • langtest.utils.custom_types.helpers.TestResultManager
        • langtest.utils.custom_types.helpers.Transformation
      • langtest.utils.custom_types.output
        • langtest.utils.custom_types.output.MaxScoreOutput
        • langtest.utils.custom_types.output.MinScoreOutput
        • langtest.utils.custom_types.output.NEROutput
        • langtest.utils.custom_types.output.SequenceClassificationOutput
        • langtest.utils.custom_types.output.TranslationOutput
      • langtest.utils.custom_types.predictions
        • langtest.utils.custom_types.predictions.NERPrediction
        • langtest.utils.custom_types.predictions.SequenceLabel
      • langtest.utils.custom_types.sample
        • langtest.utils.custom_types.sample.AMEGASample
        • langtest.utils.custom_types.sample.BaseQASample
        • langtest.utils.custom_types.sample.BaseSample
        • langtest.utils.custom_types.sample.ClinicalSample
        • langtest.utils.custom_types.sample.CrowsPairsSample
        • langtest.utils.custom_types.sample.DegradationSample
        • langtest.utils.custom_types.sample.DialogueToSummarySample
        • langtest.utils.custom_types.sample.DisinformationSample
        • langtest.utils.custom_types.sample.FactualitySample
        • langtest.utils.custom_types.sample.FillMaskSample
        • langtest.utils.custom_types.sample.LLMAnswerSample
        • langtest.utils.custom_types.sample.LegalSample
        • langtest.utils.custom_types.sample.MaxScoreQASample
        • langtest.utils.custom_types.sample.MaxScoreSample
        • langtest.utils.custom_types.sample.MedFuzzSample
        • langtest.utils.custom_types.sample.MinScoreQASample
        • langtest.utils.custom_types.sample.MinScoreSample
        • langtest.utils.custom_types.sample.NERSample
        • langtest.utils.custom_types.sample.QASample
        • langtest.utils.custom_types.sample.SecuritySample
        • langtest.utils.custom_types.sample.SensitivitySample
        • langtest.utils.custom_types.sample.SequenceClassificationSample
        • langtest.utils.custom_types.sample.ShuffleOptions
        • langtest.utils.custom_types.sample.SimplePrompt
        • langtest.utils.custom_types.sample.SpeedTestSample
        • langtest.utils.custom_types.sample.StereoSetSample
        • langtest.utils.custom_types.sample.SummarizationSample
        • langtest.utils.custom_types.sample.SycophancySample
        • langtest.utils.custom_types.sample.TextGenerationSample
        • langtest.utils.custom_types.sample.ToxicitySample
        • langtest.utils.custom_types.sample.TranslationSample
        • langtest.utils.custom_types.sample.VisualQASample
        • langtest.utils.custom_types.sample.WinoBiasSample
    • langtest.utils.gender_classifier
      • langtest.utils.gender_classifier.GenderClassifier
    • langtest.utils.hf_utils
      • langtest.utils.hf_utils.build_dataset
      • langtest.utils.hf_utils.clean_input
      • langtest.utils.hf_utils.get_model_n_tokenizer
      • langtest.utils.hf_utils.login_with_token
      • langtest.utils.hf_utils.HuggingFacePipeline
      • langtest.utils.hf_utils.GatedRepoAccessError
    • langtest.utils.lib_manager
      • langtest.utils.lib_manager.log_verbosity_handler
      • langtest.utils.lib_manager.try_import_lib
    • langtest.utils.number_to_word
      • langtest.utils.number_to_word.print3
      • langtest.utils.number_to_word.ConvertNumberToWord
      • langtest.utils.number_to_word.BadChunkingOptionError
      • langtest.utils.number_to_word.NumOutOfRangeError
    • langtest.utils.report_utils
      • langtest.utils.report_utils.amega_report_summary
      • langtest.utils.report_utils.color_cells
      • langtest.utils.report_utils.mlflow_report
      • langtest.utils.report_utils.model_report
      • langtest.utils.report_utils.multi_dataset_multi_model_report
      • langtest.utils.report_utils.multi_dataset_report
      • langtest.utils.report_utils.multi_model_report
      • langtest.utils.report_utils.political_report
      • langtest.utils.report_utils.save_format
    • langtest.utils.util_metrics
      • langtest.utils.util_metrics.calculate_f1_score
      • langtest.utils.util_metrics.calculate_f1_score_multi_label
      • langtest.utils.util_metrics.classification_report
      • langtest.utils.util_metrics.classification_report_multi_label
      • langtest.utils.util_metrics.combine_labels
      • langtest.utils.util_metrics.simple_multilabel_binarizer
  • langtest
  • langtest.metrics
  • langtest.metrics.eval_prompts

langtest.metrics.eval_prompts

Classes

ConsultationMetricScores(*, ...)

MHCEvaluation(*, explanation, ...)

previous

langtest.metrics.embedding_distance.EmbeddingDistance

next

langtest.metrics.eval_prompts.ConsultationMetricScores

© Copyright 2025, Pacific AI.

Created using Sphinx 6.1.3.

Built with the PyData Sphinx Theme 0.16.1.