Skip to main content
Back to top
Ctrl
+
K
Site Navigation
Quick Start
API Reference
GitHub
Site Navigation
Quick Start
API Reference
GitHub
Section Navigation
langtest.augmentation
langtest.augmentation.augmenter
langtest.augmentation.augmenter.DataAugmenter
langtest.augmentation.base
langtest.augmentation.base.AugmentRobustness
langtest.augmentation.base.BaseAugmentaion
langtest.augmentation.base.TemplaticAugment
langtest.callback
langtest.callback.LangTestCallback
langtest.config
langtest.config.read_config
langtest.config.update_config
langtest.config.write_config
langtest.datahandler
langtest.datahandler.dataset_info
langtest.datahandler.datasource
langtest.datahandler.datasource.BaseDataset
langtest.datahandler.datasource.CSVDataset
langtest.datahandler.datasource.ConllDataset
langtest.datahandler.datasource.DataFactory
langtest.datahandler.datasource.HuggingFaceDataset
langtest.datahandler.datasource.JSONDataset
langtest.datahandler.datasource.JSONLDataset
langtest.datahandler.datasource.PandasDataset
langtest.datahandler.datasource.SynteticDataset
langtest.datahandler.format
langtest.datahandler.format.BaseFormatter
langtest.datahandler.format.Formatter
langtest.datahandler.format.NEROutputFormatter
langtest.datahandler.format.QAFormatter
langtest.datahandler.format.SequenceClassificationOutputFormatter
langtest.embeddings
langtest.embeddings.huggingface
langtest.embeddings.huggingface.HuggingfaceEmbeddings
langtest.embeddings.openai
langtest.embeddings.openai.OpenaiEmbeddings
langtest.errors
langtest.errors.Errors
langtest.errors.ErrorsWithCodes
langtest.errors.Warnings
langtest.errors.ColumnNameError
langtest.langtest
langtest.langtest.Harness
langtest.leaderboard
langtest.leaderboard.create_folder
langtest.leaderboard.create_leaderboard
langtest.leaderboard.generate_folder_key
langtest.leaderboard.generate_store_testcases
langtest.leaderboard.get_lm_studio_model_name
langtest.leaderboard.get_parameters
langtest.leaderboard.get_store_path
langtest.leaderboard.load_old_testcases
langtest.leaderboard.prepare_accuracy_summary
langtest.leaderboard.prepare_robustness_summary
langtest.leaderboard.reorder_columns
langtest.leaderboard.run_store_checkpoints
langtest.leaderboard.save_file
langtest.leaderboard.update_leaderboard
langtest.leaderboard.update_summary
langtest.logger
langtest.logger.Logger
langtest.metrics
langtest.metrics.embedding_distance
langtest.metrics.embedding_distance.EmbeddingDistance
langtest.metrics.llm_eval
langtest.metrics.llm_eval.LlmEval
langtest.metrics.prometheus_eval
langtest.metrics.prometheus_eval.check_memory
langtest.metrics.prometheus_eval.AbsoluteGrading
langtest.metrics.prometheus_eval.PrometheusEval
langtest.metrics.prometheus_eval.RelativeGrading
langtest.metrics.string_distance
langtest.metrics.string_distance.StringDistance
langtest.modelhandler
langtest.pipelines
langtest.pipelines.embedding
langtest.pipelines.embedding.BasePipeline
langtest.pipelines.embedding.EmbeddingPipeline
langtest.pipelines.transformers
langtest.pipelines.transformers.ner_pipeline
langtest.pipelines.transformers.ner_pipeline.NEREnd2EndPipeline
langtest.pipelines.utils
langtest.pipelines.utils.data_helpers
langtest.pipelines.utils.data_helpers.ner_dataset
langtest.pipelines.utils.metrics
langtest.pipelines.utils.metrics.compute_ner_metrics
langtest.prompts
langtest.prompts.Conversion
langtest.prompts.MessageType
langtest.prompts.PromptConfig
langtest.prompts.PromptManager
langtest.tasks
langtest.tasks.task
langtest.tasks.task.BaseTask
langtest.tasks.task.Clinical
langtest.tasks.task.CrowsPairs
langtest.tasks.task.Disinformation
langtest.tasks.task.Factuality
langtest.tasks.task.FillMask
langtest.tasks.task.Ideology
langtest.tasks.task.Legal
langtest.tasks.task.NER
langtest.tasks.task.QuestionAnswering
langtest.tasks.task.Security
langtest.tasks.task.Sensitivity
langtest.tasks.task.Stereoset
langtest.tasks.task.Summarization
langtest.tasks.task.Sycophancy
langtest.tasks.task.TaskManager
langtest.tasks.task.TextClassification
langtest.tasks.task.TextGeneration
langtest.tasks.task.Toxicity
langtest.tasks.task.Translation
langtest.tasks.task.WinoBias
langtest.transform
langtest.transform.accuracy
langtest.transform.accuracy.AccuracyTestFactory
langtest.transform.accuracy.BaseAccuracy
langtest.transform.accuracy.LLMEval
langtest.transform.accuracy.MinBLEUcore
langtest.transform.accuracy.MinEMcore
langtest.transform.accuracy.MinF1Score
langtest.transform.accuracy.MinMacroF1Score
langtest.transform.accuracy.MinMicroF1Score
langtest.transform.accuracy.MinPrecisionScore
langtest.transform.accuracy.MinROUGEcore
langtest.transform.accuracy.MinRecallScore
langtest.transform.accuracy.MinWeightedF1Score
langtest.transform.base
langtest.transform.base.ITests
langtest.transform.base.TestFactory
langtest.transform.bias
langtest.transform.bias.BaseBias
langtest.transform.bias.BiasTestFactory
langtest.transform.bias.CountryEconomicBias
langtest.transform.bias.EthnicityNameBias
langtest.transform.bias.GenderPronounBias
langtest.transform.bias.ReligionBias
langtest.transform.clinical
langtest.transform.clinical.BaseClincial
langtest.transform.clinical.Brand2Generic
langtest.transform.clinical.ClinicalTestFactory
langtest.transform.clinical.DemographicBias
langtest.transform.clinical.Generic2Brand
langtest.transform.clinical.Posology
langtest.transform.constants
langtest.transform.custom_data
langtest.transform.custom_data.add_custom_data
langtest.transform.disinformation
langtest.transform.disinformation.DisinformationTestFactory
langtest.transform.factuality
langtest.transform.factuality.FactualityTestFactory
langtest.transform.fairness
langtest.transform.fairness.BaseFairness
langtest.transform.fairness.FairnessTestFactory
langtest.transform.fairness.MaxGenderF1Score
langtest.transform.fairness.MaxGenderLLMEval
langtest.transform.fairness.MaxGenderRougeScore
langtest.transform.fairness.MinGenderF1Score
langtest.transform.fairness.MinGenderLLMEval
langtest.transform.fairness.MinGenderRougeScore
langtest.transform.grammar
langtest.transform.grammar.BaseGrammar
langtest.transform.grammar.GrammarTestFactory
langtest.transform.grammar.Paraphrase
langtest.transform.ideology
langtest.transform.ideology.BaseIdeology
langtest.transform.ideology.IdeologyTestFactory
langtest.transform.ideology.PoliticalCompass
langtest.transform.legal
langtest.transform.legal.LegalTestFactory
langtest.transform.performance
langtest.transform.performance.BasePerformance
langtest.transform.performance.PerformanceTestFactory
langtest.transform.performance.Speed
langtest.transform.representation
langtest.transform.representation.BaseRepresentation
langtest.transform.representation.CountryEconomicRepresentation
langtest.transform.representation.EthnicityRepresentation
langtest.transform.representation.GenderRepresentation
langtest.transform.representation.LabelRepresentation
langtest.transform.representation.ReligionRepresentation
langtest.transform.representation.RepresentationTestFactory
langtest.transform.robustness
langtest.transform.robustness.AbbreviationInsertion
langtest.transform.robustness.AddContext
langtest.transform.robustness.AddContraction
langtest.transform.robustness.AddOcrTypo
langtest.transform.robustness.AddPunctuation
langtest.transform.robustness.AddSlangifyTypo
langtest.transform.robustness.AddSpeechToTextTypo
langtest.transform.robustness.AddTypo
langtest.transform.robustness.AdjectiveAntonymSwap
langtest.transform.robustness.AdjectiveSynonymSwap
langtest.transform.robustness.BaseRobustness
langtest.transform.robustness.ConvertAccent
langtest.transform.robustness.DyslexiaWordSwap
langtest.transform.robustness.LowerCase
langtest.transform.robustness.MultiplePerturbations
langtest.transform.robustness.NumberToWord
langtest.transform.robustness.RandomAge
langtest.transform.robustness.RobustnessTestFactory
langtest.transform.robustness.StripAllPunctuation
langtest.transform.robustness.StripPunctuation
langtest.transform.robustness.SwapEntities
langtest.transform.robustness.TitleCase
langtest.transform.robustness.UpperCase
langtest.transform.safety
langtest.transform.safety.BaseSafetyTest
langtest.transform.safety.Misuse
langtest.transform.safety.SafetyTestFactory
langtest.transform.security
langtest.transform.security.BaseSecurity
langtest.transform.security.PromptInjection
langtest.transform.security.SecurityTestFactory
langtest.transform.sensitivity
langtest.transform.sensitivity.AddNegation
langtest.transform.sensitivity.AddToxicWords
langtest.transform.sensitivity.BaseSensitivity
langtest.transform.sensitivity.SensitivityTestFactory
langtest.transform.stereoset
langtest.transform.stereoset.StereoSetTestFactory
langtest.transform.stereotype
langtest.transform.stereotype.StereoTypeTestFactory
langtest.transform.sycophancy
langtest.transform.sycophancy.BaseSycophancy
langtest.transform.sycophancy.SycophancyMath
langtest.transform.sycophancy.SycophancyNlp
langtest.transform.sycophancy.SycophancyTestFactory
langtest.transform.toxicity
langtest.transform.toxicity.BaseToxicity
langtest.transform.toxicity.GeneralToxicity
langtest.transform.toxicity.ToxicityTestFactory
langtest.transform.toxicity.ToxicityTypes
langtest.transform.utils
langtest.transform.utils.check_name
langtest.transform.utils.compare_generations_overlap
langtest.transform.utils.create_terminology
langtest.transform.utils.filter_unique_samples
langtest.transform.utils.get_substitution_names
langtest.transform.utils.RepresentationOperation
langtest.utils
langtest.utils.SoundsLikeFunctions
langtest.utils.SoundsLikeFunctions.G2p
langtest.utils.SoundsLikeFunctions.PhoneFunctions
langtest.utils.SoundsLikeFunctions.PronunciationFunctions
langtest.utils.SoundsLikeFunctions.Search
langtest.utils.SoundsLikeFunctions.WordFunctions
langtest.utils.benchmark_utils
langtest.utils.benchmark_utils.Leaderboard
langtest.utils.benchmark_utils.Summary
langtest.utils.checkpoints
langtest.utils.checkpoints.divide_into_batches
langtest.utils.checkpoints.CheckpointManager
langtest.utils.config_utils
langtest.utils.config_utils.BenchmarkDatasets
langtest.utils.custom_types
langtest.utils.custom_types.helpers
langtest.utils.custom_types.helpers.build_qa_input
langtest.utils.custom_types.helpers.build_qa_prompt
langtest.utils.custom_types.helpers.create_dirs
langtest.utils.custom_types.helpers.create_folder
langtest.utils.custom_types.helpers.is_pass_embedding_distance
langtest.utils.custom_types.helpers.is_pass_llm_eval
langtest.utils.custom_types.helpers.is_pass_prometheus_eval
langtest.utils.custom_types.helpers.is_pass_string_distance
langtest.utils.custom_types.helpers.llm_prompt_eval
langtest.utils.custom_types.helpers.prepare_llm_evaluation_data
langtest.utils.custom_types.helpers.prepare_model_response
langtest.utils.custom_types.helpers.transformer_prompt_eval
langtest.utils.custom_types.helpers.HashableDict
langtest.utils.custom_types.helpers.SimplePromptTemplate
langtest.utils.custom_types.helpers.Span
langtest.utils.custom_types.helpers.TestResultManager
langtest.utils.custom_types.helpers.Transformation
langtest.utils.custom_types.output
langtest.utils.custom_types.output.MaxScoreOutput
langtest.utils.custom_types.output.MinScoreOutput
langtest.utils.custom_types.output.NEROutput
langtest.utils.custom_types.output.SequenceClassificationOutput
langtest.utils.custom_types.output.TranslationOutput
langtest.utils.custom_types.predictions
langtest.utils.custom_types.predictions.NERPrediction
langtest.utils.custom_types.predictions.SequenceLabel
langtest.utils.custom_types.sample
langtest.utils.custom_types.sample.BaseQASample
langtest.utils.custom_types.sample.BaseSample
langtest.utils.custom_types.sample.ClinicalSample
langtest.utils.custom_types.sample.CrowsPairsSample
langtest.utils.custom_types.sample.DisinformationSample
langtest.utils.custom_types.sample.FactualitySample
langtest.utils.custom_types.sample.FillMaskSample
langtest.utils.custom_types.sample.LLMAnswerSample
langtest.utils.custom_types.sample.LegalSample
langtest.utils.custom_types.sample.MaxScoreQASample
langtest.utils.custom_types.sample.MaxScoreSample
langtest.utils.custom_types.sample.MinScoreQASample
langtest.utils.custom_types.sample.MinScoreSample
langtest.utils.custom_types.sample.NERSample
langtest.utils.custom_types.sample.QASample
langtest.utils.custom_types.sample.SecuritySample
langtest.utils.custom_types.sample.SensitivitySample
langtest.utils.custom_types.sample.SequenceClassificationSample
langtest.utils.custom_types.sample.SpeedTestSample
langtest.utils.custom_types.sample.StereoSetSample
langtest.utils.custom_types.sample.SummarizationSample
langtest.utils.custom_types.sample.SycophancySample
langtest.utils.custom_types.sample.TextGenerationSample
langtest.utils.custom_types.sample.ToxicitySample
langtest.utils.custom_types.sample.TranslationSample
langtest.utils.custom_types.sample.WinoBiasSample
langtest.utils.gender_classifier
langtest.utils.gender_classifier.GenderClassifier
langtest.utils.hf_utils
langtest.utils.hf_utils.build_dataset
langtest.utils.hf_utils.clean_input
langtest.utils.hf_utils.get_model_n_tokenizer
langtest.utils.hf_utils.login_with_token
langtest.utils.hf_utils.HuggingFacePipeline
langtest.utils.hf_utils.GatedRepoAccessError
langtest.utils.lib_manager
langtest.utils.lib_manager.log_verbosity_handler
langtest.utils.lib_manager.try_import_lib
langtest.utils.number_to_word
langtest.utils.number_to_word.print3
langtest.utils.number_to_word.ConvertNumberToWord
langtest.utils.number_to_word.BadChunkingOptionError
langtest.utils.number_to_word.NumOutOfRangeError
langtest.utils.report_utils
langtest.utils.report_utils.color_cells
langtest.utils.report_utils.mlflow_report
langtest.utils.report_utils.model_report
langtest.utils.report_utils.multi_dataset_multi_model_report
langtest.utils.report_utils.multi_dataset_report
langtest.utils.report_utils.multi_model_report
langtest.utils.report_utils.political_report
langtest.utils.report_utils.save_format
langtest.utils.util_metrics
langtest.utils.util_metrics.calculate_f1_score
langtest.utils.util_metrics.classification_report
langtest
langtest.utils
langtest.utils.config_utils
langtest.utils.config_utils
#
Classes
BenchmarkDatasets(task, dataset_name)