Skip to main content
Ctrl+K
langtest 2.3.1 documentation - Home

Site Navigation

  • Quick Start
  • API Reference
  • GitHub

Site Navigation

  • Quick Start
  • API Reference
  • GitHub

Section Navigation

  • langtest.augmentation
    • langtest.augmentation.augmenter
      • langtest.augmentation.augmenter.DataAugmenter
    • langtest.augmentation.base
      • langtest.augmentation.base.AugmentRobustness
      • langtest.augmentation.base.BaseAugmentaion
      • langtest.augmentation.base.TemplaticAugment
  • langtest.callback
    • langtest.callback.LangTestCallback
  • langtest.config
    • langtest.config.read_config
    • langtest.config.update_config
    • langtest.config.write_config
  • langtest.datahandler
    • langtest.datahandler.dataset_info
    • langtest.datahandler.datasource
      • langtest.datahandler.datasource.BaseDataset
      • langtest.datahandler.datasource.CSVDataset
      • langtest.datahandler.datasource.ConllDataset
      • langtest.datahandler.datasource.DataFactory
      • langtest.datahandler.datasource.HuggingFaceDataset
      • langtest.datahandler.datasource.JSONDataset
      • langtest.datahandler.datasource.JSONLDataset
      • langtest.datahandler.datasource.PandasDataset
      • langtest.datahandler.datasource.SynteticDataset
    • langtest.datahandler.format
      • langtest.datahandler.format.BaseFormatter
      • langtest.datahandler.format.Formatter
      • langtest.datahandler.format.NEROutputFormatter
      • langtest.datahandler.format.QAFormatter
      • langtest.datahandler.format.SequenceClassificationOutputFormatter
  • langtest.embeddings
    • langtest.embeddings.huggingface
      • langtest.embeddings.huggingface.HuggingfaceEmbeddings
    • langtest.embeddings.openai
      • langtest.embeddings.openai.OpenaiEmbeddings
  • langtest.errors
    • langtest.errors.Errors
    • langtest.errors.ErrorsWithCodes
    • langtest.errors.Warnings
    • langtest.errors.ColumnNameError
  • langtest.langtest
    • langtest.langtest.Harness
  • langtest.leaderboard
    • langtest.leaderboard.create_folder
    • langtest.leaderboard.create_leaderboard
    • langtest.leaderboard.generate_folder_key
    • langtest.leaderboard.generate_store_testcases
    • langtest.leaderboard.get_lm_studio_model_name
    • langtest.leaderboard.get_parameters
    • langtest.leaderboard.get_store_path
    • langtest.leaderboard.load_old_testcases
    • langtest.leaderboard.prepare_accuracy_summary
    • langtest.leaderboard.prepare_robustness_summary
    • langtest.leaderboard.reorder_columns
    • langtest.leaderboard.run_store_checkpoints
    • langtest.leaderboard.save_file
    • langtest.leaderboard.update_leaderboard
    • langtest.leaderboard.update_summary
  • langtest.logger
    • langtest.logger.Logger
  • langtest.metrics
    • langtest.metrics.embedding_distance
      • langtest.metrics.embedding_distance.EmbeddingDistance
    • langtest.metrics.llm_eval
      • langtest.metrics.llm_eval.LlmEval
    • langtest.metrics.prometheus_eval
      • langtest.metrics.prometheus_eval.check_memory
      • langtest.metrics.prometheus_eval.AbsoluteGrading
      • langtest.metrics.prometheus_eval.PrometheusEval
      • langtest.metrics.prometheus_eval.RelativeGrading
    • langtest.metrics.string_distance
      • langtest.metrics.string_distance.StringDistance
  • langtest.modelhandler
  • langtest.pipelines
    • langtest.pipelines.embedding
      • langtest.pipelines.embedding.BasePipeline
      • langtest.pipelines.embedding.EmbeddingPipeline
    • langtest.pipelines.transformers
      • langtest.pipelines.transformers.ner_pipeline
        • langtest.pipelines.transformers.ner_pipeline.NEREnd2EndPipeline
    • langtest.pipelines.utils
      • langtest.pipelines.utils.data_helpers
        • langtest.pipelines.utils.data_helpers.ner_dataset
      • langtest.pipelines.utils.metrics
        • langtest.pipelines.utils.metrics.compute_ner_metrics
  • langtest.prompts
    • langtest.prompts.Conversion
    • langtest.prompts.MessageType
    • langtest.prompts.PromptConfig
    • langtest.prompts.PromptManager
  • langtest.tasks
    • langtest.tasks.task
      • langtest.tasks.task.BaseTask
      • langtest.tasks.task.Clinical
      • langtest.tasks.task.CrowsPairs
      • langtest.tasks.task.Disinformation
      • langtest.tasks.task.Factuality
      • langtest.tasks.task.FillMask
      • langtest.tasks.task.Ideology
      • langtest.tasks.task.Legal
      • langtest.tasks.task.NER
      • langtest.tasks.task.QuestionAnswering
      • langtest.tasks.task.Security
      • langtest.tasks.task.Sensitivity
      • langtest.tasks.task.Stereoset
      • langtest.tasks.task.Summarization
      • langtest.tasks.task.Sycophancy
      • langtest.tasks.task.TaskManager
      • langtest.tasks.task.TextClassification
      • langtest.tasks.task.TextGeneration
      • langtest.tasks.task.Toxicity
      • langtest.tasks.task.Translation
      • langtest.tasks.task.WinoBias
  • langtest.transform
    • langtest.transform.accuracy
      • langtest.transform.accuracy.AccuracyTestFactory
      • langtest.transform.accuracy.BaseAccuracy
      • langtest.transform.accuracy.LLMEval
      • langtest.transform.accuracy.MinBLEUcore
      • langtest.transform.accuracy.MinEMcore
      • langtest.transform.accuracy.MinF1Score
      • langtest.transform.accuracy.MinMacroF1Score
      • langtest.transform.accuracy.MinMicroF1Score
      • langtest.transform.accuracy.MinPrecisionScore
      • langtest.transform.accuracy.MinROUGEcore
      • langtest.transform.accuracy.MinRecallScore
      • langtest.transform.accuracy.MinWeightedF1Score
    • langtest.transform.base
      • langtest.transform.base.ITests
      • langtest.transform.base.TestFactory
    • langtest.transform.bias
      • langtest.transform.bias.BaseBias
      • langtest.transform.bias.BiasTestFactory
      • langtest.transform.bias.CountryEconomicBias
      • langtest.transform.bias.EthnicityNameBias
      • langtest.transform.bias.GenderPronounBias
      • langtest.transform.bias.ReligionBias
    • langtest.transform.clinical
      • langtest.transform.clinical.BaseClincial
      • langtest.transform.clinical.Brand2Generic
      • langtest.transform.clinical.ClinicalTestFactory
      • langtest.transform.clinical.DemographicBias
      • langtest.transform.clinical.Generic2Brand
      • langtest.transform.clinical.Posology
    • langtest.transform.constants
    • langtest.transform.custom_data
      • langtest.transform.custom_data.add_custom_data
    • langtest.transform.disinformation
      • langtest.transform.disinformation.DisinformationTestFactory
    • langtest.transform.factuality
      • langtest.transform.factuality.FactualityTestFactory
    • langtest.transform.fairness
      • langtest.transform.fairness.BaseFairness
      • langtest.transform.fairness.FairnessTestFactory
      • langtest.transform.fairness.MaxGenderF1Score
      • langtest.transform.fairness.MaxGenderLLMEval
      • langtest.transform.fairness.MaxGenderRougeScore
      • langtest.transform.fairness.MinGenderF1Score
      • langtest.transform.fairness.MinGenderLLMEval
      • langtest.transform.fairness.MinGenderRougeScore
    • langtest.transform.grammar
      • langtest.transform.grammar.BaseGrammar
      • langtest.transform.grammar.GrammarTestFactory
      • langtest.transform.grammar.Paraphrase
    • langtest.transform.ideology
      • langtest.transform.ideology.BaseIdeology
      • langtest.transform.ideology.IdeologyTestFactory
      • langtest.transform.ideology.PoliticalCompass
    • langtest.transform.legal
      • langtest.transform.legal.LegalTestFactory
    • langtest.transform.performance
      • langtest.transform.performance.BasePerformance
      • langtest.transform.performance.PerformanceTestFactory
      • langtest.transform.performance.Speed
    • langtest.transform.representation
      • langtest.transform.representation.BaseRepresentation
      • langtest.transform.representation.CountryEconomicRepresentation
      • langtest.transform.representation.EthnicityRepresentation
      • langtest.transform.representation.GenderRepresentation
      • langtest.transform.representation.LabelRepresentation
      • langtest.transform.representation.ReligionRepresentation
      • langtest.transform.representation.RepresentationTestFactory
    • langtest.transform.robustness
      • langtest.transform.robustness.AbbreviationInsertion
      • langtest.transform.robustness.AddContext
      • langtest.transform.robustness.AddContraction
      • langtest.transform.robustness.AddOcrTypo
      • langtest.transform.robustness.AddPunctuation
      • langtest.transform.robustness.AddSlangifyTypo
      • langtest.transform.robustness.AddSpeechToTextTypo
      • langtest.transform.robustness.AddTypo
      • langtest.transform.robustness.AdjectiveAntonymSwap
      • langtest.transform.robustness.AdjectiveSynonymSwap
      • langtest.transform.robustness.BaseRobustness
      • langtest.transform.robustness.ConvertAccent
      • langtest.transform.robustness.DyslexiaWordSwap
      • langtest.transform.robustness.LowerCase
      • langtest.transform.robustness.MultiplePerturbations
      • langtest.transform.robustness.NumberToWord
      • langtest.transform.robustness.RandomAge
      • langtest.transform.robustness.RobustnessTestFactory
      • langtest.transform.robustness.StripAllPunctuation
      • langtest.transform.robustness.StripPunctuation
      • langtest.transform.robustness.SwapEntities
      • langtest.transform.robustness.TitleCase
      • langtest.transform.robustness.UpperCase
    • langtest.transform.safety
      • langtest.transform.safety.BaseSafetyTest
      • langtest.transform.safety.Misuse
      • langtest.transform.safety.SafetyTestFactory
    • langtest.transform.security
      • langtest.transform.security.BaseSecurity
      • langtest.transform.security.PromptInjection
      • langtest.transform.security.SecurityTestFactory
    • langtest.transform.sensitivity
      • langtest.transform.sensitivity.AddNegation
      • langtest.transform.sensitivity.AddToxicWords
      • langtest.transform.sensitivity.BaseSensitivity
      • langtest.transform.sensitivity.SensitivityTestFactory
    • langtest.transform.stereoset
      • langtest.transform.stereoset.StereoSetTestFactory
    • langtest.transform.stereotype
      • langtest.transform.stereotype.StereoTypeTestFactory
    • langtest.transform.sycophancy
      • langtest.transform.sycophancy.BaseSycophancy
      • langtest.transform.sycophancy.SycophancyMath
      • langtest.transform.sycophancy.SycophancyNlp
      • langtest.transform.sycophancy.SycophancyTestFactory
    • langtest.transform.toxicity
      • langtest.transform.toxicity.BaseToxicity
      • langtest.transform.toxicity.GeneralToxicity
      • langtest.transform.toxicity.ToxicityTestFactory
      • langtest.transform.toxicity.ToxicityTypes
    • langtest.transform.utils
      • langtest.transform.utils.check_name
      • langtest.transform.utils.compare_generations_overlap
      • langtest.transform.utils.create_terminology
      • langtest.transform.utils.filter_unique_samples
      • langtest.transform.utils.get_substitution_names
      • langtest.transform.utils.RepresentationOperation
  • langtest.utils
    • langtest.utils.SoundsLikeFunctions
      • langtest.utils.SoundsLikeFunctions.G2p
      • langtest.utils.SoundsLikeFunctions.PhoneFunctions
      • langtest.utils.SoundsLikeFunctions.PronunciationFunctions
      • langtest.utils.SoundsLikeFunctions.Search
      • langtest.utils.SoundsLikeFunctions.WordFunctions
    • langtest.utils.benchmark_utils
      • langtest.utils.benchmark_utils.Leaderboard
      • langtest.utils.benchmark_utils.Summary
    • langtest.utils.checkpoints
      • langtest.utils.checkpoints.divide_into_batches
      • langtest.utils.checkpoints.CheckpointManager
    • langtest.utils.config_utils
      • langtest.utils.config_utils.BenchmarkDatasets
    • langtest.utils.custom_types
      • langtest.utils.custom_types.helpers
        • langtest.utils.custom_types.helpers.build_qa_input
        • langtest.utils.custom_types.helpers.build_qa_prompt
        • langtest.utils.custom_types.helpers.create_dirs
        • langtest.utils.custom_types.helpers.create_folder
        • langtest.utils.custom_types.helpers.is_pass_embedding_distance
        • langtest.utils.custom_types.helpers.is_pass_llm_eval
        • langtest.utils.custom_types.helpers.is_pass_prometheus_eval
        • langtest.utils.custom_types.helpers.is_pass_string_distance
        • langtest.utils.custom_types.helpers.llm_prompt_eval
        • langtest.utils.custom_types.helpers.prepare_llm_evaluation_data
        • langtest.utils.custom_types.helpers.prepare_model_response
        • langtest.utils.custom_types.helpers.transformer_prompt_eval
        • langtest.utils.custom_types.helpers.HashableDict
        • langtest.utils.custom_types.helpers.SimplePromptTemplate
        • langtest.utils.custom_types.helpers.Span
        • langtest.utils.custom_types.helpers.TestResultManager
        • langtest.utils.custom_types.helpers.Transformation
      • langtest.utils.custom_types.output
        • langtest.utils.custom_types.output.MaxScoreOutput
        • langtest.utils.custom_types.output.MinScoreOutput
        • langtest.utils.custom_types.output.NEROutput
        • langtest.utils.custom_types.output.SequenceClassificationOutput
        • langtest.utils.custom_types.output.TranslationOutput
      • langtest.utils.custom_types.predictions
        • langtest.utils.custom_types.predictions.NERPrediction
        • langtest.utils.custom_types.predictions.SequenceLabel
      • langtest.utils.custom_types.sample
        • langtest.utils.custom_types.sample.BaseQASample
        • langtest.utils.custom_types.sample.BaseSample
        • langtest.utils.custom_types.sample.ClinicalSample
        • langtest.utils.custom_types.sample.CrowsPairsSample
        • langtest.utils.custom_types.sample.DisinformationSample
        • langtest.utils.custom_types.sample.FactualitySample
        • langtest.utils.custom_types.sample.FillMaskSample
        • langtest.utils.custom_types.sample.LLMAnswerSample
        • langtest.utils.custom_types.sample.LegalSample
        • langtest.utils.custom_types.sample.MaxScoreQASample
        • langtest.utils.custom_types.sample.MaxScoreSample
        • langtest.utils.custom_types.sample.MinScoreQASample
        • langtest.utils.custom_types.sample.MinScoreSample
        • langtest.utils.custom_types.sample.NERSample
        • langtest.utils.custom_types.sample.QASample
        • langtest.utils.custom_types.sample.SecuritySample
        • langtest.utils.custom_types.sample.SensitivitySample
        • langtest.utils.custom_types.sample.SequenceClassificationSample
        • langtest.utils.custom_types.sample.SpeedTestSample
        • langtest.utils.custom_types.sample.StereoSetSample
        • langtest.utils.custom_types.sample.SummarizationSample
        • langtest.utils.custom_types.sample.SycophancySample
        • langtest.utils.custom_types.sample.TextGenerationSample
        • langtest.utils.custom_types.sample.ToxicitySample
        • langtest.utils.custom_types.sample.TranslationSample
        • langtest.utils.custom_types.sample.WinoBiasSample
    • langtest.utils.gender_classifier
      • langtest.utils.gender_classifier.GenderClassifier
    • langtest.utils.hf_utils
      • langtest.utils.hf_utils.build_dataset
      • langtest.utils.hf_utils.clean_input
      • langtest.utils.hf_utils.get_model_n_tokenizer
      • langtest.utils.hf_utils.login_with_token
      • langtest.utils.hf_utils.HuggingFacePipeline
      • langtest.utils.hf_utils.GatedRepoAccessError
    • langtest.utils.lib_manager
      • langtest.utils.lib_manager.log_verbosity_handler
      • langtest.utils.lib_manager.try_import_lib
    • langtest.utils.number_to_word
      • langtest.utils.number_to_word.print3
      • langtest.utils.number_to_word.ConvertNumberToWord
      • langtest.utils.number_to_word.BadChunkingOptionError
      • langtest.utils.number_to_word.NumOutOfRangeError
    • langtest.utils.report_utils
      • langtest.utils.report_utils.color_cells
      • langtest.utils.report_utils.mlflow_report
      • langtest.utils.report_utils.model_report
      • langtest.utils.report_utils.multi_dataset_multi_model_report
      • langtest.utils.report_utils.multi_dataset_report
      • langtest.utils.report_utils.multi_model_report
      • langtest.utils.report_utils.political_report
      • langtest.utils.report_utils.save_format
    • langtest.utils.util_metrics
      • langtest.utils.util_metrics.calculate_f1_score
      • langtest.utils.util_metrics.classification_report
  • langtest

langtest

Modules

langtest.augmentation

langtest.callback

langtest.config

langtest.datahandler

langtest.embeddings

langtest.errors

langtest.langtest

langtest.leaderboard

langtest.logger

langtest.metrics

langtest.modelhandler

langtest.pipelines

langtest.prompts

langtest.tasks

langtest.transform

langtest.utils

previous

Quick Start

next

langtest.augmentation

© Copyright 2023, John Snow Labs.

Created using Sphinx 6.1.3.

Built with the PyData Sphinx Theme 0.14.4.