QData · qiyanjun · Sep 29, 2021 · Aug 20, 2021 · Aug 20, 2021 · Aug 20, 2021
diff --git a/tests/sample_outputs/run_attack_hotflip_lstm_mr_4_adv_metrics.txt b/tests/sample_outputs/run_attack_hotflip_lstm_mr_4_adv_metrics.txt
@@ -0,0 +1,74 @@
+/.*/Attack(
+  (search_method): BeamSearch(
+    (beam_width):  10
+  )
+  (goal_function):  UntargetedClassification
+  (transformation):  WordSwapGradientBased(
+    (top_n):  1
+  )
+  (constraints): 
+    (0): MaxWordsPerturbed(
+        (max_num_words):  2
+        (compare_against_original):  True
+      )
+    (1): WordEmbeddingDistance(
+        (embedding):  WordEmbedding
+        (min_cos_sim):  0.8
+        (cased):  False
+        (include_unknown_words):  True
+        (compare_against_original):  True
+      )
+    (2): PartOfSpeech(
+        (tagger_type):  nltk
+        (tagset):  universal
+        (allow_verb_noun_swap):  True
+        (compare_against_original):  True
+      )
+    (3): RepeatModification
+    (4): StopwordModification
+  (is_black_box):  False
+) 
+
+--------------------------------------------- Result 1 ---------------------------------------------
+[[Positive (96%)]] --> [[Negative (77%)]]
+
+the story gives ample opportunity for large-scale action and suspense , which director shekhar kapur [[supplies]] with tremendous skill .
+
+the story gives ample opportunity for large-scale action and suspense , which director shekhar kapur [[stagnated]] with tremendous skill .
+
+
+--------------------------------------------- Result 2 ---------------------------------------------
+[[Negative (57%)]] --> [[[SKIPPED]]]
+
+red dragon " never cuts corners .
+
+
+--------------------------------------------- Result 3 ---------------------------------------------
+[[Positive (51%)]] --> [[[FAILED]]]
+
+fresnadillo has something serious to say about the ways in which extravagant chance can distort our perspective and throw us off the path of good sense .
+
+
+--------------------------------------------- Result 4 ---------------------------------------------
+[[Positive (89%)]] --> [[[FAILED]]]
+
+throws in enough clever and unexpected twists to make the formula feel fresh .
+
+
+
++-------------------------------+--------+
+| Attack Results                |        |
++-------------------------------+--------+
+| Number of successful attacks: | 1      |
+| Number of failed attacks:     | 2      |
+| Number of skipped attacks:    | 1      |
+| Original accuracy:            | 75.0%  |
+| Accuracy under attack:        | 50.0%  |
+| Attack success rate:          | 33.33% |
+| Average perturbed word %:     | 5.56%  |
+| Average num. words per input: | 15.5   |
+| Avg num queries:              | 1.33   |
+| Average Original Perplexity:  | 291.47 |
+| Average Attack Perplexity:    | 320.33 |
+| Average Attack USE Score:     | 0.91   |
++-------------------------------+--------+
diff --git a/tests/sample_outputs/run_attack_transformers_datasets_adv_metrics.txt b/tests/sample_outputs/run_attack_transformers_datasets_adv_metrics.txt
@@ -0,0 +1,68 @@
+/.*/Attack(
+  (search_method): GreedyWordSwapWIR(
+    (wir_method):  unk
+  )
+  (goal_function):  UntargetedClassification
+  (transformation):  CompositeTransformation(
+    (0): WordSwapNeighboringCharacterSwap(
+        (random_one):  True
+      )
+    (1): WordSwapRandomCharacterSubstitution(
+        (random_one):  True
+      )
+    (2): WordSwapRandomCharacterDeletion(
+        (random_one):  True
+      )
+    (3): WordSwapRandomCharacterInsertion(
+        (random_one):  True
+      )
+    )
+  (constraints): 
+    (0): LevenshteinEditDistance(
+        (max_edit_distance):  30
+        (compare_against_original):  True
+      )
+    (1): RepeatModification
+    (2): StopwordModification
+  (is_black_box):  True
+) 
+
+--------------------------------------------- Result 1 ---------------------------------------------
+[[Negative (100%)]] --> [[Positive (71%)]]
+
+[[hide]] [[new]] secretions from the parental units 
+
+[[Ehide]] [[enw]] secretions from the parental units 
+
+
+--------------------------------------------- Result 2 ---------------------------------------------
+[[Negative (100%)]] --> [[[FAILED]]]
+
+contains no wit , only labored gags 
+
+
+--------------------------------------------- Result 3 ---------------------------------------------
+[[Positive (100%)]] --> [[Negative (96%)]]
+
+that [[loves]] its characters and communicates [[something]] [[rather]] [[beautiful]] about human nature 
+
+that [[lodes]] its characters and communicates [[somethNng]] [[rathrer]] [[beautifdul]] about human nature 
+
+
+
++-------------------------------+---------+
+| Attack Results                |         |
++-------------------------------+---------+
+| Number of successful attacks: | 2       |
+| Number of failed attacks:     | 1       |
+| Number of skipped attacks:    | 0       |
+| Original accuracy:            | 100.0%  |
+| Accuracy under attack:        | 33.33%  |
+| Attack success rate:          | 66.67%  |
+| Average perturbed word %:     | 30.95%  |
+| Average num. words per input: | 8.33    |
+| Avg num queries:              | 22.67   |
+| Average Original Perplexity:  | 1126.57 |
+| Average Attack Perplexity:    | 2823/.*/|
+| Average Attack USE Score:     | 0.76    |
++-------------------------------+---------+
diff --git a/tests/test_command_line/test_attack.py b/tests/test_command_line/test_attack.py
@@ -48,6 +48,20 @@
         "tests/sample_outputs/run_attack_transformers_datasets.txt",
     ),
     #
+    # test loading an attack from the transformers model hub and calculating perplexity and USE score
+    #
+    (
+        "attack_from_transformers_adv_metrics",
+        (
+            "textattack attack --model-from-huggingface "
+            "distilbert-base-uncased-finetuned-sst-2-english "
+            "--dataset-from-huggingface glue^sst2^train --recipe deepwordbug --num-examples 3 "
+            "--enable-advance-metrics"
+            ""
+        ),
+        "tests/sample_outputs/run_attack_transformers_datasets_adv_metrics.txt",
+    ),
+    #
     # test running an attack by loading a model and dataset from file
     #
     (
@@ -72,6 +86,17 @@
         "tests/sample_outputs/run_attack_hotflip_lstm_mr_4.txt",
     ),
     #
+    # test hotflip on 4 samples from LSTM MR and calculate perplexity and USE score
+    #
+    (
+        "run_attack_hotflip_lstm_mr_4_adv_metrics",
+        (
+            "textattack attack --model lstm-mr --recipe hotflip "
+            "--num-examples 4 --num-examples-offset 3 --enable-advance-metrics "
+        ),
+        "tests/sample_outputs/run_attack_hotflip_lstm_mr_4_adv_metrics.txt",
+    ),
+    #
     # test: run_attack deepwordbug attack on 10 samples from LSTM MR
     #
     (

diff --git a/textattack/__init__.py b/textattack/__init__.py
@@ -8,7 +8,6 @@
 
 TextAttack provides components for common NLP tasks like sentence encoding, grammar-checking, and word replacement that can be used on their own.
 """
-
 from .attack_args import AttackArgs, CommandLineAttackArgs
 from .augment_args import AugmenterArgs
 from .dataset_args import DatasetArgs
@@ -17,6 +16,7 @@
 from .attack import Attack
 from .attacker import Attacker
 from .trainer import Trainer
+from .metrics import Metric
 
 from . import (
     attack_recipes,
@@ -28,10 +28,12 @@
     goal_function_results,
     goal_functions,
     loggers,
+    metrics,
     models,
     search_methods,
     shared,
     transformations,
 )
 
+
 name = "textattack"
diff --git a/textattack/attack_args.py b/textattack/attack_args.py
@@ -173,6 +173,8 @@ class AttackArgs:
             Disable displaying individual attack results to stdout.
         silent (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Disable all logging (except for errors). This is stronger than :obj:`disable_stdout`.
+        enable_advance_metrics (:obj:`bool`, `optional`, defaults to :obj:`False`):
+            Enable calculation and display of optional advanced post-hoc metrics like perplexity, USE score, etc.
     """
 
     num_examples: int = 10
@@ -193,6 +195,7 @@ class AttackArgs:
     log_to_wandb: str = None
     disable_stdout: bool = False
     silent: bool = False
+    enable_advance_metrics: bool = False
 
     def __post_init__(self):
         if self.num_successful_examples:
@@ -350,6 +353,12 @@ def _add_parser_args(cls, parser):
             default=default_obj.silent,
             help="Disable all logging",
         )
+        parser.add_argument(
+            "--enable-advance-metrics",
+            action="store_true",
+            default=default_obj.enable_advance_metrics,
+            help="Enable calculation and display of optional advance post-hoc metrics like perplexity, use distance, etc.",
+        )
 
         return parser
 

diff --git a/textattack/attacker.py b/textattack/attacker.py
@@ -219,6 +219,10 @@ def _attack(self):
         # Enable summary stdout
         if not self.attack_args.silent and self.attack_args.disable_stdout:
             self.attack_log_manager.enable_stdout()
+
+        if self.attack_args.enable_advance_metrics:
+            self.attack_log_manager.enable_advance_metrics = True
+
         self.attack_log_manager.log_summary()
         self.attack_log_manager.flush()
         print()
@@ -390,6 +394,10 @@ def _attack_parallel(self):
         # Enable summary stdout.
         if not self.attack_args.silent and self.attack_args.disable_stdout:
             self.attack_log_manager.enable_stdout()
+
+        if self.attack_args.enable_advance_metrics:
+            self.attack_log_manager.enable_advance_metrics = True
+
         self.attack_log_manager.log_summary()
         self.attack_log_manager.flush()
         print()