Add pre-commit hooks & restyle code
HHousen committed Jul 4, 2021
1 parent c3b3230 commit ad84986
Showing 35 changed files with 665 additions and 446 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -117,4 +117,4 @@ doc/_build
notebooks/
models
.DS_Store
venv
29 changes: 29 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,29 @@
repos:
  - repo: https://github.com/ambv/black
    rev: 21.6b0
    hooks:
      - id: black
  - repo: https://github.com/pycqa/isort
    rev: 5.9.1
    hooks:
      - id: isort
        args: ["--profile", "black", "--filter-files"]
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.0.1
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: debug-statements
      - id: check-docstring-first
      - id: check-yaml
      - id: check-added-large-files
      - id: pretty-format-json
        args: ["--autofix"]
  - repo: https://github.com/asottile/pyupgrade
    rev: v2.20.0
    hooks:
      - id: pyupgrade
  - repo: https://github.com/PyCQA/flake8
    rev: 3.9.2
    hooks:
      - id: flake8
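
To enable these hooks locally, a contributor would typically run something like the following (a sketch, assuming ``pre-commit`` is not already installed):

    pip install pre-commit
    pre-commit install
    pre-commit run --all-files

``pre-commit install`` registers the hooks so they run on every ``git commit``, and ``pre-commit run --all-files`` applies them to the entire repository once, which is presumably how the restyling in this commit was produced.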
2 changes: 1 addition & 1 deletion LICENSE
@@ -616,4 +616,4 @@ above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
2 changes: 1 addition & 1 deletion doc/_static/loss_functions/loss_avg.csv
@@ -1600,4 +1600,4 @@
"7990","7995","0.2672945559024811","undefined","7995","0.34593504667282104","undefined","7995","0.3321243226528168","undefined","7995","0.35803699493408203","undefined","7995","0.3589259684085846","undefined"
"7995","8000","0.33078715205192566","undefined","8000","0.30306801199913025","undefined","8000","0.3155066967010498","undefined","8000","0.35373300313949585","undefined","8000","0.3202584385871887","undefined"
"8000","8005","0.32276827096939087","undefined","8005","0.35233697295188904","undefined","8005","0.344290554523468","undefined","8005","0.33728644251823425","undefined","8005","0.29650598764419556","undefined"
"8005","8010","0.3273674249649048","undefined","8010","0.3167572319507599","undefined","8010","0.3407208025455475","undefined","8010","0.3501826226711273","undefined","8010","0.34778228402137756","undefined"
"8005","8010","0.3273674249649048","undefined","8010","0.3167572319507599","undefined","8010","0.3407208025455475","undefined","8010","0.3501826226711273","undefined","8010","0.34778228402137756","undefined"
2 changes: 1 addition & 1 deletion doc/_static/loss_functions/loss_avg_seq_mean.csv
@@ -1600,4 +1600,4 @@
"7990","7995","0.2740124464035034","undefined","7995","0.35077494382858276","undefined","7995","0.3478199243545532","undefined","7995","0.35866016149520874","undefined","7995","0.3791751563549042","undefined"
"7995","8000","0.34686484932899475","undefined","8000","0.31498754024505615","undefined","8000","0.3321347236633301","undefined","8000","0.3559577465057373","undefined","8000","0.33466649055480957","undefined"
"8000","8005","0.3251478672027588","undefined","8005","0.3561927378177643","undefined","8005","0.36540666222572327","undefined","8005","0.3430340886116028","undefined","8005","0.30841362476348877","undefined"
"8005","8010","0.33854520320892334","undefined","8010","0.32373565435409546","undefined","8010","0.3489498198032379","undefined","8010","0.3549184799194336","undefined","8010","0.3679059147834778","undefined"
"8005","8010","0.33854520320892334","undefined","8010","0.32373565435409546","undefined","8010","0.3489498198032379","undefined","8010","0.3549184799194336","undefined","8010","0.3679059147834778","undefined"
2 changes: 1 addition & 1 deletion doc/_static/loss_functions/loss_avg_seq_sum.csv
@@ -1600,4 +1600,4 @@
"7990","7995","8.76839828491211","undefined","7995","11.224798202514648","undefined","7995","11.130237579345703","undefined","7995","11.47712516784668","undefined","7995","12.133605003356934","undefined"
"7995","8000","11.099675178527832","undefined","8000","10.079601287841797","undefined","8000","10.628311157226562","undefined","8000","11.390647888183594","undefined","8000","10.709327697753906","undefined"
"8000","8005","10.404731750488281","undefined","8005","11.398167610168457","undefined","8005","11.693013191223145","undefined","8005","10.977090835571289","undefined","8005","9.86923599243164","undefined"
"8005","8010","10.833446502685547","undefined","8010","10.359540939331055","undefined","8010","11.166394233703613","undefined","8010","11.357391357421875","undefined","8010","11.772989273071289","undefined"
"8005","8010","10.833446502685547","undefined","8010","10.359540939331055","undefined","8010","11.166394233703613","undefined","8010","11.357391357421875","undefined","8010","11.772989273071289","undefined"
2 changes: 1 addition & 1 deletion doc/_static/loss_functions/loss_total.csv
@@ -1600,4 +1600,4 @@
"7990","7995","198.599853515625","undefined","7995","246.30575561523438","undefined","7995","187.65023803710938","undefined","7995","218.76060485839844","undefined","7995","202.7931671142578","undefined"
"7995","8000","226.91998291015625","undefined","8000","192.14512634277344","undefined","8000","192.14356994628906","undefined","8000","219.31446838378906","undefined","8000","189.59300231933594","undefined"
"8000","8005","211.0904541015625","undefined","8005","221.9722900390625","undefined","8005","215.87017822265625","undefined","8005","213.1650390625","undefined","8005","194.21142578125","undefined"
"8005","8010","212.13409423828125","undefined","8010","188.47055053710938","undefined","8010","214.9948272705078","undefined","8010","219.21432495117188","undefined","8010","211.79940795898438","undefined"
"8005","8010","212.13409423828125","undefined","8010","188.47055053710938","undefined","8010","214.9948272705078","undefined","8010","219.21432495117188","undefined","8010","211.79940795898438","undefined"
2 changes: 1 addition & 1 deletion doc/_static/loss_functions/loss_total_norm_batch.csv
@@ -1600,4 +1600,4 @@
"7990","7995","6.206245422363281","undefined","7995","7.697054862976074","undefined","7995","5.864069938659668","undefined","7995","6.836268901824951","undefined","7995","6.337286472320557","undefined"
"7995","8000","7.091249465942383","undefined","8000","6.00453519821167","undefined","8000","6.004486560821533","undefined","8000","6.853577136993408","undefined","8000","5.924781322479248","undefined"
"8000","8005","6.596576690673828","undefined","8005","6.936634063720703","undefined","8005","6.745943069458008","undefined","8005","6.661407470703125","undefined","8005","6.0691070556640625","undefined"
"8005","8010","6.629190444946289","undefined","8010","5.889704704284668","undefined","8010","6.718588352203369","undefined","8010","6.850447654724121","undefined","8010","6.618731498718262","undefined"
"8005","8010","6.629190444946289","undefined","8010","5.889704704284668","undefined","8010","6.718588352203369","undefined","8010","6.850447654724121","undefined","8010","6.618731498718262","undefined"
2 changes: 1 addition & 1 deletion doc/_static/summarization-model-experiments-raw-data.csv
@@ -70,4 +70,4 @@
"loss-test_avg_seq_mean","2020-04-17T02:34:16.000Z","13849","loss-func-test-old","","2020-11-19T22:45:56.000Z","239269211beb","","","","","","","","","","3h7bb65i","finished","-","hhousen","","2020-04-16T22:43:27.000Z","","2","1e-8","O1","","1","","0.1","","binary","./cnn_dm_pt/bert-base-uncased","true","true","-1","1","0.00002","false","false","INFO","<pytorch_lightning.loggers.wandb.WandbLogger object at 0x7fb2f22812e8>","loss_avg_seq_mean","3","512","1","","distilbert-base-uncased","bert","","true","","0","5","4","false","adamw","none","0.6","sent_rep_tokens","32","","2","true","true","50","6","","32","top_k","3","test","","","false","","","32","train","","false","wandb","linear","32","val","","1800","0.01","","","","","","","","","","","","","","","","","","","","","8005","","","","","","","","","","","","","0.3167572319507599","0.32373565435409546","10.359540939331056","188.47055053710935","5.889704704284668","0.918835997581482","0.5851296782493591","0.25142356753349304","0.30322131514549255","0.31995004415512085","10.238401412963867","176.49374389648438","5.515429496765137",""
"loss-test_avg_seq_sum","2020-04-16T20:38:12.000Z","14230","loss-func-test-old","","2020-11-19T22:45:56.000Z","5f1cc95c99b6","","","","","","","","","","1nhtgv2c","finished","-","hhousen","","2020-04-16T16:41:02.000Z","","2","1e-8","O1","","1","","0.1","","binary","./cnn_dm_pt/bert-base-uncased","true","true","-1","1","0.00002","false","false","INFO","<pytorch_lightning.loggers.wandb.WandbLogger object at 0x7f0a4373de48>","loss_avg_seq_sum","3","512","1","","distilbert-base-uncased","bert","","true","","0","5","4","false","adamw","none","0.6","sent_rep_tokens","32","","2","true","true","50","6","","32","top_k","3","test","","","false","","","32","train","","false","wandb","linear","32","val","","1800","0.01","","","","","","","","","","","","","","","","","","","","","8005","","","","","","","","","","","","","0.3407208025455475","0.3489498198032379","11.166394233703612","214.9948272705078","6.718588352203369","0.9136065244674684","0.5195010304450989","0.125395268201828","0.31573837995529175","0.3229138255119324","10.333242416381836","187.5449981689453","5.860781192779541",""
"loss-test_total_norm_batch","2020-04-16T08:36:54.000Z","13855","loss-func-test-old","","2020-11-19T22:45:56.000Z","17e694aceeed","","","","","","","","","","39586k7k","finished","-","hhousen","","2020-04-16T04:45:59.000Z","","2","1e-8","O1","","1","","0.1","","binary","./cnn_dm_pt/bert-base-uncased","true","true","-1","1","0.00002","false","false","INFO","<pytorch_lightning.loggers.wandb.WandbLogger object at 0x7f0cda26c2b0>","loss_total_norm_batch","3","512","1","","distilbert-base-uncased","bert","","true","","0","5","4","false","adamw","none","0.6","sent_rep_tokens","32","","2","true","true","50","6","","32","top_k","3","test","","","false","","","32","train","","false","wandb","linear","32","val","","1800","0.01","","","","","","","","","","","","","","","","","","","","","8005","","","","","","","","","","","","","0.3501826226711273","0.3549184799194336","11.357391357421877","219.21432495117188","6.850447654724121","0.91948664188385","0.5414910316467285","0.16349512338638306","0.30516937375068665","0.3220357596874237","10.30514430999756","177.595458984375","5.549858093261719",""
"loss-test_total","2020-04-16T04:44:56.000Z","13842","loss-func-test-old","","2020-11-19T22:45:56.000Z","17e694aceeed","","","","","","","","","","1lto2dd2","finished","-","hhousen","","2020-04-16T00:54:14.000Z","","2","1e-8","O1","","1","","0.1","","binary","./cnn_dm_pt/bert-base-uncased","true","true","-1","1","0.00002","false","false","INFO","<pytorch_lightning.loggers.wandb.WandbLogger object at 0x7f5e914d9f60>","loss_total","3","512","1","","distilbert-base-uncased","bert","","true","","0","5","4","false","adamw","none","0.6","sent_rep_tokens","32","","2","true","true","50","6","","32","top_k","3","test","","","false","","","32","train","","false","wandb","linear","32","val","","1800","0.01","","","","","","","","","","","","","","","","","","","","","8005","","","","","","","","","","","","","0.34778228402137756","0.3679059147834778","11.772989273071287","211.7994079589844","6.618731498718262","0.919694483280182","0.5366775989532471","0.15366090834140778","0.3051919937133789","0.32205986976623535","10.305915832519531","177.60853576660156","5.550266742706299",""
"loss-test_total","2020-04-16T04:44:56.000Z","13842","loss-func-test-old","","2020-11-19T22:45:56.000Z","17e694aceeed","","","","","","","","","","1lto2dd2","finished","-","hhousen","","2020-04-16T00:54:14.000Z","","2","1e-8","O1","","1","","0.1","","binary","./cnn_dm_pt/bert-base-uncased","true","true","-1","1","0.00002","false","false","INFO","<pytorch_lightning.loggers.wandb.WandbLogger object at 0x7f5e914d9f60>","loss_total","3","512","1","","distilbert-base-uncased","bert","","true","","0","5","4","false","adamw","none","0.6","sent_rep_tokens","32","","2","true","true","50","6","","32","top_k","3","test","","","false","","","32","train","","false","wandb","linear","32","val","","1800","0.01","","","","","","","","","","","","","","","","","","","","","8005","","","","","","","","","","","","","0.34778228402137756","0.3679059147834778","11.772989273071287","211.7994079589844","6.618731498718262","0.919694483280182","0.5366775989532471","0.15366090834140778","0.3051919937133789","0.32205986976623535","10.305915832519531","177.60853576660156","5.550266742706299",""
2 changes: 1 addition & 1 deletion doc/abstractive/api.rst
@@ -4,4 +4,4 @@ Abstractive API Reference
Model/Module
------------

.. automodule:: abstractive
2 changes: 1 addition & 1 deletion doc/abstractive/training.rst
@@ -70,7 +70,7 @@ Step-by-Step Instructions
2. Extract (≈90GB): ``tar -xzvf longformer-encdec-base-8192.tar.gz``
3. Training command:

.. code-block::

    python main.py \
        --mode abstractive \
1 change: 0 additions & 1 deletion doc/extractive/api.rst
@@ -27,4 +27,3 @@ Convert To Extractive
---------------------

.. automodule:: convert_to_extractive

8 changes: 4 additions & 4 deletions doc/extractive/convert-to-extractive.rst
@@ -23,7 +23,7 @@ Simply run ``convert_to_extractive.py`` with the path to the data. For example,
* ``--shard_interval`` processes the file in chunks of ``5000`` and writes results to disk in chunks of ``5000`` (saves RAM)
* ``--compression`` compresses each output chunk with gzip (depending on the dataset, this reduces the space usage requirement by about 1/2 to 1/3)
* ``--add_target_to`` will save the abstractive target text to the specified splits (in ``--split_names``).

The default output directory is the input directory that was specified, but the output directory can be changed with ``--base_output_path`` if desired.
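
As an illustration, a full invocation combining these options might look like the following sketch (the dataset path and split name are hypothetical):

.. code-block::

    python convert_to_extractive.py ./datasets/cnn_dm \
        --shard_interval 5000 \
        --compression \
        --add_target_to test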

@@ -36,7 +36,7 @@ Option 2: Automatic pre-processing through ``nlp``

You will need to run the ``convert_to_extractive.py`` command with the ``--dataset``, ``--dataset_version``, ``--data_example_column``, and ``--data_summarized_column`` options set. To use the CNN/DM dataset you would set these arguments as shown below:

.. code-block::

    --dataset cnn_dailymail \
    --dataset_version 3.0.0 \
@@ -66,7 +66,7 @@ Extractive Dataset Format

This section briefly discusses the format of datasets created by the ``convert_to_extractive`` script.

The training and validation sets only need the ``src`` and ``labels`` keys saved as JSON. The ``src`` value should be a list of lists where each inner list contains a series of tokens (see below). The ``labels`` value is a list of 0s (sentence not in summary) and 1s (sentence should be in summary) that is the same length as the ``src`` value (the number of sentences). Each value in this list corresponds to a sentence in ``src``. The testing set is special because it needs the ``src``, ``labels``, and ``tgt`` keys. The ``tgt`` key represents the target summary as a single string with a ``<q>`` between each sentence.
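
As a minimal, hypothetical illustration, a test-set document in this format could look like:

.. code-block:: json

    {
        "src": [["this", "is", "sentence", "one", "."], ["this", "is", "sentence", "two", "."]],
        "labels": [1, 0],
        "tgt": "the first summary sentence .<q>the second summary sentence ."
    }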

First document in **CNN/DM** extractive **training** set:

@@ -151,4 +151,4 @@ Script Help
--max_example_nsents MAX_EXAMPLE_NSENTS
maximum number of sentences per example
-l {DEBUG,INFO,WARNING,ERROR,CRITICAL}, --log {DEBUG,INFO,WARNING,ERROR,CRITICAL}
Set the logging level (default: 'Info').
4 changes: 2 additions & 2 deletions doc/extractive/experiments.rst
@@ -95,7 +95,7 @@ Commit `dfefd15` added a :class:`~classifier.SimpleLinearClassifier`. This exper

Command used to run the tests:

.. code-block::

    python main.py \
        --model_name_or_path distilbert-base-uncased \
@@ -697,4 +697,4 @@ Classifier/Encoder Results
**Relative Time:**

.. image:: ../_static/encoder/loss_avg_seq_mean_reltime.png
    :width: 48%
6 changes: 3 additions & 3 deletions doc/extractive/models-results.rst
@@ -3,7 +3,7 @@
Extractive Pre-trained Models & Results
=======================================

The recommended model is ``distilroberta-base-ext-sum`` because of its speed, relatively low number of parameters, and good performance.

Notes
-----
@@ -14,7 +14,7 @@ The remarkable performance to size ratio of the distil* models can be transferre

`MobileBERT <https://arxiv.org/abs/2004.02984>`_ is similar to ``distilbert`` in that it is a smaller version of BERT that achieves amazing performance at a very small size. `According to the authors <https://openreview.net/forum?id=SJxjVaNKwB&noteId=S1gxqk_7jH>`__, MobileBERT is *2.64x smaller and 2.45x faster* than DistilBERT. DistilBERT successfully halves the depth of the BERT model by knowledge distillation in the pre-training stage and an optional fine-tuning stage. MobileBERT only uses knowledge transfer in the pre-training stage and does not require a fine-tuned teacher or data augmentation in downstream tasks. DistilBERT compresses BERT by reducing its depth, while MobileBERT compresses BERT by reducing its width, which has been shown to be more effective. MobileBERT usually needs a larger learning rate and more training epochs in fine-tuning than the original BERT.

.. important:: Interactive charts, graphs, raw data, run commands, hyperparameter choices, and more for all trained models are publicly available on the `TransformerSum Weights & Biases page <https://app.wandb.ai/hhousen/transformerextsum>`__. You can download the raw data for each model on this site, or `download an overview as a CSV <../_static/summarization-model-experiments-raw-data.csv>`__. Please open an `issue <https://github.com/HHousen/TransformerSum/issues/new>`__ if you have questions about these models.

Additionally, all of the models on this page were trained completely for free using Tesla P100-PCIE-16GB GPUs on `Google Colaboratory <https://colab.research.google.com/>`_. Those that took over 12 hours to train were split into multiple training sessions since ``pytorch_lightning`` enables easy resuming with the ``--resume_from_checkpoint`` argument.
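
For example, resuming an interrupted training session might look like the following sketch (the mode and checkpoint path are illustrative; other arguments omitted):

.. code-block::

    python main.py \
        --mode extractive \
        --resume_from_checkpoint ./checkpoints/epoch=2.ckpt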

@@ -130,7 +130,7 @@ Test set results on the WikiHow dataset using ROUGE F\ :sub:`1`\ .
+---------------------------------+------------+------------+------------+-------------+
| distilroberta-base-ext-sum | 31.07 | 8.96 | 19.34 | 28.95 |
+---------------------------------+------------+------------+------------+-------------+
| bert-base-uncased-ext-sum | 30.68 | 08.67 | 19.16 | 28.59 |
+---------------------------------+------------+------------+------------+-------------+
| roberta-base-ext-sum | 31.26 | 09.09 | 19.47 | 29.14 |
+---------------------------------+------------+------------+------------+-------------+