From 836e266a878ca7f112c33cba5f03ffe500028cb4 Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Wed, 7 Aug 2024 21:25:59 -0500 Subject: [PATCH 01/22] Fix test; clean up warnings --- .github/workflows/lint-and-test.yml | 2 +- .gitignore | 3 +++ nfl_data_py/__init__.py | 18 +----------------- nfl_data_py/tests/nfl_test.py | 14 ++++++-------- requirements.txt | 4 ++++ setup.py | 9 +++++---- 6 files changed, 20 insertions(+), 30 deletions(-) create mode 100644 requirements.txt diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml index 3a913be..8763389 100644 --- a/.github/workflows/lint-and-test.yml +++ b/.github/workflows/lint-and-test.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 diff --git a/.gitignore b/.gitignore index a15111e..fd0e404 100644 --- a/.gitignore +++ b/.gitignore @@ -139,3 +139,6 @@ dmypy.json # Cython debug symbols cython_debug/ + +# Mac local files +.DS_Store diff --git a/nfl_data_py/__init__.py b/nfl_data_py/__init__.py index 604ba7c..5e6fbca 100644 --- a/nfl_data_py/__init__.py +++ b/nfl_data_py/__init__.py @@ -150,7 +150,7 @@ def import_pbp_data( pbp_data.append(raw) print(str(year) + ' done.') - except Error as e: + except Exception as e: print(e) print('Data not available for ' + str(year)) @@ -1139,18 +1139,6 @@ def clean_nfl_data(df): 'Louisiana State': 'LSU' } - pro_tm_repl = { - 'GNB': 'GB', - 'KAN': 'KC', - 'LA': 'LAR', - 'LVR': 'LV', - 'NWE': 'NE', - 'NOR': 'NO', - 'SDG': 'SD', - 'SFO': 'SF', - 'TAM': 'TB' - } - na_replace = { 'NA':numpy.nan } @@ -1164,8 +1152,4 @@ def clean_nfl_data(df): if 'col_team' in df.columns: df.replace({'col_team': col_tm_repl}, inplace=True) - if 'name' in df.columns: - for z in player_col_tm_repl: - df[df['name'] == z[0]] = df[df['name'] == z[0]].replace({z[1]: z[2]}) - return df diff --git a/nfl_data_py/tests/nfl_test.py b/nfl_data_py/tests/nfl_test.py index 290b968..763a886 100644 --- a/nfl_data_py/tests/nfl_test.py +++ b/nfl_data_py/tests/nfl_test.py @@ -1,6 +1,7 @@ from unittest import TestCase from pathlib import Path import shutil +import random import pandas as pd @@ -20,7 +21,7 @@ def test_is_df_with_data_thread_requests(self): def test_uses_cache_when_cache_is_true(self): - cache = Path(__file__).parent/"tmpcache" + cache = Path(__file__).parent/f"tmpcache-{random.randint(0, 10000)}" self.assertRaises( ValueError, nfl.import_pbp_data, [2020], cache=True, alt_path=cache @@ -268,17 +269,14 @@ def test_is_df_with_data_thread_requests(self): class test_cache(TestCase): def test_cache(self): - cache = Path(__file__).parent/"tmpcache" + cache = Path(__file__).parent/f"tmpcache-{random.randint(0, 10000)}" self.assertFalse(cache.is_dir()) nfl.cache_pbp([2020], alt_path=cache) - new_paths = list(cache.glob("**/*")) - self.assertEqual(len(new_paths), 2) - self.assertTrue(new_paths[0].is_dir()) - self.assertTrue(new_paths[1].is_file()) - - pbp2020 = pd.read_parquet(new_paths[1]) + self.assertTrue(cache.is_dir()) + + pbp2020 = pd.read_parquet(cache/"season=2020"/"part.0.parquet") self.assertIsInstance(pbp2020, pd.DataFrame) self.assertFalse(pbp2020.empty) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..df06964 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +appdirs +fastparquet +numpy +pandas \ No newline at end of file diff --git a/setup.py b/setup.py index d94e14f..c716eb5 100644 --- a/setup.py +++ b/setup.py @@ -8,15 +8,16 @@ NAME = 'nfl_data_py' DESCRIPTION = 'python library for interacting with NFL data sourced from nflfastR' URL = 'https://github.com/nflverse/nfl_data_py' -EMAIL = 'cooper.dff11@gmail.com' -AUTHOR = 'cooperdff' +EMAIL = 'alec.ostrander@gmail.com' +AUTHOR = 'Alec Ostrander' REQUIRES_PYTHON = '>=3.6.0' -VERSION = '0.3.2' +VERSION = '0.3.3' # What packages are required for this module to be executed? REQUIRED = [ - 'pandas>1', + 'numpy>1, <2' + 'pandas>1, <2', 'appdirs>1', 'fastparquet>0.5', ] From 24229f083b5c183de6bc13bf275ac0d531405241 Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Wed, 7 Aug 2024 21:50:06 -0500 Subject: [PATCH 02/22] lock dependencies for test suite --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index df06964..4d89128 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ appdirs -fastparquet -numpy -pandas \ No newline at end of file +fastparquet>0.5 +numpy>1, <2 +pandas>1, <2 \ No newline at end of file From a3358b13811b5093eab4050c0cf5f1c8bfdf6828 Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Wed, 7 Aug 2024 22:23:10 -0500 Subject: [PATCH 03/22] update setup-python action version --- .github/workflows/lint-and-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml index 8763389..dada1ab 100644 --- a/.github/workflows/lint-and-test.yml +++ b/.github/workflows/lint-and-test.yml @@ -21,7 +21,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies From 133b0c47b33eafad20fd4c09fcad111666987f09 Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Wed, 7 Aug 2024 22:23:52 -0500 Subject: [PATCH 04/22] change workflow name --- .github/workflows/lint-and-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml index dada1ab..e25338c 100644 --- a/.github/workflows/lint-and-test.yml +++ b/.github/workflows/lint-and-test.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python -name: Python package +name: Lint and Test on: push: From 4387069e6800e7d1507e36cbd50a337eb4503617 Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Wed, 7 Aug 2024 22:31:16 -0500 Subject: [PATCH 05/22] run the test suite on both linux and windows --- .github/workflows/lint-and-test.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml index e25338c..aa3649b 100644 --- a/.github/workflows/lint-and-test.yml +++ b/.github/workflows/lint-and-test.yml @@ -11,13 +11,12 @@ on: jobs: build: - - runs-on: ubuntu-latest strategy: fail-fast: false matrix: + os: [ubuntu-latest, windows-latest] python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] - + runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} From 25a8cf12637b65775e0e17623443cf8bf5ba291a Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Thu, 8 Aug 2024 00:03:16 -0500 Subject: [PATCH 06/22] Run test suite against matrix of relevant OS's and Python versions (#93) --- .github/workflows/lint-and-test.yml | 70 +++++++++++++++++++---------- 1 file changed, 46 insertions(+), 24 deletions(-) diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml index 8763389..e4123ad 100644 --- a/.github/workflows/lint-and-test.yml +++ b/.github/workflows/lint-and-test.yml @@ -1,40 +1,62 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python -name: Python package +name: Lint and Test on: push: branches: [ "main" ] pull_request: - branches: [ "main" ] + branches: [ "main", "v*" ] jobs: build: - - runs-on: ubuntu-latest + strategy: fail-fast: false matrix: - python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + os: [macos-latest, windows-latest, ubuntu-latest] + include: + - python-version: "3.6" + os: macos-12 + - python-version: "3.6" + os: windows-latest + - python-version: "3.6" + os: ubuntu-20.04 + - python-version: "3.7" + os: macos-12 + - python-version: "3.7" + os: windows-latest + - python-version: "3.7" + os: ubuntu-latest + + runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install flake8 pytest - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with pytest - run: | - pytest + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + shell: bash + run: | + python -m pip install --upgrade pip + python -m pip install flake8 pytest + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + + - name: Lint with flake8 + shell: bash + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + + - name: Test with pytest + shell: bash + run: | + pytest From dce8c436b23942bfaeb55988a8e0ee3b178c0845 Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Sun, 11 Aug 2024 16:36:59 -0500 Subject: [PATCH 07/22] Made caching failure mode more readily evident; added deprecation warning for future behavior change --- nfl_data_py/__init__.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/nfl_data_py/__init__.py b/nfl_data_py/__init__.py index 5e6fbca..5bdd4f0 100644 --- a/nfl_data_py/__init__.py +++ b/nfl_data_py/__init__.py @@ -4,6 +4,7 @@ import os import logging from concurrent.futures import ThreadPoolExecutor, as_completed +from warnings import warn import appdirs import numpy @@ -230,7 +231,15 @@ def cache_pbp(years, downcast=True, alt_path=None): print(str(year) + ' done.') - except: + except Exception as e: + warn( + f"Caching failed for {year}, skipping.\n" + "In nfl_data_py 1.0, this will raise an exception.\n" + f"Failure: {e}", + DeprecationWarning, + stacklevel=2 + ) + next From 3820ba097b69f6165344bd8b1e1893d8acb094ea Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Sun, 11 Aug 2024 16:37:46 -0500 Subject: [PATCH 08/22] fixed fastparquet dep version for python3.6 --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4d89128..6b33db5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ appdirs -fastparquet>0.5 +fastparquet; python_version >= '3.7' +fastparquet==0.7.2; python_version <= '3.6' numpy>1, <2 pandas>1, <2 \ No newline at end of file From b9c0af467663f757564aa773aaba0f89b4c623bb Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Sun, 11 Aug 2024 17:00:26 -0500 Subject: [PATCH 09/22] Fix a failure in clean_nfl_data() when using very old versions of pandas --- nfl_data_py/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nfl_data_py/__init__.py b/nfl_data_py/__init__.py index 5bdd4f0..e3af05f 100644 --- a/nfl_data_py/__init__.py +++ b/nfl_data_py/__init__.py @@ -1153,7 +1153,8 @@ def clean_nfl_data(df): } for col in df.columns: - df.replace({col:na_replace}, inplace=True) + if df[col].dtype == 'object': + df.replace({col:na_replace}, inplace=True) if 'name' in df.columns: df.replace({'name': name_repl}, inplace=True) From 49cccd9829706cd47258977ab4f32ce0f3e9bb48 Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Sun, 11 Aug 2024 17:26:22 -0500 Subject: [PATCH 10/22] cleanup; update cache path --- nfl_data_py/__init__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/nfl_data_py/__init__.py b/nfl_data_py/__init__.py index e3af05f..ab6c076 100644 --- a/nfl_data_py/__init__.py +++ b/nfl_data_py/__init__.py @@ -184,12 +184,10 @@ def cache_pbp(years, downcast=True, alt_path=None): if min(years) < 1999: raise ValueError('Data not available before 1999.') - plays = pandas.DataFrame() - url1 = r'https://github.com/nflverse/nflverse-data/releases/download/pbp/play_by_play_' url2 = r'.parquet' appname = 'nfl_data_py' - appauthor = 'cooper_dff' + appauthor = 'nflverse' # define path for caching if alt_path is not None: From bcfffc9c3f392ea67a769fc5accadf472fa981d9 Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Sun, 11 Aug 2024 17:27:24 -0500 Subject: [PATCH 11/22] don't bother running the Lint and Test workflow until the PR is promoted from 'draft' status --- .github/workflows/lint-and-test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml index e4123ad..d6c6187 100644 --- a/.github/workflows/lint-and-test.yml +++ b/.github/workflows/lint-and-test.yml @@ -8,10 +8,11 @@ on: branches: [ "main" ] pull_request: branches: [ "main", "v*" ] + types: [ opened, synchronize, reopened, ready_for_review ] jobs: build: - + if: github.event.pull_request.draft == false strategy: fail-fast: false matrix: From b78044b58b792a4443df4c8d58f9972323bbd89b Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Fri, 16 Aug 2024 17:03:00 -0500 Subject: [PATCH 12/22] fix requirements format --- .gitignore | 3 +++ requirements.txt | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index fd0e404..9abefa2 100644 --- a/.gitignore +++ b/.gitignore @@ -142,3 +142,6 @@ cython_debug/ # Mac local files .DS_Store + +# Dev +scratch.ipynb diff --git a/requirements.txt b/requirements.txt index 6b33db5..be97161 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ appdirs fastparquet; python_version >= '3.7' fastparquet==0.7.2; python_version <= '3.6' -numpy>1, <2 -pandas>1, <2 \ No newline at end of file +numpy>=1.0,<=2.0 +pandas>=1.0,<=2.0 \ No newline at end of file From f78bae4fa2c82e104be8937ae5eff8f0939c416e Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Fri, 16 Aug 2024 17:07:25 -0500 Subject: [PATCH 13/22] actually fix requirements format --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index c716eb5..4dabb10 100644 --- a/setup.py +++ b/setup.py @@ -16,8 +16,8 @@ # What packages are required for this module to be executed? REQUIRED = [ - 'numpy>1, <2' - 'pandas>1, <2', + 'numpy>=1.0, <=2.0', + 'pandas>=1.0, <=2.0', 'appdirs>1', 'fastparquet>0.5', ] From 823d00967e80494df38b1f66a353fab8896875b6 Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Fri, 16 Aug 2024 17:08:34 -0500 Subject: [PATCH 14/22] =?UTF-8?q?really=20the=20right=20format=20this=20ti?= =?UTF-8?q?me=20=F0=9F=A4=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 4dabb10..ed0b913 100644 --- a/setup.py +++ b/setup.py @@ -16,8 +16,8 @@ # What packages are required for this module to be executed? REQUIRED = [ - 'numpy>=1.0, <=2.0', - 'pandas>=1.0, <=2.0', + 'numpy>=1.0, <2.0', + 'pandas>=1.0, <2.0', 'appdirs>1', 'fastparquet>0.5', ] From f8b0fe8544f6cb47c6cba35995a08cd84777d466 Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Sun, 15 Sep 2024 16:38:38 -0500 Subject: [PATCH 15/22] remove accidentally non-ignored pyc files --- .gitignore | 2 +- .../nfl_test.cpython-37-pytest-6.2.4.pyc | Bin 7669 -> 0 bytes .../nfl_test.cpython-39-pytest-6.2.4.pyc | Bin 7066 -> 0 bytes 3 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 nfl_data_py/tests/__pycache__/nfl_test.cpython-37-pytest-6.2.4.pyc delete mode 100644 nfl_data_py/tests/__pycache__/nfl_test.cpython-39-pytest-6.2.4.pyc diff --git a/.gitignore b/.gitignore index 9abefa2..1b7fa12 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,4 @@ # Byte-compiled / optimized / DLL files -__pycache__/ *.py[cod] *$py.class @@ -50,6 +49,7 @@ coverage.xml .hypothesis/ .pytest_cache/ cover/ +*/**/tmpcache-* # Translations *.mo diff --git a/nfl_data_py/tests/__pycache__/nfl_test.cpython-37-pytest-6.2.4.pyc b/nfl_data_py/tests/__pycache__/nfl_test.cpython-37-pytest-6.2.4.pyc deleted file mode 100644 index d1d8cde65b437c99fb207e5eb67130b2b30afd9b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 7669 zcmb`MS##S|7=|TzjpOY5S~pD>hmu~v1u)R*G=&rf!d76$WJV}Ha;wx^&yne*Q?A-y z!Bum~U(zky@E5q^d(V+%Ikii5urpHh+ScpyJ^Id~Pj{!LN(Q~Y{(R3loHmR zPkN5clb)A)f%F1fB)us0BIzZzOnO=BCDJQwmGr9A%cR%XI_Y((S4eNLP12iEuae$k z+oZRpUL$>pRY+H)UMGE-?U3G)dV};8wo7_f>P^yD*)`JFq~0QZo!uaPL+WkPRklZZ zPwGpg-(fdN-;}yS`dzk9dSB|x%>3S{-ufF??5Ju6#RsnN4;`*Vv%;P zyoNQ4nap}=)G{nX4%wjS)0uYnq+6$JEnhe*Pul@)VwZNl@ycZ8L-XAH&3taWvY6#( zo|}I5H21uhs2v>1;E{cY;Yr2$%bxKIu8lpyUa! zEqte4cLS@-f{AhT;nBotEPtwo-b9nI4@eg#aIR4_G)9D`H>(qNK zx9yALL(jk8J2|G`YdFIH<_t&j*X-`uF#;3v9USd<&w`SS)e|;rP*V+oJU7g;`D`JA zu;2g25#ZQ%o7!aCLCLmT9oB2&xNO@9LVd!g$V0X>u`A=f4DG;C0BF&aI`!q5lnr|7 zx{sS@W4l5efd}*96;kFB8_&@P9F67x#EZ16T6_Y>7%nb`h}aQppX73)y@N10jWPzG z!EP42k=*9!Q(X6m8+2-JQ?ebxbwsD_G{;M>wiuF&?6uh;v_8Alfi`Q>TOdpDjp#vZ z(xXAau6LSZfFAPV0RGo#hmqmj2uB6e&-hvO&I=kH5@UmP^q`tkNmmxGi_u0J_tP!N z2-kl@rUhlmhPrOk6aF%OUv=K1`D|T&6nVzj@I~sw7t?Nq&P}8-5{=95iZOBt#$k7b zP7Q8Wwk7OQ8tiiC#t?RS$*wj7b_wr3J`N%rt$zbUj?gz~>!aJc*j#OTtK_>5?~opJ zYI^D-^N#Rouph{}|1%#cc++8@4c!y$-g=~~Aij9cSdr{PyO{# z4LUWoY1t6pby_xaMSVE83OQfwt*_1D=%-<>&4##QTOD9m5n9|&*9P0Ei3~}$3*QhK zv?el9f8=6HA9t!n>|%c%c6)txv_MhpVna=K=={|LJLuHxCe)Zg%eUc~hKrMO#i)Ad zT(lnX+P2^Eou<&?b_?;vopdp7tR}KevfGDcm~POik*OZqX*9gLM`4YbTa<3GzYezd z2e7Ghiw!l|ZIS1D33kw_*{KA|5?=lBxamc!;KVQJ@cVFp--b-Pgrz3IZF2n} zK@d7M!MQ|Zes!9?QR7{O%+pK}3t@j%VJc@Gbhi;;JWFi8)>=(=m{&i6b=Y&EQ?pYm zl6t3g;SGlg1~gr1t{D&%+TPe&o zGSo@3`}A}DBvbeMWTgq{GPxbXF$4sy38?*W1fU)ARP*S6)Lm+YE|pfi_QQDb7S&$J zTRW@1LU^iImZAa)`Dp?}Fkiwk1O}ZNm@41Yn=X}&h8C-^W&$$WsLX|eRfsM_C95ng zcXOxt#BFI@vMPkP2-4b)YDj#Z>~wPNI|QoG&704bp=Sn0bZO15}umucFFcfScmMOQ?paWB(HtiiWx1w)L1W@t4Zw!`JKWuOfP6nYITR|8*vZB zgqfl30OlwV_o;s8=7iF!OOE$@bYj- zISeNbEy3c04sB8J(AAxwc+&HlJ{25-@}2IP9ji5Z%7e*uUp-m!I_-V#KINY81{3jH zK_PmtL?_F+*x^C3*YgyWU8l_)As>1KlhFcz>?I2MzQVK_Lfa5Ng<3^3+j)PaQ!}^f#V0Q&OBrV6t$}xIy^bC#*%}0?5iXW?|Pq%Ry U1#Z@h#=KEBi&oK^&Asj4ztqWJegFUf diff --git a/nfl_data_py/tests/__pycache__/nfl_test.cpython-39-pytest-6.2.4.pyc b/nfl_data_py/tests/__pycache__/nfl_test.cpython-39-pytest-6.2.4.pyc deleted file mode 100644 index 0c113eb1536dfadc04cf914e87389fb3355a7cd9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 7066 zcmb`M+jHAQ6vid_8ppZcuXWQjt$PU%@Bj>SIs|Bkfeg?BgUO6gymG5lzI0cnlTLYR zpLplJdCb40SNg)gz!T@&m1H?>Qb~zB+W51rPrE;zv%5OA#>Ofdy?*_B#(FcRY5zdP zUwIOD=@5VEn#MGyyIMn+e~os|&~&YxZxnQVW4Oh3sZo;eb+_CYk-w@mM(=4X$MUZ< zmbXhUbB!wL0xOa(N|5 z*VsDgb*Yy~Ut%@VHK~_LUuGMmH>6%6eT8k3-jsTk^i{S+dQ0jx(%0B^($}S4CtYVZ zNZ*k966u@l7U^43*O>l5tKa??cWJk-`{jqW@OCU=H}OgQWstZ_hnS_tmiMh;Fr68% zv__8Q$R+QWJv!U$?f05=f1?-9EYh(%1vx+QWL$&vjaEW5Q|ox?H-O%;dF#&b(*$s z^q4>LCH>@UZnf=W!_N!bZTY%5);L9__TRmoPxpSJbH&~_JKb(?@5^qp-?lrR*t_R= z_xt;MbiI}(yzdY3PF~II9qwUpLVkjG+r2}-BFF0pleMU;nm~MOdR2cu6N1EJM%JK_F}6To%+V2&gU&>5QuZfE zm5lHTN7^tvzDV5((y=iCda(-(0P;JU>XRW>D>9s7 zUdd=(A(M||w4gK5nu=Zj56kVJH9ytpBB|!^nIQ4h@RWTjR(CMOXc}LAtFOsmVbb^< zPC*JnXM&~n1I=!G-|5&FoPZ&#z&-s2eP_UmrgF6@2+WdU!kYO-3=?!FOzNTBVP|As zjl5HZQY8o_a!nOM_c5R-g49zoc_br(2Z(zx2m%NC97H;g&LD#MPOWTb6)Ebu-A1WL zbqJ^Phzb?*i6tsjkbnBH`3~b#fGS}bfsB#?x<)5|3xfaQ925u7bZQGwQi#T-wHX#yIG(0b#nF!#rNT6=KcAB`k@QRk z>N=hN11thi(3wC5^R?ac9-Ga_7Wd8wsTy4*Rg#5s5R2KTqVx-f7-d^9U&qXn!NP9x zXE+70pfkZzB^9T0(C5yDE^J|GFtknuX>SOUs-%QQ$p9e{ev1Ku&ICx!)KB;M=}A{3 z?^Hn)&eY_Z3X6pSMX{r1YVt@%ggZuZHG{c36`2+Wc7hCf zB_nf-PPSuYpfizCQ?%$#%Hysy|k^n4cbfj3p%`cxNB#pelzr})s$N6pQ$ zpUDW_rqd5#5yTF3{jy))?>nwXPosYIZtu{H)Dl(ZG5v3={|#`uoo#MEq8`f-%9II1-q z Date: Sun, 15 Sep 2024 16:40:01 -0500 Subject: [PATCH 16/22] add __pycache__ back to ignore after deletes --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 1b7fa12..870e8b0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ # Byte-compiled / optimized / DLL files +__pycache__/ *.py[cod] *$py.class From ba15dffea4af14339b9cff4f9694e87a73fed6fc Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Sun, 15 Sep 2024 18:35:03 -0500 Subject: [PATCH 17/22] Made numpy and pandas deps strictly v1 for test automation --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index be97161..964f8c2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ appdirs fastparquet; python_version >= '3.7' fastparquet==0.7.2; python_version <= '3.6' -numpy>=1.0,<=2.0 -pandas>=1.0,<=2.0 \ No newline at end of file +numpy>=1.0,<2.0 +pandas>=1.0,<2.0 \ No newline at end of file From 1df0931cf749d537943fc5fda3fbbfad9d4357c1 Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Sun, 15 Sep 2024 18:56:49 -0500 Subject: [PATCH 18/22] Use lists in import_ids dataframe indexing to clean up warnings (#105) * Added Brendan's tests - passing with warnings * Applied Brendan's fix - fixes warnings, but breaks in default-argument case * fix default issue + some cleanup --- nfl_data_py/__init__.py | 64 ++++++++++++----------------------- nfl_data_py/tests/nfl_test.py | 23 +++++++++++++ 2 files changed, 45 insertions(+), 42 deletions(-) diff --git a/nfl_data_py/__init__.py b/nfl_data_py/__init__.py index ab6c076..c6a2ecc 100644 --- a/nfl_data_py/__init__.py +++ b/nfl_data_py/__init__.py @@ -1,15 +1,15 @@ name = 'nfl_data_py' -import datetime import os import logging -from concurrent.futures import ThreadPoolExecutor, as_completed +import datetime from warnings import warn +from typing import Iterable +from concurrent.futures import ThreadPoolExecutor, as_completed -import appdirs import numpy import pandas -from typing import Iterable +import appdirs # module level doc string __doc__ = """ @@ -735,52 +735,32 @@ def import_ids(columns=None, ids=None): """Import mapping table of ids for most major data providers Args: - columns (List[str]): list of columns to return - ids (List[str]): list of specific ids to return + columns (Iterable[str]): list of columns to return + ids (Iterable[str]): list of specific ids to return Returns: DataFrame """ - - # create list of id options - avail_ids = ['mfl_id', 'sportradar_id', 'fantasypros_id', 'gsis_id', 'pff_id', - 'sleeper_id', 'nfl_id', 'espn_id', 'yahoo_id', 'fleaflicker_id', - 'cbs_id', 'rotowire_id', 'rotoworld_id', 'ktc_id', 'pfr_id', - 'cfbref_id', 'stats_id', 'stats_global_id', 'fantasy_data_id'] - avail_sites = [x[:-3] for x in avail_ids] - - # check variable types - if columns is None: - columns = [] - - if ids is None: - ids = [] - if not isinstance(columns, list): - raise ValueError('columns variable must be list.') - - if not isinstance(ids, list): - raise ValueError('ids variable must be list.') - - # confirm id is in table - if False in [x in avail_sites for x in ids]: - raise ValueError('ids variable can only contain ' + ', '.join(avail_sites)) + columns = columns or [] + if not isinstance(columns, Iterable): + raise ValueError('columns argument must be a list.') + + ids = ids or [] + if not isinstance(ids, Iterable): + raise ValueError('ids argument must be a list.') - # import data - df = pandas.read_csv(r'https://raw.githubusercontent.com/dynastyprocess/data/master/files/db_playerids.csv') + df = pandas.read_csv("https://raw.githubusercontent.com/dynastyprocess/data/master/files/db_playerids.csv") - rem_cols = [x for x in df.columns if x not in avail_ids] - tgt_ids = [x + '_id' for x in ids] - - # filter df to just specified columns - if len(columns) > 0 and len(ids) > 0: - df = df[set(tgt_ids + columns)] - elif len(columns) > 0 and len(ids) == 0: - df = df[set(avail_ids + columns)] - elif len(columns) == 0 and len(ids) > 0: - df = df[set(tgt_ids + rem_cols)] + id_cols = [c for c in df.columns if c.endswith('_id')] + non_id_cols = [c for c in df.columns if not c.endswith('_id')] - return df + # filter df to just specified ids + columns + ret_ids = [x + '_id' for x in ids] or id_cols + ret_cols = columns or non_id_cols + ret_columns = list(set([*ret_ids, *ret_cols])) + + return df[ret_columns] def import_contracts(): diff --git a/nfl_data_py/tests/nfl_test.py b/nfl_data_py/tests/nfl_test.py index 763a886..980ad3f 100644 --- a/nfl_data_py/tests/nfl_test.py +++ b/nfl_data_py/tests/nfl_test.py @@ -167,6 +167,29 @@ def test_is_df_with_data(self): s = nfl.import_ids() self.assertEqual(True, isinstance(s, pd.DataFrame)) self.assertTrue(len(s) > 0) + + def test_import_using_ids(self): + ids = ["espn", "yahoo", "gsis"] + s = nfl.import_ids(ids=ids) + self.assertTrue(all([f"{id}_id" in s.columns for id in ids])) + + def test_import_using_columns(self): + ret_columns = ["name", "birthdate", "college"] + not_ret_columns = ["draft_year", "db_season", "team"] + s = nfl.import_ids(columns=ret_columns) + self.assertTrue(all([column in s.columns for column in ret_columns])) + self.assertTrue(all([column not in s.columns for column in not_ret_columns])) + + def test_import_using_ids_and_columns(self): + ret_ids = ["espn", "yahoo", "gsis"] + ret_columns = ["name", "birthdate", "college"] + not_ret_ids = ["cfbref_id", "pff_id", "prf_id"] + not_ret_columns = ["draft_year", "db_season", "team"] + s = nfl.import_ids(columns=ret_columns, ids=ret_ids) + self.assertTrue(all([column in s.columns for column in ret_columns])) + self.assertTrue(all([column not in s.columns for column in not_ret_columns])) + self.assertTrue(all([f"{id}_id" in s.columns for id in ret_ids])) + self.assertTrue(all([f"{id}_id" not in s.columns for id in not_ret_ids])) class test_ngs(TestCase): def test_is_df_with_data(self): From c9886313b5a6a895677b2e5a414ede769678d69e Mon Sep 17 00:00:00 2001 From: RGAlexander216 <20406603+RGAlexander216@users.noreply.github.com> Date: Sun, 15 Sep 2024 20:24:34 -0500 Subject: [PATCH 19/22] Fix pbp and pbp participation join issue (#79) Resolves issue with play_by_play data having incorrect old_game_id values. For example, all old_game_id values in the play_by_play_2023 for week 15 start with "2022" but they begin with "2023" in the pbp_participation_2023 data. --- nfl_data_py/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nfl_data_py/__init__.py b/nfl_data_py/__init__.py index c6a2ecc..bdacfba 100644 --- a/nfl_data_py/__init__.py +++ b/nfl_data_py/__init__.py @@ -146,7 +146,10 @@ def import_pbp_data( if all([include_participation, year >= 2016, not cache]): path = r'https://github.com/nflverse/nflverse-data/releases/download/pbp_participation/pbp_participation_{}.parquet'.format(year) partic = pandas.read_parquet(path) - raw = raw.merge(partic, how='left', on=['play_id','old_game_id']) + raw = raw.merge(partic, + how='left', + left_on=['play_id','game_id'], + right_on=['play_id','nflverse_game_id']) pbp_data.append(raw) print(str(year) + ' done.') From 4541ab64fe00d3c9ce605418e9c002605095e279 Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Mon, 16 Sep 2024 21:16:44 -0500 Subject: [PATCH 20/22] Fix weekly/seasonal roster import with multiple seasons (#106) * Add roster tests for multiple seasons * fix roster import indices --- .gitignore | 2 +- nfl_data_py/__init__.py | 2 +- nfl_data_py/tests/nfl_test.py | 26 ++++++++++++++++++-------- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 870e8b0..b8e9c1a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ # Byte-compiled / optimized / DLL files -__pycache__/ +__pycache__ *.py[cod] *$py.class diff --git a/nfl_data_py/__init__.py b/nfl_data_py/__init__.py index bdacfba..3c874e1 100644 --- a/nfl_data_py/__init__.py +++ b/nfl_data_py/__init__.py @@ -442,7 +442,7 @@ def __import_rosters(release, years, columns=None): rosters = pandas.concat([ pandas.read_parquet(uri.format(y)) for y in years - ]) + ], ignore_index=True) # Post-import processing rosters['birth_date'] = pandas.to_datetime(rosters.birth_date) diff --git a/nfl_data_py/tests/nfl_test.py b/nfl_data_py/tests/nfl_test.py index 980ad3f..b4a0983 100644 --- a/nfl_data_py/tests/nfl_test.py +++ b/nfl_data_py/tests/nfl_test.py @@ -11,12 +11,12 @@ class test_pbp(TestCase): def test_is_df_with_data(self): s = nfl.import_pbp_data([2020]) - self.assertEqual(True, isinstance(s, pd.DataFrame)) + self.assertIsInstance(s, pd.DataFrame) self.assertTrue(len(s) > 0) def test_is_df_with_data_thread_requests(self): s = nfl.import_pbp_data([2020, 2021], thread_requests=True) - self.assertEqual(True, isinstance(s, pd.DataFrame)) + self.assertIsInstance(s, pd.DataFrame) self.assertTrue(len(s) > 0) @@ -38,38 +38,42 @@ def test_uses_cache_when_cache_is_true(self): class test_weekly(TestCase): def test_is_df_with_data(self): s = nfl.import_weekly_data([2020]) - self.assertEqual(True, isinstance(s, pd.DataFrame)) + self.assertIsInstance(s, pd.DataFrame) self.assertTrue(len(s) > 0) def test_is_df_with_data_thread_requests(self): s = nfl.import_weekly_data([2020, 2021], thread_requests=True) - self.assertEqual(True, isinstance(s, pd.DataFrame)) + self.assertIsInstance(s, pd.DataFrame) self.assertTrue(len(s) > 0) class test_seasonal(TestCase): def test_is_df_with_data(self): s = nfl.import_seasonal_data([2020]) - self.assertEqual(True, isinstance(s, pd.DataFrame)) + self.assertIsInstance(s, pd.DataFrame) self.assertTrue(len(s) > 0) class test_pbp_cols(TestCase): def test_is_list_with_data(self): s = nfl.see_pbp_cols() - self.assertEqual(True, isinstance(set(nfl.see_pbp_cols()), set)) self.assertTrue(len(s) > 0) class test_weekly_cols(TestCase): def test_is_list_with_data(self): s = nfl.see_weekly_cols() - self.assertEqual(True, isinstance(set(nfl.see_pbp_cols()), set)) self.assertTrue(len(s) > 0) class test_seasonal_rosters(TestCase): data = nfl.import_seasonal_rosters([2020]) def test_is_df_with_data(self): - self.assertEqual(True, isinstance(self.data, pd.DataFrame)) + self.assertIsInstance(self.data, pd.DataFrame) self.assertTrue(len(self.data) > 0) + + def test_import_multiple_years(self): + s = nfl.import_weekly_rosters([2022, 2023]) + self.assertIsInstance(s, pd.DataFrame) + self.assertGreater(len(s), len(self.data)) + self.assertListEqual(s.season.unique().tolist(), [2022, 2023]) def test_computes_age_as_of_season_start(self): mahomes_ages = get_pat(self.data).age @@ -83,6 +87,12 @@ class test_weekly_rosters(TestCase): def test_is_df_with_data(self): assert isinstance(self.data, pd.DataFrame) self.assertGreater(len(self.data), 0) + + def test_import_multiple_years(self): + s = nfl.import_weekly_rosters([2022, 2023]) + self.assertIsInstance(s, pd.DataFrame) + self.assertGreater(len(s), len(self.data)) + self.assertListEqual(s.season.unique().tolist(), [2022, 2023]) def test_gets_weekly_updates(self): assert isinstance(self.data, pd.DataFrame) From 478ec27b13cbf3e4e29489a428d55c873f778724 Mon Sep 17 00:00:00 2001 From: Alec Ostrander Date: Mon, 16 Sep 2024 22:42:56 -0500 Subject: [PATCH 21/22] Exclude pbp participation for any years where the data file isn't available (#107) * Exclude pbp participation data if the file isn't available * Fixed incorrect error import --- nfl_data_py/__init__.py | 26 ++++++++++++++++++-------- nfl_data_py/tests/nfl_test.py | 23 +++++++++++++++++++---- 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/nfl_data_py/__init__.py b/nfl_data_py/__init__.py index 3c874e1..e66d810 100644 --- a/nfl_data_py/__init__.py +++ b/nfl_data_py/__init__.py @@ -10,6 +10,7 @@ import numpy import pandas import appdirs +from urllib.error import HTTPError # module level doc string __doc__ = """ @@ -143,13 +144,20 @@ def import_pbp_data( raw = pandas.DataFrame(data) raw['season'] = year - if all([include_participation, year >= 2016, not cache]): + + if include_participation and not cache: path = r'https://github.com/nflverse/nflverse-data/releases/download/pbp_participation/pbp_participation_{}.parquet'.format(year) - partic = pandas.read_parquet(path) - raw = raw.merge(partic, - how='left', - left_on=['play_id','game_id'], - right_on=['play_id','nflverse_game_id']) + + try: + partic = pandas.read_parquet(path) + raw = raw.merge( + partic, + how='left', + left_on=['play_id','game_id'], + right_on=['play_id','nflverse_game_id'] + ) + except HTTPError: + pass pbp_data.append(raw) print(str(year) + ' done.') @@ -158,8 +166,10 @@ def import_pbp_data( print(e) print('Data not available for ' + str(year)) - if pbp_data: - plays = pandas.concat(pbp_data).reset_index(drop=True) + if not pbp_data: + return pandas.DataFrame() + + plays = pandas.concat(pbp_data, ignore_index=True) # converts float64 to float32, saves ~30% memory if downcast: diff --git a/nfl_data_py/tests/nfl_test.py b/nfl_data_py/tests/nfl_test.py index b4a0983..779a4c2 100644 --- a/nfl_data_py/tests/nfl_test.py +++ b/nfl_data_py/tests/nfl_test.py @@ -9,17 +9,17 @@ class test_pbp(TestCase): + pbp = nfl.import_pbp_data([2020]) + def test_is_df_with_data(self): - s = nfl.import_pbp_data([2020]) - self.assertIsInstance(s, pd.DataFrame) - self.assertTrue(len(s) > 0) + self.assertIsInstance(self.pbp, pd.DataFrame) + self.assertTrue(len(self.pbp) > 0) def test_is_df_with_data_thread_requests(self): s = nfl.import_pbp_data([2020, 2021], thread_requests=True) self.assertIsInstance(s, pd.DataFrame) self.assertTrue(len(s) > 0) - def test_uses_cache_when_cache_is_true(self): cache = Path(__file__).parent/f"tmpcache-{random.randint(0, 10000)}" self.assertRaises( @@ -33,6 +33,21 @@ def test_uses_cache_when_cache_is_true(self): self.assertIsInstance(data, pd.DataFrame) shutil.rmtree(cache) + + def test_includes_participation_by_default(self): + self.assertIn("offense_players", self.pbp.columns) + + def test_excludes_participation_when_requested(self): + data = nfl.import_pbp_data([2020], include_participation=False) + self.assertIsInstance(self.pbp, pd.DataFrame) + self.assertTrue(len(self.pbp) > 0) + self.assertNotIn("offense_players", data.columns) + + def test_excludes_participation_if_not_available(self): + data = nfl.import_pbp_data([2024]) + self.assertIsInstance(self.pbp, pd.DataFrame) + self.assertTrue(len(self.pbp) > 0) + self.assertNotIn("offense_players", data.columns) class test_weekly(TestCase): From 16423da7f9f3488835d92fa4d5d5d29d2ffd46e7 Mon Sep 17 00:00:00 2001 From: Felix Blochwitz <44145511+felixblochwitz@users.noreply.github.com> Date: Tue, 17 Sep 2024 19:24:13 +0200 Subject: [PATCH 22/22] Ability to import def pfr data (#109) * add ability to import def pfr data * add missing quotation mark --- nfl_data_py/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nfl_data_py/__init__.py b/nfl_data_py/__init__.py index e66d810..01ddaa1 100644 --- a/nfl_data_py/__init__.py +++ b/nfl_data_py/__init__.py @@ -916,8 +916,8 @@ def import_qbr(years=None, level='nfl', frequency='season'): def __validate_pfr_inputs(s_type, years=None): - if s_type not in ('pass', 'rec', 'rush'): - raise ValueError('s_type variable must be one of "pass", "rec", or "rush".') + if s_type not in ('pass', 'rec', 'rush', 'def'): + raise ValueError('s_type variable must be one of "pass", "rec","rush", or "def".') if years is None: return [] @@ -939,7 +939,7 @@ def import_seasonal_pfr(s_type, years=None): """Import PFR advanced season-level statistics Args: - s_type (str): must be one of pass, rec, rush + s_type (str): must be one of pass, rec, rush, def years (List[int]): years to return data for, optional Returns: DataFrame @@ -957,7 +957,7 @@ def import_weekly_pfr(s_type, years=None): """Import PFR advanced week-level statistics Args: - s_type (str): must be one of pass, rec, rush + s_type (str): must be one of pass, rec, rush, def years (List[int]): years to return data for, optional Returns: DataFrame