Skip to content

Commit

Permalink
Merge branch 'main' into fix_ky
Browse files Browse the repository at this point in the history
  • Loading branch information
flooie authored Nov 21, 2024
2 parents ed8ed8a + 76186d0 commit 7d7ebb8
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 17 deletions.
11 changes: 8 additions & 3 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,19 @@ Releases are also tagged in git, if that's helpful.

## Current

**2.6.40 - 2024-11-20**
**2.6.42 - 2024-11-21**

- Fixes:
- Fix `mass` and `massctapp` scrapers, scrape new endpoint
  - Exclude "Commonwealth" string from short case names
- Fix `mass` and `massctapp` cleanup content method

## Past

**2.6.40 - 2024-11-20**

- Fixes:
- Fix `mass` and `massctapp` scrapers, scrape new endpoint
- Exclude "Commonwealth" string from short case names

**2.6.39 - 2024-11-18**

- Fixes:
Expand Down
35 changes: 23 additions & 12 deletions juriscraper/opinions/united_states/state/colo.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
- 2024-07-04: Update to new site, grossir
"""

from datetime import date, datetime
from typing import Tuple
from datetime import date, datetime, timedelta
from typing import Optional, Tuple
from urllib.parse import urlencode

from juriscraper.AbstractSite import logger
Expand All @@ -31,7 +31,7 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.court_id = self.__module__
self.params = {
"product_id": "WW",
"product_id": "COLORADO",
"jurisdiction": "US",
"content_type": "2",
"court": self.api_court_code,
Expand All @@ -40,14 +40,13 @@ def __init__(self, *args, **kwargs):
"per_page": "30", # Server breaks down when per_page=500, returns 503
"page": "1",
"sort": "date",
"type": "document",
"include_local_exclusive": "true",
"cbm": "6.0|361.0|5.0|9.0|4.0|2.0=0.01|400.0|1.0|0.001|1.5|0.2",
"locale": "en",
"hide_ct6": "true",
"t": str(datetime.now().timestamp())[:10],
"type": "document",
}
self.url = f"{self.base_url}?{urlencode(self.params)}"
self.update_url()

# Request won't work without some of these X- headers
self.request["headers"].update(
Expand Down Expand Up @@ -123,19 +122,31 @@ def _download_backwards(self, dates: Tuple[date]) -> None:
:return None
"""
logger.info("Backscraping for range %s %s", *dates)
self.update_url(dates)
self.html = self._download()
self._process_html()

def update_url(self, dates: Optional[Tuple[date]] = None) -> None:
    """
    Set URL with date filters and a current timestamp.

    A request with no date filter was returning very old documents
    instead of the most recent ones, so a date range is always sent.

    :param dates: start and end date tuple. If not present,
        scrape the last week
    :return: None
    """
    if not dates:
        today = datetime.now()
        # Default window: last 7 days, padded one day forward so
        # documents dated "today" are not dropped at the boundary.
        dates = (today - timedelta(7), today + timedelta(1))

    start = dates[0].strftime("%Y-%m-%d")
    end = dates[1].strftime("%Y-%m-%d")

    # Copy the base query params rather than mutating self.params,
    # so repeated calls (e.g. during a backscrape) don't accumulate
    # stale date filters.
    params = {
        **self.params,
        "date": f"{start}..{end}",
        # Seconds-resolution Unix timestamp, mimicking the frontend
        "t": str(datetime.now().timestamp())[:10],
    }
    self.url = f"{self.base_url}?{urlencode(params)}"
self.html = self._download()
self._process_html()
2 changes: 1 addition & 1 deletion juriscraper/opinions/united_states/state/mass.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,4 @@ def cleanup_content(content):
new_tree = etree.Element("html")
body = etree.SubElement(new_tree, "body")
body.append(content)
return html.tostring(new_tree, pretty_print=True, encoding="unicode")
return html.tostring(new_tree).decode("utf-8")
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from setuptools import find_packages, setup
from setuptools.command.install import install

VERSION = "2.6.40"
VERSION = "2.6.42"
AUTHOR = "Free Law Project"
EMAIL = "info@free.law"
HERE = os.path.abspath(os.path.dirname(__file__))
Expand Down

0 comments on commit 7d7ebb8

Please sign in to comment.