Skip to content

Commit

Permalink
fix: HTTP 403 error on the consulcam website (#126)
Browse files Browse the repository at this point in the history
The issue was caused by the activation of the cPanel "badbots" rule.
  • Loading branch information
billmetangmo committed Jul 26, 2023
1 parent 7b5ff91 commit bebb50a
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 12 deletions.
19 changes: 12 additions & 7 deletions infra/api/scan.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import sys

import shutil
sys.path.insert(0, "./package")
import requests
import urllib.request
Expand All @@ -13,6 +13,9 @@
bucket_name = os.environ["BUCKET_NAME"]
Table_Links = os.environ["LINKS_TABLE"]
maintainer_mail = os.environ["MAINTAINER_MAIL"]
# Browser-like User-Agent passed as `headers=` to the requests.get calls below.
# The commit message attributes the 403 responses to the cPanel "badbots" rule.
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0"
}


def S3_bucket_pictures(Picture_image, bucket_name):
Expand All @@ -37,7 +40,12 @@ def dowload_image(url):
"""
name = url.split("/")[-1]
real_image = f"/tmp/{str(name)}" # image in jpg version ( only /tmp is writable in aws lambda)
urllib.request.urlretrieve(url, real_image)
r = requests.get(url,stream=True,headers=headers)
r.raw.decode_content = True
r.raise_for_status()

with open( real_image, 'wb') as f:
shutil.copyfileobj(r.raw, f)
return real_image


Expand All @@ -48,12 +56,9 @@ def get_source_code(link):
:param link: the link of the web page you want to scrape
:return: the source code of the web page
"""
proxy_url = os.environ["PROXY_URL"]
proxies = {"http": proxy_url, "https": proxy_url}

r = requests.get(link,proxies=proxies, verify=False)
r = requests.get(link,headers=headers)
r.raise_for_status()
return soup(r.text)
return soup(r.text,features="html.parser")


def filter(code_source_html):
Expand Down
2 changes: 1 addition & 1 deletion infra/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ resource "aws_lambda_function" "scan" {
API_KEY = var.API_KEY
SENTRY_DNS = var.SENTRY_DNS
ENV = (terraform.workspace == "mtchoun-mouh-master") ? "production" : "${terraform.workspace}"
PROXY_URL = var.PROXY_URL

}
}

Expand Down
4 changes: 0 additions & 4 deletions infra/vars.tf
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,3 @@ variable "TFC_WORKSPACE_NAME" {
type = string
default = ""
}

variable "PROXY_URL" {
type = string
}

0 comments on commit bebb50a

Please sign in to comment.