Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add keep2strava #653

Merged
merged 9 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 61 additions & 31 deletions run_page/keep_sync.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like the other files, this project is mainly for running, so I think the newly introduced IS_ONLY_RUN = False could be a breaking change. Users who only need running will have to modify their script files and add --only-run, or their projects will sync all sport types (even ones they don't need), which is a little weird. It's more reasonable in @ben-29's [workout page](https://github.com/ben-29/workouts_page) project.
A suggested approach is to introduce an additional argument, --sync-all-types, or --sync-types followed by the needed sport types. Users can then choose the sport types they need, with running as the default.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please also open PR in Workout Page after this PR has been merged @SongJgit

Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,18 @@
from utils import adjust_time
import xml.etree.ElementTree as ET

KEEP_DATA_TYPE_API = ["running", "hiking", "cycling"]
KEEP2STRAVA = {
"outdoorWalking": "Walk",
"outdoorRunning": "Run",
"outdoorCycling": "Ride",
"indoorRunning": "VirtualRun",
}
IS_ONLY_RUN = False
# need to test
LOGIN_API = "https://api.gotokeep.com/v1.1/users/login"
RUN_DATA_API = "https://api.gotokeep.com/pd/v3/stats/detail?dateUnit=all&type=running&lastDate={last_date}"
RUN_LOG_API = "https://api.gotokeep.com/pd/v3/runninglog/{run_id}"
RUN_DATA_API = "https://api.gotokeep.com/pd/v3/stats/detail?dateUnit=all&type={data_type_api}&lastDate={last_date}"
RUN_LOG_API = "https://api.gotokeep.com/pd/v3/{data_type_api}log/{run_id}"

HR_FRAME_THRESHOLD_IN_DECISECOND = 100 # Maximum time difference to consider a data point as the nearest, the unit is decisecond(分秒)

Expand All @@ -43,11 +51,15 @@ def login(session, mobile, password):
return session, headers


def get_to_download_runs_ids(session, headers):
def get_to_download_runs_ids(session, headers, data_type_api):
last_date = 0
result = []

while 1:
r = session.get(RUN_DATA_API.format(last_date=last_date), headers=headers)
r = session.get(
RUN_DATA_API.format(data_type_api=data_type_api, last_date=last_date),
headers=headers,
)
if r.ok:
run_logs = r.json()["data"]["records"]

Expand All @@ -63,8 +75,10 @@ def get_to_download_runs_ids(session, headers):
return result


def get_single_run_data(session, headers, run_id):
r = session.get(RUN_LOG_API.format(run_id=run_id), headers=headers)
def get_single_run_data(session, headers, run_id, data_type_api):
r = session.get(
RUN_LOG_API.format(data_type_api=data_type_api, run_id=run_id), headers=headers
)
if r.ok:
return r.json()

Expand All @@ -82,7 +96,10 @@ def decode_runmap_data(text, is_geo=False):


def parse_raw_data_to_nametuple(
run_data, old_gpx_ids, session, with_download_gpx=False
run_data,
old_gpx_ids,
session,
with_download_gpx=False,
):
run_data = run_data["data"]
run_points_data = []
Expand Down Expand Up @@ -119,11 +136,10 @@ def parse_raw_data_to_nametuple(
if p_hr:
p["hr"] = p_hr
if with_download_gpx:
if (
str(keep_id) not in old_gpx_ids
and run_data["dataType"] == "outdoorRunning"
):
gpx_data = parse_points_to_gpx(run_points_data_gpx, start_time)
if run_data["dataType"].startswith("outdoor"):
gpx_data = parse_points_to_gpx(
run_points_data_gpx, start_time, KEEP2STRAVA[run_data["dataType"]]
)
download_keep_gpx(gpx_data, str(keep_id))
else:
print(f"ID {keep_id} no gps data")
Expand All @@ -139,9 +155,9 @@ def parse_raw_data_to_nametuple(
return
d = {
"id": int(keep_id),
"name": "run from keep",
"name": f"{KEEP2STRAVA[run_data['dataType']]} from keep",
# future to support others workout now only for run
"type": "Run",
"type": f"{KEEP2STRAVA[(run_data['dataType'])]}",
"start_date": datetime.strftime(start_date, "%Y-%m-%d %H:%M:%S"),
"end": datetime.strftime(end, "%Y-%m-%d %H:%M:%S"),
"start_date_local": datetime.strftime(start_date_local, "%Y-%m-%d %H:%M:%S"),
Expand All @@ -161,31 +177,35 @@ def parse_raw_data_to_nametuple(
return namedtuple("x", d.keys())(*d.values())


def get_all_keep_tracks(email, password, old_tracks_ids, with_download_gpx=False):
def get_all_keep_tracks(email, password, old_tracks_ids, with_download_gpx=True):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't change the default value here

if with_download_gpx and not os.path.exists(GPX_FOLDER):
os.mkdir(GPX_FOLDER)
s = requests.Session()
s, headers = login(s, email, password)
runs = get_to_download_runs_ids(s, headers)
runs = [run for run in runs if run.split("_")[1] not in old_tracks_ids]
print(f"{len(runs)} new keep runs to generate")
tracks = []
old_gpx_ids = os.listdir(GPX_FOLDER)
old_gpx_ids = [i.split(".")[0] for i in old_gpx_ids if not i.startswith(".")]
for run in runs:
print(f"parsing keep id {run}")
try:
run_data = get_single_run_data(s, headers, run)
track = parse_raw_data_to_nametuple(
run_data, old_gpx_ids, s, with_download_gpx
)
tracks.append(track)
except Exception as e:
print(f"Something wrong paring keep id {run}" + str(e))
global KEEP_DATA_TYPE_API
if IS_ONLY_RUN:
KEEP_DATA_TYPE_API = ["running"]
for api in KEEP_DATA_TYPE_API:
runs = get_to_download_runs_ids(s, headers, api)
runs = [run for run in runs if run.split("_")[1] not in old_tracks_ids]
print(f"{len(runs)} new keep {api} data to generate")
old_gpx_ids = os.listdir(GPX_FOLDER)
old_gpx_ids = [i.split(".")[0] for i in old_gpx_ids if not i.startswith(".")]
for run in runs:
print(f"parsing keep id {run}")
try:
run_data = get_single_run_data(s, headers, run, api)
track = parse_raw_data_to_nametuple(
run_data, old_gpx_ids, s, with_download_gpx
)
tracks.append(track)
except Exception as e:
print(f"Something wrong paring keep id {run}" + str(e))
return tracks


def parse_points_to_gpx(run_points_data, start_time):
def parse_points_to_gpx(run_points_data, start_time, type):
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do not name this parameter `type`; `type` is a built-in in Python and this would shadow it.
Use sport_type or _type instead.

"""
Convert run points data to GPX format.

Expand Down Expand Up @@ -219,6 +239,7 @@ def parse_points_to_gpx(run_points_data, start_time):
gpx.nsmap["gpxtpx"] = "http://www.garmin.com/xmlschemas/TrackPointExtension/v1"
gpx_track = gpxpy.gpx.GPXTrack()
gpx_track.name = "gpx from keep"
gpx_track.type = type
gpx.tracks.append(gpx_track)

# Create first segment in our GPX track:
Expand Down Expand Up @@ -292,6 +313,7 @@ def download_keep_gpx(gpx_data, keep_id):
file_path = os.path.join(GPX_FOLDER, str(keep_id) + ".gpx")
with open(file_path, "w") as fb:
fb.write(gpx_data)
return file_path
except:
print(f"wrong id {keep_id}")
pass
Expand All @@ -318,5 +340,13 @@ def run_keep_sync(email, password, with_download_gpx=False):
action="store_true",
help="get all keep data to gpx and download",
)
parser.add_argument(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please also add this argument to readme

"--only-run",
dest="only_run",
action="store_true",
help="if is only for running",
)

options = parser.parse_args()
IS_ONLY_RUN = options.only_run
run_keep_sync(options.phone_number, options.password, options.with_gpx)
152 changes: 152 additions & 0 deletions run_page/keep_to_strava.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
import argparse
import json
import os
from sre_constants import SUCCESS
import time
from collections import namedtuple
import requests
from config import GPX_FOLDER
from Crypto.Cipher import AES
from config import OUTPUT_DIR
from stravalib.exc import ActivityUploadFailed, RateLimitTimeout
from utils import make_strava_client, upload_file_to_strava
from keep_sync import (
login,
KEEP_DATA_TYPE_API,
parse_raw_data_to_nametuple,
get_to_download_runs_ids,
get_single_run_data,
)

"""
This script only provides the ability to sync data from Keep's multiple sport types to Strava's corresponding sport types, to help those who, like me, use multiple devices. The web page presentation still uses Strava (or refer to nike_to_strava_sync.py and modify it to suit you).
My own best practices:
1. running/hiking/Cycling (Huawei/OPPO) -> Keep
2. Keep -> Strava (add this script to run_data_sync.yml)
3. Road Cycling(Garmin) -> Strava.
4. running_page(Strava)

"""


def get_all_keep_tracks(email, password, old_tracks_ids, with_download_gpx=True):
    """
    Fetch every Keep activity that has not been synced yet.

    Logs in to Keep, walks every sport type listed in KEEP_DATA_TYPE_API,
    parses each new activity and returns it extended with one extra
    ``gpx_file_path`` field.

    Args:
        email: Keep login account (the script entry point passes the phone
            number here).
        password: Keep login password.
        old_tracks_ids: ids (as strings) of activities already synced; any
            run whose id is in this collection is skipped.
        with_download_gpx: forwarded to parse_raw_data_to_nametuple; when
            true, GPX_FOLDER is created if it does not exist yet.

    Returns:
        A list of namedtuples: the fields produced by
        parse_raw_data_to_nametuple plus ``gpx_file_path``, which is
        ``GPX_FOLDER/<id>.gpx`` when the track has a start position and
        ``None`` otherwise.
    """
    # NOTE(review): this largely duplicates keep_sync.get_all_keep_tracks;
    # consider sharing one implementation.
    if with_download_gpx and not os.path.exists(GPX_FOLDER):
        os.mkdir(GPX_FOLDER)
    session = requests.Session()
    session, headers = login(session, email, password)
    # A set makes the "already synced" test O(1) per run id.
    synced_ids = set(old_tracks_ids)
    tracks = []
    for api in KEEP_DATA_TYPE_API:
        runs = get_to_download_runs_ids(session, headers, api)
        runs = [run for run in runs if run.split("_")[1] not in synced_ids]
        print(f"{len(runs)} new keep {api} data to generate")
        # GPX_FOLDER is only created above when with_download_gpx is true,
        # so it may legitimately be missing here.
        if os.path.isdir(GPX_FOLDER):
            old_gpx_ids = [
                name.split(".")[0]
                for name in os.listdir(GPX_FOLDER)
                if not name.startswith(".")
            ]
        else:
            old_gpx_ids = []
        for run in runs:
            print(f"parsing keep id {run}")
            try:
                run_data = get_single_run_data(session, headers, run, api)
                track = parse_raw_data_to_nametuple(
                    run_data, old_gpx_ids, session, with_download_gpx
                )
                if track is None:
                    # The parser has a bare ``return`` path; skip such
                    # activities explicitly instead of failing on the
                    # attribute access below.
                    print(f"Skip keep id {run}: no track data parsed")
                    continue
                # By default only outdoor sports have latlng as well as GPX.
                if track.start_latlng is not None:
                    gpx_file_path = os.path.join(GPX_FOLDER, f"{track.id}.gpx")
                else:
                    gpx_file_path = None
                extended = namedtuple("y", track._fields + ("gpx_file_path",))
                tracks.append(extended(*track, gpx_file_path))
            except Exception as e:
                # Best effort: one broken activity must not stop the sync.
                print(f"Something wrong paring keep id {run}" + str(e))
    return tracks


def run_keep_sync(email, password, with_download_gpx=True):
    """
    Load the sync record and fetch the Keep activities that are not in it.

    The record lives in ``keep2strava.json`` inside OUTPUT_DIR and is read as
    a JSON list of objects that each carry a ``run_id`` key. If the file is
    missing it is created empty; if it cannot be parsed it is treated as
    having no entries.

    Args:
        email: Keep login account (the script entry point passes the phone
            number here).
        password: Keep login password.
        with_download_gpx: forwarded to get_all_keep_tracks.

    Returns:
        The list returned by get_all_keep_tracks for the not-yet-synced ids.
    """
    # NOTE(review): the record file name/path could become a module-level
    # constant so it is defined in one place only.
    keep2strava_bk_path = os.path.join(OUTPUT_DIR, "keep2strava.json")
    if not os.path.exists(keep2strava_bk_path):
        # Create the (empty) record file so later readers can open it.
        with open(keep2strava_bk_path, "w"):
            pass
        content = []
    else:
        with open(keep2strava_bk_path) as f:
            try:
                content = json.loads(f.read())
            except ValueError:
                # Empty or corrupt record (json.JSONDecodeError and
                # UnicodeDecodeError are both ValueError): start over.
                content = []
    old_tracks_ids = [str(record["run_id"]) for record in content]
    return get_all_keep_tracks(email, password, old_tracks_ids, with_download_gpx)


if __name__ == "__main__":
    # Script entry point: pull new activities from Keep, upload their GPX
    # files to Strava, record what was synced, then clean up the GPX files.
    parser = argparse.ArgumentParser()
    parser.add_argument("phone_number", help="keep login phone number")
    parser.add_argument("password", help="keep login password")
    parser.add_argument("client_id", help="strava client id")
    parser.add_argument("client_secret", help="strava client secret")
    parser.add_argument("strava_refresh_token", help="strava refresh token")

    options = parser.parse_args()
    new_tracks = run_keep_sync(options.phone_number, options.password, True)

    # to strava.
    print("Need to load all gpx files maybe take some time")
    client = make_strava_client(
        options.client_id, options.client_secret, options.strava_refresh_token
    )

    print(f"Up to {len(new_tracks)} files are waiting to be uploaded")
    uploaded_file_paths = []
    for track in new_tracks:
        if track.gpx_file_path is None:
            # for no gps data, like indoorRunning.
            uploaded_file_paths.append(track)
            continue
        try:
            try:
                upload_file_to_strava(client, track.gpx_file_path, "gpx", False)
            except RateLimitTimeout as e:
                timeout = e.timeout
                print(f"Strava API Rate Limit Timeout. Retry in {timeout} seconds\n")
                time.sleep(timeout)
                # try previous again
                upload_file_to_strava(client, track.gpx_file_path, "gpx", False)
            uploaded_file_paths.append(track)
        except (ActivityUploadFailed, RateLimitTimeout) as e:
            # Also covers a failing retry, so one bad upload cannot abort the
            # run before the sync record below has been written.
            print(f"Upload faild error {str(e)}")
        # spider rule
        time.sleep(1)
    # NOTE(review): the purpose of this pause is not documented; presumably
    # it gives the uploads time to settle before clean-up. Confirm.
    time.sleep(10)

    keep2strava_bk_path = os.path.join(OUTPUT_DIR, "keep2strava.json")
    with open(keep2strava_bk_path, "r") as f:
        try:
            content = json.loads(f.read())
        except ValueError:
            # Empty or corrupt record file: start from an empty list.
            content = []
    content.extend(
        {
            "run_id": track.id,
            "name": track.name,
            "type": track.type,
            "gpx_file_path": track.gpx_file_path,
        }
        for track in uploaded_file_paths
    )
    with open(keep2strava_bk_path, "w") as f:
        json.dump(content, f, indent=0)

    # del gpx
    # Every generated GPX file is removed, not only the uploaded ones:
    # activities whose upload failed are not written to the record above, so
    # they are fetched and uploaded again on the next run, and deleting
    # everything keeps the GPX folder clean. Deletion happens last rather
    # than right after each upload to avoid touching a file mid-upload.
    for track in new_tracks:
        gpx_path = track.gpx_file_path
        if gpx_path is not None and os.path.exists(gpx_path):
            os.remove(gpx_path)