Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make providers configurable, add config file for easier configuration #15

Merged
merged 13 commits into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 21 additions & 54 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,44 +37,26 @@ pip install traveltime-google-comparison
```

## Setup
Provide credentials for the APIs via environment variables.

For Google Maps API:

```bash
export GOOGLE_API_KEY=[Your Google Maps API Key]
```

For TomTom API:

```bash
export TOMTOM_API_KEY=[Your TomTom API Key]
```

For HERE API:

```bash
export HERE_API_KEY=[Your HERE API Key]
```

For Mapbox API:

```bash
export MAPBOX_API_KEY=[Your Mapbox API Key]
```

For OpenRoutes API:

```bash
export OPENROUTES_API_KEY=[Your OpenRoutes API Key]
```

For OSRM API: OSRM does not require a key.

For TravelTime API:
```bash
export TRAVELTIME_APP_ID=[Your TravelTime App ID]
export TRAVELTIME_API_KEY=[Your TravelTime API Key]
Provide credentials and the desired maximum number of requests per minute (`max-rpm`) for each API in the `config.json` file.
You can also disable unwanted APIs by setting their `enabled` value to `false`.

```json
{
"traveltime": {
"app-id": "<your-app-id>",
"api-key": "<your-api-key>",
"max-rpm": "60"
},
"api-providers": [
{
"name": "google",
"enabled": true,
"api-key": "<your-api-key>",
"max-rpm": "60"
},
...other providers
]
}
```

## Usage
Expand Down Expand Up @@ -104,23 +86,8 @@ Required arguments:
- `--time-zone-id [Time zone ID]`: non-abbreviated time zone identifier in which the time values are specified.
For example: `Europe/London`. For more information, see [here](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones).



Optional arguments:
- `--google-max-rpm [int]`: Set max number of parallel requests sent to Google API per minute. Default is 60.
It is enforced on per-second basis, to avoid bursts.
- `--tomtom-max-rpm [int]`: Set max number of parallel requests sent to TomTom API per minute. Default is 60.
It is enforced on per-second basis, to avoid bursts.
- `--mapbox-max-rpm [int]`: Set max number of parallel requests sent to Mapbox API per minute. Default is 60.
It is enforced on per-second basis, to avoid bursts.
- `--here-max-rpm [int]`: Set max number of parallel requests sent to HERE API per minute. Default is 60.
It is enforced on per-second basis, to avoid bursts.
- `--osrm-max-rpm [int]`: Set max number of parallel requests sent to OSRM API per minute. Default is 60.
It is enforced on per-second basis, to avoid bursts.
- `--openroutes-max-rpm [int]`: Set max number of parallel requests sent to OpenRoutes API per minute. Default is 60.
It is enforced on per-second basis, to avoid bursts.
- `--traveltime-max-rpm [int]`: Set max number of parallel requests sent to TravelTime API per minute. Default is 60.
It is enforced on per-second basis, to avoid bursts.
- `--config [Config file path]`: Path to the config file. Default is `./config.json`.

Example:

Expand Down
45 changes: 45 additions & 0 deletions config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"traveltime": {
"app-id": "<your-app-id>",
"api-key": "<your-api-key>",
"max-rpm": "60"
},
"api-providers": [
{
"name": "google",
"enabled": true,
"api-key": "<your-api-key>",
"max-rpm": "60"
},
{
"name": "tomtom",
"enabled": true,
"api-key": "<your-api-key>",
"max-rpm": "60"
},
{
"name": "here",
"enabled": true,
"api-key": "<your-api-key>",
"max-rpm": "60"
},
{
"name": "mapbox",
"enabled": true,
"api-key": "<your-api-key>",
"max-rpm": "60"
},
{
"name": "osrm",
"enabled": true,
"api-key": "not-needed!",
"max-rpm": "60"
},
{
"name": "openroutes",
"enabled": true,
"api-key": "<your-api-key>",
"max-rpm": "20"
}
]
}
45 changes: 23 additions & 22 deletions src/traveltime_google_comparison/analysis.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import logging
from dataclasses import dataclass
from typing import List

from pandas import DataFrame

Expand All @@ -9,6 +8,7 @@
TRAVELTIME_API,
get_capitalized_provider_name,
)
from traveltime_google_comparison.config import Providers


def absolute_error(api_provider: str) -> str:
Expand All @@ -26,31 +26,31 @@ class QuantileErrorResult:


def log_results(
results_with_differences: DataFrame, quantile: float, api_providers: List[str]
results_with_differences: DataFrame, quantile: float, api_providers: Providers
):
for provider in api_providers:
capitalized_provider = get_capitalized_provider_name(provider)
for provider in api_providers.competitors:
name = provider.name
capitalized_provider = get_capitalized_provider_name(name)
logging.info(
f"Mean relative error compared to {capitalized_provider} "
f"API: {results_with_differences[relative_error(provider)].mean():.2f}%"
)
quantile_errors = calculate_quantiles(
results_with_differences, quantile, provider
f"API: {results_with_differences[relative_error(name)].mean():.2f}%"
)
quantile_errors = calculate_quantiles(results_with_differences, quantile, name)
logging.info(
f"{int(quantile * 100)}% of TravelTime results differ from {capitalized_provider} API "
f"by less than {int(quantile_errors.relative_error)}%"
)


def format_results_for_csv(
results_with_differences: DataFrame, api_providers: List[str]
results_with_differences: DataFrame, api_providers: Providers
) -> DataFrame:
formatted_results = results_with_differences.copy()

for provider in api_providers:
formatted_results = formatted_results.drop(columns=[absolute_error(provider)])
relative_error_col = relative_error(provider)
for provider in api_providers.competitors:
name = provider.name
formatted_results = formatted_results.drop(columns=[absolute_error(name)])
relative_error_col = relative_error(name)
formatted_results[relative_error_col] = formatted_results[
relative_error_col
].astype(int)
Expand All @@ -59,7 +59,7 @@ def format_results_for_csv(


def run_analysis(
results: DataFrame, output_file: str, quantile: float, api_providers: List[str]
results: DataFrame, output_file: str, quantile: float, api_providers: Providers
):
results_with_differences = calculate_differences(results, api_providers)
log_results(results_with_differences, quantile, api_providers)
Expand All @@ -71,21 +71,22 @@ def run_analysis(
formatted_results.to_csv(output_file, index=False)


def calculate_differences(results: DataFrame, api_providers: List[str]) -> DataFrame:
def calculate_differences(results: DataFrame, api_providers: Providers) -> DataFrame:
results_with_differences = results.copy()

for provider in api_providers:
absolute_error_col = absolute_error(provider)
relative_error_col = relative_error(provider)
for provider in api_providers.competitors:
name = provider.name
absolute_error_col = absolute_error(name)
relative_error_col = relative_error(name)

results_with_differences[absolute_error_col] = abs(
results[Fields.TRAVEL_TIME[provider]]
results[Fields.TRAVEL_TIME[name]]
- results[Fields.TRAVEL_TIME[TRAVELTIME_API]]
)

results_with_differences[relative_error_col] = (
results_with_differences[absolute_error_col]
/ results_with_differences[Fields.TRAVEL_TIME[provider]]
/ results_with_differences[Fields.TRAVEL_TIME[name]]
* 100
)

Expand All @@ -95,13 +96,13 @@ def calculate_differences(results: DataFrame, api_providers: List[str]) -> DataF
def calculate_quantiles(
results_with_differences: DataFrame,
quantile: float,
api_provider: str,
api_provider_name: str,
) -> QuantileErrorResult:
quantile_absolute_error = results_with_differences[
absolute_error(api_provider)
absolute_error(api_provider_name)
].quantile(quantile, "higher")
quantile_relative_error = results_with_differences[
relative_error(api_provider)
relative_error(api_provider_name)
].quantile(quantile, "higher")
return QuantileErrorResult(
int(quantile_absolute_error), int(quantile_relative_error)
Expand Down
24 changes: 8 additions & 16 deletions src/traveltime_google_comparison/collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from traveltime_google_comparison.config import Mode
from traveltime_google_comparison.requests.base_handler import BaseRequestHandler


GOOGLE_API = "google"
TOMTOM_API = "tomtom"
HERE_API = "here"
Expand Down Expand Up @@ -132,7 +133,10 @@ def generate_tasks(


async def collect_travel_times(
args, data, request_handlers: Dict[str, BaseRequestHandler], providers: List[str]
args,
data,
request_handlers: Dict[str, BaseRequestHandler],
provider_names: List[str],
) -> DataFrame:
timezone = pytz.timezone(args.time_zone_id)
localized_start_datetime = localize_datetime(args.date, args.start_time, timezone)
Expand All @@ -144,28 +148,16 @@ async def collect_travel_times(
tasks = generate_tasks(data, time_instants, request_handlers, mode=Mode.DRIVING)

capitalized_providers_str = ", ".join(
[get_capitalized_provider_name(provider) for provider in providers]
)
logger.info(
f"Sending {len(tasks)} requests to {capitalized_providers_str} and TravelTime APIs"
[get_capitalized_provider_name(provider) for provider in provider_names]
)
logger.info(f"Sending {len(tasks)} requests to {capitalized_providers_str} APIs")

results = await asyncio.gather(*tasks)

results_df = pd.DataFrame(results)
deduplicated = results_df.groupby(
[Fields.ORIGIN, Fields.DESTINATION, Fields.DEPARTURE_TIME], as_index=False
).agg(
{
Fields.TRAVEL_TIME[GOOGLE_API]: "first",
Fields.TRAVEL_TIME[TOMTOM_API]: "first",
Fields.TRAVEL_TIME[HERE_API]: "first",
Fields.TRAVEL_TIME[OSRM_API]: "first",
Fields.TRAVEL_TIME[OPENROUTES_API]: "first",
Fields.TRAVEL_TIME[MAPBOX_API]: "first",
Fields.TRAVEL_TIME[TRAVELTIME_API]: "first",
}
)
).agg({Fields.TRAVEL_TIME[provider]: "first" for provider in provider_names})
deduplicated.to_csv(args.output, index=False)
return deduplicated

Expand Down
Loading
Loading