Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dev -> main #2140

Merged
merged 2 commits into main from dev
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,389 changes: 724 additions & 665 deletions tests/data/KWS-L-1d-bad-div-fixed.csv

Large diffs are not rendered by default.

1,389 changes: 724 additions & 665 deletions tests/data/KWS-L-1d-bad-div.csv

Large diffs are not rendered by default.

816 changes: 428 additions & 388 deletions tests/data/NVT-L-1d-bad-div-fixed.csv

Large diffs are not rendered by default.

546 changes: 293 additions & 253 deletions tests/data/NVT-L-1d-bad-div.csv

Large diffs are not rendered by default.

886 changes: 460 additions & 426 deletions tests/data/SCR-TO-1d-bad-div-fixed.csv

Large diffs are not rendered by default.

480 changes: 257 additions & 223 deletions tests/data/SCR-TO-1d-bad-div.csv

Large diffs are not rendered by default.

1,109 changes: 586 additions & 523 deletions tests/data/SOLB-BR-1d-bad-div-fixed.csv

Large diffs are not rendered by default.

615 changes: 339 additions & 276 deletions tests/data/SOLB-BR-1d-bad-div.csv

Large diffs are not rendered by default.

66 changes: 42 additions & 24 deletions yfinance/scrapers/history.py
Original file line number Diff line number Diff line change
Expand Up @@ -1426,7 +1426,7 @@ def _fix_bad_div_adjust(self, df, interval, currency):
typical_volatility = np.nan
else:
diffs = df2['Close'].iloc[start:end-1].to_numpy() - df2['Low'].iloc[start+1:end].to_numpy()
typical_volatility = np.median(np.abs(diffs))
typical_volatility = np.mean(np.abs(diffs))

possibilities = []
if (drops==0.0).all() and df2['Volume'].iloc[div_idx]==0:
Expand Down Expand Up @@ -1681,10 +1681,6 @@ def cluster_dividends(df, column='div', threshold=7):
div_status_df.loc[phantom_div_dt, c] = False
checks.append('phantom')

if not div_status_df[checks].any().any():
# Perfect
return df

# Remove phantoms early
if 'phantom' in div_status_df.columns:
f_phantom = div_status_df['phantom']
Expand All @@ -1709,6 +1705,29 @@ def cluster_dividends(df, column='div', threshold=7):
if 'phantom' in checks:
checks.remove('phantom')

if not div_status_df[checks].any().any():
# Maybe failed to detect a too-small div. If div is ~0.01x of previous and next, then
# treat as a 0.01x error
if len(div_status_df) > 1:
for i in range(0, len(div_status_df)):
r_pre, r_post = None, None
if i > 0:
r_pre = div_status_df['%'].iloc[i-1] / div_status_df['%'].iloc[i]
if i < (len(div_status_df)-1):
r_post = div_status_df['%'].iloc[i+1] / div_status_df['%'].iloc[i]
r_pre = r_pre or r_post
r_post = r_post or r_pre
if abs(r_pre-currency_divide)<20 and abs(r_post-currency_divide)<20:
div_dt = div_status_df.index[i]
div_status_df.loc[div_dt, 'div_too_small'] = True

if not div_status_df[checks].any().any():
# Perfect
if df_modified:
return df2
else:
return df

# Check if the present div-adjustment contradicts price action
for i in range(len(div_status_df)):
div_idx = div_status_df['idx'].iloc[i]
Expand Down Expand Up @@ -1789,7 +1808,8 @@ def cluster_dividends(df, column='div', threshold=7):
elif adjDelta_drop > 0.39*adjDiv:
# Still true that applied adjustment exceeds price action,
# just not clear what solution is (if any).
div_adj_exceeds_prices = True
if (x['Adj']<1.0).any():
div_adj_exceeds_prices = True
break

# Can prune the space:
Expand Down Expand Up @@ -1843,22 +1863,6 @@ def cluster_dividends(df, column='div', threshold=7):

checks += ['adj_exceeds_prices', 'div_date_wrong']

if not div_status_df[checks].any().any():
# Maybe failed to detect a too-small div. If div is ~0.01x of previous and next, then
# treat as a 0.01x error
if len(div_status_df) > 1:
for i in range(0, len(div_status_df)):
r_pre, r_post = None, None
if i > 0:
r_pre = div_status_df['%'].iloc[i-1] / div_status_df['%'].iloc[i]
if i < (len(div_status_df)-1):
r_post = div_status_df['%'].iloc[i+1] / div_status_df['%'].iloc[i]
r_pre = r_pre or r_post
r_post = r_post or r_pre
if abs(r_pre-currency_divide)<20 and abs(r_post-currency_divide)<20:
div_dt = div_status_df.index[i]
div_status_df.loc[div_dt, 'div_too_small'] = True

for c in checks:
if not div_status_df[c].any():
div_status_df = div_status_df.drop(c, axis=1)
Expand Down Expand Up @@ -1887,11 +1891,16 @@ def cluster_dividends(df, column='div', threshold=7):
div_pcts['avg yr yield'] = div_pcts['%'] / div_pcts['period']

for c in checks:
if not cluster[c].to_numpy().any():
cluster = cluster.drop(c, axis=1)
cluster_checks = [c for c in checks if c in cluster.columns]

for c in cluster_checks:
f_fail = cluster[c].to_numpy()
n_fail = np.sum(f_fail)
if n_fail in [0, n]:
continue
pct_fail = np.sum(f_fail) / n
pct_fail = n_fail / n
if c == 'div_too_big':
true_threshold = 1.0
fals_threshold = 0.2
Expand All @@ -1900,7 +1909,16 @@ def cluster_dividends(df, column='div', threshold=7):
continue

if 'adj_exceeds_prices' in cluster.columns and (cluster[c] == (cluster[c] & cluster['adj_exceeds_prices'])).all():
# More likely a true positive. Maybe the div never happened.
# Treat div_too_big=False as false negatives IFF adj_exceeds_prices=True AND
# the true ratio is above the (lowered) threshold.
true_threshold = 0.5
f_adj_exceeds_prices = cluster['adj_exceeds_prices'].to_numpy()
n = np.sum(f_adj_exceeds_prices)
n_fail = np.sum(f_fail[f_adj_exceeds_prices])
pct_fail = n_fail / n
if pct_fail > true_threshold:
f = fc & div_status_df['adj_exceeds_prices'].to_numpy()
div_status_df.loc[f, c] = True
continue

if 'div_exceeds_adj' in cluster.columns and cluster['div_exceeds_adj'].all():
Expand Down
Loading