#
# Copyright 2017 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pandas as pd
import numpy as np
import warnings
import empyrical as ep
from pandas.tseries.offsets import BDay
from scipy import stats
from statsmodels.regression.linear_model import OLS
from statsmodels.tools.tools import add_constant
from . import utils
def factor_information_coefficient(factor_data,
group_adjust=False,
by_group=False):
"""
Computes the Spearman Rank Correlation based Information Coefficient (IC)
between factor values and N period forward returns for each period in
the factor index.
Parameters
----------
factor_data : pd.DataFrame - MultiIndex
A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
containing the values for a single alpha factor, forward returns for
each period, the factor quantile/bin that factor value belongs to, and
(optionally) the group the asset belongs to.
- See full explanation in utils.get_clean_factor_and_forward_returns
group_adjust : bool
Demean forward returns by group before computing IC.
by_group : bool
If True, compute period wise IC separately for each group.
Returns
-------
ic : pd.DataFrame
Spearman Rank correlation between factor and
provided forward returns.
"""
def src_ic(group):
f = group['factor']
_ic = group[utils.get_forward_returns_columns(factor_data.columns)] \
.apply(lambda x: stats.spearmanr(x, f)[0])
return _ic
factor_data = factor_data.copy()
grouper = [factor_data.index.get_level_values('date')]
if group_adjust:
factor_data = utils.demean_forward_returns(factor_data,
grouper + ['group'])
if by_group:
grouper.append('group')
ic = factor_data.groupby(grouper).apply(src_ic)
return ic
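# Example usage (a minimal, commented-out sketch, not part of the library API;
# it assumes `factor_data` was built with
# utils.get_clean_factor_and_forward_returns, and the '1D'/'5D' forward-return
# column names are purely illustrative):
#
#   ic = factor_information_coefficient(factor_data)
#   # one Spearman IC per date and per forward-return column ('1D', '5D', ...)
#   ic_by_sector = factor_information_coefficient(factor_data,
#                                                 group_adjust=True,
#                                                 by_group=True)
#   # one IC per (date, group) pair, computed on group-demeaned returns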
def mean_information_coefficient(factor_data,
group_adjust=False,
by_group=False,
by_time=None):
"""
Get the mean information coefficient of specified groups.
Answers questions like:
What is the mean IC for each month?
What is the mean IC for each group over our whole time range?
What is the mean IC for each group, each week?
Parameters
----------
factor_data : pd.DataFrame - MultiIndex
A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
containing the values for a single alpha factor, forward returns for
each period, the factor quantile/bin that factor value belongs to, and
(optionally) the group the asset belongs to.
- See full explanation in utils.get_clean_factor_and_forward_returns
group_adjust : bool
Demean forward returns by group before computing IC.
by_group : bool
If True, take the mean IC for each group.
by_time : str (pd time_rule), optional
Time window to use when taking mean IC.
See http://pandas.pydata.org/pandas-docs/stable/timeseries.html
for available options.
Returns
-------
ic : pd.DataFrame
Mean Spearman Rank correlation between factor and provided
forward price movement windows.
"""
ic = factor_information_coefficient(factor_data, group_adjust, by_group)
grouper = []
if by_time is not None:
grouper.append(pd.Grouper(freq=by_time))
if by_group:
grouper.append('group')
if len(grouper) == 0:
ic = ic.mean()
else:
ic = (ic.reset_index().set_index('date').groupby(grouper).mean())
return ic
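# Example usage (commented-out sketch, same `factor_data` assumptions as above):
#
#   monthly_ic = mean_information_coefficient(factor_data, by_time='M')
#   # 'M' is a pandas time rule: one mean IC per calendar month
#   group_ic = mean_information_coefficient(factor_data, by_group=True)
#   # one mean IC per group over the whole analysis window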
def factor_weights(factor_data,
demeaned=True,
group_adjust=False,
equal_weight=False):
"""
Computes asset weights from factor values, dividing by the sum of their
absolute values (achieving gross leverage of 1). Positive factor values
result in positive weights and negative values in negative weights.
Parameters
----------
factor_data : pd.DataFrame - MultiIndex
A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
containing the values for a single alpha factor, forward returns for
each period, the factor quantile/bin that factor value belongs to, and
(optionally) the group the asset belongs to.
- See full explanation in utils.get_clean_factor_and_forward_returns
demeaned : bool
Should this computation happen on a long-short portfolio? If True,
weights are computed by demeaning factor values and dividing by the sum
of their absolute value (achieving gross leverage of 1). The sum of
positive weights will be the same as the negative weights (absolute
value), suitable for a dollar neutral long-short portfolio
group_adjust : bool
Should this computation happen on a group neutral portfolio? If True,
compute group neutral weights: each group will weight the same and
if 'demeaned' is enabled the factor values demeaning will occur on the
group level.
equal_weight : bool, optional
if True the assets will be equal-weighted instead of factor-weighted
If demeaned is True then the factor universe will be split in two
equal sized groups, top assets with positive weights and bottom assets
with negative weights
Returns
-------
returns : pd.Series
Assets weighted by factor value.
"""
def to_weights(group, _demeaned, _equal_weight):
if _equal_weight:
group = group.copy()
if _demeaned:
# top assets positive weights, bottom ones negative
group = group - group.median()
negative_mask = group < 0
group[negative_mask] = -1.0
positive_mask = group > 0
group[positive_mask] = 1.0
if _demeaned:
# positive weights must equal negative weights
if negative_mask.any():
group[negative_mask] /= negative_mask.sum()
if positive_mask.any():
group[positive_mask] /= positive_mask.sum()
elif _demeaned:
group = group - group.mean()
return group / group.abs().sum()
grouper = [factor_data.index.get_level_values('date')]
if group_adjust:
grouper.append('group')
# weights = factor_data.groupby(grouper)['factor'] \
# .apply(to_weights, demeaned, equal_weight)
# TODO: group_keys=False added to groupby() as required after the pandas upgrade
weights = factor_data.groupby(grouper, group_keys=False)['factor'] \
.apply(to_weights, demeaned, equal_weight)
if group_adjust:
weights = weights.groupby(level='date').apply(to_weights, False, False)
return weights
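# Worked example of the weighting scheme (hypothetical numbers, illustration
# only): with factor values [3, 1, -2] on a single date and demeaned=True,
# the demeaned values are [2.33, 0.33, -2.67] and the resulting weights are
# [0.437, 0.063, -0.500]: they sum to ~0 (dollar neutral) and their absolute
# values sum to 1 (gross leverage of 1).
#
#   weights = factor_weights(factor_data, demeaned=True)
#   weights.groupby(level='date').apply(lambda w: w.abs().sum())  # ~1.0 daily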
def factor_returns(factor_data,
demeaned=True,
group_adjust=False,
equal_weight=False,
by_asset=False):
"""
Computes period-wise returns for a portfolio weighted by factor
values.
Parameters
----------
factor_data : pd.DataFrame - MultiIndex
A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
containing the values for a single alpha factor, forward returns for
each period, the factor quantile/bin that factor value belongs to, and
(optionally) the group the asset belongs to.
- See full explanation in utils.get_clean_factor_and_forward_returns
demeaned : bool
Control how to build factor weights
-- see performance.factor_weights for a full explanation
group_adjust : bool
Control how to build factor weights
-- see performance.factor_weights for a full explanation
equal_weight : bool, optional
Control how to build factor weights
-- see performance.factor_weights for a full explanation
by_asset: bool, optional
If True, returns are reported separately for each asset.
Returns
-------
returns : pd.DataFrame
Period wise factor returns
"""
weights = \
factor_weights(factor_data, demeaned, group_adjust, equal_weight)
weighted_returns = \
factor_data[utils.get_forward_returns_columns(factor_data.columns)] \
.multiply(weights, axis=0)
if by_asset:
returns = weighted_returns
else:
returns = weighted_returns.groupby(level='date').sum()
return returns
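# Example usage (commented-out sketch; '1D' is an illustrative column name):
#
#   rets = factor_returns(factor_data)  # one row per date, one column per period
#   ls_curve = cumulative_returns(rets['1D'])  # growth of $1 in the
#                                              # factor-weighted portfolio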
def factor_alpha_beta(factor_data,
returns=None,
demeaned=True,
group_adjust=False,
equal_weight=False):
"""
Compute the alpha (excess returns), alpha t-stat (alpha significance),
and beta (market exposure) of a factor. A regression is run with
the period wise factor universe mean return as the independent variable
and mean period wise return from a portfolio weighted by factor values
as the dependent variable.
Parameters
----------
factor_data : pd.DataFrame - MultiIndex
A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
containing the values for a single alpha factor, forward returns for
each period, the factor quantile/bin that factor value belongs to, and
(optionally) the group the asset belongs to.
- See full explanation in utils.get_clean_factor_and_forward_returns
returns : pd.DataFrame, optional
Period wise factor returns. If this is None then it will be computed
with 'factor_returns' function and the passed flags: 'demeaned',
'group_adjust', 'equal_weight'
demeaned : bool
Control how to build factor returns used for alpha/beta computation
-- see performance.factor_returns for a full explanation
group_adjust : bool
Control how to build factor returns used for alpha/beta computation
-- see performance.factor_returns for a full explanation
equal_weight : bool, optional
Control how to build factor returns used for alpha/beta computation
-- see performance.factor_returns for a full explanation
Returns
-------
alpha_beta : pd.DataFrame
A DataFrame containing the annualized alpha and the beta
for the given factor and each forward-returns period.
"""
if returns is None:
returns = \
factor_returns(factor_data, demeaned, group_adjust, equal_weight)
universe_ret = factor_data.groupby(level='date')[
utils.get_forward_returns_columns(factor_data.columns)] \
.mean().loc[returns.index]
if isinstance(returns, pd.Series):
returns.name = universe_ret.columns.values[0]
returns = pd.DataFrame(returns)
alpha_beta = pd.DataFrame()
for period in returns.columns.values:
x = universe_ret[period].values
y = returns[period].values
x = add_constant(x)
reg_fit = OLS(y, x).fit()
try:
alpha, beta = reg_fit.params
except ValueError:
alpha_beta.loc['Ann. alpha', period] = np.nan
alpha_beta.loc['beta', period] = np.nan
else:
freq_adjust = pd.Timedelta('252Days') / pd.Timedelta(period)
alpha_beta.loc['Ann. alpha', period] = \
(1 + alpha) ** freq_adjust - 1
alpha_beta.loc['beta', period] = beta
return alpha_beta
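# Note on the annualization above: alpha is the per-period regression
# intercept, so it is compounded by 252 trading days divided by the period
# length. With hypothetical numbers, a '5D' alpha of 0.002 becomes
# (1 + 0.002) ** (252 / 5) - 1, roughly 0.106, i.e. about 10.6% annualized.
#
#   ab = factor_alpha_beta(factor_data)
#   ab.loc['Ann. alpha'], ab.loc['beta']  # one value per forward-return column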
def cumulative_returns(returns):
"""
Computes cumulative returns from simple daily returns.
Parameters
----------
returns: pd.Series
pd.Series containing daily factor returns (i.e. '1D' returns).
Returns
-------
Cumulative returns series : pd.Series
Example:
2015-01-05 1.001310
2015-01-06 1.000805
2015-01-07 1.001092
2015-01-08 0.999200
"""
return ep.cum_returns(returns, starting_value=1)
def positions(weights, period, freq=None):
"""
Builds net position values time series, the portfolio percentage invested
in each position.
Parameters
----------
weights: pd.Series
pd.Series containing factor weights, the index contains timestamps at
which the trades are computed and the values correspond to assets
weights
- see factor_weights for more details
period: pandas.Timedelta or string
Assets holding period (1 day, 2 mins, 3 hours etc). It can be a
Timedelta or a string in the format accepted by Timedelta constructor
('1 days', '1D', '30m', '3h', '1D1h', etc)
freq : pandas DateOffset, optional
Used to specify a particular trading calendar. If not present
weights.index.freq will be used
Returns
-------
pd.DataFrame
Assets positions series, datetime on index, assets on columns.
Example:
index 'AAPL' 'MSFT' cash
2004-01-09 10:30:00 13939.3800 -14012.9930 711.5585
2004-01-09 15:30:00 0.00 -16012.9930 411.5585
2004-01-12 10:30:00 14492.6300 -14624.8700 0.0
2004-01-12 15:30:00 14874.5400 -15841.2500 0.0
2004-01-13 10:30:00 -13853.2800 13653.6400 -43.6375
"""
weights = weights.unstack()
if not isinstance(period, pd.Timedelta):
period = pd.Timedelta(period)
if freq is None:
freq = weights.index.freq
if freq is None:
freq = BDay()
warnings.warn("'freq' not set, using business day calendar",
UserWarning)
#
# weights index contains factor computation timestamps, then add returns
# timestamps too (factor timestamps + period) and save them to 'full_idx'
# 'full_idx' index will contain an entry for each point in time the weights
# change and hence they have to be re-computed
#
trades_idx = weights.index.copy()
returns_idx = utils.add_custom_calendar_timedelta(trades_idx, period, freq)
weights_idx = trades_idx.union(returns_idx)
#
# Compute portfolio weights for each point in time contained in the index
#
portfolio_weights = pd.DataFrame(index=weights_idx,
columns=weights.columns)
active_weights = []
for curr_time in weights_idx:
#
# fetch new weights that become available at curr_time and store them
# in active weights
#
if curr_time in weights.index:
assets_weights = weights.loc[curr_time]
expire_ts = utils.add_custom_calendar_timedelta(curr_time,
period, freq)
active_weights.append((expire_ts, assets_weights))
#
# remove expired entry in active_weights (older than 'period')
#
if active_weights:
expire_ts, assets_weights = active_weights[0]
if expire_ts <= curr_time:
active_weights.pop(0)
if not active_weights:
continue
#
# Compute total weights for curr_time and store them
#
tot_weights = [w for (ts, w) in active_weights]
tot_weights = pd.concat(tot_weights, axis=1)
tot_weights = tot_weights.sum(axis=1)
tot_weights /= tot_weights.abs().sum()
portfolio_weights.loc[curr_time] = tot_weights
return portfolio_weights.fillna(0)
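# Example usage (commented-out sketch; the '1D' holding period and the
# business-day calendar are illustrative assumptions):
#
#   weights = factor_weights(factor_data)
#   pos = positions(weights, period='1D', freq=BDay())
#   # for every timestamp, the net weight held in each asset, averaging the
#   # overlapping portfolios that are still open at that point in time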
def mean_return_by_quantile(factor_data,
by_date=False,
by_group=False,
demeaned=True,
group_adjust=False):
"""
Computes mean returns for factor quantiles across
provided forward returns columns.
Parameters
----------
factor_data : pd.DataFrame - MultiIndex
A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
containing the values for a single alpha factor, forward returns for
each period, the factor quantile/bin that factor value belongs to, and
(optionally) the group the asset belongs to.
- See full explanation in utils.get_clean_factor_and_forward_returns
by_date : bool
If True, compute quantile bucket returns separately for each date.
by_group : bool
If True, compute quantile bucket returns separately for each group.
demeaned : bool
Compute demeaned mean returns (long short portfolio)
group_adjust : bool
Returns demeaning will occur on the group level.
Returns
-------
mean_ret : pd.DataFrame
Mean period wise returns by specified factor quantile.
std_error_ret : pd.DataFrame
Standard error of returns by specified quantile.
"""
if group_adjust:
grouper = [factor_data.index.get_level_values('date')] + ['group']
factor_data = utils.demean_forward_returns(factor_data, grouper)
elif demeaned:
factor_data = utils.demean_forward_returns(factor_data)
else:
factor_data = factor_data.copy()
grouper = ['factor_quantile', factor_data.index.get_level_values('date')]
if by_group:
grouper.append('group')
group_stats = factor_data.groupby(grouper)[
utils.get_forward_returns_columns(factor_data.columns)] \
.agg(['mean', 'std', 'count'])
mean_ret = group_stats.T.xs('mean', level=1).T
if not by_date:
grouper = [mean_ret.index.get_level_values('factor_quantile')]
if by_group:
grouper.append(mean_ret.index.get_level_values('group'))
group_stats = mean_ret.groupby(grouper)\
.agg(['mean', 'std', 'count'])
mean_ret = group_stats.T.xs('mean', level=1).T
std_error_ret = group_stats.T.xs('std', level=1).T \
/ np.sqrt(group_stats.T.xs('count', level=1).T)
return mean_ret, std_error_ret
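# Example usage (commented-out sketch, same `factor_data` assumptions as above):
#
#   q_ret, q_err = mean_return_by_quantile(factor_data, by_group=True)
#   # mean forward return of each (quantile, group) bucket, demeaned against
#   # the whole universe because demeaned=True by default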
def compute_mean_returns_spread(mean_returns,
upper_quant,
lower_quant,
std_err=None):
"""
Computes the difference between the mean returns of
two quantiles. Optionally, computes the standard error
of this difference.
Parameters
----------
mean_returns : pd.DataFrame
DataFrame of mean period wise returns by quantile.
MultiIndex containing date and quantile.
See mean_return_by_quantile.
upper_quant : int
Quantile of mean return from which we
wish to subtract lower quantile mean return.
lower_quant : int
Quantile of mean return we wish to subtract
from upper quantile mean return.
std_err : pd.DataFrame, optional
Period wise standard error in mean return by quantile.
Takes the same form as mean_returns.
Returns
-------
mean_return_difference : pd.Series
Period wise difference in quantile returns.
joint_std_err : pd.Series
Period wise standard error of the difference in quantile returns.
if std_err is None, this will be None
"""
mean_return_difference = mean_returns.xs(upper_quant,
level='factor_quantile') \
- mean_returns.xs(lower_quant, level='factor_quantile')
if std_err is None:
joint_std_err = None
else:
std1 = std_err.xs(upper_quant, level='factor_quantile')
std2 = std_err.xs(lower_quant, level='factor_quantile')
joint_std_err = np.sqrt(std1**2 + std2**2)
return mean_return_difference, joint_std_err
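# Example usage (commented-out sketch): top-minus-bottom quantile spread,
# assuming the factor was bucketed into 5 quantiles upstream.
#
#   mean_ret, std_err = mean_return_by_quantile(factor_data, by_date=True)
#   spread, spread_err = compute_mean_returns_spread(mean_ret,
#                                                    upper_quant=5,
#                                                    lower_quant=1,
#                                                    std_err=std_err)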
def quantile_turnover(quantile_factor, quantile, period=1):
"""
Computes the proportion of names in a factor quantile that were
not in that quantile in the previous period.
Parameters
----------
quantile_factor : pd.Series
Series with a MultiIndex of date and asset, containing the factor
quantile each asset belongs to.
quantile : int
Quantile on which to perform turnover analysis.
period: int, optional
Number of days over which to calculate the turnover.
Returns
-------
quant_turnover : pd.Series
Period by period turnover for that quantile.
"""
quant_names = quantile_factor[quantile_factor == quantile]
quant_name_sets = quant_names.groupby(level=['date']).apply(
lambda x: set(x.index.get_level_values('asset')))
name_shifted = quant_name_sets.shift(period)
new_names = (quant_name_sets - name_shifted).dropna()
quant_turnover = new_names.apply(
lambda x: len(x)) / quant_name_sets.apply(lambda x: len(x))
quant_turnover.name = quantile
return quant_turnover
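# Example usage (commented-out sketch; quantile 5 as the top bucket is an
# illustrative assumption):
#
#   turnover_q5 = quantile_turnover(factor_data['factor_quantile'],
#                                   quantile=5, period=1)
#   # fraction of names in the top quantile that were not in it one period ago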
def factor_rank_autocorrelation(factor_data, period=1):
"""
Computes autocorrelation of mean factor ranks in specified time spans.
We must compare period to period factor ranks rather than factor values
to account for systematic shifts in the factor values of all names or names
within a group. This metric is useful for measuring the turnover of a
factor. If the value of a factor for each name changes randomly from period
to period, we'd expect an autocorrelation of 0.
Parameters
----------
factor_data : pd.DataFrame - MultiIndex
A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
containing the values for a single alpha factor, forward returns for
each period, the factor quantile/bin that factor value belongs to, and
(optionally) the group the asset belongs to.
- See full explanation in utils.get_clean_factor_and_forward_returns
period: int, optional
Number of days over which to calculate the turnover.
Returns
-------
autocorr : pd.Series
Autocorrelation of factor ranks between each date and the date
'period' days earlier.
"""
grouper = [factor_data.index.get_level_values('date')]
ranks = factor_data.groupby(grouper)['factor'].rank()
asset_factor_rank = ranks.reset_index().pivot(index='date',
columns='asset',
values='factor')
asset_shifted = asset_factor_rank.shift(period)
autocorr = asset_factor_rank.corrwith(asset_shifted, axis=1)
autocorr.name = period
return autocorr
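# Example usage (commented-out sketch):
#
#   ac = factor_rank_autocorrelation(factor_data, period=5)
#   # values near 1 mean factor ranks are stable over 5 days (low turnover);
#   # values near 0 mean the ranks reshuffle almost randomly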
def common_start_returns(factor,
returns,
before,
after,
cumulative=False,
mean_by_date=False,
demean_by=None):
"""
A date and equity pair is extracted from each index row in the factor
dataframe, and for each of these pairs a return series is built starting
'before' periods before the date and ending 'after' periods after it.
All these return series are then aligned to a common index (-before to
after) and returned as a single DataFrame.
Parameters
----------
factor : pd.DataFrame
DataFrame with at least date and equity as index, the columns are
irrelevant
returns : pd.DataFrame
A wide form Pandas DataFrame indexed by date with assets in the
columns. Returns data should span the factor analysis time period
plus/minus an additional buffer window corresponding to after/before
period parameters.
before:
How many returns to load before factor date
after:
How many returns to load after factor date
cumulative: bool, optional
Whether or not the given returns are cumulative. If False the given
returns are assumed to be daily.
mean_by_date: bool, optional
If True, compute mean returns for each date and return that
instead of a return series for each asset
demean_by: pd.DataFrame, optional
DataFrame with at least date and equity as index, the columns are
irrelevant. For each date a list of equities is extracted from
'demean_by' index and used as universe to compute demeaned mean
returns (long short portfolio)
Returns
-------
aligned_returns : pd.DataFrame
Dataframe containing returns series for each factor aligned to the same
index: -before to after
"""
if not cumulative:
returns = returns.apply(cumulative_returns, axis=0)
all_returns = []
for timestamp, df in factor.groupby(level='date'):
equities = df.index.get_level_values('asset')
try:
day_zero_index = returns.index.get_loc(timestamp)
except KeyError:
continue
starting_index = max(day_zero_index - before, 0)
ending_index = min(day_zero_index + after + 1,
len(returns.index))
equities_slice = set(equities)
if demean_by is not None:
demean_equities = demean_by.loc[timestamp] \
.index.get_level_values('asset')
equities_slice |= set(demean_equities)
series = returns.loc[returns.index[starting_index:ending_index],
equities_slice]
series.index = range(starting_index - day_zero_index,
ending_index - day_zero_index)
if demean_by is not None:
mean = series.loc[:, demean_equities].mean(axis=1)
series = series.loc[:, equities]
series = series.sub(mean, axis=0)
if mean_by_date:
series = series.mean(axis=1)
all_returns.append(series)
return pd.concat(all_returns, axis=1)
def average_cumulative_return_by_quantile(factor_data,
returns,
periods_before=10,
periods_after=15,
demeaned=True,
group_adjust=False,
by_group=False):
"""
Computes average cumulative returns by factor quantiles in the period range
defined by -periods_before to periods_after.
Parameters
----------
factor_data : pd.DataFrame - MultiIndex
A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
containing the values for a single alpha factor, forward returns for
each period, the factor quantile/bin that factor value belongs to, and
(optionally) the group the asset belongs to.
- See full explanation in utils.get_clean_factor_and_forward_returns
returns : pd.DataFrame
A wide form Pandas DataFrame indexed by date with assets in the
columns. Returns data should span the factor analysis time period
plus/minus an additional buffer window corresponding to periods_after/
periods_before parameters.
periods_before : int, optional
How many periods before factor to plot
periods_after : int, optional
How many periods after factor to plot
demeaned : bool, optional
Compute demeaned mean returns (long short portfolio)
group_adjust : bool
Returns demeaning will occur on the group level (group
neutral portfolio)
by_group : bool
If True, compute cumulative returns separately for each group
Returns
-------
cumulative returns and std deviation : pd.DataFrame
A MultiIndex DataFrame indexed by quantile (level 0) and mean/std
(level 1) and the values on the columns in range from
-periods_before to periods_after
If by_group=True the index will have an additional 'group' level
::
---------------------------------------------------
| | -2 | -1 | 0 | 1 | ...
---------------------------------------------------
quantile | | | | | |
---------------------------------------------------
| mean | x | x | x | x |
1 ---------------------------------------
| std | x | x | x | x |
---------------------------------------------------
| mean | x | x | x | x |
2 ---------------------------------------
| std | x | x | x | x |
---------------------------------------------------
... | ...
---------------------------------------------------
"""
def cumulative_return_around_event(q_fact, demean_by):
return common_start_returns(
q_fact,
returns,
periods_before,
periods_after,
cumulative=True,
mean_by_date=True,
demean_by=demean_by,
)
def average_cumulative_return(q_fact, demean_by):
q_returns = cumulative_return_around_event(q_fact, demean_by)
q_returns.replace([np.inf, -np.inf], np.nan, inplace=True)
return pd.DataFrame({'mean': q_returns.mean(skipna=True, axis=1),
'std': q_returns.std(skipna=True, axis=1)}).T
if by_group:
#
# Compute quantile cumulative returns separately for each group
# Demean those returns according to 'group_adjust' and 'demeaned'
#
returns_bygroup = []
for group, g_data in factor_data.groupby('group'):
g_fq = g_data['factor_quantile']
if group_adjust:
demean_by = g_fq # demeans at group level
elif demeaned:
demean_by = factor_data['factor_quantile'] # demean by all
else:
demean_by = None
#
# Align cumulative return from different dates to the same index
# then compute mean and std
#
avgcumret = g_fq.groupby(g_fq).apply(average_cumulative_return,
demean_by)
if len(avgcumret) == 0:
continue
avgcumret['group'] = group
avgcumret.set_index('group', append=True, inplace=True)
returns_bygroup.append(avgcumret)
return pd.concat(returns_bygroup, axis=0)
else:
#
# Compute quantile cumulative returns for the full factor_data
# Align cumulative return from different dates to the same index
# then compute mean and std
# Demean those returns according to 'group_adjust' and 'demeaned'
#
if group_adjust:
all_returns = []
for group, g_data in factor_data.groupby('group'):
g_fq = g_data['factor_quantile']
avgcumret = g_fq.groupby(g_fq).apply(
cumulative_return_around_event, g_fq
)
all_returns.append(avgcumret)
q_returns = pd.concat(all_returns, axis=1)
q_returns = pd.DataFrame({'mean': q_returns.mean(axis=1),
'std': q_returns.std(axis=1)})
return q_returns.unstack(level=1).stack(level=0)
elif demeaned:
fq = factor_data['factor_quantile']
return fq.groupby(fq).apply(average_cumulative_return, fq)
else:
fq = factor_data['factor_quantile']
return fq.groupby(fq).apply(average_cumulative_return, None)
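# Example usage (commented-out sketch; `daily_rets` is a hypothetical wide
# DataFrame of daily asset returns, dates on the index and assets on the
# columns, covering the factor window plus the before/after buffer):
#
#   avg_cum = average_cumulative_return_by_quantile(factor_data, daily_rets,
#                                                   periods_before=5,
#                                                   periods_after=10)
#   avg_cum.loc[(1, 'mean')]   # mean cumulative return path of quantile 1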
def factor_cumulative_returns(factor_data,
period,
long_short=True,
group_neutral=False,
equal_weight=False,
quantiles=None,
groups=None):
"""
Simulates a portfolio using the input factor and returns the cumulative
returns of the simulated portfolio.
Parameters
----------
factor_data : pd.DataFrame - MultiIndex
A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
containing the values for a single alpha factor, forward returns for
each period, the factor quantile/bin that factor value belongs to,
and (optionally) the group the asset belongs to.
- See full explanation in utils.get_clean_factor_and_forward_returns
period : string
'factor_data' column name corresponding to the 'period' returns to be
used in the computation of portfolio returns
long_short : bool, optional
if True then simulates a dollar neutral long-short portfolio
- see performance.create_pyfolio_input for more details
group_neutral : bool, optional
If True then simulates a group neutral portfolio
- see performance.create_pyfolio_input for more details
equal_weight : bool, optional
Control the assets weights:
- see performance.create_pyfolio_input for more details
quantiles: sequence[int], optional
Use only specific quantiles in the computation. By default all
quantiles are used
groups: sequence[string], optional
Use only specific groups in the computation. By default all groups
are used
Returns
-------
Cumulative returns series : pd.Series
Example:
2015-07-16 09:30:00 -0.012143
2015-07-16 12:30:00 0.012546
2015-07-17 09:30:00 0.045350
2015-07-17 12:30:00 0.065897
2015-07-20 09:30:00 0.030957
"""
fwd_ret_cols = utils.get_forward_returns_columns(factor_data.columns)
if period not in fwd_ret_cols:
raise ValueError("Period '%s' not found" % period)
todrop = list(fwd_ret_cols)
todrop.remove(period)
portfolio_data = factor_data.drop(todrop, axis=1)
if quantiles is not None:
portfolio_data = portfolio_data[portfolio_data['factor_quantile'].isin(
quantiles)]
if groups is not None:
portfolio_data = portfolio_data[portfolio_data['group'].isin(groups)]
returns = \
factor_returns(portfolio_data, long_short, group_neutral, equal_weight)
return cumulative_returns(returns[period])
def factor_positions(factor_data,
period,
long_short=True,
group_neutral=False,
equal_weight=False,
quantiles=None,
groups=None):
"""
Simulates a portfolio using the input factor and returns the asset
positions as percentages of the total portfolio.
Parameters
----------
factor_data : pd.DataFrame - MultiIndex
A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
containing the values for a single alpha factor, forward returns for
each period, the factor quantile/bin that factor value belongs to,
and (optionally) the group the asset belongs to.
- See full explanation in utils.get_clean_factor_and_forward_returns
period : string
'factor_data' column name corresponding to the 'period' returns to be
used in the computation of portfolio returns
long_short : bool, optional
if True then simulates a dollar neutral long-short portfolio
- see performance.create_pyfolio_input for more details
group_neutral : bool, optional
If True then simulates a group neutral portfolio
- see performance.create_pyfolio_input for more details
equal_weight : bool, optional
Control the assets weights:
- see performance.create_pyfolio_input for more details.
quantiles: sequence[int], optional
Use only specific quantiles in the computation. By default all
quantiles are used
groups: sequence[string], optional
Use only specific groups in the computation. By default all groups
are used
Returns
-------
assets positions : pd.DataFrame
Assets positions series, datetime on index, assets on columns.
Example:
index 'AAPL' 'MSFT' cash
2004-01-09 10:30:00 13939.3800 -14012.9930 711.5585
2004-01-09 15:30:00 0.00 -16012.9930 411.5585
2004-01-12 10:30:00 14492.6300 -14624.8700 0.0
2004-01-12 15:30:00 14874.5400 -15841.2500 0.0
2004-01-13 10:30:00 -13853.2800 13653.6400 -43.6375
"""
fwd_ret_cols = utils.get_forward_returns_columns(factor_data.columns)
if period not in fwd_ret_cols:
raise ValueError("Period '%s' not found" % period)
todrop = list(fwd_ret_cols)
todrop.remove(period)
portfolio_data = factor_data.drop(todrop, axis=1)
if quantiles is not None:
portfolio_data = portfolio_data[portfolio_data['factor_quantile'].isin(
quantiles)]
if groups is not None:
portfolio_data = portfolio_data[portfolio_data['group'].isin(groups)]
weights = \
factor_weights(portfolio_data, long_short, group_neutral, equal_weight)
return positions(weights, period)
def create_pyfolio_input(factor_data,
period,
capital=None,
long_short=True,
group_neutral=False,
equal_weight=False,
quantiles=None,
groups=None,
benchmark_period='1D'):
"""
Simulates a portfolio using the input factor and returns the portfolio
performance data properly formatted for pyfolio analysis.
For more details on how this portfolio is built see:
- performance.cumulative_returns (how the portfolio returns are computed)
- performance.factor_weights (how assets weights are computed)
Parameters
----------
factor_data : pd.DataFrame - MultiIndex
A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
containing the values for a single alpha factor, forward returns for
each period, the factor quantile/bin that factor value belongs to,
and (optionally) the group the asset belongs to.
- See full explanation in utils.get_clean_factor_and_forward_returns
period : string
'factor_data' column name corresponding to the 'period' returns to be
used in the computation of portfolio returns
capital : float, optional
If set, then compute 'positions' in dollar amount instead of percentage
long_short : bool, optional
If True, enforce a dollar neutral long-short portfolio: asset weights
will be computed by demeaning factor values and dividing by the sum of
their absolute value (achieving gross leverage of 1), which will cause
the portfolio to hold both long and short positions and the total
weights of both long and short positions will be equal.
If False the portfolio weights will be computed by dividing the factor
values by the sum of their absolute value (achieving gross
leverage of 1). Positive factor values will generate long positions and
negative factor values will produce short positions, so that a factor
with only positive values will result in a long only portfolio.
group_neutral : bool, optional
If True simulates a group neutral portfolio: the portfolio weights
will be computed so that each group will weigh the same.
If 'long_short' is enabled the factor values demeaning will occur on
the group level resulting in a dollar neutral, group neutral,
long-short portfolio.
If False group information will not be used in weights computation.
equal_weight : bool, optional
if True the assets will be equal-weighted. If long_short is True then
the factor universe will be split in two equal sized groups with the
top assets in long positions and bottom assets in short positions.
If False the assets will be factor-weighted, see the 'long_short' argument
quantiles: sequence[int], optional
Use only specific quantiles in the computation. By default all
quantiles are used
groups: sequence[string], optional
Use only specific groups in the computation. By default all groups
are used
benchmark_period : string, optional
By default benchmark returns are computed as the factor universe mean
daily returns, but 'benchmark_period' allows choosing a 'factor_data'
column corresponding to the returns to be used in the computation of
benchmark returns. More generally, benchmark returns are computed as the
factor universe returns traded at 'benchmark_period' frequency, equal
weighted and long only.
Returns
-------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- Time series with decimal returns.
- Example:
2015-07-16 -0.012143
2015-07-17 0.045350
2015-07-20 0.030957
2015-07-21 0.004902
positions : pd.DataFrame
Time series of dollar amount (or percentage when 'capital' is not
provided) invested in each position and cash.
- Days where stocks are not held can be represented by 0.
- Non-working capital is labelled 'cash'
- Example:
index 'AAPL' 'MSFT' cash
2004-01-09 13939.3800 -14012.9930 711.5585
2004-01-12 14492.6300 -14624.8700 27.1821
2004-01-13 -13853.2800 13653.6400 -43.6375
benchmark : pd.Series
Benchmark returns computed as the factor universe mean daily returns.
"""
#
# Build returns:
# we don't know the frequency at which the factor returns are computed but
# pyfolio wants daily returns. So we compute the cumulative returns of the
# factor, then resample it at 1 day frequency and finally compute daily
# returns
#
cumrets = factor_cumulative_returns(factor_data,
period,
long_short,
group_neutral,
equal_weight,
quantiles,
groups)
cumrets = cumrets.resample('1D').last().fillna(method='ffill')
returns = cumrets.pct_change().fillna(0)
#
# Build positions. As pyfolio asks for daily position we have to resample
# the positions returned by 'factor_positions' at 1 day frequency and
# recompute the weights so that the sum of daily weights is 1.0
#
positions = factor_positions(factor_data,
period,
long_short,
group_neutral,
equal_weight,
quantiles,
groups)
positions = positions.resample('1D').sum().fillna(method='ffill')
positions = positions.div(positions.abs().sum(axis=1), axis=0).fillna(0)
positions['cash'] = 1. - positions.sum(axis=1)
# transform percentage positions to dollar positions
if capital is not None:
positions = positions.mul(
cumrets.reindex(positions.index) * capital, axis=0)
#
# Build benchmark returns as the factor universe mean returns traded at
# 'benchmark_period' frequency
#
fwd_ret_cols = utils.get_forward_returns_columns(factor_data.columns)
if benchmark_period in fwd_ret_cols:
benchmark_data = factor_data.copy()
# make sure no negative positions
benchmark_data['factor'] = benchmark_data['factor'].abs()
benchmark_rets = factor_cumulative_returns(benchmark_data,
benchmark_period,
long_short=False,
group_neutral=False,
equal_weight=True)
benchmark_rets = benchmark_rets.resample(
'1D').last().fillna(method='ffill')
benchmark_rets = benchmark_rets.pct_change().fillna(0)
benchmark_rets.name = 'benchmark'
else:
benchmark_rets = None
return returns, positions, benchmark_rets
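# Example usage (commented-out sketch; the '1D' period is illustrative and the
# pyfolio call assumes the separate, optional pyfolio package is installed):
#
#   rets, pos, bench = create_pyfolio_input(factor_data, period='1D',
#                                           long_short=True,
#                                           benchmark_period='1D')
#   # import pyfolio as pf
#   # pf.create_full_tear_sheet(rets, positions=pos, benchmark_rets=bench)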