import numpy as np
import pandas as pd  # pandas data frame
import os            # to access the file directories
import matplotlib.pyplot as plt
%matplotlib inline
%pylab inline

# Global plot styling: wide default figures and a 12 pt serif font.
font = dict(family='serif', weight='normal', size=12)
plt.rcParams['figure.figsize'] = (15, 6)
plt.rc('font', **font)

Populating the interactive namespace from numpy and matplotlib


# Ticker symbol -> company name for the single-letter tickers used below.
stock_name_dict = dict(
    A='Agilent Technologies, Inc.',
    B='Barnes Group Inc.',
    C='Citigroup Inc.',
    D='Dominion Energy, Inc.',
    F='Ford Motor Company',
    M="Macy's, Inc.",
    S='Sprint Corporation',
    T='AT&T Inc.',
    X='United States Steel Corporation',
    Y='Alleghany Corporation',
)


# Load every CSV file in `directory` into stocks_dict, keyed by ticker symbol
# (the file name without its extension). Each entry holds the display name,
# the 'Close' column, and the per-row price relative Close / Open.
directory = 'data/'
stocks_dict = {}
# NOTE(review): the next two dicts are never filled in the visible code;
# kept in case other cells rely on the names.
prices_dict = {}
price_relatives_dict = {}
# os.listdir returns entries in arbitrary order; sort for a reproducible
# insertion order (and therefore reproducible legend/colour order in plots).
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".csv"):
        continue
    df = pd.read_csv(os.path.join(directory, filename))
    stock_symbol = os.path.splitext(filename)[0]

    stocks_dict[stock_symbol] = {
        # Fall back to the bare symbol so a CSV without a known company
        # name does not abort the whole load with a KeyError.
        'Name': stock_name_dict.get(stock_symbol, stock_symbol),
        'Close': np.array(df['Close']),
        'Relative': np.array(df['Close']/df['Open']),
    }


# First figure: closing-price series, one labelled line per stock.
for stock in stocks_dict.values():
    plt.plot(stock['Close'], label=stock['Name'])
plt.legend(loc=1)
plt.title(r'Stock prices');

# Second figure: price-relative series, one labelled line per stock.
plt.figure()
for stock in stocks_dict.values():
    plt.plot(stock['Relative'], label=stock['Name'])
plt.legend(loc=1)
plt.title(r'Price relatives');


# Stack the per-stock series into 2-D arrays with one row per stock,
# rows ordered by sorted ticker symbol.
prices_stack, price_relatives_stack = (
    np.vstack([stocks_dict[symbol][field] for symbol in sorted(stocks_dict)])
    for field in ('Close', 'Relative')
)


def invest_with_CRP(a_CRP, price_relatives_stack):
    """
    Return the wealth multiple achieved by a constant rebalanced portfolio.

    a_CRP: portfolio weights, one entry per row of `price_relatives_stack`
    price_relatives_stack: price relatives, one row per stock and one
        column per period

    The same weights are applied in every period; the per-period growth
    factors are then multiplied together.
    """
    assert len(price_relatives_stack) == len(a_CRP)
    growth_per_period = np.dot(a_CRP, price_relatives_stack)
    return growth_per_period.prod()


def approx_optimal_CRP(price_relatives_stack, N):
    """
    Given price relative vectors, find an approximate optimal CRP
    by Monte Carlo sampling of CRP over the simplex uniformly at random.

    price_relatives_stack: price relatives, one row per stock
    N: number of CRPs to sample

    Returns a tuple (best sampled CRP, its return).

    Side effect: when the best return exceeds 1, the return of every
    sampled CRP is plotted on the current matplotlib axes against
    1 - (weight of the first stock).
    """
    num_stocks = price_relatives_stack.shape[0]
    sampled_CRPs = [np.random.dirichlet([1]*num_stocks) for __ in range(N)]  # uniformly sampled over the simplex
    # Order by the first weight so the plotted curve runs monotonically along the x-axis.
    sampled_CRPs = sorted(sampled_CRPs, key=lambda x: x[0])
    return_rate_list = [invest_with_CRP(CRP, price_relatives_stack) for CRP in sampled_CRPs]

    # Use np.argmax explicitly: a bare `argmax` only exists when the pylab
    # namespace has been star-imported (e.g. via `%pylab`).
    approx_opt_CRP_idx = np.argmax(return_rate_list)
    if return_rate_list[approx_opt_CRP_idx] > 1:
        plt.plot([1-CRP[0] for CRP in sampled_CRPs], return_rate_list, marker="*")

    return sampled_CRPs[approx_opt_CRP_idx], return_rate_list[approx_opt_CRP_idx]


def approx_UP(price_relatives_stack, alpha=1/2, N=100):
    """
    Approximate the universal portfolio by averaging over CRPs sampled
    from a Dirichlet(alpha,...,alpha) prior.

    price_relatives_stack: 2-D array of price relatives, one row per stock
        and one column per day
    alpha: a parameter for Dirichlet(alpha,...,alpha) prior
    N: number of CRPs drawn from the prior

    Returns a tuple (wealth, b_UP):
        wealth: mean over the sampled CRPs of their wealth multiple after
            the last day
        b_UP: array with the same shape as `price_relatives_stack`;
            column t is the average of the sampled CRPs weighted by the
            wealth each one has accumulated over days 0..t-1 (equal
            weights for t = 0)
    """
    num_stocks, num_days = price_relatives_stack.shape
    # One sampled CRP per row: shape (N, num_stocks).
    sampled_CRPs = np.random.dirichlet([alpha]*num_stocks, size=N)

    # Running wealth of every sampled CRP before the current day. Updating it
    # day by day avoids recomputing each prefix product from scratch, which
    # made the cost quadratic in the number of days.
    wealth = np.ones(N)
    b_UP = np.zeros(price_relatives_stack.shape)

    for t in range(num_days):
        b_UP[:, t] = np.dot(wealth, sampled_CRPs) / wealth.sum()
        wealth *= np.dot(sampled_CRPs, price_relatives_stack[:, t])

    return np.mean(wealth), b_UP


# Plot rows 0 and 1 of prices_stack; the transpose makes each row one line.
plt.plot(np.transpose(prices_stack[(0,1), :]))

[<matplotlib.lines.Line2D at 0x200d6a259e8>,
 <matplotlib.lines.Line2D at 0x200d6a25b38>]


%time approx_optimal_CRP(price_relatives_stack[(0,1,2), :], N=500000)

Wall time: 10.1 s

(array([7.93127792e-01, 2.06863282e-01, 8.92595149e-06]), 4.465356404129358)


%time approx_UP(price_relatives_stack[(0,1),:], N=1000, alpha=0.5)

Wall time: 30.7 s

(4.059432161074425,
 array([[0.50271361, 0.50108446, 0.49933109, ..., 0.53650511, 0.53630791,
         0.53634435],
        [0.49728639, 0.49891554, 0.50066891, ..., 0.46349489, 0.46369209,
         0.46365565]]))


# `_` is IPython's last-output variable; keep element 1 of the previous
# cell's result (presumably the portfolio array returned by approx_UP --
# only valid if this runs directly after that cell).
b_UP = _[1]


b_UP

array([0.48279106, 0.48442079, 0.4861753 , ..., 0.44949084, 0.4496869 ,
       0.44965067])


# Plot rows 0 and 1 of b_UP.
# NOTE(review): assumes b_UP is the two-row portfolio array from approx_UP -- confirm.
plt.plot(b_UP[0])
plt.plot(b_UP[1])

[<matplotlib.lines.Line2D at 0x200ca0e9e48>]


# Plot the price series in rows 0 and 1 of prices_stack on the same axes.
plt.plot(prices_stack[0])
plt.plot(prices_stack[1])

[<matplotlib.lines.Line2D at 0x200cba437b8>]


# For every pair of stocks, compare the approximate universal portfolio with
# the approximate best CRP in hindsight; report and plot only the pairs whose
# best CRP return exceeds 1.
N=500
for i in range(len(stocks_dict)):
    for j in range(i+1, len(stocks_dict)):
        stock_indices = (i,j)
        # approx_UP returns (wealth, per-day portfolios); only the wealth is reported.
        return_UP, __ = approx_UP(price_relatives_stack[stock_indices,:], N=N, alpha=0.5)
        # Select the left panel first: approx_optimal_CRP draws on the current axes.
        plt.subplot(1,2,1)
        __, return_optimal_CRP = approx_optimal_CRP(price_relatives_stack[stock_indices, :], N=N)

        if return_optimal_CRP > 1:
            print(stock_indices, return_UP, return_optimal_CRP)
            plt.subplot(1,2,2)
            plt.plot(np.transpose(prices_stack[stock_indices, :]))
            plt.title(stock_indices)
            # Start a fresh figure for the next pair.
            plt.figure()

(0, 1) 4.0529095695587705 4.465675328712097
(0, 2) 1.0570894674563118 4.32899846003565
(0, 3) 3.3766002188855455 4.36916281063263
(0, 4) 1.0869808634386162 4.3596953458441865
(0, 5) 2.052217215118657 4.362765949188257
(0, 6) 1.0956870419114644 4.332083136607008
(0, 7) 2.124001272366331 4.368963532237619
(0, 8) 1.2853187498953944 4.263733846189493
(0, 9) 3.188312225382015 4.368820778481182
(1, 2) 0.7582146162913356 3.282675863032962
(1, 3) 2.998499012658981 3.3687708572977444
(1, 4) 0.6873832944481592 3.129789007187662
(1, 5) 1.7254527296203863 3.2875990170171514
(1, 6) 0.8441957084107145 3.2400563510259905
(1, 7) 1.8561290001879047 3.284332860347145
(1, 8) 1.0217392383068304 3.2848530077155713
(1, 9) 2.792817240760202 3.2966632014245616
(2, 3) 0.5151777580706158 2.3220220686358624

C:\Users\Jongha\Anaconda3\lib\site-packages\matplotlib\cbook\deprecation.py:106: MatplotlibDeprecationWarning: Adding an axes using the same arguments as a previous axes currently reuses the earlier instance.  In a future version, a new instance will always be created and returned.  Meanwhile, this warning can be suppressed, and the future behavior ensured, by passing a unique label to each axes instance.
  warnings.warn(message, mplDeprecation, stacklevel=1)

(2, 9) 0.4619838440570705 1.931117995948166
(3, 4) 0.5938321506559189 2.283464665598109
(3, 5) 1.3110442018280828 2.330834716222988
(3, 6)

C:\Users\Jongha\Anaconda3\lib\site-packages\matplotlib\pyplot.py:528: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  max_open_warning, RuntimeWarning)

 0.6352772457794267 2.3239116834277325
(3, 7) 1.3868056905673263 2.330806020612513
(3, 8) 0.7186073775623206 2.3200884394147177
(3, 9) 2.2369220270717936 2.3977724870237305
(4, 9) 0.4536030409124963 1.9185672577702815
(5, 9) 1.2240939548688388 1.947617055623075
(6, 9) 0.5606042919228192 1.9117078032333024
(7, 9) 1.2462517708091545 1.9479921600219072
(8, 9) 0.624765123256832 1.9488397302975098

<matplotlib.figure.Figure at 0x1e50f3a89b0>


# Run the universal-portfolio approximation on rows 0-3 of
# price_relatives_stack with 100000 sampled CRPs.
approx_UP(price_relatives_stack[0:4], N=100000, alpha=1/2)

1.4687807371527541


# Wealth multiple of a single CRP drawn from Dirichlet(1/2, ..., 1/2) over all stocks.
a_CRP = np.random.dirichlet([1/2]*price_relatives_stack.shape[0])
# The helper is named invest_with_CRP; `invest_by_CRP` is not defined anywhere.
invest_with_CRP(a_CRP, price_relatives_stack)

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-16-5f635d903491> in <module>()
      1 a_CRP = np.random.dirichlet([1/2]*price_relatives_stack.shape[0])
----> 2 invest_by_CRP(a_CRP, price_relatives_stack)

NameError: name 'invest_by_CRP' is not defined


'''Functions for drawing contours of Dirichlet distributions.'''

# Author: Thomas Boggs
# https://gist.github.com/tboggs/8778945
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.tri as tri
from functools import reduce 

# Vertices of an equilateral triangle with unit side length.
_corners = np.array([[0, 0], [1, 0], [0.5, 0.75**0.5]])
_triangle = tri.Triangulation(_corners[:, 0], _corners[:, 1])
# _midpoints[i] is the midpoint of the edge opposite corner i.
_midpoints = [(_corners[j] + _corners[k]) / 2.0
              for j, k in ((1, 2), (2, 0), (0, 1))]

def xy2bc(xy, tol=1.e-3):
    '''Maps a 2D Cartesian point to barycentric coordinates.
    Arguments:
        `xy`: A length-2 sequence containing the x and y value.
        `tol`: Every coordinate is clipped to the range [tol, 1 - tol].
    '''
    coords = []
    for corner, midpoint in zip(_corners, _midpoints):
        # Projection of the point onto the line from the opposite edge's
        # midpoint to this corner, scaled by 0.75.
        coords.append((corner - midpoint).dot(xy - midpoint) / 0.75)
    return np.clip(coords, tol, 1.0 - tol)

class Dirichlet(object):
    '''Dirichlet distribution with a fixed parameter vector.'''

    def __init__(self, alpha):
        '''Stores `alpha` and precomputes the pdf normalising coefficient.'''
        from math import gamma
        from operator import mul
        self._alpha = np.array(alpha)
        # coefficient = Gamma(sum(alpha)) / prod(Gamma(alpha_i))
        numerator = gamma(np.sum(self._alpha))
        denominator = reduce(mul, map(gamma, self._alpha))
        self._coef = numerator / denominator

    def pdf(self, x):
        '''Evaluates the density at the point `x`.'''
        from operator import mul
        powers = (x_i ** (alpha_i - 1) for x_i, alpha_i in zip(x, self._alpha))
        return self._coef * reduce(mul, powers)

    def sample(self, N):
        '''Draws `N` random samples from the distribution.'''
        return np.random.dirichlet(self._alpha, N)

def draw_pdf_contours(dist, border=False, nlevels=200, subdiv=8, **kwargs):
    '''Draws pdf contours over an equilateral triangle (2-simplex).
    Arguments:
        `dist`: A distribution instance with a `pdf` method.
        `border` (bool): If True, the simplex border is drawn.
        `nlevels` (int): Number of contours to draw.
        `subdiv` (int): Number of recursive mesh subdivisions to create.
        kwargs: Keyword args passed on to `plt.tricontourf`.
    '''
    refiner = tri.UniformTriRefiner(_triangle)
    trimesh = refiner.refine_triangulation(subdiv=subdiv)
    # Evaluate the density at every mesh vertex, in barycentric coordinates.
    pvals = [dist.pdf(xy2bc(xy)) for xy in zip(trimesh.x, trimesh.y)]

    plt.tricontourf(trimesh, pvals, nlevels, **kwargs)
    plt.axis('equal')
    plt.xlim(0, 1)
    plt.ylim(0, 0.75**0.5)
    plt.axis('off')
    if border is True:
        # No plt.hold() call: it has been removed from matplotlib, and
        # drawing on top of the existing axes is the default behaviour.
        plt.triplot(_triangle, linewidth=1)

def plot_points(X, barycentric=True, border=True, **kwargs):
    '''Plots a set of points in the simplex.
    Arguments:
        `X` (ndarray): An Nx2 array (if in Cartesian coords) or Nx3 array
                       (if in barycentric coords) of points to plot,
                       one point per row.
        `barycentric` (bool): Indicates if `X` is in barycentric coords.
        `border` (bool): If True, the simplex border is drawn.
        kwargs: Keyword args passed on to `plt.plot`.
    '''
    if barycentric is True:
        # Barycentric -> Cartesian: weighted combination of the triangle corners.
        X = X.dot(_corners)
    plt.plot(X[:, 0], X[:, 1], 'k.', ms=1, **kwargs)
    plt.axis('equal')
    plt.xlim(0, 1)
    plt.ylim(0, 0.75**0.5)
    plt.axis('off')
    if border is True:
        # No plt.hold() call: it has been removed from matplotlib, and
        # drawing on top of the existing axes is the default behaviour.
        plt.triplot(_triangle, linewidth=1)

if __name__ == '__main__':
    # Demo: draw the density contours for each parameter vector in `alphas`
    # and save the figure as a PNG.
    # Alternative parameter sets (disabled): [0.999] * 3, [5] * 3, [2, 5, 15].
    alphas = [[5] * 3]
    num_columns = len(alphas)
    for i, alpha in enumerate(alphas):
        # Top row of a 2 x len(alphas) grid; the bottom row was meant for a
        # scatter of samples via plot_points(dist.sample(5000)) (disabled).
        plt.subplot(2, num_columns, i + 1)
        dist = Dirichlet(alpha)
        draw_pdf_contours(dist, cmap='hot')
        title = r'$\alpha$ = {}'.format(alpha)
        plt.title(title, fontdict=dict(fontsize=12))
    plt.savefig('dirichlet_plots.png')
    print('Wrote plots to "dirichlet_plots.png".')

ECE 225B: Universal Probability and Applications @UCSD

Homework 3: Universal portfolios

Review

1) Constant rebalanced portfolio as a probability induced portfolio

2) Cover's Universal Portfolio algorithm

3) Implementation

The stock data

Problem 1. (Approximate optimal CRP in hindsight)

Problem 2. (Approximate Universal Portfolio algorithm)

Problem 3. (Experiments)

Problem 3(a).

Problem 3(b).