In [102]:

import pandas as pd
import pandas.io.data as web
import numpy as np
import numpy.random as npr
import statsmodels.api as sm
import scipy.stats as scs
import matplotlib.pyplot as plt
%matplotlib inline



`print_statistics` is a wrapper function around the `describe` function from the `scipy.stats` sublibrary.




In [104]:

def print_statistics(array):
    '''Print a small table of descriptive statistics for a sample.

    Wraps ``scipy.stats.describe`` and prints the sample size, minimum,
    maximum, mean, standard deviation, skew and kurtosis, one per line,
    below a two-column header.

    Parameters
    ----------
    array: ndarray
        object to generate statistics on

    Returns
    -------
    None
        The table is written to standard output.
    '''
    # The describe() result is consumed positionally as
    # (size, (min, max), mean, variance, skew, kurtosis).
    size, (minimum, maximum), mean, variance, skew, kurtosis = scs.describe(array)

    # print(...) with a single parenthesised argument behaves the same under
    # Python 2 and Python 3; the bare print statement is Python 2 only.
    print(' %14s %15s ' % (' statistics ', ' value'))
    print(30 * '-')

    rows = (
        ('size  ', size),
        ('min  ', minimum),
        (' max ', maximum),
        (' mean ', mean),
        # describe() reports the variance; the table shows its square root.
        ('  std', np.sqrt(variance)),
        (' skew ', skew),
        ('  kurtosis', kurtosis),
    )
    for label, value in rows:
        print(' %14s %15f ' % (label, value))




The function `normality_tests` combines three different statistical tests:

- Skewness test (`skewtest`): tests whether the skew of the sample data is “normal” (i.e., has a value close enough to zero).
- Kurtosis test (`kurtosistest`): similarly, tests whether the kurtosis of the sample data is “normal” (again, close enough to zero).
- Normality test (`normaltest`): combines the other two test approaches to test for normality.




In [105]:

def normality_tests(arr):
    '''Print skew, kurtosis and normality test results for a data set.

    Parameters
    ----------
    arr: ndarray
        object to generate statistics on

    Returns
    -------
    None
        Five lines are written to standard output: the sample skew, the
        skew test p-value, the sample kurtosis, the kurtosis test p-value
        and the normality test p-value.
    '''
    # Element [1] of each scipy.stats test result is printed as the p-value.
    # print(...) with a single parenthesised argument works on both
    # Python 2 and Python 3; the bare print statement is Python 2 only.
    print(' Skew of data set %14.3f' % scs.skew(arr))
    print(' Skew test p-value  %14.3f' % scs.skewtest(arr)[1])
    print('  kurt of data set %14.3f' % scs.kurtosis(arr))
    print('  kurt test p-value %14.3f' % scs.kurtosistest(arr)[1])
    print('  Norm test p-values %14.3f' % scs.normaltest(arr)[1])
   





The four time series analyzed are the German DAX index (`^GDAXI`), the American S&P 500 index (`^GSPC`), Yahoo (`YHOO`) and Microsoft (`MSFT`).




In [106]:

# Ticker symbols of the instruments analysed in this notebook.
symbols = [
    '^GDAXI',  # German DAX index
    '^GSPC',   # American S&P 500 index
    'YHOO',    # Yahoo
    'MSFT',    # Microsoft
]


In [107]:

# Echo the ticker list as the cell output.
symbols


Out[107]:

['^GDAXI', '^GSPC', 'YHOO', 'MSFT']

In [108]:

# Collect the adjusted closing prices of every ticker, one column per symbol,
# starting from the beginning of 2006.
# NOTE(review): `pandas.io.data` was dropped from later pandas releases in
# favour of the separate pandas-datareader package -- confirm the pinned
# pandas version before re-running this cell.
data = pd.DataFrame()
for sym in symbols:
    data[sym] = web.DataReader(
        sym,
        data_source='yahoo',
        start='1/1/2006',
    )['Adj Close']

# Keep only the rows for which all columns have a price.
data = data.dropna()


In [109]:

# Summarise the index range, the columns and their non-null counts / dtypes.
data.info()




DatetimeIndex: 2352 entries, 2006-01-03 00:00:00 to 2015-06-15 00:00:00
Data columns (total 4 columns):
^GDAXI    2352 non-null float64
^GSPC     2352 non-null float64
YHOO      2352 non-null float64
MSFT      2352 non-null float64
dtypes: float64(4)



Normalize all series so that each one starts at an index value of 100.

In [110]:

# Rebase every series to 100 at the first row so the four price paths can be
# compared on one axis. `.iloc[0]` selects the first row by position; it
# replaces `.ix[0]`, which is deprecated and was removed in pandas 1.0.
(data / data.iloc[0] * 100).plot(figsize=(8, 6))


Out[110]:





In [111]:

# Log returns: natural log of each price divided by the previous row's price.
# The first row is NaN because it has no predecessor.
log_returns = np.log(data.div(data.shift(1)))


In [112]:

# Peek at the first rows of the log returns (the first row is all NaN).
log_returns.head()


Out[112]:

^GDAXI^GSPCYHOOMSFT
Date
2006-01-03NaNNaNNaNNaN
2006-01-040.0114600.0036660.0014660.004832
2006-01-05-0.0012840.0000160.0135760.000741
2006-01-060.0035810.0093560.039656-0.002968
2006-01-090.0001430.0036500.004848-0.001860


In [113]:

# One 50-bin histogram of the log returns per column.
log_returns.hist(figsize=(9, 6), bins=50)


Out[113]:

array([[,
        ],
       [,
        ]], dtype=object)



In [114]:

# Print the descriptive-statistics table for the log returns of each symbol.
for sym in symbols:
    # Single-argument print(...) is valid on both Python 2 and Python 3,
    # unlike the bare print statement.
    print(' \n Results for symbols %s' % sym)
    print(30 * '-')
    # Drop the NaN entries and pass a plain ndarray to the helper.
    log_data = np.array(log_returns[sym].dropna())
    print_statistics(log_data)



 
 Results for symbols ^GDAXI
------------------------------
    statistics            value 
------------------------------
         size       2351.000000 
          min         -0.077391 
           max         0.107975 
          mean         0.000297 
            std        0.014518 
          skew         0.015973 
       kurtosis        6.204635 
 
 Results for symbols ^GSPC
------------------------------
    statistics            value 
------------------------------
         size       2351.000000 
          min         -0.094695 
           max         0.109572 
          mean         0.000211 
            std        0.013282 
          skew        -0.318551 
       kurtosis       10.343515 
 
 Results for symbols YHOO
------------------------------
    statistics            value 
------------------------------
         size       2351.000000 
          min         -0.246364 
           max         0.391817 
          mean        -0.000005 
            std        0.025672 
          skew         0.544870 
       kurtosis       32.247736 
 
 Results for symbols MSFT
------------------------------
    statistics            value 
------------------------------
         size       2351.000000 
          min         -0.124578 
           max         0.170626 
          mean         0.000316 
            std        0.017844 
          skew         0.047480 
       kurtosis       10.277228 


In [115]:

# Quantile-quantile plot of the ^GSPC log returns (NaN entries dropped).
sm.qqplot(log_returns['^GSPC'].dropna(), line='s')
# Name the instrument so this figure can be told apart from the other,
# otherwise identically labelled, Q-Q plots in the notebook.
plt.title('Q-Q plot of ^GSPC log returns')
plt.grid(True)
plt.xlabel('theoretical quantiles')
plt.ylabel('sample quantiles')


Out[115]:





In [116]:

# Quantile-quantile plot of the MSFT log returns (NaN entries dropped).
sm.qqplot(log_returns['MSFT'].dropna(), line='s')
# Name the instrument so this figure can be told apart from the other,
# otherwise identically labelled, Q-Q plots in the notebook.
plt.title('Q-Q plot of MSFT log returns')
plt.grid(True)
plt.xlabel('theoretical quantiles')
plt.ylabel('sample quantiles')


Out[116]:





In [117]:

# Quantile-quantile plot of the ^GDAXI log returns (NaN entries dropped).
sm.qqplot(log_returns['^GDAXI'].dropna(), line='s')
# Name the instrument so this figure can be told apart from the other,
# otherwise identically labelled, Q-Q plots in the notebook.
plt.title('Q-Q plot of ^GDAXI log returns')
plt.grid(True)
plt.xlabel('theoretical quantiles')
plt.ylabel('sample quantiles')


Out[117]:





In [118]:

# Quantile-quantile plot of the YHOO log returns (NaN entries dropped).
sm.qqplot(log_returns['YHOO'].dropna(), line='s')
# Name the instrument so this figure can be told apart from the other,
# otherwise identically labelled, Q-Q plots in the notebook.
plt.title('Q-Q plot of YHOO log returns')
plt.grid(True)
plt.xlabel('theoretical quantiles')
plt.ylabel('sample quantiles')


Out[118]:





In [119]:

# Run the skew, kurtosis and normality tests on the log returns of each symbol.
for sym in symbols:
    # Single-argument print(...) is valid on both Python 2 and Python 3,
    # unlike the bare print statement.
    print(' \nResults for symbol %s' % sym)
    print(32 * '-')
    # Drop the NaN entries and pass a plain ndarray to the helper.
    log_data = np.array(log_returns[sym].dropna())
    normality_tests(log_data)



 
Results for symbol ^GDAXI
--------------------------------
 Skew of data set          0.016
 Skew test p-value           0.751
  kurt of data set          6.205
  kurt test p-value          0.000
  Norm test p-values          0.000
 
Results for symbol ^GSPC
--------------------------------
 Skew of data set         -0.319
 Skew test p-value           0.000
  kurt of data set         10.344
  kurt test p-value          0.000
  Norm test p-values          0.000
 
Results for symbol YHOO
--------------------------------
 Skew of data set          0.545
 Skew test p-value           0.000
  kurt of data set         32.248
  kurt test p-value          0.000
  Norm test p-values          0.000
 
Results for symbol MSFT
--------------------------------
 Skew of data set          0.047
 Skew test p-value           0.346
  kurt of data set         10.277
  kurt test p-value          0.000
  Norm test p-values          0.000
Alfa Investor In A Good Mood

analyze four historical time series

The function normality_tests combines three different statistical tests:

The four time series analyzed are the German DAX index (`^GDAXI`), the American S&P 500 index (`^GSPC`), Yahoo (`YHOO`) and Microsoft (`MSFT`).

Normalize all series so that each one starts at an index value of 100.

Nikkei225

	^GDAXI	^GSPC	YHOO	MSFT
Date
2006-01-03	NaN	NaN	NaN	NaN
2006-01-04	0.011460	0.003666	0.001466	0.004832
2006-01-05	-0.001284	0.000016	0.013576	0.000741
2006-01-06	0.003581	0.009356	0.039656	-0.002968
2006-01-09	0.000143	0.003650	0.004848	-0.001860