Analyze four historical financial time series and test their log returns for normality

In [102]:
import pandas as pd
import pandas.io.data as web
import numpy as np
import numpy.random as npr
import statsmodels.api as sm
import scipy.stats as scs
import matplotlib.pyplot as plt
%matplotlib inline
print_statistics is a wrapper function around the describe function from
the scipy.stats sublibrary; it prints the selected statistics as a small table.

In [104]:
def print_statistics(array):
    '''Print selected descriptive statistics of a data set.

    Thin wrapper around ``scipy.stats.describe`` that writes a two-column
    table (statistic name, value) to standard output.

    Parameters
    ----------
    array: ndarray
        object to generate statistics on

    Returns
    -------
    None
        The table is printed, nothing is returned.
    '''
    # describe() yields (nobs, (min, max), mean, variance, skewness, kurtosis);
    # the fields are read by position below.
    sta = scs.describe(array)

    # Single-argument print(...) calls produce the same output under the
    # Python 2 print statement and the Python 3 print function.
    print(' %14s %15s ' % (' statistics ', ' value'))
    print(30 * '-')
    print(' %14s %15f ' % ('size  ', sta[0]))
    print(' %14s %15f ' % ('min  ', sta[1][0]))
    print(' %14s %15f ' % (' max ', sta[1][1]))
    print(' %14s %15f ' % (' mean ', sta[2]))
    # describe() reports the variance; its square root is the standard deviation.
    print(' %14s %15f ' % ('  std', np.sqrt(sta[3])))
    print(' %14s %15f ' % (' skew ', sta[4]))
    print(' %14s %15f ' % ('  kurtosis', sta[5]))

The function normality_tests combines three different statistical tests:

- Skewness test (skewtest): tests whether the skew of the sample data is "normal" (i.e., has a value close enough to zero).
- Kurtosis test (kurtosistest): similarly, tests whether the kurtosis of the sample data is "normal" (again, close enough to zero).
- Normality test (normaltest): combines the other two test approaches to test for normality.

In [105]:
def normality_tests(arr):
    '''Print skewness, kurtosis and normality test results for a data set.

    For the sample skewness and excess kurtosis the raw value is printed
    followed by the p-value of the matching ``scipy.stats`` test; the last
    line is the p-value of the combined ``scipy.stats.normaltest``.

    Parameters
    ----------
    arr: ndarray
        object to generate statistics on

    Returns
    -------
    None
        The results are printed, nothing is returned.
    '''
    # Each scipy test returns (statistic, p-value); index 1 picks the p-value.
    # Single-argument print(...) calls produce the same output under the
    # Python 2 print statement and the Python 3 print function.
    print(' Skew of data set %14.3f' % scs.skew(arr))
    print(' Skew test p-value  %14.3f' % scs.skewtest(arr)[1])
    print('  kurt of data set %14.3f' % scs.kurtosis(arr))
    print('  kurt test p-value %14.3f' % scs.kurtosistest(arr)[1])
    print('  Norm test p-values %14.3f' % scs.normaltest(arr)[1])
   


The four symbols analyzed are:

^GDAXI: the German DAX index, ^GSPC: the American S&P 500 index, YHOO: Yahoo, MSFT: Microsoft

In [106]:
# Ticker symbols of the four instruments under study (two indices, two stocks).
symbols = [
    '^GDAXI',
    '^GSPC',
    'YHOO',
    'MSFT',
]
In [107]:
# Echo the symbol list as the cell output to confirm what will be downloaded.
symbols
Out[107]:
['^GDAXI', '^GSPC', 'YHOO', 'MSFT']
In [108]:
# Collect one 'Adj Close' column per symbol, requested from the 'yahoo' data
# source starting at 1/1/2006. This cell needs network access.
data = pd.DataFrame()
for sym in symbols:
    data[sym] = web.DataReader(
        sym,
        data_source='yahoo',
        start='1/1/2006',
    )['Adj Close']

# Keep only the rows for which every symbol has a quote.
data = data.dropna()
In [109]:
# Sanity check after dropna(): index range, columns and non-null counts.
data.info()

DatetimeIndex: 2352 entries, 2006-01-03 00:00:00 to 2015-06-15 00:00:00
Data columns (total 4 columns):
^GDAXI    2352 non-null float64
^GSPC     2352 non-null float64
YHOO      2352 non-null float64
MSFT      2352 non-null float64
dtypes: float64(4)
Normalize every series so that it starts at 100, which makes the four price paths comparable on one chart.
In [110]:
# Rebase every series to 100 at the first row so all four share one scale.
# .iloc[0] replaces the deprecated (removed in pandas 1.0) .ix[0]; both select
# the first row by position here because the index holds dates, not integers.
(data / data.iloc[0] * 100).plot(figsize=(8, 6))
Out[110]:
In [111]:
# Log return per row: log(price_t / price_{t-1}). shift(1) moves each column
# down one row, so the first row has no predecessor and comes out as NaN.
log_returns = np.log(data.div(data.shift(1)))
In [112]:
# Peek at the first rows; the very first one is NaN because it has no prior price.
log_returns.head()
Out[112]:
              ^GDAXI     ^GSPC      YHOO      MSFT
Date
2006-01-03       NaN       NaN       NaN       NaN
2006-01-04  0.011460  0.003666  0.001466  0.004832
2006-01-05 -0.001284  0.000016  0.013576  0.000741
2006-01-06  0.003581  0.009356  0.039656 -0.002968
2006-01-09  0.000143  0.003650  0.004848 -0.001860
In [113]:
# One 50-bin histogram per symbol, as a first visual check of the return distributions.
log_returns.hist(bins=50,figsize=(9,6))
Out[113]:
array([[,
        ],
       [,
        ]], dtype=object)
In [114]:
# Print the descriptive-statistics table for each symbol's log returns.
for sym in symbols:
    # Single-argument print(...) calls produce the same output under the
    # Python 2 print statement and the Python 3 print function.
    print(' \n Results for symbols %s' % sym)
    print(30 * '-')
    # Drop the NaN values (e.g. the first row) before handing a plain array
    # to the statistics helper.
    log_data = np.array(log_returns[sym].dropna())
    print_statistics(log_data)
 
 Results for symbols ^GDAXI
------------------------------
    statistics            value 
------------------------------
         size       2351.000000 
          min         -0.077391 
           max         0.107975 
          mean         0.000297 
            std        0.014518 
          skew         0.015973 
       kurtosis        6.204635 
 
 Results for symbols ^GSPC
------------------------------
    statistics            value 
------------------------------
         size       2351.000000 
          min         -0.094695 
           max         0.109572 
          mean         0.000211 
            std        0.013282 
          skew        -0.318551 
       kurtosis       10.343515 
 
 Results for symbols YHOO
------------------------------
    statistics            value 
------------------------------
         size       2351.000000 
          min         -0.246364 
           max         0.391817 
          mean        -0.000005 
            std        0.025672 
          skew         0.544870 
       kurtosis       32.247736 
 
 Results for symbols MSFT
------------------------------
    statistics            value 
------------------------------
         size       2351.000000 
          min         -0.124578 
           max         0.170626 
          mean         0.000316 
            std        0.017844 
          skew         0.047480 
       kurtosis       10.277228 
In [115]:
# Q-Q plot of the ^GSPC log returns (NaN rows dropped). line='s' asks
# statsmodels for its standardized reference line.
sm.qqplot(log_returns['^GSPC'].dropna(),line='s')
# The pyplot calls below act on the current figure, so they must come after qqplot.
plt.grid(True)
plt.xlabel('theoretical quantiles')
plt.ylabel('sample quantiles')
Out[115]:
In [116]:
# Q-Q plot of the MSFT log returns (NaN rows dropped). line='s' asks
# statsmodels for its standardized reference line.
sm.qqplot(log_returns['MSFT'].dropna(),line='s')
# The pyplot calls below act on the current figure, so they must come after qqplot.
plt.grid(True)
plt.xlabel('theoretical quantiles')
plt.ylabel('sample quantiles')
Out[116]:
In [117]:
# Q-Q plot of the ^GDAXI log returns (NaN rows dropped). line='s' asks
# statsmodels for its standardized reference line.
sm.qqplot(log_returns['^GDAXI'].dropna(),line='s')
# The pyplot calls below act on the current figure, so they must come after qqplot.
plt.grid(True)
plt.xlabel('theoretical quantiles')
plt.ylabel('sample quantiles')
Out[117]:
In [118]:
# Q-Q plot of the YHOO log returns (NaN rows dropped). line='s' asks
# statsmodels for its standardized reference line.
sm.qqplot(log_returns['YHOO'].dropna(),line='s')
# The pyplot calls below act on the current figure, so they must come after qqplot.
plt.grid(True)
plt.xlabel('theoretical quantiles')
plt.ylabel('sample quantiles')
Out[118]:
In [119]:
# Run the skewness, kurtosis and combined normality tests for each symbol.
for sym in symbols:
    # Single-argument print(...) calls produce the same output under the
    # Python 2 print statement and the Python 3 print function.
    print(' \nResults for symbol %s' % sym)
    print(32 * '-')
    # Drop the NaN values (e.g. the first row) before handing a plain array
    # to the test helper.
    log_data = np.array(log_returns[sym].dropna())
    normality_tests(log_data)
 
Results for symbol ^GDAXI
--------------------------------
 Skew of data set          0.016
 Skew test p-value           0.751
  kurt of data set          6.205
  kurt test p-value          0.000
  Norm test p-values          0.000
 
Results for symbol ^GSPC
--------------------------------
 Skew of data set         -0.319
 Skew test p-value           0.000
  kurt of data set         10.344
  kurt test p-value          0.000
  Norm test p-values          0.000
 
Results for symbol YHOO
--------------------------------
 Skew of data set          0.545
 Skew test p-value           0.000
  kurt of data set         32.248
  kurt test p-value          0.000
  Norm test p-values          0.000
 
Results for symbol MSFT
--------------------------------
 Skew of data set          0.047
 Skew test p-value           0.346
  kurt of data set         10.277
  kurt test p-value          0.000
  Norm test p-values          0.000

Nikkei225

Range 28,000-28,550: up in the early session, down later on.