{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "a709f077-6ac0-4d32-a0d2-69329ec4a449", "metadata": { "tags": [] }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "id": "07c48ed9-2b3a-4bd5-a021-c9f3b6cce68c", "metadata": {}, "outputs": [], "source": [ "returns = pd.read_csv('analysis csv file/returns.csv')\n", "sentiment_score_df = pd.read_csv('analysis csv file/sentiment_score_df.csv')" ] }, { "cell_type": "code", "execution_count": 3, "id": "af971dbc-6a19-44d9-a138-6d68d2579d2a", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TickerDateReturnEvent
0XLB2021-09-14-0.0110Event 1
1XLE2021-09-14-0.0140Event 1
2XLF2021-09-14-0.0134Event 1
3XLI2021-09-14-0.0120Event 1
4XLK2021-09-14-0.0010Event 1
...............
3746XLRE2023-01-200.0116Event 10
3747XLU2023-01-200.0060Event 10
3748XLV2023-01-200.0050Event 10
3749XLY2023-01-200.0242Event 10
3750XLC2023-01-200.0313Event 10
\n", "

3751 rows × 4 columns

\n", "
" ], "text/plain": [ " Ticker Date Return Event\n", "0 XLB 2021-09-14 -0.0110 Event 1\n", "1 XLE 2021-09-14 -0.0140 Event 1\n", "2 XLF 2021-09-14 -0.0134 Event 1\n", "3 XLI 2021-09-14 -0.0120 Event 1\n", "4 XLK 2021-09-14 -0.0010 Event 1\n", "... ... ... ... ...\n", "3746 XLRE 2023-01-20 0.0116 Event 10\n", "3747 XLU 2023-01-20 0.0060 Event 10\n", "3748 XLV 2023-01-20 0.0050 Event 10\n", "3749 XLY 2023-01-20 0.0242 Event 10\n", "3750 XLC 2023-01-20 0.0313 Event 10\n", "\n", "[3751 rows x 4 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "returns" ] }, { "cell_type": "code", "execution_count": 4, "id": "a7d95f52-d112-402e-9cc1-f7750bf7968d", "metadata": { "tags": [] }, "outputs": [], "source": [ "announce_date = pd.DataFrame({'Date': ['2021-09-24', '2021-11-09', '2021-11-29', '2022-03-21', '2022-05-24', '2022-06-17', '2022-08-26', '2022-09-28', '2022-11-30', '2023-01-11']})\n", "announce_date['Date'] = pd.to_datetime(announce_date['Date'])" ] }, { "cell_type": "code", "execution_count": 5, "id": "87154e60-3769-4534-8624-7412fceb394d", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Date Return_mean Return_Standard_Deviation SentimentScore\n", "0 2021-09-24 0.936924 -0.298629 8\n", "1 2021-11-09 -3.196928 -0.136439 26\n", "2 2021-11-29 0.069060 0.400765 21\n", "3 2022-03-21 -0.106188 0.005310 2\n", "4 2022-05-24 0.520890 -0.200474 28\n", "5 2022-06-17 1.815041 0.313768 9\n", "6 2022-08-26 1.290273 -0.577637 18\n", "7 2022-09-28 -3.113639 -0.364481 12\n", "8 2022-11-30 1.642191 0.699407 28\n", " Return_mean Return_Standard_Deviation \\\n", "Return_mean 1.000000 0.349404 \n", "Return_Standard_Deviation 0.349404 1.000000 \n", "SentimentScore -0.063444 0.227383 \n", "\n", " SentimentScore \n", "Return_mean -0.063444 \n", "Return_Standard_Deviation 0.227383 \n", "SentimentScore 1.000000 \n" ] } ], "source": [ "window = 10\n", "event_dates = ['2021-09-24', '2021-11-09', '2021-11-29', '2022-03-21', '2022-05-24', '2022-06-17', '2022-08-26', '2022-09-28', '2022-11-30', '2023-01-11']\n", "event_dates = [pd.Timestamp(date) for date in event_dates]\n", "returns['Date'] = pd.to_datetime(returns['Date'])\n", "\n", "for event_date in event_dates:\n", " before_event = returns[returns['Date'] < event_date].tail(window)\n", " after_event = returns[returns['Date'] > event_date].head(window)\n", "\n", " mean_before = before_event['Return'].mean()\n", " std_before = before_event['Return'].std()\n", " mean_after = after_event['Return'].mean()\n", " std_after = after_event['Return'].std()\n", "\n", " return_mean = (mean_before-mean_after)/mean_before\n", " return_std = (std_before-std_after)/std_before\n", " announce_date.loc[announce_date['Date'] == event_date, 'Return_mean'] = return_mean\n", " announce_date.loc[announce_date['Date'] == event_date, 'Return_Standard_Deviation'] = return_std\n", "\n", "sentiment_score_df['Date'] = pd.to_datetime(sentiment_score_df['Date'])\n", "merged_df = pd.merge(announce_date, sentiment_score_df, on='Date')\n", "print(merged_df)\n", "\n", "corr_matrix = merged_df[['Return_mean', 'Return_Standard_Deviation', 'SentimentScore']].corr()\n", "print(corr_matrix)\n" ] }, { "cell_type": "code", "execution_count": 6, "id": "bdafdb40-eff7-4cda-bb4e-856f6f045c49", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Ticker Date Return_mean Return_std SentimentScore\n", "0 XLB 2021-09-24 1.055172 0.228157 8\n", "1 XLB 2021-11-09 1.270270 0.368564 26\n", "2 XLB 2021-11-29 1.314286 -1.130570 21\n", "3 XLB 2022-03-21 0.230769 0.456828 2\n", "4 XLB 2022-05-24 -0.320000 -0.044449 28\n", ".. ... ... ... ... ...\n", "94 XLC 2022-05-24 -2.015306 0.120532 28\n", "95 XLC 2022-06-17 1.334322 0.162240 9\n", "96 XLC 2022-08-26 1.562500 0.064757 18\n", "97 XLC 2022-09-28 0.558089 -1.031141 12\n", "98 XLC 2022-11-30 -2.878378 -0.194767 28\n", "\n", "[99 rows x 5 columns]\n" ] } ], "source": [ "import pandas as pd\n", "\n", "# Assuming you have the returns, announce_date, and sentiment_score_df dataframes\n", "\n", "window = 10\n", "event_dates = ['2021-09-24', '2021-11-09', '2021-11-29', '2022-03-21', '2022-05-24', '2022-06-17', '2022-08-26', '2022-09-28', '2022-11-30', '2023-01-11']\n", "event_dates = [pd.Timestamp(date) for date in event_dates]\n", "returns['Date'] = pd.to_datetime(returns['Date'])\n", "\n", "# Get unique tickers\n", "tickers = returns['Ticker'].unique()\n", "\n", "# Create an empty DataFrame to store the results\n", "results = []\n", "\n", "for ticker in tickers:\n", " ticker_returns = returns[returns['Ticker'] == ticker]\n", " for event_date in event_dates:\n", " before_event = ticker_returns[ticker_returns['Date'] < event_date].tail(window)\n", " after_event = ticker_returns[ticker_returns['Date'] > event_date].head(window)\n", "\n", " mean_before = before_event['Return'].mean()\n", " std_before = before_event['Return'].std()\n", " mean_after = after_event['Return'].mean()\n", " std_after = after_event['Return'].std()\n", "\n", " if mean_before != 0:\n", " return_mean = (mean_before - mean_after) / mean_before\n", " else:\n", " return_mean = None\n", "\n", " if std_before != 0:\n", " return_std = (std_before - std_after) / std_before\n", " else:\n", " return_std = None\n", "\n", " results.append({'Ticker': ticker, 'Date': event_date, 'Return_mean': return_mean, 'Return_std': return_std})\n", "\n", "# Convert the results list to a DataFrame\n", "results_df = pd.DataFrame(results)\n", "\n", "# Create a DataFrame with sentiment scores for each ticker\n", "sentiment_score_df['Date'] = pd.to_datetime(sentiment_score_df['Date'])\n", "sentiment_scores_tickers = pd.concat([sentiment_score_df.assign(Ticker=ticker) for ticker in tickers], ignore_index=True)\n", "\n", "# Merge sentiment scores with the results DataFrame\n", "merged_df = pd.merge(results_df, sentiment_scores_tickers, on=['Ticker', 'Date'])\n", "print(merged_df)\n" ] }, { "cell_type": "code", "execution_count": 7, "id": "9aa57c3b-c14a-40ba-bcba-b220c653b593", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Group the merged DataFrame by Ticker and calculate the correlation matrix for each group\n", "grouped = merged_df.groupby('Ticker')\n", "\n", "# Initialize an empty list to store the correlation results\n", "ticker_correlations = []\n", "\n", "# Loop through each group (ticker) and calculate the correlation matrix\n", "for ticker, group in grouped:\n", " corr_matrix = group[['Return_mean', 'Return_std', 'SentimentScore']].corr()\n", " corr_matrix['Ticker'] = ticker\n", " ticker_correlations.append(corr_matrix)\n", "\n", "# Concatenate the list of DataFrames into a single DataFrame\n", "ticker_correlations = pd.concat(ticker_correlations)\n", "\n", "# Reset the index and filter out unnecessary columns\n", "ticker_correlations.reset_index(inplace=True)\n", "ticker_correlations = ticker_correlations[['Ticker', 'index', 'Return_mean', 'Return_std', 'SentimentScore']]\n", "\n", "# Rename the columns for better readability\n", "ticker_correlations.columns = ['Ticker', 'Metric', 'Return_mean_corr', 'Return_std_corr', 'SentimentScore_corr']\n" ] }, { "cell_type": "code", "execution_count": 8, "id": "602b86ab-5645-4aa6-bb1f-35d5b36aedd7", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TickerMetricReturn_mean_corrReturn_std_corrSentimentScore_corr
0XLBReturn_mean1.000000-0.250130-0.411511
1XLBReturn_std-0.2501301.000000-0.160990
2XLBSentimentScore-0.411511-0.1609901.000000
3XLCReturn_mean1.000000-0.323311-0.033343
4XLCReturn_std-0.3233111.000000-0.168661
5XLCSentimentScore-0.033343-0.1686611.000000
6XLEReturn_mean1.000000-0.015576-0.451910
7XLEReturn_std-0.0155761.000000-0.392416
8XLESentimentScore-0.451910-0.3924161.000000
9XLFReturn_mean1.000000-0.1725280.099480
10XLFReturn_std-0.1725281.000000-0.184279
11XLFSentimentScore0.099480-0.1842791.000000
12XLIReturn_mean1.0000000.2111600.517600
13XLIReturn_std0.2111601.000000-0.067749
14XLISentimentScore0.517600-0.0677491.000000
15XLKReturn_mean1.0000000.2495750.014031
16XLKReturn_std0.2495751.000000-0.113968
17XLKSentimentScore0.014031-0.1139681.000000
18XLPReturn_mean1.000000-0.245176-0.098916
19XLPReturn_std-0.2451761.0000000.120359
20XLPSentimentScore-0.0989160.1203591.000000
21XLREReturn_mean1.0000000.1919160.621911
22XLREReturn_std0.1919161.0000000.419158
23XLRESentimentScore0.6219110.4191581.000000
24XLUReturn_mean1.0000000.443714-0.576991
25XLUReturn_std0.4437141.0000000.273541
26XLUSentimentScore-0.5769910.2735411.000000
27XLVReturn_mean1.000000-0.451140-0.231532
28XLVReturn_std-0.4511401.0000000.075094
29XLVSentimentScore-0.2315320.0750941.000000
30XLYReturn_mean1.0000000.0727560.043473
31XLYReturn_std0.0727561.000000-0.019665
32XLYSentimentScore0.043473-0.0196651.000000
\n", "
" ], "text/plain": [ " Ticker Metric Return_mean_corr Return_std_corr \\\n", "0 XLB Return_mean 1.000000 -0.250130 \n", "1 XLB Return_std -0.250130 1.000000 \n", "2 XLB SentimentScore -0.411511 -0.160990 \n", "3 XLC Return_mean 1.000000 -0.323311 \n", "4 XLC Return_std -0.323311 1.000000 \n", "5 XLC SentimentScore -0.033343 -0.168661 \n", "6 XLE Return_mean 1.000000 -0.015576 \n", "7 XLE Return_std -0.015576 1.000000 \n", "8 XLE SentimentScore -0.451910 -0.392416 \n", "9 XLF Return_mean 1.000000 -0.172528 \n", "10 XLF Return_std -0.172528 1.000000 \n", "11 XLF SentimentScore 0.099480 -0.184279 \n", "12 XLI Return_mean 1.000000 0.211160 \n", "13 XLI Return_std 0.211160 1.000000 \n", "14 XLI SentimentScore 0.517600 -0.067749 \n", "15 XLK Return_mean 1.000000 0.249575 \n", "16 XLK Return_std 0.249575 1.000000 \n", "17 XLK SentimentScore 0.014031 -0.113968 \n", "18 XLP Return_mean 1.000000 -0.245176 \n", "19 XLP Return_std -0.245176 1.000000 \n", "20 XLP SentimentScore -0.098916 0.120359 \n", "21 XLRE Return_mean 1.000000 0.191916 \n", "22 XLRE Return_std 0.191916 1.000000 \n", "23 XLRE SentimentScore 0.621911 0.419158 \n", "24 XLU Return_mean 1.000000 0.443714 \n", "25 XLU Return_std 0.443714 1.000000 \n", "26 XLU SentimentScore -0.576991 0.273541 \n", "27 XLV Return_mean 1.000000 -0.451140 \n", "28 XLV Return_std -0.451140 1.000000 \n", "29 XLV SentimentScore -0.231532 0.075094 \n", "30 XLY Return_mean 1.000000 0.072756 \n", "31 XLY Return_std 0.072756 1.000000 \n", "32 XLY SentimentScore 0.043473 -0.019665 \n", "\n", " SentimentScore_corr \n", "0 -0.411511 \n", "1 -0.160990 \n", "2 1.000000 \n", "3 -0.033343 \n", "4 -0.168661 \n", "5 1.000000 \n", "6 -0.451910 \n", "7 -0.392416 \n", "8 1.000000 \n", "9 0.099480 \n", "10 -0.184279 \n", "11 1.000000 \n", "12 0.517600 \n", "13 -0.067749 \n", "14 1.000000 \n", "15 0.014031 \n", "16 -0.113968 \n", "17 1.000000 \n", "18 -0.098916 \n", "19 0.120359 \n", "20 1.000000 \n", "21 0.621911 \n", "22 0.419158 \n", "23 1.000000 \n", "24 -0.576991 \n", "25 0.273541 \n", "26 1.000000 \n", "27 -0.231532 \n", "28 0.075094 \n", "29 1.000000 \n", "30 0.043473 \n", "31 -0.019665 \n", "32 1.000000 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ticker_correlations" ] }, { "cell_type": "code", "execution_count": 9, "id": "09ae8e06-d68d-4e10-8bf8-79079b7c7f8a", "metadata": { "tags": [] }, "outputs": [], "source": [ "ticker_correlations.to_csv('analysis csv file/ticker_correlations.csv', index=False)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" } }, "nbformat": 4, "nbformat_minor": 5 }