import pandas as pd
import seaborn
import matplotlib.pyplot as plt
import datetime as dt
from datetime import date
from dateutil.relativedelta import relativedelta
!pip install yahoo_fin
from yahoo_fin import stock_info as si
def stock_data(stock_name):
    #Pull the last three years of daily OHLCV data for the given ticker
    df = si.get_data(stock_name,
                     start_date=(date.today() - relativedelta(years=3)).strftime('%m/%d/%Y'),
                     end_date=date.today().strftime('%m/%d/%Y'))
    return df
ticker='HD'
company_name='Home Depot'
df = stock_data(ticker)
df.head()
open | high | low | close | adjclose | volume | ticker | |
---|---|---|---|---|---|---|---|
2020-06-08 | 252.490005 | 256.809998 | 252.259995 | 256.769989 | 239.264954 | 3811900 | HD |
2020-06-09 | 255.330002 | 258.290009 | 253.860001 | 256.760010 | 239.255646 | 3716100 | HD |
2020-06-10 | 257.450012 | 259.290009 | 254.220001 | 254.449997 | 237.103104 | 3589800 | HD |
2020-06-11 | 248.860001 | 250.619995 | 238.740005 | 239.470001 | 223.144394 | 6563700 | HD |
2020-06-12 | 243.070007 | 246.389999 | 237.050003 | 242.449997 | 225.921204 | 5238500 | HD |
df.tail()
open | high | low | close | adjclose | volume | ticker | |
---|---|---|---|---|---|---|---|
2023-05-31 | 289.589996 | 290.000000 | 281.959991 | 283.450012 | 283.450012 | 18288800 | HD |
2023-06-01 | 284.049988 | 289.220001 | 279.980011 | 288.390015 | 288.390015 | 4305100 | HD |
2023-06-02 | 290.649994 | 296.209991 | 289.720001 | 295.940002 | 295.940002 | 4514600 | HD |
2023-06-05 | 295.619995 | 295.720001 | 291.369995 | 293.100006 | 293.100006 | 3028800 | HD |
2023-06-06 | 291.820007 | 296.920013 | 291.649994 | 296.000000 | 296.000000 | 2854800 | HD |
#Add target values for two weeks and one month.
#Dataset has values for weekdays only, so two weeks will be 10 data points
# and one month will be 20 data points
df['TwoWeeks'] = df['close'].rolling(10).mean()
df['Month'] = df['close'].rolling(20).mean()
df.head()
open | high | low | close | adjclose | volume | ticker | TwoWeeks | Month | |
---|---|---|---|---|---|---|---|---|---|
2020-06-08 | 252.490005 | 256.809998 | 252.259995 | 256.769989 | 239.264938 | 3811900 | HD | NaN | NaN |
2020-06-09 | 255.330002 | 258.290009 | 253.860001 | 256.760010 | 239.255646 | 3716100 | HD | NaN | NaN |
2020-06-10 | 257.450012 | 259.290009 | 254.220001 | 254.449997 | 237.103119 | 3589800 | HD | NaN | NaN |
2020-06-11 | 248.860001 | 250.619995 | 238.740005 | 239.470001 | 223.144379 | 6563700 | HD | NaN | NaN |
2020-06-12 | 243.070007 | 246.389999 | 237.050003 | 242.449997 | 225.921188 | 5238500 | HD | NaN | NaN |
df.dropna(inplace=True)
df.head()
open | high | low | close | adjclose | volume | ticker | TwoWeeks | Month | |
---|---|---|---|---|---|---|---|---|---|
2020-07-06 | 250.270004 | 251.500000 | 247.039993 | 249.550003 | 232.537186 | 3133800 | HD | 247.481999 | 248.1370 |
2020-07-07 | 247.369995 | 250.779999 | 247.070007 | 247.350006 | 230.487167 | 2927800 | HD | 247.300999 | 247.6660 |
2020-07-08 | 247.869995 | 249.789993 | 246.220001 | 249.169998 | 232.183075 | 2294000 | HD | 247.187000 | 247.2865 |
2020-07-09 | 249.660004 | 250.509995 | 246.350006 | 247.960007 | 231.055588 | 2994700 | HD | 247.370000 | 246.9620 |
2020-07-10 | 248.289993 | 250.330002 | 246.639999 | 250.110001 | 233.058990 | 2745300 | HD | 247.842999 | 247.4940 |
!pip install pytrends
import pytrends
from pytrends.request import TrendReq
pytrends = TrendReq()
kw_list=['Home Depot']
#Gather Google Trends interest for 'Home Depot' over the last three years
pytrends.build_payload(kw_list, geo='',
                       timeframe='{} {}'.format((date.today() - relativedelta(years=3)).strftime('%Y-%m-%d'),
                                                date.today().strftime('%Y-%m-%d')))
keyword_interest = pytrends.interest_over_time()
del keyword_interest['isPartial']
keyword_interest.columns = keyword_interest.columns.rename(ticker)  #name the column index after the ticker; the column label stays 'Home Depot'
keyword_interest.head()
Home Depot | |
---|---|
date | |
2020-06-07 | 100 |
2020-06-14 | 100 |
2020-06-21 | 96 |
2020-06-28 | 97 |
2020-07-05 | 90 |
keyword_interest['Home Depot'].plot(title='Google Trends for \'Home Depot\' in the last three years')
<AxesSubplot: title={'center': "Google Trends for 'Home Depot' in the last three years"}>
df_combined = pd.concat([df, keyword_interest], axis=1)
#df_combined.dropna(subset='Month', inplace=True)
df_combined.head(20)
open | high | low | close | adjclose | volume | ticker | TwoWeeks | Month | Home Depot | |
---|---|---|---|---|---|---|---|---|---|---|
2020-06-07 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 100.0 |
2020-06-14 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 100.0 |
2020-06-21 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 96.0 |
2020-06-28 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 97.0 |
2020-07-05 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 90.0 |
2020-07-06 | 250.270004 | 251.500000 | 247.039993 | 249.550003 | 232.537186 | 3133800.0 | HD | 247.481999 | 248.137000 | NaN |
2020-07-07 | 247.369995 | 250.779999 | 247.070007 | 247.350006 | 230.487167 | 2927800.0 | HD | 247.300999 | 247.666000 | NaN |
2020-07-08 | 247.869995 | 249.789993 | 246.220001 | 249.169998 | 232.183075 | 2294000.0 | HD | 247.187000 | 247.286500 | NaN |
2020-07-09 | 249.660004 | 250.509995 | 246.350006 | 247.960007 | 231.055588 | 2994700.0 | HD | 247.370000 | 246.962000 | NaN |
2020-07-10 | 248.289993 | 250.330002 | 246.639999 | 250.110001 | 233.058990 | 2745300.0 | HD | 247.842999 | 247.494000 | NaN |
2020-07-12 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 86.0 |
2020-07-13 | 251.919998 | 257.859985 | 249.089996 | 249.619995 | 232.602386 | 4437500.0 | HD | 248.703999 | 247.852500 | NaN |
2020-07-14 | 249.000000 | 258.179993 | 248.460007 | 257.790009 | 240.215408 | 4614200.0 | HD | 249.871001 | 248.674001 | NaN |
2020-07-15 | 260.140015 | 261.290009 | 255.149994 | 257.799988 | 240.224716 | 4343700.0 | HD | 250.600000 | 249.066500 | NaN |
2020-07-16 | 256.760010 | 260.500000 | 256.000000 | 258.079987 | 240.485641 | 2511200.0 | HD | 251.592999 | 249.427999 | NaN |
2020-07-17 | 260.029999 | 260.649994 | 257.720001 | 260.380005 | 242.628845 | 3091300.0 | HD | 252.781000 | 249.986499 | NaN |
2020-07-19 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 85.0 |
2020-07-20 | 259.040009 | 261.200012 | 258.019989 | 260.170013 | 242.433228 | 2451100.0 | HD | 253.843001 | 250.662500 | NaN |
2020-07-21 | 261.619995 | 263.869995 | 260.720001 | 262.420013 | 244.529800 | 2439900.0 | HD | 255.350002 | 251.325500 | NaN |
2020-07-22 | 262.589996 | 265.589996 | 262.000000 | 265.170013 | 247.092316 | 2750000.0 | HD | 256.950003 | 252.068501 | NaN |
Google Trends returns one value per week (dated on a Sunday) when the requested window spans multiple years. I will forward-fill each Sunday value across the following week, since we care about the overall trend rather than daily values.
df_combined['Home Depot'] = df_combined['Home Depot'].fillna(method='ffill')
df_combined.dropna(inplace=True)
df_combined.head(30)
open | high | low | close | adjclose | volume | ticker | TwoWeeks | Month | Home Depot | |
---|---|---|---|---|---|---|---|---|---|---|
2020-07-06 | 250.270004 | 251.500000 | 247.039993 | 249.550003 | 232.537186 | 3133800.0 | HD | 247.481999 | 248.137000 | 90.0 |
2020-07-07 | 247.369995 | 250.779999 | 247.070007 | 247.350006 | 230.487167 | 2927800.0 | HD | 247.300999 | 247.666000 | 90.0 |
2020-07-08 | 247.869995 | 249.789993 | 246.220001 | 249.169998 | 232.183075 | 2294000.0 | HD | 247.187000 | 247.286500 | 90.0 |
2020-07-09 | 249.660004 | 250.509995 | 246.350006 | 247.960007 | 231.055588 | 2994700.0 | HD | 247.370000 | 246.962000 | 90.0 |
2020-07-10 | 248.289993 | 250.330002 | 246.639999 | 250.110001 | 233.058990 | 2745300.0 | HD | 247.842999 | 247.494000 | 90.0 |
2020-07-13 | 251.919998 | 257.859985 | 249.089996 | 249.619995 | 232.602386 | 4437500.0 | HD | 248.703999 | 247.852500 | 86.0 |
2020-07-14 | 249.000000 | 258.179993 | 248.460007 | 257.790009 | 240.215408 | 4614200.0 | HD | 249.871001 | 248.674001 | 86.0 |
2020-07-15 | 260.140015 | 261.290009 | 255.149994 | 257.799988 | 240.224716 | 4343700.0 | HD | 250.600000 | 249.066500 | 86.0 |
2020-07-16 | 256.760010 | 260.500000 | 256.000000 | 258.079987 | 240.485641 | 2511200.0 | HD | 251.592999 | 249.427999 | 86.0 |
2020-07-17 | 260.029999 | 260.649994 | 257.720001 | 260.380005 | 242.628845 | 3091300.0 | HD | 252.781000 | 249.986499 | 86.0 |
2020-07-20 | 259.040009 | 261.200012 | 258.019989 | 260.170013 | 242.433228 | 2451100.0 | HD | 253.843001 | 250.662500 | 85.0 |
2020-07-21 | 261.619995 | 263.869995 | 260.720001 | 262.420013 | 244.529800 | 2439900.0 | HD | 255.350002 | 251.325500 | 85.0 |
2020-07-22 | 262.589996 | 265.589996 | 262.000000 | 265.170013 | 247.092316 | 2750000.0 | HD | 256.950003 | 252.068501 | 85.0 |
2020-07-23 | 267.799988 | 267.799988 | 261.799988 | 263.809998 | 245.824997 | 2680100.0 | HD | 258.535002 | 252.952501 | 85.0 |
2020-07-24 | 265.040009 | 266.890015 | 262.989990 | 265.309998 | 247.222717 | 2984500.0 | HD | 260.055002 | 253.949001 | 85.0 |
2020-07-27 | 265.089996 | 268.679993 | 265.089996 | 267.420013 | 249.188919 | 2412500.0 | HD | 261.835004 | 255.269501 | 85.0 |
2020-07-28 | 268.559998 | 269.070007 | 264.670013 | 265.279999 | 247.194794 | 2227000.0 | HD | 262.584003 | 256.227502 | 85.0 |
2020-07-29 | 264.799988 | 267.109985 | 264.170013 | 264.660004 | 246.617065 | 2874100.0 | HD | 263.270004 | 256.935002 | 85.0 |
2020-07-30 | 263.339996 | 267.350006 | 261.549988 | 266.309998 | 248.154556 | 2347800.0 | HD | 264.093005 | 257.843002 | 85.0 |
2020-07-31 | 265.000000 | 267.170013 | 260.609985 | 265.489990 | 247.390457 | 3640600.0 | HD | 264.604004 | 258.692502 | 85.0 |
2020-08-03 | 266.730011 | 268.579987 | 265.670013 | 266.179993 | 248.033463 | 2363500.0 | HD | 265.205002 | 259.524001 | 80.0 |
2020-08-04 | 266.630005 | 267.890015 | 263.839996 | 267.869995 | 249.608231 | 2224000.0 | HD | 265.750000 | 260.550001 | 80.0 |
2020-08-05 | 268.390015 | 268.390015 | 265.890015 | 267.480011 | 249.244797 | 1959900.0 | HD | 265.981000 | 261.465501 | 80.0 |
2020-08-06 | 266.600006 | 270.440002 | 266.529999 | 269.369995 | 251.005936 | 2203400.0 | HD | 266.537000 | 262.536001 | 80.0 |
2020-08-07 | 270.609985 | 274.920013 | 269.809998 | 271.640015 | 253.121140 | 2846300.0 | HD | 267.170001 | 263.612502 | 80.0 |
2020-08-10 | 272.420013 | 275.000000 | 271.799988 | 274.730011 | 256.000580 | 2393100.0 | HD | 267.901001 | 264.868002 | 79.0 |
2020-08-11 | 277.690002 | 279.369995 | 274.410004 | 274.920013 | 256.177612 | 3321300.0 | HD | 268.865002 | 265.724503 | 79.0 |
2020-08-12 | 279.750000 | 282.970001 | 276.959991 | 281.579987 | 262.383514 | 3867900.0 | HD | 270.557001 | 266.913503 | 79.0 |
2020-08-13 | 281.160004 | 282.649994 | 279.739990 | 281.660004 | 262.458069 | 2202400.0 | HD | 272.092001 | 268.092503 | 79.0 |
2020-08-14 | 281.140015 | 282.000000 | 279.190002 | 280.549988 | 261.423767 | 2490400.0 | HD | 273.598001 | 269.101003 | 79.0 |
!pip3 install git+https://github.com/JustAnotherArchivist/snscrape.git
kw_list
['HD', 'Home Depot']
# The following code collects tweets mentioning Home Depot published by the
# "markets" and "MarketWatch" Twitter accounts over the last three years
import snscrape.modules.twitter as sntwitter
import pandas as pd
import datetime as dt
from datetime import date
# Creating list to append tweet data to
attributes_container = []
sources_all =['CNBC','cnn', 'cnnbrk','MarketWatch', 'Benzinga', 'Stocktwits','BreakoutStocks',
'bespokeinvest','WSJMarkets','Stephanie_Link','nytimesbusiness','IBDinvestors','WSJDealJournal',
'business', 'TheEconomist','WSJ', 'ABC', 'CBSNews','FoxNews', 'NBCNews']
sources=['markets','MarketWatch']
for tweet in sntwitter.TwitterSearchScraper('from:{} since:{} until:{}'.format(sources[0], (date.today() - relativedelta(years=3)).strftime('%Y-%m-%d'), date.today().strftime('%Y-%m-%d'))).get_items():
if 'Home Depot'.lower() in tweet.content.lower():
attributes_container.append([tweet.date, tweet.content.split('http')[0]])
tweets_df = pd.DataFrame(attributes_container, columns=["Date Created", "Tweets"])
tweets_df.drop_duplicates(subset='Tweets',inplace=True)
attributes_container = []
for tweet in sntwitter.TwitterSearchScraper('from:{} since:{} until:{}'.format(sources[1], (date.today() - relativedelta(years=3)).strftime('%Y-%m-%d'), date.today().strftime('%Y-%m-%d'))).get_items():
    if 'Home Depot'.lower() in tweet.content.lower():
        attributes_container.append([tweet.date, tweet.user.username, tweet.content.split('http')[0]])
# Creating a dataframe from the tweets list above (the "User Name" column is dropped later)
tweets_df_marketwatch = pd.DataFrame(attributes_container, columns=["Date Created", "User Name", "Tweets"])
print(tweets_df_marketwatch.shape)
tweets_df_marketwatch.drop_duplicates(subset='Tweets',inplace=True)
pd.set_option("max_colwidth", None)
tweets_df.head()
Date Created | Tweets | |
---|---|---|
0 | 2023-03-01 18:05:43+00:00 | From Meta to Home Depot, corporate America is talking about AI on earnings calls |
1 | 2023-02-21 19:06:41+00:00 | Home Depot, Walmart and DocuSign.\n\n@RitikaGuptaTV has your stocks to watch this Tuesday |
2 | 2023-02-21 15:15:07+00:00 | Home Depot forecasts a fiscal-year profit decline and announces plans for a $1 billion wage investment for hourly workers |
3 | 2022-11-15 18:11:03+00:00 | Home Depot reported profit that beat expectations, with the CEO saying consumers are staying resilient. But a drop in transactions volume has investors concerned |
4 | 2022-08-16 15:09:26+00:00 | Home Depot’s second-quarter results beat Wall Street estimates even as the US housing market shows signs of cooling off |
tweets_df_marketwatch.shape
(1159, 3)
!pip install pygooglenews
kw_list
['Home Depot']
import pygooglenews
from pygooglenews import GoogleNews
gn = GoogleNews()
headlines_related = []
date_list = pd.date_range(end=date.today().strftime('%Y-%m-%d'), start=(date.today()-relativedelta(years=3)).strftime('%Y-%m-%d')).tolist()
clean_date = [str(i).split(" ")[0] for i in date_list]
for day in clean_date:   #use 'day' to avoid shadowing the imported datetime.date
    headlines = []
    for word in kw_list:
        search = gn.search(word, when=day)
        for item in search['entries']:
            headlines.append(item['title'])
    #We have headlines of news stories whose body may contain one of the keywords.
    #We will use only headlines to make predictions, but some headlines do not contain any of the keywords.
    #Only include headlines that contain one of the keywords.
    for headline in headlines:
        for i in range(len(kw_list)):
            if kw_list[i] in headline:
                if headline not in headlines_related:
                    headlines_related.append((day, headline))
len(headlines_related)
1739
headlines_related[:10]
[('2020-06-08', "Home Depot Father's Day sales: Save on DeWalt and Milwaukee ... - USA TODAY"), ('2020-06-17', "Here's the Defining Characteristic of Home Depot's Success - Nasdaq"), ('2020-06-17', 'Home Depot’s new outdoor tool sale takes up to 40% off RYOBI, DEWALT, more - 9to5Toys'), ('2020-06-18', 'Home Depot’s Last Chance Father’s Day sale takes up to 35% off tools and more - 9to5Toys'), ('2020-06-24', '1 dead, 1 hurt in Home Depot parking lot crash in Costa Mesa - OCRegister'), ('2020-06-25', "'It's dehumanizing': Home Depot employee felt colleague's racist ... - Hamilton Spectator"), ('2020-06-30', 'Home Depot 4th of July sale: Shop top deals on DeWalt, Dyna-Glo ... - USA TODAY'), ('2020-06-30', 'The Home Depot Announces Renewable Energy Goal and Pledges ... - PR Newswire'), ('2020-07-01', 'Home Depot bans some rope sales after nooses were found tied on ... - Courier Journal'), ('2020-07-02', 'Home Depot changes rope sales practice after nooses are found in store - CNN')]
df_news = pd.DataFrame(headlines_related, columns=['Date', 'Headline'])
#Strip the source name that Google News appends after a dash (note: this also cuts at any in-headline hyphen)
df_news['Headline'] = df_news['Headline'].apply(lambda x: x.split('-')[0])
df_news['Date'] = pd.to_datetime(df_news['Date'])
print(df_news.shape)
df_news.drop_duplicates(subset='Headline', inplace=True)
print(df_news.shape)
df_news.head()
(1739, 2)
(1373, 2)
Date | Headline | |
---|---|---|
0 | 2020-06-08 | Home Depot Father's Day sales: Save on DeWalt and Milwaukee ... |
1 | 2020-06-17 | Here's the Defining Characteristic of Home Depot's Success |
2 | 2020-06-17 | Home Depot’s new outdoor tool sale takes up to 40% off RYOBI, DEWALT, more |
3 | 2020-06-18 | Home Depot’s Last Chance Father’s Day sale takes up to 35% off tools and more |
4 | 2020-06-24 | 1 dead, 1 hurt in Home Depot parking lot crash in Costa Mesa |
!pip install vaderSentiment
tweets_df_marketwatch.drop("User Name", axis=1, inplace=True)
#combine tweets coming from markets, and marketwatch
markets = pd.concat([tweets_df, tweets_df_marketwatch])
markets.shape
(1169, 2)
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()
def vader(row):
comp = sid.polarity_scores(row)['compound']
return comp
import re
markets['Tweets'] = markets['Tweets'].apply(lambda x: re.sub("&amp;", "&", x))  #decode HTML-escaped ampersands
markets['Tweets'] = markets['Tweets'].apply(lambda x: re.sub("\n"," ", x))
markets['Tweets'] = markets['Tweets'].apply(lambda x: re.sub("\xa0"," ", x))
markets.head()
Date Created | Tweets | |
---|---|---|
0 | 2023-03-01 18:05:43+00:00 | From Meta to Home Depot, corporate America is talking about AI on earnings calls |
1 | 2023-02-21 19:06:41+00:00 | Home Depot, Walmart and DocuSign. @RitikaGuptaTV has your stocks to watch this Tuesday |
2 | 2023-02-21 15:15:07+00:00 | Home Depot forecasts a fiscal-year profit decline and announces plans for a $1 billion wage investment for hourly workers |
3 | 2022-11-15 18:11:03+00:00 | Home Depot reported profit that beat expectations, with the CEO saying consumers are staying resilient. But a drop in transactions volume has investors concerned |
4 | 2022-08-16 15:09:26+00:00 | Home Depot’s second-quarter results beat Wall Street estimates even as the US housing market shows signs of cooling off |
markets['Date Created'] = markets['Date Created'].astype('str')
markets['Date Created'] = markets['Date Created'].apply(lambda x:x[:11])
markets.head()
Date Created | Tweets | |
---|---|---|
0 | 2023-03-01 | From Meta to Home Depot, corporate America is talking about AI on earnings calls |
1 | 2023-02-21 | Home Depot, Walmart and DocuSign. @RitikaGuptaTV has your stocks to watch this Tuesday |
2 | 2023-02-21 | Home Depot forecasts a fiscal-year profit decline and announces plans for a $1 billion wage investment for hourly workers |
3 | 2022-11-15 | Home Depot reported profit that beat expectations, with the CEO saying consumers are staying resilient. But a drop in transactions volume has investors concerned |
4 | 2022-08-16 | Home Depot’s second-quarter results beat Wall Street estimates even as the US housing market shows signs of cooling off |
markets['Date Created'] = pd.to_datetime(markets['Date Created'])
markets.sort_values(by='Date Created', inplace=True)
markets.index =range(markets.shape[0])
markets.head()
Date Created | Tweets | |
---|---|---|
0 | 2020-06-06 | What to make of the recent jobs numbers? Joseph Stiglitz says there's more than meets the eye, including those who aren't considered "unemployed," but aren't working. And without intervention, things may get worse, he says. WATCH: |
1 | 2020-06-08 | Bull, bear, bull, bear and now a new bull market — whatever’s next, these stocks will outperform, strategist says |
2 | 2020-06-09 | HD Supply misses on profit expectations but beats on sales |
3 | 2020-06-11 | Normally, Madrid swells with tourists. Our editor @bkollmeyer writes about how locals are reclaiming their city in a summer without travel. |
4 | 2020-06-11 | Should I tell my sister that her husband, a notorious spender, has a secret credit card? |
markets['VaderSent'] = markets['Tweets'].apply(vader)
markets
Date Created | Tweets | VaderSent | |
---|---|---|---|
0 | 2020-06-06 | What to make of the recent jobs numbers? Joseph Stiglitz says there's more than meets the eye, including those who aren't considered "unemployed," but aren't working. And without intervention, things may get worse, he says. WATCH: | -0.6310 |
1 | 2020-06-08 | Bull, bear, bull, bear and now a new bull market — whatever’s next, these stocks will outperform, strategist says | 0.0000 |
2 | 2020-06-09 | HD Supply misses on profit expectations but beats on sales | 0.1280 |
3 | 2020-06-11 | Normally, Madrid swells with tourists. Our editor @bkollmeyer writes about how locals are reclaiming their city in a summer without travel. | 0.0000 |
4 | 2020-06-11 | Should I tell my sister that her husband, a notorious spender, has a secret credit card? | -0.0772 |
... | ... | ... | ... |
1164 | 2023-05-30 | Trans designer in Target anti-LGBTQ+ backlash says he was ‘dealt the worst hand’ | -0.6249 |
1165 | 2023-05-31 | Brown-Forman to invest $200 million to expand Tequila distillery in Jalisco, Mexico | 0.3182 |
1166 | 2023-06-02 | Zelle and Chase working to resolve duplicate-payments issue | 0.3818 |
1167 | 2023-06-03 | The ‘best job in America’ pays over $120,000 a year, offers low stress, healthy work-life balance — and its workers are in high demand | -0.4019 |
1168 | 2023-06-05 | Cava sets IPO terms, as restaurant chain is set to be valued at more than $2 billion | 0.4404 |
1169 rows × 3 columns
#VADER gives a compound sentiment score between -1 and 1, with -1 the most negative,
#1 the most positive and 0 neutral. We put each tweet into the corresponding
#pos, neu or neg bin based on this value.
def bins(value):
hold_sent = []
if -1 <= value < -0.33:
hold_sent.append('neg')
if -0.33<=value<=0.33:
hold_sent.append('neu')
if 0.33 < value <= 1:
hold_sent.append('pos')
return hold_sent
markets['sentiment'] = markets['VaderSent'].apply(bins)
values=[]
for i in markets['sentiment'].values:
values.append(i[0])
markets['sentiment']=values
markets.head(10)
Date Created | Tweets | VaderSent | sentiment | |
---|---|---|---|---|
0 | 2020-06-06 | What to make of the recent jobs numbers? Joseph Stiglitz says there's more than meets the eye, including those who aren't considered "unemployed," but aren't working. And without intervention, things may get worse, he says. WATCH: | -0.6310 | neg |
1 | 2020-06-08 | Bull, bear, bull, bear and now a new bull market — whatever’s next, these stocks will outperform, strategist says | 0.0000 | neu |
2 | 2020-06-09 | HD Supply misses on profit expectations but beats on sales | 0.1280 | neu |
3 | 2020-06-11 | Normally, Madrid swells with tourists. Our editor @bkollmeyer writes about how locals are reclaiming their city in a summer without travel. | 0.0000 | neu |
4 | 2020-06-11 | Should I tell my sister that her husband, a notorious spender, has a secret credit card? | -0.0772 | neu |
5 | 2020-06-12 | Palantir Technologies Inc. is reportedly preparing to confidentially file for its long-awaited IPO. | 0.0000 | neu |
6 | 2020-06-15 | It's a well-known secret of Wall Street: Little business actually takes place in New York. We spoke to several Wall Street road warriors to learn what the past 3 months have been like when they can't fly anywhere. | 0.3612 | pos |
7 | 2020-06-16 | Dear airline passengers: Wear your face mask or you might get banned from flying. | -0.1027 | neu |
8 | 2020-06-18 | Facebook takes down Trump-Pence ads featuring symbols previously used by Nazis | 0.0000 | neu |
9 | 2020-06-18 | Dow opens with 170 point drop as jobless claims stay elevated | -0.2732 | neu |
#Each week determine the weight of positive, neutral and negative tweets:
tweets_weight = pd.DataFrame(markets.groupby([pd.Grouper(key='Date Created', freq='W')])['sentiment'], columns=['Date','Data'])
tweets_weight.head()
for i in range(len(tweets_weight)):
n = dict(tweets_weight.loc[i,'Data'].value_counts(normalize=True))
if 'neg' not in n:
n['neg']=0
if 'pos' not in n:
n['pos']=0
if 'neu' not in n:
n['neu']=0
tweets_weight.loc[i,'tweet_pos'] = n['pos']
tweets_weight.loc[i,'tweet_neg'] = n['neg']
tweets_weight.loc[i,'tweet_neu'] = n['neu']
tweets_weight
Date | Data | tweet_pos | tweet_neg | tweet_neu | |
---|---|---|---|---|---|
0 | 2020-06-07 | 0 neg Name: sentiment, dtype: object | 0.000000 | 1.000000 | 0.000000 |
1 | 2020-06-14 | 1 neu 2 neu 3 neu 4 neu 5 neu Name: sentiment, dtype: object | 0.000000 | 0.000000 | 1.000000 |
2 | 2020-06-21 | 6 pos 7 neu 8 neu 9 neu 10 pos 11 neu Name: sentiment, dtype: object | 0.333333 | 0.000000 | 0.666667 |
3 | 2020-06-28 | 12 neu 13 neu 14 neu 15 neu 16 neg 17 neu 18 neu 19 neg 20 neu Name: sentiment, dtype: object | 0.000000 | 0.222222 | 0.777778 |
4 | 2020-07-05 | 21 neg 22 neg 23 neg 24 neg 25 pos 26 neu 27 neg 28 neu 29 neu 30 neu Name: sentiment, dtype: object | 0.100000 | 0.500000 | 0.400000 |
... | ... | ... | ... | ... | ... |
153 | 2023-05-14 | 1141 pos 1142 neu 1143 neu 1144 neu 1145 neg Name: sentiment, dtype: object | 0.200000 | 0.200000 | 0.600000 |
154 | 2023-05-21 | 1146 neu 1147 neu 1148 neu 1149 neu 1150 neu 1151 neu 1152 neg 1153 neu 1154 neg 1155 neu 1156 pos 1157 neu 1158 neu 1159 neu 1160 neu Name: sentiment, dtype: object | 0.066667 | 0.133333 | 0.800000 |
155 | 2023-05-28 | 1161 neg 1162 neg 1163 neg Name: sentiment, dtype: object | 0.000000 | 1.000000 | 0.000000 |
156 | 2023-06-04 | 1164 neg 1165 neu 1166 pos 1167 neg Name: sentiment, dtype: object | 0.250000 | 0.500000 | 0.250000 |
157 | 2023-06-11 | 1168 pos Name: sentiment, dtype: object | 1.000000 | 0.000000 | 0.000000 |
158 rows × 5 columns
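The loop above works, but the same weekly shares can be computed more directly. A minimal sketch, assuming the same `markets` frame; note the week labels come out as Periods rather than the Sunday timestamps used by `pd.Grouper(freq='W')`:
#Sketch: weekly shares of each sentiment label via a normalized crosstab
shares = (pd.crosstab(markets['Date Created'].dt.to_period('W'),
                      markets['sentiment'], normalize='index')
            .rename(columns={'pos': 'tweet_pos', 'neg': 'tweet_neg', 'neu': 'tweet_neu'})
            .reset_index())
shares.head()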
tweets_weight.drop('Data', inplace=True, axis=1)
tweets_weight.head()
Date | tweet_pos | tweet_neg | tweet_neu | |
---|---|---|---|---|
0 | 2020-06-07 | 0.000000 | 1.000000 | 0.000000 |
1 | 2020-06-14 | 0.000000 | 0.000000 | 1.000000 |
2 | 2020-06-21 | 0.333333 | 0.000000 | 0.666667 |
3 | 2020-06-28 | 0.000000 | 0.222222 | 0.777778 |
4 | 2020-07-05 | 0.100000 | 0.500000 | 0.400000 |
tweets_weight.plot(x='Date', y=['tweet_pos','tweet_neu','tweet_neg'],title='Tweets\' Sentiment')
<AxesSubplot: title={'center': "Tweets' Sentiment"}, xlabel='Date'>
Tweets were mostly neutral in tone. Unsurprisingly, since the weekly shares sum to one, the positive and negative shares are negatively correlated.
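A quick numeric check of that claim (a sketch over the weekly share columns built above):
tweets_weight[['tweet_pos', 'tweet_neg', 'tweet_neu']].corr().round(3)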
df_news['VaderSent'] = df_news['Headline'].apply(vader)
df_news.head(10)
Date | Headline | VaderSent | |
---|---|---|---|
0 | 2020-06-08 | Home Depot Father's Day sales: Save on DeWalt and Milwaukee ... | 0.4939 |
1 | 2020-06-17 | Here's the Defining Characteristic of Home Depot's Success | 0.5719 |
2 | 2020-06-17 | Home Depot’s new outdoor tool sale takes up to 40% off RYOBI, DEWALT, more | 0.0000 |
3 | 2020-06-18 | Home Depot’s Last Chance Father’s Day sale takes up to 35% off tools and more | 0.2500 |
4 | 2020-06-24 | 1 dead, 1 hurt in Home Depot parking lot crash in Costa Mesa | -0.8860 |
5 | 2020-06-25 | 'It's dehumanizing': Home Depot employee felt colleague's racist ... | -0.8126 |
6 | 2020-06-30 | Home Depot 4th of July sale: Shop top deals on DeWalt, Dyna | 0.2023 |
7 | 2020-06-30 | The Home Depot Announces Renewable Energy Goal and Pledges ... | 0.2732 |
8 | 2020-07-01 | Home Depot bans some rope sales after nooses were found tied on ... | 0.0000 |
9 | 2020-07-02 | Home Depot changes rope sales practice after nooses are found in store | 0.0000 |
df_news['Date'] = pd.to_datetime(df_news['Date'])
df_news['sentiment'] = df_news['VaderSent'].apply(bins)
values=[]
for i in df_news['sentiment'].values:
values.append(i[0])
df_news['sentiment']=values
df_news.head(10)
Date | Headline | VaderSent | sentiment | |
---|---|---|---|---|
0 | 2020-06-08 | Home Depot Father's Day sales: Save on DeWalt and Milwaukee ... | 0.4939 | pos |
1 | 2020-06-17 | Here's the Defining Characteristic of Home Depot's Success | 0.5719 | pos |
2 | 2020-06-17 | Home Depot’s new outdoor tool sale takes up to 40% off RYOBI, DEWALT, more | 0.0000 | neu |
3 | 2020-06-18 | Home Depot’s Last Chance Father’s Day sale takes up to 35% off tools and more | 0.2500 | neu |
4 | 2020-06-24 | 1 dead, 1 hurt in Home Depot parking lot crash in Costa Mesa | -0.8860 | neg |
5 | 2020-06-25 | 'It's dehumanizing': Home Depot employee felt colleague's racist ... | -0.8126 | neg |
6 | 2020-06-30 | Home Depot 4th of July sale: Shop top deals on DeWalt, Dyna | 0.2023 | neu |
7 | 2020-06-30 | The Home Depot Announces Renewable Energy Goal and Pledges ... | 0.2732 | neu |
8 | 2020-07-01 | Home Depot bans some rope sales after nooses were found tied on ... | 0.0000 | neu |
9 | 2020-07-02 | Home Depot changes rope sales practice after nooses are found in store | 0.0000 | neu |
news_weight = pd.DataFrame(df_news.groupby([pd.Grouper(key='Date', freq='W')])['sentiment'], columns=['Date','Data'])
for i in range(len(news_weight)):
n = dict(news_weight.loc[i,'Data'].value_counts(normalize=True))
if 'neg' not in n:
n['neg']=0
if 'pos' not in n:
n['pos']=0
if 'neu' not in n:
n['neu']=0
news_weight.loc[i,'news_pos'] = n['pos']
news_weight.loc[i,'news_neg'] = n['neg']
news_weight.loc[i,'news_neu'] = n['neu']
news_weight
Date | Data | news_pos | news_neg | news_neu | |
---|---|---|---|---|---|
0 | 2020-06-14 | 0 pos Name: sentiment, dtype: object | 1.000000 | 0.000000 | 0.000000 |
1 | 2020-06-21 | 1 pos 2 neu 3 neu Name: sentiment, dtype: object | 0.333333 | 0.000000 | 0.666667 |
2 | 2020-06-28 | 4 neg 5 neg Name: sentiment, dtype: object | 0.000000 | 1.000000 | 0.000000 |
3 | 2020-07-05 | 6 neu 7 neu 8 neu 9 neu 10 neu 11 neg Name: sentiment, dtype: object | 0.000000 | 0.166667 | 0.833333 |
4 | 2020-07-12 | 12 neu 13 neu Name: sentiment, dtype: object | 0.000000 | 0.000000 | 1.000000 |
... | ... | ... | ... | ... | ... |
152 | 2023-05-14 | 1546 pos 1547 neu 1548 neu 1549 pos 1550 pos 1551 neu 1552 neu 1562 neu 1570 neu 1571 neg 1577 neu 1578 neu 1579 neu 1581 neu 1584 neg 1586 neu Name: sentiment, dtype: object | 0.187500 | 0.125000 | 0.687500 |
153 | 2023-05-21 | 1587 neg 1589 neu 1590 neu 1592 neu 1593 neg 1594 neg 1595 neu 1596 neg 1597 neu 1598 neu 1599 neu 1600 neu 1601 pos 1602 neu 1603 neu 1604 neu 1605 neu 1606 neu 1608 neu 1609 neg 1610 neu 1611 neg 1614 neg 1615 pos 1616 neu 1619 neu 1620 neu 1621 neu 1623 pos 1631 neu 1632 neu 1633 neu Name: sentiment, dtype: object | 0.093750 | 0.218750 | 0.687500 |
154 | 2023-05-28 | 1634 neu 1638 pos 1640 pos 1641 neu 1642 neu 1643 neg 1646 neu 1647 neu 1648 neu 1649 neg 1650 neu 1658 neu 1659 neg 1666 neu 1667 neu 1668 neu 1669 neu 1670 neu 1671 neg 1672 neu 1673 neg 1674 neg 1676 pos Name: sentiment, dtype: object | 0.130435 | 0.260870 | 0.608696 |
155 | 2023-06-04 | 1677 pos 1679 neg 1680 neu 1682 neu 1683 neg 1685 neu 1692 neu 1694 neg 1696 neg 1698 pos 1699 pos 1700 pos 1704 pos 1705 neu 1706 neg 1708 neu 1710 pos 1711 neu 1713 neu 1716 neu 1718 neu 1719 neg 1721 neu 1722 neu 1724 neu Name: sentiment, dtype: object | 0.240000 | 0.240000 | 0.520000 |
156 | 2023-06-11 | 1725 pos 1726 neu 1727 neu 1728 neu 1731 neg 1732 neu 1733 neg 1734 neu 1735 neu 1736 neg 1737 neg Name: sentiment, dtype: object | 0.090909 | 0.363636 | 0.545455 |
157 rows × 5 columns
news_weight.drop('Data', inplace=True, axis=1)
news_weight
Date | news_pos | news_neg | news_neu | |
---|---|---|---|---|
0 | 2020-06-14 | 1.000000 | 0.000000 | 0.000000 |
1 | 2020-06-21 | 0.333333 | 0.000000 | 0.666667 |
2 | 2020-06-28 | 0.000000 | 1.000000 | 0.000000 |
3 | 2020-07-05 | 0.000000 | 0.166667 | 0.833333 |
4 | 2020-07-12 | 0.000000 | 0.000000 | 1.000000 |
... | ... | ... | ... | ... |
152 | 2023-05-14 | 0.187500 | 0.125000 | 0.687500 |
153 | 2023-05-21 | 0.093750 | 0.218750 | 0.687500 |
154 | 2023-05-28 | 0.130435 | 0.260870 | 0.608696 |
155 | 2023-06-04 | 0.240000 | 0.240000 | 0.520000 |
156 | 2023-06-11 | 0.090909 | 0.363636 | 0.545455 |
157 rows × 4 columns
news_weight.plot(x='Date', y=['news_pos','news_neg','news_neu'], title='News Sentiment')
<AxesSubplot: title={'center': 'News Sentiment'}, xlabel='Date'>
News headlines were also mostly neutral in tone. Again, the positive and negative shares are negatively correlated.
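The same quick check for the weekly news shares (a sketch):
news_weight[['news_pos', 'news_neg', 'news_neu']].corr().round(3)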
df_combined.drop('ticker', inplace=True, axis=1)
df_combined.head()
open | high | low | close | adjclose | volume | TwoWeeks | Month | Home Depot | |
---|---|---|---|---|---|---|---|---|---|
2020-07-06 | 250.270004 | 251.500000 | 247.039993 | 249.550003 | 232.537186 | 3133800.0 | 247.481999 | 248.1370 | 90.0 |
2020-07-07 | 247.369995 | 250.779999 | 247.070007 | 247.350006 | 230.487167 | 2927800.0 | 247.300999 | 247.6660 | 90.0 |
2020-07-08 | 247.869995 | 249.789993 | 246.220001 | 249.169998 | 232.183075 | 2294000.0 | 247.187000 | 247.2865 | 90.0 |
2020-07-09 | 249.660004 | 250.509995 | 246.350006 | 247.960007 | 231.055588 | 2994700.0 | 247.370000 | 246.9620 | 90.0 |
2020-07-10 | 248.289993 | 250.330002 | 246.639999 | 250.110001 | 233.058990 | 2745300.0 | 247.842999 | 247.4940 | 90.0 |
df_combined = df_combined.reset_index()
df_combined.rename(columns={'index':'Date'}, inplace=True)
df_combined.head()
Date | open | high | low | close | adjclose | volume | TwoWeeks | Month | Home Depot | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 2020-07-06 | 250.270004 | 251.500000 | 247.039993 | 249.550003 | 232.537186 | 3133800.0 | 247.481999 | 248.1370 | 90.0 |
1 | 2020-07-07 | 247.369995 | 250.779999 | 247.070007 | 247.350006 | 230.487167 | 2927800.0 | 247.300999 | 247.6660 | 90.0 |
2 | 2020-07-08 | 247.869995 | 249.789993 | 246.220001 | 249.169998 | 232.183075 | 2294000.0 | 247.187000 | 247.2865 | 90.0 |
3 | 2020-07-09 | 249.660004 | 250.509995 | 246.350006 | 247.960007 | 231.055588 | 2994700.0 | 247.370000 | 246.9620 | 90.0 |
4 | 2020-07-10 | 248.289993 | 250.330002 | 246.639999 | 250.110001 | 233.058990 | 2745300.0 | 247.842999 | 247.4940 | 90.0 |
import numpy as np
df_weekly = pd.DataFrame(df_combined.groupby([pd.Grouper(key='Date', freq='W')]).agg(np.mean))
df_weekly.head()
open | high | low | close | adjclose | volume | TwoWeeks | Month | Home Depot | |
---|---|---|---|---|---|---|---|---|---|
Date | |||||||||
2020-07-12 | 248.691998 | 250.581998 | 246.664001 | 248.828003 | 231.864401 | 2819120.0 | 247.436599 | 247.509100 | 90.0 |
2020-07-19 | 255.570004 | 259.695996 | 253.284000 | 256.733997 | 239.231400 | 3799580.0 | 250.709800 | 249.001500 | 86.0 |
2020-07-26 | 263.217999 | 265.070001 | 261.105994 | 263.376007 | 245.420612 | 2661120.0 | 256.946602 | 252.191601 | 85.0 |
2020-08-02 | 265.357996 | 267.876001 | 263.217999 | 265.832001 | 247.709158 | 2700400.0 | 263.277204 | 256.993502 | 85.0 |
2020-08-09 | 267.792004 | 270.044006 | 266.348004 | 268.508002 | 250.202713 | 2319420.0 | 266.128600 | 261.537601 | 80.0 |
df_final = df_weekly.merge(news_weight, on='Date').merge(tweets_weight, on='Date')
df_final.columns = [i.lower() for i in df_final.columns]
df_final.head()
date | open | high | low | close | adjclose | volume | twoweeks | month | home depot | news_pos | news_neg | news_neu | tweet_pos | tweet_neg | tweet_neu | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2020-07-12 | 248.691998 | 250.581998 | 246.664001 | 248.828003 | 231.864401 | 2819120.0 | 247.436599 | 247.509100 | 90.0 | 0.000000 | 0.00 | 1.000000 | 0.000000 | 0.166667 | 0.833333 |
1 | 2020-07-19 | 255.570004 | 259.695996 | 253.284000 | 256.733997 | 239.231400 | 3799580.0 | 250.709800 | 249.001500 | 86.0 | 0.000000 | 0.75 | 0.250000 | 0.000000 | 0.400000 | 0.600000 |
2 | 2020-07-26 | 263.217999 | 265.070001 | 261.105994 | 263.376007 | 245.420612 | 2661120.0 | 256.946602 | 252.191601 | 85.0 | 0.333333 | 0.00 | 0.666667 | 0.125000 | 0.125000 | 0.750000 |
3 | 2020-08-02 | 265.357996 | 267.876001 | 263.217999 | 265.832001 | 247.709158 | 2700400.0 | 263.277204 | 256.993502 | 85.0 | 0.200000 | 0.20 | 0.600000 | 0.181818 | 0.272727 | 0.545455 |
4 | 2020-08-09 | 267.792004 | 270.044006 | 266.348004 | 268.508002 | 250.202713 | 2319420.0 | 266.128600 | 261.537601 | 80.0 | 0.333333 | 0.00 | 0.666667 | 0.333333 | 0.222222 | 0.444444 |
df_final.rename(columns={"home depot":"trends"}, inplace=True)
df_final.head()
date | open | high | low | close | adjclose | volume | twoweeks | month | trends | news_pos | news_neg | news_neu | tweet_pos | tweet_neg | tweet_neu | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2020-07-12 | 248.691998 | 250.581998 | 246.664001 | 248.828003 | 231.864401 | 2819120.0 | 247.436599 | 247.509100 | 90.0 | 0.000000 | 0.00 | 1.000000 | 0.000000 | 0.166667 | 0.833333 |
1 | 2020-07-19 | 255.570004 | 259.695996 | 253.284000 | 256.733997 | 239.231400 | 3799580.0 | 250.709800 | 249.001500 | 86.0 | 0.000000 | 0.75 | 0.250000 | 0.000000 | 0.400000 | 0.600000 |
2 | 2020-07-26 | 263.217999 | 265.070001 | 261.105994 | 263.376007 | 245.420612 | 2661120.0 | 256.946602 | 252.191601 | 85.0 | 0.333333 | 0.00 | 0.666667 | 0.125000 | 0.125000 | 0.750000 |
3 | 2020-08-02 | 265.357996 | 267.876001 | 263.217999 | 265.832001 | 247.709158 | 2700400.0 | 263.277204 | 256.993502 | 85.0 | 0.200000 | 0.20 | 0.600000 | 0.181818 | 0.272727 | 0.545455 |
4 | 2020-08-09 | 267.792004 | 270.044006 | 266.348004 | 268.508002 | 250.202713 | 2319420.0 | 266.128600 | 261.537601 | 80.0 | 0.333333 | 0.00 | 0.666667 | 0.333333 | 0.222222 | 0.444444 |
#Scaling the data
from sklearn.preprocessing import StandardScaler
SS = StandardScaler()
df_final_scaled = pd.DataFrame(SS.fit_transform(df_final.iloc[:,1:]), columns=df_final.columns[1:])
df_final_scaled.insert(0, "date", df_final['date'])
df_final_scaled.head()
date | open | high | low | close | adjclose | volume | twoweeks | month | trends | news_pos | news_neg | news_neu | tweet_pos | tweet_neg | tweet_neu | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2020-07-12 | -1.703818 | -1.730261 | -1.682950 | -1.705764 | -1.872820 | -0.840392 | -1.738792 | -1.736071 | 2.580289 | -1.106820 | -1.193801 | 1.766450 | -1.503002 | -0.103907 | 1.464049 |
1 | 2020-07-19 | -1.505790 | -1.470011 | -1.490960 | -1.477822 | -1.658905 | -0.124160 | -1.644438 | -1.692888 | 2.198301 | -1.106820 | 3.504942 | -1.899286 | -1.503002 | 1.212374 | 0.306837 |
2 | 2020-07-26 | -1.285591 | -1.316556 | -1.264110 | -1.286323 | -1.479190 | -0.955812 | -1.464655 | -1.600581 | 2.102805 | 1.052492 | -1.193801 | 0.137234 | -0.824220 | -0.338957 | 1.050759 |
3 | 2020-08-02 | -1.223978 | -1.236431 | -1.202859 | -1.215514 | -1.412737 | -0.927118 | -1.282168 | -1.461636 | 2.102805 | 0.188767 | 0.059197 | -0.188609 | -0.515683 | 0.494403 | 0.036320 |
4 | 2020-08-09 | -1.153899 | -1.174523 | -1.112084 | -1.138360 | -1.340332 | -1.205426 | -1.199974 | -1.330150 | 1.625320 | 1.052492 | -1.193801 | 0.137234 | 0.307084 | 0.209494 | -0.464638 |
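The modeling cells further below reload this frame from 'df_final_home_depot_scaled.csv' and drop an 'Unnamed: 0' index column on read, so a save step like the following is assumed at this point; the exact call is not shown in the original:
#Assumed save step: persist the scaled frame for the modeling cells (the index is written, hence the 'Unnamed: 0' column on re-read)
df_final_scaled.to_csv('df_final_home_depot_scaled.csv')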
df_final_scaled.corr().round(3)
open | high | low | close | adjclose | volume | twoweeks | month | trends | news_pos | news_neg | news_neu | tweet_pos | tweet_neg | tweet_neu | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
open | 1.000 | 0.999 | 0.999 | 0.998 | 0.986 | -0.051 | 0.983 | 0.954 | -0.221 | 0.241 | -0.043 | -0.148 | 0.009 | -0.075 | 0.058 |
high | 0.999 | 1.000 | 0.998 | 0.999 | 0.988 | -0.040 | 0.982 | 0.954 | -0.227 | 0.246 | -0.038 | -0.156 | 0.010 | -0.074 | 0.056 |
low | 0.999 | 0.998 | 1.000 | 0.999 | 0.985 | -0.075 | 0.978 | 0.947 | -0.210 | 0.238 | -0.045 | -0.145 | 0.008 | -0.079 | 0.062 |
close | 0.998 | 0.999 | 0.999 | 1.000 | 0.988 | -0.059 | 0.978 | 0.949 | -0.218 | 0.245 | -0.041 | -0.152 | 0.009 | -0.077 | 0.059 |
adjclose | 0.986 | 0.988 | 0.985 | 0.988 | 1.000 | -0.042 | 0.970 | 0.945 | -0.321 | 0.267 | -0.013 | -0.192 | 0.011 | -0.072 | 0.053 |
volume | -0.051 | -0.040 | -0.075 | -0.059 | -0.042 | 1.000 | 0.018 | 0.058 | -0.057 | 0.048 | -0.005 | -0.032 | 0.014 | 0.090 | -0.092 |
twoweeks | 0.983 | 0.982 | 0.978 | 0.978 | 0.970 | 0.018 | 1.000 | 0.986 | -0.266 | 0.248 | -0.035 | -0.160 | 0.006 | -0.077 | 0.062 |
month | 0.954 | 0.954 | 0.947 | 0.949 | 0.945 | 0.058 | 0.986 | 1.000 | -0.311 | 0.229 | -0.034 | -0.146 | -0.003 | -0.074 | 0.068 |
trends | -0.221 | -0.227 | -0.210 | -0.218 | -0.321 | -0.057 | -0.266 | -0.311 | 1.000 | -0.227 | -0.072 | 0.227 | -0.015 | 0.006 | 0.008 |
news_pos | 0.241 | 0.246 | 0.238 | 0.245 | 0.267 | 0.048 | 0.248 | 0.229 | -0.227 | 1.000 | -0.151 | -0.637 | -0.074 | 0.002 | 0.066 |
news_neg | -0.043 | -0.038 | -0.045 | -0.041 | -0.013 | -0.005 | -0.035 | -0.034 | -0.072 | -0.151 | 1.000 | -0.666 | -0.004 | -0.005 | 0.008 |
news_neu | -0.148 | -0.156 | -0.145 | -0.152 | -0.192 | -0.032 | -0.160 | -0.146 | 0.227 | -0.637 | -0.666 | 1.000 | 0.059 | 0.003 | -0.056 |
tweet_pos | 0.009 | 0.010 | 0.008 | 0.009 | 0.011 | 0.014 | 0.006 | -0.003 | -0.015 | -0.074 | -0.004 | 0.059 | 1.000 | -0.378 | -0.581 |
tweet_neg | -0.075 | -0.074 | -0.079 | -0.077 | -0.072 | 0.090 | -0.077 | -0.074 | 0.006 | 0.002 | -0.005 | 0.003 | -0.378 | 1.000 | -0.534 |
tweet_neu | 0.058 | 0.056 | 0.062 | 0.059 | 0.053 | -0.092 | 0.062 | 0.068 | 0.008 | 0.066 | 0.008 | -0.056 | -0.581 | -0.534 | 1.000 |
There is a positive correlation between positive news and the stock price in two weeks. We don't see this with positive tweets or any of the other sentiment columns. Additionally, there is a negative correlation between trends and the stock price in two weeks: when Google searches for Home Depot pick up, the price tends to fall over the following two weeks.
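To probe that lead/lag reading rather than just the contemporaneous correlation, a small lagged-correlation sketch (column names as in `df_final_scaled`; rows are weekly):
#Sketch: correlate this week's scaled trends value with the twoweeks column 0, 1 and 2 weeks later
for lag in (0, 1, 2):
    c = df_final_scaled['trends'].corr(df_final_scaled['twoweeks'].shift(-lag))
    print('trends vs twoweeks shifted {} week(s) ahead: {:.3f}'.format(lag, c))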
cols=['news_pos', 'news_neg','news_neu', 'tweet_pos', 'tweet_neg','tweet_neu', 'trends']
fig, ax =plt.subplots(3,3, figsize=(15, 12))
ax=ax.ravel()
for i in range(len(cols)):
df_final_scaled.plot(x='date', y=['close','twoweeks','month', cols[i]], ax=ax[i], title=cols[i])
ax[7].axis('off')
ax[8].axis('off')
plt.tight_layout()
We can observe the correlations mentioned above in the plots as well. The first plot, representing positive news, indicates that positive news loosely follows the stock price. The trends plot, on the other hand, shows a negative relationship between Google searches and the stock price. Furthermore, over time, the number of Google searches for "Home Depot" has steadily declined.
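One way to put a rough number on that decline in search interest (a sketch: slope of a least-squares line through the scaled weekly trends series):
import numpy as np
#Negative slope means the scaled trends series drifts down over the weekly index
slope = np.polyfit(np.arange(len(df_final_scaled)), df_final_scaled['trends'], 1)[0]
print('average change in scaled trends per week: {:.4f}'.format(slope))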
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
plt.style.use("ggplot")
df_final_scaled = pd.read_csv('df_final_home_depot_scaled.csv')
df_final_scaled.drop('Unnamed: 0', inplace=True, axis=1)
df_final_scaled.head()
date | open | high | low | close | adjclose | volume | twoweeks | month | trends | news_pos | news_neg | news_neu | tweet_pos | tweet_neg | tweet_neu | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2020-07-12 | -1.703818 | -1.730261 | -1.682950 | -1.705764 | -1.872820 | -0.840392 | -1.738792 | -1.736071 | 2.580289 | -1.106820 | -1.193801 | 1.766450 | -1.503002 | -0.103907 | 1.464049 |
1 | 2020-07-19 | -1.505790 | -1.470011 | -1.490960 | -1.477822 | -1.658905 | -0.124160 | -1.644438 | -1.692888 | 2.198301 | -1.106820 | 3.504942 | -1.899286 | -1.503002 | 1.212374 | 0.306837 |
2 | 2020-07-26 | -1.285591 | -1.316556 | -1.264110 | -1.286323 | -1.479190 | -0.955812 | -1.464655 | -1.600581 | 2.102805 | 1.052492 | -1.193801 | 0.137234 | -0.824220 | -0.338957 | 1.050759 |
3 | 2020-08-02 | -1.223978 | -1.236431 | -1.202859 | -1.215514 | -1.412737 | -0.927118 | -1.282168 | -1.461636 | 2.102805 | 0.188767 | 0.059197 | -0.188609 | -0.515683 | 0.494403 | 0.036320 |
4 | 2020-08-09 | -1.153899 | -1.174523 | -1.112084 | -1.138360 | -1.340332 | -1.205426 | -1.199974 | -1.330150 | 1.625320 | 1.052492 | -1.193801 | 0.137234 | 0.307084 | 0.209494 | -0.464638 |
x= df_final_scaled[['open','high','low','close', 'adjclose','volume','news_pos', 'news_neg', 'news_neu', 'tweet_pos',
'tweet_neg', 'tweet_neu', 'trends']]
y=df_final_scaled['twoweeks']
import statsmodels.api as sm
x = sm.add_constant(x)
model = sm.OLS(y, x).fit()
print(model.summary())
                            OLS Regression Results
==============================================================================
Dep. Variable:               twoweeks   R-squared:                       0.977
Model:                            OLS   Adj. R-squared:                  0.975
Method:                 Least Squares   F-statistic:                     537.8
Date:                Mon, 12 Jun 2023   Prob (F-statistic):          3.14e-109
Time:                        02:36:26   Log-Likelihood:                 70.561
No. Observations:                 153   AIC:                            -117.1
Df Residuals:                     141   BIC:                            -80.76
Df Model:                          11
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const      -1.501e-16      0.013  -1.17e-14      1.000      -0.025       0.025
open           2.3468      0.511      4.594      0.000       1.337       3.357
high          -0.2428      0.541     -0.448      0.655      -1.313       0.828
low           -0.7347      0.559     -1.314      0.191      -1.840       0.371
close         -0.2749      0.545     -0.504      0.615      -1.352       0.802
adjclose      -0.1252      0.118     -1.064      0.289      -0.358       0.107
volume         0.0494      0.017      2.981      0.003       0.017       0.082
news_pos       0.0014      0.010      0.141      0.888      -0.019       0.022
news_neg       0.0043      0.010      0.438      0.662      -0.015       0.023
news_neu      -0.0044      0.007     -0.594      0.554      -0.019       0.010
tweet_pos     -0.0020      0.009     -0.229      0.819      -0.019       0.015
tweet_neg     -0.0071      0.009     -0.778      0.438      -0.025       0.011
tweet_neu      0.0081      0.008      1.009      0.315      -0.008       0.024
trends        -0.0524      0.019     -2.785      0.006      -0.090      -0.015
==============================================================================
Omnibus:                        1.461   Durbin-Watson:                   1.694
Prob(Omnibus):                  0.482   Jarque-Bera (JB):                1.534
Skew:                           0.193   Prob(JB):                        0.464
Kurtosis:                       2.697   Cond. No.                     5.93e+15
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 2.26e-29. This might indicate that there are strong multicollinearity problems or that the design matrix is singular.
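Note [2] in the summary flags multicollinearity (the price columns are almost perfectly correlated with each other). One way to quantify it, as a sketch using statsmodels' variance inflation factors (`x` here still includes the constant added above):
from statsmodels.stats.outliers_influence import variance_inflation_factor
#Sketch: VIF per regressor; values far above ~10 indicate strong multicollinearity
vif = pd.Series([variance_inflation_factor(x.values, i) for i in range(x.shape[1])],
                index=x.columns)
print(vif.round(1))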
x= df_final_scaled[['open','high','low','close', 'adjclose','volume']]
y=df_final_scaled['twoweeks']
from sklearn.model_selection import train_test_split
train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.3 , shuffle=False,random_state = 0)
from sklearn.linear_model import LinearRegression
from sklearn.metrics import confusion_matrix, accuracy_score
regression = LinearRegression()
regression.fit(train_x, train_y)
print("regression coefficient",regression.coef_)
print("regression intercept",regression.intercept_)
regression coefficient [ 2.15513818  0.1568129  -0.63183193 -0.94351993  0.2494587   0.05989017]
regression intercept 0.017827179785784394
regression_confidence = regression.score(test_x, test_y)
print("linear regression confidence: ", regression_confidence)
linear regression confidence: 0.8354280496114086
predicted=regression.predict(test_x)
print(test_x.head())
         open      high       low     close  adjclose    volume
107 -0.243796 -0.255966 -0.239776 -0.208430 -0.104242 -0.699127
108 -0.115385 -0.079953 -0.082182 -0.058794  0.042456 -0.785429
109  0.099918  0.070874  0.110736  0.082941  0.181410 -0.899154
110  0.366759  0.437578  0.380624  0.428573  0.520257  0.542134
111  0.080282  0.039007  0.007433 -0.028003  0.072644 -0.737113
dfr=pd.DataFrame({'Actual_Price':test_y, 'Predicted_Price':predicted})
dfr.head(10)
Actual_Price | Predicted_Price | |
---|---|---|
107 | -0.251375 | -0.267445 |
108 | -0.142925 | -0.172431 |
109 | -0.051125 | 0.087459 |
110 | 0.166720 | 0.394254 |
111 | 0.250333 | 0.192664 |
112 | -0.043019 | -0.309534 |
113 | -0.333535 | -0.397486 |
114 | -0.476311 | -0.423891 |
115 | -0.740981 | -0.954391 |
116 | -0.992016 | -0.840699 |
dfr.describe()
Actual_Price | Predicted_Price | |
---|---|---|
count | 46.000000 | 46.000000 |
mean | -0.191756 | -0.132461 |
std | 0.438800 | 0.433547 |
min | -0.992016 | -0.954391 |
25% | -0.502699 | -0.417290 |
50% | -0.331794 | -0.271980 |
75% | 0.274308 | 0.247046 |
max | 0.478988 | 0.858482 |
from sklearn import metrics
import numpy as np
print('Mean Absolute Error (MAE):', metrics.mean_absolute_error(test_y, predicted))
print('Mean Squared Error (MSE) :', metrics.mean_squared_error(test_y, predicted))
print('Root Mean Squared Error (RMSE):', np.sqrt(metrics.mean_squared_error(test_y, predicted)))
Mean Absolute Error (MAE): 0.14249150773348154
Mean Squared Error (MSE) : 0.030998755667901733
Root Mean Squared Error (RMSE): 0.17606463491542454
plt.scatter(dfr.Actual_Price, dfr.Predicted_Price, color='Darkblue')
plt.xlabel("Actual Price")
plt.ylabel("Predicted Price")
plt.show()
plt.plot(dfr.Actual_Price, color='black', label='Actual')
plt.plot(dfr.Predicted_Price, color='blue',label='Predicted')
plt.title("Prediction using only Historical Data")
plt.legend()
<matplotlib.legend.Legend at 0x7fc3b62ce770>
df_final_scaled.columns
Index(['date', 'open', 'high', 'low', 'close', 'adjclose', 'volume', 'twoweeks', 'month', 'trends', 'news_pos', 'news_neg', 'news_neu', 'tweet_pos', 'tweet_neg', 'tweet_neu'], dtype='object')
x= df_final_scaled[['open','high','low','close', 'adjclose','volume','news_pos', 'news_neg', 'news_neu', 'tweet_pos',
'tweet_neg', 'tweet_neu', 'trends']]
y=df_final_scaled['twoweeks']
train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.3 , shuffle=False,random_state = 0)
regression = LinearRegression()
regression.fit(train_x, train_y)
print("regression coefficient",regression.coef_)
print("regression intercept",regression.intercept_)
regression coefficient [ 1.91871810e+00  1.89637622e-01 -3.49725608e-01 -8.35917172e-01
  4.63540880e-02  6.48403899e-02  1.45467759e-03  5.40953328e-03 -5.31782650e-03
 -3.13379351e-03 -7.57685954e-03  9.52332816e-03 -5.55338621e-02]
regression intercept 0.023765631258188702
regression_confidence = regression.score(test_x, test_y)
print("linear regression confidence: ", regression_confidence)
linear regression confidence: 0.8294859517339385
predicted=regression.predict(test_x)
print(test_x.head())
         open      high       low     close  adjclose    volume  news_pos  \
107 -0.243796 -0.255966 -0.239776 -0.208430 -0.104242 -0.699127  0.332721
108 -0.115385 -0.079953 -0.082182 -0.058794  0.042456 -0.785429 -1.106820
109  0.099918  0.070874  0.110736  0.082941  0.181410 -0.899154  0.332721
110  0.366759  0.437578  0.380624  0.428573  0.520257  0.542134  0.512664
111  0.080282  0.039007  0.007433 -0.028003  0.072644 -0.737113  0.512664

     news_neg  news_neu  tweet_pos  tweet_neg  tweet_neu    trends
107  0.894529 -0.948910   0.533344   1.071344  -1.428981 -0.475612
108  0.894529  0.137234   0.824251  -1.044107   0.165137 -0.380115
109  0.894529 -0.948910  -1.503002   0.567665   0.873634 -0.666606
110 -0.802239  0.239060   0.471637  -0.018434  -0.414542 -0.666606
111  0.372447 -0.677374   1.890908  -0.338957  -1.428981 -0.475612
from sklearn.metrics import r2_score
r2_score(test_y, predicted)  #r2_score expects (y_true, y_pred)
0.8290817124619406
dfr=pd.DataFrame({'Actual_Price':test_y, 'Predicted_Price':predicted})
dfr.head(10)
Actual_Price | Predicted_Price | |
---|---|---|
107 | -0.251375 | -0.271245 |
108 | -0.142925 | -0.153351 |
109 | -0.051125 | 0.127086 |
110 | 0.166720 | 0.405224 |
111 | 0.250333 | 0.177392 |
112 | -0.043019 | -0.300313 |
113 | -0.333535 | -0.381184 |
114 | -0.476311 | -0.385414 |
115 | -0.740981 | -0.939092 |
116 | -0.992016 | -0.805985 |
dfr.describe()
Actual_Price | Predicted_Price | |
---|---|---|
count | 46.000000 | 46.000000 |
mean | -0.191756 | -0.112709 |
std | 0.438800 | 0.438281 |
min | -0.992016 | -0.939092 |
25% | -0.502699 | -0.419594 |
50% | -0.331794 | -0.269632 |
75% | 0.274308 | 0.268095 |
max | 0.478988 | 0.895990 |
print('Mean Absolute Error (MAE):', metrics.mean_absolute_error(test_y, predicted))
print('Mean Squared Error (MSE) :', metrics.mean_squared_error(test_y, predicted))
print('Root Mean Squared Error (RMSE):', np.sqrt(metrics.mean_squared_error(test_y, predicted)))
Mean Absolute Error (MAE): 0.14758907019543632
Mean Squared Error (MSE) : 0.0321180086136408
Root Mean Squared Error (RMSE): 0.17921497876472492
plt.scatter(dfr.Actual_Price, dfr.Predicted_Price, color='Darkblue')
plt.xlabel("Actual Price")
plt.ylabel("Predicted Price")
plt.show()
plt.plot(dfr.Actual_Price, color='black', label='Actual')
plt.plot(dfr.Predicted_Price, color='blue',label='Predicted')
plt.title("Predictions using Historical, Sentiment and Trends")
plt.legend()
<matplotlib.legend.Legend at 0x7fc3b6000850>
import pandas as pd
df = pd.read_csv('df_final_home_depot_scaled.csv')
df.drop('Unnamed: 0', inplace=True, axis=1)
df.head()
date | open | high | low | close | adjclose | volume | twoweeks | month | trends | news_pos | news_neg | news_neu | tweet_pos | tweet_neg | tweet_neu | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2020-07-12 | -1.703818 | -1.730261 | -1.682950 | -1.705764 | -1.872820 | -0.840392 | -1.738792 | -1.736071 | 2.580289 | -1.106820 | -1.193801 | 1.766450 | -1.503002 | -0.103907 | 1.464049 |
1 | 2020-07-19 | -1.505790 | -1.470011 | -1.490960 | -1.477822 | -1.658905 | -0.124160 | -1.644438 | -1.692888 | 2.198301 | -1.106820 | 3.504942 | -1.899286 | -1.503002 | 1.212374 | 0.306837 |
2 | 2020-07-26 | -1.285591 | -1.316556 | -1.264110 | -1.286323 | -1.479190 | -0.955812 | -1.464655 | -1.600581 | 2.102805 | 1.052492 | -1.193801 | 0.137234 | -0.824220 | -0.338957 | 1.050759 |
3 | 2020-08-02 | -1.223978 | -1.236431 | -1.202859 | -1.215514 | -1.412737 | -0.927118 | -1.282168 | -1.461636 | 2.102805 | 0.188767 | 0.059197 | -0.188609 | -0.515683 | 0.494403 | 0.036320 |
4 | 2020-08-09 | -1.153899 | -1.174523 | -1.112084 | -1.138360 | -1.340332 | -1.205426 | -1.199974 | -1.330150 | 1.625320 | 1.052492 | -1.193801 | 0.137234 | 0.307084 | 0.209494 | -0.464638 |
df.columns
Index(['date', 'open', 'high', 'low', 'close', 'adjclose', 'volume', 'twoweeks', 'month', 'trends', 'news_pos', 'news_neg', 'news_neu', 'tweet_pos', 'tweet_neg', 'tweet_neu'], dtype='object')
df_hist = df[['date', 'open', 'high', 'low', 'close', 'adjclose', 'volume',
'twoweeks', 'month']]
df_hist.shape
(153, 9)
#Each row of X holds the six price/volume features (open, high, low, close, adjclose, volume)
X = [[df_hist.iloc[j,i+1] for i in range(df_hist.shape[1]-3)] for j in range(df_hist.shape[0])]
#Y holds the corresponding "twoweeks" target values
Y = [df_hist.iloc[i,7] for i in range(df_hist.shape[0])]
import numpy as np
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, shuffle=True)
train_X = np.array(x_train)
test_X = np.array(x_test)
train_Y = np.array(y_train)
test_Y = np.array(y_test)
#Reshape to (samples, time steps=1, features=6, channels=1) for the TimeDistributed Conv1D layers below
train_X = train_X.reshape(train_X.shape[0],1,6,1)
test_X = test_X.reshape(test_X.shape[0],1,6,1)
print(len(train_X))
print(len(test_X))
107
46
train_X[0]
array([[[-1.09683374],
        [-1.10216509],
        [-1.12420655],
        [-1.11575665],
        [-1.28069411],
        [-0.48313096]]])
from tensorflow.keras import backend as K
K.image_data_format()=="channels_first"
False
# For creating model and training
import tensorflow as tf
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout, Bidirectional, TimeDistributed
from tensorflow.keras.layers import MaxPooling1D, Flatten
from tensorflow.keras.regularizers import L1, L2
from tensorflow.keras.metrics import Accuracy
from tensorflow.keras.metrics import RootMeanSquaredError
model = tf.keras.Sequential()
# Creating the Neural Network model here...
# CNN layers
model.add(TimeDistributed(Conv1D(64, kernel_size=2, activation='relu', input_shape=(None, 6, 1))))
model.add(TimeDistributed(MaxPooling1D(1)))
model.add(TimeDistributed(Conv1D(128, kernel_size=2, activation='relu')))
model.add(TimeDistributed(MaxPooling1D(1)))
model.add(TimeDistributed(Conv1D(64, kernel_size=2, activation='relu')))
model.add(TimeDistributed(MaxPooling1D(1)))
model.add(TimeDistributed(Flatten()))
# model.add(Dense(5, kernel_regularizer=L2(0.01)))
# LSTM layers
model.add(Bidirectional(LSTM(100, return_sequences=True)))
model.add(Dropout(0.5))
model.add(Bidirectional(LSTM(100, return_sequences=False)))
model.add(Dropout(0.5))
#Final layers
model.add(Dense(1, activation='linear'))
model.compile(optimizer='adam', loss='mse', metrics=['mse', 'mae'])
history = model.fit(train_X, train_Y, validation_data=(test_X,test_Y), epochs=40,batch_size=40, verbose=1, shuffle =True)
Epoch 1/40   3/3 [==============================] - 13s 910ms/step - loss: 0.8740 - mse: 0.8740 - mae: 0.7390 - val_loss: 1.1876 - val_mse: 1.1876 - val_mae: 0.8232
Epoch 2/40   3/3 [==============================] - 0s 34ms/step - loss: 0.8003 - mse: 0.8003 - mae: 0.7047 - val_loss: 1.0375 - val_mse: 1.0375 - val_mae: 0.7627
Epoch 3/40   3/3 [==============================] - 0s 38ms/step - loss: 0.6692 - mse: 0.6692 - mae: 0.6315 - val_loss: 0.7258 - val_mse: 0.7258 - val_mae: 0.6202
Epoch 4/40   3/3 [==============================] - 0s 42ms/step - loss: 0.3853 - mse: 0.3853 - mae: 0.4627 - val_loss: 0.2971 - val_mse: 0.2971 - val_mae: 0.3433
Epoch 5/40   3/3 [==============================] - 0s 41ms/step - loss: 0.1832 - mse: 0.1832 - mae: 0.2912 - val_loss: 0.1127 - val_mse: 0.1127 - val_mae: 0.2656
Epoch 6/40   3/3 [==============================] - 0s 46ms/step - loss: 0.1054 - mse: 0.1054 - mae: 0.2555 - val_loss: 0.2310 - val_mse: 0.2310 - val_mae: 0.3723
Epoch 7/40   3/3 [==============================] - 0s 67ms/step - loss: 0.1590 - mse: 0.1590 - mae: 0.3158 - val_loss: 0.1858 - val_mse: 0.1858 - val_mae: 0.3358
Epoch 8/40   3/3 [==============================] - 0s 60ms/step - loss: 0.1221 - mse: 0.1221 - mae: 0.2605 - val_loss: 0.0740 - val_mse: 0.0740 - val_mae: 0.2339
Epoch 9/40   3/3 [==============================] - 0s 60ms/step - loss: 0.0769 - mse: 0.0769 - mae: 0.2229 - val_loss: 0.0902 - val_mse: 0.0902 - val_mae: 0.2187
Epoch 10/40  3/3 [==============================] - 0s 81ms/step - loss: 0.0740 - mse: 0.0740 - mae: 0.1926 - val_loss: 0.1062 - val_mse: 0.1062 - val_mae: 0.2100
Epoch 11/40  3/3 [==============================] - 0s 64ms/step - loss: 0.0807 - mse: 0.0807 - mae: 0.1986 - val_loss: 0.0893 - val_mse: 0.0893 - val_mae: 0.2025
Epoch 12/40  3/3 [==============================] - 0s 61ms/step - loss: 0.0891 - mse: 0.0891 - mae: 0.2086 - val_loss: 0.0575 - val_mse: 0.0575 - val_mae: 0.1793
Epoch 13/40  3/3 [==============================] - 0s 60ms/step - loss: 0.0599 - mse: 0.0599 - mae: 0.1769 - val_loss: 0.0513 - val_mse: 0.0513 - val_mae: 0.1723
Epoch 14/40  3/3 [==============================] - 0s 65ms/step - loss: 0.0515 - mse: 0.0515 - mae: 0.1749 - val_loss: 0.0740 - val_mse: 0.0740 - val_mae: 0.2093
Epoch 15/40  3/3 [==============================] - 0s 57ms/step - loss: 0.0550 - mse: 0.0550 - mae: 0.1783 - val_loss: 0.0788 - val_mse: 0.0788 - val_mae: 0.2178
Epoch 16/40  3/3 [==============================] - 0s 56ms/step - loss: 0.0655 - mse: 0.0655 - mae: 0.1941 - val_loss: 0.0548 - val_mse: 0.0548 - val_mae: 0.1837
Epoch 17/40  3/3 [==============================] - 0s 60ms/step - loss: 0.0431 - mse: 0.0431 - mae: 0.1603 - val_loss: 0.0412 - val_mse: 0.0412 - val_mae: 0.1610
Epoch 18/40  3/3 [==============================] - 0s 57ms/step - loss: 0.0392 - mse: 0.0392 - mae: 0.1512 - val_loss: 0.0473 - val_mse: 0.0473 - val_mae: 0.1615
Epoch 19/40  3/3 [==============================] - 0s 60ms/step - loss: 0.0426 - mse: 0.0426 - mae: 0.1599 - val_loss: 0.0502 - val_mse: 0.0502 - val_mae: 0.1643
Epoch 20/40  3/3 [==============================] - 0s 61ms/step - loss: 0.0579 - mse: 0.0579 - mae: 0.1759 - val_loss: 0.0423 - val_mse: 0.0423 - val_mae: 0.1570
Epoch 21/40  3/3 [==============================] - 0s 60ms/step - loss: 0.0507 - mse: 0.0507 - mae: 0.1801 - val_loss: 0.0473 - val_mse: 0.0473 - val_mae: 0.1655
Epoch 22/40  3/3 [==============================] - 0s 71ms/step - loss: 0.0399 - mse: 0.0399 - mae: 0.1639 - val_loss: 0.0588 - val_mse: 0.0588 - val_mae: 0.1883
Epoch 23/40  3/3 [==============================] - 0s 36ms/step - loss: 0.0468 - mse: 0.0468 - mae: 0.1620 - val_loss: 0.0428 - val_mse: 0.0428 - val_mae: 0.1614
Epoch 24/40  3/3 [==============================] - 0s 35ms/step - loss: 0.0417 - mse: 0.0417 - mae: 0.1593 - val_loss: 0.0419 - val_mse: 0.0419 - val_mae: 0.1566
Epoch 25/40  3/3 [==============================] - 0s 40ms/step - loss: 0.0481 - mse: 0.0481 - mae: 0.1619 - val_loss: 0.0418 - val_mse: 0.0418 - val_mae: 0.1554
Epoch 26/40  3/3 [==============================] - 0s 40ms/step - loss: 0.0392 - mse: 0.0392 - mae: 0.1476 - val_loss: 0.0420 - val_mse: 0.0420 - val_mae: 0.1560
Epoch 27/40  3/3 [==============================] - 0s 38ms/step - loss: 0.0471 - mse: 0.0471 - mae: 0.1628 - val_loss: 0.0429 - val_mse: 0.0429 - val_mae: 0.1625
Epoch 28/40  3/3 [==============================] - 0s 35ms/step - loss: 0.0461 - mse: 0.0461 - mae: 0.1648 - val_loss: 0.0434 - val_mse: 0.0434 - val_mae: 0.1660
Epoch 29/40  3/3 [==============================] - 0s 36ms/step - loss: 0.0360 - mse: 0.0360 - mae: 0.1533 - val_loss: 0.0427 - val_mse: 0.0427 - val_mae: 0.1616
Epoch 30/40  3/3 [==============================] - 0s 36ms/step - loss: 0.0376 - mse: 0.0376 - mae: 0.1511 - val_loss: 0.0431 - val_mse: 0.0431 - val_mae: 0.1578
Epoch 31/40  3/3 [==============================] - 0s 37ms/step - loss: 0.0435 - mse: 0.0435 - mae: 0.1703 - val_loss: 0.0448 - val_mse: 0.0448 - val_mae: 0.1599
Epoch 32/40  3/3 [==============================] - 0s 39ms/step - loss: 0.0414 - mse: 0.0414 - mae: 0.1560 - val_loss: 0.0459 - val_mse: 0.0459 - val_mae: 0.1602
Epoch 33/40  3/3 [==============================] - 0s 44ms/step - loss: 0.0437 - mse: 0.0437 - mae: 0.1640 - val_loss: 0.0477 - val_mse: 0.0477 - val_mae: 0.1672
Epoch 34/40  3/3 [==============================] - 0s 43ms/step - loss: 0.0421 - mse: 0.0421 - mae: 0.1515 - val_loss: 0.0508 - val_mse: 0.0508 - val_mae: 0.1743
Epoch 35/40  3/3 [==============================] - 0s 45ms/step - loss: 0.0427 - mse: 0.0427 - mae: 0.1623 - val_loss: 0.0461 - val_mse: 0.0461 - val_mae: 0.1658
Epoch 36/40  3/3 [==============================] - 0s 34ms/step - loss: 0.0444 - mse: 0.0444 - mae: 0.1668 - val_loss: 0.0426 - val_mse: 0.0426 - val_mae: 0.1588
Epoch 37/40  3/3 [==============================] - 0s 38ms/step - loss: 0.0468 - mse: 0.0468 - mae: 0.1661 - val_loss: 0.0496 - val_mse: 0.0496 - val_mae: 0.1661
Epoch 38/40  3/3 [==============================] - 0s 38ms/step - loss: 0.0431 - mse: 0.0431 - mae: 0.1544 - val_loss: 0.0435 - val_mse: 0.0435 - val_mae: 0.1588
Epoch 39/40  3/3 [==============================] - 0s 36ms/step - loss: 0.0392 - mse: 0.0392 - mae: 0.1587 - val_loss: 0.0440 - val_mse: 0.0440 - val_mae: 0.1577
Epoch 40/40  3/3 [==============================] - 0s 34ms/step - loss: 0.0546 - mse: 0.0546 - mae: 0.1726 - val_loss: 0.0544 - val_mse: 0.0544 - val_mae: 0.1774
import matplotlib.pyplot as plt
plt.plot(history.history['loss'], label='train loss')
plt.plot(history.history['val_loss'], label='val loss')
plt.xlabel("epoch")
plt.ylabel("Loss")
plt.legend()
<matplotlib.legend.Legend at 0x7fc355192470>
plt.plot(history.history['mse'], label='train mse')
plt.plot(history.history['val_mse'], label='val mse')
plt.xlabel("epoch")
plt.ylabel("Loss")
plt.legend()
<matplotlib.legend.Legend at 0x7fc355252b30>
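The curves above track the loss only; a quick look at the network's actual predictions on the held-out samples (a sketch; `model`, `test_X` and `test_Y` are the objects defined above, and the test rows are in shuffled order rather than chronological):
import numpy as np
from sklearn.metrics import mean_squared_error
#Predict the scaled "twoweeks" value for each test sample and compare with the actual values
pred_Y = model.predict(test_X).ravel()
plt.plot(test_Y, color='black', label='Actual')
plt.plot(pred_Y, color='blue', label='Predicted')
plt.title("CNN-LSTM predictions on the test set")
plt.legend()
print('Test RMSE:', np.sqrt(mean_squared_error(test_Y, pred_Y)))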