Skip to content

Commit bb7edc8

Browse files
committed
update prediction performance
1 parent f38ab47 commit bb7edc8

File tree

4 files changed

+10299
-1
lines changed

4 files changed

+10299
-1
lines changed
Lines changed: 251 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
---
2+
title: "Performance Prediction"
3+
author: "Wanzhi"
4+
date: "12/6/2020"
5+
output: html_document
6+
---
7+
8+
```{r setup, include=FALSE}
9+
knitr::opts_chunk$set(echo = TRUE)
10+
```
11+
12+
```{r message=FALSE}
13+
library(RMySQL)
14+
library(aod)
15+
library(ggplot2)
16+
library(ggpubr)
17+
library(pscl)
18+
library(stargazer)
19+
library(dplyr)
20+
library(lfe)
21+
library(lubridate)
22+
```
23+
24+
## Linear Regression
25+
26+
This document presents linear regression analyses of the relationship between each dependent (outcome) variable and the strategy score. The data cover 2011 only.
27+
28+
### Load Data
29+
```{r load data}
30+
data = read.csv('/Users/liwangzhi/Documents/GitHub/StartupStrategy/data/prediction_df_1206.csv')
31+
```
32+
33+
```{r show data}
34+
head(data, 3)
35+
```
36+
37+
```{r data_frame}
38+
summary(data)
39+
```
40+
41+
## Seed Funding
42+
43+
```{r seed funding}
44+
gr.reg1 = felm(seed_founding ~ Strat_score|0|0|0, data=data)
45+
gr.reg2 = felm(seed_founding ~ Strat_score|score_year|0|score_year, data=data)
46+
gr.reg3 = felm(seed_founding ~ Strat_score|score_year+portfoliocompanycity|0|
47+
score_year+portfoliocompanycity, data=data)
48+
gr.reg4 = felm(seed_founding ~ Strat_score|score_year+portfoliocompanycity
49+
+industryclassification|0|score_year+portfoliocompanycity+
50+
industryclassification, data=data)
51+
```
52+
53+
```{r output, results = "asis", message=FALSE}
54+
stargazer(gr.reg1,gr.reg2,gr.reg3,gr.reg4,
55+
column.labels=c("OLS","OLS","OLS","OLS"),
56+
add.lines=list(
57+
c("Score Year F.E.","No","Yes","Yes","Yes"),
58+
c("City F.E.","No","No","Yes","Yes"),
59+
c("Industry F.E.","No","No","No","Yes")
60+
),
61+
keep=c("Strat_score"),
62+
title="Linear Regression: Seed funding",
63+
align=TRUE,type = 'html')
64+
```
65+
66+
```{r log_seed}
67+
gr.reg2_1 = felm(log_seed ~ Strat_score|0|0|0, data=data)
68+
gr.reg2_2 = felm(log_seed ~ Strat_score|score_year|0|score_year, data=data)
69+
gr.reg2_3 = felm(log_seed ~ Strat_score|score_year+portfoliocompanycity|0|
70+
score_year+portfoliocompanycity, data=data)
71+
gr.reg2_4 = felm(log_seed ~ Strat_score|score_year+portfoliocompanycity
72+
+industryclassification|0|score_year+portfoliocompanycity+
73+
industryclassification, data=data)
74+
```
75+
76+
```{r output2, results = "asis", message=FALSE}
77+
stargazer(gr.reg2_1,gr.reg2_2,gr.reg2_3,gr.reg2_4,
78+
column.labels=c("OLS","OLS","OLS","OLS"),
79+
add.lines=list(
80+
c("Score Year F.E.","No","Yes","Yes","Yes"),
81+
c("City F.E.","No","No","Yes","Yes"),
82+
c("Industry F.E.","No","No","No","Yes")
83+
),
84+
keep=c("Strat_score"),
85+
title="Linear Regression: Log(Seed funding + 1)",
86+
align=TRUE,type = 'html')
87+
```
88+
89+
```{r log_seed_zero}
90+
gr.reg3_1 = felm(log_seed_zero ~ Strat_score|0|0|0, data=data)
91+
gr.reg3_2 = felm(log_seed_zero ~ Strat_score|score_year|0|score_year, data=data)
92+
gr.reg3_3 = felm(log_seed_zero ~ Strat_score|score_year+portfoliocompanycity|0|
93+
score_year+portfoliocompanycity, data=data)
94+
gr.reg3_4 = felm(log_seed_zero ~ Strat_score|score_year+portfoliocompanycity
95+
+industryclassification|0|score_year+portfoliocompanycity+
96+
industryclassification, data=data)
97+
```
98+
99+
```{r output3, results = "asis", message=FALSE}
100+
stargazer(gr.reg3_1,gr.reg3_2,gr.reg3_3,gr.reg3_4,
101+
column.labels=c("OLS","OLS","OLS","OLS"),
102+
add.lines=list(
103+
c("Score Year F.E.","No","Yes","Yes","Yes"),
104+
c("City F.E.","No","No","Yes","Yes"),
105+
c("Industry F.E.","No","No","No","Yes")
106+
),
107+
keep=c("Strat_score"),
108+
title="Linear Regression: Log seed funding (drop zeros)",
109+
align=TRUE,type = 'html')
110+
```
111+
112+
```{r log_seed plotting,message=FALSE}
113+
p1 <- ggplot(data, aes(x = Strat_score, y = seed_founding)) +
114+
geom_point(alpha = 0.5) +
115+
stat_smooth(method = "lm")
116+
p2 <- ggplot(data, aes(x = Strat_score, y = log_seed)) +
117+
geom_point(alpha = 0.5) +
118+
stat_smooth(method = "lm")
119+
p3 <- ggplot(data, aes(x = Strat_score, y = log_seed_zero)) +
120+
geom_point(alpha = 0.5) +
121+
stat_smooth(method = "lm")
122+
123+
figure <- ggarrange(p1,p2,p3,nrow = 1)
124+
annotate_figure(figure,
125+
top = text_grob("Strategy Score with Seed funding, Log Seed funding, \n and Log Seed funding (drop zeros)", color = "black", face = "bold", size = 14))
126+
```
127+
128+
## Logistic Regression
129+
130+
```{r has_seed}
131+
# Logistic regression of the seed-funding indicator on the strategy score.
# family = binomial is required: glm() defaults to gaussian, which is plain OLS.
# Assumes has_seed is a 0/1 indicator -- TODO confirm against the CSV.
gr.reg4_1 = glm(has_seed ~ Strat_score, family = binomial, data = data)
132+
# Logistic regression of the IPO indicator on the strategy score.
# family = binomial is required: glm() defaults to gaussian, which is plain OLS.
# Assumes has_ipo is a 0/1 indicator -- TODO confirm against the CSV.
gr.reg4_2 = glm(has_ipo ~ Strat_score, family = binomial, data = data)
133+
# Logistic regression of the trade-sale indicator on the strategy score.
# family = binomial is required: glm() defaults to gaussian, which is plain OLS.
# Assumes has_tradesale is a 0/1 indicator -- TODO confirm against the CSV.
gr.reg4_3 = glm(has_tradesale ~ Strat_score, family = binomial, data = data)
134+
```
135+
136+
```{r output4, results = "asis", message=FALSE}
137+
stargazer(gr.reg4_1,gr.reg4_2, gr.reg4_3,
138+
title="Logistic Regression: Has Seed, Has IPO, and Has Trade sales", align=TRUE,type = 'html')
139+
```
140+
141+
```{r logistic plotting, message=FALSE}
142+
p4 <- ggplot(data, aes(x = Strat_score, y = has_seed)) +
143+
geom_point(alpha = 0.5) +
144+
# Pass the binomial family so the smoother draws a logistic curve, not a straight gaussian line.
stat_smooth(method = "glm", method.args = list(family = "binomial"))
145+
p5 <- ggplot(data, aes(x = Strat_score, y = has_ipo)) +
146+
geom_point(alpha = 0.5) +
147+
# Pass the binomial family so the smoother draws a logistic curve, not a straight gaussian line.
stat_smooth(method = "glm", method.args = list(family = "binomial"))
148+
p6 <- ggplot(data, aes(x = Strat_score, y = has_tradesale)) +
149+
geom_point(alpha = 0.5) +
150+
# Pass the binomial family so the smoother draws a logistic curve, not a straight gaussian line.
stat_smooth(method = "glm", method.args = list(family = "binomial"))
151+
152+
figure <- ggarrange(p4,p5,p6,nrow = 1)
153+
annotate_figure(figure,
154+
top = text_grob("Strategy Score with Has Seed Funding,\n Has IPO, and Has Trade Sales",
155+
color = "black", face = "bold", size = 14))
156+
```
157+
158+
## IPO value
159+
160+
```{r log_ipo}
161+
gr.reg5_1 = felm(log_ipo ~ Strat_score|0|0|0, data=data)
162+
gr.reg5_2 = felm(log_ipo ~ Strat_score|score_year|0|score_year, data=data)
163+
gr.reg5_3 = felm(log_ipo ~ Strat_score|score_year+portfoliocompanycity|0|
164+
score_year+portfoliocompanycity, data=data)
165+
gr.reg5_4 = felm(log_ipo ~ Strat_score|score_year+portfoliocompanycity
166+
+industryclassification|0|score_year+portfoliocompanycity+
167+
industryclassification, data=data)
168+
```
169+
170+
```{r output5, results = "asis", message=FALSE}
171+
stargazer(gr.reg5_1,gr.reg5_2,gr.reg5_3,gr.reg5_4,
172+
column.labels=c("OLS","OLS","OLS","OLS"),
173+
add.lines=list(
174+
c("Score Year F.E.","No","Yes","Yes","Yes"),
175+
c("City F.E.","No","No","Yes","Yes"),
176+
c("Industry F.E.","No","No","No","Yes")
177+
),
178+
keep=c("Strat_score"),
179+
title="Linear Regression: Log(IPO value + 1)",
180+
align=TRUE,type = 'html')
181+
```
182+
183+
```{r log_ipo_zero, message=FALSE, warning=FALSE}
184+
gr.reg6_1 = felm(log_ipo_zero ~ Strat_score|0|0|0, data=data)
185+
gr.reg6_2 = felm(log_ipo_zero ~ Strat_score|score_year|0|score_year, data=data)
186+
gr.reg6_3 = felm(log_ipo_zero ~ Strat_score|score_year+portfoliocompanycity|0|
187+
score_year+portfoliocompanycity, data=data)
188+
gr.reg6_4 = felm(log_ipo_zero ~ Strat_score|score_year+portfoliocompanycity
189+
+industryclassification|0|score_year+portfoliocompanycity+
190+
industryclassification, data=data)
191+
```
192+
193+
```{r output6, results = "asis", message=FALSE}
194+
stargazer(gr.reg6_1,gr.reg6_2,gr.reg6_3,gr.reg6_4,
195+
column.labels=c("OLS","OLS","OLS","OLS"),
196+
add.lines=list(
197+
c("Score Year F.E.","No","Yes","Yes","Yes"),
198+
c("City F.E.","No","No","Yes","Yes"),
199+
c("Industry F.E.","No","No","No","Yes")
200+
),
201+
keep=c("Strat_score"),
202+
title="Linear Regression: Log IPO value (drop zeros)",
203+
align=TRUE,type = 'html')
204+
```
205+
206+
## First Rounds Funding
207+
208+
```{r first rounds funding}
209+
gr.reg7_1 = felm(firstrounds_founding ~ Strat_score|0|0|0, data=data)
210+
gr.reg7_2 = felm(firstrounds_founding ~ Strat_score|score_year|0|score_year, data=data)
211+
gr.reg7_3 = felm(firstrounds_founding ~ Strat_score|score_year+portfoliocompanycity|0|
212+
score_year+portfoliocompanycity, data=data)
213+
gr.reg7_4 = felm(firstrounds_founding ~ Strat_score|score_year+portfoliocompanycity
214+
+industryclassification|0|score_year+portfoliocompanycity+
215+
industryclassification, data=data)
216+
```
217+
218+
```{r output7, results = "asis", message=FALSE}
219+
stargazer(gr.reg7_1,gr.reg7_2,gr.reg7_3,gr.reg7_4,
220+
column.labels=c("OLS","OLS","OLS","OLS"),
221+
add.lines=list(
222+
c("Score Year F.E.","No","Yes","Yes","Yes"),
223+
c("City F.E.","No","No","Yes","Yes"),
224+
c("Industry F.E.","No","No","No","Yes")
225+
),
226+
keep=c("Strat_score"),
227+
title="Linear Regression: First Rounds of funding",
228+
align=TRUE,type = 'html')
229+
```
230+
```{r log first funding}
231+
gr.reg8_1 = felm(log_first_rounds ~ Strat_score|0|0|0, data=data)
232+
gr.reg8_2 = felm(log_first_rounds ~ Strat_score|score_year|0|score_year, data=data)
233+
gr.reg8_3 = felm(log_first_rounds ~ Strat_score|score_year+portfoliocompanycity|0|
234+
score_year+portfoliocompanycity, data=data)
235+
gr.reg8_4 = felm(log_first_rounds ~ Strat_score|score_year+portfoliocompanycity
236+
+industryclassification|0|score_year+portfoliocompanycity+
237+
industryclassification, data=data)
238+
```
239+
240+
```{r output8, results = "asis", message=FALSE}
241+
stargazer(gr.reg8_1,gr.reg8_2,gr.reg8_3,gr.reg8_4,
242+
column.labels=c("OLS","OLS","OLS","OLS"),
243+
add.lines=list(
244+
c("Score Year F.E.","No","Yes","Yes","Yes"),
245+
c("City F.E.","No","No","Yes","Yes"),
246+
c("Industry F.E.","No","No","No","Yes")
247+
),
248+
keep=c("Strat_score"),
249+
title="Linear Regression: Log (First Rounds of funding + 1)",
250+
align=TRUE,type = 'html')
251+
```

Notebooks/Prediction/Performance-Prediction.html

Lines changed: 2570 additions & 0 deletions
Large diffs are not rendered by default.

Notebooks/Prediction/linear_regression_analysis.rmd

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,8 @@ stargazer(gr.reg4, gr.reg5, gr.reg6,
260260
```{r logistic regression}
261261
# Logistic regression of the IPO indicator on the strategy score.
# family = binomial is required: glm() defaults to gaussian, which is plain OLS.
# Assumes has_ipo is a 0/1 indicator -- TODO confirm.
gr.reg8 = glm(has_ipo ~ Strat_score, family = binomial, data = All_tfidf)
262262
#gr.reg8 = glm(has_ipo ~ Strat_score + as.factor(score_year), data = All_tfidf)
263+
library(pROC)
264+
# roc() needs the observed response and a numeric predictor; it has no method
# for a fitted glm object. gr.reg8$y and fitted() both refer to the rows that
# were actually used in the fit, so their lengths match even if NAs were dropped.
resLroc <- roc(response = gr.reg8$y, predictor = fitted(gr.reg8))
263265
```
264266

265267
```{r output3, results = "asis", message=FALSE}
@@ -270,4 +272,15 @@ stargazer(gr.reg8,
270272
#### next step
271273

272274
- normalize/standardize dependent variable
273-
- add industry into consideration
275+
- add industry into consideration
276+
277+
278+
```{r logistic regression2}
279+
# Logistic regression of the IPO indicator on the strategy score, with score-year
# and city entered as factor dummies.
# family = binomial is required: glm() defaults to gaussian, which is plain OLS.
# Assumes has_ipo is a 0/1 indicator -- TODO confirm.
gr.reg7 = glm(has_ipo ~ Strat_score + as.factor(score_year) + as.factor(portfoliocompanycity),
              family = binomial, data = All_tfidf)
280+
#gr.reg8 = glm(has_ipo ~ Strat_score + as.factor(score_year), data = All_tfidf)
281+
```
282+
283+
```{r output4, results = "asis", message=FALSE}
284+
stargazer(gr.reg7,
285+
title="Logistic Regression: Has IPO", align=TRUE,type = 'html')
286+
```

0 commit comments

Comments
 (0)