|
| 1 | +--- |
| 2 | +title: "Performance Prediction" |
| 3 | +author: "Wanzhi" |
| 4 | +date: "12/6/2020" |
| 5 | +output: html_document |
| 6 | +--- |
| 7 | + |
| 8 | +```{r setup, include=FALSE} |
| 9 | +knitr::opts_chunk$set(echo = TRUE)  # default for all later chunks: show the chunk's source in the rendered output |
| 10 | +``` |
| 11 | + |
| 12 | +```{r message=FALSE} |
| 13 | +library(RMySQL) |
| 14 | +library(aod) |
| 15 | +library(ggplot2) |
| 16 | +library(ggpubr) |
| 17 | +library(pscl) |
| 18 | +library(stargazer) |
| 19 | +library(dplyr) |
| 20 | +library(lfe) |
| 21 | +library(lubridate) |
| 22 | +``` |
| 23 | + |
| 24 | +## Linear Regression |
| 25 | + |
| 26 | +This document presents linear regression analyses of the relationship between each dependent variable and the strategy score. Only data from 2011 are used. |
| 27 | + |
| 28 | +### Load Data |
| 29 | +```{r load data} |
| 30 | +data = read.csv('/Users/liwangzhi/Documents/GitHub/StartupStrategy/data/prediction_df_1206.csv')  # NOTE(review): absolute path under one user's home directory; confirm/adjust before knitting on another machine |
| 31 | +``` |
| 32 | + |
| 33 | +```{r show data} |
| 34 | +head(data, n = 3)  # preview the first three rows of the loaded data |
| 35 | +``` |
| 36 | + |
| 37 | +```{r data_frame} |
| 38 | +summary(object = data)  # per-column summary of the loaded data frame |
| 39 | +``` |
| 40 | + |
| 41 | +## Seed Funding |
| 42 | + |
| 43 | +```{r seed funding} |
| 44 | +gr.reg1 <- felm(seed_founding ~ Strat_score | 0 | 0 | 0, data = data)  # plain OLS: no F.E., no instruments, no clustering |
| 45 | +gr.reg2 <- felm(seed_founding ~ Strat_score | score_year | 0 | score_year, data = data)  # year F.E., clustered on year |
| 46 | +gr.reg3 <- felm(seed_founding ~ Strat_score | score_year + portfoliocompanycity | 0 |  # year + city F.E. |
| 47 | +                  score_year + portfoliocompanycity, data = data) |
| 48 | +gr.reg4 <- felm(seed_founding ~ Strat_score |  # year + city + industry F.E., clustered on the same factors |
| 49 | +                  score_year + portfoliocompanycity + industryclassification | 0 | |
| 50 | +                  score_year + portfoliocompanycity + industryclassification, data = data) |
| 51 | +``` |
| 52 | + |
| 53 | +```{r output, results = "asis", message=FALSE} |
| 54 | +stargazer(gr.reg1, gr.reg2, gr.reg3, gr.reg4, |
| 55 | +          type = "html", align = TRUE, |
| 56 | +          title = "Linear Regression: Seed funding", |
| 57 | +          column.labels = rep("OLS", 4), |
| 58 | +          keep = "Strat_score",  # only coefficient rows matching Strat_score are shown |
| 59 | +          add.lines = list(  # manual rows flagging which F.E. each column includes |
| 60 | +            c("Score Year F.E.", "No", "Yes", "Yes", "Yes"), |
| 61 | +            c("City F.E.", "No", "No", "Yes", "Yes"), |
| 62 | +            c("Industry F.E.", "No", "No", "No", "Yes") |
| 63 | +          )) |
| 64 | +``` |
| 65 | + |
| 66 | +```{r log_seed} |
| 67 | +gr.reg2_1 <- felm(log_seed ~ Strat_score | 0 | 0 | 0, data = data)  # plain OLS: no F.E., no clustering |
| 68 | +gr.reg2_2 <- felm(log_seed ~ Strat_score | score_year | 0 | score_year, data = data)  # year F.E., clustered on year |
| 69 | +gr.reg2_3 <- felm(log_seed ~ Strat_score | score_year + portfoliocompanycity | 0 |  # year + city F.E. |
| 70 | +                    score_year + portfoliocompanycity, data = data) |
| 71 | +gr.reg2_4 <- felm(log_seed ~ Strat_score |  # year + city + industry F.E., clustered on the same factors |
| 72 | +                    score_year + portfoliocompanycity + industryclassification | 0 | |
| 73 | +                    score_year + portfoliocompanycity + industryclassification, data = data) |
| 74 | +``` |
| 75 | + |
| 76 | +```{r output2, results = "asis", message=FALSE} |
| 77 | +stargazer(gr.reg2_1, gr.reg2_2, gr.reg2_3, gr.reg2_4, |
| 78 | +          type = "html", align = TRUE, |
| 79 | +          title = "Linear Regression: Log(Seed founding + 1)", |
| 80 | +          column.labels = rep("OLS", 4), |
| 81 | +          keep = "Strat_score",  # only coefficient rows matching Strat_score are shown |
| 82 | +          add.lines = list(  # manual rows flagging which F.E. each column includes |
| 83 | +            c("Score Year F.E.", "No", "Yes", "Yes", "Yes"), |
| 84 | +            c("City F.E.", "No", "No", "Yes", "Yes"), |
| 85 | +            c("Industry F.E.", "No", "No", "No", "Yes") |
| 86 | +          )) |
| 87 | +``` |
| 88 | + |
| 89 | +```{r log_seed_zero} |
| 90 | +gr.reg3_1 <- felm(log_seed_zero ~ Strat_score | 0 | 0 | 0, data = data)  # plain OLS: no F.E., no clustering |
| 91 | +gr.reg3_2 <- felm(log_seed_zero ~ Strat_score | score_year | 0 | score_year, data = data)  # year F.E., clustered on year |
| 92 | +gr.reg3_3 <- felm(log_seed_zero ~ Strat_score | score_year + portfoliocompanycity | 0 |  # year + city F.E. |
| 93 | +                    score_year + portfoliocompanycity, data = data) |
| 94 | +gr.reg3_4 <- felm(log_seed_zero ~ Strat_score |  # year + city + industry F.E., clustered on the same factors |
| 95 | +                    score_year + portfoliocompanycity + industryclassification | 0 | |
| 96 | +                    score_year + portfoliocompanycity + industryclassification, data = data) |
| 97 | +``` |
| 98 | + |
| 99 | +```{r output3, results = "asis", message=FALSE} |
| 100 | +stargazer(gr.reg3_1, gr.reg3_2, gr.reg3_3, gr.reg3_4, |
| 101 | +          type = "html", align = TRUE, |
| 102 | +          title = "Linear Regression: Log seed founding (drop zeros)", |
| 103 | +          column.labels = rep("OLS", 4), |
| 104 | +          keep = "Strat_score",  # only coefficient rows matching Strat_score are shown |
| 105 | +          add.lines = list(  # manual rows flagging which F.E. each column includes |
| 106 | +            c("Score Year F.E.", "No", "Yes", "Yes", "Yes"), |
| 107 | +            c("City F.E.", "No", "No", "Yes", "Yes"), |
| 108 | +            c("Industry F.E.", "No", "No", "No", "Yes") |
| 109 | +          )) |
| 110 | +``` |
| 111 | + |
| 112 | +```{r log_seed plotting,message=FALSE} |
| 113 | +p1 <- ggplot(data = data, mapping = aes(Strat_score, seed_founding)) +  # seed_founding against strategy score |
| 114 | +  geom_point(alpha = 0.5) +  # half-transparent points |
| 115 | +  stat_smooth(method = "lm")  # linear fit overlaid on the scatter |
| 116 | +p2 <- ggplot(data = data, mapping = aes(Strat_score, log_seed)) +  # log_seed against strategy score |
| 117 | +  geom_point(alpha = 0.5) + |
| 118 | +  stat_smooth(method = "lm") |
| 119 | +p3 <- ggplot(data = data, mapping = aes(Strat_score, log_seed_zero)) +  # log_seed_zero against strategy score |
| 120 | +  geom_point(alpha = 0.5) + |
| 121 | +  stat_smooth(method = "lm") |
| 122 | +
|
| 123 | +figure <- ggarrange(p1, p2, p3, nrow = 1)  # the three panels side by side in one row |
| 124 | +annotate_figure(figure, top = text_grob(  # shared bold title above the panels |
| 125 | +  "Strategy Score with Seed funding, Log Seed funding, \n and Log Seed funding (drop zeros)", face = "bold", size = 14, color = "black")) |
| 126 | +``` |
| 127 | + |
| 128 | +## Logistic Regression |
| 129 | + |
| 130 | +```{r has_seed} |
| 131 | +gr.reg4_1 <- glm(has_seed ~ Strat_score, family = binomial(link = "logit"), data = data)  # family is required: glm() defaults to gaussian, i.e. a linear model rather than a logistic one |
| 132 | +gr.reg4_2 <- glm(has_ipo ~ Strat_score, family = binomial(link = "logit"), data = data)  # assumes the has_* columns are 0/1 indicators; TODO confirm |
| 133 | +gr.reg4_3 <- glm(has_tradesale ~ Strat_score, family = binomial(link = "logit"), data = data) |
| 134 | +``` |
| 135 | + |
| 136 | +```{r output4, results = "asis", message=FALSE} |
| 137 | +stargazer(gr.reg4_1, gr.reg4_2, gr.reg4_3, type = "html", align = TRUE,  # one column per fitted model |
| 138 | +          title = "Logistic Regression: Has Seed, Has IPO, and Has Trade sales") |
| 139 | +``` |
| 140 | + |
| 141 | +```{r logistic plotting, message=FALSE} |
| 142 | +p4 <- ggplot(data, aes(x = Strat_score, y = has_seed)) +  # assumes has_* columns are 0/1 indicators; TODO confirm |
| 143 | +  geom_point(alpha = 0.5) + |
| 144 | +  stat_smooth(method = "glm", method.args = list(family = "binomial"))  # logistic curve; method = "glm" alone fits a gaussian straight line |
| 145 | +p5 <- ggplot(data, aes(x = Strat_score, y = has_ipo)) + |
| 146 | +  geom_point(alpha = 0.5) + |
| 147 | +  stat_smooth(method = "glm", method.args = list(family = "binomial")) |
| 148 | +p6 <- ggplot(data, aes(x = Strat_score, y = has_tradesale)) + |
| 149 | +  geom_point(alpha = 0.5) + |
| 150 | +  stat_smooth(method = "glm", method.args = list(family = "binomial")) |
| 151 | +
|
| 152 | +figure <- ggarrange(p4, p5, p6, nrow = 1)  # the three panels side by side in one row |
| 153 | +annotate_figure(figure, top = text_grob(  # shared bold title above the panels |
| 154 | +  "Strategy Score with Has Seed Funding,\n Has IPO, and Has Trade Sales", |
| 155 | +  face = "bold", size = 14, color = "black")) |
| 156 | +``` |
| 157 | + |
| 158 | +## IPO value |
| 159 | + |
| 160 | +```{r log_ipo} |
| 161 | +gr.reg5_1 <- felm(log_ipo ~ Strat_score | 0 | 0 | 0, data = data)  # plain OLS: no F.E., no clustering |
| 162 | +gr.reg5_2 <- felm(log_ipo ~ Strat_score | score_year | 0 | score_year, data = data)  # year F.E., clustered on year |
| 163 | +gr.reg5_3 <- felm(log_ipo ~ Strat_score | score_year + portfoliocompanycity | 0 |  # year + city F.E. |
| 164 | +                    score_year + portfoliocompanycity, data = data) |
| 165 | +gr.reg5_4 <- felm(log_ipo ~ Strat_score |  # year + city + industry F.E., clustered on the same factors |
| 166 | +                    score_year + portfoliocompanycity + industryclassification | 0 | |
| 167 | +                    score_year + portfoliocompanycity + industryclassification, data = data) |
| 168 | +``` |
| 169 | + |
| 170 | +```{r output5, results = "asis", message=FALSE} |
| 171 | +stargazer(gr.reg5_1, gr.reg5_2, gr.reg5_3, gr.reg5_4, |
| 172 | +          type = "html", align = TRUE, |
| 173 | +          title = "Linear Regression: Log(IPO value + 1)", |
| 174 | +          column.labels = rep("OLS", 4), |
| 175 | +          keep = "Strat_score",  # only coefficient rows matching Strat_score are shown |
| 176 | +          add.lines = list(  # manual rows flagging which F.E. each column includes |
| 177 | +            c("Score Year F.E.", "No", "Yes", "Yes", "Yes"), |
| 178 | +            c("City F.E.", "No", "No", "Yes", "Yes"), |
| 179 | +            c("Industry F.E.", "No", "No", "No", "Yes") |
| 180 | +          )) |
| 181 | +``` |
| 182 | + |
| 183 | +```{r log_ipo_zero, message=FALSE, warning=FALSE} |
| 184 | +gr.reg6_1 <- felm(log_ipo_zero ~ Strat_score | 0 | 0 | 0, data = data)  # plain OLS: no F.E., no clustering |
| 185 | +gr.reg6_2 <- felm(log_ipo_zero ~ Strat_score | score_year | 0 | score_year, data = data)  # year F.E., clustered on year |
| 186 | +gr.reg6_3 <- felm(log_ipo_zero ~ Strat_score | score_year + portfoliocompanycity | 0 |  # year + city F.E. |
| 187 | +                    score_year + portfoliocompanycity, data = data) |
| 188 | +gr.reg6_4 <- felm(log_ipo_zero ~ Strat_score |  # year + city + industry F.E., clustered on the same factors |
| 189 | +                    score_year + portfoliocompanycity + industryclassification | 0 | |
| 190 | +                    score_year + portfoliocompanycity + industryclassification, data = data) |
| 191 | +``` |
| 192 | + |
| 193 | +```{r output6, results = "asis", message=FALSE} |
| 194 | +stargazer(gr.reg6_1, gr.reg6_2, gr.reg6_3, gr.reg6_4, |
| 195 | +          type = "html", align = TRUE, |
| 196 | +          title = "Linear Regression: Log IPO value (drop zeros)", |
| 197 | +          column.labels = rep("OLS", 4), |
| 198 | +          keep = "Strat_score",  # only coefficient rows matching Strat_score are shown |
| 199 | +          add.lines = list(  # manual rows flagging which F.E. each column includes |
| 200 | +            c("Score Year F.E.", "No", "Yes", "Yes", "Yes"), |
| 201 | +            c("City F.E.", "No", "No", "Yes", "Yes"), |
| 202 | +            c("Industry F.E.", "No", "No", "No", "Yes") |
| 203 | +          )) |
| 204 | +``` |
| 205 | + |
| 206 | +## First Rounds Funding |
| 207 | + |
| 208 | +```{r first rounds funding} |
| 209 | +gr.reg7_1 <- felm(firstrounds_founding ~ Strat_score | 0 | 0 | 0, data = data)  # plain OLS: no F.E., no clustering |
| 210 | +gr.reg7_2 <- felm(firstrounds_founding ~ Strat_score | score_year | 0 | score_year, data = data)  # year F.E., clustered on year |
| 211 | +gr.reg7_3 <- felm(firstrounds_founding ~ Strat_score | score_year + portfoliocompanycity | 0 |  # year + city F.E. |
| 212 | +                    score_year + portfoliocompanycity, data = data) |
| 213 | +gr.reg7_4 <- felm(firstrounds_founding ~ Strat_score |  # year + city + industry F.E., clustered on the same factors |
| 214 | +                    score_year + portfoliocompanycity + industryclassification | 0 | |
| 215 | +                    score_year + portfoliocompanycity + industryclassification, data = data) |
| 216 | +``` |
| 217 | + |
| 218 | +```{r output7, results = "asis", message=FALSE} |
| 219 | +stargazer(gr.reg7_1, gr.reg7_2, gr.reg7_3, gr.reg7_4, |
| 220 | +          type = "html", align = TRUE, |
| 221 | +          title = "Linear Regression: First Rounds of funding", |
| 222 | +          column.labels = rep("OLS", 4), |
| 223 | +          keep = "Strat_score",  # only coefficient rows matching Strat_score are shown |
| 224 | +          add.lines = list(  # manual rows flagging which F.E. each column includes |
| 225 | +            c("Score Year F.E.", "No", "Yes", "Yes", "Yes"), |
| 226 | +            c("City F.E.", "No", "No", "Yes", "Yes"), |
| 227 | +            c("Industry F.E.", "No", "No", "No", "Yes") |
| 228 | +          )) |
| 229 | +``` |
| 230 | +```{r log first funding} |
| 231 | +gr.reg8_1 <- felm(log_first_rounds ~ Strat_score | 0 | 0 | 0, data = data)  # plain OLS: no F.E., no clustering |
| 232 | +gr.reg8_2 <- felm(log_first_rounds ~ Strat_score | score_year | 0 | score_year, data = data)  # year F.E., clustered on year |
| 233 | +gr.reg8_3 <- felm(log_first_rounds ~ Strat_score | score_year + portfoliocompanycity | 0 |  # year + city F.E. |
| 234 | +                    score_year + portfoliocompanycity, data = data) |
| 235 | +gr.reg8_4 <- felm(log_first_rounds ~ Strat_score |  # year + city + industry F.E., clustered on the same factors |
| 236 | +                    score_year + portfoliocompanycity + industryclassification | 0 | |
| 237 | +                    score_year + portfoliocompanycity + industryclassification, data = data) |
| 238 | +``` |
| 239 | + |
| 240 | +```{r output8, results = "asis", message=FALSE} |
| 241 | +stargazer(gr.reg8_1, gr.reg8_2, gr.reg8_3, gr.reg8_4, |
| 242 | +          type = "html", align = TRUE, |
| 243 | +          title = "Linear Regression: Log (First Rounds of funding + 1)", |
| 244 | +          column.labels = rep("OLS", 4), |
| 245 | +          keep = "Strat_score",  # only coefficient rows matching Strat_score are shown |
| 246 | +          add.lines = list(  # manual rows flagging which F.E. each column includes |
| 247 | +            c("Score Year F.E.", "No", "Yes", "Yes", "Yes"), |
| 248 | +            c("City F.E.", "No", "No", "Yes", "Yes"), |
| 249 | +            c("Industry F.E.", "No", "No", "No", "Yes") |
| 250 | +          )) |
| 251 | +``` |
0 commit comments