11import pandas as pd
2- df = pd .read_stata (data_filepath + "individual_characteristics.dta" )
2+ from os import getcwd
3+ import argparse
4+
import os

# Command-line interface: the only option is the directory that holds the
# Stata data file; it defaults to the current working directory.
parser = argparse.ArgumentParser()
parser.add_argument('-d', '--data_filepath', help="""Select the directory where
is stored the file. Default is the current directory.""",
                    default=getcwd())

args = parser.parse_args()
data_filepath = args.data_filepath

# Build the path with os.path.join rather than string concatenation:
# getcwd() (the default) has no trailing separator, so plain "+" would
# produce ".../dirindividual_characteristics.dta".
df = pd.read_stata(
    os.path.join(data_filepath, "individual_characteristics.dta"))

# Split the survey rows by the value of the `village` column.
df1 = df[df.village == 1]
df2 = df[df.village == 2]
516
617# Enter code here!
718df1 .head ()
819
920
10- sex1 = {df1 .pid [i ] : df1 .resp_gend [i ] for i in range (len (df1 .pid ))}
11- caste1 = {df1 .pid [i ] : df1 .caste [i ] for i in range (len (df1 .pid ))}
12- religion1 = {df1 .pid [i ] : df1 .religion [i ] for i in range (len (df1 .pid ))}
def get_params(dfx, elem, items_range=None):
    """Map each person ID in *dfx* to the matching entry of *elem*.

    Args:
        dfx: Object exposing a ``pid`` attribute that can be indexed by the
            labels in *items_range* (a pandas DataFrame in this script).
        elem: Container indexed by the same labels as ``dfx.pid``; supplies
            the dictionary values.
        items_range: Iterable of labels to look up. When omitted, every
            label in ``dfx.pid.index`` is used, so callers do not have to
            hard-code the label range of a filtered frame.

    Returns:
        dict: ``{dfx.pid[i]: elem[i]}`` for every ``i`` in *items_range*.
    """
    if items_range is None:
        # Lookups below are label-based, so iterate the labels actually
        # present instead of assuming they start at zero.
        items_range = dfx.pid.index
    return {dfx.pid[i]: elem[i] for i in items_range}
23+
# Per-person lookup tables for village 1, keyed by person ID.
# NOTE(review): range(len(df1.pid)) assumes df1's row labels run 0..len-1;
# confirm against the data file.
sex1 = get_params(df1, df1.resp_gend, range(len(df1.pid)))
caste1 = get_params(df1, df1.caste, range(len(df1.pid)))
# The get_params call was missing here, which left religion1 as a bare
# tuple instead of a pid -> religion dictionary.
religion1 = get_params(df1, df1.religion, range(len(df1.pid)))
27+
1328# Continue for df2 as well.
1429
1530j = 203
16- sex2 = {df2 .pid [j ] : df2 .resp_gend [j ] for j in range (203 ,406 )}
17- caste2 = {df2 .pid [j ] : df2 .caste [j ] for j in range (203 ,406 ) }
18- religion2 = {df2 .pid [j ] : df2 .religion [j ] for j in range (203 ,406 )}
19-
# Per-person lookup tables for village 2, keyed by person ID.
# NOTE(review): range(203, 406) presumably matches df2's row labels;
# confirm against the data file.
sex2 = get_params(df2, df2.resp_gend, range(203, 406))
caste2 = get_params(df2, df2.caste, range(203, 406))
# The get_params call was missing here, which left religion2 as a bare
# tuple instead of a pid -> religion dictionary.
religion2 = get_params(df2, df2.religion, range(203, 406))
2034
2135
22- from collections import Counter
def chance_homophily(chars):
    """Return the chance that two random draws from *chars* share a value.

    For every distinct value the share of entries holding it is squared,
    and the squares are summed.

    Args:
        chars: Dictionary whose values are the (hashable) characteristics.

    Returns:
        The sum of squared shares; ``0`` when *chars* is empty.
    """
    # Imported locally so the function does not depend on a file-level
    # import of collections.
    from collections import Counter

    total = len(chars)
    # One counting pass instead of rescanning every value per category.
    # Values that are not equal to themselves (e.g. NaN) never match any
    # category, so they are left out of the counts but still contribute to
    # `total`.
    counts = Counter(value for value in chars.values() if value == value)
    return sum((count / total) ** 2 for count in counts.values())
3243
44+
3345favorite_colors = {
3446 "ankit" : "red" ,
3547 "xiaoyu" : "blue" ,
@@ -40,7 +52,6 @@ def chance_homophily(chars):
4052print (color_homophily )
4153
4254
43-
4455print ("Village 1 chance of same sex:" , chance_homophily (sex1 ))
4556# Enter your code here.
4657print ("Village 1 chance of same caste:" , chance_homophily (caste1 ))
@@ -51,6 +62,31 @@ def chance_homophily(chars):
5162print ("Village 2 chance of same caste:" , chance_homophily (caste2 ))
5263
5364
def checks_for_homophility(nodes, G, chars, IDs):
    """Score one ordered node pair for the homophily computation.

    Args:
        nodes: Sequence whose first two items are the nodes ``n1`` and ``n2``.
        G: Graph object providing ``has_edge(n1, n2)``.
        chars: Dictionary mapping a person ID to a characteristic.
        IDs: Mapping from a node to its person ID.

    Returns:
        tuple: ``(num_ties, num_same_ties)``. ``(0, 0)`` when the pair is
        skipped, ``(1, 0)`` for a tie between different characteristics and
        ``(1, 1)`` for a tie between equal characteristics.
    """
    n1 = nodes[0]
    n2 = nodes[1]

    # Do not double-count edges: only the ordering n1 > n2 is scored.
    if n1 <= n2:
        return 0, 0

    # Both endpoints need a known characteristic.
    if IDs[n1] not in chars or IDs[n2] not in chars:
        return 0, 0

    if not G.has_edge(n1, n2):
        return 0, 0

    num_same_ties = 1 if chars[IDs[n1]] == chars[IDs[n2]] else 0
    return 1, num_same_ties


# homophily() calls this helper as `check_for_homophily`; expose that
# spelling too so the call does not raise NameError.
check_for_homophily = checks_for_homophility
89+
5490
5591def homophily (G , chars , IDs ):
5692 """
@@ -61,26 +97,25 @@ def homophily(G, chars, IDs):
6197 num_same_ties , num_ties = 0 , 0
6298 for n1 in G .nodes ():
6399 for n2 in G .nodes ():
64- if n1 > n2 : # do not double-count edges!
65- if IDs [n1 ] in chars and IDs [n2 ] in chars :
66- if G .has_edge (n1 , n2 ):
67- # Should `num_ties` be incremented? What about `num_same_ties`?
68- num_ties += 1
69- if chars [IDs [n1 ]] == chars [IDs [n2 ]]:
70100
101+ nodes = [n1 , n2 ]
102+ ties , same_ties = check_for_homophily (nodes , G , chars , IDs )
71103
72- return (num_same_ties / num_ties )
73-
74-
75- print ("Village 1 observed proportion of same sex:" , homophily (G1 , sex1 , pid1 ))
76- print ("Village 1 observed proportion of same caste:" , homophily (G1 , caste1 , pid1 ))
77- print ("Village 1 observed proportion of same religion:" , homophily (G1 , religion1 , pid1 ))
78- # Enter your code here!
79- print ("Village 2 observed proportion of same sex:" , homophily (G2 , sex2 , pid2 ))
80- print ("Village 2 observed proportion of same caste:" , homophily (G2 , caste2 , pid2 ))
81- print ("Village 2 observed proportion of same religion:" , homophily (G1 , religion2 , pid2 ))
82-
104+ num_ties += ties
105+ num_same_ties += same_ties
83106
84107
108+ return (num_same_ties / num_ties )
85109
86110
# Observed homophily per village: each call pairs a village's graph with
# that village's characteristic dictionary and ID mapping.
print("Village 1 observed proportion of same sex:", homophily(G1, sex1, pid1))
print("Village 1 observed proportion of same caste:",
      homophily(G1, caste1, pid1))
print("Village 1 observed proportion of same religion:",
      homophily(G1, religion1, pid1))
# Enter your code here!
print("Village 2 observed proportion of same sex:", homophily(G2, sex2, pid2))
print("Village 2 observed proportion of same caste:",
      homophily(G2, caste2, pid2))
# Village 2 data must be scored against the village 2 graph (was G1).
print("Village 2 observed proportion of same religion:",
      homophily(G2, religion2, pid2))
0 commit comments