Skip to content

Commit 2707576

Browse files
authored
Add files via upload
1 parent e81cf68 commit 2707576

File tree

8 files changed

+1365
-0
lines changed

8 files changed

+1365
-0
lines changed

Logistic_Project1/Logistic.py

Lines changed: 312 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,312 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Sat Jul 21 21:10:57 2018
4+
5+
@author: wzy
6+
"""
7+
from matplotlib.font_manager import FontProperties
8+
import matplotlib.pyplot as plt
9+
import numpy as np
10+
import random
11+
12+
def Gradient_Ascent_test(x_start=0.0, alpha=0.01, precision=0.00000001,
                         max_iter=100000):
    """
    Demonstrate gradient ascent by maximising f(x) = -x^2 + 4x.

    The derivative f'(x) = -2x + 4 is zero at x = 2, so the printed and
    returned value approaches 2 when the iteration converges.

    Parameters:
        x_start - starting point of the search (default 0.0)
        alpha - step size (learning rate) applied to each update
        precision - stop once two successive estimates differ by no more
                    than this amount
        max_iter - upper bound on the number of updates, so that a step
                   size that makes the iteration diverge cannot loop forever

    Returns:
        x_new - the final estimate of the maximiser (also printed)

    Modify:
        2018-07-22
    """
    def f_prime(x):
        # Derivative of f(x) = -x^2 + 4x.
        return -2 * x + 4

    x_new = x_start
    for _ in range(max_iter):
        x_old = x_new
        # Move uphill along the gradient.
        x_new = x_old + alpha * f_prime(x_old)
        if abs(x_new - x_old) <= precision:
            break
    # Print the approximate maximiser, as the original demo did.
    print(x_new)
    return x_new
43+
44+
45+
def loadDataSet(fileName='testSet.txt'):
    """
    Load a whitespace-separated data file of the form "x1 x2 label".

    Each non-blank line contributes one sample [1.0, x1, x2] (the leading
    1.0 is the constant term that multiplies the bias weight) and one
    integer label.

    Parameters:
        fileName - path of the data file (default 'testSet.txt')

    Returns:
        dataMat - list of [1.0, x1, x2] samples
        labelMat - list of integer labels

    Modify:
        2018-07-22
    """
    dataMat = []
    labelMat = []
    # The context manager closes the file even if a line fails to parse.
    with open(fileName) as fr:
        for line in fr:
            lineArr = line.split()
            if not lineArr:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
    return dataMat, labelMat
77+
78+
79+
def plotBestFit(weights):
    """
    Scatter-plot the data set and draw the line defined by weights.

    Parameters:
        weights - indexable holding w0, w1, w2 at positions 0, 1, 2

    Returns:
        None

    Modify:
        2018-07-22
    """
    samples, labels = loadDataSet()
    points = np.array(samples)
    # Number of samples (rows of the data set).
    count = np.shape(samples)[0]

    # Split the row indices by label: 1 is positive, anything else negative.
    positive = [k for k in range(count) if int(labels[k]) == 1]
    negative = [k for k in range(count) if int(labels[k]) != 1]
    pos_x = [points[k, 1] for k in positive]
    pos_y = [points[k, 2] for k in positive]
    neg_x = [points[k, 1] for k in negative]
    neg_y = [points[k, 2] for k in negative]

    figure = plt.figure()
    axes = figure.add_subplot(111)
    # Positive samples as red squares, negative samples as green dots.
    axes.scatter(pos_x, pos_y, s=20, c='red', marker='s', alpha=.5)
    axes.scatter(neg_x, neg_y, s=20, c='green', alpha=.5)

    # The line satisfies w0*x0 + w1*x1 + w2*x2 = 0 with x0 = 1,
    # x1 on the horizontal axis and x2 on the vertical axis.
    xs = np.arange(-3.0, 3.0, 0.1)
    ys = (-weights[0] - weights[1] * xs) / weights[2]
    axes.plot(xs, ys)

    plt.title('BestFit')
    plt.xlabel('x1')
    plt.ylabel('y2')
    plt.show()
136+
137+
138+
def sigmoid(inX):
    """
    Logistic sigmoid 1 / (1 + exp(-inX)), evaluated without overflow.

    The value is computed as exp(-log(1 + exp(-inX))) through
    np.logaddexp, so large negative inputs no longer overflow exp() and
    emit a RuntimeWarning. Only NumPy ufuncs are used, so an np.matrix
    input yields an np.matrix result.

    Parameters:
        inX - scalar, ndarray or matrix of input values

    Returns:
        the sigmoid of inX, element-wise, in the range [0, 1]

    Modify:
        2018-07-22
    """
    return np.exp(-np.logaddexp(0, np.negative(inX)))
152+
153+
154+
def gradAscent(dataMath, classLabels, alpha=0.01, maxCycles=500):
    """
    Fit logistic-regression weights with batch gradient ascent.

    Works on plain ndarrays instead of np.mat, which was removed in
    NumPy 2.0, and records the weight history in a preallocated array
    rather than growing it with np.append on every cycle.

    Parameters:
        dataMath - 2-D data set, one sample per row (m rows, n columns)
        classLabels - sequence of m labels
        alpha - step size (learning rate), default 0.01
        maxCycles - number of full-batch updates, default 500

    Returns:
        weights - ndarray of shape (n, 1) holding the fitted weights
        weights_array - ndarray of shape (maxCycles, n); row k holds the
                        weights after update k

    Modify:
        2018-07-22
    """
    features = np.asarray(dataMath, dtype=float)
    # Column vector so that it lines up with the (m, 1) predictions.
    labels = np.asarray(classLabels, dtype=float).reshape(-1, 1)
    n = features.shape[1]
    weights = np.ones((n, 1))
    weights_array = np.empty((maxCycles, n))
    for k in range(maxCycles):
        # Vectorised gradient-ascent update over the whole data set.
        h = sigmoid(np.dot(features, weights))
        error = labels - h
        weights = weights + alpha * np.dot(features.T, error)
        weights_array[k] = weights.ravel()
    return weights, weights_array
193+
194+
195+
def stocGradAscent1(dataMatrix, classLabels, numIter=150):
    """
    Fit logistic-regression weights with improved stochastic gradient ascent.

    Every pass visits the samples in a fresh random order, each exactly
    once. The earlier version drew a position in the list of unused
    indices but then used that position itself as the row index, so
    samples could repeat or be skipped within a pass.

    Parameters:
        dataMatrix - 2-D data array, one sample per row (m rows, n columns)
        classLabels - sequence of m labels
        numIter - number of passes over the data, default 150

    Returns:
        weights - ndarray of shape (n,) holding the fitted weights
        weights_array - ndarray of shape (numIter * m, n); one row per
                        single-sample update

    Modify:
        2018-07-22
    """
    dataMatrix = np.asarray(dataMatrix, dtype=float)
    m, n = np.shape(dataMatrix)
    weights = np.ones(n)
    # Preallocate the history instead of growing it with np.append.
    weights_array = np.empty((numIter * m, n))
    step = 0
    for j in range(numIter):
        # A random permutation gives sampling without replacement.
        for i, sampleIndex in enumerate(random.sample(range(m), m)):
            # The step size shrinks as training proceeds; the constant
            # 0.01 keeps it from ever reaching zero.
            alpha = 4 / (1.0 + j + i) + 0.01
            h = sigmoid(np.dot(dataMatrix[sampleIndex], weights))
            error = classLabels[sampleIndex] - h
            weights = weights + alpha * error * dataMatrix[sampleIndex]
            weights_array[step] = weights
            step += 1
    return weights, weights_array
237+
238+
239+
def _plot_weight_history(column_axes, weights_array, title, font):
    """Plot columns 0-2 of weights_array down one column of three subplots."""
    x = np.arange(0, len(weights_array), 1)
    for idx, ylabel in enumerate((u'w0', u'w1', u'w2')):
        ax = column_axes[idx]
        ax.plot(x, weights_array[:, idx])
        ylabel_text = ax.set_ylabel(ylabel, fontproperties=font)
        plt.setp(ylabel_text, size=20, weight='bold', color='black')
    title_text = column_axes[0].set_title(title, fontproperties=font)
    plt.setp(title_text, size=20, weight='bold', color='black')
    # "迭代次数" (number of iterations) describes the x axis, so it is set
    # as the x label of the bottom subplot rather than as a second title.
    xlabel_text = column_axes[2].set_xlabel(u'迭代次数', fontproperties=font)
    plt.setp(xlabel_text, size=20, weight='bold', color='black')


def plotWeights(weights_array1, weights_array2):
    """
    Plot how the regression weights evolve over the updates.

    The left column shows weights_array1 and the right column shows
    weights_array2; the rows are w0, w1 and w2.

    Parameters:
        weights_array1 - 2-D array of weights, one row per update
                         (titled as the improved algorithm)
        weights_array2 - 2-D array of weights, one row per update
                         (titled as plain gradient ascent)

    Returns:
        None

    Modify:
        2018-07-22
    """
    import os

    # SimSun renders the Chinese labels. The path only exists on Windows,
    # so fall back to the default font elsewhere instead of depending on it.
    font_path = r"C:\Windows\Fonts\simsun.ttc"
    if os.path.exists(font_path):
        font = FontProperties(fname=font_path, size=14)
    else:
        font = FontProperties(size=14)

    # 3 rows x 2 columns, axes not shared, figure size (20, 10).
    fig, axs = plt.subplots(nrows=3, ncols=2, sharex=False, sharey=False, figsize=(20, 10))
    # The keyword is spelled 'fontproperties' in the helper: recent
    # matplotlib versions reject the mixed-case 'FontProperties'.
    _plot_weight_history(axs[:, 0], weights_array1,
                         u'改进的梯度上升算法,回归系数与迭代次数关系', font)
    _plot_weight_history(axs[:, 1], weights_array2,
                         u'梯度上升算法,回归系数与迭代次数关系', font)
    plt.show()
297+
298+
299+
if __name__ == '__main__':
    # Other demos available in this file:
    #   Gradient_Ascent_test()  - one-dimensional gradient-ascent example
    #   plotBestFit(weights)    - scatter plot of the data with the fitted line

    # Load the samples and their labels.
    dataMat, labelMat = loadDataSet()

    # Train with batch gradient ascent, keeping the per-update weight history.
    weights2, weights_array2 = gradAscent(dataMat, labelMat)

    # Train with the improved stochastic variant on the same data.
    weights1, weights_array1 = stocGradAscent1(np.array(dataMat), labelMat)

    # Compare how the weights of the two methods evolve.
    plotWeights(weights_array1, weights_array2)
312+

0 commit comments

Comments
 (0)