Skip to content

Commit fd1934b

Browse files
committed
Add data pipeline and models for IMDB, Reuters, and Boston Housing
1 parent 38a3779 commit fd1934b

File tree

1 file changed

+146
-0
lines changed

1 file changed

+146
-0
lines changed

Chapter04/chapter_04.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
import numpy as np
2+
import tensorflow as tf
3+
from tensorflow import keras
4+
import matplotlib.pyplot as plt
5+
from tensorflow.keras import layers
6+
from tensorflow.keras.datasets import imdb, reuters, boston_housing
7+
8+
9+
class DataLoader:
    """Thin wrappers around the Keras built-in dataset loaders.

    Every loader flattens the nested ``(train, test)`` pair returned by the
    corresponding ``load_data`` call into a single flat 4-tuple.
    """

    @staticmethod
    def load_imdb(num_words=10000):
        """Load the IMDB dataset.

        Args:
            num_words: Forwarded unchanged to ``imdb.load_data``. Keep it in
                sync with the ``dimension`` passed to
                ``Preprocessor.vectorize_sequences``, which uses the word
                indices as column positions.

        Returns:
            ``(train_data, train_labels, test_data, test_labels)``.
        """
        (train_data, train_labels), (test_data, test_labels) = imdb.load_data(
            num_words=num_words
        )
        return train_data, train_labels, test_data, test_labels

    @staticmethod
    def load_reuters(num_words=10000):
        """Load the Reuters dataset.

        Args:
            num_words: Forwarded unchanged to ``reuters.load_data``. Keep it
                in sync with the ``dimension`` passed to
                ``Preprocessor.vectorize_sequences``.

        Returns:
            ``(train_data, train_labels, test_data, test_labels)``.
        """
        (train_data, train_labels), (test_data, test_labels) = reuters.load_data(
            num_words=num_words
        )
        return train_data, train_labels, test_data, test_labels

    @staticmethod
    def load_boston_housing():
        """Load the Boston Housing dataset.

        Returns:
            ``(train_data, train_targets, test_data, test_targets)``.
        """
        (train_data, train_targets), (test_data, test_targets) = (
            boston_housing.load_data()
        )
        return train_data, train_targets, test_data, test_targets
24+
25+
class Preprocessor:
    """Stateless helpers that turn raw dataset arrays into model inputs."""

    @staticmethod
    def vectorize_sequences(sequences, dimension=10000):
        """Multi-hot encode integer sequences.

        Args:
            sequences: Sized iterable of integer index sequences. Every index
                must be smaller than ``dimension``.
            dimension: Number of columns in the encoded matrix.

        Returns:
            A ``(len(sequences), dimension)`` float array in which
            ``result[i, j]`` is ``1.0`` when index ``j`` occurs in
            ``sequences[i]`` and ``0.0`` otherwise.
        """
        results = np.zeros((len(sequences), dimension))
        for i, sequence in enumerate(sequences):
            # Fancy indexing sets every listed column of row i in one step;
            # repeated indices simply set the same cell again.
            results[i, sequence] = 1.0
        return results

    @staticmethod
    def to_one_hot(labels, dimension=46):
        """One-hot encode integer class labels into ``dimension`` classes.

        Delegates to ``keras.utils.to_categorical``.
        """
        return keras.utils.to_categorical(labels, num_classes=dimension)

    @staticmethod
    def normalize_data(train_data, test_data):
        """Standardize features using statistics of the training set only.

        The mean and standard deviation are computed along axis 0 of
        ``train_data`` and applied to both arrays, so no information from the
        test set leaks into the scaling.

        Args:
            train_data: NumPy array of training samples.
            test_data: NumPy array of test samples with the same feature
                layout.

        Returns:
            ``(train_data, test_data)`` as new, normalized arrays; the inputs
            are not modified.
        """
        mean = train_data.mean(axis=0)
        std = train_data.std(axis=0)
        # A feature that is constant in the training set has std == 0, which
        # would turn the whole column into NaN/inf. Dividing by 1 instead
        # leaves such a feature merely centred.
        std = np.where(std == 0, 1.0, std)
        train_data = (train_data - mean) / std
        test_data = (test_data - mean) / std
        return train_data, test_data
44+
45+
class TextModel:
    """Dense classifier with two 64-unit ReLU hidden layers.

    The model is built and compiled (RMSprop optimizer, accuracy metric) in
    the constructor and is available through ``get_model``.
    """

    def __init__(self, output_dim, loss_function):
        """Build and compile the network.

        Args:
            output_dim: Number of units in the output layer. Values above 1
                select a softmax output; otherwise a sigmoid output is used.
            loss_function: Loss passed to ``compile``.
        """
        if output_dim > 1:
            output_activation = "softmax"
        else:
            output_activation = "sigmoid"

        layer_stack = [
            layers.Dense(64, activation="relu"),
            layers.Dense(64, activation="relu"),
            layers.Dense(output_dim, activation=output_activation),
        ]
        self.model = keras.Sequential(layer_stack)
        self.model.compile(
            optimizer="rmsprop",
            loss=loss_function,
            metrics=["accuracy"],
        )

    def get_model(self):
        """Return the compiled Keras model."""
        return self.model
56+
57+
class RegressionModel:
    """Factory for a small dense regression network."""

    @staticmethod
    def build_model():
        """Create and compile a fresh regression model.

        The network has two 64-unit ReLU hidden layers followed by a single
        linear output unit, and is compiled with RMSprop, MSE loss and an MAE
        metric.

        Returns:
            The compiled ``keras.Sequential`` model.
        """
        layer_stack = [
            layers.Dense(64, activation="relu"),
            layers.Dense(64, activation="relu"),
            layers.Dense(1),  # no activation: unbounded scalar output
        ]
        regressor = keras.Sequential(layer_stack)
        regressor.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])
        return regressor
67+
68+
class Trainer:
    """Bundles a model with its training (and optional validation) data."""

    def __init__(self, model, train_data, train_labels, val_data=None, val_labels=None):
        """Store the model and datasets for a later call to ``train``.

        Args:
            model: Object exposing a Keras-style ``fit`` method.
            train_data: Training inputs.
            train_labels: Training targets.
            val_data: Optional validation inputs; ``None`` disables
                validation.
            val_labels: Optional validation targets, used together with
                ``val_data``.
        """
        self.model = model
        self.train_data, self.train_labels = train_data, train_labels
        self.val_data, self.val_labels = val_data, val_labels

    def train(self, epochs=20, batch_size=512):
        """Fit the model and return its per-epoch metric history.

        Args:
            epochs: Number of training epochs.
            batch_size: Mini-batch size.

        Returns:
            The ``history`` attribute of the object returned by ``fit``.
        """
        # Validation is switched on solely by the presence of val_data.
        if self.val_data is None:
            validation = None
        else:
            validation = (self.val_data, self.val_labels)

        fit_result = self.model.fit(
            self.train_data,
            self.train_labels,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=validation,
        )
        return fit_result.history
83+
84+
class Evaluator:
    """Convenience wrappers around a model's ``evaluate`` and ``predict``."""

    @staticmethod
    def evaluate(model, test_data, test_labels):
        """Evaluate ``model`` on the test set, print the outcome, and return it.

        Args:
            model: Object exposing ``evaluate(data, labels)``.
            test_data: Test inputs.
            test_labels: Test targets.

        Returns:
            Whatever ``model.evaluate`` returns.
        """
        scores = model.evaluate(test_data, test_labels)
        print(f"Test results: {scores}")
        return scores

    @staticmethod
    def predict(model, test_data):
        """Return ``model.predict(test_data)`` unchanged."""
        return model.predict(test_data)
95+
96+
class Plotter:
    """Matplotlib helpers for plotting per-epoch training curves."""

    @staticmethod
    def _plot_metric(history, metric, label, axis_label):
        """Plot the training curve of one metric, plus validation if present.

        Args:
            history: Mapping from metric name to a per-epoch list of values.
            metric: Key of the training metric, e.g. ``"loss"``; the
                validation curve is looked up under ``"val_" + metric``.
            label: Lower-case metric name used in the legend and title.
            axis_label: Text for the y axis.
        """
        training_values = history[metric]
        epochs = range(1, len(training_values) + 1)
        plt.plot(epochs, training_values, "bo", label=f"Training {label}")

        # Validation entries only exist when the model was fitted with
        # validation data, so their absence must not raise.
        validation_key = f"val_{metric}"
        if validation_key in history:
            plt.plot(epochs, history[validation_key], "b", label=f"Validation {label}")

        plt.title(f"Training and validation {label}")
        plt.xlabel("Epochs")
        plt.ylabel(axis_label)
        plt.legend()
        plt.show()

    @staticmethod
    def plot_loss(history):
        """Plot training loss per epoch, and validation loss when recorded.

        Args:
            history: Mapping with a ``"loss"`` entry and optionally a
                ``"val_loss"`` entry.
        """
        Plotter._plot_metric(history, "loss", "loss", "Loss")

    @staticmethod
    def plot_accuracy(history):
        """Plot training accuracy per epoch, and validation accuracy when recorded.

        Args:
            history: Mapping with an ``"accuracy"`` entry and optionally a
                ``"val_accuracy"`` entry.
        """
        Plotter._plot_metric(history, "accuracy", "accuracy", "Accuracy")
118+
119+
# --- IMDB: binary classification -------------------------------------------
train_data, train_labels, test_data, test_labels = DataLoader.load_imdb()
x_train = Preprocessor.vectorize_sequences(train_data)
x_test = Preprocessor.vectorize_sequences(test_data)
y_train = np.asarray(train_labels).astype("float32")
y_test = np.asarray(test_labels).astype("float32")

imdb_model = TextModel(output_dim=1, loss_function="binary_crossentropy").get_model()
# The first 10000 training samples are held out for validation.
trainer = Trainer(
    model=imdb_model,
    train_data=x_train[10000:],
    train_labels=y_train[10000:],
    val_data=x_train[:10000],
    val_labels=y_train[:10000],
)
history = trainer.train(epochs=4)
Evaluator.evaluate(imdb_model, x_test, y_test)
Plotter.plot_loss(history)
Plotter.plot_accuracy(history)

# --- Reuters: 46-class classification --------------------------------------
train_data, train_labels, test_data, test_labels = DataLoader.load_reuters()
x_train = Preprocessor.vectorize_sequences(train_data)
x_test = Preprocessor.vectorize_sequences(test_data)
y_train = Preprocessor.to_one_hot(train_labels)
y_test = Preprocessor.to_one_hot(test_labels)

reuters_model = TextModel(output_dim=46, loss_function="categorical_crossentropy").get_model()
# The first 1000 training samples are held out for validation.
trainer = Trainer(
    model=reuters_model,
    train_data=x_train[1000:],
    train_labels=y_train[1000:],
    val_data=x_train[:1000],
    val_labels=y_train[:1000],
)
history = trainer.train(epochs=9)
Evaluator.evaluate(reuters_model, x_test, y_test)
Plotter.plot_loss(history)
Plotter.plot_accuracy(history)

# --- Boston Housing: data preparation --------------------------------------
# Only loading and normalization happen in the visible code; no regression
# model is built or trained here.
train_data, train_targets, test_data, test_targets = DataLoader.load_boston_housing()
train_data, test_data = Preprocessor.normalize_data(train_data, test_data)

0 commit comments

Comments
 (0)