Commit 450d675

Author: ugik
Commit message: Rx chat-bot
1 parent 34e0aa9 commit 450d675

12 files changed: +1284 -60 lines changed

Rx_training_data (2.22 KB): Binary file not shown.
5.25 KB: Binary file not shown.
Rxmodel.tflearn.index (887 Bytes): Binary file not shown.
Rxmodel.tflearn.meta (128 KB): Binary file not shown.
Lines changed: 271 additions & 0 deletions

@@ -0,0 +1,271 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# things we need for NLP\n",
    "import nltk\n",
    "from nltk.stem.lancaster import LancasterStemmer\n",
    "stemmer = LancasterStemmer()\n",
    "\n",
    "# things we need for Tensorflow\n",
    "import numpy as np\n",
    "import tflearn\n",
    "import tensorflow as tf\n",
    "import random\n",
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# import our chat-bot intents file\n",
    "import json\n",
    "with open('intents_Rx.json') as json_data:\n",
    "    intents = json.load(json_data)\n",
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "17 documents\n",
      "5 classes ['coupon', 'goodbye', 'greeting', 'med', 'thanks']\n",
      "40 unique stemmed words [\"'m\", \"'s\", 'a', 'anyon', 'ar', 'buy', 'bye', 'can', 'cheap', 'cheapest', 'coupon', 'day', 'deal', 'find', 'for', 'good', 'goodby', 'hello', 'help', 'hi', 'how', 'i', 'is', 'lat', 'less', 'look', 'me', 'med', 'money', 'see', 'send', 'thank', 'that', 'the', 'ther', 'to', 'want', 'what', 'wher', 'you']\n"
     ]
    }
   ],
   "source": [
    "words = []\n",
    "classes = []\n",
    "documents = []\n",
    "ignore_words = ['?']\n",
    "# loop through each sentence in our intents patterns\n",
    "for intent in intents['intents']:\n",
    "    for pattern in intent['patterns']:\n",
    "        # tokenize each word in the sentence\n",
    "        w = nltk.word_tokenize(pattern)\n",
    "        # add to our words list\n",
    "        words.extend(w)\n",
    "        # add to documents in our corpus\n",
    "        documents.append((w, intent['tag']))\n",
    "        # add to our classes list\n",
    "        if intent['tag'] not in classes:\n",
    "            classes.append(intent['tag'])\n",
    "\n",
    "# stem and lower each word and remove duplicates\n",
    "words = [stemmer.stem(w.lower()) for w in words if w not in ignore_words]\n",
    "words = sorted(list(set(words)))\n",
    "\n",
    "# remove duplicates\n",
    "classes = sorted(list(set(classes)))\n",
    "\n",
    "print(len(documents), \"documents\")\n",
    "print(len(classes), \"classes\", classes)\n",
    "print(len(words), \"unique stemmed words\", words)\n",
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# create our training data\n",
    "training = []\n",
    "output = []\n",
    "# create an empty array for our output\n",
    "output_empty = [0] * len(classes)\n",
    "\n",
    "# training set, bag of words for each sentence\n",
    "for doc in documents:\n",
    "    # initialize our bag of words\n",
    "    bag = []\n",
    "    # list of tokenized words for the pattern\n",
    "    pattern_words = doc[0]\n",
    "    # stem each word\n",
    "    pattern_words = [stemmer.stem(word.lower()) for word in pattern_words]\n",
    "    # create our bag of words array\n",
    "    for w in words:\n",
    "        bag.append(1 if w in pattern_words else 0)\n",
    "\n",
    "    # output is a '0' for each tag and '1' for current tag\n",
    "    output_row = list(output_empty)\n",
    "    output_row[classes.index(doc[1])] = 1\n",
    "\n",
    "    training.append([bag, output_row])\n",
    "\n",
    "# shuffle our features and turn into np.array\n",
    "random.shuffle(training)\n",
    "training = np.array(training)\n",
    "\n",
    "# create train and test lists\n",
    "train_x = list(training[:,0])\n",
    "train_y = list(training[:,1])\n",
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training Step: 2999 | total loss: \u001b[1m\u001b[32m0.15651\u001b[0m\u001b[0m | time: 0.006s\n",
      "| Adam | epoch: 1000 | loss: 0.15651 - acc: 0.9794 -- iter: 16/17\n",
      "Training Step: 3000 | total loss: \u001b[1m\u001b[32m0.14101\u001b[0m\u001b[0m | time: 0.008s\n",
      "| Adam | epoch: 1000 | loss: 0.14101 - acc: 0.9815 -- iter: 17/17\n",
      "--\n",
      "INFO:tensorflow:/home/gk/gensim/notebooks/Rxmodel.tflearn is not in all_model_checkpoint_paths. Manually adding it.\n"
     ]
    }
   ],
   "source": [
    "# reset underlying graph data\n",
    "tf.reset_default_graph()\n",
    "# Build neural network\n",
    "net = tflearn.input_data(shape=[None, len(train_x[0])])\n",
    "net = tflearn.fully_connected(net, 8)\n",
    "net = tflearn.fully_connected(net, 8)\n",
    "net = tflearn.fully_connected(net, len(train_y[0]), activation='softmax')\n",
    "net = tflearn.regression(net)\n",
    "\n",
    "# Define model and setup tensorboard\n",
    "model = tflearn.DNN(net, tensorboard_dir='tflearn_logs')\n",
    "# Start training (apply gradient descent algorithm)\n",
    "model.fit(train_x, train_y, n_epoch=1000, batch_size=8, show_metric=True)\n",
    "model.save('Rxmodel.tflearn')\n",
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def clean_up_sentence(sentence):\n",
    "    # tokenize the pattern\n",
    "    sentence_words = nltk.word_tokenize(sentence)\n",
    "    # stem each word\n",
    "    sentence_words = [stemmer.stem(word.lower()) for word in sentence_words]\n",
    "    return sentence_words\n",
    "\n",
    "# return bag of words array: 0 or 1 for each word in the bag that exists in the sentence\n",
    "def bow(sentence, words, show_details=False):\n",
    "    # tokenize the pattern\n",
    "    sentence_words = clean_up_sentence(sentence)\n",
    "    # bag of words\n",
    "    bag = [0]*len(words)\n",
    "    for s in sentence_words:\n",
    "        for i,w in enumerate(words):\n",
    "            if w == s:\n",
    "                bag[i] = 1\n",
    "                if show_details:\n",
    "                    print(\"found in bag: %s\" % w)\n",
    "\n",
    "    return(np.array(bag))\n",
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
      " 0 0 0]\n",
      "['coupon', 'goodbye', 'greeting', 'med', 'thanks']\n"
     ]
    }
   ],
   "source": [
    "p = bow(\"hello\", words)\n",
    "print(p)\n",
    "print(classes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[4.3407872851730644e-09, 0.009914605878293514, 0.9880092740058899, 0.0020757599268108606, 3.3042027780538774e-07]]\n"
     ]
    }
   ],
   "source": [
    "print(model.predict([p]))\n",
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# save all of our data structures\n",
    "import pickle\n",
    "pickle.dump({'words': words, 'classes': classes, 'train_x': train_x, 'train_y': train_y}, open(\"Rx_training_data\", \"wb\"))\n",
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}

0 commit comments
