Example 1
from numpy import array
from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.embeddings import Embedding

# define documents
docs = ['Well done!',
        'Good work',
        'Great effort',
        'nice work',
        'Excellent!',
        'Weak',
        'Poor effort!',
        'not good',
        'poor work',
        'Could have done better.']
# define class labels
labels = array([1,1,1,1,1,0,0,0,0,0])
# integer encode the documents
vocab_size = 50
encoded_docs = [one_hot(d, vocab_size) for d in docs]
print(encoded_docs)
# pad documents to a max length of 4 words
max_length = 4
padded_docs = pad_sequences(encoded_docs, maxlen=max_length, padding='post')
print(padded_docs)
# define the model
model = Sequential()
model.add(Embedding(vocab_size, 8, input_length=max_length))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
# compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
# summarize the model
print(model.summary())
# fit the model
model.fit(padded_docs, labels, epochs=50, verbose=0)
# evaluate the model
loss, accuracy = model.evaluate(padded_docs, labels, verbose=0)
print('Accuracy: %f' % (accuracy*100))
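It helps to see what the two preprocessing calls return before the model ever runs: one_hot hashes each word into [1, vocab_size), so two words can land on the same integer, and pad_sequences right-pads shorter sequences with zeros. A minimal sketch (the integers come from a hash and vary between runs, so the values in the comments are only illustrative):

from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences

# hash each word to an integer in [1, 50); collisions are possible
encoded = one_hot('Could have done better.', 50)
print(encoded)    # e.g. [14, 7, 31, 22] -- one integer per word

# pad/truncate every sequence to exactly 4 integers, zeros on the right
padded = pad_sequences([encoded, one_hot('Weak', 50)], maxlen=4, padding='post')
print(padded)     # e.g. [[14  7 31 22]
                  #       [ 9  0  0  0]]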
Example 2
# -*- coding: utf-8 -*-
from keras.layers.core import Activation, Dense
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from keras.preprocessing import sequence
from sklearn.model_selection import train_test_split
import collections
import nltk
nltk.download('punkt')
import numpy as np

## EDA: find the longest sentence and count word frequencies
maxlen = 0
word_freqs = collections.Counter()
num_recs = 0
with open('train_data.txt', 'r') as f:
    for line in f:
        label, sentence = line.strip().split("\t")
        words = nltk.word_tokenize(sentence.lower())
        if len(words) > maxlen:
            maxlen = len(words)
        for word in words:
            word_freqs[word] += 1
        num_recs += 1
print('max_len ', maxlen)
print('nb_words ', len(word_freqs))

## Prepare the data
MAX_FEATURES = 2000
MAX_SENTENCE_LENGTH = 40
vocab_size = min(MAX_FEATURES, len(word_freqs)) + 2
word2index = {x[0]: i+2 for i, x in enumerate(word_freqs.most_common(MAX_FEATURES))}
word2index["PAD"] = 0
word2index["UNK"] = 1
index2word = {v: k for k, v in word2index.items()}
X = np.empty(num_recs, dtype=list)
y = np.zeros(num_recs)
i = 0
with open('train_data.txt', 'r') as f:
    for line in f:
        label, sentence = line.strip().split("\t")
        words = nltk.word_tokenize(sentence.lower())
        seqs = []
        for word in words:
            if word in word2index:
                seqs.append(word2index[word])
            else:
                seqs.append(word2index["UNK"])
        X[i] = seqs
        y[i] = int(label)
        i += 1
X = sequence.pad_sequences(X, maxlen=MAX_SENTENCE_LENGTH)

## Train/test split
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=42)

## Build the network
EMBEDDING_SIZE = 128
HIDDEN_LAYER_SIZE = 64
BATCH_SIZE = 32
NUM_EPOCHS = 10
model = Sequential()
model.add(Embedding(vocab_size, EMBEDDING_SIZE, input_length=MAX_SENTENCE_LENGTH))
model.add(LSTM(HIDDEN_LAYER_SIZE, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1))
model.add(Activation("sigmoid"))
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

## Train the network
model.fit(Xtrain, ytrain, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, validation_data=(Xtest, ytest))

## Predict
score, acc = model.evaluate(Xtest, ytest, batch_size=BATCH_SIZE)
print("\nTest score: %.3f, accuracy: %.3f" % (score, acc))
print('{}  {}  {}'.format('Predicted', 'Actual', 'Sentence'))
for i in range(5):
    idx = np.random.randint(len(Xtest))
    xtest = Xtest[idx].reshape(1, MAX_SENTENCE_LENGTH)
    ylabel = ytest[idx]
    whk = model.predict(xtest)   # raw sigmoid output, shape (1, 1)
    print(whk)
    ypred = whk[0][0]
    sent = " ".join([index2word[x] for x in xtest[0] if x != 0])
    print(' {}      {}     {}'.format(int(round(ypred)), int(ylabel), sent))

##### Classify sentences typed in by hand
INPUT_SENTENCES = ['I love reading.', 'You are so boring.']
XX = np.empty(len(INPUT_SENTENCES), dtype=list)
i = 0
for sentence in INPUT_SENTENCES:
    words = nltk.word_tokenize(sentence.lower())
    seq = []
    for word in words:
        if word in word2index:
            seq.append(word2index[word])
        else:
            seq.append(word2index['UNK'])
    XX[i] = seq
    i += 1
XX = sequence.pad_sequences(XX, maxlen=MAX_SENTENCE_LENGTH)
labels = [int(round(x[0])) for x in model.predict(XX)]
label2word = {1: 'positive', 0: 'negative'}
for i in range(len(INPUT_SENTENCES)):
    print('{} {}'.format(label2word[labels[i]], INPUT_SENTENCES[i]))
whk = model.predict(XX)
print(whk)
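Once trained, you will usually want to reuse the model without rerunning the whole script. A minimal sketch, assuming you persist the vocabulary with pickle alongside the Keras HDF5 file (the file names sentiment_lstm.h5 and word2index.pkl are illustrative, not from the example above):

import pickle
from keras.models import load_model

# save the trained network and the vocabulary it depends on
model.save('sentiment_lstm.h5')          # illustrative file name
with open('word2index.pkl', 'wb') as f:
    pickle.dump(word2index, f)

# later, in a fresh process: restore both and predict as before
model = load_model('sentiment_lstm.h5')
with open('word2index.pkl', 'rb') as f:
    word2index = pickle.load(f)

Saving word2index matters because a reloaded network is useless without the exact word-to-integer mapping it was trained with.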
The contents of train_data.txt look roughly like this (one record per line: a label, a tab, then the sentence):
1	The Da Vinci Code book is just awesome.
1	this was the first clive cussler i've ever read, but even books like Relic, and Da Vinci code were more plausible than this.
1	i liked the Da Vinci Code a lot.
1	i liked the Da Vinci Code a lot.
1	I liked the Da Vinci Code but it ultimatly didn't seem to hold it's own.
1	that's not even an exaggeration ) and at midnight we went to Wal-Mart to buy the Da Vinci Code, which is amazing of course.
1	I loved the Da Vinci Code, but now I want something better and different!..
1	i thought da vinci code was great, same with kite runner.
1	The Da Vinci Code is actually a good movie...
1	I thought the Da Vinci Code was a pretty good book.
1	The Da Vinci Code is one of the most beautiful movies ive ever seen.
1	The Da Vinci Code is an * amazing * book, do not get me wrong.
1	then I turn on the light and the radio and enjoy my Da Vinci Code.
1	The Da Vinci Code was REALLY good.
1	i love da vinci code....
1	i loved da vinci code..
1	TO NIGHT:: THE DA VINCI CODE AND A BEAUTIFUL MIND...
1	THE DA VINCI CODE is AN AWESOME BOOK....
1	Thing is, I enjoyed The Da Vinci Code.
1	very da vinci code slash amazing race.
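If you don't have the corpus at hand, a tiny stand-in file is enough to smoke-test the pipeline end to end. A minimal sketch; the negative sentences below are made up for illustration, and a real file needs both classes and many more records:

# write a toy train_data.txt in the expected label<TAB>sentence format
# (this overwrites any existing train_data.txt in the working directory)
records = [
    ('1', 'The Da Vinci Code book is just awesome.'),
    ('1', 'i love da vinci code....'),
    ('0', 'this movie was painfully boring.'),         # made-up negative example
    ('0', 'i would not recommend this book at all.'),  # made-up negative example
]
with open('train_data.txt', 'w') as f:
    for label, sentence in records:
        f.write('%s\t%s\n' % (label, sentence))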
Example 3: time series forecasting
import numpy
import matplotlib.pyplot as plt
from pandas import read_csv
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

# build supervised pairs: X is the window at time t, Y the value at t+look_back
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return numpy.array(dataX), numpy.array(dataY)

dataframe = read_csv('international-airline-passengers.csv', usecols=[1], engine='python', skipfooter=3)
dataset = dataframe.values
dataset = dataset.astype('float32')
train_size = int(len(dataset) * 0.67)
print(dataset)
# scale the series into [0, 1] before feeding it to the LSTM
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size, :], dataset[train_size:len(dataset), :]
# reshape into X=t and Y=t+1
look_back = 1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
# LSTM input shape is [samples, time steps, features]
trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2)
testPredict = model.predict(testX)
testPredict = scaler.inverse_transform(testPredict)
# predict from three hand-picked values; use transform, not fit_transform,
# so the new inputs are scaled with the range learned from the full dataset
whk = [[112], [390], [622]]
whk = scaler.transform(whk)
testOne = numpy.reshape(whk, (3, 1, 1))
print(testOne)
testPredict = model.predict(testOne)
testPredict = scaler.inverse_transform(testPredict)
print(testPredict)
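It is worth seeing what create_dataset actually builds on a toy array: with look_back=1 each input is one value and the target is the next value, and the len(dataset)-look_back-1 loop bound drops the final pair. A minimal sketch reusing the create_dataset defined above:

import numpy

toy = numpy.array([[10.0], [20.0], [30.0], [40.0], [50.0]])
toyX, toyY = create_dataset(toy, look_back=1)
print(toyX)   # [[10.] [20.] [30.]]  -- inputs at time t
print(toyY)   # [20. 30. 40.]        -- targets at time t+1
# note the last pair (40 -> 50) is dropped by the loop bound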
The international-airline-passengers.csv file is small; its contents look roughly like this:
"Month","International airline passengers: monthly totals in thousands. Jan 49 ? Dec 60" "1949-01",112 "1949-02",118 "1949-03",132 "1949-04",129 "1949-05",121 "1949-06",135 "1949-07",148 "1949-08",148 "1949-09",136 "1949-10",119 "1949-11",104 "1949-12",118 "1950-01",115 "1950-02",126 "1950-03",141 "1950-04",135 "1950-05",125 "1950-06",149 "1950-07",170 "1950-08",170 "1950-09",158 "1950-10",133 "1950-11",114 "1950-12",140 "1951-01",145 "1951-02",150 "1951-03",178 "1951-04",163 "1951-05",172 "1951-06",178