R - Using SVM and MapReduce for text classification
I'm new to Hadoop and MapReduce, and I'm trying to use MapReduce to train an SVM model that classifies messages into categories.
Currently, I can do this on a small set of data without MapReduce, using RTextTools and e1071, but I want to use MapReduce to train the model on a larger training set.
Below is the code I have used on the small dataset.
# Train an SVM text classifier on a document-term matrix and use it to
# classify a single new message.
library("e1071")
library("RTextTools")

# Predict the category of one message with a previously trained SVM.
#
# Arguments:
#   test_text   The message to classify (a single character string).
#   mat         The dense training document-term matrix. Only its column
#               names (the training vocabulary) are used here.
#   classifier  The model returned by svm() when it was fitted on `mat`.
#
# Returns the predicted category as a character string.
predicttest <- function(test_text, mat, classifier) {
  stopifnot(is.character(test_text), length(test_text) == 1L)

  # All-zero, two-row matrix with the training vocabulary as columns. Row 1
  # is a dummy row and row 2 carries the message being classified.
  # NOTE(review): the two-row shape is kept from the original code,
  # presumably to avoid handing predict() a single-row matrix -- confirm.
  new_data <- matrix(
    0,
    nrow = 2,
    ncol = ncol(mat),
    dimnames = list(c("", test_text), colnames(mat))
  )

  # Apply the same preprocessing to the message as to the training text.
  test_dtm <- create_matrix(
    test_text,
    language = "english",
    removeStopwords = TRUE,
    removeNumbers = TRUE,
    stemWords = TRUE,
    toLower = TRUE,
    removePunctuation = TRUE
  )
  test_mat <- as.matrix(test_dtm)

  # Copy the counts of every term the model was trained on; terms that are
  # not part of the training vocabulary are ignored.
  shared_terms <- intersect(colnames(test_mat), colnames(mat))
  if (length(shared_terms) > 0) {
    new_data[2, shared_terms] <- test_mat[1, shared_terms]
  }

  predicted <- predict(classifier, new_data)
  as.character(predicted[2])
}

# Using a smaller set of training data (large memory requirements).
# NOTE(review): the column names `text` and `category` are taken as written
# in the original script -- confirm they match the CSV header exactly.
training_data1 <- read.csv("train_final_combined.csv", header = TRUE)

train_dtm <- create_matrix(
  training_data1$text,
  language = "english",
  removeStopwords = TRUE,
  removeNumbers = TRUE,
  stemWords = TRUE,
  toLower = TRUE,
  removePunctuation = TRUE,
  removeSparseTerms = 0.998
)
mat <- as.matrix(train_dtm)

svm_classifier <- svm(mat, as.factor(training_data1$category))

predicttest("emma stone bringing style", mat, svm_classifier)
Sample of the training data: https://i.stack.imgur.com/m9pep.png
Comments
Post a Comment