summary | refs | log | tree | commit | diff
path: root/Assignment 2/main.py
diff options
context:
space:
mode:
author: Hunter <h.sterk@students.uu.nl> 2020-10-23 13:27:30 +0200
committer: Hunter <h.sterk@students.uu.nl> 2020-10-23 13:27:30 +0200
commit: cee6b4d208b207345f82cc58b614a91071443fa3 (patch)
tree: 8138a513ac195787bd8cbcf022f75e1b7d08bc70 /Assignment 2/main.py
parent: 03141cdce5ac2971311ab0d247c425cafee09ff1 (diff)
Preprocessing van de reviews weggehaald
Diffstat (limited to 'Assignment 2/main.py')
-rw-r--r-- Assignment 2/main.py | 5
1 files changed, 0 insertions, 5 deletions
diff --git a/Assignment 2/main.py b/Assignment 2/main.py
index 6e06004..8a90d02 100644
--- a/Assignment 2/main.py
+++ b/Assignment 2/main.py
@@ -2,7 +2,6 @@ import fnmatch
import os
import pandas as pd
import regex as re
-from nltk.corpus import stopwords
def fetch_reviews(testdata):
@@ -40,10 +39,6 @@ def fetch_reviews(testdata):
data = pd.merge(reviews, labels, right_index=True, left_index=True)
# convert reviews to lowercase
data['Review'] = data['Review'].map(lambda x: x.lower())
- # remove stopwords
- stop = stopwords.words('english')
- data['Review without stopwords'] = data['Review'].apply(lambda x: ' '.join(
- [word for word in x.split() if word not in (stop)]))
return data