diff options
| author | Hunter <h.sterk@students.uu.nl> | 2020-10-23 13:27:30 +0200 |
|---|---|---|
| committer | Hunter <h.sterk@students.uu.nl> | 2020-10-23 13:27:30 +0200 |
| commit | cee6b4d208b207345f82cc58b614a91071443fa3 (patch) | |
| tree | 8138a513ac195787bd8cbcf022f75e1b7d08bc70 | |
| parent | 03141cdce5ac2971311ab0d247c425cafee09ff1 (diff) | |
Preprocessing van de reviews weggehaald
| -rw-r--r-- | Assignment 2/main.py | 5 |
1 file changed, 0 insertions, 5 deletions
```diff
diff --git a/Assignment 2/main.py b/Assignment 2/main.py
index 6e06004..8a90d02 100644
--- a/Assignment 2/main.py
+++ b/Assignment 2/main.py
@@ -2,7 +2,6 @@ import fnmatch
 import os
 import pandas as pd
 import regex as re
-from nltk.corpus import stopwords
 
 
 def fetch_reviews(testdata):
@@ -40,10 +39,6 @@ def fetch_reviews(testdata):
     data = pd.merge(reviews, labels, right_index=True, left_index=True)
     # convert reviews to lowercase
     data['Review'] = data['Review'].map(lambda x: x.lower())
-    # remove stopwords
-    stop = stopwords.words('english')
-    data['Review without stopwords'] = data['Review'].apply(lambda x: ' '.join(
-        [word for word in x.split() if word not in (stop)]))
     return data
 
 
```
