From cee6b4d208b207345f82cc58b614a91071443fa3 Mon Sep 17 00:00:00 2001 From: Hunter Date: Fri, 23 Oct 2020 13:27:30 +0200 Subject: Preprocessing van de reviews weggehaald --- Assignment 2/main.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Assignment 2/main.py b/Assignment 2/main.py index 6e06004..8a90d02 100644 --- a/Assignment 2/main.py +++ b/Assignment 2/main.py @@ -2,7 +2,6 @@ import fnmatch import os import pandas as pd import regex as re -from nltk.corpus import stopwords def fetch_reviews(testdata): @@ -40,10 +39,6 @@ def fetch_reviews(testdata): data = pd.merge(reviews, labels, right_index=True, left_index=True) # convert reviews to lowercase data['Review'] = data['Review'].map(lambda x: x.lower()) - # remove stopwords - stop = stopwords.words('english') - data['Review without stopwords'] = data['Review'].apply(lambda x: ' '.join( - [word for word in x.split() if word not in (stop)])) return data -- cgit v1.2.3