diff options
| author | mike <mike1994vink@gmail.com> | 2021-03-17 17:36:45 +0100 |
|---|---|---|
| committer | mike <mike1994vink@gmail.com> | 2021-03-17 17:36:45 +0100 |
| commit | 6a37e0c93349bc1ade29a0c3858295d897310856 (patch) | |
| tree | e41fa86796db496904b428d9dcbac51172f2c49e /Assignment 2/main.py | |
| parent | 5ab1872bc99282722726c65142a28b87aacaca5c (diff) | |
remove(not relevant)
Diffstat (limited to 'Assignment 2/main.py')
| -rw-r--r-- | Assignment 2/main.py | 49 |
1 files changed, 0 insertions, 49 deletions
diff --git a/Assignment 2/main.py b/Assignment 2/main.py
deleted file mode 100644
index 8a90d02..0000000
--- a/Assignment 2/main.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import fnmatch
-import os
-import pandas as pd
-import regex as re
-
-
-def fetch_reviews(testdata):
-    path = 'op_spam_v1.4/'
-    label = []
-
-    # Fetch all the file paths of the .txt files and append in a list
-    if testdata:
-        file_paths = [os.path.join(subdir,f)
-                      for subdir, dirs, files in os.walk(path)
-                      for f in fnmatch.filter(files, '*.txt') if 'fold5' in subdir]
-    else:
-        file_paths = [os.path.join(subdir, f)
-                      for subdir, dirs, files in os.walk(path)
-                      for f in fnmatch.filter(files, '*.txt') if 'fold5' not in subdir]
-
-    # Fetch all the labels and append in a list
-    for path in file_paths:
-        c = re.search('(trut|deceptiv)\w',path)
-        label.append(c.group())
-
-    # Create a dataframe of the label list
-    labels = pd.DataFrame(label, columns=['Label'])
-
-    # Fetch all the reviews and append in a list
-    reviews = []
-    for path in file_paths:
-        with open(path) as f_input:
-            reviews.append(f_input.read())
-
-    # Create a dataframe of the review list
-    reviews = pd.DataFrame(reviews, columns=['Review'])
-
-    # Merge the review dataframe and label dataframe
-    data = pd.merge(reviews, labels, right_index=True, left_index=True)
-    # convert reviews to lowercase
-    data['Review'] = data['Review'].map(lambda x: x.lower())
-
-    return data
-
-
-training_data = fetch_reviews(testdata=False)
-print(training_data)
-test_data = fetch_reviews(testdata=True)
-print(test_data)
\ No newline at end of file
