Count Of Specific Words In Multiple Text Files
I have multiple text files and I need to find and count specific words in those files and write the counts to a CSV file. Column A contains the txt file names and in the header the wor
Solution 1:
Edit: As per your new request, I have added the "total_words" column. The code has been updated.
Below is code that works. Just change the "folderpath" variable to the path of the folder containing the text files, and change the "target_file" variable to the path where you want the output CSV file to be created.
Sample csv output:
Code:
from collections import Counter
import glob
import os
import re
# Words to count in every text file; each one becomes a CSV column.
# Keep the entries lowercase: the file text is lowercased before counting.
header = [
    'annual',
    'investment',
    'statement',
    'range',
    'deposit',
    'supercalifragilisticexpialidocious',
]

# Folder that is searched for the *.txt input files.
folderpath = r'C:\Users\USERname4\Desktop\myfolder'

# Path of the CSV report that gets (over)written.
target_file = r'C:\Users\USERname4\Desktop\mycsv.csv'
# Fragments deferred with opCode=1, waiting for the next opCode=2 flush.
queueWAP = []


def writeAndPrint(fileObject, toBeWAP, opCode=0):
    """Write text to a file and echo it to stdout, immediately or deferred.

    Args:
        fileObject: Writable text file object (anything with ``write``).
        toBeWAP: Text to write and print; not used when ``opCode`` is 2.
        opCode: 0 writes and prints ``toBeWAP`` right away; 1 only appends
            it to the module-level ``queueWAP``; 2 writes and prints every
            queued item in insertion order, then empties the queue.  Any
            other value does nothing.
    """
    global queueWAP
    if opCode == 0:
        fileObject.write(toBeWAP)
        print(toBeWAP)
    elif opCode == 1:
        queueWAP.append(toBeWAP)
    elif opCode == 2:
        for queued in queueWAP:
            fileObject.write(queued)
            print(queued)
        # Rebind (hence the `global` above) so the next row starts empty.
        queueWAP = []
# Build the CSV report: one header line, then one row per *.txt file found
# in `folderpath`.  Every fragment goes through writeAndPrint so the console
# shows exactly what is written to the file.
# The `with` block closes the report even if reading an input file fails.
with open(target_file, 'w') as mycsvfile:
    writeAndPrint(mycsvfile, "file_name,total_words")
    for column in header:
        writeAndPrint(mycsvfile, "," + column)
    writeAndPrint(mycsvfile, "\n")

    # os.path.join / basename instead of hard-coded backslashes, so the
    # script also works where the path separator is not "\".
    filepaths = glob.glob(os.path.join(folderpath, "*.txt"))
    for file in filepaths:
        with open(file) as f:
            # Lowercase first so matching against `header` ignores case.
            words = re.findall(r'\w+', f.read().lower())
        counter = Counter(words)

        writeAndPrint(mycsvfile, os.path.basename(file))
        # len(words), not len(counter): the column is the total number of
        # word tokens in the file, not the number of distinct words.
        writeAndPrint(mycsvfile, "," + str(len(words)))
        for column in header:
            # A Counter returns 0 for missing keys, so absent words need
            # no special case.
            writeAndPrint(mycsvfile, "," + str(counter[column]))
        writeAndPrint(mycsvfile, "\n")
Solution 2:
Using 'Counter' seems to be the right choice here, but I think you are using it wrong.
Here is a possible solution that may work for you:
import csv

words = ['Abuse', 'Accommodating', 'Accommodation', 'Accountability']
rows = []
# NOTE: `filepaths` and `Counter` are not defined in this snippet; it
# presumably relies on the asker's existing code for both -- verify.
for file in filepaths:
    with open(file, 'r') as f:
        # Tokens are split on whitespace only: matching is case-sensitive
        # and punctuation stays attached to the word.
        words_in_file = [word for line in f for word in line.split()]
    # this will count all the words in the file (not optimal)
    wordcounts = Counter(words_in_file)
    # first column is the file name, followed by one count per word of
    # interest (a Counter returns 0 for words that never occur)
    rows.append([file] + [wordcounts[word] for word in words])

# newline='' lets the csv module control line endings (otherwise blank
# rows appear on Windows); `with` closes the file so all rows are flushed.
with open('C:/Users/haris/Downloads/PDF/firstcsv.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(rows)
Post a Comment for "Count Of Specific Words In Multiple Text Files"