Skip to content Skip to sidebar Skip to footer

Count Of Specific Words In Multiple Text Files

I have multiple text files and I need to find and count specific words in those files and write the counts to a CSV file. Column A contains the txt file names and in the header the wor

Solution 1:

Edit: As per your new request, I have added the "total_words" column. The code has been updated.

enter image description here


Below is code that works. Just change the "folderpath" variable to the path of the folder containing the text files, and change the "target_file" variable to the path where you want the output CSV file to be created.

Sample csv output:

enter image description here

Code:

import csv
import glob
import os
import re
from collections import Counter

# Words to count; each one becomes a column of the output CSV.
header = [
    'annual',
    'investment',
    'statement',
    'range',
    'deposit',
    'supercalifragilisticexpialidocious',
]
# Folder that is searched for *.txt files.
folderpath = r'C:\Users\USERname4\Desktop\myfolder'
# Path of the CSV file that gets created.
target_file = r'C:\Users\USERname4\Desktop\mycsv.csv'

# Text deferred by writeAndPrint(..., opCode=1) until the next opCode=2 call.
queueWAP = []


def writeAndPrint(fileObject, toBeWAP, opCode=0):
    """Write text to a file and echo it to stdout, optionally deferring it.

    Args:
        fileObject: Any object with a ``write(str)`` method.
        toBeWAP: The text to write and print. Ignored when ``opCode`` is 2.
        opCode: 0 writes and prints ``toBeWAP`` immediately; 1 appends it to
            the module-level ``queueWAP`` without writing anything; 2 writes
            and prints every queued item in insertion order and then empties
            the queue. Any other value does nothing.
    """
    global queueWAP
    if opCode == 0:
        fileObject.write(toBeWAP)
        print(toBeWAP)
    elif opCode == 1:
        queueWAP.append(toBeWAP)
    elif opCode == 2:
        for queued in queueWAP:
            fileObject.write(queued)
            print(queued)
        queueWAP = []
# Build the CSV: one header row, then one row per .txt file in folderpath.
# The context manager closes the output file even if reading an input fails.
with open(target_file, 'w') as mycsvfile:
    writeAndPrint(mycsvfile, "file_name,total_words")
    for keyword in header:
        writeAndPrint(mycsvfile, "," + keyword)
    writeAndPrint(mycsvfile, "\n")

    filepaths = glob.glob(os.path.join(folderpath, "*.txt"))
    for file in filepaths:
        with open(file) as f:
            # Text is lower-cased before tokenizing, so the entries in
            # `header` must be lower case to match anything.
            words = re.findall(r'\w+', f.read().lower())
        counter = Counter(words)

        # Only the file name goes in the first column, not the full path.
        writeAndPrint(mycsvfile, os.path.basename(file))
        # total_words counts every word occurrence in the file, not the
        # number of distinct words.
        writeAndPrint(mycsvfile, "," + str(len(words)))
        for keyword in header:
            # A Counter returns 0 for words that never occurred.
            writeAndPrint(mycsvfile, "," + str(counter[keyword]))
        writeAndPrint(mycsvfile, "\n")

Solution 2:

Using 'Counter' seems to be the right choice here, but I think you are using it wrong.

Here is a possible solution that may work for you:

# Words whose occurrences are reported, one CSV column per word.
words = ['Abuse', 'Accommodating', 'Accommodation', 'Accountability']

# One row per input file: the file path followed by one count per word.
rows = []
for file in filepaths:
    with open(file, 'r') as f:
        # Tokens are split on whitespace only, so they keep their original
        # case and any attached punctuation; matching against `words` is exact.
        words_in_file = [word for line in f for word in line.split()]
    # This counts every token in the file, although only a few are needed.
    wordcounts = Counter(words_in_file)
    # A Counter returns 0 for words that never occurred in the file.
    rows.append([file] + [wordcounts[word] for word in words])

# newline='' stops the csv module's own line endings from being translated
# again on Windows (which would insert a blank line after every row); the
# context manager flushes and closes the file once all rows are written.
with open('C:/Users/haris/Downloads/PDF/firstcsv.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(rows)

Post a Comment for "Count Of Specific Words In Multiple Text Files"