"""Count keyword occurrences and term frequencies in a text document.

Run as a script, this reads a document and a keyword list, upper-cases both
so that matching is case-insensitive, and prints for every keyword:

* the number of times it occurs in the document, and
* its term frequency (occurrences / total number of words in the document).

The print calls pass a single pre-formatted string so the script behaves the
same under Python 2.7 and Python 3.
"""
import math
import re
import sys
from collections import Counter
from operator import itemgetter

# Default input files; both can be overridden on the command line:
#     script.py [document_path [keywords_path]]
DOCUMENT_PATH = r'C:\Craig\Data2\craiglist-file1.txt'
KEYWORDS_PATH = 'keywords.txt'


def _read_tokens(path):
    """Return the whitespace-separated tokens of the file at *path*, upper-cased."""
    # `with` guarantees the handle is closed even if reading raises.
    with open(path, 'r') as handle:
        return handle.read().upper().split()


def countTerm(word, document):
    """Return the number of times *word* occurs in *document*.

    *document* may be an iterable of tokens or a single string.  A string is
    split on whitespace first so that whole tokens are compared rather than
    substrings.  The comparison is exact (case-sensitive); callers that want
    case-insensitive matching must normalise both arguments beforehand.
    """
    if hasattr(document, 'split'):
        document = document.split()
    return sum(1 for token in document if token == word)


def wordfreq(wordList):
    """Return a mapping of each distinct word in *wordList* to its count.

    The result is a ``collections.Counter``, so looking up a word that does
    not occur yields 0 instead of raising ``KeyError``.
    """
    # One pass over the tokens instead of calling list.count() per token.
    return Counter(wordList)


def docfreq(k, wF):
    """Return the number of documents in *wF* that contain the term *k*.

    *wF* is any iterable of documents, each in a form accepted by
    ``countTerm`` (a string or an iterable of tokens).  An open text file
    also works, in which case every line is treated as one document.
    Matching is exact, as in ``countTerm``.
    """
    return sum(1 for document in wF if countTerm(k, document) > 0)


def main(document_path=DOCUMENT_PATH, keywords_path=KEYWORDS_PATH):
    """Print per-keyword counts, the document length and term frequencies."""
    wordList = _read_tokens(document_path)
    keywordList = _read_tokens(keywords_path)

    frequencies = wordfreq(wordList)
    total_words = len(wordList)

    # Total number of times each keyword occurs in the document.
    for keyword in keywordList:
        print('%s %d' % (keyword, frequencies[keyword]))

    # Total number of words in the document.
    print('total number of words %d' % total_words)

    # Term frequency = occurrences of the keyword / words in the document.
    # The count is looked up by keyword (not by list position) and divided
    # as a float so the ratio is not truncated to 0 by integer division.
    for keyword in keywordList:
        if total_words:
            term_frequency = float(frequencies[keyword]) / total_words
        else:
            term_frequency = 0.0  # empty document: avoid dividing by zero
        print('%s %s' % (keyword, term_frequency))

    # TODO: inverse document frequency is not computed yet; it needs a
    # corpus of several documents (docfreq() counts the matching ones).


if __name__ == '__main__':
    # Optional positional arguments override the default input paths.
    main(*sys.argv[1:3])
var
This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)