Click here to Skip to main content
15,880,405 members
Please Sign up or sign in to vote.
1.00/5 (3 votes)
See more:
import sys, math, re
from operator import itemgetter
import math

# Read the document under analysis. The `with` block closes the file handle
# as soon as its contents have been read.
with open(r'C:\Craig\Data2\craiglist-file1.txt') as wF:
    wordFile = wF.read()
# Upper-case BEFORE splitting so the resulting word list is case-normalised;
# previously the upper-cased text was computed and then thrown away.
wordList = wordFile.upper().split()

# Read the keyword file and normalise it the same way as the document so
# that keywords and document words compare equal regardless of case.
with open('keywords.txt', 'r') as kWF:
    keywordFile = kWF.read()
keywordList = keywordFile.upper().split()

# Placeholder values; no code visible in this file reads them.
word = 'k'
document = 'wordlist'
wordlist = 'w'


#total number of times a term occurs in a document
def countTerm(word, document):
    """Return the number of times *word* occurs in *document*.

    word     -- the term to look for; compared with ``==``, so the match is
                exact and case-sensitive.
    document -- either an iterable of words, or a single string, which is
                split on whitespace first.

    Returns an int; 0 when the document is empty or the term is absent.
    """
    # Anything string-like is tokenised first; otherwise iterating it would
    # compare the term against single characters.
    if hasattr(document, "split"):
        document = document.split()
    return sum(1 for token in document if token == word)
# Report how often each keyword occurs in the word list.
for k in keywordList:
    # Use a dedicated accumulator name: assigning to `countTerm` here would
    # replace the function of that name with an integer.
    termCount = 0
    for w in wordList:
        if k == w:
            termCount += 1
        # Debug trace of the running count while scanning for the keyword
        # "the"; the check ignores case so it fires however the keyword
        # list is capitalised.
        if k.lower() == "the":
            print("%s %s %s" % (k, w, termCount))
    # A single pre-formatted argument prints identically under Python 2
    # and Python 3.
    print("%s %s" % (k, termCount))


#per-word occurrence counts for a document
def wordfreq(wordList):
    """Return, for every position in *wordList*, how often that word occurs.

    This is the same list the module-level statement
    ``[wordList.count(p) for p in wordList]`` produces, computed with one
    tally pass instead of one full scan per word.

    wordList -- a sequence of words (it is traversed twice).

    Returns a list of ints the same length as *wordList*; empty input gives
    an empty list.
    """
    counts = {}
    for token in wordList:
        counts[token] = counts.get(token, 0) + 1
    return [counts[token] for token in wordList]
# Tally every distinct word in one pass; calling wordList.count() once per
# token rescans the whole list each time.
dictionary = {}
for p in wordList:
    dictionary[p] = dictionary.get(p, 0) + 1
# NOTE: this rebinds the name `wordfreq` to a list with one count per
# position in wordList; the reporting loop further down indexes it.
wordfreq = [dictionary[p] for p in wordList]
# Total number of words in the document.
count2 = len(wordList)
print("total number of words %s" % count2)


#number of documents that contain a keyword
def docfreq(k, wF):
    """Return how many documents in *wF* contain the keyword *k*.

    k  -- the keyword to look for; matched against whole words with ``==``.
    wF -- an iterable of documents. Each document is either a string (split
          on whitespace, e.g. a line read from a file) or an iterable of
          words.

    Returns an int; 0 when *wF* is empty or no document contains *k*.
    """
    count = 0
    for doc in wF:
        # Tokenise string-like documents so `in` tests whole words rather
        # than substrings.
        words = doc.split() if hasattr(doc, "split") else doc
        # Test the document being iterated; no module-level state is read.
        if k in words:
            count += 1
    return count
# Term frequency of each keyword: occurrences of the keyword in the document
# divided by the total number of words in the document.
# TODO: the number of documents is not computed anywhere yet; it is needed
# before inverse document frequency can be calculated.
for keyword in keywordList:
    # Look the count up by word; indexing wordfreq by keyword position would
    # pair each keyword with an unrelated word's count.
    occurrences = dictionary.get(keyword, 0)
    # float() forces true division under Python 2; an empty document yields
    # 0.0 instead of a ZeroDivisionError.
    tf = float(occurrences) / count2 if count2 else 0.0
    print("%s %s" % (keyword, tf))




# term frequency (TF):
#   number of times the term occurs in the document / total number of words in the document

# inverse document frequency (IDF):
#   log(total number of documents / number of documents containing the term)
Posted
Updated 10-Dec-10 6:46am
v2
Comments
HimanshuJoshi 10-Dec-10 12:46pm    
Added pre blocks.
HimanshuJoshi 10-Dec-10 12:46pm    
So what seems to be the problem?
Dave Kreskowiak 10-Dec-10 13:40pm    
Soooooooooooooooooooo...... did you have a question or did you just want to post a meaningless code snippet for everyone to completely ignore?
Keith Barrow 12-Dec-10 7:15am    
Nice code. What is the problem?
caisar oentoro 6-Dec-13 4:01am    
Well, no explanation about your code?

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900