Given that the tweets carry location information, find the happiest state:
Answer: happiest_state.py:
import sys
def hw():
    """Print the sentiment lexicon, then neighbour-derived scores for unknown words.

    Reads the tab-delimited lexicon ``AFINN-111.txt`` (``term<TAB>score`` per
    line) and ``output.txt`` (one JSON document per line) from the current
    working directory.  It first prints the lexicon as a list of
    ``(term, score)`` pairs.  Then, for every word of every tweet's ``text``
    that is *not* in the lexicon, it prints a ``(word, score)`` tuple where the
    score is derived from the polarity of the word's immediate neighbours.

    Raises:
        IOError/OSError: if either input file cannot be opened.
        ValueError: if a non-blank line is not valid lexicon/JSON data.
    """
    import json  # kept function-local, as in the original block

    scores = {}
    with open("AFINN-111.txt") as afinnfile:
        for line in afinnfile:
            if not line.strip():
                continue  # tolerate blank lines instead of failing to unpack
            term, score = line.split("\t")  # the file is tab-delimited
            scores[term] = int(score)  # int() ignores the trailing newline
    # list(...) keeps the printed form identical on Python 2 and Python 3.
    print(list(scores.items()))

    tweets = []
    with open("output.txt") as outputfile:
        for line in outputfile:
            if line.strip():  # json.loads() rejects empty lines
                tweets.append(json.loads(line))

    nonsentiment_scores = []
    for item in tweets:
        # json.loads yields dicts, so the text must be read by key; records
        # without a "text" key are skipped.
        text = item.get("text") if isinstance(item, dict) else None
        if not text:
            continue
        words = text.strip().split()
        for i, word in enumerate(words):
            term = word.strip().lower()
            if term in scores:
                continue
            # Start from zero for every word so that one word's score does
            # not leak into the next word of the same tweet.
            score = 0
            if i > 0 and is_present(scores, words[i - 1]):
                score += 1 if get_score(scores, words[i - 1]) > 0 else -1
            if i + 1 < len(words) and is_present(scores, words[i + 1]):
                # Score the *next* word here (the original re-read the
                # previous one).
                score += 1 if get_score(scores, words[i + 1]) > 0 else -1
            # Divisor 3 is the original's constant; 3.0 avoids Python 2
            # integer division, which would floor every result to 0 or -1.
            nonsentiment_scores.append((term, score / 3.0))

    for item in nonsentiment_scores:
        print(item)
def is_present(scores, word):
    """Return True if *word* is a key of *scores* after normalisation.

    The word is stripped of surrounding whitespace and lower-cased before
    the lookup.

    Args:
        scores: mapping whose keys are the known terms.
        word: the raw word to look up.
    """
    # str.strip()/str.lower() replace the undefined trim()/tolower() helpers.
    return word.strip().lower() in scores
def get_score(scores, word):
    """Return the polarity sign of *word* according to *scores*.

    The word is stripped of surrounding whitespace and lower-cased before
    the lookup.

    Args:
        scores: mapping of term -> numeric sentiment score.
        word: the raw word to look up.

    Returns:
        1 if the stored score is positive, -1 if it is negative, and 0 if
        it is zero or the word is not in *scores*.
    """
    # str.strip()/str.lower() replace the undefined trim()/tolower() helpers.
    term = word.strip().lower()
    value = scores.get(term, 0)
    if value > 0:
        return 1
    if value < 0:
        return -1
    return 0
def lines(fp):
    """Print the number of lines remaining in the open file object *fp*.

    The file is consumed from its current position to the end.

    Args:
        fp: an iterable file-like object opened for reading.
    """
    # Count while streaming rather than materialising every line with
    # readlines(); print() in function form is valid on Python 2 and 3.
    print(sum(1 for _ in fp))
# Two-letter postal abbreviation -> full name.  Besides the 50 states the
# table also lists DC, several territories (AS, GU, MP, PR, VI) and an
# 'NA' -> 'National' entry.
states = {
    "AK": "Alaska",
    "AL": "Alabama",
    "AR": "Arkansas",
    "AS": "American Samoa",
    "AZ": "Arizona",
    "CA": "California",
    "CO": "Colorado",
    "CT": "Connecticut",
    "DC": "District of Columbia",
    "DE": "Delaware",
    "FL": "Florida",
    "GA": "Georgia",
    "GU": "Guam",
    "HI": "Hawaii",
    "IA": "Iowa",
    "ID": "Idaho",
    "IL": "Illinois",
    "IN": "Indiana",
    "KS": "Kansas",
    "KY": "Kentucky",
    "LA": "Louisiana",
    "MA": "Massachusetts",
    "MD": "Maryland",
    "ME": "Maine",
    "MI": "Michigan",
    "MN": "Minnesota",
    "MO": "Missouri",
    "MP": "Northern Mariana Islands",
    "MS": "Mississippi",
    "MT": "Montana",
    "NA": "National",
    "NC": "North Carolina",
    "ND": "North Dakota",
    "NE": "Nebraska",
    "NH": "New Hampshire",
    "NJ": "New Jersey",
    "NM": "New Mexico",
    "NV": "Nevada",
    "NY": "New York",
    "OH": "Ohio",
    "OK": "Oklahoma",
    "OR": "Oregon",
    "PA": "Pennsylvania",
    "PR": "Puerto Rico",
    "RI": "Rhode Island",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TN": "Tennessee",
    "TX": "Texas",
    "UT": "Utah",
    "VA": "Virginia",
    "VI": "Virgin Islands",
    "VT": "Vermont",
    "WA": "Washington",
    "WI": "Wisconsin",
    "WV": "West Virginia",
    "WY": "Wyoming",
}
def main():
    """Run hw(), then print the line counts of the two files named on the CLI.

    ``sys.argv[1]`` and ``sys.argv[2]`` are opened for reading (named
    ``sent_file`` and ``tweet_file`` respectively) and each is passed to
    ``lines()``.

    Raises:
        IndexError: if fewer than two command-line arguments are given.
        IOError/OSError: if either file cannot be opened.
    """
    # Context managers guarantee both handles are closed even if hw() or
    # lines() raises; the original leaked them.
    with open(sys.argv[1]) as sent_file, open(sys.argv[2]) as tweet_file:
        hw()
        lines(sent_file)
        lines(tweet_file)
# Script entry point: only run main() when executed directly, not on import.
if __name__ == "__main__":
    main()
No comments:
Post a Comment