"""
0.1 Version of Term Watching Program
Have you ever noticed those legal terms pages on websites that say
this information can change at any moment, with or without us telling you?
Well, this software is the beginning of my attempt to give people
tools to watch all such things. In the future, it will probably be
a web service where users can submit URLs and sign up for e-mail letters
or just subscribe to a feed based on the processing of the legal terms
URLs.
1 I have to have a list of urls stored
2 have to download copies of files from the web
3 need a way to find the differences in files
4 A nifty report mechanism (for the webbased version it emails a list)
5 ish run this automatically
Chris B Stones
April 6, 2009
"""
def urlList():
    """Return the watched URLs stored in ``urls.txt``.

    Each entry is returned exactly as it appears in the file, including
    its trailing newline, so callers strip it themselves. Lines that
    contain only whitespace are skipped so that a stray blank line in the
    file is not handed on as an (invalid) URL.

    Raises:
        IOError: if ``urls.txt`` cannot be opened.
    """
    # The context manager closes the file even if reading fails part-way.
    with open('urls.txt', 'r') as urlfile:
        return [line for line in urlfile if line.strip()]
def addUrl(urlstr):
    """Append a URL to ``urls.txt``, one URL per line.

    Surrounding whitespace (including a newline the caller may already
    have attached) is removed first, and an empty entry is ignored, so the
    list file never collects blank lines.

    Args:
        urlstr: the URL text to store.
    """
    cleaned = urlstr.strip()
    if not cleaned:
        # Nothing to record; writing it would add a blank line to the list.
        return
    # Append mode creates the file on first use; ``with`` guarantees the
    # handle is closed even if the write fails.
    with open('urls.txt', 'a') as urlfile:
        urlfile.write(cleaned + "\n")
def cacheTerm(url,suffix,name):
    """Download the terms page at ``url`` and save it as ``name + suffix``.

    Args:
        url: address of the page; surrounding whitespace (such as the
            newline left on entries read from the URL list) is ignored.
        suffix: file-name ending, e.g. ``"_new.txt"``.
        name: file-name stem the suffix is appended to.

    The response is read completely before the destination file is
    opened, so a failed download does not wipe out a previously cached
    copy. The bytes are written unmodified (binary mode).
    """
    # urlopen lives in urllib.request on Python 3 and in urllib on Python 2.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    urlref = urlopen(url.strip())
    try:
        content = urlref.read()
    finally:
        # Always release the connection, even if reading fails.
        urlref.close()

    with open(name + suffix, 'wb') as newfile:
        newfile.write(content)
def compareStrs(a,b):
    """Tell whether two strings differ.

    Args:
        a: first string.
        b: second string.

    Returns:
        True if the strings are different, False if they are the same.
    """
    # Plain inequality also catches a difference in length (one string
    # being a prefix of the other), which a pairwise walk over zip(a, b)
    # stops too early to notice.
    return a != b
def compareFiles(old,new):
    """Count the lines that differ between two versions of a file.

    Lines are compared position by position. Lines that exist in only one
    of the two versions (the longer file's tail) each count as one
    difference. When at least one difference is found, a one-line report
    is printed.

    Args:
        old: lines of the previously cached copy.
        new: lines of the freshly downloaded copy.

    Returns:
        The number of differing lines (0 when the versions are identical).
    """
    # Materialise both inputs so their lengths can be compared below.
    old = list(old)
    new = list(new)

    change_count = sum(1 for x, y in zip(old, new) if x != y)
    # zip() stops at the shorter input; the leftover lines were added or
    # removed, so each of them is a change as well.
    change_count += abs(len(old) - len(new))

    # if the change is larger than some set value
    # tell us; for now keep it low
    if change_count > 0:
        # Single-argument form prints the same text on Python 2 and 3.
        # The double space after "are" matches the output of the original
        # comma-separated print statement.
        print("There are  %s differences in this files." % change_count)
    return change_count
# Interactive entry point: show the menu, then either register a new URL
# or download every registered page and compare it with the cached copy.
# NOTE: this runs at module level, i.e. also when the file is imported.
print("Term Watching Program")
print("1 Add a URL")
print("2 Run Diff Test")

# raw_input only exists on Python 2; input is its Python 3 equivalent.
try:
    read_line = raw_input
except NameError:
    read_line = input

choice = read_line("Choice: ")
if choice == '1':
    u = read_line("Add a URL:")
    addUrl(u)
else:
    # Results returned by compareFiles, one per URL that already had a
    # cached copy. Collected here but not displayed yet.
    report = []
    for u in urlList():
        # Entries come back with their trailing newline; drop it and skip
        # blank entries, which cannot be downloaded.
        url = u.strip()
        if not url:
            continue

        # Clean the url for a nice file name
        name = url.replace("http://", '')
        name = name.replace("/", "_")
        name = name.replace(".", "_")
        old_path = name + "_old.txt"
        new_path = name + "_new.txt"

        # Fetch the current version of the page.
        cacheTerm(url, "_new.txt", name)
        # Binary mode: compare and copy the downloaded bytes untouched.
        with open(new_path, 'rb') as new:
            new_lines = new.readlines()

        # Only a missing/unreadable old copy is expected here (first run
        # for this URL); any other error should surface, not be hidden.
        try:
            with open(old_path, 'rb') as old:
                old_lines = old.readlines()
        except IOError:
            print("prob old file not existing")
        else:
            report.append(compareFiles(old_lines, new_lines))

        # Overwrite the old copy (or create it the first time) so the
        # next run compares against what was just downloaded.
        with open(old_path, 'wb') as writeover:
            writeover.writelines(new_lines)