February 2012
M T W T F S S
« Jul «-»  
 12345
6789101112
13141516171819
20212223242526
272829  

Leaving Chris World?

Why not bring back a souvenir?

Archives

Term Watching 0.1

 
 
"""
0.1 Version of Term Watching Program
 
Have you ever noticed those legal terms pages on websites that say
this information can change at any moment, with or without us telling you?
Well, this software is the beginning of my attempt to give people
tools to watch all such things. In the future, it will probably be
a web service where users can submit URLs and sign up for the e-mail letters,
or just subscribe to a feed based on the processing of the legal terms
URLs.
 
1 I have to have a list of URLs stored
2 I have to download copies of the files from the web
3 I need a way to find the differences between files
4 A nifty report mechanism (for the web-based version, it emails a list)
5 (ish) Run all of this automatically
 
Chris B Stones
April 6, 2009
"""
 
# Returns the list of watched URLs stored in urls.txt (one per line).
def urlList():
	"""Return the URLs listed in ``urls.txt``, in file order.

	Surrounding whitespace (including the line terminator) is removed from
	every entry and blank lines are skipped, so each returned string is a
	bare URL.

	Raises IOError if ``urls.txt`` cannot be opened.
	"""
	# ``with`` closes the file even if reading fails part-way through.
	with open('urls.txt','r') as urlfile:
		return [line.strip() for line in urlfile if line.strip()]
 
def addUrl(urlstr):
	"""Append ``urlstr`` to ``urls.txt`` as one line.

	Surrounding whitespace is stripped first so the file never gains
	blank or double-terminated lines; empty input is ignored.

	Raises IOError if ``urls.txt`` cannot be opened for appending.
	"""
	urlstr = urlstr.strip()
	if not urlstr:
		# Nothing to store (e.g. the user just pressed Enter).
		return
	# ``with`` closes the file even if the write fails.
	with open('urls.txt','a') as urlfile:
		urlfile.write(urlstr+"\n")
 
# Download the web page that holds the terms and cache it on disk
# in the file <name><suffix>.
def cacheTerm(url,suffix,name):
	"""Fetch ``url`` and write the response body to the file ``name+suffix``.

	url    -- address of the page to download
	suffix -- text appended to ``name`` to form the output file name
	name   -- base name of the output file

	The page is read completely before the output file is opened, so a
	failed download does not truncate an existing cache file.
	"""
	import contextlib
	import urllib
	# closing() guarantees the connection is released even if read() raises.
	with contextlib.closing(urllib.urlopen(url)) as urlref:
		content = urlref.read()
	with open(name+suffix,'w') as newfile:
		newfile.write(content)
 
# Returns True if the two strings are different
# and False if they are the same.
def compareStrs(a,b):
	"""Return True when ``a`` and ``b`` differ, False when they are equal.

	Strings of different lengths always count as different, including
	the case where one is a prefix of the other.
	"""
	# Direct inequality also catches a length mismatch, which a
	# character-by-character walk over zip(a, b) silently ignores.
	return a != b
 
# Compare two files, given as lists of their lines,
# and print a report when they differ.
def compareFiles(old,new):
	"""Count the differing lines between ``old`` and ``new``.

	old -- lines of the previously cached file
	new -- lines of the freshly downloaded file

	Lines are compared position by position. Lines that exist in only
	one of the two files (added or removed at the end) each count as one
	change. A one-line report is printed when at least one change is
	found.

	Returns the number of changes (0 when the files are identical).
	"""
	old = list(old)
	new = list(new)
	# Lines present in both files, compared pairwise.
	change_count = sum(1 for x,y in zip(old,new) if x != y)
	# zip() stops at the shorter file; every unpaired line is a change too.
	change_count += abs(len(old)-len(new))
	# if the change is larger than some set value
	# tell us; for now keep it low
	if change_count > 0:
		# Single-string form prints exactly what the original
		# comma-separated print statement produced.
		print("There are  %s differences in this files." % change_count)
	return change_count
 
# Interactive entry point: show the menu, then either store a new URL
# or run the diff test over every stored URL.
print("Term Watching Program")
print("1 Add a URL")
print("2 Run Diff Test")
choice = raw_input("Choice: ")
 
if choice == '1':
	u = raw_input("Add a URL:")
	addUrl(u)
else:
	# These imports are not used below; they are kept because they bind
	# module-level names that code outside this section may rely on.
	import urllib
	from urllib import urlopen
	report = []
	for u in urlList():
		# A blank line in the URL list is not a URL; skip it.
		if not u.strip():
			continue

		# Clean the urls for a nice name
		name = u.replace("\n",'')
		name = name.replace("http://",'')
		name = name.replace("/","_")
		name = name.replace(".","_")
 
		# Download the current copy; the URL is passed without its
		# line terminator.
		cacheTerm(u.strip(),"_new.txt",name)

		# Compare against the previous copy, if there is one.
		try:
			old = open(name+"_old.txt",'r')
		except IOError:
			# Only the missing/unreadable old file is expected here;
			# any other error is allowed to surface.
			print("prob old file not existing")
		else:
			with old:
				with open(name+"_new.txt",'r') as new:
					# Collect whatever compareFiles returns for this URL.
					report.append(compareFiles(old.readlines(),new.readlines()))
 
		# overwrite old file (or create it for the first time)
		with open(name+"_new.txt",'r') as new:
			latest = new.read()
		with open(name+"_old.txt",'w') as writeover:
			writeover.write(latest)
 

You must be logged in to post a comment.